# XOR model using a multi-layer perceptron
The code below solves the XOR problem with a multi-layer perceptron, because a single-layer perceptron cannot solve this problem.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('device: ', device)

device:  cpu


In [None]:
# Fix the random seed so that runs are repeatable.
torch.manual_seed(777)
if device == 'cuda':
    # When running on the GPU, also seed every CUDA generator explicitly.
    torch.cuda.manual_seed_all(777)

In [None]:
class multi_layer_perceptron(nn.Module):
    """Fully connected network with sigmoid activations for the XOR problem.

    The network maps 2 input features to a single sigmoid output through
    three hidden layers of 10 units each (2 -> 10 -> 10 -> 10 -> 1).

    Args:
        device: Device the parameters are moved to. When ``None`` (the
            default), CUDA is used if it is available, otherwise the CPU.
    """

    def __init__(self, device=None):
        super().__init__()
        if device is None:
            # Same selection rule the notebook uses for its global device,
            # resolved here so the class does not depend on a module global.
            device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Attribute names, the Sequential wrapping and the creation order are
        # kept so state_dict keys and seeded initialisation stay the same.
        self.layer_1 = nn.Sequential(
            nn.Linear(2, 10),
            nn.Sigmoid(),
        )
        self.layer_2 = nn.Sequential(
            nn.Linear(10, 10),
            nn.Sigmoid(),
        )
        self.layer_3 = nn.Sequential(
            nn.Linear(10, 10),
            nn.Sigmoid(),
        )
        self.layer_4 = nn.Sequential(
            nn.Linear(10, 1),
            nn.Sigmoid(),
        )

        # Move the whole module once instead of moving each layer separately.
        self.to(device)

    def forward(self, x):
        """Apply the four layers in order and return the sigmoid output."""
        out = self.layer_1(x)
        out = self.layer_2(out)
        out = self.layer_3(out)
        out = self.layer_4(out)
        return out


# Two separate instances: one trained on the full batch, one via a DataLoader.
model_origin = multi_layer_perceptron()
model_dataloader = multi_layer_perceptron()

In [None]:
# Binary cross-entropy loss, shared by both training procedures.
criterion = nn.BCELoss().to(device)

# One plain-SGD optimizer per model, both with a learning rate of 1.
optimizer_origin = optim.SGD(params=model_origin.parameters(), lr=1)
optimizer_dataloader = optim.SGD(params=model_dataloader.parameters(), lr=1)

### Correct training on a small dataset

In [None]:
# The four XOR input pairs and their targets, fed to the model as one batch.
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

nb_epochs = 10000

# range(nb_epochs + 1) so that the final epoch (nb_epochs) is also reported.
for epoch in range(nb_epochs + 1):
    hypothesis = model_origin(X)
    cost = criterion(hypothesis, Y)

    optimizer_origin.zero_grad()
    cost.backward()
    optimizer_origin.step()

    if epoch % 100 == 0:
        # .item() hands a plain Python float to the formatter instead of a tensor.
        print("Epoch {:4d}/{} Cost: {:.5f}".format(epoch, nb_epochs, cost.item()))

Epoch    0/10000 Cost: 0.69490
Epoch  100/10000 Cost: 0.69316
Epoch  200/10000 Cost: 0.69315
Epoch  300/10000 Cost: 0.69315
Epoch  400/10000 Cost: 0.69315
Epoch  500/10000 Cost: 0.69315
Epoch  600/10000 Cost: 0.69315
Epoch  700/10000 Cost: 0.69314
Epoch  800/10000 Cost: 0.69314
Epoch  900/10000 Cost: 0.69314
Epoch 1000/10000 Cost: 0.69314
Epoch 1100/10000 Cost: 0.69314
Epoch 1200/10000 Cost: 0.69313
Epoch 1300/10000 Cost: 0.69313
Epoch 1400/10000 Cost: 0.69313
Epoch 1500/10000 Cost: 0.69313
Epoch 1600/10000 Cost: 0.69313
Epoch 1700/10000 Cost: 0.69312
Epoch 1800/10000 Cost: 0.69312
Epoch 1900/10000 Cost: 0.69312
Epoch 2000/10000 Cost: 0.69312
Epoch 2100/10000 Cost: 0.69311
Epoch 2200/10000 Cost: 0.69311
Epoch 2300/10000 Cost: 0.69311
Epoch 2400/10000 Cost: 0.69311
Epoch 2500/10000 Cost: 0.69310
Epoch 2600/10000 Cost: 0.69310
Epoch 2700/10000 Cost: 0.69309
Epoch 2800/10000 Cost: 0.69309
Epoch 2900/10000 Cost: 0.69308
Epoch 3000/10000 Cost: 0.69308
Epoch 3100/10000 Cost: 0.69307
Epoch 32

### Incorrect use of DataLoader (using mini-batches)
At first I used mini-batches, but this does not work: the dataset is too small for mini-batch training to reach appropriate parameters and train the model.

In [None]:
class xor_dataset(Dataset):
    """Map-style dataset holding the four rows of the XOR truth table."""

    def __init__(self):
        # Enumerate every (a, b) pair of bits and label it with a XOR b.
        self.x_train = [[a, b] for a in (0, 1) for b in (0, 1)]
        self.y_train = [[a ^ b] for a, b in self.x_train]

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_train)

    def __getitem__(self, idx):
        """Return the (input, target) pair at ``idx`` as float tensors on ``device``."""
        features = torch.FloatTensor(self.x_train[idx])
        target = torch.FloatTensor(self.y_train[idx])
        return features.to(device), target.to(device)

dataset = xor_dataset()

# Shuffled mini-batches of two samples; drop_last discards any incomplete batch.
batch_size = 2
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, drop_last=True)
iteration = len(dataloader)  # number of mini-batches per epoch


nb_epochs = 10000

# range(nb_epochs + 1) so that the final epoch (nb_epochs) is also reported.
for epoch in range(nb_epochs + 1):
    avg_cost = 0.0
    # Dedicated batch names so the loop does not overwrite any full-dataset
    # tensors bound to X / Y.
    for x_batch, y_batch in dataloader:
        hypothesis = model_dataloader(x_batch)
        cost = criterion(hypothesis, y_batch)

        optimizer_dataloader.zero_grad()
        cost.backward()
        optimizer_dataloader.step()

        # Accumulate a Python float: adding the loss tensor itself would keep
        # each batch's autograd graph referenced through avg_cost.
        avg_cost += cost.item() / iteration
    if epoch % 100 == 0:
        print("Epoch {:4d}/{} Cost: {:.5f}".format(epoch, nb_epochs, avg_cost))

Epoch    0/10000 Cost: 0.71784
Epoch  100/10000 Cost: 0.69862
Epoch  200/10000 Cost: 0.69565
Epoch  300/10000 Cost: 0.69545
Epoch  400/10000 Cost: 0.69315
Epoch  500/10000 Cost: 0.69318
Epoch  600/10000 Cost: 0.83838
Epoch  700/10000 Cost: 0.83446
Epoch  800/10000 Cost: 0.83665
Epoch  900/10000 Cost: 0.69330
Epoch 1000/10000 Cost: 0.69315
Epoch 1100/10000 Cost: 0.69326
Epoch 1200/10000 Cost: 0.69314
Epoch 1300/10000 Cost: 0.83529
Epoch 1400/10000 Cost: 0.83700
Epoch 1500/10000 Cost: 0.83382
Epoch 1600/10000 Cost: 0.69557
Epoch 1700/10000 Cost: 0.69426
Epoch 1800/10000 Cost: 0.69387
Epoch 1900/10000 Cost: 0.69481
Epoch 2000/10000 Cost: 0.69333
Epoch 2100/10000 Cost: 0.69350
Epoch 2200/10000 Cost: 0.83345
Epoch 2300/10000 Cost: 0.83557
Epoch 2400/10000 Cost: 0.69377
Epoch 2500/10000 Cost: 0.69409
Epoch 2600/10000 Cost: 0.69332
Epoch 2700/10000 Cost: 0.83323
Epoch 2800/10000 Cost: 0.69318
Epoch 2900/10000 Cost: 0.69318
Epoch 3000/10000 Cost: 0.69316
Epoch 3100/10000 Cost: 0.69315
Epoch 32

### Validation

In [None]:
# Evaluate both models on the full XOR truth table without tracking gradients.
with torch.no_grad():
    X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
    Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

    # Model trained on the full batch: threshold the sigmoid output at 0.5.
    hypothesis_origin = model_origin(X)
    prediction_origin = hypothesis_origin.gt(0.5).float()
    accuracy_origin = prediction_origin.eq(Y).float().mean()

    # Model trained through the DataLoader, evaluated the same way.
    hypothesis_dataloader = model_dataloader(X)
    prediction_dataloader = hypothesis_dataloader.gt(0.5).float()
    accuracy_dataloader = prediction_dataloader.eq(Y).float().mean()

    # For each model print: raw outputs, thresholded predictions, targets, accuracy.
    print(
        hypothesis_origin.detach().cpu().numpy(),
        prediction_origin.detach().cpu().numpy(),
        Y.detach().cpu().numpy(),
        accuracy_origin.item(),
        sep='\n',
    )

    print(
        hypothesis_dataloader.detach().cpu().numpy(),
        prediction_dataloader.detach().cpu().numpy(),
        Y.detach().cpu().numpy(),
        accuracy_dataloader.item(),
        sep='\n',
    )

[[1.1168801e-04]
 [9.9982882e-01]
 [9.9984229e-01]
 [1.8529482e-04]]
[[0.]
 [1.]
 [1.]
 [0.]]
[[0.]
 [1.]
 [1.]
 [0.]]
1.0
[[0.33999333]
 [0.9996611 ]
 [0.33957064]
 [0.3399027 ]]
[[0.]
 [1.]
 [0.]
 [0.]]
[[0.]
 [1.]
 [1.]
 [0.]]
0.75
