In [1]:
import random
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class XORNet(nn.Module):
    """Small fully connected network used to learn XOR.

    Architecture: Linear(2 -> 4) -> sigmoid -> Linear(4 -> 1) -> sigmoid.
    The final sigmoid keeps the output strictly inside (0, 1), so it can be
    fed directly to ``nn.BCELoss`` as a probability.
    """

    def __init__(self):
        super(XORNet, self).__init__()
        self.hidden = nn.Linear(2, 4)  # 2 input bits -> 4 hidden units
        self.output = nn.Linear(4, 1)  # 4 hidden units -> 1 probability

    def forward(self, x):
        """Return the predicted probability for ``x``.

        Args:
            x: float tensor whose last dimension has size 2 (the two input
                bits); any leading dimensions are preserved.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1, with values in (0, 1).
        """
        # torch.sigmoid replaces the deprecated F.sigmoid alias.
        h1 = torch.sigmoid(self.hidden(x))
        return torch.sigmoid(self.output(h1))
        


In [3]:
def dataset(n):
    """Yield ``n`` random XOR training samples, one at a time.

    Args:
        n: number of samples to generate.

    Yields:
        ``(x, y)`` where ``x`` is a float32 tensor of shape (1, 2) holding the
        two input bits and ``y`` is a float32 tensor of shape (1, 1) holding
        their XOR (0.0 or 1.0).
    """
    for _ in range(n):
        # Draw order (x1, then x2) is kept so a seeded `random` reproduces
        # the same sample stream as before.
        x1 = random.choice([0, 1])
        x2 = random.choice([0, 1])
        y = x1 ^ x2
        # torch.tensor with an explicit dtype replaces the legacy
        # torch.FloatTensor(...) constructor.
        yield (
            torch.tensor([[x1, x2]], dtype=torch.float32),
            torch.tensor([[y]], dtype=torch.float32),
        )

In [4]:
# Train XORNet on the CPU, one sample per optimisation step.
mdl = XORNet()
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(mdl.parameters(), lr=0.5)
print(mdl)

running_loss = 0.0
start = time.time()
for i, (x, y) in enumerate(dataset(10000)):
    optimizer.zero_grad()
    out = mdl(x)
    loss = criterion(out, y)
    loss.backward()
    optimizer.step()
    # Accumulate *before* reporting so that every window averages exactly
    # 1000 losses (previously the first window held only 999).
    running_loss += loss.item()
    if i % 1000 == 999:
        print('{:.3f}'.format(running_loss / 1000))
        running_loss = 0.0
print('{:.2f} sec'.format(time.time() - start))

XORNet(
  (hidden): Linear(in_features=2, out_features=4, bias=True)
  (output): Linear(in_features=4, out_features=1, bias=True)
)
0.510
0.053
0.014
0.008
0.005
0.004
0.003
0.003
0.002
0.002
1.23 sec


In [5]:
# Sanity check: XOR(1, 1) = 0, so the predicted probability should be near 0.
x = torch.tensor([1.0, 1.0])
with torch.no_grad():  # inference only; skip building the autograd graph
    pred = mdl(x)
pred

tensor(1.00000e-03 *
       [ 3.0413])

In [6]:
# Same per-sample training as the CPU run, but with the model and every
# sample moved to `device` (CUDA when available, otherwise CPU).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
mdl = XORNet().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(mdl.parameters(), lr=0.5)
print(mdl)

# Initialise explicitly: this cell used to rely on `running_loss` left over
# from the previous training cell, which fails on a fresh kernel and leaks
# the old remainder into the first reported average.
running_loss = 0.0
start = time.time()
for i, (x, y) in enumerate(dataset(10000)):
    x, y = x.to(device), y.to(device)
    optimizer.zero_grad()
    out = mdl(x)
    loss = criterion(out, y)
    loss.backward()
    optimizer.step()
    # Accumulate before reporting so each window averages exactly 1000 losses.
    running_loss += loss.item()
    if i % 1000 == 999:
        print('{:.3f}'.format(running_loss / 1000))
        running_loss = 0.0
print('{:.2f} sec'.format(time.time() - start))

XORNet(
  (hidden): Linear(in_features=2, out_features=4, bias=True)
  (output): Linear(in_features=4, out_features=1, bias=True)
)
0.631
0.080
0.014
0.007
0.005
0.004
0.003
0.002
0.002
0.002
5.99 sec


In [7]:
# Sanity check on `device`: XOR(1, 1) = 0, so the output should be near 0.
x = torch.tensor([1.0, 1.0], device=device)  # create directly on the target device
with torch.no_grad():  # inference only; skip building the autograd graph
    pred = mdl(x)
pred

tensor(1.00000e-03 *
       [ 3.5465], device='cuda:0')

In [8]:
# Move the trained model back to the CPU and repeat the (1, 1) check there.
x = torch.tensor([1.0, 1.0])
mdl.to('cpu')
with torch.no_grad():  # inference only; skip building the autograd graph
    pred = mdl(x)
pred

tensor(1.00000e-03 *
       [ 3.5465])

In [9]:
def batched_dataset(n, batch_size=32):
    """Yield ``n`` random XOR samples grouped into mini-batches.

    Every batch holds exactly ``batch_size`` samples except possibly the
    last, which holds the remainder. Nothing is yielded when ``n`` is 0.

    Args:
        n: total number of samples to generate.
        batch_size: maximum number of samples per batch; must be >= 1.

    Yields:
        ``(b_x, b_y)`` where ``b_x`` is a float32 tensor of shape (B, 2) with
        the input bits and ``b_y`` is a float32 tensor of shape (B, 1) with
        their XOR, for ``1 <= B <= batch_size``.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1. Because this is a
            generator, the error surfaces on the first iteration.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got {}'.format(batch_size))

    b_x, b_y = [], []
    for _ in range(n):
        x1 = random.choice([0, 1])
        x2 = random.choice([0, 1])
        b_x.append([x1, x2])
        b_y.append([x1 ^ x2])
        # Emit only once the batch is full. The old code checked the index
        # before appending, which made the first batch one sample short.
        if len(b_x) == batch_size:
            yield (
                torch.tensor(b_x, dtype=torch.float32),
                torch.tensor(b_y, dtype=torch.float32),
            )
            b_x, b_y = [], []
    # Flush the remainder, but never emit an empty batch.
    if b_x:
        yield (
            torch.tensor(b_x, dtype=torch.float32),
            torch.tensor(b_y, dtype=torch.float32),
        )
    

In [13]:
# Train XORNet on the CPU with mini-batches (default batch size of
# batched_dataset) and a much larger learning rate than the per-sample runs.
mdl = XORNet()
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(mdl.parameters(), lr=10.)
print(mdl)

# Initialise explicitly: this cell used to rely on `running_loss` left over
# from an earlier training cell, which fails on a fresh kernel.
running_loss = 0.0
start = time.time()
for i, (x, y) in enumerate(batched_dataset(10000)):
    optimizer.zero_grad()
    out = mdl(x)
    loss = criterion(out, y)
    loss.backward()
    optimizer.step()
    # Accumulate before reporting so each window averages exactly 30 batches.
    running_loss += loss.item()
    if i % 30 == 29:
        print('{:.3f}'.format(running_loss / 30))
        running_loss = 0.0
print('{:.2f} sec'.format(time.time() - start))

XORNet(
  (hidden): Linear(in_features=2, out_features=4, bias=True)
  (output): Linear(in_features=4, out_features=1, bias=True)
)
0.959
0.690
0.337
0.046
0.021
0.012
0.009
0.007
0.006
0.005
0.08 sec


In [16]:
# Mini-batch training with the model and each batch moved to `device`
# (`device` is defined in the earlier per-sample device-training cell).
# NOTE(review): the recorded run of this cell plateaued around 0.55 instead
# of converging; with lr=10 the outcome appears sensitive to the random
# initialisation -- consider seeding (torch.manual_seed / random.seed) or
# lowering the learning rate. TODO confirm.
mdl = XORNet().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(mdl.parameters(), lr=10.)
print(mdl)

# Initialise explicitly: this cell used to rely on `running_loss` left over
# from an earlier training cell, which fails on a fresh kernel.
running_loss = 0.0
start = time.time()
for i, (x, y) in enumerate(batched_dataset(10000)):
    x, y = x.to(device), y.to(device)
    optimizer.zero_grad()
    out = mdl(x)
    loss = criterion(out, y)
    loss.backward()
    optimizer.step()
    # Accumulate before reporting so each window averages exactly 30 batches.
    running_loss += loss.item()
    if i % 30 == 29:
        print('{:.3f}'.format(running_loss / 30))
        running_loss = 0.0
print('{:.2f} sec'.format(time.time() - start))

XORNet(
  (hidden): Linear(in_features=2, out_features=4, bias=True)
  (output): Linear(in_features=4, out_features=1, bias=True)
)
0.908
0.557
0.540
0.558
0.538
0.527
0.549
0.539
0.573
0.493
0.18 sec
