## XOR nn

In [1]:
import torch

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [3]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [4]:
# nn layers
linear1 = torch.nn.Linear(2, 2, bias = True)
linear2 = torch.nn.Linear(2, 1, bias = True)
sigmoid = torch.nn.Sigmoid()

In [5]:
# model
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

In [6]:
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 1)

In [7]:
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    
    cost = criterion(hypothesis, y)
    cost.backward()
    optimizer.step()
    
    if step % 100 == 0:
        print(step, cost.item())

0 0.7434073090553284
100 0.693165123462677
200 0.6931577920913696
300 0.6931517124176025
400 0.6931463479995728
500 0.6931411027908325
600 0.6931357383728027
700 0.6931294798851013
800 0.6931220293045044
900 0.6931126117706299
1000 0.6930999755859375
1100 0.6930822730064392
1200 0.6930569410324097
1300 0.6930190324783325
1400 0.6929606199264526
1500 0.6928660273551941
1600 0.6927032470703125
1700 0.6923959255218506
1800 0.6917300820350647
1900 0.6899652481079102
2000 0.6838312149047852
2100 0.6561650037765503
2200 0.4310865104198456
2300 0.13488933444023132
2400 0.06630323827266693
2500 0.04216768220067024
2600 0.03045358881354332
2700 0.023665759712457657
2800 0.019277628511190414
2900 0.016223931685090065
3000 0.013983718119561672
3100 0.012273887172341347
3200 0.010928073897957802
3300 0.009842442348599434
3400 0.008948973380029202
3500 0.008201291784644127
3600 0.0075667379423975945
3700 0.007021641358733177
3800 0.006548580713570118
3900 0.006134208757430315
4000 0.005768344737589

In [8]:
# Accuracy computation
with torch.no_grad():
    hypothesis = model(X)
    pred = (hypothesis > 0.5).float()
    accuracy = (pred == y).float().mean()
    print('\nHypothesis : ', hypothesis.detach().cpu().numpy(), '\nCorrect : ', pred.detach().cpu().numpy(), '\nAccuracy : ', accuracy.item())


Hypothesis :  [[0.00106364]
 [0.99889404]
 [0.99889404]
 [0.00165861]] 
Correct :  [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy :  1.0


## XOR_nn_wide_deep

In [9]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [12]:
# nn layers
linear1 = torch.nn.Linear(2, 10, bias = True)
linear2 = torch.nn.Linear(10, 10, bias = True)
linear3 = torch.nn.Linear(10, 10, bias = True)
linear4 = torch.nn.Linear(10, 1, bias = True)
sigmoid = torch.nn.Sigmoid()

In [13]:
# model
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid).to(device)

In [14]:
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 1)

In [15]:
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    
    cost = criterion(hypothesis, y)
    cost.backward()
    optimizer.step()
    
    if step % 100 == 0:
        print(step, cost.item())

0 0.6990118026733398
100 0.6931530237197876
200 0.6931522488594055
300 0.6931514739990234
400 0.6931506395339966
500 0.693149983882904
600 0.693149209022522
700 0.6931484937667847
800 0.6931477785110474
900 0.6931469440460205
1000 0.6931461691856384
1100 0.6931453943252563
1200 0.6931445598602295
1300 0.6931437253952026
1400 0.6931428909301758
1500 0.6931421160697937
1600 0.6931412220001221
1700 0.6931402683258057
1800 0.6931392550468445
1900 0.6931382417678833
2000 0.6931371688842773
2100 0.6931360960006714
2200 0.6931348443031311
2300 0.6931335926055908
2400 0.6931322813034058
2500 0.6931307911872864
2600 0.693129301071167
2700 0.6931276321411133
2800 0.6931259036064148
2900 0.6931239366531372
3000 0.6931218504905701
3100 0.6931197047233582
3200 0.6931171417236328
3300 0.6931145191192627
3400 0.6931114792823792
3500 0.693108320236206
3600 0.69310462474823
3700 0.6931005716323853
3800 0.6930960416793823
3900 0.6930910348892212
4000 0.6930853128433228
4100 0.6930787563323975
4200 0.693

In [16]:
# Accuracy computation
with torch.no_grad():
    hypothesis = model(X)
    pred = (hypothesis > 0.5).float()
    accuracy = (pred == y).float().mean()
    print('\nHypothesis : ', hypothesis.detach().cpu().numpy(), '\nCorrect : ', pred.detach().cpu().numpy(), '\nAccuracy : ', accuracy.item())


Hypothesis :  [[1.5254840e-04]
 [9.9984062e-01]
 [9.9979419e-01]
 [2.7489042e-04]] 
Correct :  [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy :  1.0


## MNIST_back_prop

In [18]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [19]:
# parameters
learning_rate = 0.5
batch_size = 10

In [20]:
# MNIST dataset
mnist_train = dsets.MNIST(root = 'MNIST_data/',
                         train = True,
                         transform = transforms.ToTensor(),
                         download = True)
mnist_test = dsets.MNIST(root = 'MNIST_data/',
                        train = False,
                        transform = transforms.ToTensor(),
                        download = True)

In [21]:
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset = mnist_train,
                                         batch_size = batch_size,
                                         shuffle = True,
                                         drop_last = True)

In [22]:
w1 = torch.nn.Parameter(torch.Tensor(784, 30)).to(device)
b1 = torch.nn.Parameter(torch.Tensor(30)).to(device)
w2 = torch.nn.Parameter(torch.Tensor(30, 10)).to(device)
b2 = torch.nn.Parameter(torch.Tensor(10)).to(device)

In [23]:
torch.nn.init.normal_(w1)
torch.nn.init.normal_(b1)
torch.nn.init.normal_(w2)
torch.nn.init.normal_(b2)

Parameter containing:
tensor([-0.0742, -0.3123, -0.4766,  0.4226,  0.4874, -0.3274, -0.3057,  1.3440,
         1.5518, -0.4213], requires_grad=True)

In [24]:
def sigmoid(x):
    return 1.0 / (1.0 + torch.exp(-x))

In [25]:
def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [26]:
X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)[:1000]
y_test = mnist_test.test_labels.to(device)[:1000]
i = 0



In [27]:
while not i == 10000:
    for X, y in data_loader:
        i += 1
        
        # forward
        X = X.view(-1, 28 * 28).to(device)
        y = torch.zeros((batch_size, 10)).scatter_(1, y.unsqueeze(1), 1).to(device) # one-hot
        l1 = torch.add(torch.matmul(X, w1), b1)
        a1 = sigmoid(l1)
        l2 = torch.add(torch.matmul(a1, w2), b2)
        pred = sigmoid(l2)
        
        diff = pred - y
        
        # Back prop
        d_l2 = diff * sigmoid_prime(l2)
        d_b2 = d_l2
        d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_l2)
        
        d_a1 = torch.matmul(d_l2, torch.transpose(w2, 0, 1))
        d_l1 = d_a1 * sigmoid_prime(l1)
        d_b1 = d_l1
        d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_l1)
        
        w1 = w1 - learning_rate * d_w1
        b1 = b1 - learning_rate * torch.mean(d_b1, 0)
        w2 = w2 - learning_rate * d_w2
        b2 = b2 - learning_rate * torch.mean(d_b2, 0)
        
        if i % 1000 == 0:
            l1 = torch.add(torch.matmul(X_test, w1), b1)
            a1 = sigmoid(l1)
            l2 = torch.add(torch.matmul(a1, w2), b2)
            pred = sigmoid(l2)
            acct_mat = torch.argmax(pred, 1) == y_test
            acct_res = acct_mat.sum()
            print(acct_res.item())
            
        if i == 10000:
            break

707
784
866
870
891
890
896
894
902
907
