In [1]:
# XOR by Single Layer
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# XOR truth table: the four binary input pairs and their target outputs.
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
Y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)

# Single-layer model: one affine map (2 inputs -> 1 output) followed by a sigmoid.
linear = nn.Linear(2, 1, bias=True)
sigmoid = nn.Sigmoid()
model = nn.Sequential(linear, sigmoid)
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(10000 + 1):
    # Full-batch gradient descent on the binary cross-entropy loss.
    cost = F.binary_cross_entropy(model(X), Y)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    if epoch % 100 == 0:
        # Fill the placeholders explicitly: handing the template to print()
        # as a separate argument would emit the literal "{} : {}" instead.
        print("{} : {}".format(epoch, cost.item()))

{} : {} 0 0.7260097861289978
{} : {} 100 0.6983544230461121
{} : {} 200 0.6955159902572632
{} : {} 300 0.6942253708839417
{} : {} 400 0.6936377882957458
{} : {} 500 0.6933704614639282
{} : {} 600 0.6932488679885864
{} : {} 700 0.6931935548782349
{} : {} 800 0.6931682825088501
{} : {} 900 0.6931567788124084
{} : {} 1000 0.6931515336036682
{} : {} 1100 0.6931490898132324
{} : {} 1200 0.6931480765342712
{} : {} 1300 0.6931475400924683
{} : {} 1400 0.6931473612785339
{} : {} 1500 0.6931473016738892
{} : {} 1600 0.6931472420692444
{} : {} 1700 0.6931471824645996
{} : {} 1800 0.6931471824645996
{} : {} 1900 0.6931471824645996
{} : {} 2000 0.6931471824645996
{} : {} 2100 0.6931471824645996
{} : {} 2200 0.6931471824645996
{} : {} 2300 0.6931471824645996
{} : {} 2400 0.6931471824645996
{} : {} 2500 0.6931472420692444
{} : {} 2600 0.6931471824645996
{} : {} 2700 0.6931471824645996
{} : {} 2800 0.6931471824645996
{} : {} 2900 0.6931471824645996
{} : {} 3000 0.6931471228599548
{} : {} 3100 0.69314

In [2]:
# Show the model's outputs for every XOR input, followed by the targets.
for shown in (model(X), Y):
    print(shown)

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<SigmoidBackward0>)
tensor([[0.],
        [1.],
        [1.],
        [0.]])


In [13]:
# Backpropagation implementation
# XOR by Multiple Layer

import torch

# XOR truth table: the four binary input pairs and their target outputs.
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
Y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)

# Parameters of a 2 -> 2 -> 1 network, drawn from a standard normal.
# torch.Tensor(*sizes) only allocates storage and leaves the values undefined
# (they may even be NaN/inf), so it must not be used as an initializer.
# w1 has to be (2, 2) so that X @ w1 yields one column per hidden unit;
# with a (2, 1) matrix the hidden width only appeared through broadcasting.
w1 = torch.randn(2, 2)
b1 = torch.randn(2)
w2 = torch.randn(2, 1)
b2 = torch.randn(1)

def sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + exp(-x)) of tensor x."""
    denominator = torch.exp(-x) + 1.
    return 1. / denominator

def sigmoid_prime(x):
    """Return the element-wise derivative of the logistic function at x.

    Uses the identity s'(x) = s(x) * (1 - s(x)), where s is ``sigmoid``.
    """
    s = sigmoid(x)
    return s * (1 - s)

learning_rate = 0.1

for step in range(10001):
    # Forward pass through both layers.
    l1 = X @ w1 + b1          # hidden pre-activation: X*w1 + b1
    a1 = sigmoid(l1)          # hidden activation
    l2 = a1 @ w2 + b2         # output pre-activation: a1*w2 + b2
    Y_pred = sigmoid(l2)      # predicted probability

    # Binary cross-entropy, averaged over all elements.
    log_likelihood = Y * torch.log(Y_pred) + (1 - Y) * torch.log(1 - Y_pred)
    cost = -torch.mean(log_likelihood)

    # Backward propagation: each gradient already has the chain rule applied.
    # Derivative of the binary cross-entropy w.r.t. Y_pred; the small constant
    # keeps the denominator away from zero.
    d_Y_pred = (Y_pred - Y) / (Y_pred * (1. - Y_pred) + 1e-8)

    # Layer 2
    d_l2 = d_Y_pred * sigmoid_prime(l2)
    d_b2 = d_l2               # d(l2)/d(b2) is 1
    # Differentiating a1*w2 + b2 w.r.t. w2 leaves only the layer input a1.
    d_w2 = a1.t() @ d_l2

    # Layer 1
    d_a1 = d_b2 @ w2.t()
    d_l1 = d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 = X.t() @ d_l1

    # Gradient-descent update. Note: the weight gradients above are summed
    # over the batch by the matmul, while the bias gradients are averaged.
    w1 = w1 - learning_rate * d_w1
    b1 = b1 - learning_rate * d_b1.mean(dim=0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * d_b2.mean(dim=0)

    if not step % 100:
        print(step, cost.item())

0 2.3309504985809326
100 0.6948732733726501
200 0.6918835043907166
300 0.6903694868087769
400 0.6859269142150879
500 0.6719667911529541
600 0.6381361484527588
700 0.5848764181137085
800 0.5269197821617126
900 0.4761895537376404
1000 0.4280622601509094
1100 0.372464120388031
1200 0.3064245581626892
1300 0.2388092279434204
1400 0.1816982477903366
1500 0.13937939703464508
1600 0.10957132279872894
1700 0.08858106285333633
1800 0.07348067313432693
1900 0.06231287494301796
2000 0.05382362753152847
2100 0.047207269817590714
2200 0.04193601384758949
2300 0.0376550629734993
2400 0.03412013128399849
2500 0.03115866705775261
2600 0.028646118938922882
2700 0.02649061754345894
2800 0.024623310193419456
2900 0.02299145795404911
3000 0.02155422978103161
3100 0.02027968131005764
3200 0.0191422738134861
3300 0.018121372908353806
3400 0.017200399190187454
3500 0.016365595161914825
3600 0.015605677850544453
3700 0.0149111682549119
3800 0.014274059794843197
3900 0.013687780126929283
4000 0.013146381825208

In [14]:
# Show the last forward-pass predictions, followed by the XOR targets.
for shown in (Y_pred, Y):
    print(shown)

tensor([[0.0023],
        [0.9952],
        [0.9952],
        [0.0033]])
tensor([[0.],
        [1.],
        [1.],
        [0.]])


In [18]:
# Next, solve XOR with PyTorch's high-level API.
# Multi-layer perceptron (2 -> 2 -> 1)

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# XOR truth table: inputs and targets.
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
Y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)

# Two affine layers (2 -> 2 -> 1); the same Sigmoid module is reused after each.
linear1 = nn.Linear(2, 2, bias=True)
linear2 = nn.Linear(2, 1, bias=True)
sigmoid = nn.Sigmoid()
layers = [linear1, sigmoid, linear2, sigmoid]
model = nn.Sequential(*layers)
optimizer = optim.SGD(model.parameters(), lr=1)

for step in range(10001):
    # Clear the gradients left over from the previous step, then run one
    # full-batch forward/backward pass and apply the SGD update.
    optimizer.zero_grad()
    hypothesis = model(X)
    cost = F.binary_cross_entropy(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if not step % 100:
        print(step, cost.item())

# Predictions of the trained model for all four inputs.
print(model(X))


0 0.7013025283813477
100 0.6910400986671448
200 0.6685031056404114
300 0.5564861297607422
400 0.36737060546875
500 0.1381683349609375
600 0.06692102551460266
700 0.04205302149057388
800 0.03019404225051403
900 0.02339489758014679
1000 0.019027316942811012
1100 0.015999939292669296
1200 0.01378466933965683
1300 0.012096784077584743
1400 0.010769703425467014
1500 0.009700112044811249
1600 0.008820261806249619
1700 0.008084287866950035
1800 0.007459857501089573
1900 0.006923528388142586
2000 0.006458045914769173
2100 0.006050391122698784
2200 0.005690417252480984
2300 0.0053703803569078445
2400 0.005083948373794556
2500 0.004826163873076439
2600 0.004592944867908955
2700 0.00438095023855567
2800 0.004187471698969603
2900 0.00401014881208539
3000 0.0038470569998025894
3100 0.003696588333696127
3200 0.003557301592081785
3300 0.0034280845429748297
3400 0.0033077667467296124
3500 0.0031955528538674116
3600 0.003090647514909506
3700 0.002992300782352686
3800 0.0028999727219343185
3900 0.002813

In [19]:
# Deeper network

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# XOR truth table: inputs and targets.
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
Y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)

# Four affine layers (2 -> 10 -> 10 -> 10 -> 1), each followed by a sigmoid.
layer_sizes = [2, 10, 10, 10, 1]
modules = []
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
    modules.append(nn.Linear(n_in, n_out, bias=True))
    modules.append(nn.Sigmoid())
model = nn.Sequential(*modules)
optimizer = optim.SGD(model.parameters(), lr=1)

for step in range(10001):
    # Clear the gradients left over from the previous step, then run one
    # full-batch forward/backward pass and apply the SGD update.
    optimizer.zero_grad()
    hypothesis = model(X)
    cost = F.binary_cross_entropy(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if not step % 100:
        print(step, cost.item())

# Predictions of the trained model for all four inputs.
print(model(X))


0 0.6995787620544434
100 0.6931189894676208
200 0.6931160688400269
300 0.6931127905845642
400 0.693109393119812
500 0.6931055188179016
600 0.6931014657020569
700 0.6930969953536987
800 0.6930919885635376
900 0.6930864453315735
1000 0.6930801272392273
1100 0.693073034286499
1200 0.6930650472640991
1300 0.6930557489395142
1400 0.6930451393127441
1500 0.6930326223373413
1600 0.693017840385437
1700 0.6930001974105835
1800 0.6929787397384644
1900 0.6929523944854736
2000 0.6929193735122681
2100 0.6928770542144775
2200 0.6928216814994812
2300 0.6927469372749329
2400 0.692642092704773
2500 0.6924887299537659
2600 0.6922504901885986
2700 0.6918508410453796
2800 0.6911025047302246
2900 0.689449667930603
3000 0.6845925450325012
3100 0.6584906578063965
3200 0.4794733226299286
3300 0.02082980051636696
3400 0.00692561361938715
3500 0.003891710191965103
3600 0.0026430280413478613
3700 0.0019775887485593557
3800 0.0015689211431890726
3900 0.0012944522313773632
4000 0.0010982801904901862
4100 0.0009515