# Backpropagation

In [1]:
import torch

In [37]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

# nn layers
w1 = torch.Tensor(2, 2)
b1 = torch.Tensor(2)
w2 = torch.Tensor(2, 1)
b2 = torch.Tensor(1)

def sigmoid(x):
    return 1.0 / (1.0 + torch.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

learning_rate = 1

for step in range(10001):
    # forward
    l1 = torch.add(torch.matmul(X, w1), b1)
    a1 = sigmoid(l1)
    l2 = torch.add(torch.matmul(a1, w2), b2)
    Y_pred = sigmoid(l2)
    
    # binary cross entropy
    cost = -torch.mean(Y * torch.log(Y_pred) + (1 - Y) * torch.log(1 - Y_pred))
    
    # backprop(chain rule)
    # loss derivative
    d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)
    
    # layer2
    d_l2 = d_Y_pred * sigmoid_prime(l2)
    d_b2 = d_l2
    d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_b2)
    
    # layer1
    d_a1 = torch.matmul(d_b2, torch.transpose(w2, 0, 1))
    d_l1 = d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_b1)
    
    
    # weight update
    w1 = w1 - learning_rate * d_w1
    b1 = b1 - learning_rate * torch.mean(d_b1, 0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * torch.mean(d_b2, 0)
    
    if step % 100 == 0:
        print(step, cost.item())

0 0.6931471824645996
100 0.6931471824645996
200 0.6931471824645996
300 0.6931365728378296
400 0.5012093782424927
500 0.03938251733779907
600 0.0179862380027771
700 0.011541211977601051
800 0.008468648418784142
900 0.006677658297121525
1000 0.005507137626409531
1100 0.00468321330845356
1200 0.004072200041264296
1300 0.0036013040225952864
1400 0.0032273789402097464
1500 0.0029233861714601517
1600 0.0026713875122368336
1700 0.0024591770488768816
1800 0.002278008498251438
1900 0.0021215835586190224
2000 0.001985180890187621
2100 0.0018651445861905813
2200 0.0017587037291377783
2300 0.0016637472435832024
2400 0.0015784635907039046
2500 0.0015014464734122157
2600 0.0014315582811832428
2700 0.0013678570976480842
2800 0.0013095646863803267
2900 0.0012560379691421986
3000 0.0012067091884091496
3100 0.0011610251385718584
3200 0.0011187163181602955
3300 0.0010793496621772647
3400 0.001042656134814024
3500 0.0010083516826853156
3600 0.0009762868285179138
3700 0.0009461033623665571
3800 0.000917741

# Code : xor-nn

In [36]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

# nn layers (MLP) -> 다층 퍼셉트론
linear1 = torch.nn.Linear(2, 2, bias = True)
linear2 = torch.nn.Linear(2, 1, bias = True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid)

#define cost/loss & optimizer
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1)

for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    
    # cost/loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    
    if step % 100 == 0:
        print(step, cost.item())

0 0.709502100944519
100 0.6932308673858643
200 0.6930591464042664
300 0.6927437782287598
400 0.691247284412384
500 0.6776837110519409
600 0.5907617807388306
700 0.40993380546569824
800 0.14501768350601196
900 0.06711645424365997
1000 0.04173107445240021
1100 0.029875902459025383
1200 0.02313094027340412
1300 0.018812065944075584
1400 0.015822621062397957
1500 0.013636388815939426
1600 0.011970898136496544
1700 0.010661439970135689
1800 0.009605835191905499
1900 0.008737370371818542
2000 0.00801066868007183
2100 0.007393894717097282
2200 0.006864079274237156
2300 0.0064041223376989365
2400 0.006001196801662445
2500 0.005645343102514744
2600 0.005328869912773371
2700 0.005045552738010883
2800 0.004790512844920158
2900 0.0045597003772854805
3000 0.004349926486611366
3100 0.004158392548561096
3200 0.003982815425843
3300 0.003821346443146467
3400 0.003672317834571004
3500 0.0035343635827302933
3600 0.0034063123166561127
3700 0.003287113970145583
3800 0.0031759280245751143
3900 0.00307191442

In [24]:
hypothesis

tensor([[9.2568e-04],
        [9.9914e-01],
        [9.9872e-01],
        [8.0427e-04]], grad_fn=<SigmoidBackward>)

In [25]:
correct = (hypothesis > 0.5) * 1
correct

tensor([[0],
        [1],
        [1],
        [0]])

# Code : xor-nn-wide-deep

In [22]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

# nn layers (MLP) -> 다층 퍼셉트론
linear1 = torch.nn.Linear(2, 10, bias = True)
linear2 = torch.nn.Linear(10, 10, bias = True)
linear3 = torch.nn.Linear(10, 10, bias = True)
linear4 = torch.nn.Linear(10, 1, bias = True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid)

#define cost/loss & optimizer
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1)

for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    
    # cost/loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    
    if step % 100 == 0:
        print(step, cost.item())

0 0.6931792497634888
100 0.6931456327438354
200 0.6931451559066772
300 0.6931446194648743
400 0.6931441426277161
500 0.6931436657905579
600 0.6931430697441101
700 0.6931425333023071
800 0.6931419968605042
900 0.6931414604187012
1000 0.6931408643722534
1100 0.6931403279304504
1200 0.6931397914886475
1300 0.6931390762329102
1400 0.6931384801864624
1500 0.6931378245353699
1600 0.6931371688842773
1700 0.6931365132331848
1800 0.6931357979774475
1900 0.6931350231170654
2000 0.6931342482566833
2100 0.6931334733963013
2200 0.6931325793266296
2300 0.6931317448616028
2400 0.6931308507919312
2500 0.6931297779083252
2600 0.693128764629364
2700 0.6931276321411133
2800 0.6931265592575073
2900 0.6931253671646118
3000 0.6931240558624268
3100 0.6931227445602417
3200 0.6931213140487671
3300 0.6931196451187134
3400 0.6931180357933044
3500 0.6931161880493164
3600 0.6931143403053284
3700 0.6931121945381165
3800 0.6931099891662598
3900 0.6931074857711792
4000 0.6931047439575195
4100 0.6931017637252808
4200 