## Practice - Backpropagation

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
# Show the installed PyTorch version so the recorded outputs below can be
# matched against the version that produced them.
print(torch.__version__)

1.0.0


### Graph representation

In [9]:
# Create the leaf tensors and define the function built from them.
# Every leaf is created with requires_grad=True.
x, y, z = (
    torch.tensor([value], requires_grad=True)
    for value in (-2.0, 5.0, -4.0)
)

# f = (x + y) * z, built in two steps so the intermediate node q is visible.
q = x + y
f = q * z

In [10]:
# Show the three leaf tensors, one per line.
print(x, y, z, sep="\n")

tensor([-2.], requires_grad=True)
tensor([5.], requires_grad=True)
tensor([-4.], requires_grad=True)


In [11]:
# Show the intermediate node q and the final result f, one per line.
print(q, f, sep="\n")

tensor([3.], grad_fn=<AddBackward0>)
tensor([-12.], grad_fn=<MulBackward0>)


In [12]:
# When this is written as a class, the retain_graph=True option is not needed.
# That is covered later.
# NOTE(review): per the PyTorch docs, retain_graph=True only matters if
# backward() is called again on this same graph; confirm it is still wanted here.

f.backward(retain_graph=True)

In [13]:
# The .grad_fn attribute shows which gradient function is attached to a tensor.
print(f.grad_fn, q.grad_fn, sep="\n")

<MulBackward0 object at 0x000001FC6D225470>
<AddBackward0 object at 0x000001FC6D2251D0>


In [14]:
# The .grad attribute holds the gradient computed for each leaf tensor.
print(x.grad, y.grad, z.grad, sep="\n")

tensor([-4.])
tensor([-4.])
tensor([3.])


## TODO - Backpropagation

<img src="https://drive.google.com/uc?id=1Jvlk56B36HPyihMRACOxUFm7FwUTVL3v">

In [15]:
# Implement the directed acyclic graph shown above.

# Leaf tensors, all created with requires_grad=True.
w0, x0, w1, x1, w2 = (
    torch.tensor([value], requires_grad=True)
    for value in (2.0, -1.0, -3.0, -2.0, -3.0)
)

# Forward pass: s = w0*x0 + w1*x1 + w2, one node per line.
q1 = w0 * x0
q2 = w1 * x1
f1 = q1 + q2
s = f1 + w2

# Apply the sigmoid to s and show the result.
out = s.sigmoid()
print(out)

# Backward pass from out.
out.backward(retain_graph=True)

tensor([0.7311], grad_fn=<SigmoidBackward>)


In [16]:
# Show the gradient stored on each of the five leaf tensors, one per line.
print(w0.grad, x0.grad, w1.grad, x1.grad, w2.grad, sep="\n")

tensor([-0.1966])
tensor([0.3932])
tensor([-0.3932])
tensor([-0.5898])
tensor([0.1966])


## Practice - Weight Update

In [19]:
# Walk through one weight update on the graph above, step by step.

w0 = torch.tensor([2.0], requires_grad=True)
x0 = torch.tensor([-1.0], requires_grad=True)
w1 = torch.tensor([-3.0], requires_grad=True)
x1 = torch.tensor([-2.0], requires_grad=True)
w2 = torch.tensor([-3.0], requires_grad=True)

# Forward pass: s = w0*x0 + w1*x1 + w2
q1 = w0*x0
q2 = w1*x1

r = q1 + q2
s = r + w2

# Make a prediction before any weight has been modified.
out = torch.sigmoid(s)
print("=====================")
print("First Prediction")
print(out)

# Print the original weights.
print("=====================")
print("Original Weight")
print(w0)
print(w1)
print(w2)

# Define the optimizer and the loss.
# Only w0, w1 and w2 are passed to the optimizer; x0 and x1 are not.
target = torch.tensor([1.0])
optimizer = optim.SGD([w0, w1, w2], lr=0.1)
criterion = nn.MSELoss()
loss = criterion(out, target)

# Clear any previously stored gradients, then compute the gradients
# with .backward().
optimizer.zero_grad()
loss.backward()

# Print the computed gradients.
print("=====================")
print("Gradient")

print(w0.grad)
print(w1.grad)
print(w2.grad)

# Update the weights.
optimizer.step()

print("=====================")
print("Updated Weight")

# w_new = w_origin - (learning_rate * gradient)
# Note that the gradient of w2 is negative, so w2 increases.
print(w0) # w0_new = w0(=2.0) - (0.1 * 0.1058) = 1.9894
print(w1) # w1_new = w1(=-3.0) - (0.1 * 0.2115) = -3.0212
print(w2) # w2_new = w2(=-3.0) - (0.1 * -0.1058) = -2.9894

# Repeat the forward pass with the updated weights.
q1 = w0*x0
q2 = w1*x1

r = q1 + q2
s = r + w2

out = torch.sigmoid(s)
print("=====================")
print("Second Prediction")
# Make the second prediction based on the updated weights.
print(out)

=====================
First Prediction
tensor([0.7311], grad_fn=<SigmoidBackward>)
=====================
Original Weight
tensor([2.], requires_grad=True)
tensor([-3.], requires_grad=True)
tensor([-3.], requires_grad=True)
=====================
Gradient
tensor([0.1058])
tensor([0.2115])
tensor([-0.1058])
=====================
Updated Weight
tensor([1.9894], requires_grad=True)
tensor([-3.0212], requires_grad=True)
tensor([-2.9894], requires_grad=True)
=====================
Second Prediction
tensor([0.7433], grad_fn=<SigmoidBackward>)
