## summary of deep learning
1. Define the architecture
2. Forward propagate on the architecture using input data
3. Calculate the loss
4. Backpropagate to calculate the gradient for each weight
5. Update the weights using a learning rate

# pytorch 
* framework: easy to manipulate
* fastai
* tensorflow / keras

## (almost) All you need to know is:
1. tensor
2. autograd
3. nn

In [4]:
import torch
import numpy as np

### torch.tensor

In [7]:
torch.tensor([1]) # 1-element vector (shape (1,)); a true 0-d scalar would be torch.tensor(1)

tensor([1])

In [8]:
torch.tensor([1,2,3]) # vector

tensor([1, 2, 3])

In [9]:
torch.tensor([[1,2],[3,4]]) # matrix

tensor([[1, 2],
        [3, 4]])

In [13]:
torch.tensor( [ [[1,2], [3,4]], [[5,6], [7,8]] ] ) # 3-D tensor of shape (2, 2, 2)

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [5]:
data = [[1, 2],[3, 4]] # nested Python list (not a NumPy array)
# torch.tensor is the recommended way to build a tensor from existing data;
# passing the default dtype keeps the floating-point result that the legacy
# torch.Tensor(data) constructor produced for this integer list
x_data = torch.tensor(data, dtype=torch.get_default_dtype())
x_data

tensor([[1., 2.],
        [3., 4.]])

In [18]:
a = torch.tensor([[1,2],[3,4]])
b = torch.tensor([[5,6],[7,8]])
a+b, a-b, a*b, a/b # element-wise; a/b is true division, so its result is floating point

(tensor([[ 6,  8],
         [10, 12]]), tensor([[-4, -4],
         [-4, -4]]), tensor([[ 5, 12],
         [21, 32]]), tensor([[0.2000, 0.3333],
         [0.4286, 0.5000]]))

In [20]:
torch.matmul(a,b) # matrix (tensor) multiplication

tensor([[19, 22],
        [43, 50]])

In [28]:
torch.ones(3,2) # 3x2 tensor filled with ones

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])

In [29]:
torch.rand(2,3) # 2x3 tensor of random numbers drawn uniformly from [0, 1)

tensor([[0.9219, 0.8050, 0.3345],
        [0.3638, 0.9512, 0.2446]])

## autograd (automatic differentiation)

In [72]:
def f(x):
    """Return the square of ``x``."""
    squared = x ** 2
    return squared

xt = torch.tensor(3.).requires_grad_() # requires_grad_(): track operations on xt so its gradient can be computed

In [73]:
y = f(xt)
y.backward() # calculate the gradient of y with respect to xt

In [74]:
xt.grad # dy/dxt = 2 * xt, evaluated at xt = 3

tensor(6.)

In [84]:
xt = torch.tensor([1.,2.,3.]).requires_grad_()

In [85]:
def g(x):
    """Return the sum of the squared elements of ``x`` as a single value.

    The object's own ``sum`` reduction is used instead of the builtin
    ``sum``: the reduction is then one operation rather than a Python-level
    loop over the elements, and 0-d tensors (which cannot be iterated) are
    accepted as well. All elements are reduced, whatever the number of
    dimensions, so the result is always a scalar that ``backward()`` can be
    called on directly.
    """
    return (x ** 2).sum()
yt = g(xt) # scalar result, so backward() needs no arguments
yt.backward() # stores the gradient of yt with respect to xt in xt.grad

In [86]:
xt.grad

tensor([2., 4., 6.])

## nn (neural networks) : XOR example

In [88]:
import torch

# input: the four XOR input pairs, one per row -> shape (4, 2)
X = torch.tensor([[0.,0.],[0.,1.],[1.,0.],[1.,1.]])
# target: one row per input row -> shape (4, 1)
y = torch.tensor([[0.],[1.],[1.],[0.]]) # check dimension!

In [91]:
# layer widths: input, hidden, output
din, dh, dout = 2, 4, 1

# fully connected din -> dh -> dout network with a sigmoid after each layer
layers = [
    torch.nn.Linear(din, dh),   # input -> hidden
    torch.nn.Sigmoid(),         # activation function of the hidden layer
    torch.nn.Linear(dh, dout),  # hidden -> output
    torch.nn.Sigmoid(),         # activation function of the output layer
]
model = torch.nn.Sequential(*layers)

In [94]:
learning_rate = 1e-3 # step size handed to the optimizer
loss = torch.nn.MSELoss() # mean squared error between prediction and target
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate) # Adam over all parameters of model

In [104]:
# 10000 training iterations, each using the whole of X and y
for t in range(10000):
    y_pred = model(X) # forward propagation
    E = loss(y_pred,y) # loss between prediction and target
    optimizer.zero_grad() # zero_grad(): reset the gradients before the new backward pass
    E.backward() # backpropagation
    optimizer.step() # update weights and biases

In [106]:
model(X)

tensor([[0.0062],
        [0.9660],
        [0.9687],
        [0.0434]], grad_fn=<SigmoidBackward0>)