## Gradient Desent

### $\theta_{W_t} = \theta_{W_{t-1}} - \eta \frac{\partial L}{\partial W_{t-1}} = \theta_{W_{t-1}} - \eta {\nabla}W_{t-1}$  

## What is the Stochastic in the Gradient Descent?

### Fully-connected Layer(Dense Layer)
## $H(x) = Wx + b$
### - x = [1,2,3]
### - y = [1,2,3]

In [79]:
import numpy as np
import torch
import torch.nn as nn
import random

In [80]:
import random
random_seed =2
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)

In [81]:
class model(nn.Module):
    def __init__(self):
        super().__init__()
        # 앞의 1 = 입력 노드의 개수(100), 뒤의 1 = 출력 노드의 개수(히든층 혹은 출력층)(10) = 1
        self.linear = nn.Linear(1,1,bias=True) # W = [1] , B = [1]
        
    def forward(self,x):
        return self.linear(x)
    

In [82]:
model1 = model()
model2 = model()

In [83]:
model2.load_state_dict(model1.state_dict())

<All keys matched successfully>

In [84]:
x_train = torch.tensor(np.array([[1],[2],[3],[4],[5],[6]]),dtype=torch.float)
y_train = torch.tensor(np.array([[1],[2],[3],[4],[5],[6]]))

In [85]:
class mean_squred_error(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self,x,y):
        return torch.mean(torch.square(x-y))

In [86]:
cost = mean_squred_error()

In [87]:
learning_rate = 0.01
optim1 = torch.optim.SGD(model1.parameters(),lr=learning_rate)
optim2 = torch.optim.SGD(model2.parameters(),lr=learning_rate)

In [88]:
print(model1.linear.weight,model1.linear.bias)
print(model2.linear.weight,model2.linear.bias)

Parameter containing:
tensor([[0.2294]], requires_grad=True) Parameter containing:
tensor([-0.2380], requires_grad=True)
Parameter containing:
tensor([[0.2294]], requires_grad=True) Parameter containing:
tensor([-0.2380], requires_grad=True)


In [89]:
batch_index = np.arange(0,len(x_train))
print(batch_index)
random.shuffle(batch_index)
print(batch_index)

print(x_train)
print(y_train)

new_x_train = x_train[batch_index]
new_y_train = y_train[batch_index]

print(new_x_train)
print(new_y_train)

[0 1 2 3 4 5]
[2 3 1 4 5 0]
tensor([[1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.]])
tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])
tensor([[3.],
        [4.],
        [2.],
        [5.],
        [6.],
        [1.]])
tensor([[3],
        [4],
        [2],
        [5],
        [6],
        [1]])


In [90]:
optim1.zero_grad()

pred = model1(new_x_train)

loss = cost(pred,new_y_train)

loss.backward()
optim1.step()

In [91]:
for i in range(3):
    optim2.zero_grad()
    pred = model2(new_x_train[i*2:(i+1)*2])

    loss = cost(pred,new_y_train[i*2:(i+1)*2])
    loss.backward()
    optim2.step()

In [92]:
print(model1.linear.weight,model1.linear.bias)
print(model2.linear.weight,model2.linear.bias)

Parameter containing:
tensor([[0.4798]], requires_grad=True) Parameter containing:
tensor([-0.1793], requires_grad=True)
Parameter containing:
tensor([[0.7664]], requires_grad=True) Parameter containing:
tensor([-0.1066], requires_grad=True)


In [93]:
epochs = 100
batch_index = np.arange(0,len(x_train))
for epoch in range(epochs):
    random.shuffle(batch_index)
    new_x_train = x_train[batch_index]
    new_y_train = y_train[batch_index]
    
    optim1.zero_grad()

    pred = model1(new_x_train)

    loss = cost(pred,new_y_train)

    loss.backward()
    optim1.step()
    
    for i in range(3):
        optim2.zero_grad()
        pred = model2(new_x_train[i*2:(i+1)*2])

        loss = cost(pred,new_y_train[i*2:(i+1)*2])
        loss.backward()
        optim2.step()

In [94]:
print(model1.linear.weight,model1.linear.bias)
print(model2.linear.weight,model2.linear.bias)

Parameter containing:
tensor([[1.0089]], requires_grad=True) Parameter containing:
tensor([-0.0380], requires_grad=True)
Parameter containing:
tensor([[1.0037]], requires_grad=True) Parameter containing:
tensor([-0.0157], requires_grad=True)
