# 實作NN with input :1000  hidden :100 output :10


# 1. 原始版本

In [1]:
import torch
dtype = torch.float
device = torch.device("cuda:0")
# N = batch size
N,D_in,D_hidden,D_out = 64,1000,100,10

# 隨便的輸入和輸出
x = torch.randn(N,D_in,device = device,dtype=dtype)
y = torch.randn(N,D_out,device = device,dtype=dtype)

# 權重
w1 = torch.randn(D_in,D_hidden,device = device,dtype=dtype)
w2 = torch.randn(D_hidden,D_out,device = device,dtype=dtype)

learning_rate = 1e-6
for t in range(500):
    # 輸入*w1
    h = x.mm(w1)
    # 進行 relu: 0以下去掉
    h_relu = h.clamp(min=0)
    # 去掉後*w2
    y_pred = h_relu.mm(w2)
    # 計算loss
    loss = (y_pred - y).pow(2).sum().item()
    
    # loss function 當下的斜率
    grad_y_pred = -2.0*(y-y_pred)
    # t() 是 transpose的意思
    
    # 算backword
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h<0]=0
    grad_w1 = x.t().mm(grad_h)
    
    w1 -= learning_rate*grad_w1
    w2 -= learning_rate*grad_w2
print('done')

done


# 2. 用autograd  package 自動微分和求反導
+ requires_grad=True 可以記錄梯度過程，自動Back-propagation
+ [求梯度](https://zhuanlan.zhihu.com/p/81369826)

In [4]:
import torch
import torch.nn as nn
dtype = torch.float
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print('CUDA')
else:
    device = torch.device("cpu")
    print('CPU')

N,D_in,D_hidden,D_out = 64,1000,100,10

# 隨便的輸入和輸出
x = torch.randn(N,D_in,device = device,dtype=dtype)
y = torch.randn(N,D_out,device = device,dtype=dtype)

# 權重
w1 = torch.randn(D_in,D_hidden,device = device,dtype=dtype,requires_grad=True)
w2 = torch.randn(D_hidden,D_out,device = device,dtype=dtype,requires_grad=True)

learning_rate = 1e-6

mse_loss = nn.MSELoss(reduction='sum')

for t in range(500):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)


    # mean squre loss
    loss = mse_loss(y_pred,y)
    
    if(t%100 == 99):
        print(t,loss.item())
        
    # 自動BP    
    loss.backward()
     
    with torch.no_grad():
        w1 -= learning_rate*w1.grad
        w2 -= learning_rate*w2.grad
        
        w1.grad.zero_()
        w2.grad.zero_()

CUDA
99 385.69366455078125
199 1.1587060689926147
299 0.00535293435677886
399 0.00014542671851813793
499 3.056876448681578e-05


# 3. 用sequential 建造 model

In [6]:
import torch
N,D_in,D_hidden,D_out = 64,1000,100,10

x = torch.randn(N,D_in)
y = torch.randn(N,D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in,D_hidden),
    torch.nn.ReLU(),
    torch.nn.Linear(D_hidden,D_out),
)

lossfn = torch.nn.MSELoss()
learning_rate = 1e-4

for t in range(500):
    y_pred = model(x)
    
    loss = lossfn(y_pred,y)
    
    if t % 100 == 99:
        print(t, loss.item())
    
    model.zero_grad()
    loss.backward()
    # 去除grad計算，因為他只是要update value 而已        
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad


99 0.9884147047996521
199 0.9760690927505493
299 0.9639668464660645
399 0.9521039724349976
499 0.9405290484428406


# 4. 客製化NN MODEL
+ [Optimizer 種類](https://medium.com/%E9%9B%9E%E9%9B%9E%E8%88%87%E5%85%94%E5%85%94%E7%9A%84%E5%B7%A5%E7%A8%8B%E4%B8%96%E7%95%8C/%E6%A9%9F%E5%99%A8%E5%AD%B8%E7%BF%92ml-note-sgd-momentum-adagrad-adam-optimizer-f20568c968db)
+ [Auto grad](https://pytorch.org/tutorials/beginner/former_torchies/autograd_tutorial_old.html?highlight=torch%20autograd%20backward)

In [10]:
import torch

import pyprind

device = torch.device("cuda:0")

class myNN(torch.nn.Module):
    def __init__(self,D_in,D_hidden,D_out):
        super(myNN,self).__init__()
        # layer 1
        self.linear1 = torch.nn.Linear(D_in,D_hidden)
        # layer 2
        self.linear2 = torch.nn.Linear(D_hidden,D_out)
        # 激勵
        self.activation = torch.nn.ReLU()
        self.lossfn = torch.nn.MSELoss()
    # 訓練 
    def forward(self,x):
        l1 = self.linear1(x)
        l1_act = self.activation(l1)
        y_pred = self.linear2(l1_act)
        return y_pred


N,D_in,D_hidden,D_out = 64,1000,100,10
x = torch.randn(N,D_in)
y = torch.randn(N,D_out)

model = myNN(D_in, D_hidden, D_out)



# 用於更新參數
optimizer = torch.optim.SGD(model.parameters(),lr=1e-4)
#optimizer = torch.optim.Adam(model.parameters(),lr=1e-4)
print('step','loss')

pbar = pyprind.ProgBar(5000)

for t in range(5000):
    pbar.update()
    y_pred = model(x)
    
    loss = model.lossfn(y_pred,y)
    #if t%100==99:
    #    print(t,loss.item(),end = '')

    # 梯度初始化
    optimizer.zero_grad()
    
    # 計算 backward
    loss.backward()
    
    # 更新參數
    optimizer.step()

step loss


0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:06
