<a href="https://colab.research.google.com/github/jimmywang2/NLP/blob/main/PyTorch_%E6%A1%86%E6%9E%B6%E8%A8%88%E7%AE%97%E6%90%8D%E5%A4%B1%E5%87%BD%E6%95%B8%E4%BB%A5%E5%8F%8A%E6%9B%B4%E6%96%B0%E5%8F%83%E6%95%B8_HW9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim


### 搭建模型

In [None]:
class LinearBNAC(nn.Module):
    """Fully connected layer followed by a tail chosen by the layer's role.

    * Hidden layer (``is_output=False``):
      Linear -> Dropout -> BatchNorm1d -> LeakyReLU.
    * Output layer with a single unit: Linear -> Sigmoid.
    * Output layer with several units: Linear -> Softmax over dim 1.

    Args:
        in_channels: Size of each input sample (passed to ``nn.Linear``).
        out_channels: Size of each output sample.
        bias: Whether the linear layer learns an additive bias.
        dropout: Drop probability for the hidden-layer ``nn.Dropout``;
            unused when ``is_output`` is true.
        is_output: Build an output head instead of a hidden layer.
    """

    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super().__init__()
        # The Linear layer is always the first stage (index 0 of the Sequential).
        stages = [nn.Linear(in_channels, out_channels, bias=bias)]
        if not is_output:
            stages.extend([
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True),
            ])
        elif out_channels == 1:
            stages.append(nn.Sigmoid())
        else:
            stages.append(nn.Softmax(dim=1))
        self.linear = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the stacked sub-modules and return the result."""
        return self.linear(x)

In [None]:
class Model(nn.Module):
    """MLP made of three hidden ``LinearBNAC`` blocks plus an output head.

    Layer widths are ``input_dimention -> 128 -> 64 -> 32 -> output_classes``.

    Args:
        input_dimention: Number of features per input sample.
        output_classes: Number of output units; forwarded to the final
            ``LinearBNAC``, which is built with ``is_output=True``.
    """

    def __init__(self, input_dimention, output_classes=1):
        super().__init__()
        widths = (input_dimention, 128, 64, 32)
        # Consecutive width pairs give (in, out) for each hidden block; the
        # blocks are created and registered in layer1, layer2, layer3 order.
        hidden_blocks = [
            LinearBNAC(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])
        ]
        self.layer1, self.layer2, self.layer3 = hidden_blocks
        self.output = LinearBNAC(widths[-1], output_classes, is_output=True)

    def forward(self, x):
        """Pass ``x`` through the three hidden blocks, then the output head."""
        hidden = self.layer3(self.layer2(self.layer1(x)))
        return self.output(hidden)
        

### 準備輸入資料、優化器、標籤資料、模型輸出

In [None]:
# Seed PyTorch's global RNG before the first random operation so that weight
# initialisation, the random dummy input and the dropout masks are
# reproducible under Restart Kernel -> Run All.
torch.manual_seed(42)

# 256 input features -> 10 output classes; with more than one output unit the
# model's output head ends in Softmax.
model = Model(input_dimention=256, output_classes=10)
# Adam optimizer over all model parameters, with weight decay as regularisation.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-3)


In [None]:
batch_size, input_features = 4, 256
# One random mini-batch of shape (batch_size, input_features).
dummy_input = torch.randn(batch_size, input_features)

# Class-index labels, one per sample in the batch, stored as torch.long.
target = torch.tensor([9, 5, 4, 4], dtype=torch.long)

In [None]:
# Forward pass: the model was built with 10 output classes and a Softmax head,
# so each row of `output` is a probability distribution over the classes.
# NOTE(review): model.eval() is never called in the visible cells, so Dropout
# and BatchNorm presumably run in training mode here — confirm this is intended.
output = model(dummy_input)
print(output)

tensor([[0.0575, 0.1173, 0.1355, 0.1671, 0.0946, 0.0714, 0.0804, 0.1449, 0.0494,
         0.0819],
        [0.0873, 0.1133, 0.0622, 0.0987, 0.1071, 0.1449, 0.0644, 0.1380, 0.0715,
         0.1127],
        [0.1059, 0.1363, 0.0724, 0.1000, 0.1285, 0.0855, 0.0517, 0.1073, 0.0609,
         0.1513],
        [0.1411, 0.0949, 0.0672, 0.1434, 0.1396, 0.0872, 0.0750, 0.1328, 0.0822,
         0.0365]], grad_fn=<SoftmaxBackward>)


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用 dropout，並隨機產生 dummy_input，所以各位學員得到的值會與解答不同，然而步驟與原理需要相同

In [None]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [None]:
# NLLLoss expects log-probabilities as input. The model already applies
# Softmax, so the log is taken explicitly where the loss is computed.
criterion = NLLLoss() 

In [None]:
# The model outputs Softmax probabilities while NLLLoss expects
# log-probabilities, so take the log here. Clamp to the smallest positive
# normal float first: a probability that underflows to exactly 0 would give
# log(0) = -inf and turn the loss and gradients into inf/NaN. Probabilities at
# or above that threshold are left unchanged.
loss = criterion(torch.log(output.clamp(min=torch.finfo(output.dtype).tiny)), target)

### 完成back propagation並更新梯度

In [None]:
# Back-propagate the loss; this fills in .grad on the model parameters
# (layer1's weight gradient is printed in the next cell).
loss.backward()


In [None]:
# Inspect layer1's Linear weight and its gradient right after backward().
# A single print with sep='\n' emits the weight dump, the blank lines and the
# gradient dump exactly as three consecutive print calls would.
print(
    'weight : {}'.format(model.layer1.linear[0].weight),
    '\n',
    'grad : {}'.format(model.layer1.linear[0].weight.grad),
    sep='\n',
)

weight : Parameter containing:
tensor([[-0.0200, -0.0541, -0.0286,  ...,  0.0084,  0.0547,  0.0107],
        [-0.0070,  0.0466,  0.0557,  ..., -0.0375,  0.0325,  0.0253],
        [ 0.0326,  0.0096, -0.0434,  ..., -0.0078, -0.0206,  0.0448],
        ...,
        [-0.0371,  0.0088, -0.0537,  ...,  0.0086,  0.0210,  0.0212],
        [ 0.0096,  0.0484,  0.0464,  ..., -0.0415, -0.0260,  0.0489],
        [-0.0255,  0.0090, -0.0460,  ..., -0.0230,  0.0444,  0.0168]],
       requires_grad=True)


grad : tensor([[ 2.6608e-04, -5.1024e-05, -1.6257e-04,  ...,  8.3882e-05,
         -2.2236e-04, -6.7840e-05],
        [-5.1933e-02, -1.1145e-03,  5.7692e-02,  ...,  6.2620e-02,
          2.0801e-02, -6.5721e-02],
        [-6.8930e-04,  3.8470e-04,  6.6275e-04,  ...,  1.8281e-04,
         -4.2427e-04,  9.5983e-04],
        ...,
        [-2.3458e-02, -3.1425e-03,  3.2352e-02,  ...,  2.7947e-02,
          2.4986e-02, -4.0178e-02],
        [ 1.2301e-02, -1.8653e-03, -2.4341e-02,  ..., -2.4919e-02,
       

In [None]:
# Apply one optimizer update to the parameters using the gradients computed
# by loss.backward().
optimizer.step()


In [None]:
# Inspect layer1's Linear weight and gradient after optimizer.step(): in the
# recorded output the weights have moved while .grad still holds the values
# from backward(). One print with sep='\n' reproduces the original spacing.
print(
    'weight : {}'.format(model.layer1.linear[0].weight),
    '\n',
    'grad : {}'.format(model.layer1.linear[0].weight.grad),
    sep='\n',
)

weight : Parameter containing:
tensor([[-0.0210, -0.0531, -0.0276,  ...,  0.0074,  0.0557,  0.0117],
        [-0.0060,  0.0476,  0.0547,  ..., -0.0385,  0.0315,  0.0263],
        [ 0.0336,  0.0086, -0.0444,  ..., -0.0088, -0.0196,  0.0438],
        ...,
        [-0.0361,  0.0098, -0.0547,  ...,  0.0076,  0.0200,  0.0222],
        [ 0.0086,  0.0494,  0.0474,  ..., -0.0405, -0.0270,  0.0479],
        [-0.0265,  0.0080, -0.0450,  ..., -0.0220,  0.0454,  0.0158]],
       requires_grad=True)


grad : tensor([[ 2.6608e-04, -5.1024e-05, -1.6257e-04,  ...,  8.3882e-05,
         -2.2236e-04, -6.7840e-05],
        [-5.1933e-02, -1.1145e-03,  5.7692e-02,  ...,  6.2620e-02,
          2.0801e-02, -6.5721e-02],
        [-6.8930e-04,  3.8470e-04,  6.6275e-04,  ...,  1.8281e-04,
         -4.2427e-04,  9.5983e-04],
        ...,
        [-2.3458e-02, -3.1425e-03,  3.2352e-02,  ...,  2.7947e-02,
          2.4986e-02, -4.0178e-02],
        [ 1.2301e-02, -1.8653e-03, -2.4341e-02,  ..., -2.4919e-02,
       

### 清空 gradient

In [None]:
# Clear the stored gradients so they are not accumulated into the next
# backward pass.
# NOTE(review): the recorded output below shows all-zero .grad tensors; recent
# PyTorch releases appear to default to set_to_none=True, in which case .grad
# would print as None — confirm against the installed version.
optimizer.zero_grad()

In [None]:
# Inspect layer1's Linear weight and gradient after optimizer.zero_grad(): the
# weights keep their updated values and the gradient has been cleared.
# One print with sep='\n' reproduces the original spacing.
print(
    'weight : {}'.format(model.layer1.linear[0].weight),
    '\n',
    'grad : {}'.format(model.layer1.linear[0].weight.grad),
    sep='\n',
)

weight : Parameter containing:
tensor([[-0.0210, -0.0531, -0.0276,  ...,  0.0074,  0.0557,  0.0117],
        [-0.0060,  0.0476,  0.0547,  ..., -0.0385,  0.0315,  0.0263],
        [ 0.0336,  0.0086, -0.0444,  ..., -0.0088, -0.0196,  0.0438],
        ...,
        [-0.0361,  0.0098, -0.0547,  ...,  0.0076,  0.0200,  0.0222],
        [ 0.0086,  0.0494,  0.0474,  ..., -0.0405, -0.0270,  0.0479],
        [-0.0265,  0.0080, -0.0450,  ..., -0.0220,  0.0454,  0.0158]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
