In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim

### 搭建模型

In [2]:
class LinearBNAC(nn.Module):
    """Fully connected building block, stored as ``self.linear`` (an ``nn.Sequential``).

    The flags select one of three layouts:

    * hidden layer (``is_output=False``):
      Linear -> Dropout -> BatchNorm1d -> LeakyReLU
    * single-unit output (``is_output=True`` and ``out_channels == 1``):
      Linear -> Sigmoid
    * multi-unit output (``is_output=True`` otherwise):
      Linear -> Softmax over dim 1

    Args:
        in_channels: number of input features of the linear layer.
        out_channels: number of output features of the linear layer.
        bias: whether the linear layer carries a bias term.
        dropout: dropout probability; only used by the hidden-layer layout.
        is_output: build an output head instead of a hidden layer.
    """

    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super().__init__()
        # The affine layer always comes first; only what follows it varies.
        stages = [nn.Linear(in_channels, out_channels, bias=bias)]
        if not is_output:
            stages += [
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True),
            ]
        elif out_channels == 1:
            stages.append(nn.Sigmoid())
        else:
            stages.append(nn.Softmax(dim=1))
        self.linear = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        return self.linear(x)

In [3]:
class Model(nn.Module):
    """Four-stage fully connected network assembled from ``LinearBNAC`` blocks.

    Layer widths are ``input_dimention -> 128 -> 64 -> 32 -> output_classes``.
    The first three blocks are hidden layers; the last one is created with
    ``is_output=True``.

    Args:
        input_dimention: number of input features (spelling kept so existing
            keyword callers keep working).
        output_classes: width of the output block.
    """

    def __init__(self, input_dimention, output_classes=1):
        super().__init__()
        self.layer1 = LinearBNAC(in_channels=input_dimention, out_channels=128)
        self.layer2 = LinearBNAC(in_channels=128, out_channels=64)
        self.layer3 = LinearBNAC(in_channels=64, out_channels=32)
        self.output = LinearBNAC(in_channels=32, out_channels=output_classes, is_output=True)

    def forward(self, x):
        """Feed ``x`` through the three hidden blocks and then the output block."""
        for stage in (self.layer1, self.layer2, self.layer3, self.output):
            x = stage(x)
        return x
        

### 準備輸入資料、優化器、標籤資料、模型輸出

In [5]:
# Ten-way classifier over 256-dimensional inputs.
model = Model(input_dimention=256, output_classes=10)

# Adam over every model parameter, with weight decay as regularisation.
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-3,
)

In [6]:
# Shape of the random demo batch: 4 samples with 256 features each.
batch_size, input_features = 4, 256
dummy_input = torch.randn(batch_size, input_features)

# One class index per sample, stored as int64 (torch.long).
target = torch.tensor([9, 5, 4, 4], dtype=torch.long)

In [7]:
# Forward pass on the random batch. The model was built with 10 output classes
# and a Softmax head, so each row of the result holds 10 class probabilities.
output = model(dummy_input)
print(output)

tensor([[0.0700, 0.0913, 0.2435, 0.0392, 0.1084, 0.0181, 0.1711, 0.0842, 0.0649,
         0.1094],
        [0.0940, 0.1004, 0.0740, 0.1349, 0.0945, 0.1279, 0.1506, 0.0537, 0.0792,
         0.0909],
        [0.0723, 0.1930, 0.0416, 0.1084, 0.0472, 0.0427, 0.1407, 0.1779, 0.0883,
         0.0878],
        [0.0553, 0.0794, 0.1107, 0.1053, 0.2255, 0.0489, 0.1332, 0.0820, 0.0677,
         0.0921]], grad_fn=<SoftmaxBackward>)


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用 dropout，並隨機產生 dummy_input，所以各位學員得到的值會與解答不同，然而步驟與原理需要相同

In [8]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [9]:
# The model's output head already applies Softmax, so the loss is taken as
# NLLLoss on log-probabilities instead of CrossEntropyLoss, which would apply
# log-softmax a second time.
criterion = NLLLoss()

In [10]:
# NLLLoss expects log-probabilities, so take the log of the softmax output.
# A probability that underflows to exactly 0 would give log(0) = -inf, an
# infinite loss and NaN gradients; clamping to the smallest positive normal
# value of the dtype keeps the log finite without changing ordinary values.
loss = criterion(torch.log(output.clamp(min=torch.finfo(output.dtype).tiny)), target)

### 完成back propagation並更新梯度

In [11]:
# Backpropagate the loss; this fills in `.grad` on the model parameters
# (printed in the next cell for the first linear layer).
loss.backward()

In [12]:
# State after backward() but before the optimizer step: the weights are still
# the initial ones and the gradient has just been computed.
layer1_weight = model.layer1.linear[0].weight
print('weight : {}'.format(layer1_weight))
print('\n')
print('grad : {}'.format(layer1_weight.grad))

weight : Parameter containing:
tensor([[-0.0266, -0.0585,  0.0266,  ..., -0.0512, -0.0498,  0.0245],
        [-0.0423, -0.0327,  0.0356,  ...,  0.0135, -0.0549, -0.0488],
        [ 0.0029,  0.0024,  0.0227,  ...,  0.0230, -0.0213, -0.0035],
        ...,
        [-0.0055,  0.0607,  0.0456,  ...,  0.0551, -0.0043, -0.0472],
        [-0.0120,  0.0305,  0.0053,  ..., -0.0142, -0.0155, -0.0261],
        [-0.0548,  0.0059, -0.0229,  ...,  0.0181,  0.0357, -0.0591]],
       requires_grad=True)


grad : tensor([[ 1.3785e-03,  3.4739e-03,  1.1290e-02,  ..., -2.1689e-03,
          4.4383e-03,  2.0131e-03],
        [-2.6433e-06,  2.0694e-06,  6.1515e-06,  ..., -7.9117e-07,
         -1.0465e-06,  1.6811e-07],
        [-2.3271e-03,  2.5678e-03, -2.1956e-03,  ..., -1.6325e-03,
          1.0487e-03, -5.1683e-03],
        ...,
        [ 2.4344e-04, -1.9059e-04, -5.6654e-04,  ...,  7.2865e-05,
          9.6378e-05, -1.5482e-05],
        [ 1.7085e-02, -2.8170e-03, -9.7140e-03,  ..., -4.2493e-03,
       

In [13]:
# Apply one Adam update to the parameters using the gradients computed by
# backward(). The stored gradients themselves are left in place.
optimizer.step()

In [14]:
# State after optimizer.step(): compare the weights with the previous printout;
# the gradient is printed again to show what step() left behind.
layer1_weight = model.layer1.linear[0].weight
print('weight : {}'.format(layer1_weight))
print('\n')
print('grad : {}'.format(layer1_weight.grad))

weight : Parameter containing:
tensor([[-0.0276, -0.0595,  0.0256,  ..., -0.0502, -0.0508,  0.0235],
        [-0.0413, -0.0317,  0.0346,  ...,  0.0125, -0.0539, -0.0478],
        [ 0.0039,  0.0014,  0.0237,  ...,  0.0240, -0.0223, -0.0025],
        ...,
        [-0.0065,  0.0617,  0.0466,  ...,  0.0541, -0.0053, -0.0462],
        [-0.0130,  0.0315,  0.0063,  ..., -0.0132, -0.0165, -0.0251],
        [-0.0558,  0.0069, -0.0239,  ...,  0.0191,  0.0347, -0.0601]],
       requires_grad=True)


grad : tensor([[ 1.3785e-03,  3.4739e-03,  1.1290e-02,  ..., -2.1689e-03,
          4.4383e-03,  2.0131e-03],
        [-2.6433e-06,  2.0694e-06,  6.1515e-06,  ..., -7.9117e-07,
         -1.0465e-06,  1.6811e-07],
        [-2.3271e-03,  2.5678e-03, -2.1956e-03,  ..., -1.6325e-03,
          1.0487e-03, -5.1683e-03],
        ...,
        [ 2.4344e-04, -1.9059e-04, -5.6654e-04,  ...,  7.2865e-05,
          9.6378e-05, -1.5482e-05],
        [ 1.7085e-02, -2.8170e-03, -9.7140e-03,  ..., -4.2493e-03,
       

### 清空 gradient

In [15]:
# Clear the stored gradients so the next backward() does not accumulate on top
# of them.
# NOTE(review): the output recorded below shows zero-filled tensors; newer
# PyTorch releases default to set_to_none=True, in which case `.grad` would be
# None instead. Confirm against the installed version.
optimizer.zero_grad()

In [16]:
# State after optimizer.zero_grad(): the weights keep their updated values and
# only the stored gradient has been cleared.
layer1_weight = model.layer1.linear[0].weight
print('weight : {}'.format(layer1_weight))
print('\n')
print('grad : {}'.format(layer1_weight.grad))

weight : Parameter containing:
tensor([[-0.0276, -0.0595,  0.0256,  ..., -0.0502, -0.0508,  0.0235],
        [-0.0413, -0.0317,  0.0346,  ...,  0.0125, -0.0539, -0.0478],
        [ 0.0039,  0.0014,  0.0237,  ...,  0.0240, -0.0223, -0.0025],
        ...,
        [-0.0065,  0.0617,  0.0466,  ...,  0.0541, -0.0053, -0.0462],
        [-0.0130,  0.0315,  0.0063,  ..., -0.0132, -0.0165, -0.0251],
        [-0.0558,  0.0069, -0.0239,  ...,  0.0191,  0.0347, -0.0601]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
