In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim


### 搭建模型

In [2]:
class LinearBNAC(nn.Module):
    """A fully connected layer plus the post-processing that suits its role.

    Hidden layer (``is_output=False``):
        Linear -> Dropout -> BatchNorm1d -> LeakyReLU (in place).
    Output layer with a single unit (``is_output=True``, ``out_channels == 1``):
        Linear -> Sigmoid.
    Output layer with several units (``is_output=True``):
        Linear -> Softmax over dimension 1.

    Args:
        in_channels: Number of input features of the linear layer.
        out_channels: Number of output features of the linear layer.
        bias: Whether the linear layer has a bias term.
        dropout: Dropout probability; only used for hidden layers.
        is_output: Selects the output-layer variants described above.
    """

    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super().__init__()
        # The affine map always comes first; what follows depends on the role.
        stages = [nn.Linear(in_channels, out_channels, bias=bias)]
        if not is_output:
            stages += [
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True),
            ]
        elif out_channels == 1:
            stages.append(nn.Sigmoid())
        else:
            stages.append(nn.Softmax(dim=1))
        # Kept under the attribute name `linear` so sub-modules stay reachable
        # as `linear[0]`, `linear[1]`, ...
        self.linear = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the configured stack and return the result."""
        return self.linear(x)

In [3]:
class Model(nn.Module):
    """Four-stage fully connected network: input -> 128 -> 64 -> 32 -> output.

    The first three stages are hidden ``LinearBNAC`` blocks; the last one is
    built with ``is_output=True``.

    Args:
        input_dimention: Number of features in each input row.
        output_classes: Width of the output layer (defaults to 1).
    """

    def __init__(self, input_dimention, output_classes=1):
        super().__init__()
        self.layer1 = LinearBNAC(input_dimention, 128)
        self.layer2 = LinearBNAC(128, 64)
        self.layer3 = LinearBNAC(64, 32)
        self.output = LinearBNAC(32, output_classes, is_output=True)

    def forward(self, x):
        """Feed ``x`` through the three hidden stages and the output stage."""
        for stage in (self.layer1, self.layer2, self.layer3, self.output):
            x = stage(x)
        return x
        

### 準備輸入資料、優化器、標籤資料、模型輸出

In [4]:
# 10-way classifier over 256-dimensional feature rows.
model = Model(256, output_classes=10)
# Adam over all of the model's parameters, with weight decay enabled.
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)

In [5]:
batch_size = 4
input_features = 256
# Random stand-in for a batch of feature rows, shape (batch_size, input_features).
dummy_input = torch.randn(batch_size, input_features)

# Class-index labels, one per row of the batch. NLLLoss expects integer
# (long) class indices, so they are written as integers rather than as
# floats that get coerced.
target = torch.tensor([9, 5, 4, 4], dtype=torch.long)
assert target.shape[0] == batch_size, "need exactly one label per sample"

In [7]:
# Forward pass. eval() has not been called, so the model is still in its
# default training mode: dropout is active and re-running gives other values.
output = model(dummy_input)
# With output_classes=10 the output head ends in Softmax(dim=1), so each row
# is a probability distribution over the 10 classes.
print(output)

tensor([[0.1087, 0.0521, 0.0948, 0.0656, 0.0846, 0.0407, 0.0952, 0.0625, 0.1524,
         0.2434],
        [0.0755, 0.0614, 0.0489, 0.1665, 0.1386, 0.1069, 0.0933, 0.1310, 0.0956,
         0.0823],
        [0.0740, 0.1038, 0.1096, 0.0507, 0.2534, 0.0562, 0.1185, 0.0758, 0.1069,
         0.0511],
        [0.0839, 0.0974, 0.0849, 0.1748, 0.1311, 0.0852, 0.1154, 0.1054, 0.0515,
         0.0703]], grad_fn=<SoftmaxBackward>)


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用 dropout，並隨機產生 dummy_input，所以各位學員得到的值會與解答不同，然而步驟原理需要相同

In [8]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [9]:
# NLLLoss takes log-probabilities as input. The model already applies softmax,
# so CrossEntropyLoss (which applies log-softmax itself) would normalise twice.
criterion = NLLLoss()

In [10]:
# The output head already applied softmax, so NLLLoss needs log(p).
# Clamp to the smallest positive normal value first: should a probability
# underflow to exactly 0, log() would return -inf and backward() would
# produce NaN gradients. Probabilities above that floor are unaffected.
log_probs = torch.log(torch.clamp(output, min=torch.finfo(output.dtype).tiny))
loss = criterion(log_probs, target)

### 完成back propagation並更新梯度

In [11]:
# Backpropagate from the loss; this fills in .grad on the model parameters.
loss.backward()

In [12]:
# After backward(): show the first Linear layer's weights and their gradient.
first_linear = model.layer1.linear[0]
print('weight : {}'.format(first_linear.weight))
print('\n')
print('grad : {}'.format(first_linear.weight.grad))

weight : Parameter containing:
tensor([[-0.0531,  0.0247, -0.0010,  ..., -0.0077,  0.0522, -0.0448],
        [ 0.0481,  0.0432,  0.0321,  ..., -0.0328,  0.0417, -0.0334],
        [ 0.0531,  0.0606,  0.0115,  ...,  0.0224,  0.0539,  0.0256],
        ...,
        [ 0.0204,  0.0449, -0.0230,  ..., -0.0119, -0.0528, -0.0531],
        [-0.0551, -0.0469, -0.0225,  ...,  0.0612,  0.0331,  0.0133],
        [-0.0038, -0.0366,  0.0141,  ..., -0.0034,  0.0421, -0.0011]],
       requires_grad=True)


grad : tensor([[ 0.0146,  0.0075, -0.0084,  ...,  0.0377, -0.0261, -0.0067],
        [ 0.1080, -0.0516,  0.0277,  ..., -0.0318, -0.0465,  0.0198],
        [-0.0326,  0.0211, -0.0126,  ...,  0.0021,  0.0235,  0.0049],
        ...,
        [ 0.0679, -0.0205,  0.0507,  ..., -0.0696, -0.0247,  0.0137],
        [-0.0086, -0.0015,  0.0354,  ..., -0.0128, -0.0209, -0.0324],
        [ 0.1082, -0.0580,  0.0170,  ...,  0.0483, -0.0956, -0.0221]])


In [13]:
# Apply one Adam update using the gradients currently stored in .grad.
# step() changes the weights; the stored .grad tensors are left untouched.
optimizer.step()

In [14]:
# After step(): the weights have moved, the gradient is still the one from backward().
first_linear = model.layer1.linear[0]
print('weight : {}'.format(first_linear.weight))
print('\n')
print('grad : {}'.format(first_linear.weight.grad))

weight : Parameter containing:
tensor([[-5.4107e-02,  2.3654e-02, -9.3911e-06,  ..., -8.7473e-03,
          5.3188e-02, -4.3811e-02],
        [ 4.7101e-02,  4.4231e-02,  3.1085e-02,  ..., -3.1816e-02,
          4.2653e-02, -3.4355e-02],
        [ 5.4105e-02,  5.9623e-02,  1.2523e-02,  ...,  2.1390e-02,
          5.2945e-02,  2.4614e-02],
        ...,
        [ 1.9432e-02,  4.5919e-02, -2.3956e-02,  ..., -1.0902e-02,
         -5.1839e-02, -5.4077e-02],
        [-5.4081e-02, -4.5889e-02, -2.3463e-02,  ...,  6.2156e-02,
          3.4139e-02,  1.4349e-02],
        [-4.8210e-03, -3.5602e-02,  1.3085e-02,  ..., -4.4479e-03,
          4.3123e-02, -1.2605e-04]], requires_grad=True)


grad : tensor([[ 0.0146,  0.0075, -0.0084,  ...,  0.0377, -0.0261, -0.0067],
        [ 0.1080, -0.0516,  0.0277,  ..., -0.0318, -0.0465,  0.0198],
        [-0.0326,  0.0211, -0.0126,  ...,  0.0021,  0.0235,  0.0049],
        ...,
        [ 0.0679, -0.0205,  0.0507,  ..., -0.0696, -0.0247,  0.0137],
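        [-0.0086, -0.0015,  0.0354,  ..., -0.0128, -0.0209, -0.0324],
        [ 0.1082, -0.0580,  0.0170,  ...,  0.0483, -0.0956, -0.0221]])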

### 清空 gradient

In [15]:
# Clear the stored gradients so the next backward() does not accumulate onto them.
# NOTE(review): the recorded output shows zero-filled grads; recent PyTorch
# releases default to set_to_none=True, in which case .grad prints as None.
optimizer.zero_grad()

In [16]:
# After zero_grad(): the weights keep their updated values, the gradient is cleared.
first_linear = model.layer1.linear[0]
print('weight : {}'.format(first_linear.weight))
print('\n')
print('grad : {}'.format(first_linear.weight.grad))

weight : Parameter containing:
tensor([[-5.4107e-02,  2.3654e-02, -9.3911e-06,  ..., -8.7473e-03,
          5.3188e-02, -4.3811e-02],
        [ 4.7101e-02,  4.4231e-02,  3.1085e-02,  ..., -3.1816e-02,
          4.2653e-02, -3.4355e-02],
        [ 5.4105e-02,  5.9623e-02,  1.2523e-02,  ...,  2.1390e-02,
          5.2945e-02,  2.4614e-02],
        ...,
        [ 1.9432e-02,  4.5919e-02, -2.3956e-02,  ..., -1.0902e-02,
         -5.1839e-02, -5.4077e-02],
        [-5.4081e-02, -4.5889e-02, -2.3463e-02,  ...,  6.2156e-02,
          3.4139e-02,  1.4349e-02],
        [-4.8210e-03, -3.5602e-02,  1.3085e-02,  ..., -4.4479e-03,
          4.3123e-02, -1.2605e-04]], requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
