In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim


### 搭建模型

In [2]:
class LinearBNAC(nn.Module):
    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super(LinearBNAC, self).__init__()
        if is_output and out_channels==1:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Sigmoid()
            )
        elif is_output:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Softmax(dim=1)
            )   
        else:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True)
            )
            
    def forward(self, x):
        out=self.linear(x)
        return out

In [3]:
class Model(nn.Module):
    def __init__(self, input_dimention, output_classes=1):
        super(Model, self).__init__()
        self.layer1 = LinearBNAC(input_dimention, 128)
        self.layer2 = LinearBNAC(128, 256)
        self.layer3 = LinearBNAC(256, 32)
        self.output = LinearBNAC(32, output_classes, is_output=True)
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.output(x)
        return x 
        

### 準備輸入資料、優化器、標籤資料、模型輸出

In [4]:
model = Model(input_dimention=256,output_classes=10)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-3)

In [5]:
batch_size = 4
input_features = 256
dummy_input = torch.randn(batch_size, input_features,)

#target = torch.empty(4, dtype=torch.float).random_(10)
target = torch.tensor([9., 5., 4., 4.], dtype=torch.long)
target

tensor([9, 5, 4, 4])

In [6]:
output = model(dummy_input)
print(output)

tensor([[0.0636, 0.1703, 0.1634, 0.0304, 0.2290, 0.0533, 0.0464, 0.0647, 0.1097,
         0.0691],
        [0.0688, 0.1990, 0.0702, 0.0585, 0.1243, 0.1169, 0.0605, 0.1280, 0.1164,
         0.0574],
        [0.1064, 0.0913, 0.1580, 0.0484, 0.0918, 0.0697, 0.0900, 0.1060, 0.1284,
         0.1101],
        [0.0701, 0.0980, 0.1214, 0.0530, 0.0795, 0.1139, 0.0684, 0.0883, 0.2055,
         0.1018]], grad_fn=<SoftmaxBackward>)


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用dropout，並隨機產生dummy_input，所以各為學員得到的值會與解答不同，然而步驟原理需要相同

In [7]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [9]:
criterion = CrossEntropyLoss()

In [10]:
loss = criterion(torch.log(output), target)

### 完成back propagation並更新梯度

In [11]:
loss.backward()

  allow_unreachable=True)  # allow_unreachable flag


In [12]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0269,  0.0410,  0.0028,  ..., -0.0398,  0.0064,  0.0160],
        [-0.0593,  0.0463, -0.0167,  ..., -0.0453,  0.0438,  0.0152],
        [-0.0123, -0.0061, -0.0061,  ..., -0.0620,  0.0608,  0.0599],
        ...,
        [ 0.0461, -0.0490, -0.0472,  ..., -0.0618, -0.0598, -0.0493],
        [ 0.0020, -0.0491, -0.0527,  ...,  0.0348, -0.0254,  0.0220],
        [ 0.0282,  0.0058,  0.0375,  ...,  0.0559,  0.0287,  0.0578]],
       requires_grad=True)


grad : tensor([[ 6.1108e-03,  6.1443e-03,  1.5322e-02,  ...,  5.9090e-03,
         -2.4297e-02, -3.9286e-04],
        [ 2.2729e-02,  1.5878e-02,  1.3199e-02,  ...,  6.5021e-03,
         -3.8003e-02, -1.8227e-02],
        [-1.9294e-02, -1.4607e-02, -4.0015e-03,  ..., -2.5983e-03,
          3.5437e-02,  2.7809e-02],
        ...,
        [ 3.0527e-04,  1.9531e-04,  1.6899e-04,  ..., -3.0886e-04,
          5.2645e-04,  7.3982e-05],
        [ 1.0931e-05, -3.5413e-04, -9.2118e-04,  ...,  8.1000e-04,
       

In [39]:
"自行輸入"

In [40]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0020, -0.0373, -0.0050,  ...,  0.0348, -0.0255, -0.0287],
        [-0.0442, -0.0372,  0.0394,  ...,  0.0370,  0.0108,  0.0457],
        [-0.0266, -0.0532, -0.0156,  ...,  0.0218,  0.0579,  0.0164],
        ...,
        [ 0.0132, -0.0488,  0.0491,  ..., -0.0171, -0.0319, -0.0104],
        [ 0.0273,  0.0389,  0.0589,  ..., -0.0364, -0.0182, -0.0368],
        [-0.0505, -0.0091,  0.0553,  ...,  0.0418,  0.0059,  0.0202]],
       requires_grad=True)


grad : tensor([[ 0.0834, -0.0256, -0.0764,  ...,  0.0718,  0.0144,  0.0422],
        [ 0.1144, -0.0429, -0.0292,  ..., -0.0337,  0.0184, -0.0535],
        [-0.2403,  0.1393,  0.2140,  ..., -0.1664, -0.1559, -0.0831],
        ...,
        [-0.2777,  0.1198,  0.1241,  ..., -0.1771,  0.0594, -0.0452],
        [-0.0297,  0.1155, -0.0428,  ...,  0.0807, -0.1282,  0.0959],
        [ 0.0055, -0.0410, -0.0648,  ...,  0.0877,  0.0826,  0.0698]])


### 清空 gradient

In [41]:
"自行輸入"

In [42]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0020, -0.0373, -0.0050,  ...,  0.0348, -0.0255, -0.0287],
        [-0.0442, -0.0372,  0.0394,  ...,  0.0370,  0.0108,  0.0457],
        [-0.0266, -0.0532, -0.0156,  ...,  0.0218,  0.0579,  0.0164],
        ...,
        [ 0.0132, -0.0488,  0.0491,  ..., -0.0171, -0.0319, -0.0104],
        [ 0.0273,  0.0389,  0.0589,  ..., -0.0364, -0.0182, -0.0368],
        [-0.0505, -0.0091,  0.0553,  ...,  0.0418,  0.0059,  0.0202]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
