In [3]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim


### 搭建模型

In [4]:
class LinearBNAC(nn.Module):
    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super(LinearBNAC, self).__init__()
        if is_output and out_channels==1:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Sigmoid()
            )
        elif is_output:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Softmax(dim=1)
            )   
        else:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True)
            )
            
    def forward(self, x):
        out=self.linear(x)
        return out

In [19]:
class Model(nn.Module):
    def __init__(self, input_dimention, output_classes=1):
        super(Model, self).__init__()
        self.layer1 = LinearBNAC(input_dimention, 128)
        self.layer2 = LinearBNAC(128, 64)
        self.layer3 = LinearBNAC(64, 32)
        self.output = LinearBNAC(32, output_classes, is_output=True)
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.output(x)
        return x 
        

In [20]:
model.parameters

<bound method Module.parameters of Model(
  (layer1): LinearBNAC(
    (linear): Sequential(
      (0): Linear(in_features=256, out_features=128, bias=True)
      (1): Dropout(p=0.3, inplace=False)
      (2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): LeakyReLU(negative_slope=0.01, inplace=True)
    )
  )
  (layer2): LinearBNAC(
    (linear): Sequential(
      (0): Linear(in_features=256, out_features=64, bias=True)
      (1): Dropout(p=0.3, inplace=False)
      (2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): LeakyReLU(negative_slope=0.01, inplace=True)
    )
  )
  (layer3): LinearBNAC(
    (linear): Sequential(
      (0): Linear(in_features=256, out_features=32, bias=True)
      (1): Dropout(p=0.3, inplace=False)
      (2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): LeakyReLU(negative_slope=0.01, inplace=True)
    )
  )
  (output): LinearBNAC(

### 準備輸入資料、優化器、標籤資料、模型輸出

In [21]:
model = Model(input_dimention=256,output_classes=10)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-3)

In [22]:
batch_size = 4
input_features = 256
dummy_input = torch.randn(batch_size, input_features,)

#target = torch.empty(4, dtype=torch.float).random_(10)
target = torch.tensor([9., 5., 4., 4.], dtype=torch.long)

  target = torch.tensor([9., 5., 4., 4.], dtype=torch.long)


In [23]:
output = model(dummy_input)
print(output)

tensor([[0.0882, 0.2048, 0.1534, 0.0998, 0.0962, 0.0606, 0.0412, 0.1108, 0.0855,
         0.0595],
        [0.1063, 0.1939, 0.0998, 0.1358, 0.0881, 0.0800, 0.0500, 0.1097, 0.0482,
         0.0882],
        [0.0690, 0.1328, 0.0867, 0.0943, 0.0810, 0.1328, 0.1941, 0.0995, 0.0288,
         0.0809],
        [0.0449, 0.0660, 0.0672, 0.2198, 0.1115, 0.0882, 0.0773, 0.2004, 0.0401,
         0.0846]], grad_fn=<SoftmaxBackward>)


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用dropout，並隨機產生dummy_input，所以各為學員得到的值會與解答不同，然而步驟原理需要相同

In [24]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [36]:
criterion = NLLLoss()

In [37]:
loss = criterion(torch.log(output), target)

### 完成back propagation並更新梯度

In [38]:
loss.backward()

In [39]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0483, -0.0002,  0.0239,  ...,  0.0114,  0.0442,  0.0081],
        [-0.0059,  0.0571,  0.0071,  ...,  0.0401,  0.0317, -0.0590],
        [ 0.0300, -0.0216, -0.0415,  ..., -0.0303, -0.0282,  0.0401],
        ...,
        [ 0.0028,  0.0111,  0.0609,  ..., -0.0439,  0.0604, -0.0584],
        [-0.0450,  0.0238,  0.0383,  ...,  0.0225,  0.0090, -0.0286],
        [-0.0023, -0.0390, -0.0479,  ..., -0.0214,  0.0534,  0.0474]],
       requires_grad=True)


grad : tensor([[-1.2451e-02, -1.5396e-03, -1.3489e-02,  ...,  7.5186e-03,
         -7.2275e-03,  9.2316e-05],
        [-1.0376e-03, -1.3627e-03, -1.8037e-06,  ..., -1.9224e-03,
          2.0201e-03, -1.0938e-03],
        [-2.3476e-03, -1.9816e-03,  3.9279e-03,  ..., -3.0957e-03,
          5.0963e-03, -3.7654e-03],
        ...,
        [-1.5314e-04, -2.2375e-04,  1.7275e-04,  ..., -2.9125e-04,
          3.6786e-04, -2.5134e-04],
        [-1.7464e-01, -1.1071e-01, -2.6418e-01,  ...,  2.1880e-02,
       

In [41]:
optimizer.step()

In [42]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0473,  0.0008,  0.0249,  ...,  0.0104,  0.0452,  0.0071],
        [-0.0049,  0.0581,  0.0061,  ...,  0.0411,  0.0307, -0.0580],
        [ 0.0310, -0.0206, -0.0425,  ..., -0.0293, -0.0292,  0.0411],
        ...,
        [ 0.0038,  0.0121,  0.0599,  ..., -0.0429,  0.0594, -0.0574],
        [-0.0440,  0.0248,  0.0393,  ...,  0.0215,  0.0100, -0.0276],
        [-0.0033, -0.0400, -0.0469,  ..., -0.0224,  0.0544,  0.0464]],
       requires_grad=True)


grad : tensor([[-1.2451e-02, -1.5396e-03, -1.3489e-02,  ...,  7.5186e-03,
         -7.2275e-03,  9.2316e-05],
        [-1.0376e-03, -1.3627e-03, -1.8037e-06,  ..., -1.9224e-03,
          2.0201e-03, -1.0938e-03],
        [-2.3476e-03, -1.9816e-03,  3.9279e-03,  ..., -3.0957e-03,
          5.0963e-03, -3.7654e-03],
        ...,
        [-1.5314e-04, -2.2375e-04,  1.7275e-04,  ..., -2.9125e-04,
          3.6786e-04, -2.5134e-04],
        [-1.7464e-01, -1.1071e-01, -2.6418e-01,  ...,  2.1880e-02,
       

### 清空 gradient

In [43]:
optimizer.zero_grad()

In [44]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0473,  0.0008,  0.0249,  ...,  0.0104,  0.0452,  0.0071],
        [-0.0049,  0.0581,  0.0061,  ...,  0.0411,  0.0307, -0.0580],
        [ 0.0310, -0.0206, -0.0425,  ..., -0.0293, -0.0292,  0.0411],
        ...,
        [ 0.0038,  0.0121,  0.0599,  ..., -0.0429,  0.0594, -0.0574],
        [-0.0440,  0.0248,  0.0393,  ...,  0.0215,  0.0100, -0.0276],
        [-0.0033, -0.0400, -0.0469,  ..., -0.0224,  0.0544,  0.0464]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
