In [18]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np

### 模型搭建

In [105]:
class LinearBNAC(nn.Module):
    """Fully connected block: a Linear layer followed by a role-specific tail.

    * Hidden block (``is_output=False``): Linear -> Dropout -> BatchNorm1d -> LeakyReLU.
    * Output block with one unit: Linear -> Sigmoid.
    * Output block with several units: Linear -> Softmax over dim 1.

    Args:
        in_channels: number of input features of the Linear layer.
        out_channels: number of output features of the Linear layer.
        bias: whether the Linear layer has a bias term.
        dropout: drop probability passed to ``nn.Dropout`` (hidden blocks only).
        is_output: build an output head instead of a hidden block.
    """

    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super().__init__()
        # The Linear layer always sits at index 0 of the Sequential.
        stages = [nn.Linear(in_channels, out_channels, bias=bias)]
        if not is_output:
            stages += [
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True),
            ]
        elif out_channels == 1:
            stages.append(nn.Sigmoid())
        else:
            stages.append(nn.Softmax(dim=1))
        self.linear = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the assembled Sequential and return the result."""
        return self.linear(x)

In [106]:
class Model(nn.Module):
    """Small MLP: three hidden LinearBNAC blocks (128, 64, 32 units) plus an output head.

    Args:
        input_dimention: number of input features per sample.
        output_classes: number of output units; forwarded to the output
            LinearBNAC block, which is built with ``is_output=True``.
    """

    def __init__(self, input_dimention, output_classes=1):
        super().__init__()
        # Hidden widths taper 128 -> 64 -> 32 before the output head.
        self.layer1 = LinearBNAC(input_dimention, 128)
        self.layer2 = LinearBNAC(128, 64)
        self.layer3 = LinearBNAC(64, 32)
        self.output = LinearBNAC(32, output_classes, is_output=True)

    def forward(self, x):
        """Apply the three hidden blocks and then the output head, in order."""
        for stage in (self.layer1, self.layer2, self.layer3, self.output):
            x = stage(x)
        return x
        

### 測試

In [108]:
# Build the network for 500 input features with a single (sigmoid) output unit.
model = Model(input_dimention=500,output_classes=1)

In [112]:
# List every learnable parameter with its shape to sanity-check the layer wiring.
# A descriptive loop variable replaces `_`, which conventionally marks an unused
# value and which IPython uses for the previous cell's output.
for name, param in model.named_parameters():
    print(name, param.shape)

layer1.linear.0.weight torch.Size([128, 500])
layer1.linear.0.bias torch.Size([128])
layer1.linear.2.weight torch.Size([128])
layer1.linear.2.bias torch.Size([128])
layer2.linear.0.weight torch.Size([64, 128])
layer2.linear.0.bias torch.Size([64])
layer2.linear.2.weight torch.Size([64])
layer2.linear.2.bias torch.Size([64])
layer3.linear.0.weight torch.Size([32, 64])
layer3.linear.0.bias torch.Size([32])
layer3.linear.2.weight torch.Size([32])
layer3.linear.2.bias torch.Size([32])
output.linear.0.weight torch.Size([1, 32])
output.linear.0.bias torch.Size([1])


In [282]:
# Random batch for smoke-testing the forward pass: 12 samples x 500 features,
# matching the input_dimention the model was built with.
batch_size = 12
input_features = 500
dummy_input = torch.randn(batch_size, input_features)


In [283]:
# Forward pass on the dummy batch; the sigmoid head yields one value per sample.
output = model(dummy_input)
print(output)

tensor([[0.6246],
        [0.4512],
        [0.5598],
        [0.5134],
        [0.4838],
        [0.4538],
        [0.4384],
        [0.6206],
        [0.4373],
        [0.3914],
        [0.3709],
        [0.4022]], grad_fn=<SigmoidBackward>)


## Loss

#### classification

* binary classification

In [317]:
from torch.nn import BCELoss, BCEWithLogitsLoss

sigmoid = nn.Sigmoid()

# Three raw scores (logits) and three random 0/1 labels.
prediction = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)

# BCELoss is fed the sigmoid of the raw scores; compute it once and reuse it.
probabilities = sigmoid(prediction)

# reduction='sum': add up the values computed for each (prediction, target) pair.
loss_1 = BCELoss(reduction='sum')(probabilities, target)

# reduction='mean': average the values computed for each (prediction, target) pair.
criterios = BCELoss(reduction='mean')
loss_2 = criterios(probabilities, target)

print(loss_1, loss_2)

tensor(2.4442, grad_fn=<BinaryCrossEntropyBackward>) tensor(0.8147, grad_fn=<BinaryCrossEntropyBackward>)


In [318]:
# BCEWithLogitsLoss has the sigmoid built in, so it takes the raw scores directly.
criterios = BCEWithLogitsLoss(reduction='mean')
loss_3 = criterios(prediction, target)
# Should match loss_2. The two losses are computed along different numerical
# paths, so compare with a tolerance rather than exact float equality.
assert torch.allclose(loss_2, loss_3)
print(loss_3)

tensor(0.8147, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)


* multiclass classification

In [214]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [215]:
## CrossEntropyLoss = LogSoftmax + NLLLoss

In [221]:
# Two samples with three class scores each, plus the true class index of each sample.
prediction = torch.randn(2, 3)
ground_truth = torch.tensor([2, 0])

* LogSoftmax

In [253]:
# Normalise over the class axis explicitly (dim=1). Omitting `dim` relies on a
# deprecated implicit choice and makes PyTorch emit a warning.
log_softmax = LogSoftmax(dim=1)

output = log_softmax(prediction)

# LogSoftmax must agree element-wise with log(Softmax(x)); compare numerically
# with a tolerance instead of comparing 4-decimal formatted sums.
softmax = nn.Softmax(dim=1)
assert torch.allclose(output, torch.log(softmax(prediction)))

  This is separate from the ipykernel package so we can avoid doing imports until
  


* NLLLoss

In [260]:
# NLLLoss is applied to the LogSoftmax output together with the class indices.
criterion = NLLLoss()
loss_1 = criterion(output, ground_truth)
print(loss_1)

tensor(0.8571)


* 將NLLLoss拆解

In [270]:
# Reproduce NLLLoss by hand: one-hot encode the labels, keep the log-probability
# of the true class for each sample, then negate and average over the batch.
# zeros_like yields an already-initialised tensor shaped like `prediction`,
# replacing the legacy uninitialised torch.FloatTensor(size) + zero_() pair.
ground_truth_onehot = torch.zeros_like(prediction)
ground_truth_onehot.scatter_(1, ground_truth.reshape(-1, 1), 1)
loss_count = -(ground_truth_onehot * output).sum(-1).mean()
# Compare numerically with a tolerance instead of via 4-decimal string formatting.
assert torch.allclose(loss_1, loss_count)
print(loss_count)

tensor(0.8571)


* CrossEntropyLoss

In [271]:
# CrossEntropyLoss = LogSoftmax + NLLLoss, so it is applied to the raw scores.
criterion = CrossEntropyLoss()

In [272]:
# Feeding the raw scores to CrossEntropyLoss should reproduce the
# LogSoftmax + NLLLoss result stored in loss_1.
loss_2 = criterion(prediction, ground_truth)
# Tolerance-based comparison; formatting both values to 4 decimals can fail
# spuriously when the values straddle a rounding boundary.
assert torch.allclose(loss_1, loss_2)
print(loss_2)

tensor(0.8571)


#### regression

In [281]:
## MSE == L2 loss

!['pic'](l1_l2_smooth.png)

In [275]:
from torch.nn import MSELoss, L1Loss, SmoothL1Loss

In [276]:
# Random 3x5 predictions (tracked by autograd) and 3x5 targets for the regression losses.
prediction = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5)


* MSELoss

In [278]:
# MSE (L2) loss between prediction and target, using the default settings.
criterion = MSELoss()
output = criterion(prediction, target)
print(output)

tensor(1.1713, grad_fn=<MseLossBackward>)


* L1Loss

In [279]:
# L1 loss between prediction and target, using the default settings.
criterion = L1Loss()
output = criterion(prediction, target)
print(output)

tensor(0.7915, grad_fn=<L1LossBackward>)


* SmoothL1Loss

In [280]:
# Smooth L1 loss between prediction and target, using the default settings.
criterion = SmoothL1Loss()
output = criterion(prediction, target)
print(output)

tensor(0.4373, grad_fn=<SmoothL1LossBackward>)


---

## optimizer

In [393]:
import torch.optim as optim

In [394]:
# Fresh model instance whose parameters are handed to the optimizers below.
model = Model(input_dimention=500,output_classes=1)

* params : iterable of parameters
* lr : learning rate
* weight_decay : (L2) Regularization (正則化) 

In [395]:
# Three optimizer constructions shown side by side. Each assignment rebinds
# `optimizer`, so only the last one (RMSprop) remains in use afterwards.
optimizer = optim.SGD(params=model.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-3)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-3)
optimizer = optim.RMSprop(
    params=model.parameters(),
    lr=0.01,
    alpha=0.99,
    eps=1e-08,
    weight_decay=0,
    momentum=0,
    centered=False,
)

### 利用 optimizer.step() 來實現參數更新

In [396]:
# Dummy batch for the optimizer walkthrough: 12 samples x 500 features.
batch_size = 12
input_features = 500
dummy_input = torch.randn(batch_size, input_features)

prediction = model(dummy_input)
# One random 0/1 label per sample. The size comes from batch_size (not a
# repeated literal) so the target always matches the batch.
target = torch.empty(batch_size, dtype=torch.float).random_(2)

In [397]:
# Inspect the first Linear layer before any backward pass: its weight exists,
# but no gradient has been computed yet (grad is None).
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0309,  0.0286, -0.0084,  ..., -0.0153, -0.0038,  0.0111],
        [-0.0070, -0.0294,  0.0051,  ...,  0.0125, -0.0431,  0.0296],
        [ 0.0400, -0.0368,  0.0265,  ...,  0.0180,  0.0042,  0.0041],
        ...,
        [-0.0134, -0.0248,  0.0377,  ..., -0.0387, -0.0249, -0.0385],
        [-0.0039, -0.0131, -0.0083,  ...,  0.0113, -0.0177,  0.0394],
        [-0.0050,  0.0114,  0.0042,  ...,  0.0302, -0.0015, -0.0022]],
       requires_grad=True)


grad : None


In [398]:
# The model output has one column per sample; reshape(-1) flattens it to 1-D
# so that it lines up with the 1-D target.
criterios = BCELoss(reduction='mean')
loss = criterios(prediction.reshape(-1), target)

In [399]:
# Backpropagate the loss; this populates .grad on the model parameters.
loss.backward()

In [400]:
# After backward(): the weight is unchanged, but its gradient is now populated.
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0309,  0.0286, -0.0084,  ..., -0.0153, -0.0038,  0.0111],
        [-0.0070, -0.0294,  0.0051,  ...,  0.0125, -0.0431,  0.0296],
        [ 0.0400, -0.0368,  0.0265,  ...,  0.0180,  0.0042,  0.0041],
        ...,
        [-0.0134, -0.0248,  0.0377,  ..., -0.0387, -0.0249, -0.0385],
        [-0.0039, -0.0131, -0.0083,  ...,  0.0113, -0.0177,  0.0394],
        [-0.0050,  0.0114,  0.0042,  ...,  0.0302, -0.0015, -0.0022]],
       requires_grad=True)


grad : tensor([[-0.0136, -0.0266,  0.0739,  ...,  0.0317, -0.0294, -0.0088],
        [ 0.0222,  0.0095, -0.0154,  ...,  0.0556, -0.0070,  0.0026],
        [-0.0034, -0.0127, -0.0272,  ..., -0.0049, -0.0230, -0.0004],
        ...,
        [ 0.0166, -0.0325, -0.0516,  ...,  0.0019, -0.0058,  0.0084],
        [ 0.0040,  0.0156,  0.0064,  ..., -0.0193,  0.0008,  0.0053],
        [-0.0104, -0.0069, -0.0242,  ..., -0.0024, -0.0174, -0.0001]])


In [401]:
# Apply one parameter update using the gradients stored by backward().
optimizer.step()

In [402]:
# After step(): the weight values have changed; the stored gradient is still there.
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[ 0.0691,  0.1286, -0.1084,  ..., -0.1153,  0.0962,  0.1111],
        [-0.1070, -0.1294,  0.1051,  ..., -0.0875,  0.0569, -0.0704],
        [ 0.1400,  0.0632,  0.1265,  ...,  0.1180,  0.1042,  0.1041],
        ...,
        [-0.1134,  0.0752,  0.1377,  ..., -0.1387,  0.0751, -0.1385],
        [-0.1039, -0.1131, -0.1083,  ...,  0.1113, -0.1177, -0.0606],
        [ 0.0950,  0.1114,  0.1042,  ...,  0.1302,  0.0985,  0.0977]],
       requires_grad=True)


grad : tensor([[-0.0136, -0.0266,  0.0739,  ...,  0.0317, -0.0294, -0.0088],
        [ 0.0222,  0.0095, -0.0154,  ...,  0.0556, -0.0070,  0.0026],
        [-0.0034, -0.0127, -0.0272,  ..., -0.0049, -0.0230, -0.0004],
        ...,
        [ 0.0166, -0.0325, -0.0516,  ...,  0.0019, -0.0058,  0.0084],
        [ 0.0040,  0.0156,  0.0064,  ..., -0.0193,  0.0008,  0.0053],
        [-0.0104, -0.0069, -0.0242,  ..., -0.0024, -0.0174, -0.0001]])


In [403]:
# Reset the stored gradients so they do not carry over into the next backward pass.
optimizer.zero_grad()

In [404]:
# After zero_grad(): the weight keeps its updated values and, in the recorded
# output, the gradient is all zeros.
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[ 0.0691,  0.1286, -0.1084,  ..., -0.1153,  0.0962,  0.1111],
        [-0.1070, -0.1294,  0.1051,  ..., -0.0875,  0.0569, -0.0704],
        [ 0.1400,  0.0632,  0.1265,  ...,  0.1180,  0.1042,  0.1041],
        ...,
        [-0.1134,  0.0752,  0.1377,  ..., -0.1387,  0.0751, -0.1385],
        [-0.1039, -0.1131, -0.1083,  ...,  0.1113, -0.1177, -0.0606],
        [ 0.0950,  0.1114,  0.1042,  ...,  0.1302,  0.0985,  0.0977]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


---