In [35]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np

### 模型搭建

In [36]:
class LinearBNAC(nn.Module):
    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super(LinearBNAC, self).__init__()
        if is_output and out_channels==1:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Sigmoid()
            )
        elif is_output:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Softmax(dim=1)
            )   
        else:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True)
            )
            
    def forward(self, x):
        out=self.linear(x)
        return out

In [37]:
class Model(nn.Module):
    def __init__(self, input_dimention, output_classes=1):
        super(Model, self).__init__()
        self.layer1 = LinearBNAC(input_dimention, 128)
        self.layer2 = LinearBNAC(128, 64)
        self.layer3 = LinearBNAC(64, 32)
        self.output = LinearBNAC(32, output_classes, is_output=True)
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.output(x)
        return x 
        

### 測試

In [38]:
model = Model(input_dimention=500,output_classes=1)

In [39]:
for name,_ in model.named_parameters():
    print(name, _.shape)

layer1.linear.0.weight torch.Size([128, 500])
layer1.linear.0.bias torch.Size([128])
layer1.linear.2.weight torch.Size([128])
layer1.linear.2.bias torch.Size([128])
layer2.linear.0.weight torch.Size([64, 128])
layer2.linear.0.bias torch.Size([64])
layer2.linear.2.weight torch.Size([64])
layer2.linear.2.bias torch.Size([64])
layer3.linear.0.weight torch.Size([32, 64])
layer3.linear.0.bias torch.Size([32])
layer3.linear.2.weight torch.Size([32])
layer3.linear.2.bias torch.Size([32])
output.linear.0.weight torch.Size([1, 32])
output.linear.0.bias torch.Size([1])


In [40]:
batch_size = 12
input_features = 500
dummy_input = torch.randn(batch_size, input_features)


In [41]:
output = model(dummy_input)
print(output)

tensor([[0.3981],
        [0.4823],
        [0.6930],
        [0.5917],
        [0.4467],
        [0.5056],
        [0.5486],
        [0.5445],
        [0.5141],
        [0.7055],
        [0.6723],
        [0.4662]], grad_fn=<SigmoidBackward>)


## Loss

#### classification

* binary classification

In [42]:
from torch.nn import BCELoss, BCEWithLogitsLoss

sigmoid = nn.Sigmoid()

prediction = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)

## 將每一組 prediciotn, target 算出來的值相加
criterios = BCELoss(reduction='sum')
loss_1 = criterios(sigmoid(prediction), target)

## 將每一組 prediciotn, target 算出來的值平均
criterios = BCELoss(reduction='mean')
loss_2 = criterios(sigmoid(prediction), target)

print(loss_1, loss_2)

tensor(2.1981, grad_fn=<BinaryCrossEntropyBackward>) tensor(0.7327, grad_fn=<BinaryCrossEntropyBackward>)


In [43]:
## BCEWithLogitsLoss自帶 sigmoid 功能
criterios = BCEWithLogitsLoss(reduction='mean')
loss_3 = criterios(prediction, target)
assert loss_2 == loss_3 ## 應該要與 output2相同
print(loss_3)

tensor(0.7327, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)


* multiclass classification

In [44]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [45]:
## CrossEntropyLoss = LogSoftmax + NLLLoss

In [46]:
prediction = torch.randn(2, 3)
ground_truth = torch.tensor([2, 0])

* LogSoftmax

In [47]:
log_softmax = LogSoftmax()

output = log_softmax(prediction)

softmax = nn.Softmax()
assert '{:.4f}'.format(output.sum()) == '{:.4f}'.format(torch.log(softmax(prediction)).sum())

  output = log_softmax(prediction)
  assert '{:.4f}'.format(output.sum()) == '{:.4f}'.format(torch.log(softmax(prediction)).sum())


* NLLLoss

In [48]:
criterion = NLLLoss()
loss_1 = criterion(output, ground_truth)
print(loss_1)

tensor(1.9151)


* 將NLLLoss拆解

In [49]:
ground_truth_onehot = torch.FloatTensor(prediction.shape)
ground_truth_onehot.zero_()
ground_truth_onehot.scatter_(1, ground_truth.reshape(-1,1), 1)
loss_count = - torch.mul(ground_truth_onehot, output).sum(-1).mean()
assert '{:.4f}'.format(loss_1) == '{:.4f}'.format(loss_count)
print(loss_count)

tensor(1.9151)


* CrossEntropyLoss

In [50]:
criterion = CrossEntropyLoss()

In [51]:
loss_2 = criterion(prediction, ground_truth)
assert '{:.4f}'.format(loss_1) == '{:.4f}'.format(loss_2)
print(loss_2)

tensor(1.9151)


#### regression

In [52]:
## MSE == L2 loss

!['pic'](l1_l2_smooth.png)

In [53]:
from torch.nn import MSELoss, L1Loss, SmoothL1Loss

In [54]:
prediction = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5)


* MSELoss

In [55]:
criterion = MSELoss()
output = criterion(prediction, target)
print(output)

tensor(1.2605, grad_fn=<MseLossBackward>)


* L1Loss

In [56]:
criterion = L1Loss()
output = criterion(prediction, target)
print(output)

tensor(0.8954, grad_fn=<L1LossBackward>)


* SmoothL1Loss

In [57]:
criterion = SmoothL1Loss()
output = criterion(prediction, target)
print(output)

tensor(0.5182, grad_fn=<SmoothL1LossBackward>)


---

## optimizer

In [58]:
import torch.optim as optim

In [59]:
model = Model(input_dimention=500,output_classes=1)

* params : iterable of parameters
* lr : learning rate
* weight_decay : (L2) Regularization (正則化) 

In [60]:
optimizer = optim.SGD(params=model.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-3)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-3)
optimizer = optim.RMSprop(params=model.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0,\
                          momentum=0, centered=False)

### 利用 optimizer.step() 來實現參數更新

In [61]:
batch_size = 12
input_features = 500
dummy_input = torch.randn(batch_size, input_features)

prediction = model(dummy_input)
target = torch.empty(12, dtype=torch.float).random_(2)

In [62]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0143, -0.0278,  0.0318,  ...,  0.0302, -0.0134,  0.0200],
        [ 0.0241,  0.0418, -0.0318,  ...,  0.0057,  0.0250,  0.0087],
        [-0.0024,  0.0388, -0.0266,  ...,  0.0007,  0.0086, -0.0360],
        ...,
        [-0.0131,  0.0092,  0.0143,  ..., -0.0247,  0.0072, -0.0266],
        [-0.0303,  0.0079,  0.0153,  ...,  0.0137, -0.0331, -0.0120],
        [ 0.0370, -0.0115, -0.0273,  ..., -0.0003,  0.0298, -0.0254]],
       requires_grad=True)


grad : None


In [63]:
criterios = BCELoss(reduction='mean')
loss = criterios(prediction.reshape(-1), target)

In [64]:
loss.backward()

In [65]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0143, -0.0278,  0.0318,  ...,  0.0302, -0.0134,  0.0200],
        [ 0.0241,  0.0418, -0.0318,  ...,  0.0057,  0.0250,  0.0087],
        [-0.0024,  0.0388, -0.0266,  ...,  0.0007,  0.0086, -0.0360],
        ...,
        [-0.0131,  0.0092,  0.0143,  ..., -0.0247,  0.0072, -0.0266],
        [-0.0303,  0.0079,  0.0153,  ...,  0.0137, -0.0331, -0.0120],
        [ 0.0370, -0.0115, -0.0273,  ..., -0.0003,  0.0298, -0.0254]],
       requires_grad=True)


grad : tensor([[-0.0190, -0.0331,  0.0194,  ..., -0.0426, -0.0159,  0.0356],
        [ 0.0039, -0.0019,  0.0019,  ...,  0.0019,  0.0008,  0.0002],
        [ 0.0067,  0.0125, -0.0092,  ...,  0.0172, -0.0085,  0.0047],
        ...,
        [-0.0137, -0.0029, -0.0002,  ..., -0.0037,  0.0046,  0.0098],
        [-0.0012, -0.0015, -0.0001,  ..., -0.0021, -0.0006,  0.0027],
        [-0.0035,  0.0116, -0.0013,  ..., -0.0050,  0.0114,  0.0221]])


In [66]:
optimizer.step()

In [67]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[ 0.0857,  0.0722, -0.0682,  ...,  0.1302,  0.0866, -0.0800],
        [-0.0759,  0.1418, -0.1318,  ..., -0.0943, -0.0750, -0.0912],
        [-0.1024, -0.0612,  0.0734,  ..., -0.0993,  0.1086, -0.1360],
        ...,
        [ 0.0869,  0.1092,  0.1143,  ...,  0.0753, -0.0928, -0.1266],
        [ 0.0696,  0.1079,  0.1152,  ...,  0.1137,  0.0668, -0.1120],
        [ 0.1370, -0.1115,  0.0727,  ...,  0.0997, -0.0702, -0.1254]],
       requires_grad=True)


grad : tensor([[-0.0190, -0.0331,  0.0194,  ..., -0.0426, -0.0159,  0.0356],
        [ 0.0039, -0.0019,  0.0019,  ...,  0.0019,  0.0008,  0.0002],
        [ 0.0067,  0.0125, -0.0092,  ...,  0.0172, -0.0085,  0.0047],
        ...,
        [-0.0137, -0.0029, -0.0002,  ..., -0.0037,  0.0046,  0.0098],
        [-0.0012, -0.0015, -0.0001,  ..., -0.0021, -0.0006,  0.0027],
        [-0.0035,  0.0116, -0.0013,  ..., -0.0050,  0.0114,  0.0221]])


In [68]:
optimizer.zero_grad()

In [69]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[ 0.0857,  0.0722, -0.0682,  ...,  0.1302,  0.0866, -0.0800],
        [-0.0759,  0.1418, -0.1318,  ..., -0.0943, -0.0750, -0.0912],
        [-0.1024, -0.0612,  0.0734,  ..., -0.0993,  0.1086, -0.1360],
        ...,
        [ 0.0869,  0.1092,  0.1143,  ...,  0.0753, -0.0928, -0.1266],
        [ 0.0696,  0.1079,  0.1152,  ...,  0.1137,  0.0668, -0.1120],
        [ 0.1370, -0.1115,  0.0727,  ...,  0.0997, -0.0702, -0.1254]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


---