## Building models with Pytorch

### torch.nn.Module and torch.nn.Parameter

In [1]:
import torch

class TinyModel(torch.nn.Module):

    def __init__(self):
        super(TinyModel, self).__init__()

        self.linear1 = torch.nn.Linear(100, 200)
        self.activation = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(200, 10)
        self.softmax = torch.nn.Softmax()

    def forward(self, x):
        x = self.linear1(x)
        x = self.activation(x)
        x = self.linear2(x)
        x = self.softmax(x)
        return x

tinymodel = TinyModel()

print('The model:')
print(tinymodel)

print('\n\nJust one layer:')
print(tinymodel.linear2)

print('\n\nModel params:')
for param in tinymodel.parameters():
    print(param)

print('\n\nLayer params:')
for param in tinymodel.linear2.parameters():
    print(param)

The model:
TinyModel(
  (linear1): Linear(in_features=100, out_features=200, bias=True)
  (activation): ReLU()
  (linear2): Linear(in_features=200, out_features=10, bias=True)
  (softmax): Softmax(dim=None)
)


Just one layer:
Linear(in_features=200, out_features=10, bias=True)


Model params:
Parameter containing:
tensor([[-0.0075, -0.0604,  0.0342,  ...,  0.0127, -0.0012, -0.0891],
        [ 0.0244,  0.0900, -0.0263,  ..., -0.0509, -0.0254,  0.0733],
        [-0.0652, -0.0981,  0.0814,  ..., -0.0504,  0.0674,  0.0655],
        ...,
        [-0.0941,  0.0473,  0.0610,  ...,  0.0470, -0.0247, -0.0188],
        [-0.0131, -0.0672,  0.0310,  ...,  0.0930, -0.0663, -0.0901],
        [ 0.0673, -0.0709, -0.0210,  ...,  0.0470,  0.0570, -0.0910]],
       requires_grad=True)
Parameter containing:
tensor([-0.0735,  0.0620, -0.0872,  0.0041, -0.0500,  0.0237, -0.0474, -0.0921,
         0.0596, -0.0763, -0.0524, -0.0260,  0.0272, -0.0265, -0.0366, -0.0980,
         0.0161, -0.0488,  0.0601,  0.01

### Common Layer Types

#### Linear Layers

In [2]:
# 모델에 m개 입력과 n개 출력이 있는 경우 가중치는 m*n 행렬이 됨

lin = torch.nn.Linear(3, 2)
x = torch.rand(1, 3)
print('Input:')
print(x)

print('\n\nWeight and Bias parameters:')
for param in lin.parameters():
    print(param)

y = lin(x)
print('\n\nOutput:')
print(y)

Input:
tensor([[0.8026, 0.6262, 0.6537]])


Weight and Bias parameters:
Parameter containing:
tensor([[ 0.4910,  0.0063, -0.5475],
        [ 0.4920, -0.1489,  0.1064]], requires_grad=True)
Parameter containing:
tensor([ 0.0999, -0.5635], requires_grad=True)


Output:
tensor([[ 0.1400, -0.1923]], grad_fn=<AddmmBackward0>)


#### Convolutional Layers

In [4]:
import torch.functional as F


class LeNet(torch.nn.Module):

    def __init__(self):
        super(LeNet, self).__init__()
        # 1 input image channel (black & white), 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = torch.nn.Conv2d(1, 6, 5) # 6 * 28 * 28
        self.conv2 = torch.nn.Conv2d(6, 16, 3) # 16 * 12 * 12
        # an affine operation: y = Wx + b
        self.fc1 = torch.nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) # 6 * 14 * 14
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2) # 16 * 6 * 6
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

#### Recurrent Layers

In [5]:
class LSTMTagger(torch.nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = torch.nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = torch.nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

### Other Layers and Functions

#### Data Manipulation Layers

In [9]:
# Max pooling
my_tensor = torch.rand(1, 6, 6)
print(my_tensor)

maxpool_layer = torch.nn.MaxPool2d(3)
print(maxpool_layer(my_tensor))

tensor([[[0.9191, 0.9620, 0.6940, 0.5174, 0.5445, 0.9370],
         [0.7855, 0.2736, 0.0172, 0.2840, 0.3287, 0.0739],
         [0.4004, 0.8372, 0.6779, 0.2387, 0.1580, 0.5195],
         [0.5005, 0.5857, 0.3439, 0.3425, 0.4621, 0.7411],
         [0.5540, 0.6065, 0.7924, 0.8993, 0.2313, 0.3722],
         [0.0394, 0.5770, 0.3195, 0.1892, 0.2926, 0.8974]]])
tensor([[[0.9620, 0.9370],
         [0.7924, 0.8993]]])


In [10]:
# Normalization layers
my_tensor = torch.rand(1, 4, 4) * 20 + 5
print(my_tensor)

print(my_tensor.mean())

norm_layer = torch.nn.BatchNorm1d(4)
normed_tensor = norm_layer(my_tensor)
print(normed_tensor)

print(normed_tensor.mean())

tensor([[[10.7628,  5.9726,  5.9309, 10.8419],
         [13.5741,  8.0906, 13.4338,  7.5920],
         [23.7558,  8.5548, 19.1348, 11.2813],
         [18.7796, 11.5719, 10.1376,  5.7251]]])
tensor(11.5712)
tensor([[[ 0.9836, -0.9913, -1.0085,  1.0162],
         [ 1.0226, -0.9101,  0.9732, -1.0858],
         [ 1.3307, -1.1745,  0.5691, -0.7252],
         [ 1.5389,  0.0039, -0.3016, -1.2413]]],
       grad_fn=<NativeBatchNormBackward0>)
tensor(1.4901e-08, grad_fn=<MeanBackward0>)


In [11]:
# Dropout layers
my_tensor = torch.rand(1, 4, 4)

dropout = torch.nn.Dropout(p=0.4) # default value : 0.5
print(dropout(my_tensor))
print(dropout(my_tensor))

tensor([[[0.3433, 0.1102, 0.0000, 0.0000],
         [0.7279, 0.3468, 0.5635, 1.0741],
         [0.0000, 1.2266, 0.0000, 0.0000],
         [0.0000, 1.2634, 0.9685, 0.5141]]])
tensor([[[0.0000, 0.0000, 0.9550, 0.0000],
         [0.0000, 0.3468, 0.0000, 1.0741],
         [1.5275, 1.2266, 1.2983, 0.6745],
         [0.1613, 0.0000, 0.0000, 0.0000]]])
