## Build a NN

In [2]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


### Define the class

In [15]:
# nn.Module을 상속받아 사용한다
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__() #nn.Module을 초기화 상속(생성자 역할), 반드시 있어야함
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU(),
            nn.Linear(512,10),
            nn.ReLU()
        )
    # forward(*input) 형태로, 연산 정의를 담당. subclasses에서 overridden되어야 한다!
    def forward(self, x): #nn.module을 상속받았기에 forward을 바꾸면 안됨
        x = self.flatten(x) #펴주고
        logits = self.linear_relu_stack(x) #init에서 정의한 layer통과시키기 
        return logits

In [16]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


In [17]:
#prediction
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits) #get the prediction densities
y_pred = pred_probab.argmax(1)
print(f"Predicted class : {y_pred}")

Predicted class : tensor([1], device='cuda:0')


### Weights and Bias

+ nn.Linear randomly initializes the weights and bias for each layers and internally stores

In [18]:
print(f"First Linear weights: {model.linear_relu_stack[0].weight} \n")

print(f"First Linear weights: {model.linear_relu_stack[0].bias} \n")

First Linear weights: Parameter containing:
tensor([[ 0.0087, -0.0046, -0.0333,  ...,  0.0283, -0.0220, -0.0045],
        [ 0.0168,  0.0060,  0.0144,  ..., -0.0010,  0.0279,  0.0338],
        [-0.0315,  0.0143,  0.0160,  ..., -0.0126,  0.0042, -0.0137],
        ...,
        [-0.0167,  0.0113, -0.0351,  ..., -0.0226,  0.0078, -0.0316],
        [ 0.0317, -0.0105, -0.0014,  ..., -0.0024,  0.0238, -0.0204],
        [-0.0295, -0.0178,  0.0053,  ..., -0.0286, -0.0187,  0.0082]],
       device='cuda:0', requires_grad=True) 

First Linear weights: Parameter containing:
tensor([ 0.0350,  0.0086,  0.0060, -0.0189,  0.0204, -0.0059, -0.0001,  0.0317,
         0.0255,  0.0177,  0.0050,  0.0002, -0.0242, -0.0264,  0.0146, -0.0262,
        -0.0036, -0.0264,  0.0295,  0.0227,  0.0231,  0.0241,  0.0110,  0.0097,
        -0.0007, -0.0164, -0.0228,  0.0340, -0.0051, -0.0109, -0.0194, -0.0147,
        -0.0196, -0.0034,  0.0054,  0.0086, -0.0140, -0.0146,  0.0304,  0.0305,
        -0.0205, -0.0120,  0.014

### Model layers

In [20]:
input_image = torch.rand(3, 28, 28)
print(input_image.size())

torch.Size([3, 28, 28])


In [21]:
# nn.Flatten
# convert 2D 28*28 image into contiguous array of 784 pixel values
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

torch.Size([3, 784])


In [22]:
# nn.Linear
# weight * input + bias
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

torch.Size([3, 20])


In [23]:
# nn.ReLU
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.0800, -0.0684, -0.5578,  0.1662,  0.5222, -0.6619,  0.2748,  0.1996,
         -0.0531,  0.0654,  0.2851,  0.1011,  0.0594,  0.2423,  0.4047, -0.0472,
         -0.3638,  0.1493,  0.4425, -0.4126],
        [-0.4232,  0.2929, -0.5999, -0.0888,  0.4006, -0.3633, -0.0225, -0.0853,
         -0.1753,  0.4773,  0.2756, -0.0682,  0.3939,  0.2675, -0.2154, -0.1541,
         -0.4403,  0.2668,  0.5576, -0.6980],
        [-0.2899,  0.1134, -1.1110,  0.3200,  0.4816, -0.5326,  0.0292,  0.1373,
         -0.2280, -0.0103,  0.4970, -0.1035,  0.3570,  0.3454,  0.3748,  0.0444,
         -0.3405,  0.0803,  0.6375, -0.6642]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0800, 0.0000, 0.0000, 0.1662, 0.5222, 0.0000, 0.2748, 0.1996, 0.0000,
         0.0654, 0.2851, 0.1011, 0.0594, 0.2423, 0.4047, 0.0000, 0.0000, 0.1493,
         0.4425, 0.0000],
        [0.0000, 0.2929, 0.0000, 0.0000, 0.4006, 0.0000, 0.0000, 0.0000, 0.0000,
         0.4773, 0.2756, 0.0000, 0.3939, 0.2675, 0.00

In [24]:
# nn.Sequential
# ordered container of modules
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20,10)
)
input_image = torch.rand(3, 28, 28)
logits = seq_modules(input_image)

In [25]:
print(logits)

tensor([[-1.9374e-01,  1.5458e-01,  1.2801e-01, -1.2943e-01,  4.3407e-01,
          3.5814e-04,  1.9208e-01, -1.9306e-01,  2.9199e-01,  4.5456e-02],
        [-1.4926e-01,  7.1454e-02,  1.1326e-01, -9.0863e-02,  4.2060e-01,
         -1.1145e-01,  5.6299e-02, -1.2268e-01,  3.0873e-01, -3.0210e-03],
        [-1.8101e-01,  8.6543e-02,  8.0852e-02, -1.8911e-01,  4.6967e-01,
         -9.4538e-02,  1.4827e-01, -1.2297e-01,  3.4004e-01, -3.4773e-02]],
       grad_fn=<AddmmBackward0>)


In [28]:
## nn.Softmax
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
print(pred_probab)
print(pred_probab.argmax(1))

tensor([[0.0751, 0.1064, 0.1036, 0.0801, 0.1407, 0.0912, 0.1104, 0.0751, 0.1220,
         0.0954],
        [0.0806, 0.1005, 0.1048, 0.0854, 0.1425, 0.0837, 0.0990, 0.0828, 0.1274,
         0.0933],
        [0.0775, 0.1013, 0.1008, 0.0769, 0.1486, 0.0845, 0.1078, 0.0822, 0.1306,
         0.0898]], grad_fn=<SoftmaxBackward0>)
tensor([4, 4, 4])


In [29]:
# model parameters
# model.named_parameters() 로 parameter들을 알 수 있다
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0087, -0.0046, -0.0333,  ...,  0.0283, -0.0220, -0.0045],
        [ 0.0168,  0.0060,  0.0144,  ..., -0.0010,  0.0279,  0.0338]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([0.0350, 0.0086], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0062,  0.0094,  0.0229,  ...,  0.0389, -0.0222, -0.0272],
        [-0.0297, -0.0017,  0.0151,  ..., -0.0003,  0.0384,  0.0123]],
       device='cuda