In [3]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [4]:
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
print(f'using {device}')

using mps


In [5]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [6]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


In [7]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f'Predicted class:{y_pred}')

Predicted class:tensor([4], device='mps:0')


`nn.Flaten()`を使って2次元（28x28）の画像を1次元の784ピクセルの値へと変換する．

In [12]:
input_image = torch.rand(3, 28, 28)
flat_image = nn.Flatten()(input_image)
print(input_image.size())
print(flat_image.size())

torch.Size([3, 28, 28])
torch.Size([3, 784])


`nn.Linear()`で線形変換する

In [22]:
hidden1 = nn.Linear(in_features=28*28, out_features=20)(flat_image)
print(hidden1.size())

torch.Size([3, 20])


ReLUは非線形な活性化関数．  
<div style="display:flex; align-items: center;">
  <div style="flex: 1;">
  <p>シグモイド関数を重ねれば勾配が消える．じゃあ最初から消したれみたいな</p>
  <p>計算式がシンプルなので処理が速い</p>
  <p>0以下は常に0となるので，ニューロン群の活性化がスパース（sparse： 疎、スカスカ）になり，</p>
  <p>発火しないニューロン（＝生体ニューロンに近い動作）も表現できることで精度が向上しやすい</p>
  </div>
  <div style="flex: 1; padding-left: 20px;">
    <img src="https://image.itmedia.co.jp/ait/articles/2003/11/di-01.gif" width="250">
  </div>
</div>

In [23]:
print(f'before {hidden1}')
hidden1 = nn.ReLU()(hidden1)
print(f'after {hidden1}')

before tensor([[ 0.2961, -0.3432,  0.2652, -0.0196,  0.2460, -0.5234, -0.5450,  0.2542,
          0.0142,  0.0186, -0.1364,  0.0399,  0.0216, -0.2731, -0.3349, -0.8027,
          0.2520,  0.2269, -0.2311,  0.0732],
        [ 0.3415, -0.4126,  0.1674,  0.4506,  0.2617, -0.2493, -0.4176, -0.0111,
          0.0323,  0.3014, -0.4279, -0.0962,  0.5515, -0.2296, -0.4217, -0.5802,
         -0.1880,  0.1947, -0.2631,  0.1670],
        [ 0.7010, -0.0086,  0.1213,  0.2129,  0.0844, -0.5514, -0.4018,  0.0335,
          0.1849,  0.1352, -0.4026, -0.2766,  0.1033, -0.5516, -0.1342, -0.5763,
         -0.1305, -0.0310,  0.0945, -0.2719]], grad_fn=<AddmmBackward0>)
after tensor([[0.2961, 0.0000, 0.2652, 0.0000, 0.2460, 0.0000, 0.0000, 0.2542, 0.0142,
         0.0186, 0.0000, 0.0399, 0.0216, 0.0000, 0.0000, 0.0000, 0.2520, 0.2269,
         0.0000, 0.0732],
        [0.3415, 0.0000, 0.1674, 0.4506, 0.2617, 0.0000, 0.0000, 0.0000, 0.0323,
         0.3014, 0.0000, 0.0000, 0.5515, 0.0000, 0.0000, 0.0000, 0.

In [24]:
seq_modules = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=28*28, out_features=20),
    nn.ReLU(),
    nn.Linear(20, 10)
)
input_image = torch.rand(3,28,28)
logits = seq_modules(input_image)
print(logits)

tensor([[ 0.1034,  0.2871,  0.3943,  0.4723,  0.0888,  0.2177,  0.2782,  0.1377,
          0.0326, -0.2032],
        [-0.0363,  0.2397,  0.4561,  0.4131,  0.1181,  0.2405,  0.3116,  0.2673,
          0.1039, -0.2300],
        [ 0.1198,  0.4006,  0.4683,  0.4197,  0.2439,  0.1305,  0.4501,  0.3588,
         -0.0066, -0.1807]], grad_fn=<AddmmBackward0>)


In [25]:
pred_probab = nn.Softmax(dim=1)(logits)
print(pred_probab)

tensor([[0.0910, 0.1094, 0.1218, 0.1316, 0.0897, 0.1021, 0.1084, 0.0942, 0.0848,
         0.0670],
        [0.0784, 0.1033, 0.1283, 0.1229, 0.0915, 0.1034, 0.1110, 0.1062, 0.0902,
         0.0646],
        [0.0868, 0.1150, 0.1230, 0.1172, 0.0983, 0.0878, 0.1208, 0.1103, 0.0765,
         0.0643]], grad_fn=<SoftmaxBackward0>)


In [26]:
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0303, -0.0210, -0.0154,  ..., -0.0033,  0.0335, -0.0282],
        [-0.0005,  0.0172, -0.0178,  ..., -0.0356,  0.0021,  0.0185]],
       device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([0.0086, 0.0253], device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0190, -0.0187, -0.0085,  ...,  0.0244, -0.0148, -0.0072],
        [ 0.0343, -0.0280, -0.0087,  ..., -0.0198, -0.0171, -0.0398]],
       device='mps:0'