In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Choose the compute backend in priority order: CUDA, then Apple MPS,
# and finally the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two Linear+ReLU stages, one output Linear.

    With the default arguments the layer widths are 784 -> 512 -> 512 -> 10,
    i.e. the network accepts batches of 28x28 inputs and emits 10 logits per
    sample. The widths are exposed as keyword arguments so the same class can
    be reused for other input sizes or class counts.

    Args:
        in_features: Number of values per sample after flattening
            (default ``28 * 28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of output logits per sample (default ``10``).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Flatten keeps dim 0 (the batch) and merges all remaining dims.
        self.flatten = nn.Flatten()
        # Layers are created in the same order as before so that parameter
        # names (linear_relu_stack.0, .2, .4) and seeded initialisation
        # are unchanged for the default configuration.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Compute raw class scores for a batch.

        Args:
            x: Tensor whose non-batch dimensions multiply to ``in_features``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalised
            logits (no softmax is applied here).
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
# Instantiate the network, then move its parameters and buffers to the
# selected device. Module.to() returns the module itself, so rebinding is safe.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# Push a single random 28x28 input (batch of one) through the network.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)

# Softmax over dim 1 turns the logits of each sample into probabilities;
# the arg-max along the same dim is the predicted class index.
to_probabilities = nn.Softmax(dim=1)
pred_probab = to_probabilities(logits)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([6], device='cuda:0')


下面拆分观察 nn 的过程

In [6]:
# A mini-batch of three random 28x28 "images" with values drawn from [0, 1).
input_image = torch.rand(3, 28, 28)
print(input_image.shape)

torch.Size([3, 28, 28])


flatten过程

In [7]:
# Flatten keeps dim 0 (the batch) and merges dims 1..-1, so each
# 28x28 image becomes a vector of 784 values.
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


前向传播过程

In [8]:
# A single linear layer mapping each 784-value flattened image to 20 features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [9]:
# Show the linear layer's output before and after the ReLU non-linearity.
# ReLU replaces every negative entry with zero and leaves the rest unchanged.
print(f"Before ReLU: {hidden1}\n\n")
relu = nn.ReLU()
# hidden1 is deliberately rebound to the activated values.
hidden1 = relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.0929,  0.0100, -0.2197, -0.2993, -0.1376, -0.2856,  0.0691, -0.0912,
          0.5493, -0.4532,  0.8221, -0.0752, -0.1915,  0.1019, -0.2225,  0.5605,
          0.4710,  0.0931,  0.0124,  0.0252],
        [-0.3889, -0.6523,  0.0683, -0.0070, -0.2248, -0.1752,  0.1616,  0.1842,
          0.4291, -0.1057,  0.7313,  0.2044,  0.0187,  0.1827, -0.4102,  0.8534,
          0.7557,  0.1268,  0.2263,  0.0568],
        [-0.1057, -0.1609, -0.0931, -0.2501, -0.0385, -0.2884, -0.0236,  0.2805,
          0.3085, -0.2819,  0.2764,  0.1126, -0.1531,  0.4356, -0.3704,  0.6306,
          0.3627,  0.0402, -0.4377,  0.1977]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0100, 0.0000, 0.0000, 0.0000, 0.0000, 0.0691, 0.0000, 0.5493,
         0.0000, 0.8221, 0.0000, 0.0000, 0.1019, 0.0000, 0.5605, 0.4710, 0.0931,
         0.0124, 0.0252],
        [0.0000, 0.0000, 0.0683, 0.0000, 0.0000, 0.0000, 0.1616, 0.1842, 0.4291,
         0.0000, 0.7313, 0.2044, 0.0187, 0.1827, 0.00

nn 过程解释到此结束

In [10]:
# Print the module tree first, then walk every learnable tensor in the model.
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    shape = param.size()
    # Only the first two rows/entries are shown to keep the output short.
    preview = param[:2]
    print(f"Layer: {name} | Size: {shape} | Values : {preview} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0230,  0.0165,  0.0272,  ...,  0.0322,  0.0270, -0.0166],
        [-0.0334, -0.0165,  0.0160,  ...,  0.0255,  0.0176,  0.0054]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([0.0151, 0.0312], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0391,  0.0049, -0.0441,  ..., -0.0132, -0.0153, -0.0303],
        [-0.0051, -0.0074, -0.0006,  ..., -0.0101,  0.0006, -0.0348]],
       device='cuda:0', grad_fn=<Slic