In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# 设备

In [2]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device.")

Using cpu device.


# 定义模型类

In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two hidden ReLU layers, linear head.

    With the default arguments the architecture is exactly the original
    hard-coded one (784 -> 512 -> 512 -> 10), so ``NeuralNetwork()`` keeps the
    same submodule names, parameter names and printed structure.

    Args:
        in_features: Number of values per sample once the input is flattened
            (default ``28 * 28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of logits produced per sample (default ``10``).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # nn.Flatten() keeps the first (batch) dimension and merges all others.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Compute raw (unnormalised) class scores for a batch.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                dimensions hold ``in_features`` values in total.

        Returns:
            Tensor of shape ``(batch, num_classes)`` containing the logits;
            no softmax is applied here.
        """
        return self.linear_relu_stack(self.flatten(x))

In [4]:
# Build the network, then move its parameters to the selected device
# (Module.to returns the module itself, so `model` is the same object).
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [7]:
# A batch holding one random 28x28 sample, created directly on the selected device.
X = torch.rand((1, 28, 28), device=device)
X

tensor([[[0.8827, 0.4748, 0.9980, 0.7501, 0.3412, 0.2546, 0.9781, 0.9938,
          0.0226, 0.3049, 0.4617, 0.7838, 0.4322, 0.9491, 0.1079, 0.4271,
          0.3812, 0.5430, 0.6718, 0.1848, 0.7810, 0.3025, 0.3734, 0.6175,
          0.2002, 0.2370, 0.0329, 0.5553],
         [0.6680, 0.5696, 0.9502, 0.0231, 0.6185, 0.0188, 0.7499, 0.9950,
          0.0864, 0.9608, 0.9969, 0.7731, 0.5248, 0.1811, 0.5793, 0.0501,
          0.6823, 0.9423, 0.9900, 0.0606, 0.4749, 0.7027, 0.7036, 0.0583,
          0.9025, 0.6833, 0.8207, 0.8332],
         [0.0324, 0.9485, 0.0987, 0.2267, 0.8979, 0.5660, 0.8757, 0.8633,
          0.1018, 0.6406, 0.2479, 0.7821, 0.8969, 0.4096, 0.8256, 0.6529,
          0.0945, 0.8785, 0.9574, 0.7622, 0.7375, 0.0898, 0.6492, 0.2897,
          0.1735, 0.7475, 0.0142, 0.7195],
         [0.8092, 0.3320, 0.3739, 0.0430, 0.1559, 0.7271, 0.3376, 0.1652,
          0.2119, 0.1661, 0.7856, 0.2865, 0.8252, 0.4608, 0.5091, 0.4766,
          0.5286, 0.0039, 0.6901, 0.7524, 0.9549, 0.5953,

In [8]:
# Run the forward pass by calling the model on X; the result holds the raw
# (unnormalised) class scores. The bare name on the last line displays it.
logits = model(X)
logits

tensor([[ 0.0617, -0.0297, -0.0450, -0.0357,  0.0159,  0.1208, -0.0814,  0.0692,
          0.0459,  0.0449]], grad_fn=<AddmmBackward>)

In [9]:
# Normalise the logits along dim 1 (the class axis) so each row sums to 1.
pred_probab = nn.Softmax(dim=1)(logits)
pred_probab

tensor([[0.1044, 0.0953, 0.0939, 0.0947, 0.0997, 0.1108, 0.0905, 0.1052, 0.1028,
         0.1027]], grad_fn=<SoftmaxBackward>)

In [10]:
# The index of the largest probability in each row is the predicted class.
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([5])


# 模型层(Model Layers)

In [13]:
# A mini-batch of three random 28x28 "images" used to walk through the layers.
input_image = torch.rand((3, 28, 28))
print(input_image)
print(input_image.shape)

tensor([[[0.9696, 0.5141, 0.2983,  ..., 0.7696, 0.6447, 0.7344],
         [0.6044, 0.8782, 0.2120,  ..., 0.4860, 0.2735, 0.7714],
         [0.5112, 0.3912, 0.7548,  ..., 0.7118, 0.7205, 0.9916],
         ...,
         [0.7035, 0.6355, 0.3708,  ..., 0.7702, 0.2575, 0.1311],
         [0.0036, 0.0127, 0.0460,  ..., 0.3576, 0.9409, 0.4055],
         [0.0094, 0.6050, 0.7607,  ..., 0.1117, 0.9697, 0.1607]],

        [[0.9388, 0.1084, 0.2759,  ..., 0.8164, 0.6986, 0.1027],
         [0.6053, 0.3331, 0.4235,  ..., 0.3879, 0.9233, 0.1842],
         [0.5291, 0.6479, 0.5161,  ..., 0.0524, 0.5421, 0.8979],
         ...,
         [0.7359, 0.8663, 0.1267,  ..., 0.4311, 0.2346, 0.3589],
         [0.3783, 0.8926, 0.2386,  ..., 0.5808, 0.1326, 0.7208],
         [0.0141, 0.6932, 0.9330,  ..., 0.4348, 0.0702, 0.6941]],

        [[0.1275, 0.0396, 0.1776,  ..., 0.1419, 0.2053, 0.4277],
         [0.4060, 0.2681, 0.2468,  ..., 0.6681, 0.0429, 0.4476],
         [0.2458, 0.1886, 0.4635,  ..., 0.8668, 0.5022, 0.

## nn.Flatten

In [17]:
# nn.Flatten keeps the first dimension and merges the rest: (3, 28, 28) -> (3, 784).
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


## nn.Linear

In [18]:
# Affine map from the 784 flattened values to 20 hidden features per sample.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


## nn.ReLU

In [19]:
# Show the linear layer's output before and after the element-wise ReLU.
print(f"Before ReLU: {hidden1} \n\n")
# NOTE: hidden1 is rebound to the rectified tensor, so re-running this cell on
# its own would print already-rectified values under "Before ReLU".
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.2668,  0.0135,  0.4409, -0.1953, -0.0484,  0.1787,  0.3525,  0.2317,
          0.1331,  0.3980,  0.2933, -0.4499,  0.3150, -0.0141,  0.0278,  0.0193,
          0.5590, -0.6276,  0.6792, -0.2618],
        [-0.0089,  0.4920,  0.3460,  0.1253, -0.0807, -0.5219,  0.4012,  0.1189,
          0.2139,  0.7205, -0.0409, -0.0898, -0.2142, -0.1771, -0.0727, -0.1149,
          0.3644, -1.1675,  0.4720, -0.6212],
        [ 0.3056,  0.3765, -0.0782, -0.1295,  0.1011,  0.2607,  0.1800, -0.1392,
          0.1598,  0.5965,  0.0977, -0.5815,  0.1223, -0.3159,  0.1340, -0.0624,
          0.7305, -0.3004,  0.3221, -0.2867]], grad_fn=<AddmmBackward>) 


After ReLU: tensor([[0.0000, 0.0135, 0.4409, 0.0000, 0.0000, 0.1787, 0.3525, 0.2317, 0.1331,
         0.3980, 0.2933, 0.0000, 0.3150, 0.0000, 0.0278, 0.0193, 0.5590, 0.0000,
         0.6792, 0.0000],
        [0.0000, 0.4920, 0.3460, 0.1253, 0.0000, 0.0000, 0.4012, 0.1189, 0.2139,
         0.7205, 0.0000, 0.0000, 0.0000, 0.0000, 0.00

## nn.Sequential

In [21]:
# Chain the modules in order. `flatten` and `layer1` are the instances created
# in the earlier cells; the ReLU and the final 20 -> 10 linear layer are new.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))
# Note: this rebinds `input_image` and `logits` from the earlier cells.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)
logits

tensor([[-0.0911,  0.2999,  0.0753,  0.3999, -0.0911, -0.0645, -0.0314,  0.1037,
         -0.0436,  0.2484],
        [ 0.0618,  0.2854,  0.2894,  0.2883,  0.0167, -0.1007, -0.0255,  0.1706,
         -0.1682,  0.2510],
        [ 0.0682,  0.3409,  0.2841,  0.2363, -0.0614, -0.0487, -0.0891,  0.1501,
         -0.1821,  0.3231]], grad_fn=<AddmmBackward>)

## nn.Softmax

In [22]:
# Softmax over dim 1 turns each row of logits into class probabilities.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

# 模型参数

In [14]:
# Print the module tree of `model` (the network built in an earlier cell).
print("Model structure: ", model, "\n\n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
) 




In [16]:
# Walk every registered parameter of the model. param[:2] slices the first two
# entries along dim 0 so only a small preview of each tensor is printed.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values: {param[:2]}")

Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[ 0.0071, -0.0298,  0.0350,  ...,  0.0129,  0.0348, -0.0097],
        [ 0.0039, -0.0301,  0.0322,  ..., -0.0100, -0.0226, -0.0144]],
       grad_fn=<SliceBackward>)
Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([0.0139, 0.0281], grad_fn=<SliceBackward>)
Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[-0.0183, -0.0186,  0.0114,  ..., -0.0048, -0.0327, -0.0207],
        [ 0.0051,  0.0205,  0.0393,  ..., -0.0163, -0.0269,  0.0401]],
       grad_fn=<SliceBackward>)
Layer: linear_relu_stack.2.bias | Size: torch.Size([512]) | Values: tensor([-0.0383, -0.0090], grad_fn=<SliceBackward>)
Layer: linear_relu_stack.4.weight | Size: torch.Size([10, 512]) | Values: tensor([[-0.0339,  0.0023, -0.0093,  ..., -0.0040, -0.0350, -0.0427],
        [ 0.0287,  0.0422,  0.0421,  ..., -0.0397,  0.0437,  0.0196]],
       grad_fn=<SliceBackward>)
Layer: linear_relu_