In [2]:
import torch
from torch import nn
from d2l import torch as d2l

train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size = 256, resize=(224, 224))
X_example, y_example = next(iter(train_iter))
print(X_example.shape, y_example.shape)

torch.Size([256, 1, 224, 224]) torch.Size([256])


In [8]:
def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    return nn.Sequential(nn.Conv2d(in_channels, out_channels, 
                                kernel_size = kernel_size, padding = padding, stride = stride),
                         nn.ReLU(), nn.Conv2d(out_channels, out_channels, 
                                kernel_size = kernel_size, padding = padding, stride = stride),
                        nn.ReLU(), nn.Conv2d(out_channels, out_channels, 
                                kernel_size = kernel_size, padding = padding, stride = stride),
                        nn.ReLU())

In [10]:
net = nn.Sequential(
    nin_block(1, 96, 11, 4, 0),
    nn.MaxPool2d(3, stride = 2),
    nin_block(96, 256, 5, 1, 2),
    nn.MaxPool2d(3, stride = 2),
    nin_block(256, 384, 3, 1, 1),
    nn.MaxPool2d(3, stride = 2), nn.Dropout(0.5),
    nin_block(384, 10, 3, 1, 1),
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten()
)
# 获取参数大小和参数数量
total_params = 0
for name, param in net.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Count: {param.nelement()} | {param.data.dtype}")
    total_params += param.nelement()

print(f"Total parameters count: {total_params}")

Layer: 0.0.weight | Size: torch.Size([96, 1, 11, 11]) | Count: 11616 | torch.float32
Layer: 0.0.bias | Size: torch.Size([96]) | Count: 96 | torch.float32
Layer: 0.2.weight | Size: torch.Size([96, 96, 11, 11]) | Count: 1115136 | torch.float32
Layer: 0.2.bias | Size: torch.Size([96]) | Count: 96 | torch.float32
Layer: 0.4.weight | Size: torch.Size([96, 96, 11, 11]) | Count: 1115136 | torch.float32
Layer: 0.4.bias | Size: torch.Size([96]) | Count: 96 | torch.float32
Layer: 2.0.weight | Size: torch.Size([256, 96, 5, 5]) | Count: 614400 | torch.float32
Layer: 2.0.bias | Size: torch.Size([256]) | Count: 256 | torch.float32
Layer: 2.2.weight | Size: torch.Size([256, 256, 5, 5]) | Count: 1638400 | torch.float32
Layer: 2.2.bias | Size: torch.Size([256]) | Count: 256 | torch.float32
Layer: 2.4.weight | Size: torch.Size([256, 256, 5, 5]) | Count: 1638400 | torch.float32
Layer: 2.4.bias | Size: torch.Size([256]) | Count: 256 | torch.float32
Layer: 4.0.weight | Size: torch.Size([384, 256, 3, 3]) | 

In [None]:
for model in net:
    X_example = model(X_example)
    print(model.__class__.__name__, X_example.shape)

In [12]:
help(d2l.train_ch6)

Help on function train_ch6 in module d2l.torch:

train_ch6(net, train_iter, test_iter, num_epochs, lr, device)
    Train a model with a GPU (defined in Chapter 6).
    
    Defined in :numref:`sec_lenet`



In [13]:
d2l.train_ch6(net, train_iter, test_iter, 10, 0.1, "cuda")

training on cuda


RuntimeError: CUDA out of memory. Tried to allocate 100.00 MiB (GPU 0; 1.83 GiB total capacity; 14.21 MiB already allocated; 89.75 MiB free; 22.00 MiB reserved in total by PyTorch)