In [9]:
import torch
from torch import nn
from d2l import torch as d2l

train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size = 256, resize=(224, 224))
X_example, y_example = next(iter(train_iter))
print(X_example.shape, y_example.shape)

torch.Size([256, 1, 224, 224]) torch.Size([256])


In [10]:
net = nn.Sequential(nn.Conv2d(1, 96, kernel_size = 11, stride = 4, padding = 1), nn.ReLU(), 
                    nn.MaxPool2d(kernel_size = 3, stride = 2), 
                    nn.Conv2d(96, 256, kernel_size = 5, padding = 2), nn.ReLU(), 
                    nn.MaxPool2d(kernel_size = 3, stride = 2), 
                    nn.Conv2d(256, 384, kernel_size = 3, padding = 1), nn.ReLU(), 
                    nn.Conv2d(384, 384, kernel_size = 3, padding = 1), nn.ReLU(), 
                    nn.Conv2d(384, 256, kernel_size = 3, padding = 1), nn.ReLU(), 
                    nn.MaxPool2d(kernel_size = 3, stride = 2), nn.Flatten(),
                    nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(),
                    nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(),
                    nn.Linear(4096, 10)
                   )
# 获取参数大小和参数数量
total_params = 0
for name, param in net.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Count: {param.nelement()} | {param.data.dtype}")
    total_params += param.nelement()

print(f"Total parameters count: {total_params}")

Layer: 0.weight | Size: torch.Size([96, 1, 11, 11]) | Count: 11616 | torch.float32
Layer: 0.bias | Size: torch.Size([96]) | Count: 96 | torch.float32
Layer: 3.weight | Size: torch.Size([256, 96, 5, 5]) | Count: 614400 | torch.float32
Layer: 3.bias | Size: torch.Size([256]) | Count: 256 | torch.float32
Layer: 6.weight | Size: torch.Size([384, 256, 3, 3]) | Count: 884736 | torch.float32
Layer: 6.bias | Size: torch.Size([384]) | Count: 384 | torch.float32
Layer: 8.weight | Size: torch.Size([384, 384, 3, 3]) | Count: 1327104 | torch.float32
Layer: 8.bias | Size: torch.Size([384]) | Count: 384 | torch.float32
Layer: 10.weight | Size: torch.Size([256, 384, 3, 3]) | Count: 884736 | torch.float32
Layer: 10.bias | Size: torch.Size([256]) | Count: 256 | torch.float32
Layer: 14.weight | Size: torch.Size([4096, 6400]) | Count: 26214400 | torch.float32
Layer: 14.bias | Size: torch.Size([4096]) | Count: 4096 | torch.float32
Layer: 17.weight | Size: torch.Size([4096, 4096]) | Count: 16777216 | torch.

In [11]:
for model in net:
    X_example = model(X_example)
    print(model.__class__.__name__, X_example.shape)

Conv2d torch.Size([256, 96, 54, 54])
ReLU torch.Size([256, 96, 54, 54])
MaxPool2d torch.Size([256, 96, 26, 26])
Conv2d torch.Size([256, 256, 26, 26])
ReLU torch.Size([256, 256, 26, 26])
MaxPool2d torch.Size([256, 256, 12, 12])
Conv2d torch.Size([256, 384, 12, 12])
ReLU torch.Size([256, 384, 12, 12])
Conv2d torch.Size([256, 384, 12, 12])
ReLU torch.Size([256, 384, 12, 12])
Conv2d torch.Size([256, 256, 12, 12])
ReLU torch.Size([256, 256, 12, 12])
MaxPool2d torch.Size([256, 256, 5, 5])
Flatten torch.Size([256, 6400])
Linear torch.Size([256, 4096])
ReLU torch.Size([256, 4096])
Dropout torch.Size([256, 4096])
Linear torch.Size([256, 4096])
ReLU torch.Size([256, 4096])
Dropout torch.Size([256, 4096])
Linear torch.Size([256, 10])


In [12]:
help(d2l.train_ch6)

Help on function train_ch6 in module d2l.torch:

train_ch6(net, train_iter, test_iter, num_epochs, lr, device)
    Train a model with a GPU (defined in Chapter 6).
    
    Defined in :numref:`sec_lenet`



In [13]:
d2l.train_ch6(net, train_iter, test_iter, 10, 0.1, "cuda")

training on cuda


RuntimeError: CUDA out of memory. Tried to allocate 100.00 MiB (GPU 0; 1.83 GiB total capacity; 14.21 MiB already allocated; 89.75 MiB free; 22.00 MiB reserved in total by PyTorch)