In [1]:
import time
import torch
from torch import nn, optim

import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l

# Record the installed PyTorch version alongside the notebook's outputs.
print(torch.__version__)

# Device handle passed to the training helper later in the notebook.
# NOTE(review): the exact return type of d2l.get_current_device() is not
# visible here; the recorded output of this cell prints "cpu".
device = d2l.get_current_device()
print(device)

1.3.0+cpu
cpu


In [None]:
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: `num_convs` x (3x3 conv + ReLU), then one 2x2 max-pool.

    Args:
        num_convs: Number of convolution layers in the block.
        in_channels: Channel count of the block's input (consumed by the
            first convolution only).
        out_channels: Channel count produced by every convolution in the block.

    Returns:
        An ``nn.Sequential`` whose children are auto-named "0", "1", ... in
        the order Conv2d, ReLU, (Conv2d, ReLU, ...), MaxPool2d.
    """
    layers = []
    for _ in range(num_convs):
        # A 3x3 kernel with padding=1 keeps height and width unchanged.
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        # Every convolution after the first maps out_channels -> out_channels.
        in_channels = out_channels
    # A single pooling layer closes the block and halves height and width.
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

In [12]:
# Smoke test: a single-convolution block mapping 1 input channel to 64 channels.
test_block = vgg_block(num_convs=1, in_channels=1, out_channels=64)
print(test_block)

Sequential(
  (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [26]:
# Convolutional layout, one (num_convs, out_channels) pair per block:
# five blocks, eight convolution layers in total. Input channel counts are
# not listed because vgg() chains each block's output into the next block.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)
# Channel count of the network input (1 = single-channel images).
in_channels = 1

In [32]:
def vgg(conv_arch, in_channels, fc_hidden_units=4096, num_classes=10, input_size=224):
    """Assemble a VGG network: stacked conv blocks followed by a 3-layer classifier.

    Args:
        conv_arch: Iterable of (num_convs, out_channels) pairs, one per block.
        in_channels: Channel count of the input images.
        fc_hidden_units: Width of the two hidden fully connected layers.
        num_classes: Number of output scores.
        input_size: Height/width of the (square) input images; used only to
            size the first fully connected layer.

    Returns:
        An ``nn.Sequential`` with children ``sequential1..N``, ``flatten``,
        ``dense0``, ``dropout0``, ``dense1``, ``dropout1``, ``dense2``. The
        final layer returns raw scores with no activation, as expected by
        softmax/cross-entropy style losses.
    """
    conv_arch = tuple(conv_arch)  # allow generators; the length is needed below
    net = nn.Sequential()

    # Convolutional part: each block's output channels feed the next block.
    for i, (num_convs, out_channels) in enumerate(conv_arch):
        net.add_module('sequential' + str(i+1), vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels

    # Every block ends in a stride-2 pool that halves height and width
    # (rounding down), so the final feature map is input_size // 2**blocks
    # on a side; with the defaults this is 224 // 32 == 7, i.e. 512*7*7 inputs.
    side = input_size // (2 ** len(conv_arch))
    fc_features = in_channels * side * side

    # Fully connected part.
    net.add_module( 'flatten', nn.Flatten() ) # reshape conv output to (batch, features)
    net.add_module( 'dense0', nn.Sequential( nn.Linear(fc_features, fc_hidden_units), nn.ReLU() ) )
    net.add_module( 'dropout0', nn.Dropout(0.5) )
    net.add_module( 'dense1', nn.Sequential( nn.Linear(fc_hidden_units, fc_hidden_units), nn.ReLU() ) )
    net.add_module( 'dropout1', nn.Dropout(0.5) )
    # No ReLU here: clamping the class scores at zero would discard negative
    # logits. Kept wrapped in a Sequential so parameter names stay 'dense2.0.*'.
    net.add_module( 'dense2', nn.Sequential( nn.Linear(fc_hidden_units, num_classes) ) )

    return net

# Full-width network for single-channel input images.
net = vgg(conv_arch, in_channels=1)

In [33]:
# Bare expression: the notebook shows the module tree of `net` as the cell output.
net

Sequential(
  (sequential1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (sequential2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (sequential3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (sequential4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0,

In [34]:
# One random single-channel 224x224 image, used to trace tensor shapes.
X = torch.rand(1, 1, 224, 224)

# named_children yields only the first-level submodules together with their
# names (named_modules would also return the submodules nested inside them).
for stage_name, stage in net.named_children():
    X = stage(X)
    print(stage_name, 'output shape: ', X.shape)

sequential1 output shape:  torch.Size([1, 64, 112, 112])
sequential2 output shape:  torch.Size([1, 128, 56, 56])
sequential3 output shape:  torch.Size([1, 256, 28, 28])
sequential4 output shape:  torch.Size([1, 512, 14, 14])
sequential5 output shape:  torch.Size([1, 512, 7, 7])
flatten output shape:  torch.Size([1, 25088])
dense0 output shape:  torch.Size([1, 4096])
dropout0 output shape:  torch.Size([1, 4096])
dense1 output shape:  torch.Size([1, 4096])
dropout1 output shape:  torch.Size([1, 4096])
dense2 output shape:  torch.Size([1, 10])


In [36]:
# Narrower variant of conv_arch: same number of convolutions per block,
# with every block's output-channel count integer-divided by `ratio`.
ratio = 4
small_conv_arch = [
    (num_convs, out_channels // ratio)
    for num_convs, out_channels in conv_arch
]
print(small_conv_arch)

[(1, 16), (1, 32), (2, 64), (2, 128), (2, 128)]


In [37]:
# Training hyper-parameters.
lr = 0.05
num_epochs = 5
batch_size = 128

# If an "out of memory" error is reported, reduce batch_size or resize.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

# NOTE(review): the recorded run of this cell ended with an out-of-memory
# error on CPU while training the full-size `net`; `small_conv_arch` defined
# earlier is never used to build a model - confirm which network was intended.
optimizer = optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cpu


RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 3699376128 bytes. Buy new RAM!
