In [3]:
import torch
from torch import nn
import torch.nn.functional as F
from d2l import torch as d2l




In [4]:
class Reshape(nn.Module):
    def forward(self, x: torch.Tensor):
        return x.reshape(-1, 1, 28, 28)

net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(), # 不改变大小.
    nn.AvgPool2d(2), # 不重叠, 默认stride和K的大小是一样的. # 13
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(), # 
    nn.AvgPool2d(2),  # 
    nn.Flatten(),
    nn.Linear(16*5*5, 120), nn.Sigmoid(), 
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10)
)

def init_with_xaviver(m:nn.Module):
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        nn.init.xavier_normal_(m.weight) # 初始化


net.apply(init_with_xaviver) # 模型参数初始化哇.

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): Sigmoid()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): Sigmoid()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): Sigmoid()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): Sigmoid()
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [5]:
X = torch.randn(size=(1, 1, 28, 28), dtype=torch.float32, device=d2l.try_gpu(0))
net.to(d2l.try_gpu(0))

for layer in net:
    X = layer(X)
    print(f"{layer.__class__.__name__:.10}: \t{X.shape}")

Conv2d: 	torch.Size([1, 6, 28, 28])
Sigmoid: 	torch.Size([1, 6, 28, 28])
AvgPool2d: 	torch.Size([1, 6, 14, 14])
Conv2d: 	torch.Size([1, 16, 10, 10])
Sigmoid: 	torch.Size([1, 16, 10, 10])
AvgPool2d: 	torch.Size([1, 16, 5, 5])
Flatten: 	torch.Size([1, 400])
Linear: 	torch.Size([1, 120])
Sigmoid: 	torch.Size([1, 120])
Linear: 	torch.Size([1, 84])
Sigmoid: 	torch.Size([1, 84])
Linear: 	torch.Size([1, 10])


In [6]:
# 读取数据:
batch_size = 256

train_iter, val_iter = d2l.load_data_fashion_mnist(batch_size)

In [None]:
def accury(y_hat: torch.Tensor, y: torch.Tensor) -> float:
    

In [8]:
    


def evaluate_accuray_with_gpu(net:nn.Module, val_iter:torch.utils.data.DataLoader, device=d2l.try_gpu(0)) -> float:
    net.eval()
    if not device:
        device =next(iter(net.parameters())).device
    
    # 生成累加器:
    metric = d2l.Accumulator(3)
    with torch.no_grad():
        for X, y in val_iter:
            # GPU:
            if isinstance(X, list):
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            
            y = y.to(device)
            y_hat = net(X)
            metric.add(accury(y_hat, y), y.numel())
            # 为什么不在accury里面计算, 因为我们不知道batch有多大, 所以拿出来每次取numel.
    return metric[0] / metric[1] # 得到平均accuracy.



In [11]:
# 训练:
def train_ch6_with_gpu(net: nn.Module, train_iter, test_iter, num_epochs, lr, device: torch.device = d2l.try_gpu()):
    net.to(device)
    print("train on: ", device)
    
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss', 'train acc', 'test acc']) # 动画效果
    timer, num_batches = d2l.Timer(), len(train_iter) # 创建定时器, 和一个批量长度.
    
    for epoch in range(num_epochs):
        metric = d2l.Accumulator(3)
        net.train()
        for idx, (X, y) in enumerate(train_iter):
            timer.start() # start counting.
            optimizer.zero_grad() # empty gradient
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            # reduction 参数有三种选择：
            # elementwise_mean：为默认情况，表明对N个样本的loss进行求平均之后返回
            l.backward()
            optimizer.step()
            
            with torch.no_grad():
                metric.add(l*X.shape[0],  accury(y_hat, y), X.shape[0]) # X.shape[0]表示batch_size的大小.
                # 因为上面我们求平均了, 所以把平均乘回batch就得到了总得损失. 
            timer.stop() # stop counting.
            
            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            
            if (idx + 1) % (num_batches // 5)     == 0 or idx == num_batches - 1: # 也就是输出五次, 以及最后一次.
                animator.add(epoch + (idx + 1) / num_batches, (train_loss, train_acc, None))
                
        # 每个epoch后我们需要再测试集合上进行评估
        test_acc = evaluate_accuray_with_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, test_acc)) # 将测试集的精度添加进去. 
    
    # 所有的epoch完成后:
    print(f'loss {train_loss}, train accuracy: {train_acc:.3f}, test accuracy: {test_accuracy:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec on {str(device)}')
    # 总样本数 * 轮数 = 总遍历的样本数. 然后除以所用时间之和. 得到的就是每秒多少个样本, 然后再看看再那个设备上. 
        
                
                
            
                
                
                
            
            


In [12]:
lr, num_epochs = 0.5, 10

train_ch6_with_gpu(net, train_iter, val_iter, num_epochs, lr)



tensor([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6], device='cuda:0')
tensor([False, False, False, False,  True, False, False,  True, False, False,
        False, False, False, False, False, False

KeyboardInterrupt: 

IndexError: list index out of range

<Figure size 350x250 with 1 Axes>