# 5.6 AlexNet

计算机视觉流程中真正重要的是数据和特征。也就是说，使用较干净的数据集和较有效的特征甚至比机器学习模型的选择对图像分类结果的影响更大。



In [5]:
import torch
from torch import nn,optim
import torchvision
# import d2l_pytorch as d2l 
import time
import sys  # required by the sys.path.append call below; it was missing from the cell's imports

# Add the parent directory (relative to the working directory) to the module search path.
sys.path.append('..')

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
num_classes=10


class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutional layers followed by three linear layers.

    The first linear layer is fixed at ``6*6*256`` input features, so
    ``forward`` only accepts images whose feature map is 256x6x6 after the
    last pooling layer (for example 3x227x227 inputs, given the unpadded
    11x11 / stride-4 first convolution). The number of output scores is read
    from the module-level ``num_classes`` when the model is constructed.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv = nn.Sequential(
            # layer 1
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # layer 2 (grouped convolution with two groups)
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, groups=2, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # layer 3
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # layer 4
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # layer 5
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Fully connected classifier with dropout after each hidden layer.
        self.fc = nn.Sequential(
            # layer 6
            nn.Linear(in_features=6 * 6 * 256, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            # layer 7
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            # layer 8
            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self, img):
        """Return the class scores for a batch of images.

        Args:
            img: Image batch with 3 channels; the batch dimension comes first.

        Returns:
            Tensor with one row of ``num_classes`` scores per input image.

        Raises:
            RuntimeError: If the per-sample feature count does not match the
                ``6*6*256`` inputs expected by the first linear layer.
        """
        feature = self.conv(img)
        # Flatten each sample on its own (batch dimension preserved). Reshaping
        # with view(-1, 6*6*256) would silently regroup values across samples
        # whenever the spatial size is wrong but the total element count still
        # divides evenly; this way the linear layer reports the mismatch.
        output = self.fc(feature.flatten(start_dim=1))
        return output

In [7]:
# Build the model and print its layer-by-layer structure.
net = AlexNet()
print(net)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=2)
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_feat

## 读取数据

读取数据的时候我们额外做了一步：将图像的高和宽扩大到本实现所需的227（AlexNet论文中写的是224，但本实现的第一层卷积没有填充，只有227×227的输入才能在最后一个池化层之后得到6×6的特征图）。

这个可以通过torchvision.transforms.Resize实例来实现。

也就是说，我们在ToTensor实例前使用Resize实例，然后使用Compose实例来将这两个变换串联以方便调用。

In [8]:
# Image preprocessing

# Chain several image transforms into a single callable pipeline.

resize=227
from torchvision import transforms
transform = transforms.Compose([
    # Pass (height, width) explicitly: a bare int only rescales the shorter
    # edge, so non-square images would not end up resize x resize.
    transforms.Resize(size=(resize, resize)),
    transforms.ToTensor(),
    # These 6 values (per-channel mean and std) were computed for CIFAR-10
    # and do not apply to other datasets.
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
])

In [9]:
# Load the CIFAR-10 training and test splits from the local dataset directory
# (download is disabled); both splits use the same preprocessing pipeline.
train_dataset, test_dataset = (
    torchvision.datasets.CIFAR10(
        root='../datasets/',
        train=is_train_split,
        download=False,
        transform=transform,
    )
    for is_train_split in (True, False)
)



In [10]:
batch_size = 128

# Mini-batch loaders: the training split is reshuffled every epoch, the test
# split keeps its order; each loader uses 4 worker processes.
train_iter, test_iter = (
    torch.utils.data.DataLoader(
        split,
        batch_size=batch_size,
        shuffle=reshuffle,
        num_workers=4,
    )
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)


In [14]:
# len(train_iter)   # 391

def evaluate_accuracy_ch05(data_iter, net, device=None):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable of ``(X, y)`` batches. The prediction for each
            sample is ``net(X).argmax(dim=1)`` and is compared with ``y``.
        net: A ``torch.nn.Module`` or a plain callable. A module is evaluated
            in eval mode (which disables dropout) and is returned to whatever
            mode it was in before the call. A plain callable whose code object
            lists an ``is_training`` variable is called with
            ``is_training=False``.
        device: Device the batches are moved to when ``net`` is a module.
            Defaults to the device of the module's first parameter, or the
            CPU if the module has no parameters. Ignored for plain callables.

    Returns:
        Number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    was_training = False
    accepts_flag = False
    if is_module:
        if device is None:
            # Fall back to the CPU for modules without parameters instead of
            # failing with an IndexError on an empty parameter list.
            first_param = next(net.parameters(), None)
            device = first_param.device if first_param is not None else torch.device('cpu')
        # Remember the current mode so evaluation does not leave the caller's
        # model stuck in eval mode (which would disable dropout in training).
        was_training = net.training
        net.eval()
    else:
        # Callables without a code object are treated as not taking the flag.
        code = getattr(net, '__code__', None)
        accepts_flag = 'is_training' in getattr(code, 'co_varnames', ())

    acc_sum, n = 0.0, 0
    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for X, y in data_iter:
                if is_module:
                    hits = net(X.to(device)).argmax(dim=1) == y.to(device)
                elif accepts_flag:
                    hits = net(X, is_training=False).argmax(dim=1) == y
                else:
                    hits = net(X).argmax(dim=1) == y
                acc_sum += hits.float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            net.train(was_training)
    return acc_sum / n

def train_ch05(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    """Train ``net`` with cross-entropy loss and report progress on stdout.

    Args:
        net: Model to train; it is moved to ``device`` first.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches passed to
            ``evaluate_accuracy_ch05`` after every epoch.
        batch_size: Accepted for interface compatibility; not used here.
        optimizer: Optimizer used to clear gradients and update parameters.
        device: Device the model and every training batch are moved to.
        num_epochs: Number of passes over ``train_iter``.

    Prints one line per batch (zero-based epoch and batch index, batch loss)
    and one summary line per epoch (mean loss, train accuracy, test accuracy,
    elapsed seconds).
    """
    net = net.to(device)
    print('training on ', device)
    # Cross-entropy loss for classification.
    loss = torch.nn.CrossEntropyLoss()

    # Report the real number of batches rather than a hard-coded 391; iterables
    # without a length are shown as '?'.
    try:
        num_batches = len(train_iter)
    except TypeError:
        num_batches = '?'

    for epoch in range(num_epochs):
        # Make sure dropout is active even if a previous evaluation left the
        # model in eval mode.
        net.train()
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for i, (X, y) in enumerate(train_iter):
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            # Compute the loss for this batch.
            l = loss(y_hat, y)
            # Clear old gradients, backpropagate, then update the parameters.
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            # Convert the loss to a Python float once and reuse it.
            batch_loss = l.item()
            print('epoch %d/%d, iter %d/%s, loss %.3f' % (epoch, num_epochs, i, num_batches, batch_loss))

            # Accumulate loss and the number of correct predictions.
            train_l_sum += batch_loss
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
            batch_count += 1
        # Accuracy on the test set after this epoch.
        test_acc = evaluate_accuracy_ch05(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))




In [15]:
# Hyperparameters: learning rate and number of training epochs.
lr = 0.001
num_epochs = 1

# Adam optimizer over all parameters of the network.
optimizer = torch.optim.Adam(net.parameters(), lr=lr)


In [16]:
# Run the training loop with the objects configured in the cells above.
train_ch05(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    batch_size=batch_size,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

training on  cpu
epoch 0/1, iter 0/391, loss 4.177
epoch 0/1, iter 1/391, loss 2.314
epoch 0/1, iter 2/391, loss 2.302
epoch 0/1, iter 3/391, loss 2.312
epoch 0/1, iter 4/391, loss 2.298
epoch 0/1, iter 5/391, loss 2.299
epoch 0/1, iter 6/391, loss 2.309
epoch 0/1, iter 7/391, loss 2.307
epoch 0/1, iter 8/391, loss 2.309
epoch 0/1, iter 9/391, loss 2.294
epoch 0/1, iter 10/391, loss 2.298
epoch 0/1, iter 11/391, loss 2.308
epoch 0/1, iter 12/391, loss 2.298
epoch 0/1, iter 13/391, loss 2.308
epoch 0/1, iter 14/391, loss 2.288
epoch 0/1, iter 15/391, loss 2.301
epoch 0/1, iter 16/391, loss 2.298
epoch 0/1, iter 17/391, loss 2.317
epoch 0/1, iter 18/391, loss 2.291
epoch 0/1, iter 19/391, loss 2.289
epoch 0/1, iter 20/391, loss 2.339
epoch 0/1, iter 21/391, loss 2.266
epoch 0/1, iter 22/391, loss 2.228
epoch 0/1, iter 23/391, loss 2.300
epoch 0/1, iter 24/391, loss 2.217
epoch 0/1, iter 25/391, loss 2.192
epoch 0/1, iter 26/391, loss 2.231
epoch 0/1, iter 27/391, loss 2.162
epoch 0/1, it