# AlexNet

AlexNet 的原始论文 https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf


AlexNet结构图:


![AlexNet结构图](../images/AlexNet.jpg)



AlexNet取得效果好还有几个原因：

1、使用ReLU作为激活函数

2、局部响应归一化（Local Response Normalization，LRN）

3、Dropout

4、Data Augmentation(数据增强)

接下来照着这个图，用PyTorch写一遍模型。当然，就不跑了。

*附上一张吴恩达（Andrew Ng）在 Coursera 上授课的AlexNet模型图*


![AlexNet结构图](../images/AlexNet2.jpg)



In [1]:
# add a mark for change
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

class AlextNet(nn.Module):
    """AlexNet-style CNN: three convolutional stages and a three-layer classifier.

    Args:
        in_channel: Number of channels of the input images.
        n_class: Number of output classes (width of the last linear layer).
        fc_in_features: Size of the flattened feature map fed to the first
            fully connected layer. The default, ``1 * 1 * 256``, matches
            96x96 inputs; 227x227 inputs need ``6 * 6 * 256`` (9216).
    """

    def __init__(self, in_channel, n_class, fc_in_features=1 * 1 * 256):
        super(AlextNet, self).__init__()
        # Stage 1: large-kernel strided convolution followed by overlapping max pooling
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channel, 96, kernel_size=11, stride=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        # Stage 2: 5x5 convolution (padding keeps the spatial size) + max pooling
        self.conv2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        # Stage 3: three stacked 3x3 convolutions + max pooling
        self.conv3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        # Stage 4: fully connected classifier with dropout between the layers
        self.fc = nn.Sequential(
            # The input width depends on the image size, hence it is configurable
            nn.Linear(fc_in_features, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            # The original AlexNet uses 1000 classes; use 10 for e.g. MNIST/CIFAR10
            nn.Linear(4096, n_class)
        )

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Batch of images; its flattened conv3 output must have
                ``fc_in_features`` elements per sample.

        Returns:
            The output of the last linear layer, one row of ``n_class``
            scores per sample (no softmax is applied here).
        """
        con1_x = self.conv1(x)
        con2_x = self.conv2(con1_x)
        con3_x = self.conv3(con2_x)
        # Flatten everything except the batch dimension for the classifier
        lin_x = con3_x.view(con3_x.size(0), -1)
        y_hat = self.fc(lin_x)
        return y_hat

In [2]:
# Build the network for 3-channel images and 10 classes, then display its layers
alex = AlextNet(in_channel=3, n_class=10)
print(alex)

AlextNet (
  (conv1): Sequential (
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU (inplace)
    (2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
  )
  (conv2): Sequential (
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU (inplace)
    (2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
  )
  (conv3): Sequential (
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU (inplace)
    (2): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU (inplace)
    (4): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU (inplace)
    (6): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
  )
  (fc): Sequential (
    (0): Linear (256 -> 4096)
    (1): ReLU (inplace)
    (2): Dropout (p = 0.5)
    (3): Linear (4096 -> 4096)
    (4): ReLU (inplace)
    (5): Dropout (p = 0.5)
    (6): Linear (4096 -> 10)
  )
)


In [None]:
# Image preprocessing: AlexNet was designed for 227 * 227 images, but CIFAR10 images
# are only 32 * 32, and upscaling them to 227 * 227 gave poor results in testing.
# So the images are upscaled to 96 * 96 here instead.
transform = transforms.Compose([
    transforms.Resize(96),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Hyperparameters
DOWNLOAD = True
BATCH_SIZE = 256
EPOCH = 5
learning_rate = 0.001

# Use the GPU only when CUDA is actually available, so the script also runs on CPU
use_gpu = torch.cuda.is_available()

# CIFAR10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data/', train=True, transform=transform, download=DOWNLOAD)

test_dataset = torchvision.datasets.CIFAR10(root='./data/', train=False, transform=transform)

# Data Loader
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

# Define the model
alex = AlextNet(3, 10)
if use_gpu:
    alex = alex.cuda()

# loss and optimizer

loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(alex.parameters(), lr=learning_rate)

# Training
alex.train()

for epoch in range(EPOCH):
    total = 0
    correct = 0
    for i, (images, labels) in enumerate(train_loader):
        if use_gpu:
            images = images.cuda()
            labels = labels.cuda()
        # forward + backward + optimize
        optimizer.zero_grad()
        y_pred = alex(images)

        loss = loss_fn(y_pred, labels)

        loss.backward()

        optimizer.step()

        if (i + 1) % 100 == 0:
            # Report the real number of batches per epoch and read the scalar loss with .item()
            print("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" % (epoch + 1, EPOCH, i + 1, len(train_loader), loss.item()))

        # Accumulate training accuracy as plain Python ints
        _, predicted = torch.max(y_pred.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the model on the train images: %d %%' % (100 * correct / total))

    # Decaying Learning Rate: update the existing optimizer in place so that
    # Adam's running moment estimates are kept instead of being reset
    if (epoch+1) % 2 == 0:
        learning_rate /= 3
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate


# Test
alex.eval()

correct = 0
total = 0

# Gradients are not needed for evaluation; skipping them saves memory and time
with torch.no_grad():
    for images, labels in test_loader:
        if use_gpu:
            images = images.cuda()
            labels = labels.cuda()

        y_pred = alex(images)
        _, predicted = torch.max(y_pred.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the model on the test images: %d %%' % (100 * correct / total))

训练 5 个 epoch 之后的结果。用 GPU 跑，大概需要 1 分多钟

> Files already downloaded and verified

> Epoch [1/5], Iter [100/100] Loss: 1.7411

> Accuracy of the model on the train images: 28 %

> Epoch [2/5], Iter [100/100] Loss: 1.4252

> Accuracy of the model on the train images: 46 %

> Epoch [3/5], Iter [100/100] Loss: 1.1083

> Accuracy of the model on the train images: 57 %

> Epoch [4/5], Iter [100/100] Loss: 0.9585

> Accuracy of the model on the train images: 61 %

> Epoch [5/5], Iter [100/100] Loss: 0.9199

> Accuracy of the model on the train images: 66 %

> Accuracy of the model on the test images: 65 %
