<a href="https://colab.research.google.com/github/YiningMa0822/Deep-Learning-Homework/blob/main/deeplearning_hw5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning Homework 5
### 马艺宁 大数据 2019141210086

In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [3]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Show the selected device (a CUDA machine prints "cuda:0").
print(device)

cuda:0


### (a) 首先对训练集数据进行预处理：计算训练集图片各通道的均值和标准差，得到 normalization constants.

In [4]:
# Load the CIFAR-10 training split with tensor conversion only (no normalization),
# so that the per-channel statistics of the data can be measured.
transform_try = transforms.ToTensor()
trainset_try = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_try,
)

# One batch as large as the whole split: a single fetch returns every image.
trainloader_try = torch.utils.data.DataLoader(
    trainset_try,
    batch_size=len(trainset_try),
    shuffle=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data


In [5]:
# Fetch the single full-size batch and keep only the images (index 0); labels are not needed.
# The built-in next() is used instead of the Python-2-style `.next()` method, which
# DataLoader iterators in current PyTorch releases do not provide.
train = next(iter(trainloader_try))[0]
train_pixels = train.numpy()  # convert once and reuse for both statistics

# Reduce over axes 0, 2 and 3 (batch, height, width), leaving one value per colour channel.
train_mean = np.mean(train_pixels, axis=(0, 2, 3))
train_std = np.std(train_pixels, axis=(0, 2, 3))

print("train_mean:",train_mean)
print("train_std:",train_std)

train_mean: [0.49139866 0.48215503 0.44652778]
train_std: [0.24703248 0.24348569 0.26158887]


### (b)-(f)   
对训练集依次采用：四周填充 4 个像素（padding=4）→ 随机裁剪到 32×32 → 随机水平翻转 → 转化为张量 → 利用 (a) 中得到的均值和标准差进行标准化；对测试集依次采用：转化为张量 → 利用训练集的均值和标准差进行标准化. 进一步，建立 ResNet18 网络，并训练 25 个 epoch.

In [6]:
# Data
print('==> Preparing data..')

# Per-channel normalization constants: the training-set mean / standard deviation
# measured above, rounded to four decimals.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)

# Training pipeline: random crop from a 4-pixel-padded image and random horizontal
# flip, followed by tensor conversion and normalization.
train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
transform_train = transforms.Compose(train_steps)

# Test pipeline: no augmentation, same normalization constants as for training.
test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
transform_test = transforms.Compose(test_steps)

# The training archive was already fetched by the statistics cell, hence download=False.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=False, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=128, shuffle=False, num_workers=2)

# Human-readable names of the ten CIFAR-10 classes.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)


==> Preparing data..
Files already downloaded and verified


In [7]:
# Model definition: residual blocks, the ResNet container, and the ResNet18 instance used for training.
print('==> Building model..')
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions, each followed by batch norm.

    The block input is added to the output of the second conv/BN pair before the
    final ReLU. When the stride is not 1 or the channel count changes, the input
    first goes through a 1x1 convolution + batch norm so that the shapes match;
    otherwise the shortcut is an empty ``nn.Sequential`` (identity).

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both 3x3 convolutions.
        stride: stride of the first convolution and of the projection shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already agree: identity shortcut.
            self.shortcut = nn.Sequential()
        else:
            # Projection shortcut to match spatial size and channel count.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        branch = self.conv1(x)
        branch = F.relu(self.bn1(branch))
        branch = self.bn2(self.conv2(branch))
        branch += self.shortcut(x)
        return F.relu(branch)


class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The first 1x1 convolution maps the input to ``planes`` channels, the 3x3
    convolution (which carries the stride) keeps ``planes`` channels, and the
    last 1x1 convolution expands to ``expansion * planes`` channels. Every
    convolution is followed by batch norm. The block input is added before the
    final ReLU, going through a 1x1 convolution + batch norm when the stride is
    not 1 or the channel counts differ.

    Args:
        in_planes: number of input channels.
        planes: width of the two inner convolutions.
        stride: stride of the 3x3 convolution and of the projection shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already agree: identity shortcut.
            self.shortcut = nn.Sequential()
        else:
            # Projection shortcut to match spatial size and channel count.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply the three conv/BN stages, add the shortcut, and apply the final ReLU."""
        branch = F.relu(self.bn1(self.conv1(x)))
        branch = F.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))
        branch += self.shortcut(x)
        return F.relu(branch)


class ResNet(nn.Module):
    """ResNet classifier: a 3x3 stem convolution, four stages of blocks, and a linear head.

    The stem maps 3 input channels to 64 without changing the spatial size. The
    four stages use 64, 128, 256 and 512 ``planes``; stages 2-4 are built with
    stride 2. The forward pass average-pools with a 4x4 window, flattens, and
    feeds a linear layer expecting ``512 * block.expansion`` features, so the
    pooled map has to be 1x1 (the case for 32x32 inputs when the blocks honour
    the stride they are given).

    Args:
        block: block class called as ``block(in_planes, planes, stride)`` and
            exposing an ``expansion`` attribute.
        num_blocks: sequence of four ints, the number of blocks per stage.
        num_classes: size of the output layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Channel count fed to the next block; _make_layer updates it as blocks are added.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, every later block uses stride 1."""
        block_strides = [stride] + [1] * (num_blocks - 1)
        stage = []
        for block_stride in block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes this block's output channels.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores (one row per input sample)."""
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        pooled = F.avg_pool2d(features, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)


def ResNet18():
    """Build a ResNet of BasicBlocks with two blocks in each of the four stages."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths)


# Instantiate the network and move it to the selected device.
net = ResNet18().to(device)

==> Building model..


In [8]:
# `device` is a torch.device object, which does not compare equal to the plain
# string 'cuda', so the device *type* is checked instead; otherwise this branch
# would never run, even on a GPU machine.
if device.type == 'cuda':
    # Wrap the model for multi-GPU data parallelism and enable the cuDNN autotuner.
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True

In [15]:
# Loss and optimizer for the ResNet18 run (`optim` comes from the notebook's import cell).
criterion = nn.CrossEntropyLoss()
# SGD with momentum and weight decay at a fixed learning rate; no scheduler is attached.
optimizer = optim.SGD(net.parameters(), lr=0.001,
                      momentum=0.9, weight_decay=5e-4)

In [11]:
# Training
def train(epoch):
    """Run one training pass over ``trainloader`` and print a summary line.

    Uses the notebook-level ``net``, ``optimizer``, ``criterion``, ``device``
    and ``trainloader``; they are looked up when the function is called, so the
    function follows whatever model/optimizer is currently bound to those names.

    Args:
        epoch: epoch index, used only in the printed header.

    Prints the mean per-batch loss and the accuracy over all samples seen.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)  # index of the highest score per sample
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        num_batches += 1

    # Batches are counted explicitly rather than reusing the loop index after the
    # loop, so an empty loader yields a zero summary instead of an exception.
    print('Loss: %.3f | Acc: %.3f%% (%d/%d)'
          % (train_loss / max(num_batches, 1), 100. * correct / max(total, 1), correct, total))

In [12]:
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    print('Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

In [17]:
# Train ResNet18 for 25 epochs, evaluating on the test set after every epoch.
NUM_EPOCHS_RESNET18 = 25
for epoch in range(NUM_EPOCHS_RESNET18):
    train(epoch)
    test(epoch)


Epoch: 0
Loss: 0.335 | Acc: 88.538% (44269/50000)
Loss: 0.383 | Acc: 87.150% (8715/10000)

Epoch: 1
Loss: 0.312 | Acc: 89.288% (44644/50000)
Loss: 0.376 | Acc: 87.460% (8746/10000)

Epoch: 2
Loss: 0.305 | Acc: 89.580% (44790/50000)
Loss: 0.374 | Acc: 87.730% (8773/10000)

Epoch: 3
Loss: 0.298 | Acc: 89.840% (44920/50000)
Loss: 0.367 | Acc: 87.710% (8771/10000)

Epoch: 4
Loss: 0.292 | Acc: 89.896% (44948/50000)
Loss: 0.365 | Acc: 87.970% (8797/10000)

Epoch: 5
Loss: 0.287 | Acc: 90.166% (45083/50000)
Loss: 0.363 | Acc: 88.140% (8814/10000)

Epoch: 6
Loss: 0.285 | Acc: 90.374% (45187/50000)
Loss: 0.364 | Acc: 88.040% (8804/10000)

Epoch: 7
Loss: 0.280 | Acc: 90.202% (45101/50000)
Loss: 0.365 | Acc: 88.060% (8806/10000)

Epoch: 8
Loss: 0.280 | Acc: 90.372% (45186/50000)
Loss: 0.358 | Acc: 88.290% (8829/10000)

Epoch: 9
Loss: 0.275 | Acc: 90.482% (45241/50000)
Loss: 0.358 | Acc: 88.230% (8823/10000)

Epoch: 10
Loss: 0.275 | Acc: 90.580% (45290/50000)
Loss: 0.358 | Acc: 88.320% (8832/10000

#### 最终我们可以得到ResNet18的训练结果：在训练集上的损失为**0.250**，准确率**91.354%**； 在测试集上损失为**0.350**，准确率为**88.640%**(>75%).

### (g)   
我们知道，在一般情况下神经网络的深度越深（但非过拟合），学习效果越好. 我尝试了VGG16，同样印证了上述结论. 而相比于ResNet50和ResNet18, ResNet34综合了训练时间更短和深度更深的优点. 经过尝试不同的模型和参数，我最终选定了ResNet34进行训练，且为防止过拟合，选择epoch=28.

In [18]:
def ResNet34():
    """Build a ResNet of BasicBlocks with 3, 4, 6 and 3 blocks in its four stages."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths)


# Replace the current network with a new ResNet34 placed on the selected device.
net = ResNet34().to(device)

In [19]:
# Loss and optimizer for the ResNet34 run. `optim` is already imported earlier in
# the notebook, so it is not imported again here.
criterion = nn.CrossEntropyLoss()
# Adam at a fixed learning rate; no scheduler is attached.
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [20]:
# Training
def train(epoch):
    """Run one training pass over ``trainloader`` and print a summary line.

    Uses the notebook-level ``net``, ``optimizer``, ``criterion``, ``device``
    and ``trainloader``; they are looked up when the function is called, so the
    function follows whatever model/optimizer is currently bound to those names.

    Args:
        epoch: epoch index, used only in the printed header.

    Prints the mean per-batch loss and the accuracy over all samples seen.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)  # index of the highest score per sample
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        num_batches += 1

    # Batches are counted explicitly rather than reusing the loop index after the
    # loop, so an empty loader yields a zero summary instead of an exception.
    print('Loss: %.3f | Acc: %.3f%% (%d/%d)'
          % (train_loss / max(num_batches, 1), 100. * correct / max(total, 1), correct, total))
    
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    print('Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

In [21]:
# Train ResNet34 for 28 epochs, evaluating on the test set after every epoch.
NUM_EPOCHS_RESNET34 = 28
for epoch in range(NUM_EPOCHS_RESNET34):
    train(epoch)
    test(epoch)


Epoch: 0
Loss: 1.638 | Acc: 39.170% (19585/50000)
Loss: 1.386 | Acc: 50.120% (5012/10000)

Epoch: 1
Loss: 1.155 | Acc: 58.160% (29080/50000)
Loss: 1.092 | Acc: 62.360% (6236/10000)

Epoch: 2
Loss: 0.929 | Acc: 67.012% (33506/50000)
Loss: 0.974 | Acc: 68.200% (6820/10000)

Epoch: 3
Loss: 0.778 | Acc: 72.520% (36260/50000)
Loss: 0.760 | Acc: 73.860% (7386/10000)

Epoch: 4
Loss: 0.662 | Acc: 76.968% (38484/50000)
Loss: 0.677 | Acc: 77.170% (7717/10000)

Epoch: 5
Loss: 0.566 | Acc: 80.336% (40168/50000)
Loss: 0.689 | Acc: 76.470% (7647/10000)

Epoch: 6
Loss: 0.506 | Acc: 82.484% (41242/50000)
Loss: 0.542 | Acc: 81.910% (8191/10000)

Epoch: 7
Loss: 0.445 | Acc: 84.464% (42232/50000)
Loss: 0.545 | Acc: 82.790% (8279/10000)

Epoch: 8
Loss: 0.401 | Acc: 86.060% (43030/50000)
Loss: 0.549 | Acc: 81.740% (8174/10000)

Epoch: 9
Loss: 0.376 | Acc: 86.982% (43491/50000)
Loss: 0.391 | Acc: 87.450% (8745/10000)

Epoch: 10
Loss: 0.353 | Acc: 87.670% (43835/50000)
Loss: 0.433 | Acc: 86.020% (8602/10000

值得注意的是，此模型训练时间较长，几乎为前一个模型的2倍. 因此，虽然经实验发现继续增加epoch（如取epoch=50）时模型的结果会进一步提升，但是提升幅度较小（<1%)且训练所用时间会更长，这是得不偿失的.   
  


### 最终我们可以得到ResNet34的训练结果：在训练集上的损失为**0.103**，准确率**96.350%**； 在测试集上损失为**0.365**，准确率为**90.570%**(>90%)，模型的效果较好. 