In [6]:
import os
import pickle
import time
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from scipy import ndimage
from IPython.display import clear_output

In [7]:
class VGG16(nn.Module):
    """VGG-16 style image classifier that returns raw class scores (logits).

    Thirteen 3x3 convolutions are grouped into five stages, each closed by a
    2x2 max-pool, followed by three fully connected layers. The first fully
    connected layer takes exactly 512 features, so the flattened output of the
    last pooling stage must hold 512 values per sample (e.g. 32x32 inputs,
    which the five pools reduce to 1x1 with 512 channels).

    Args:
        num_class: number of output classes.
        rgb: when True the first convolution expects 3 input channels,
            otherwise 1.
    """

    def __init__(self, num_class, rgb=True):
        super(VGG16, self).__init__()

        # Read by the training code to name log and output files.
        self.name = 'VGG16'

        # Stage 1: 64 channels
        self.conv1 = nn.Conv2d(in_channels=3 if rgb else 1, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 128 channels
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 3: 256 channels
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 4: 512 channels
        self.conv8 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv9 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv10 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 5: 512 channels
        self.conv11 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv12 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv13 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.maxpool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Sequential(
            nn.Linear(512, 4096),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.fc2 = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        # No Softmax here: nn.CrossEntropyLoss applies log-softmax itself, so
        # feeding it probabilities would normalise twice and flatten the
        # gradients. The Sequential wrapper is kept so the parameter names
        # (fc3.0.weight / fc3.0.bias) still match existing checkpoints.
        self.fc3 = nn.Sequential(
            nn.Linear(4096, num_class)
        )

    def forward(self, x):
        """Return logits of shape (batch, num_class) for a batch of images.

        Apply ``softmax(dim=1)`` to the result if probabilities are needed;
        the arg-max class is the same either way.
        """
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        out = self.maxpool1(out)

        out = F.relu(self.conv3(out))
        out = F.relu(self.conv4(out))
        out = self.maxpool2(out)

        out = F.relu(self.conv5(out))
        out = F.relu(self.conv6(out))
        out = F.relu(self.conv7(out))
        out = self.maxpool3(out)

        out = F.relu(self.conv8(out))
        out = F.relu(self.conv9(out))
        out = F.relu(self.conv10(out))
        out = self.maxpool4(out)

        out = F.relu(self.conv11(out))
        out = F.relu(self.conv12(out))
        out = F.relu(self.conv13(out))
        out = self.maxpool5(out)

        # Flatten everything except the batch dimension for the classifier.
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [8]:
class ResBlock(nn.Module):
    """Basic residual block: two 3x3 conv + batch-norm layers plus a shortcut.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to build the
            shortcut; when None the input itself is added back, so its shape
            must already match the output of the second convolution.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels)
        )
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(conv2(conv1(x)) + shortcut(x))``."""
        out = self.conv1(x)
        out = self.conv2(out)
        # Test against None explicitly: the truth value of a module goes
        # through __len__ (nn.Sequential defines it), so a bare `if module:`
        # can silently skip a shortcut that was supplied.
        residual = x if self.downsample is None else self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
    
class ResNet34(nn.Module):
    """ResNet-style classifier assembled from a caller-supplied residual block.

    A 5x5 stem convolution with batch-norm, ReLU and max-pool is followed by
    four groups of residual blocks (64, 128, 256 and 512 channels), global
    average pooling and one linear layer.

    Args:
        num_class: number of output classes.
        resBlock: block class, called as
            ``resBlock(in_channels, out_channels, stride, downsample)`` for the
            first block of a group and ``resBlock(in_channels, out_channels)``
            for the rest.
        repeats: number of blocks in each of the four groups.
        rgb: when True the stem expects 3 input channels, otherwise 1.
    """

    def __init__(self, num_class, resBlock, repeats=(3, 4, 6, 3), rgb=True):
        super(ResNet34, self).__init__()

        # Running channel count; build_layer updates it after each group.
        self.in_channels = 64
        # Read by the training code to name log and output files.
        self.name = 'ResNet'

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3 if rgb else 1, out_channels=64, kernel_size=5, stride=1, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.layer1 = self.build_layer(resBlock, 64, repeats[0], stride=1)
        self.layer2 = self.build_layer(resBlock, 128, repeats[1], stride=2)
        self.layer3 = self.build_layer(resBlock, 256, repeats[2], stride=2)
        self.layer4 = self.build_layer(resBlock, 512, repeats[3], stride=2)
        # Global average pooling: identical to a 3x3 average pool when the
        # final feature map is 3x3 (the 32x32-input case), but it also keeps
        # the 512-feature classifier valid for other input resolutions.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512, num_class)

    def forward(self, x):
        """Return logits of shape (batch, num_class)."""
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = out.flatten(1)
        out = self.fc(out)
        return out

    def build_layer(self, resBlock, out_channels, repeat, stride=1):
        """Build one group of ``repeat`` blocks and advance ``self.in_channels``.

        Only the first block changes resolution or width; it receives a
        1x1-conv + batch-norm projection for its shortcut whenever the stride
        or the channel count differs from the incoming feature map.
        """
        needs_projection = stride != 1 or self.in_channels != out_channels
        downsample = nn.Sequential(
            nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride),
            nn.BatchNorm2d(out_channels),
        ) if needs_projection else None

        layers = [resBlock(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        layers += [resBlock(self.in_channels, out_channels) for _ in range(repeat - 1)]
        return nn.Sequential(*layers)

In [9]:
def show_train_hist(y1, y2, show=False, save=False, path='Train_hist.png', ylabel='Loss'):
    """Plot a train curve and a test curve against the epoch index.

    Args:
        y1: per-epoch values for the training set (labelled 'Train').
        y2: per-epoch values for the test set (labelled 'Test').
        show: display the figure when True, otherwise close it.
        save: write the figure to ``path`` when True.
        path: output file used when ``save`` is True.
        ylabel: y-axis label; defaults to 'Loss', pass e.g. 'Accuracy' when
            plotting a different metric.
    """
    fig, ax = plt.subplots()

    # Each series gets its own x-range so curves of different lengths still plot.
    ax.plot(range(len(y1)), y1, label='Train')
    ax.plot(range(len(y2)), y2, label='Test')

    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)

    ax.legend(loc=4)
    ax.grid(True)
    fig.tight_layout()

    if save:
        fig.savefig(path)

    if show:
        plt.show()
    else:
        # Close figures that are not displayed so repeated calls do not pile up.
        plt.close(fig)

In [None]:
# MNIST
def get_data(batch_size=100, resize=32, data_dir='./data/'):
    """Build MNIST train and test loaders that yield 3-channel square images.

    Each image is converted to a tensor, normalised, resized to
    ``resize`` x ``resize`` and expanded from one channel to three so it can
    be fed to networks whose first layer expects RGB input.

    Args:
        batch_size: number of samples per batch for both loaders.
        resize: side length of the output images.
        data_dir: directory the dataset is downloaded to / read from.

    Returns:
        ``(train_loader, test_loader)``.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # Single-channel normalisation constants (the widely quoted MNIST
        # mean / std; NOTE(review): not recomputed in this notebook).
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        transforms.Resize((resize, resize)),
        # Repeat the grey channel three times (expand creates a view, no copy).
        transforms.Lambda(lambda x: x.view(1, resize, resize).expand(3, -1, -1)),
    ])
    train_data = datasets.MNIST(root=data_dir, train=True, transform=transform, download=True)
    train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
    test_data = datasets.MNIST(root=data_dir, train=False, transform=transform, download=True)
    # Evaluation does not benefit from shuffling; a fixed order keeps it repeatable.
    test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

In [70]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is switched to training mode and updated
    after every batch; without one it is switched to eval mode and gradient
    tracking is disabled. Both metrics are weighted by batch size, so a
    smaller final batch does not skew them. Returns NaN for both when the
    loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for image, label in tqdm(loader):
            image = image.to(device)
            label = label.to(device)

            output = model(image)
            loss = criterion(output, label)

            batch_size = label.size(0)
            n_correct += (output.detach().argmax(dim=1) == label).sum().item()
            n_seen += batch_size
            # criterion averages over the batch; undo that to accumulate a sum.
            loss_sum += loss.item() * batch_size

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

    if n_seen == 0:
        return float('nan'), float('nan')
    return loss_sum / n_seen, n_correct / n_seen


def train_test(Model, learning_rate, train_loader, test_loader, epochs=20, dataset_name=None):
    """Train a model with Adam and cross-entropy, evaluating after every epoch.

    Per-epoch loss, accuracy and training time are written to TensorBoard.
    When training ends, loss/accuracy curves and the model's ``state_dict``
    are saved under ``./<dataset_name>_<Model.name>_results/`` and the cell
    output is cleared.

    Args:
        Model: an instantiated ``nn.Module`` exposing a ``name`` attribute;
            it is moved to the selected device in place.
        learning_rate: Adam learning rate (also embedded in output file names).
        train_loader: iterable of ``(image, label)`` training batches.
        test_loader: iterable of ``(image, label)`` evaluation batches.
        epochs: number of training epochs.
        dataset_name: prefix of the results directory. When None it is taken
            from the class name of ``train_loader.dataset`` (e.g. 'MNIST',
            'CIFAR10') so runs on different datasets do not overwrite each
            other; falls back to 'MNIST' if the loader has no ``dataset``.
    """
    print('Now traing model {} with learning rate of {}.'.format(Model.name, learning_rate))

    # Use the GPU when one is available, otherwise the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    if dataset_name is None:
        dataset = getattr(train_loader, 'dataset', None)
        dataset_name = type(dataset).__name__ if dataset is not None else 'MNIST'
    save_dir = './{}_{}_results/'.format(dataset_name, Model.name)
    os.makedirs(save_dir, exist_ok=True)

    model = Model.to(device)

    # Cross-entropy loss and Adam optimizer.
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    train_hist = {'train_losses': [], 'test_losses': [], 'train_acc': [], 'test_acc': []}

    run_tag = 'lr{}e{}'.format(learning_rate, epochs)
    writer = SummaryWriter(comment='-{}_{}'.format(run_tag, Model.name))

    try:
        for epoch in range(epochs):
            step = epoch + 1

            # Training. The CUDA cache is deliberately not emptied per batch:
            # doing so forces the allocator to re-request memory every step.
            epoch_start_time = time.time()
            print('Training...')
            train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
            per_epoch_ptime = time.time() - epoch_start_time

            train_hist['train_losses'].append(train_loss)
            train_hist['train_acc'].append(train_acc)
            writer.add_scalar('Loss/train', train_loss, step)
            writer.add_scalar('Accuracy/train', train_acc, step)
            writer.add_scalar('TimeTaken/train', per_epoch_ptime, step)

            print("Epoch %d of %d with %.2f s" % (step, epochs, per_epoch_ptime))
            print("Loss: %.8f" % (train_loss))

            # Evaluation (no optimizer -> eval mode, gradients disabled).
            print('Testing...')
            test_loss, test_acc = _run_epoch(model, test_loader, criterion, device)

            writer.add_scalar('Loss/test', test_loss, step)
            writer.add_scalar('Accuracy/test', test_acc, step)
            train_hist['test_losses'].append(test_loss)
            train_hist['test_acc'].append(test_acc)
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.flush()
        writer.close()

    print("Training finish!... save training results")
    show_train_hist(train_hist['train_losses'], train_hist['test_losses'], save=True, path=save_dir + run_tag + '_loss_hist.png')
    show_train_hist(train_hist['train_acc'], train_hist['test_acc'], save=True, path=save_dir + run_tag + '_acc_hist.png')
    torch.save(model.state_dict(), save_dir + run_tag + '_' + Model.name + '.model')

    # Clear the cell's output (progress bars and per-epoch prints).
    clear_output()

In [None]:
if __name__ == '__main__':

    # Learning-rate sweep: every rate trains a freshly initialised VGG16 and
    # ResNet34 on the loaders returned by get_data().
    num_class = 10
    LRs = [1e-6, 1e-7, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
    train_loader, test_loader = get_data()

    for lr in LRs:
        # Build both networks first, then train them one after the other.
        vgg16, resnet = VGG16(num_class), ResNet34(num_class, ResBlock)
        for net in (vgg16, resnet):
            train_test(net, lr, train_loader, test_loader)


In [None]:
# CIFAR-10
def get_data(batch_size=100, resize=32, data_dir='./data/'):
    """Build CIFAR-10 train and test loaders with per-channel normalisation.

    NOTE: this shares its name with the MNIST loader defined earlier in the
    notebook, so running this cell replaces that definition.

    Args:
        batch_size: number of samples per batch for both loaders.
        resize: side length of the output images; the images are only resized
            when this differs from their native 32 pixels.
        data_dir: directory the dataset is downloaded to / read from.

    Returns:
        ``(train_loader, test_loader)``.
    """
    steps = [
        transforms.ToTensor(),
        # Per-channel statistics of the untransformed training set:
        #   train_data.data.reshape((50000*32*32, 3)).mean(axis=0)/255
        #   train_data.data.reshape((50000*32*32, 3)).std(axis=0)/255
        transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.2435, 0.2616)),
    ]
    # Honour `resize` instead of silently ignoring it; skip the no-op at 32.
    if resize is not None and resize != 32:
        steps.append(transforms.Resize((resize, resize)))
    transform = transforms.Compose(steps)

    train_data = datasets.CIFAR10(root=data_dir, train=True, transform=transform, download=True)
    train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
    test_data = datasets.CIFAR10(root=data_dir, train=False, transform=transform, download=True)
    # Evaluation does not benefit from shuffling; a fixed order keeps it repeatable.
    test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

In [None]:
# Train one VGG16 and one ResNet34 on the loaders from get_data(), each with
# its own fixed learning rate.
num_class = 10
train_loader, test_loader = get_data()
vgg16, resnet = VGG16(num_class), ResNet34(num_class, ResBlock)
for net, lr in ((vgg16, 1e-5), (resnet, 1e-6)):
    train_test(net, lr, train_loader, test_loader)