In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import time

def load_data(batch_size = 1000):
    """Fetch CIFAR-10 into ./data and wrap the train and test splits in loaders.

    Every image is converted to a tensor and each of its three channels is
    normalised with mean 0.5 and std 0.5, i.e. mapped from [0, 1] to [-1, 1].

    Args:
        batch_size: number of samples per batch for both loaders.

    Returns:
        Tuple (trainset, trainloader, testset, testloader). The train loader
        shuffles; the test loader keeps the dataset order.
    """
    channel_stats = (0.5, 0.5, 0.5)
    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_stats, channel_stats),
    ])

    def _dataset_and_loader(is_train):
        # The train split is shuffled, the test split is not.
        dataset = torchvision.datasets.CIFAR10(
            root='./data',
            train=is_train,
            download=True,
            transform=to_normalized_tensor,
        )
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=is_train,
            num_workers=0,
        )
        return dataset, loader

    trainset, trainloader = _dataset_and_loader(True)
    testset, testloader = _dataset_and_loader(False)
    return trainset, trainloader, testset, testloader

# define model
class LogisticRegression(torch.nn.Module):
    """Multinomial logistic regression: a single linear map from 3072 features to 10 class scores.

    The forward pass returns raw, unnormalised scores (logits).
    torch.nn.CrossEntropyLoss applies log-softmax itself, so squashing the
    scores through a sigmoid first would cap how confident the model can be
    and slow learning. The predicted class (argmax) is unaffected by that change
    because sigmoid is monotonic.
    """
    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(3072, 10)

    def forward(self, x):
        """Return class logits of shape (batch, 10).

        Args:
            x: either already-flattened input whose last dimension is 3072,
               or a batch with more than two dimensions (e.g. (batch, 3, 32, 32)),
               which is flattened per sample before the linear layer.
        """
        # The training loop feeds image batches straight from the DataLoader;
        # flatten everything except the batch dimension so Linear(3072, 10) applies.
        if x.dim() > 2:
            x = torch.flatten(x, start_dim=1)
        return self.linear(x)
    
    
def train_vali_split_loader(trainset, batch_size = 1024, num_workers=0):
    """Randomly split `trainset` 80/20 and return a DataLoader for each part.

    Args:
        trainset: dataset to split (must support len()).
        batch_size: batch size used by both loaders.
        num_workers: number of DataLoader worker processes for both loaders.

    Returns:
        Tuple (trainloader_, valiloader_); both loaders shuffle their subset.
    """
    n_total = len(trainset)
    n_train = int(0.8 * n_total)
    # Give the remainder to validation so the two sizes always sum to
    # len(trainset); truncating both 0.8*n and 0.2*n can lose a sample,
    # which random_split rejects.
    n_vali = n_total - n_train

    trainset_, valset_ = torch.utils.data.random_split(trainset, [n_train, n_vali])

    trainloader_ = torch.utils.data.DataLoader(trainset_,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)

    valiloader_ = torch.utils.data.DataLoader(valset_,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=num_workers)
    return (trainloader_, valiloader_)


def test_model(model, testloader):
        predicted_correct_num = 0
        total_test = 0
        # run on vali set to get accuracy
        for i, data in enumerate(testloader, 0):
        # get the inputs; data is a list of [inputs, labels]
    #         inputs, labels = data
            inputs, labels = data[0].view(batch_size, -1), data[1]

            predicted_test = torch.max(model(inputs), 1)

            total_test += labels.size(0)
            predicted_correct_num += sum(predicted_test.indices == labels)
            print("Test {0} th batch, {1} correct, tested {2}, batch size {3}".format(i, 
                                                                          predicted_correct_num,
                                                                          batch_size*(i+1), batch_size))

        accu_test = predicted_correct_num.item() / total_test
        return accu_test


In [21]:
%%time
def train_and_vali(trainset, model, optimizer, title, testloader,
                   epochs=70, batch_print_num=20, 
                   batch_size=32, criterion=None):
    """Train `model`, track train/validation accuracy per epoch, test it and plot the curves.

    `trainset` is split once (80% train / 20% validation) before the first
    epoch. Re-splitting every epoch would move validation samples into later
    training epochs, so the validation accuracy would no longer measure
    held-out data.

    Args:
        trainset: dataset passed to train_vali_split_loader.
        model: torch.nn.Module to train in place; receives batches exactly as
               the loaders yield them.
        optimizer: optimizer already bound to `model`'s parameters.
        title: title of the accuracy plot.
        testloader: loader passed to test_model after training.
        epochs: number of passes over the training split.
        batch_print_num: the running loss is printed every this many mini-batches.
        batch_size: batch size of the train and validation loaders.
        criterion: loss called as criterion(outputs, labels); defaults to
                   torch.nn.CrossEntropyLoss() when None.

    Returns:
        Tuple (model, accu_vali, test_accu): the trained model, the validation
        accuracy of the LAST epoch (0.0 if epochs == 0) and the test accuracy
        of the final model.
    """
    if criterion is None:
        # Do not depend on a notebook-level global being defined first.
        criterion = torch.nn.CrossEntropyLoss()

    # Fixed train/validation split for the whole run (see docstring).
    trainloader_, valiloader_ = train_vali_split_loader(trainset, batch_size = batch_size)

    accu_train_list = []
    accu_vali_list = []
    accu_vali = 0.0
    for epoch in range(epochs):    # loop over the dataset multiple times
        model.train()
        running_loss = 0.0
        predicted_correct_num_train = 0
        total_train = 0
        for i, data in enumerate(trainloader_, 0):
            # data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print the mean loss of the last `batch_print_num` mini-batches
            running_loss += loss.item()
            if i % batch_print_num == batch_print_num-1:
                print('[%d, %5d] loss: %0.3f'%(epoch+1,batch_size*(i+1), 
                      running_loss / batch_print_num))
                running_loss = 0.0

            # training accuracy, measured on the outputs used for this update step
            predicted_train = torch.max(outputs.detach(), 1)
            total_train += labels.size(0)
            predicted_correct_num_train += (predicted_train.indices == labels).sum().item()

        accu_train = predicted_correct_num_train / total_train if total_train else 0.0
        accu_train_list.append(accu_train)

        # accuracy on the held-out validation split; no gradients needed
        predicted_correct_num = 0
        total_vali = 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in valiloader_:
                predicted_vali = torch.max(model(inputs), 1)
                total_vali += labels.size(0)
                predicted_correct_num += (predicted_vali.indices == labels).sum().item()

        accu_vali = predicted_correct_num / total_vali if total_vali else 0.0
        accu_vali_list.append(accu_vali)
        
        print("Training Accu: ", accu_train)
        print("Validation Accu: ", accu_vali)

    print('Finish Traning')
    
    # test the model as it is after the final epoch
    test_accu = test_model(model, testloader)
    
    # plot accuracy against epoch
    plt.plot(range(epochs), accu_train_list, label="Train")
    plt.plot(range(epochs), accu_vali_list, label="Vali")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.title(title)
    plt.legend()
    plt.show()
    
    return model, accu_vali, test_accu

def avg_d_dimensional_list(lst):
    """Return the arithmetic mean of every inner sequence of `lst`.

    Args:
        lst: iterable of iterables of numbers, e.g. one list of validation
             accuracies per hyperparameter value.

    Returns:
        List with one mean per inner sequence, in the same order. An empty
        inner sequence yields float('nan') instead of raising.
    """
    avg_list = []
    for values in lst:
        values = list(values)
        # Computed directly: no `mean` function is imported in this notebook.
        avg_list.append(sum(values) / len(values) if values else float("nan"))
    return avg_list

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 7.87 µs


## A5a

In [None]:
%%time
# A5a
# Grid search over SGD learning rate and momentum for logistic regression.
batch_size = 1000

# load data
trainset, trainloader, testset, testloader = load_data(batch_size = batch_size)

# define a loss function (read by train_and_vali)
criterion = torch.nn.CrossEntropyLoss()

lr_list = [0.001, 0.002, 0.003, 0.004, 0.005]
lr_accu_list = [[] for _ in lr_list]
momentum_list = [0.2, 0.4, 0.6, 0.8, 0.9]
momentum_accu_list = [[] for _ in momentum_list]

vali_acct_best = 0
test_accu_best = 0
best_params = None  # stays None if no run beats a validation accuracy of 0

for lr_idx, lr in enumerate(lr_list):
    for momentum_idx, momentum in enumerate(momentum_list):
        print("lr: ", lr, ", momen: ", momentum)
        # Start every configuration from a freshly initialised model; reusing
        # one model would let later configurations inherit earlier training
        # and make the comparison meaningless.
        model = LogisticRegression()

        # define optimizer
        optimizer = torch.optim.SGD(model.parameters(), 
                                    lr=lr, 
                                    momentum=momentum)

        model, accu_vali, test_accu = train_and_vali(trainset=trainset, 
                                                     model=model, 
                                                     optimizer=optimizer, 
                                                     testloader=testloader,
                                                     epochs=30, 
                                                     batch_print_num=20, 
                                                     batch_size=batch_size, 
                                                     title="Logistic Regression") 
        # record the result under both hyperparameter values for the averages below
        lr_accu_list[lr_idx].append(accu_vali)
        momentum_accu_list[momentum_idx].append(accu_vali)
        
        if accu_vali > vali_acct_best:
            vali_acct_best = accu_vali
            test_accu_best = test_accu
            best_params = {"lr":lr,"momentum": momentum}
        print("Vali Accu: ", accu_vali, "Best vali: ", vali_acct_best, 
              "Test Accu: ", test_accu, "Best params: ", best_params)

# report the best configuration, not the one that happened to run last
print("Best vali accu: {0}, Best test accu: {1}, Best param set: {2}".format(vali_acct_best, 
                                                                             test_accu_best, 
                                                                             best_params))
lr_accu_avg_list = avg_d_dimensional_list(lr_accu_list)
momentum_avg_list = avg_d_dimensional_list(momentum_accu_list)


In [None]:
# Plot the average validation accuracy obtained for each learning rate.
plt.plot(lr_list, lr_accu_avg_list)
plt.xlabel("Learning Rate")
plt.ylabel("Validation Accu")
plt.title("Lr vs Vali Accu")
# Render explicitly, like the other plot cells, so the cell does not end on a Text repr.
plt.show()

In [None]:
# Average validation accuracy as a function of the SGD momentum.
plt.plot(momentum_list, momentum_avg_list)
plt.gca().set(xlabel="Momentum",
              ylabel="Validation Accu",
              title="Momen vs Vali_Accu")
plt.show()

## A5b

In [32]:
%%time
# A5b
# define model one linear hidden layer
class OneHiddenLayer(torch.nn.Module):
    """Fully connected network with one sigmoid hidden layer: 3072 -> M -> 10.

    Args:
        M: number of hidden units.
    """
    def __init__(self, M):
        super(OneHiddenLayer, self).__init__()
        self.linear = torch.nn.Linear(3072, M)
        self.linear2 = torch.nn.Linear(M, 10)
        
    def forward(self, x):
        """Return class logits of shape (batch, 10).

        Args:
            x: already-flattened input whose last dimension is 3072, or a batch
               with more than two dimensions (e.g. (batch, 3, 32, 32)), which
               is flattened per sample first.
        """
        # The training loop feeds image batches straight from the DataLoader.
        if x.dim() > 2:
            x = torch.flatten(x, start_dim=1)
        # torch.sigmoid replaces the deprecated F.sigmoid.
        x = torch.sigmoid(self.linear(x))
        x = self.linear2(x)
        return x
    

CPU times: user 29 µs, sys: 11 µs, total: 40 µs
Wall time: 55.8 µs


In [None]:
%%time
# A5b
# Grid search over learning rate, momentum and hidden width M for the one-hidden-layer network.
batch_size = 1000

# load data
trainset, trainloader, testset, testloader = load_data(batch_size = batch_size)

# define a loss function (read by train_and_vali)
criterion = torch.nn.CrossEntropyLoss()

lr_list = [0.001, 0.003, 0.005]
lr_accu_list = [[] for _ in lr_list]
momentum_list = [0.3, 0.6, 0.9]
momentum_accu_list = [[] for _ in momentum_list]
M_list = [50, 150, 300]
M_accu_list = [[] for _ in M_list]

vali_acct_best = 0
test_accu_best = 0
best_params = None  # stays None if no run beats a validation accuracy of 0

for lr_idx, lr in enumerate(lr_list):
    for momentum_idx, momentum in enumerate(momentum_list):
        for M_idx, M in enumerate(M_list):
            print("lr: ", lr, ", momen: ", momentum, "M: ", M)
            # define a fresh model for this configuration
            model = OneHiddenLayer(M)
            # define optimizer
            optimizer = torch.optim.SGD(model.parameters(), 
                                        lr=lr, 
                                        momentum=momentum)

            model, accu_vali, test_accu = train_and_vali(trainset=trainset, 
                                                         model=model, 
                                                         optimizer=optimizer, 
                                                         testloader=testloader,
                                                         epochs=30, 
                                                         batch_print_num=20, 
                                                         batch_size=batch_size, 
                                                         title="One Hiddent Layer Model") 
            # record the result under every hyperparameter value for the averages below
            lr_accu_list[lr_idx].append(accu_vali)
            momentum_accu_list[momentum_idx].append(accu_vali)
            M_accu_list[M_idx].append(accu_vali)

            if accu_vali > vali_acct_best:
                vali_acct_best = accu_vali
                test_accu_best = test_accu
                best_params = {"lr": lr,"momentum": momentum, "M": M}
            print("Vali Accu: ", accu_vali, "Best vali: ", vali_acct_best, 
                  "Test Accu: ", test_accu, "Best params: ", best_params)

# report the best configuration, not the one that happened to run last
print("Best vali accu: {0}, Best test accu: {1}, Best param set: {2}".format(vali_acct_best, 
                                                                             test_accu_best, 
                                                                             best_params))
lr_accu_avg_list = avg_d_dimensional_list(lr_accu_list)
momentum_avg_list = avg_d_dimensional_list(momentum_accu_list)
M_avg_list = avg_d_dimensional_list(M_accu_list)


## A5c
-  **torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')**
- in_channels (int) – Number of channels in the input image
- out_channels (int) – Number of channels produced by the convolution
- kernel_size (int or tuple) – Size of the convolving kernel
- stride (int or tuple, optional) – Stride of the convolution. (Default: 1)
- padding (int or tuple, optional) – Zero-padding added to both sides of the input (Default: 0)
- padding_mode (string, optional) – How the padded border is filled: 'zeros', 'reflect', 'replicate' or 'circular'. (Default: 'zeros')
- dilation (int or tuple, optional) – Spacing between kernel elements. (Default: 1)
- groups (int, optional) – Number of blocked connections from input to output channels. (Default: 1)
- bias (bool, optional) – If True, adds a learnable bias to the output. (Default: True)
- Visualizations of convolution arithmetic (stride, padding, dilation): https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md


In [29]:
%%time
# Convolutional layer with max-pool and fully-connected output
class CNN(torch.nn.Module):
    """Conv2d (M filters, k x k) -> ReLU -> MaxPool (N x N) -> Linear(10) for 3x32x32 images.

    Args:
        M: number of convolution filters (output channels).
        k: side length of the square convolution kernel.
        N: side length of the square max-pooling window (also its stride).

    Raises:
        ValueError: if k and N leave no spatial output for a 32x32 input.
    """
    def __init__(self, M,k,N):
        super(CNN, self).__init__()
        # A 32x32 input becomes (33-k) x (33-k) after the unpadded, stride-1
        # convolution and floor((33-k)/N) per side after N x N max-pooling.
        pooled_side = (33 - k) // N
        if pooled_side < 1:
            raise ValueError(
                "kernel size k={0} and pool size N={1} leave no output for a 32x32 input".format(k, N))
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=M, kernel_size=k)
        self.linear1 = torch.nn.Linear(M * pooled_side ** 2, 10)
        self.N = N
        self.M = M
        self.k = k
        
    def forward(self, x):
        """Return class logits of shape (batch, 10) for x of shape (batch, 3, 32, 32)."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, (self.N, self.N))
        # Flatten per sample using the actual batch size; a hard-coded 1000
        # fails for every batch of a different size.
        x = torch.flatten(x, start_dim=1)
        x = self.linear1(x)
        return x 

CPU times: user 40 µs, sys: 222 µs, total: 262 µs
Wall time: 675 µs


## A5d Fine tune Previous Network

In [30]:
%%time
# A5c
# Grid search for the convolutional network (conv -> max-pool -> fully-connected output).
batch_size = 1000

# load data
trainset, trainloader, testset, testloader = load_data(batch_size = batch_size)

# define a loss function (read by train_and_vali)
criterion = torch.nn.CrossEntropyLoss()

lr_list = [0.001, 0.003, 0.005]
lr_accu_list = [[] for _ in lr_list]
momentum_list = [0.3, 0.6, 0.9]
momentum_accu_list = [[] for _ in momentum_list]
M_list = [50, 100, 150]
M_accu_list = [[] for _ in M_list]
k_list = [5]
k_accu_list = [[] for _ in k_list]
N_list = [4, 7, 14]
N_accu_list = [[] for _ in N_list]

vali_acct_best = 0
test_accu_best = 0
best_params = None  # stays None if no run beats a validation accuracy of 0

# Hyperparameters
# lr: learning rate
# momentum: SGD momentum
# M: number of conv2d filters (output channels)
# k: conv2d kernel size
# N: max-pool kernel size (square window)
for lr_idx, lr in enumerate(lr_list):
    for momentum_idx, momentum in enumerate(momentum_list):
        for M_idx, M in enumerate(M_list):
            for k_idx, k in enumerate(k_list):
                for N_idx, N in enumerate(N_list):
                    
                    print("lr: ", lr, ", momen: ", momentum, 
                          "M: ", M,"k: ", k, "N: ", N )
                    # define a fresh model for this configuration
                    model = CNN(M, k, N)
                    # define optimizer
                    optimizer = torch.optim.SGD(model.parameters(), 
                                                lr=lr, 
                                                momentum=momentum)

                    model, accu_vali, test_accu = train_and_vali(trainset=trainset, 
                                                                 model=model, 
                                                                 optimizer=optimizer, 
                                                                 testloader=testloader,
                                                                 epochs=30, 
                                                                 batch_print_num=20, 
                                                                 batch_size=batch_size, 
                                                                 title="Conv-Maxpool-linear-CNN") 
                    # record the result under every hyperparameter value for the averages below
                    lr_accu_list[lr_idx].append(accu_vali)
                    momentum_accu_list[momentum_idx].append(accu_vali)
                    M_accu_list[M_idx].append(accu_vali)
                    k_accu_list[k_idx].append(accu_vali)
                    N_accu_list[N_idx].append(accu_vali)

                    if accu_vali > vali_acct_best:
                        vali_acct_best = accu_vali
                        test_accu_best = test_accu
                        best_params = {"lr": lr,"momentum": momentum, 
                                       "M": M, "k":k, "N":N}
                    print("Vali Accu: ", accu_vali, "Best vali: ", vali_acct_best, 
                          "Test Accu: ", test_accu, "Best params: ", best_params)

# report the best configuration, not the one that happened to run last
print("Best vali accu: {0}, Best test accu: {1}, Best param set: {2}".format(vali_acct_best, 
                                                                             test_accu_best, 
                                                                             best_params))
lr_accu_avg_list = avg_d_dimensional_list(lr_accu_list)
momentum_avg_list = avg_d_dimensional_list(momentum_accu_list)
M_avg_list = avg_d_dimensional_list(M_accu_list)
k_avg_list = avg_d_dimensional_list(k_accu_list)
N_avg_list = avg_d_dimensional_list(N_accu_list)


Files already downloaded and verified
Files already downloaded and verified
lr:  0.001 , momen:  0.3 M:  50 k:  5 N:  4
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch.Size([1000, 2450])
torch.Size([1000, 3, 32, 32])
2450 torch

KeyboardInterrupt: 

In [None]:
# Plot the average validation accuracy obtained for each learning rate.
plt.plot(lr_list, lr_accu_avg_list)
plt.xlabel("Learning Rate")
plt.ylabel("Validation Accu")
plt.title("Lr vs Vali Accu")
# Render explicitly, like the other plot cells, so the cell does not end on a Text repr.
plt.show()

In [None]:
# Average validation accuracy as a function of the SGD momentum.
plt.plot(momentum_list, momentum_avg_list)
plt.gca().set(xlabel="Momentum",
              ylabel="Validation Accu",
              title="Momen vs Vali_Accu")
plt.show()

In [None]:
# Average validation accuracy as a function of M.
plt.plot(M_list, M_avg_list)
plt.gca().set(xlabel="M-Filter Size",
              ylabel="Validation Accu",
              title="M vs Vali_Accu")
plt.show()

In [None]:
# Average validation accuracy as a function of the convolution kernel size k.
plt.plot(k_list, k_avg_list)
plt.gca().set(xlabel="k-Conv-Kernel Size",
              ylabel="Validation Accu",
              title="k vs Vali_Accu")
plt.show()

In [None]:
# Average validation accuracy as a function of the max-pool window size N.
plt.plot(N_list, N_avg_list)
plt.gca().set(xlabel="N-Maxpool Size",
              ylabel="Validation Accu",
              title="N vs Vali_Accu")
plt.show()