# Deep Learning Final Project

## This notebook shows how to train our final model and save the loss and accuracy per epoch.

### Reference: Train CIFAR10 with PyTorch: https://github.com/kuangliu/pytorch-cifar

### Libraries

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
import numpy as np
import random
import math
import pickle
import os

In [3]:
# Mount Google Drive under /content/drive (relies on the google.colab package).
# save_population() writes below /content/drive/MyDrive, so the mount has to
# succeed before that helper is used.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Set the save path and load path

In [4]:
# All paths in this cell are relative, i.e. resolved against the current
# working directory. NOTE(review): save_population() writes to an absolute
# Google Drive folder instead — confirm the working directory is set so that
# both locations agree.

# Pickled population read by load_population().
population_path = "populations/population-9.bin"

# Weight files of the two final models.
# NOTE(review): not referenced in the cells visible here — confirm they are used elsewhere.
model_1_path = "model/model1.pth"
model_2_path = "model/model2.pth"

# Per-epoch training histories of the two final models.
# NOTE(review): not referenced in the cells visible here — confirm they are used elsewhere.
model_1_history_path = "history/model1_training_history.bin"
model_2_history_path = "history/model2_training_history.bin"

# Checkpoint written by test_final_model() whenever the test accuracy improves.
model_save_path = "model/ckpt.pth"
# History pickle rewritten by the training loop after every epoch.
history_save_path = "history/history.bin"

### Helper functions

In [5]:
def save_history(path,h):
  """Pickle the history object `h` to `path`.

  The parent directory of `path` is created when it does not exist yet, so a
  fresh working directory (no "history/" folder) no longer makes the write
  fail with FileNotFoundError.

  Args:
    path: destination file name.
    h: any picklable object (the training loop passes a dict of lists).
  """
  parent = os.path.dirname(path)
  if parent:  # a bare file name has no directory component to create
    os.makedirs(parent, exist_ok=True)
  with open(path, "wb") as f: # "wb" because pickle produces binary data
    pickle.dump(h, f)

def load_history(path):
  """Read back an object that was pickled to `path` and return it."""
  with open(path, "rb") as handle: # binary mode: the file holds pickle data
    return pickle.load(handle)


def save_population(population,generation,save_dir=None):
  """Pickle `population` to "<save_dir>/population-<generation>.bin".

  Args:
    population: any picklable object holding the population.
    generation: value inserted into the file name.
    save_dir: target directory. When omitted, the Google Drive folder that
      was previously hard-coded is used, so existing calls behave as before.

  The directory is deliberately NOT created here: silently creating
  /content/drive/... when Drive is not mounted would hide the missing mount.
  """
  if save_dir is None:
    save_dir = "/content/drive/MyDrive/ECE7123-deeplearning/final-proj/submit/populations"
  save_path = os.path.join(save_dir, "population-{}.bin".format(generation))
  with open(save_path, "wb") as f: # "wb" because pickle produces binary data
    pickle.dump(population, f)

def load_population(path=None):
  """Unpickle and return a saved population.

  Args:
    path: file to read. When omitted, the notebook-level `population_path`
      is used, which is what the original zero-argument call did.

  NOTE(review): pickle.load can execute arbitrary code embedded in the file;
  only load population files produced by this project.
  """
  if path is None:
    path = population_path  # looked up at call time so later reassignment is honoured
  with open(path, "rb") as f: # "rb" because the file holds pickle data
    population_load = pickle.load(f)
  return population_load

class Individual:
    """One candidate network: two layer-spec sequences plus its score.

    `part1` and `part2` are stored untouched; decodeCNN() turns them into the
    layers placed before and after the Flatten layer respectively.
    `score` starts with both "accuracy" and "parameters" set to 0.
    """

    def __init__(self, part1, part2):
        self.part1, self.part2 = part1, part2
        # A fresh dict per instance, so scores are never shared between individuals.
        self.score = dict(accuracy=0, parameters=0)

def decodeCNN(part1, part2):
    """Assemble an nn.Sequential from two sequences of encoded layer specs.

    Every spec is converted with transferToLayer(). The layers from `part1`
    come first, then a single nn.Flatten, then the layers from `part2`.
    """
    before_flatten = [transferToLayer(spec) for spec in part1]
    after_flatten = [transferToLayer(spec) for spec in part2]
    return nn.Sequential(*before_flatten, nn.Flatten(), *after_flatten)

def transferToLayer(args):
    """Create the torch.nn layer described by an encoded layer tuple.

    `args[0]` selects the layer type; the remaining entries are its arguments:

        (0, in_channels, out_channels, kernel_size) -> nn.Conv2d
        (1,)                                        -> nn.ReLU
        (2,)                                        -> nn.MaxPool2d(2, 2)
        (3,)                                        -> nn.AvgPool2d(2, 2)
        (4, in_features, out_features)              -> nn.Linear
        (5,)                                        -> nn.Dropout

    Raises:
        ValueError: if `args[0]` is not one of the codes above. Previously the
            function fell through and returned None, which only failed later
            inside nn.Sequential with a far less helpful message.
        KeyError: if a convolution asks for a kernel size other than 1, 3, 5 or 7.
    """
    # Padding chosen so that a convolution leaves the image size unchanged.
    mapping = {
        1: 0,  # 1x1 kernel size -> padding = 0
        3: 1,  # 3x3 kernel size -> padding = 1
        5: 2,  # 5x5 kernel size -> padding = 2
        7: 3,  # 7x7 kernel size -> padding = 3
    }
    layer_type = args[0]
    if layer_type == 0:
        return nn.Conv2d(in_channels=args[1], out_channels=args[2], kernel_size=args[3], padding=mapping[args[3]])
    elif layer_type == 1:
        return nn.ReLU()
    elif layer_type == 2:
        return nn.MaxPool2d(2, 2)
    elif layer_type == 3:
        return nn.AvgPool2d(2, 2)
    elif layer_type == 4:
        return nn.Linear(args[1], args[2])
    elif layer_type == 5:
        return nn.Dropout()
    raise ValueError("unknown layer type code {!r} in layer spec {!r}".format(layer_type, args))



### Load the CIFAR-10 dataset

In [6]:
# Hyper-parameters
batch_size = 64  # TODO
learning_rate = 1e-2  # TODO

# The same per-channel normalisation is applied to training and test images.
_normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

# Training images are additionally augmented (random crop with padding, random flip).
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    _normalize,
])

# Test images are only converted to tensors and normalised.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    _normalize,
])

# CIFAR-10 is downloaded into ./data on first use.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)

# Only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


### Load and decode the final model

In [8]:
# Take the first individual of the saved population and show its encoding and score.
load_p = load_population()
final_model_1 = load_p[0]
# final_model_2 = load_p[7]
print(final_model_1.part1)
print(final_model_1.part2)
print(final_model_1.score)

# Decode the two encoded parts into an nn.Sequential and move it to the selected device.
net = decodeCNN(final_model_1.part1, final_model_1.part2)
net = net.to(device)

import os  # already imported in the first cell; kept so this cell does not depend on it
best_acc = 0  # best test accuracy seen so far; updated by test_final_model()
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
criterion = nn.CrossEntropyLoss()
# Use the configured `learning_rate` (1e-2) instead of repeating the literal 0.01,
# so that changing the hyper-parameter cell actually affects training.
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
# Cosine schedule whose period (T_max) is 200 scheduler steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)


[(0, 3, 7, 3), (1,), (0, 7, 195, 5), (1,), (0, 195, 207, 7), (1,), (0, 207, 235, 7), (1,), (3,), (0, 235, 241, 7), (1,), (2,), (2,)]
[(5,), (4, 3856, 1928), (1,), (5,), (4, 1928, 964), (1,), (4, 964, 10)]
{'accuracy': 0.5426, 'parameters': 16477306}


### Training function and Testing function

In [10]:
def train_final_model(epoch):
  """Run one training pass of the global `net` over `train_loader`.

  Uses the notebook-level `net`, `train_loader`, `device`, `optimizer` and
  `criterion`. Prints a one-line summary in which the accuracy is shown as a
  fraction in [0, 1].

  Args:
    epoch: epoch number, used only in the printed summary.

  Returns:
    (mean of the per-batch losses, accuracy in percent).
  """
  net.train()
  running_loss, n_correct, n_seen = 0, 0, 0

  for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)

    # Standard optimisation step: reset gradients, forward, backward, update.
    optimizer.zero_grad()
    outputs = net(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    predicted = outputs.max(1)[1]  # index of the largest output per sample
    n_seen += labels.size(0)
    n_correct += predicted.eq(labels).sum().item()

  final_loss = running_loss / len(train_loader)
  acc = 100.*n_correct/n_seen
  print('Epoch: {}, Train loss: {:.4f} Train Accuracy: {:.4f}'.format(epoch, final_loss, n_correct/n_seen))
  return (final_loss, acc)

def test_final_model(epoch):
  """Evaluate the global `net` on `test_loader` and checkpoint it on improvement.

  Uses the notebook-level `net`, `test_loader`, `device`, `criterion`,
  `model_save_path` and `best_acc`. Whenever the accuracy exceeds `best_acc`,
  the model state, accuracy and epoch are saved to `model_save_path` and
  `best_acc` is updated.

  Args:
    epoch: epoch number, used in the printed summary and stored in the checkpoint.

  Returns:
    (mean of the per-batch losses, accuracy in percent).
  """
  global best_acc
  test_loss = 0
  correct = 0
  total = 0
  net.eval()
  with torch.no_grad():
      for images, labels in test_loader:
          images, labels = images.to(device), labels.to(device)
          outputs = net(images)
          loss = criterion(outputs, labels)

          test_loss += loss.item()
          _, predicted = outputs.max(1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

  acc = 100.*correct/total
  final_loss = test_loss / len(test_loader)
  # The printed accuracy is a fraction in [0, 1]; the returned one is in percent.
  print('Epoch: {}, Test loss: {:.4f} Test Accuracy: {:.4f}'.format(epoch, final_loss, correct/total))

  # Save checkpoint.
  if acc > best_acc:
      print('Saving..')
      state = {
          'net': net.state_dict(),
          'acc': acc,
          'epoch': epoch,
      }
      # Create the directory the checkpoint is actually written to. The old code
      # ensured a 'checkpoint' folder existed but then saved to model_save_path,
      # so the save failed whenever that path's own folder was missing.
      ckpt_dir = os.path.dirname(model_save_path)
      if ckpt_dir:
          os.makedirs(ckpt_dir, exist_ok=True)
      torch.save(state, model_save_path)
      best_acc = acc

  print('Final Accuracy: {:.4f}'.format(correct/total))
  return (final_loss, acc)

def train_final_model_test(epoch):
  """Debug variant of train_final_model() with extra diagnostic prints.

  Performs the same training pass over `train_loader` with the notebook-level
  `net`, `optimizer` and `criterion`, and additionally prints the loss of
  every batch, the summed loss and the number of batches.

  Args:
    epoch: epoch number, used only in the printed summary.

  Returns:
    (mean of the per-batch losses, accuracy in percent).
  """
  net.train()
  running_loss, n_correct, n_seen = 0, 0, 0

  for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)

    optimizer.zero_grad()
    outputs = net(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    batch_loss = loss.item()
    running_loss += batch_loss
    print("loss.item: ",batch_loss)
    predicted = outputs.max(1)[1]  # index of the largest output per sample
    n_seen += labels.size(0)
    n_correct += predicted.eq(labels).sum().item()

  n_batches = len(train_loader)
  print("train loss: ",running_loss)
  print("length: ",n_batches)
  final_loss = running_loss / n_batches
  acc = 100.*n_correct/n_seen
  print('Epoch: {}, Train loss: {:.4f} Train Accuracy: {:.4f}'.format(epoch, final_loss, n_correct/n_seen))
  return (final_loss, acc)

### Start training

In [None]:
start_epoch = 0
# The cosine scheduler is created with T_max=200, so its learning rate reaches
# its minimum (0) after 200 scheduler steps. The previous range of 200 + 1
# epochs therefore ran a final epoch at a learning rate of ~0 that could not
# change the weights; train for exactly the 200 scheduled epochs instead.
num_epochs = 200
train_history = []
test_history = []
# The dict refers to the two lists above, which are appended to in place, so it
# always reflects every finished epoch.
history = {}
history['train'] = train_history
history['test'] = test_history
for epoch in range(start_epoch, start_epoch + num_epochs):
    train_h = train_final_model(epoch)  # (train loss, train accuracy in percent)
    test_h = test_final_model(epoch)    # (test loss, test accuracy in percent)
    train_history.append(train_h)
    test_history.append(test_h)
    scheduler.step()
    # Rewrite the history file after every epoch so an interrupted run keeps its results.
    save_history(history_save_path, history)