# ECE 590 - Lab2 - My Model Cifar 10 - Anthony DiSpirito

## My Info
* **Name: Anthony DiSpirito**
* **NetID: ad424**
* **Class: ECE 590 - Comp Eng ML and DL**
* **Title: Lab2 - My Model**
* **Date: 10/2/19**

## Build My Model

### Import Statements

In [None]:
import argparse
import os, sys
import math
import random
import time
import datetime
import numpy as np
import pandas as pd
import csv
import numpy.matlib
import matplotlib.pyplot as plt

# Import pytorch dependencies
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.backends.cudnn as cudnn
from tqdm import tqdm_notebook as tqdm

# You cannot change this line.
from tools.dataloader import CIFAR10
from tools.my_testset_dataloader import TESTSET

### My Model Class

In [None]:
# Create the neural network module:
class convBlock(nn.Module):
    """Stack of Conv2d -> BatchNorm2d -> ELU stages followed by 2x2 max pooling.

    The stages are registered as ``conv1``/``bn_conv1`` ... ``conv4``/``bn_conv4``
    so the parameter names in ``state_dict()`` depend only on how many stages
    are built.

    Args:
        k: Kernel size shared by every convolution in the block.
        channel_in: Number of channels of the block input.
        channel_out: Number of channels produced by every convolution.
        layers: Requested number of stages. One stage is always built, and no
            more than ``_MAX_STAGES`` are built however large the value is.
        padding: When this compares equal to ``False`` the convolutions use no
            padding; otherwise each convolution pads by ``k // 2``.
    """

    # Upper bound on the number of Conv/BN/ELU stages a block can hold.
    _MAX_STAGES = 4

    def __init__(self, k, channel_in, channel_out, layers, padding):
        super(convBlock, self).__init__()
        self.layers = layers
        # Equality (not identity/truthiness) is kept on purpose so that the
        # accepted values match the historical behaviour exactly.
        pad = 0 if padding == False else k // 2  # noqa: E712
        stage_input_channels = channel_in
        for stage in self._active_stages():
            # All remaining Conv2d/BatchNorm2d arguments are the library
            # defaults (stride 1, dilation 1, bias, affine, running stats).
            setattr(self, "conv%d" % stage,
                    nn.Conv2d(stage_input_channels, channel_out, k, padding=pad))
            setattr(self, "bn_conv%d" % stage, nn.BatchNorm2d(channel_out))
            # Only the first stage changes the channel count.
            stage_input_channels = channel_out

    def _active_stages(self):
        """Yield the 1-based indices of the stages selected by ``self.layers``."""
        stage = 1
        while stage <= self._MAX_STAGES:
            # Stage 1 is unconditional; stage n (n > 1) needs layers > n - 1.
            if stage > 1 and not self.layers > stage - 1:
                return
            yield stage
            stage += 1

    def forward(self, x):
        """Run every active stage on ``x`` and halve the spatial size by pooling."""
        out = x
        for stage in self._active_stages():
            conv = getattr(self, "conv%d" % stage)
            norm = getattr(self, "bn_conv%d" % stage)
            out = F.elu(norm(conv(out)))
        return F.max_pool2d(out, 2)
# Defining the actual net     
class TONY_NET(nn.Module):
    """CIFAR-10 classifier: five ``convBlock`` stages followed by four FC layers.

    Every ``convBlock`` ends with a 2x2 max pool, so a 32x32 input is reduced
    to 1x1 with 512 channels before the fully connected head. The head is
    512 -> 512 -> 128 -> 64 -> 10 with ELU and dropout between hidden layers.

    ``forward`` returns raw class scores (logits). The last layer deliberately
    has no activation: ``nn.CrossEntropyLoss`` applies log-softmax itself, and
    an ELU on the logits would clamp every score to be greater than -1.
    ELU is strictly increasing, so the predicted class (argmax) of weights
    trained with the former output ELU is unchanged; only loss values differ.
    """

    def __init__(self):
        super(TONY_NET, self).__init__()
        self.convBlock1 = convBlock(k=3, channel_in=3, channel_out=64, layers=2, padding=True)
        self.convBlock2 = convBlock(k=3, channel_in=64, channel_out=128, layers=2, padding=True)
        self.convBlock3 = convBlock(k=3, channel_in=128, channel_out=256, layers=4, padding=True)
        self.convBlock4 = convBlock(k=3, channel_in=256, channel_out=512, layers=4, padding=True)
        self.convBlock5 = convBlock(k=3, channel_in=512, channel_out=512, layers=4, padding=True)
        #------------------
        orig_input_size = 32  # 32x32
        number_of_conv_block_poolings = 5
        pooling_size = 2  # 2x2 with stride 2
        # Integer arithmetic avoids a float round-trip: 32 // 2**5 == 1, so the
        # flattened feature vector has 512 * 1 * 1 entries.
        final_spatial_size = orig_input_size // pooling_size ** number_of_conv_block_poolings
        num_features = 512 * final_spatial_size ** 2
        # Layers are created and initialised in a fixed order so that a seeded
        # run draws its random initial weights in a reproducible sequence.
        self.fc1 = nn.Linear(num_features, num_features, bias=True)
        torch.nn.init.xavier_normal_(self.fc1.weight)
        self.dropout_fc1 = nn.Dropout(p=0.5)  # Dropout suggested in slides
        #------------------
        self.fc2 = nn.Linear(num_features, num_features // 4, bias=True)
        torch.nn.init.xavier_normal_(self.fc2.weight)
        self.dropout_fc2 = nn.Dropout(p=0.5)  # Dropout suggested in slides
        #------------------
        self.fc3 = nn.Linear(num_features // 4, num_features // 8, bias=True)
        torch.nn.init.xavier_normal_(self.fc3.weight)
        self.dropout_fc3 = nn.Dropout(p=0.5)  # Dropout suggested in slides
        #------------------
        self.fc4 = nn.Linear(num_features // 8, 10, bias=True)
        torch.nn.init.xavier_normal_(self.fc4.weight)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        out = self.convBlock1(x)
        out = self.convBlock2(out)
        out = self.convBlock3(out)
        out = self.convBlock4(out)
        out = self.convBlock5(out)
        #------------------
        out = out.view(out.size(0), -1)  # This is a flattening layer
        out = self.dropout_fc1(F.elu(self.fc1(out)))
        out = self.dropout_fc2(F.elu(self.fc2(out)))
        out = self.dropout_fc3(F.elu(self.fc3(out)))
        # No activation here: the loss expects unbounded logits.
        return self.fc4(out)

### Setting Hyperparameters

In [None]:
# Hyperparameters shared by the data loading, optimizer and training cells.

# Mini-batch sizes for the training and validation loaders.
TRAIN_BATCH_SIZE = 128  # alternative noted by the author: 256
VAL_BATCH_SIZE = 100  # alternative noted by the author: 200

# SGD settings: starting learning rate, momentum and weight decay.
INITIAL_LR = 7.85e-3
MOMENTUM = 0.9
REG = 0.0005

# Upper bound of the epoch loop.
EPOCHS = 150

# Dataset root directory and the directory that receives the checkpoint.
DATAROOT = "./data"
CHECKPOINT_PATH = "./saved_model"

### Loading the Data and Defining the Preprocessing

In [None]:
# Specify preprocessing function.
# Reference per-channel mean/std values used to normalise the inputs.
mean_RGB = (0.4914, 0.4822, 0.4465)
std_RGB = (0.2023, 0.1994, 0.2010)
input_img_size = (32, 32)
percent_crop = 0.80  # 80 percent left after crop (not referenced in this cell)

# Augmentations applied to PIL images; RandomOrder below shuffles their order
# for every sample. Arguments that merely restated the library defaults were
# dropped: in particular RandomAffine's `resample`/`fillcolor` keywords were
# renamed in later torchvision releases, so passing them breaks on current
# versions while omitting them behaves the same on old and new ones.
data_augment_list = [
    transforms.RandomVerticalFlip(p=0.20),
    transforms.RandomHorizontalFlip(p=0.40),
    transforms.RandomApply(
        [transforms.RandomCrop(size=input_img_size, padding=4)], p=0.25),
    transforms.RandomApply(
        [transforms.RandomAffine(degrees=45, translate=(0.15, 0.15))], p=0.25),
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05)],
        p=0.20),
]

# Training pipeline: resize, random augmentations, tensor conversion, random
# erasing (operates on tensors, hence after ToTensor), then normalisation.
transform_train = transforms.Compose([
    transforms.Resize(input_img_size),
    transforms.RandomOrder(data_augment_list),
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.20, scale=(0.01, 0.3), ratio=(0.3, 3.3), value=0, inplace=False),
    transforms.Normalize(mean_RGB, std_RGB),
])

# Validation/test pipeline: deterministic, no augmentation.
transform_val = transforms.Compose([
    transforms.Resize(input_img_size),
    transforms.ToTensor(),
    transforms.Normalize(mean_RGB, std_RGB),
])

In [None]:
# Build the training and validation datasets, then wrap each in a DataLoader.
num_threads = 16
# Each loader gets half of the thread budget as worker processes.
workers_per_loader = num_threads // 2

trainset = CIFAR10(root=DATAROOT, train=True, download=True, transform=transform_train)
valset = CIFAR10(root=DATAROOT, train=False, download=True, transform=transform_val)

# Only the training batches are shuffled; validation order stays fixed.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=workers_per_loader,
)
valloader = torch.utils.data.DataLoader(
    valset,
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,
    num_workers=workers_per_loader,
)

### Setting Device

In [None]:
# Choose the GPU when PyTorch can see one, otherwise fall back to the CPU,
# then build the network directly on that device.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
net = TONY_NET().to(device)
print("Train on GPU..." if device == 'cuda' else "Train on CPU...")

### Code to Load in Saved Model to Continue Training

In [None]:
# FLAG for loading the pretrained model: True ignores any saved checkpoint.
TRAIN_FROM_SCRATCH = True
# Code for loading checkpoint and recover epoch id.
CKPT_PATH = "./saved_model/model.h5"


def get_checkpoint(ckpt_path, map_location="cpu"):
    """Load a checkpoint saved with ``torch.save``; return ``None`` on failure.

    Args:
        ckpt_path: Path of the checkpoint file.
        map_location: Forwarded to ``torch.load``. Defaults to ``"cpu"`` so a
            checkpoint written on a GPU machine can still be read on a host
            without CUDA; ``load_state_dict`` copies the values onto whatever
            device the model already lives on.

    Returns:
        The deserialised checkpoint object, or ``None`` when loading failed
        for any reason (the error is printed).
    """
    try:
        return torch.load(ckpt_path, map_location=map_location)
    except Exception as e:
        # Deliberately best-effort: callers test for None and fall back.
        print(e)
        return None

# Only read the checkpoint file when resuming was requested. Loading it while
# training from scratch wastes time and, on a first run with no saved model,
# prints a misleading "file not found" error.
ckpt = None if TRAIN_FROM_SCRATCH else get_checkpoint(CKPT_PATH)
if ckpt is None:
    if not TRAIN_FROM_SCRATCH:
        print("Checkpoint not found.")
    print("Training from scratch ...")
    start_epoch = 0
    current_learning_rate = INITIAL_LR
else:
    print("Successfully loaded checkpoint: %s" %CKPT_PATH)
    net.load_state_dict(ckpt['net'])
    # The checkpoint stores the last finished epoch, so resume with the next.
    start_epoch = ckpt['epoch'] + 1
    current_learning_rate = ckpt['lr']
    print("Starting from epoch %d " %start_epoch)

print("Starting from learning rate %f:" %current_learning_rate)

### Defining my Preferred Loss Function and Optimization Algorithm

In [None]:
# Create loss function and specify regularization
criterion = nn.CrossEntropyLoss()
# Add optimizer. It starts from `current_learning_rate` rather than
# INITIAL_LR so that a run resumed from a checkpoint continues with the
# restored learning rate; when training from scratch the two are equal.
optimizer = optim.SGD(net.parameters(), lr=current_learning_rate, momentum=MOMENTUM,
                      weight_decay=REG, nesterov=True)

### Training and Validation of TONY_NET()

In [None]:
# Start the training/validation process
global_step = 0
best_val_acc = 0

# Learning-rate schedule: every DECAY_EPOCHS epochs the rate is set to
# INITIAL_LR * DECAY ** (epoch // DECAY_EPOCHS).
DECAY_EPOCHS = 2
DECAY = 0.96  # 0.98

# Per-epoch logs. Entries for epochs before `start_epoch` (resumed runs) stay 0.
avg_train_loss_log = np.zeros(EPOCHS)
avg_train_acc_log = np.zeros(EPOCHS)
avg_val_loss_log = np.zeros(EPOCHS)
avg_val_acc_log = np.zeros(EPOCHS)

for i in range(start_epoch, EPOCHS):
    print(datetime.datetime.now())
    # Switch to train mode
    net.train()
    print("Epoch %d:" %i)

    total_examples = 0
    correct_examples = 0
    train_loss = 0

    # Train the training dataset for 1 epoch.
    print(len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Copy inputs to device
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Zero the gradient
        optimizer.zero_grad()
        # Generate output
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        # Now backward loss
        loss.backward()
        # Apply gradient
        optimizer.step()
        # Calculate predicted labels (detached: no graph needed for metrics)
        _, predicted = torch.max(outputs.detach(), 1)
        # Accumulate accuracy and loss statistics
        total_examples += targets.size(0)
        correct_examples += predicted.eq(targets).sum().item()
        train_loss += loss.item()

        global_step += 1

    # Average over every batch of this epoch. Previously this was refreshed
    # only on every 100th global step, so the reported value was stale.
    avg_loss = train_loss / len(trainloader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f %%" %(avg_loss, avg_acc*100))

    # Training Log
    avg_train_loss_log[i] = avg_loss
    avg_train_acc_log[i] = avg_acc

    print(datetime.datetime.now())
    # Validate on the validation dataset
    print("Validation...")
    total_examples = 0
    correct_examples = 0

    net.eval()

    val_loss = 0
    # Disable gradient during validation
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            # Copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Generate output from the DNN.
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            # Calculate predicted labels
            _, predicted = torch.max(outputs, 1)
            # Accumulate accuracy and loss statistics
            total_examples += targets.size(0)
            correct_examples += predicted.eq(targets).sum().item()
            val_loss += loss.item()

    avg_loss = val_loss / len(valloader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f %%" %(avg_loss, avg_acc*100))

    # Validation Log
    avg_val_loss_log[i] = avg_loss
    avg_val_acc_log[i] = avg_acc

    # Learning Rate Decay
    if i % DECAY_EPOCHS == 0 and i != 0:
        current_learning_rate = INITIAL_LR*(DECAY**(i//DECAY_EPOCHS))
        for param_group in optimizer.param_groups:
            # Assign the learning rate parameter
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" %(current_learning_rate))

    # Save a checkpoint whenever validation accuracy improves. The stored
    # learning rate is the one the next epoch will use.
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        os.makedirs(CHECKPOINT_PATH, exist_ok=True)
        print("Saving ...")
        state = {'net': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_PATH, 'model.h5'))

print("Optimization finished.")

### Code to Analyze the Training and Validation Logs

In [None]:
# Draw one figure per metric (accuracy first, then loss), each showing the
# training and validation curves against the epoch index.
epoch_axis = range(0, EPOCHS)
curve_specs = (
    ("Train Acc and Val Acc vs Epochs", "Accuracy",
     avg_train_acc_log, avg_val_acc_log, ["Train Acc", "Val Acc"]),
    ("Train Loss and Val Loss vs Epochs", "Loss",
     avg_train_loss_log, avg_val_loss_log, ["Train Loss", "Val Loss"]),
)
for figure_title, y_label, train_curve, val_curve, legend_labels in curve_specs:
    plt.plot(epoch_axis, train_curve, epoch_axis, val_curve)
    plt.title(figure_title)
    plt.xlabel("Epochs")
    plt.ylabel(y_label)
    plt.legend(legend_labels)
    plt.show()

#### Code to Save the Logs

In [None]:
# Saving Log Files
# Write one CSV row per epoch. The value order matches the header
# (train_loss before train_acc), and epoch 0 is included.
# Make sure the target directory exists before opening the file.
os.makedirs('./logs', exist_ok=True)
with open('./logs/train_and_val_logs.csv', 'w') as log_file:
    log_file.write('epoch, train_loss, train_acc, val_acc, val_loss\n')
    for epoch in range(EPOCHS):
        log_file.write("%d, %0.5f, %0.5f, %0.5f, %0.5f\n" % (
            epoch,
            avg_train_loss_log[epoch],
            avg_train_acc_log[epoch],
            avg_val_acc_log[epoch],
            avg_val_loss_log[epoch],
        ))
        


### Testing TONY_NET() Saved Model

In [None]:
# Build the test dataset (see tools/my_testset_dataloader.py for TESTSET) and
# a loader that walks it in its stored order with the validation batch size.
num_threads = 16
testset = TESTSET(root=DATAROOT, transform=transform_val)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,
    num_workers=num_threads // 2,
)

In [None]:
# Pretty much a combination of code from above to get label predictions on test set
CKPT_PATH = "./saved_model/model.h5"


def get_checkpoint(ckpt_path, map_location="cpu"):
    """Load a checkpoint saved with ``torch.save``; return ``None`` on failure.

    Args:
        ckpt_path: Path of the checkpoint file.
        map_location: Forwarded to ``torch.load``. Defaults to ``"cpu"`` so a
            checkpoint written on a GPU machine can still be read on a host
            without CUDA; ``load_state_dict`` copies the values onto whatever
            device the model already lives on.

    Returns:
        The deserialised checkpoint object, or ``None`` when loading failed
        for any reason (the error is printed).
    """
    try:
        return torch.load(ckpt_path, map_location=map_location)
    except Exception as e:
        # Deliberately best-effort: callers test for None and react.
        print(e)
        return None

ckpt = get_checkpoint(CKPT_PATH)
if ckpt is None:
    # get_checkpoint has already printed the underlying error; fail with a
    # clear message instead of an opaque "'NoneType' is not subscriptable".
    raise RuntimeError("Could not load checkpoint: %s" % CKPT_PATH)
net.load_state_dict(ckpt['net'])
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Create loss function and specify regularization
# NOTE(review): criterion/optimizer are not used for inference below; they are
# kept only in case other cells rely on these names being (re)defined here.
criterion = nn.CrossEntropyLoss()
# Add optimizer
optimizer = optim.SGD(net.parameters(), INITIAL_LR, MOMENTUM, weight_decay=REG, nesterov = True)
net = net.to(device)
net.eval()

# Number of columns in the prediction table: sample id and predicted class.
TEST_CAT = 2

# Collect the predicted class indices batch by batch. Gathering them in a list
# keeps the result correct for any test-set size and any loader batch size.
batch_predictions = []
# Disable gradient during inference
with torch.no_grad():
    for inputs in testloader:
        # Copy inputs to device
        inputs = inputs.to(device)
        # Generate output from the DNN.
        outputs = net(inputs)
        # Calculate predicted labels
        _, predicted = torch.max(outputs, 1)
        # cpu() copies the tensor to host memory before converting to NumPy.
        batch_predictions.append(predicted.cpu().numpy())

if batch_predictions:
    predictions = np.concatenate(batch_predictions)
else:
    predictions = np.zeros(0, dtype=int)

# One row per test sample: column 0 is the running id, column 1 the class.
TEST_LEN = len(predictions)
pred_log = np.zeros((TEST_LEN, TEST_CAT), dtype = int)
pred_log[:,0] = np.arange(0,TEST_LEN, dtype = int)
pred_log[:,1] = predictions

pred_log = pred_log.astype(int)

### Save Model Test Predictions to .csv File

In [None]:
# Export the predictions as a two-column table (sample id, predicted class)
# without the DataFrame index.
id_column = pred_log[:, 0]
category_column = pred_log[:, 1]
test_df = pd.DataFrame({'Id': id_column, 'Category': category_column})
test_df.to_csv(r'./test_results.csv', index=False)