In [1]:
import torch
import torchvision
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import os, sys, subprocess, json, argparse
from itertools import product


In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("device:", device)

# Root directory for data and run artifacts. It stays empty (paths are then
# relative to the working directory) unless the notebook runs on Google Colab.
dir_root = ''
if 'google.colab' in str(get_ipython()):
    # On Colab, mount Google Drive and keep everything under the homework folder.
    from google.colab import drive
    drive.mount('/content/drive/')
    dir_root = '/content/drive/MyDrive/Colab Notebooks/ESE546/hw4'

print("dir_root:", dir_root)

device: cuda
dir_root: 


In [3]:
# Directory that holds (or will hold) the CIFAR-10 files.
data_dir = os.path.join(dir_root, 'data')
print(data_dir)

# Download only when the extracted 'cifar-10-batches-py' folder is missing.
download = not os.path.exists(os.path.join(data_dir, 'cifar-10-batches-py'))
if download:
    print('Dataset not found, downloading...')
else:
    print('Dataset found, not downloading.')

# Preprocessing: convert images to tensors, then normalise every channel
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split, shuffled, in mini-batches of 16.
trainset = torchvision.datasets.CIFAR10(
    root=data_dir, train=True, download=download, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=16, shuffle=True)

# Test split, served in a fixed order, in mini-batches of 16.
testset = torchvision.datasets.CIFAR10(
    root=data_dir, train=False, download=download, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=16, shuffle=False)

# Human-readable names for the ten class indices.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

data
Dataset found, not downloading.


In [4]:
# File allcnn.py provided by Prof. Pratik Chaudhari
# at https://gist.github.com/pratikac/68d6d94e4739786798e90691fb1a581b

class View(nn.Module):
    """Reshape layer that views its input as rows of a fixed width."""

    def __init__(self, o):
        """Remember ``o``, the size of the last dimension produced by ``forward``."""
        super().__init__()
        self.o = o

    def forward(self, x):
        """Return ``x`` viewed as a 2-D tensor of shape ``(-1, self.o)``."""
        width = self.o
        return x.view(-1, width)

class allcnn_t(nn.Module):
    """All-convolutional classifier that produces 10 scores per input image.

    The network is three groups of conv blocks separated by dropout. The
    last block of each of the first two groups uses stride 2, and the final
    1x1 conv block has 10 output channels. Those channels are averaged with
    ``AvgPool2d(8)`` and reshaped to ``(-1, 10)`` by ``View``.

    Args:
        c1: channel width of the first group of conv blocks.
        c2: channel width of the second and third groups.
    """

    def __init__(self, c1=96, c2=192):
        super().__init__()
        drop_p = 0.5

        def conv_unit(in_ch, out_ch, kernel, stride=1, pad=0):
            """Build one block: Conv2d, then in-place ReLU, then BatchNorm2d."""
            stages = [
                nn.Conv2d(in_ch, out_ch, kernel, stride=stride, padding=pad),
                nn.ReLU(True),
                nn.BatchNorm2d(out_ch),
            ]
            return nn.Sequential(*stages)

        # Layers are created in the same order as they are applied, so the
        # indices inside ``self.m`` match the positions in this list.
        layers = [
            nn.Dropout(0.2),
            # Group 1: c1 channels, last block uses stride 2.
            conv_unit(3, c1, 3, 1, 1),
            conv_unit(c1, c1, 3, 1, 1),
            conv_unit(c1, c1, 3, 2, 1),
            nn.Dropout(drop_p),
            # Group 2: c2 channels, last block uses stride 2.
            conv_unit(c1, c2, 3, 1, 1),
            conv_unit(c2, c2, 3, 1, 1),
            conv_unit(c2, c2, 3, 2, 1),
            nn.Dropout(drop_p),
            # Group 3: two 3x3 blocks, then a 1x1 block with 10 output channels.
            conv_unit(c2, c2, 3, 1, 1),
            conv_unit(c2, c2, 3, 1, 1),
            conv_unit(c2, 10, 1, 1),
            # Average each of the 10 channel maps, then reshape to (-1, 10).
            nn.AvgPool2d(8),
            View(10),
        ]
        self.m = nn.Sequential(*layers)

        n_params = sum(p.numel() for p in self.m.parameters())
        print('Num parameters: ', n_params)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.m(x)


In [5]:
# Weight decay = 1e-3
# SGD with Nesterov’s momentum of 0.9
# Dropout = 0.5
# Learning rate starts with eta_0 = 1e-5,
# then eta_tp1 = 1.1 * eta_t, (t <= 100)

# First train 100 iters
# Record the average training loss of each mini-batch separately and the learning rate that was used for it for about 100 iterations.
# Plot the training loss (Y-axis) as a function of the learning rate (X-axis); use a log-scale for the X-axis.

# Initialize the TensorBoard logger. train() writes the per-mini-batch loss to
# it (tag 'loss_<model_name>') whenever it is called with plot=True.
logger = SummaryWriter(os.path.join(dir_root, 'runs/cnn_experiment'))

def train(net, optimizer, criterion, train_loader, test_loader, epochs, model_name, plot, lr_growth=1.05):
    """Train ``net`` with a growing learning rate, evaluating after every epoch.

    Relies on the notebook-level ``device`` and, when ``plot`` is true, on the
    notebook-level ``logger``.

    Args:
        net: model to train; it is moved to ``device``.
        optimizer: optimizer over ``net``'s parameters. The ``lr`` of every
            param group is multiplied by ``lr_growth`` at the end of each epoch.
        criterion: loss, called as ``criterion(outputs, labels)``.
        train_loader: iterable of ``(images, labels)`` training batches.
        test_loader: iterable of ``(images, labels)`` evaluation batches.
        epochs: number of passes over ``train_loader``.
        model_name: suffix of the logged scalar tag (``'loss_' + model_name``).
        plot: when true, log every mini-batch loss to ``logger``.
        lr_growth: multiplicative learning-rate factor applied once per epoch.

    Returns:
        Tuple ``(val_error, val_loss_values, train_error, train_loss_values,
        learning_rates)`` of lists with one entry per epoch. Errors are
        percentages. Loss values are the sum of the per-batch loss values
        over the epoch (not the mean). ``learning_rates`` holds the ``lr`` of
        the first param group at the start of each epoch.
    """
    model = net.to(device)
    total_step = len(train_loader)
    overall_step = 0  # global mini-batch counter, used as the logging step
    train_loss_values = []
    train_error = []
    val_loss_values = []
    val_error = []
    learning_rates = []

    for epoch in range(epochs):
        # The evaluation pass at the end of the previous epoch left the model
        # in eval mode; switch back so dropout and batch-norm behave as in
        # training for every epoch, not just the first one.
        model.train()

        correct = 0
        total = 0
        running_loss = 0.0
        learning_rates.append(optimizer.param_groups[0]['lr'])

        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)

            # Forward
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            overall_step += 1
            batch_loss = loss.item()
            running_loss += batch_loss
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if (i+1) % 1000 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Learning Rate: {:.3g}, Loss: {:.4f}'.format(epoch+1, epochs, i+1, total_step, optimizer.param_groups[0]['lr'], batch_loss))
            if plot:
                # 1-based global step, so successive batches get distinct x-values.
                logger.add_scalar('loss_' + model_name, batch_loss, overall_step)

        # Update learning rate every epoch
        for param_group in optimizer.param_groups:
            param_group['lr'] *= lr_growth

        train_loss_values.append(running_loss)
        train_error.append(100 - 100 * correct / total)

        # Evaluation pass: eval mode and no gradient tracking.
        model.eval()
        val_running_loss = 0.0
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in test_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                val_running_loss += criterion(outputs, labels).item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print('Accuracy of the network on the test images: {} %'.format(100 * correct / total))
        val_error.append(100 - 100 * correct / total)
        val_loss_values.append(val_running_loss)
    return val_error, val_loss_values, train_error, train_loss_values, learning_rates


In [6]:
# Weight decay = 1e-3
# SGD with Nesterov’s momentum of 0.9
# Dropout = 0.5
# Learning rate starts with eta_0 = 1e-5,
# then eta_tp1 = 1.1 * eta_t, (t <= 100)
# NOTE: train() as written grows the learning rate by a factor of 1.05 once
# per epoch by default, not by 1.1 per iteration.

model_path_100 = os.path.join(dir_root, 'runs/hw4p3_model_100_epoch.pt')

# Per-epoch curves saved next to the checkpoint; the plotting cell loads them.
curve_paths_100 = {
    name: os.path.join(dir_root, f'runs/{name}_100.npy')
    for name in ('train_error', 'train_loss_values', 'val_error',
                 'val_loss_values', 'learning_rates')
}

# Set to True to retrain even when all saved artifacts are already on disk.
TRAIN_FLAG = True

# Skip training only when the checkpoint AND every curve file exist: the
# plotting cell needs the curves, so a checkpoint alone is not enough.
artifacts_ready = os.path.exists(model_path_100) and all(
    os.path.exists(path) for path in curve_paths_100.values())

if TRAIN_FLAG or not artifacts_ready:
    # Make sure the output directory exists before the long run, so the
    # saves at the end cannot fail on a missing folder.
    os.makedirs(os.path.join(dir_root, 'runs'), exist_ok=True)

    model = allcnn_t().to(device)
    epochs = 100
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-5, momentum=0.9, weight_decay=1e-3, nesterov=True)

    val_error, val_loss_values, train_error, train_loss_values, learning_rates = train(model, optimizer, criterion, trainloader, testloader, epochs, 'cnn_curve_100', True)

    # Saves the whole module object (not just its state_dict).
    torch.save(model, model_path_100)

    curves_100 = {
        'train_error': train_error,
        'train_loss_values': train_loss_values,
        'val_error': val_error,
        'val_loss_values': val_loss_values,
        'learning_rates': learning_rates,
    }
    for name, path in curve_paths_100.items():
        np.save(path, np.array(curves_100[name]))
else:
    print(f"Model already exists at {model_path_100}, skipping training.")

Model already exists at runs/hw4p3_model_100_epoch.pt, skipping training.


In [None]:
# Load the per-epoch curves saved by the training cell
train_error_100 = np.load(os.path.join(dir_root, 'runs/train_error_100.npy'))
train_loss_values_100 = np.load(os.path.join(dir_root, 'runs/train_loss_values_100.npy'))
val_error_100 = np.load(os.path.join(dir_root, 'runs/val_error_100.npy'))
val_loss_values_100 = np.load(os.path.join(dir_root, 'runs/val_loss_values_100.npy'))
learning_rates_100 = np.load(os.path.join(dir_root, 'runs/learning_rates_100.npy'))

# Plot the training loss (Y-axis) as a function of the learning rate (X-axis); use a log-scale for the X-axis.
plt.figure()
# x = learning rate, y = training loss (the two arrays have one entry per epoch)
plt.plot(learning_rates_100, train_loss_values_100)
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Training Loss')
plt.title('Training Loss vs Learning Rate')
plt.show()

In [None]:
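# Then
# Use a cosine learning rate schedule with a warmup:
# \eta(t) = 1e-4 + t / T0 * eta_max,                            if t <= T0
# \eta(t) = eta_max * cos(pi / 2 * (t - T0) / (T - T0)) + 1e-6, if T0 < t <= T
# NOTE(review): the warmup term originally read "t / T"; "t / T0" is what makes
# the two branches (nearly) meet at t = T0 -- confirm against the assignment.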

In [None]:
# Shut down the runtime if this is Google Colab; do nothing elsewhere.
import time

if 'google.colab' in str(get_ipython()):
    from google.colab import runtime
    # First sleep for a while so that changes to the notebook are saved.
    # The wait is only needed right before the Colab runtime is released,
    # so it lives inside the guard instead of delaying local runs by a minute.
    time.sleep(60)
    runtime.unassign()