In [96]:
import os
import sys
import random
import warnings

import numpy as np
import pandas as pd
import time

import matplotlib.pyplot as plt
import io
import pickle
import json

from tqdm import tqdm
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

#import torchvision.datasets as dset
#import torchvision.transforms as T
import torch.nn.functional as F
from basic import *

In [13]:
# For this cell used same code from PyTorch notebook in assignment 2 of Stanford's CS231n Spring 2018 offering
preprocessData = False # To preprocess data set this to True
USE_GPU = False
dtype = torch.float32
if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
    dtype = torch.float32

# Constant to control how frequently we print train loss
print_every = 1

print('using device:', device)

using device: cpu


In [14]:
# Next two cells, code belongs to [1]. Minor changes made to accomodate to our use 
# (Using PyTorch instead of Keras/tensorflow)
IMG_WIDTH = 28
IMG_HEIGHT = 28
IMG_CHANNELS = 1
PATH = './'
epsilon = 1e-12 #For numerical stability

warnings.filterwarnings('ignore', category=UserWarning, module='skimage')
seed = 1
random.seed = seed
np.random.seed = seed

In [119]:
trainCSV = "./train_npy.csv"
trainDF = pd.read_csv(trainCSV, header = 0)
trainDF = trainDF.values
X_t = trainDF[:, 0:-5]
X_train = np.zeros((len(X_t), 1, 28, 28), dtype= np.float32)
for i, row in enumerate(X_t):
    X_train[i] = np.reshape(X_t[i, :], (1, 28, 28))
# want shape [samples, 1, 28, 28]
Y_t = trainDF[:, -5:]   #label x sample
Y_train = np.zeros((Y_t.shape[0]))
# Pytorch needs indices
for i, row in enumerate(Y_t):
    Y_train[i] = np.argmax(row)
devCSV = "./dev_npy.csv"
devDF = pd.read_csv(devCSV, header = 0)
devDF = devDF.values
X_d = devDF[:, 0:-5]
X_dev = np.zeros((len(X_d), 1, 28, 28), dtype= np.float32)
for i, row in enumerate(X_d):
    X_dev[i] = np.reshape(X_d[i, :], (1, 28, 28))
Y_d = devDF[:, -5:]   #label x sample
Y_dev = np.zeros((Y_d.shape[0]))
for i, row in enumerate(Y_d):
    Y_train[i] = np.argmax(row)
print ("X_train shape: " + str(X_train.shape))
print ("Y_train shape: " + str(Y_train.shape))
print ("X_dev shape: " + str(X_dev.shape))
print ("Y_dev shape: " + str(Y_dev.shape))


X_train shape: (364984, 1, 28, 28)
Y_train shape: (364984,)
X_dev shape: (121661, 1, 28, 28)
Y_dev shape: (121661,)


In [17]:
def showVisualComparisons(X, y, ex):
    plt.imshow(np.uint8(np.reshape(X[ex, :], (28, 28))))
    plt.show()
    print(Y_train[:, ex])

In [167]:
def trainModel(model, x_train, y_train, optimizer, epochs = 1, mini_batch_size = 64, noVal = False):
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    T = 0
    num_batches = int(len(x_train)/mini_batch_size)
    num_remaining = len(x_train) - num_batches * mini_batch_size
    loss_history = []
    correct = 0
    for e in range(epochs):
        for t in range(num_batches):
            rand_indices = np.random.choice(len(x_train), mini_batch_size)
            x = torch.from_numpy(x_train[rand_indices, :, :, :])
            y = torch.from_numpy(y_train[rand_indices])
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=dtype)
            y = y.type(torch.LongTensor)
            print('y shape: ', y.shape)

            preds = model(x)
            _, predicted = torch.max(preds.data, 1)
            correct += (predicted == y).sum().item()
            
            loss = F.cross_entropy(preds, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if T % print_every == 0:
                currLoss = loss.item()
                loss_history.append(currLoss)
                print('Epoch %d, Iteration %d, loss = %.4f' % (e, t, currLoss))
            if (num_remaining <= 0 and t == (num_batches -1)):
                perf = calculatePerformance(x_train, y_train, model)
                print('Train performance at epoch %d is %.4f' % (e, perf))
                if (noVal == False):
                    perf = calculatePerformance(X_val, Y_val, model)
                    print('Val performance at epoch %d is %.4f' % (e, perf))
            T +=1
        if num_remaining > 0:
            rand_indices = np.random.choice(len(x_train), num_remaining)

            x = torch.from_numpy(x_train[rand_indices, :, :, :])
            y = torch.from_numpy(y_train[rand_indices])
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=dtype)
            y = y.type(torch.LongTensor)
            


            preds = model(x)
            _, predicted = torch.max(preds.data, 1)
            correct += (predicted == y).sum().item()
            #values, indices = torch.max(preds, 1)
            loss = F.cross_entropy(preds, y)
            #loss(preds, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()
            if T % print_every == 0:
                currLoss = loss.item()
                loss_history.append(currLoss)
                print('Epoch %d, Iteration %d, loss = %.4f' % (e, num_batches, currLoss))
            perf = (correct/(float(len(x_train))))
            print('Train performance at epoch %d is %.4f' % (e, perf))
            if (noVal == False):
                perf = (correct/(float(len(x_train))))

                print('Val performance at epoch %d is %.4f' % (e, perf))
            T +=1
    return perf, loss_history

In [168]:
# Overfitting data first
bestPerf = -1
lossHistory = None
lossHistories = {}
print_every = 1
bestModel = None
bestLoss = 10000
lrUsed = 0
x_train = X_train[0:50, :, :, :]
y_train = Y_train[0:50]
lrs = []
for i in range(4):
    lrs.append(5*np.random.rand()*1e-3)
# lrs = [1e-7,1e-6,1e-5,1e-4,1e-3]
lrs.append(.002147418314081924) # Best result from last random searches
for lr in lrs:
    print('Trying out learning rate of ', lr)
    model = NNet()
    optimizer = optim.Adam(model.parameters(), lr = lr)
    modelPerf = trainModel(model, x_train, y_train, optimizer, epochs = 25, noVal = True)
    lossHistories[str(lr)] = modelPerf[1]
    if modelPerf[1][len(modelPerf[1])-1] < bestLoss:
        bestLoss = modelPerf[1][len(modelPerf[1])-1]
        bestPerf = modelPerf[0]
        lossHistory = modelPerf[1]
        bestModel = model
        lrUsed = lr

Trying out learning rate of  0.004210756309466558
Epoch 0, Iteration 0, loss = 12.4801
Train performance at epoch 0 is 0.1800
Epoch 1, Iteration 0, loss = 36.9484
Train performance at epoch 1 is 0.5200
Epoch 2, Iteration 0, loss = 11.7361
Train performance at epoch 2 is 0.9800
Epoch 3, Iteration 0, loss = 11.0770
Train performance at epoch 3 is 1.2600
Epoch 4, Iteration 0, loss = 14.1539
Train performance at epoch 4 is 1.5800
Epoch 5, Iteration 0, loss = 3.6146
Train performance at epoch 5 is 2.2600
Epoch 6, Iteration 0, loss = 9.9977
Train performance at epoch 6 is 2.7400
Epoch 7, Iteration 0, loss = 8.8908
Train performance at epoch 7 is 3.2600
Epoch 8, Iteration 0, loss = 6.9032
Train performance at epoch 8 is 3.7800
Epoch 9, Iteration 0, loss = 1.8420
Train performance at epoch 9 is 4.5800
Epoch 10, Iteration 0, loss = 2.7128
Train performance at epoch 10 is 5.2800
Epoch 11, Iteration 0, loss = 4.0147
Train performance at epoch 11 is 5.8200
Epoch 12, Iteration 0, loss = 1.2400
Trai