In [38]:
# import linear algebra and data manipulation libraries
import numpy as np
import pandas as pd

# import matplotlib for plotting
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt

# import helper libraries
import requests
from io import BytesIO # Use When expecting bytes-like objects
import pickle
from collections import OrderedDict
import os
from os import path
import time
import argparse

# import PIL for image manipulation
from PIL import Image

# import machine learning libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# import pytorch
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms

import image_utils
from image_utils import add_flipped_and_rotated_images

from simple_conv_nn import SimpleCNN

In [8]:
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Argument parser')

    parser.add_argument('--save_dir', action='store', default = ' ',
                        help='Directory to save model checkpoint')

    parser.add_argument('--learning_rate', type = float, action='store', default = 0.003,
                        help='Model hyperparameters: learning rate')

    parser.add_argument('--epochs', type = int, action='store', default = 30,
                        help='Model hyperparameters: epochs')

    parser.add_argument('--weight_decay', type = float, action='store', default = 0,
                        help='Model hyperparameters: weight decay (regularization)')

    parser.add_argument('--dropout', type = float, action='store', default = 0.0,
                        help='Model hyperparameters: dropout')

    parser.add_argument('--architecture', action='store', default = 'nn',
                        help='Model architecture: nn - feed forward neural network with 1 hidden layer.',
                        choices = ['nn', 'conv'])

    parser.add_argument('--add_data', action='store_true',
                        help='Add flipped and rotated images to the original training set.')

    parser.add_argument('--mini_batches', type = int, action='store', default = 1000,
                        help='Number of minibatches.')

    parser.add_argument('--optimizer', action='store', default = 'SGD',
    choices=['SGD', 'Adam'],
    help='Optimizer for fitting the model.')

    parser.add_argument('--gpu', action='store_true',
                        help='Run training on GPU')
    results = parser.parse_args()

    learning_rate = results.learning_rate
    epochs = results.epochs
    weight_decay = results.weight_decay
    dropout = results.dropout
    architecture = results.architecture
    n_chunks = results.mini_batches
    optimizer = results.optimizer

    if (results.gpu == True):
        device = 'cuda'
    else:
        device = 'cpu'

    if (results.save_dir == ' '):
        save_path = 'checkpoint.pth'
    else:
        save_path = results.save_dir + '/' + 'checkpoint.pth'

    # Load data
    X_train, y_train, X_test, y_test = load_data()

    # Add flipped and rotated images to the dataset
    if (results.add_data == True):
        X_train, y_train = add_flipped_and_rotated_images(X_train, y_train)

    # Save datasets to disk if required
    save_data(X_train, y_train, X_test, y_test, force = results.add_data)

    # Convert to tensors
    train = torch.from_numpy(X_train).float()
    labels = torch.from_numpy(y_train).long()
    test = torch.from_numpy(X_test).float()
    test_labels = torch.from_numpy(y_test).long()

    # Hyperparameters for our network
    input_size = 784
    hidden_sizes = [128, 100, 64]
    output_size = 10

    # Build model
    model = build_model(input_size, output_size, hidden_sizes, architecture = architecture, dropout = dropout)

    # Fit model
    if (architecture == 'nn'):
        fit_model(model, train, labels, epochs = epochs, n_chunks = n_chunks, learning_rate = learning_rate, weight_decay = weight_decay, optimizer = optimizer)
    else:
        fit_conv(model, train, labels, epochs = epochs, n_chunks = n_chunks, learning_rate = learning_rate, weight_decay = weight_decay, optimizer = optimizer)

    #plot_learning_curve(input_size, output_size, hidden_sizes, train, labels, y_train, test, y_test, learning_rate = learning_rate, dropout = dropout, weight_decay = weight_decay, n_chunks = n_chunks, optimizer = optimizer)
    #plot_learning_curve_conv(input_size, output_size, hidden_sizes, train, labels, y_train, test, y_test, learning_rate = learning_rate, dropout = dropout, weight_decay = weight_decay, n_chunks = n_chunks, optimizer = optimizer)

    # Evaluate model
    #evaluate_model(model, train, y_train, test, y_test, architecture = architecture)
    #test_model(model, test[0], architecture = architecture)

    # Save the model
    save_model(model,architecture, input_size, output_size, hidden_sizes, dropout, filepath = save_path)

    #compare_hyperparameters(input_size, output_size, hidden_sizes, train, labels, y_train, test, y_test, learning_rate, n_chunks = n_chunks, optimizer = optimizer)

    #loaded_model = load_model(architecture)
    #loaded_model.eval()
    #pred = test_model(loaded_model, test[0], architecture = architecture)

if __name__ == '__main__':
    main()

usage: ipykernel_launcher [-h] [--save_dir SAVE_DIR]
                          [--learning_rate LEARNING_RATE] [--epochs EPOCHS]
                          [--weight_decay WEIGHT_DECAY] [--dropout DROPOUT]
                          [--architecture {nn,conv}] [--add_data]
                          [--mini_batches MINI_BATCHES]
                          [--optimizer {SGD,Adam}] [--gpu]
ipykernel_launcher: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme="hmac-sha256" --Session.key=b"3e5043c8-c333-4242-bc6f-ae17ea62c731" --shell=9002 --transport="tcp" --iopub=9004 --f=/var/folders/0f/dj_hz1316w7c4qhdk33w3p3w0000gn/T/tmp-14831Fb7XbcN6TOkK.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [60]:
def load_data():
    """
    Function loads quick draw dataset. If no data is loaded yet, the datasets
    are loaded from the web. If there are already loaded datasets, then data
    is loaded from the disk (pickle files).

    INPUTS: None

    OUTPUT:
        X_train - train dataset
        y_train - train dataset labels
        X_test - test dataset
        y_test - test dataset labels
    """
    print("Loading data \n")

    # Check for already loaded datasets
    if not(path.exists('xtrain_doodle.pickle')):
        # Load from web
        print("Loading data from the web \n")

        # Classes we will load
        categories = ['bee', 'cat', 'cow', 'dog', 'duck', 'horse', 'pig', 'rabbit', 'snake', 'whale']

        # Dictionary for URL and class labels
        URL_DATA = {}
        for category in categories:
            URL_DATA[category] = 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/' + category +'.npy'

        # Load data for classes in dictionary
        classes_dict = {}
        for key, value in URL_DATA.items():
            response = requests.get(value)
            classes_dict[key] = np.load(BytesIO(response.content))

        # Generate labels and add labels to loaded data
        for i, (key, value) in enumerate(classes_dict.items()):
            value = value.astype('float32')/255.
            if i == 0:
                classes_dict[key] = np.c_[value, np.zeros(len(value))]
            else:
                classes_dict[key] = np.c_[value,i*np.ones(len(value))]

        # Create a dict with label codes
        label_dict = {0:'bee', 1:'cat', 2:'cow', 3:'dog', 4:'duck',
                      5:'horse', 6:'pig', 7:'rabbit', 8:'snake', 9:'whale'}

        lst = []
        for key, value in classes_dict.items():
            lst.append(value[:3000])
        doodles = np.concatenate(lst)

        # Split the data into features and class labels (X & y respectively)
        y = doodles[:,-1].astype('float32')
        X = doodles[:,:784]

        # Split each dataset into train/test splits
        X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=1)
    else:
        # Load data from pickle files
        print("Loading data from pickle files \n")

        file = open("xtrain_doodle.pickle",'rb')
        X_train = pickle.load(file)
        file.close()

        file = open("xtest_doodle.pickle",'rb')
        X_test = pickle.load(file)
        file.close()

        file = open("ytrain_doodle.pickle",'rb')
        y_train = pickle.load(file)
        file.close()

        file = open("ytest_doodle.pickle",'rb')
        y_test = pickle.load(file)
        file.close()

    return X_train, y_train, X_test, y_test, classes_dict

In [63]:
categories = ['bee', 'cat', 'cow', 'dog', 'duck', 'horse', 'pig', 'rabbit', 'snake', 'whale']

URL_DATA = {}
for category in categories:
    URL_DATA[category] = 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/' + category +'.npy'

URL_DATA

{'bee': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/bee.npy',
 'cat': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/cat.npy',
 'cow': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/cow.npy',
 'dog': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/dog.npy',
 'duck': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/duck.npy',
 'horse': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/horse.npy',
 'pig': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/pig.npy',
 'rabbit': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/rabbit.npy',
 'snake': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/snake.npy',
 'whale': 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/whale.npy'}

In [66]:
classes_dict = {}
for key, value in URL_DATA.items():
    response = requests.get(value)
    classes_dict[key] = np.load(BytesIO(response.content))

In [67]:
classes_dict

{'bee': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 'cat': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 'cow': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 'dog': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 'duck': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0

In [69]:
len(classes_dict['dog'])

152159

In [95]:
len(classes_dict['cat'])

123202

In [70]:
# Generate labels and add labels to loaded data
for i, (key, value) in enumerate(classes_dict.items()):
    value = value.astype('float32')/255.
    if i == 0:
        classes_dict[key] = np.c_[value, np.zeros(len(value))]
    else:
        classes_dict[key] = np.c_[value,i*np.ones(len(value))]

In [82]:
classes_dict['bee']

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [83]:
classes_dict['bee'][0]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.21568628, 0.33725491,
       0.40000001, 0.40000001, 0.21960784, 0.        , 0.     

In [72]:
label_dict = {0:'bee', 1:'cat', 2:'cow', 3:'dog', 4:'duck',
              5:'horse', 6:'pig', 7:'rabbit', 8:'snake', 9:'whale'}

lst = []
for key, value in classes_dict.items():
    lst.append(value[:3000])
doodles = np.concatenate(lst)

In [74]:
len(doodles)

30000

In [80]:
doodles[0]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.21568628, 0.33725491,
       0.40000001, 0.40000001, 0.21960784, 0.        , 0.     

In [86]:
y = doodles[:,-1].astype('float32')
X = doodles[:,:784]

In [90]:
y

array([0., 0., 0., ..., 9., 9., 9.], dtype=float32)

In [56]:
y_train

array([1., 1., 2., ..., 4., 0., 9.], dtype=float32)

In [57]:
X_train[0]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [85]:
X_train[1230]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.05098039, 0.3764706 , 0.35686275, 0.04313726, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.73725492, 1.        ,
       1.        , 0.81568629, 0.10196079, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [46]:
def save_data(X_train, y_train, X_test, y_test, force = False):
    """
    The function saves datasets to disk as pickle files.

    INPUT:
        X_train - train dataset
        y_train - train dataset labels
        X_test - test dataset
        y_test - test dataset labels
        force - forced saving of the files

    OUTPUT: None
    """
    print("Saving data \n")

    # Check for already saved files
    if not(path.exists('xtrain_doodle.pickle')) or force:
        # Save X_train dataset as a pickle file
        with open('xtrain_doodle.pickle', 'wb') as f:
            pickle.dump(X_train, f)

        # Save X_test dataset as a pickle file
        with open('xtest_doodle.pickle', 'wb') as f:
            pickle.dump(X_test, f)

        # Save y_train dataset as a pickle file
        with open('ytrain_doodle.pickle', 'wb') as f:
            pickle.dump(y_train, f)

        # Save y_test dataset as a pickle file
        with open('ytest_doodle.pickle', 'wb') as f:
            pickle.dump(y_test, f)

In [47]:
save_data(X_train, y_train, X_test, y_test)

Saving data 



In [48]:
type(X_train)

numpy.ndarray

In [91]:
type(X_test)

numpy.ndarray

In [92]:
type(y_train)

numpy.ndarray

In [49]:
len(X_train)

21000

In [28]:
len(X_test)

9000

In [112]:
def build_model(input_size, output_size, architecture = 'nn', dropout = 0.0):
    if (architecture == 'nn'):
        # Build a feed-forward network
        model = nn.Sequential(OrderedDict([
                              ('fc1', nn.Linear(input_size, 128)),
                              ('relu1', nn.ReLU()),
                              ('fc2', nn.Linear(128, 100)),
                              ('bn2', nn.BatchNorm1d(num_features=100)),
                              ('relu2', nn.ReLU()),
                              ('dropout', nn.Dropout(dropout)),
                              ('fc3', nn.Linear(100, 64)),
                              ('bn3', nn.BatchNorm1d(num_features=64)),
                              ('relu3', nn.ReLU()),
                              ('logits', nn.Linear(64, output_size))]))
    else:
        if (architecture == 'conv'):
            # Build a simple convolutional network
            model = SimpleCNN(64, 10)
    return model

In [113]:
rnn_model = build_model(input_size=784, output_size=10)

In [114]:
rnn_model

Sequential(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=128, out_features=100, bias=True)
  (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (dropout): Dropout(p=0.0, inplace=False)
  (fc3): Linear(in_features=100, out_features=64, bias=True)
  (bn3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (logits): Linear(in_features=64, out_features=10, bias=True)
)

In [116]:
cnn_model = build_model(input_size = 784, output_size=10, architecture='conv')

In [117]:
cnn_model

SimpleCNN(
  (conv1): Conv2d(1, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3528, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [136]:
X_train = pickle.load(open('xtrain.pickle', 'rb'))
X_test = pickle.load(open('xtest.pickle', 'rb'))
y_train = pickle.load(open('ytrain.pickle', 'rb'))
y_test = pickle.load(open('ytest.pickle', 'rb'))

In [137]:
def shuffle(X_train, y_train):

    from sklearn.utils import shuffle
    X_train_shuffled, y_train_shuffled = shuffle(X_train, y_train, random_state=42)

    y_train_shuffled = y_train_shuffled.reshape((X_train.shape[0], 1))

    X_train_shuffled = torch.from_numpy(X_train_shuffled).float()
    y_train_shuffled = torch.from_numpy(y_train_shuffled).long()

    return X_train_shuffled, y_train_shuffled

In [138]:
X_train_shuffled, y_train_shuffled = shuffle(X_train, y_train)

In [121]:
type(X_train_shuffled)

torch.Tensor

In [122]:
type(y_train_shuffled)

torch.Tensor

In [123]:
def fit_model(model, X_train, y_train, epochs = 100, n_chunks = 1000, learning_rate = 0.003, weight_decay = 0, optimizer = 'SGD'):

    print("Fitting model with epochs = {epochs}, learning rate = {lr}\n"\
    .format(epochs = epochs, lr = learning_rate))

    criterion = nn.CrossEntropyLoss()

    if (optimizer == 'SGD'):
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay= weight_decay)
    else:
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay= weight_decay)

    print_every = 100

    steps = 0

    for e in range(epochs):
        running_loss = 0
        images = torch.chunk(X_train, n_chunks)
        labels = torch.chunk(y_train, n_chunks)

        for i in range(n_chunks):
            steps += 1
            optimizer.zero_grad()

            # Forward and backward passes
            output = model.forward(images[i])
            loss = criterion(output, labels[i].squeeze())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:
                print("Epoch: {}/{}... ".format(e+1, epochs),
                      "Loss: {:.4f}".format(running_loss/print_every))

                running_loss = 0


In [129]:
fit_model(rnn_model, X_train_shuffled, y_train_shuffled)

Fitting model with epochs = 100, learning rate = 0.003

Epoch: 1/100...  Loss: 2.2537
Epoch: 1/100...  Loss: 2.0904
Epoch: 1/100...  Loss: 1.9694
Epoch: 1/100...  Loss: 1.8898
Epoch: 1/100...  Loss: 1.8384
Epoch: 1/100...  Loss: 1.7660
Epoch: 1/100...  Loss: 1.7301
Epoch: 1/100...  Loss: 1.7217
Epoch: 1/100...  Loss: 1.6917
Epoch: 1/100...  Loss: 1.6503
Epoch: 2/100...  Loss: 1.6063
Epoch: 2/100...  Loss: 1.6096
Epoch: 2/100...  Loss: 1.5696
Epoch: 2/100...  Loss: 1.5251
Epoch: 2/100...  Loss: 1.5405
Epoch: 2/100...  Loss: 1.4765
Epoch: 2/100...  Loss: 1.4631
Epoch: 2/100...  Loss: 1.4929
Epoch: 2/100...  Loss: 1.4936
Epoch: 2/100...  Loss: 1.4398
Epoch: 3/100...  Loss: 1.4254
Epoch: 3/100...  Loss: 1.4438
Epoch: 3/100...  Loss: 1.4103
Epoch: 3/100...  Loss: 1.3643
Epoch: 3/100...  Loss: 1.3967
Epoch: 3/100...  Loss: 1.3275
Epoch: 3/100...  Loss: 1.3228
Epoch: 3/100...  Loss: 1.3605
Epoch: 3/100...  Loss: 1.3801
Epoch: 3/100...  Loss: 1.3096
Epoch: 4/100...  Loss: 1.3054
Epoch: 4/100..

In [131]:
def load_model(architecture = 'nn', filepath = 'checkpoint.pth'):
    """
    Function loads the model from checkpoint.

    INPUT:
        architecture - model architecture ('nn' - for fully connected neural network, 'conv' - for convolutional neural
        network)
        filepath - path for the saved model

    OUTPUT:
        model - loaded pytorch model
    """

    print("Loading model from {} \n".format(filepath))

    if architecture == 'nn':
        checkpoint = torch.load(filepath)
        input_size = checkpoint['input_size']
        output_size = checkpoint['output_size']
        hidden_sizes = checkpoint['hidden_layers']
        dropout = checkpoint['dropout']
        model = nn.Sequential(OrderedDict([
                              ('fc1', nn.Linear(input_size, hidden_sizes[0])),
                              ('relu1', nn.ReLU()),
                              ('fc2', nn.Linear(hidden_sizes[0], hidden_sizes[1])),
                              ('bn2', nn.BatchNorm1d(num_features=hidden_sizes[1])),
                              ('relu2', nn.ReLU()),
                              ('dropout', nn.Dropout(dropout)),
                              ('fc3', nn.Linear(hidden_sizes[1], hidden_sizes[2])),
                              ('bn3', nn.BatchNorm1d(num_features=hidden_sizes[2])),
                              ('relu3', nn.ReLU()),
                              ('logits', nn.Linear(hidden_sizes[2], output_size))]))
        model.load_state_dict(checkpoint['state_dict'])

    else:
        checkpoint = torch.load(filepath)
        model = SimpleCNN()
        model.load_state_dict(checkpoint['state_dict'])

    return model

In [133]:
model = load_model(architecture = 'nn', filepath = 'checkpoint.pth')

Loading model from checkpoint.pth 



In [134]:
def evaluate_model(model, train, y_train, test, y_test, architecture = 'nn'):
    """
    Function to print out train and test accuracy of the model.

    INPUT:
        model - pytorch model
        train - (tensor) train dataset
        y_train - (numpy) labels for train dataset
        test - (tensor) test dataset
        y_test - (numpy) labels for test dataset

    OUTPUT:
        accuracy_train - accuracy on train dataset
        accuracy_test - accuracy on test dataset
    """
    train_pred = get_preds(model, train, architecture)
    train_pred_labels = get_labels(train_pred)

    test_pred = get_preds(model, test, architecture)
    test_pred_labels = get_labels(test_pred)

    accuracy_train = accuracy_score(y_train, train_pred_labels)
    accuracy_test = accuracy_score(y_test, test_pred_labels)

    print("Accuracy score for train set is {} \n".format(accuracy_train))
    print("Accuracy score for test set is {} \n".format(accuracy_test))

    return accuracy_train, accuracy_test

In [135]:
evaluate_model(model, X_train_shuffled, y_train_shuffled, X_test, y_test, architecture='nn')

NameError: name 'X_test_shuffled' is not defined

In [None]:
evaluate_model