# Implementing a Neural Network
In this exercise we will develop a neural network with fully-connected layers to perform classification, and test it out on the CIFAR-10 dataset.

In [1]:
# A bit of setup

import numpy as np
import matplotlib.pyplot as plt

from cs231n.classifiers.neural_net_b import TwoLayerNet

%matplotlib inline
# Notebook-wide plotting defaults: figure size, no pixel smoothing, grayscale colormap.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),  # set default size of plots
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
    """Return the largest element-wise relative error between x and y.

    The per-element error is |x - y| / max(1e-8, |x| + |y|). The 1e-8 floor
    on the denominator avoids a division by zero when both entries are zero.
    """
    abs_diff = np.abs(x - y)
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / scale)

# Load the data
Now that you have implemented a two-layer network that passes gradient checks and works on toy data, it's time to load up our favorite CIFAR-10 data so we can use it to train a classifier on a real dataset.

In [2]:
from cs231n.data_utils import load_CIFAR10

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.

    Inputs:
    - num_training: number of leading rows of the loaded training set kept
      as the training split.
    - num_validation: number of rows taken immediately after the training
      split and used as the validation split.
    - num_test: number of leading rows of the loaded test set kept as the
      test split.

    Returns a tuple (X_train, y_train, X_val, y_val, X_test, y_test). Each X
    has the training-split mean image subtracted and is reshaped so that
    every example is one row.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'

    # The arrays bound below are locals of this call, so there is nothing
    # left over from a previous call to delete before loading.
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data. The validation rows are selected before X_train is
    # rebound to its first num_training rows.
    val_idx = np.arange(num_training, num_training + num_validation)
    X_val = X_train[val_idx]
    y_val = y_train[val_idx]
    train_idx = np.arange(num_training)
    X_train = X_train[train_idx]
    y_train = y_train[train_idx]
    test_idx = np.arange(num_test)
    X_test = X_test[test_idx]
    y_test = y_test[test_idx]

    # Normalize the data: subtract the mean image. The mean is computed on
    # the training split only and applied to all three splits.
    # NOTE(review): the in-place subtraction assumes load_CIFAR10 returns
    # floating-point arrays -- confirm against cs231n.data_utils.
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

# Sanity check: print the shape of every split, one "<label> <shape>" line each.
print(*(f'{label} {array.shape}' for label, array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
)), sep='\n')

Train data shape:  (49000, 3072)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3072)
Validation labels shape:  (1000,)
Test data shape:  (1000, 3072)
Test labels shape:  (1000,)


## Helper Functions

- Save data to pickle file
- Draw data

In [3]:
from cs231n.vis_utils import visualize_grid

def showTraining(stats):
    """Plot the loss curve and the train/validation accuracy curves.

    Inputs:
    - stats: mapping indexed with the keys 'loss_history',
      'train_acc_history' and 'val_acc_history'; each value is passed
      straight to matplotlib's plot.

    Draws two stacked panels (loss on top, accuracy below) and shows them.
    """
    # Top panel: loss per iteration.
    loss_ax = plt.subplot(2, 1, 1)
    loss_ax.plot(stats['loss_history'])
    loss_ax.set_title('Loss history')
    loss_ax.set_xlabel('Iteration')
    loss_ax.set_ylabel('Loss')

    # Bottom panel: training and validation accuracy on shared axes.
    acc_ax = plt.subplot(2, 1, 2)
    for key, curve_label in (('train_acc_history', 'train'),
                             ('val_acc_history', 'val')):
        acc_ax.plot(stats[key], label=curve_label)
    acc_ax.set_title('Classification accuracy history')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Classification accuracy')
    acc_ax.legend()

    plt.tight_layout()
    plt.show()
    
def show_net_weights(W1):
    """Show the first-layer weights as a grid of images.

    W1 is reshaped to (32, 32, 3, -1) and its last axis is moved to the
    front, so each slice along axis 0 is one 32x32x3 image. The grid built
    by visualize_grid is cast to uint8 and drawn without axes.
    """
    filters = W1.reshape(32, 32, 3, -1).transpose(3, 0, 1, 2)
    grid = visualize_grid(filters, padding=3)
    plt.imshow(grid.astype('uint8'))
    axes = plt.gca()
    axes.axis('off')
    plt.show()

In [4]:
import pickle
import os.path

def savedb(obj,filename):
    """Pickle obj into the file at filename, overwriting any existing file."""
    with open(filename, 'wb') as handle:
        pickle.dump(obj, handle)
    
def loaddb(filename):
    """Return the object unpickled from the file at filename.

    Unpickling can execute arbitrary code, so only point this at files
    written by this notebook (see savedb).
    """
    with open(filename, 'rb') as handle:
        return pickle.load(handle)
    
def pickle_exist(hs, bs, lr, reg, num_epoch):
    """Return True if a result file for this hyperparameter setting exists.

    The path checked is pickle/<hs>-<bs>-<lr>-<reg>-<num_epoch>.pickle,
    relative to the current working directory.
    """
    return os.path.isfile(f'pickle/{hs}-{bs}-{lr}-{reg}-{num_epoch}.pickle')

def save_pickle(hs, bs, lr, reg, num_epoch, val_acc, W1, stats, dtype = np.half):
    """Save one hyperparameter run to pickle/<hs>-<bs>-<lr>-<reg>-<num_epoch>.pickle.

    Inputs:
    - hs, bs, lr, reg, num_epoch: the hyperparameters of the run; they are
      stored in the record and also form the file name.
    - val_acc: validation accuracy of the run, stored unchanged.
    - W1: weights to store; converted with dtype before saving.
    - stats: mapping of history name to values; every value is converted
      with dtype before saving. The caller's mapping is left untouched.
    - dtype: callable used for the conversions. The default np.half keeps
      the files small at the cost of precision.

    The stored object is the tuple
    (hs, bs, lr, reg, num_epoch, val_acc, W1, stats), written with savedb.
    """
    compact_W1 = dtype(W1)
    # Build a new dict instead of overwriting the caller's entries, so the
    # full-precision histories stay usable after saving.
    compact_stats = {key: dtype(values) for key, values in stats.items()}
    record = (hs, bs, lr, reg, num_epoch, val_acc, compact_W1, compact_stats)
    filename = f'pickle/{hs}-{bs}-{lr}-{reg}-{num_epoch}.pickle'
    # Create the output directory on first use so the write cannot fail
    # with FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    savedb(record, filename)

# Tune your hyperparameters

**Tuning**. Tuning the hyperparameters and developing intuition for how they affect the final performance is a large part of using Neural Networks, so we want you to get a lot of practice. Below, you should experiment with different values of the various hyperparameters, including hidden layer size, learning rate, number of training epochs, and regularization strength. You might also consider tuning the learning rate decay, but you should be able to get good performance using the default value.

**Approximate results**. You should aim to achieve a classification accuracy of greater than 48% on the validation set. Our best network gets over 52% on the validation set.

**Experiment**: Your goal in this exercise is to get as good a result on CIFAR-10 as you can (52% could serve as a reference), with a fully-connected Neural Network. Feel free to implement your own techniques (e.g. PCA to reduce dimensionality, or adding dropout, or adding features to the solver, etc.).

In [5]:
# Placeholders for the best model found so far and its validation accuracy.
best_net = best_state = None
best_acc = 0

# Network dimensions: flattened 32x32x3 input, 10 output classes.
input_size, num_classes = 32 * 32 * 3, 10

# Hyperparameter grid swept below. The trailing comments keep the wider
# grids that were tried earlier.
hidden_size = [3, 5, 10, 50, 150]
batch_size = [10, 50, 100, 200, 400]            # wider: [10, 50, 100, 200, 300, 400]
learning_rate = [4e-3, 3e-3, 1e-3, 5e-4, 1e-4]  # wider: [4.5e-3, 4e-3, 3.5e-3, 3e-3, 2e-3, 1e-3, 5e-4, 1e-4]
regularization = [0, 0.1, 0.5, 1, 10]           # wider: [0, 0.1, 0.5, 1, 3, 5, 10]
num_epoch = 8

# Narrow grid for a focused learning-rate search (disabled):
# hidden_size = [10]
# batch_size = [200]
# learning_rate = [4.5e-3, 4e-3, 3.5e-3, 3e-3, 2e-3]
# regularization = [0.5]
# num_epoch = 8

# Start the "best so far" hyperparameters at the first entry of each list.
best_hs, best_bs, best_lr, best_reg = (
    hidden_size[0], batch_size[0], learning_rate[0], regularization[0]
)

def testParam(hs, bs, lr, reg):
    """Train a new TwoLayerNet with one hyperparameter setting and score it.

    Inputs:
    - hs: hidden size passed to the TwoLayerNet constructor.
    - bs: batch size passed to net.train.
    - lr: learning rate passed to net.train.
    - reg: regularization strength passed to net.train.

    Reads the module-level X_train, y_train, X_val, y_val, input_size,
    num_classes and num_epoch.

    Returns a tuple (val_acc, net, stats): the fraction of X_val rows whose
    prediction equals y_val, the trained network, and the value returned by
    net.train. Also prints a one-line summary of the run.
    """
    model = TwoLayerNet(input_size, hs, num_classes, dtype = np.single)

    # One epoch is num_train // bs iterations (at least one). Train for
    # num_epoch epochs plus one extra iteration.
    batches_per_epoch = max(X_train.shape[0] // bs, 1)
    total_iters = int(np.ceil(batches_per_epoch * num_epoch) + 1)

    history = model.train(X_train, y_train, X_val, y_val,
                          num_iters=total_iters, batch_size=bs,
                          learning_rate=lr, learning_rate_decay=0.95,
                          reg=reg, verbose=False)

    predictions = model.predict(X_val)
    val_acc = (predictions == y_val).mean()
    print(f'hs: {hs}, bs: {bs}, lr: {lr}, reg:{reg}, val_acc: {val_acc}')
    return val_acc, model, history

# Sweep the full grid, taking learning rates in reverse list order. Settings
# that already have a result file are skipped and shown as a dot (80 dots per
# line); every other setting is trained and its result saved.
i = 0  # number of cached settings skipped so far
for lr in reversed(learning_rate):
    for bs in batch_size:
        for hs in hidden_size:
            for reg in regularization:
                if not pickle_exist(hs, bs, lr, reg, num_epoch):
                    val_acc, net, stats = testParam(hs, bs, lr, reg)
                    W1 = net.params['W1']
                    save_pickle(hs, bs, lr, reg, num_epoch, val_acc, W1, stats)
                    continue
                print('.', end='')
                i += 1
                if i % 80 == 0:
                    # Wrap the progress dots after every 80th one.
                    print()

................................................................................
................................................................................
................................................................................
................................................................................
................................................................................
................................................................................
................................................................................
.................................................................