In [104]:
import numpy as np
import matplotlib.pyplot as plt

from utils.data_process import get_CIFAR10_data
from models.neural_net import NeuralNetwork
from kaggle_submission import output_submission_csv

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Loading CIFAR-10
Now that you have implemented a neural network that passes gradient checks and works on toy data, you will test your network on the CIFAR-10 dataset.

In [46]:
# You can change these numbers for experimentation
# For submission be sure they are set to the default values 
TRAIN_IMAGES = 49000
VAL_IMAGES = 1000
TEST_IMAGES = 5000

data = get_CIFAR10_data(TRAIN_IMAGES, VAL_IMAGES, TEST_IMAGES)
X_train, y_train = data['X_train'], data['y_train']
X_val, y_val = data['X_val'], data['y_val']
X_test, y_test = data['X_test'], data['y_test']

# Train a network
To train our network we will use SGD. In addition, we will adjust the learning rate with an exponential learning rate schedule as optimization proceeds; after each epoch, we will reduce the learning rate by multiplying it by a decay rate.

You can try different numbers of layers and also the different activation functions that you implemented on the CIFAR-10 dataset below.

In [105]:
input_size = 32 * 32 * 3
num_layers = 2
hidden_size = 20
hidden_sizes = [hidden_size]*(num_layers-1)
num_classes = 10
net = NeuralNetwork(input_size, hidden_sizes, num_classes, num_layers, nonlinearity='sigmoid')

# Train the network
stats = net.train(X_train, y_train, X_val, y_val,
            num_iters=1000, batch_size=200,
            learning_rate=1e-3, learning_rate_decay=0.95,
            reg=0.1, verbose=True)

# Predict on the validation set
val_acc = (net.predict(X_val) == y_val).mean()
print('Validation accuracy: ', val_acc)



in the init process the params
dict_keys(['W1', 'b1', 'W2', 'b2'])
dimension of input
3072
classes of output
10
Loss: 
18.77
Loss: 
13.575
Loss: 
13.42
Loss: 
17.13
Loss: 
13.85
Loss: 
17.245
Loss: 
15.055
Loss: 
15.51
Loss: 
13.6
Loss: 
16.045
Loss: 
12.76
Loss: 
15.45
Loss: 
17.15
Loss: 
16.16
Loss: 
15.08
Loss: 
13.945
Loss: 
11.015
Loss: 
13.5
Loss: 
14.57
Loss: 
15.105
Loss: 
15.395
Loss: 
11.785
Loss: 
18.62
Loss: 
12.47
Loss: 
13.475
Loss: 
13.72
Loss: 
15.245
Loss: 
11.025
Loss: 
12.73
Loss: 
12.87
Loss: 
17.07
Loss: 
11.255
Loss: 
13.155
Loss: 
14.5
Loss: 
12.05
Loss: 
16.19
Loss: 
17.395
Loss: 
14.07
Loss: 
16.26
Loss: 
13.965
Loss: 
15.03
Loss: 
14.76
Loss: 
12.625
Loss: 
12.805
Loss: 
12.8
Loss: 
12.905
Loss: 
13.26
Loss: 
12.825
Loss: 
13.495
Loss: 
14.925
Loss: 
12.49
Loss: 
10.855
Loss: 
14.025
Loss: 
11.955
Loss: 
12.75
Loss: 
14.545
Loss: 
14.16
Loss: 
12.24
Loss: 
13.78
Loss: 
12.85
Loss: 
13.025
Loss: 
11.315
Loss: 
13.9
Loss: 
13.355
Loss: 
13.16
Loss: 
14.065
Loss:

# Graph loss and train/val accuracies

Examining the loss graph along with the train and val accuracy graphs should help you gain some intuition for the hyperparameters you should try in the hyperparameter tuning below. It should also help with debugging any issues you might have with your network.

In [None]:
# Plot the loss function and train / validation accuracies
plt.subplot(2, 1, 1)
plt.plot(stats['loss_history'])
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')

plt.subplot(2, 1, 2)
plt.plot(stats['train_acc_history'], label='train')
plt.plot(stats['val_acc_history'], label='val')
plt.title('Classification accuracy history')
plt.xlabel('Epoch')
plt.ylabel('Clasification accuracy')
plt.legend()
plt.show()

# Hyperparameter tuning

Once you have successfully trained a network you can tune your hyparameters to increase your accuracy.

Based on the graphs of the loss function above you should be able to develop some intuition about what hyperparameter adjustments may be necessary. A very noisy loss implies that the learning rate might be too high, while a linearly decreasing loss would suggest that the learning rate may be too low. A large gap between training and validation accuracy would suggest overfitting due to large model without much regularization. No gap between training and validation accuracy would indicate low model capacity. 


You will compare networks of two and three layers using the different activation functions you implemented. 

The different hyperparameters you can experiment with are:
- **Batch size**: We recommend you leave this at 200 initially which is the batch size we used. 
- **Number of iterations**: You can gain an intuition for how many iterations to run by checking when the validation accuracy plateaus in your train/val accuracy graph.
- **Initialization** Weight initialization is very important for neural networks. We used the initialization `W = np.random.randn(n) / sqrt(n)` where `n` is the input dimension for layer corresponding to `W`. We recommend you stick with the given initializations, but you may explore modifying these. Typical initialization practices: http://cs231n.github.io/neural-networks-2/#init
- **Learning rate**: Generally from around 1e-4 to 1e-1 is a good range to explore according to our implementation.
- **Learning rate decay**: We recommend a 0.95 decay to start.
- **Hidden layer size**: You should explore up to around 120 units per layer. For three-layer network, we fixed the two hidden layers to be the same size when obtaining the target numbers. However, you may experiment with having different size hidden layers.
- **Regularization coefficient**: We recommend trying values in the range 0 to 0.1. 



Hints:
- After getting a sense of the parameters by trying a few values yourself, you will likely want to write a few for loops to traverse over a set of hyperparameters.
- If you find that your train loss is decreasing, but your train and val accuracy start to decrease rather than increase, your model likely started minimizing the regularization term. To prevent this you will need to decrease the regularization coefficient. 


## Two-layer Relu Activation Network 

In [107]:
best_2layer_relu = NeuralNetwork(input_size, hidden_sizes, num_classes, num_layers, nonlinearity='relu')
# store the best model into this 
stats = net.train(X_train, y_train, X_val, y_val,
            num_iters=1000, batch_size=200,
            learning_rate=1e-3, learning_rate_decay=0.95,
            reg=0.1, verbose=True)

# Predict on the validation set
val_acc = (net.predict(X_val) == y_val).mean()
print('Validation accuracy: ', val_acc)

#################################################################################
# TODO: Tune hyperparameters using the validation set. Store your best trained  #
# model in best_2layer_relu.                                                    #
#################################################################################


in the init process the params
dict_keys(['W1', 'b1', 'W2', 'b2'])
dimension of input
3072
classes of output
10
Loss: 
11.945
Loss: 
14.295
Loss: 
15.02
Loss: 
11.395
Loss: 
10.905
Loss: 
15.76
Loss: 
12.47
Loss: 
9.135
Loss: 
12.52
Loss: 
10.625
Loss: 
9.075
Loss: 
9.72
Loss: 
12.475
Loss: 
8.765
Loss: 
11.53
Loss: 
13.035
Loss: 
11.465
Loss: 
10.33
Loss: 
12.105
Loss: 
11.185
Loss: 
10.245
Loss: 
10.875
Loss: 
12.65
Loss: 
11.03
Loss: 
11.615
Loss: 
12.635
Loss: 
12.6
Loss: 
10.845
Loss: 
11.27
Loss: 
13.635
Loss: 
10.36
Loss: 
13.975
Loss: 
13.97
Loss: 
13.68
Loss: 
12.425
Loss: 
11.945
Loss: 
12.885
Loss: 
11.135
Loss: 
12.12
Loss: 
12.395
Loss: 
8.485
Loss: 
12.46
Loss: 
11.46
Loss: 
11.14
Loss: 
11.58
Loss: 
8.23
Loss: 
11.06
Loss: 
9.87
Loss: 
14.075
Loss: 
14.33
Loss: 
13.165
Loss: 
14.735
Loss: 
12.825
Loss: 
13.77
Loss: 
9.94
Loss: 
12.15
Loss: 
12.82
Loss: 
9.48
Loss: 
10.61
Loss: 
9.355
Loss: 
12.75
Loss: 
11.81
Loss: 
13.585
Loss: 
10.515
Loss: 
9.785
Loss: 
9.98
Loss: 
11

## Two-layer Sigmoid Activation Network 

In [108]:
best_2layer_sigmoid = NeuralNetwork(input_size, hidden_sizes, num_classes, num_layers, nonlinearity='relu')
# store the best model into this 
stats = net.train(X_train, y_train, X_val, y_val,
            num_iters=1000, batch_size=200,
            learning_rate=1e-3, learning_rate_decay=0.95,
            reg=0.1, verbose=True)

# Predict on the validation set
val_acc = (net.predict(X_val) == y_val).mean()
print('Validation accuracy: ', val_acc)

#################################################################################
# TODO: Tune hyperparameters using the validation set. Store your best trained  #
# model in best_2layer_sigmoid.                                                    #
#################################################################################


in the init process the params
dict_keys(['W1', 'b1', 'W2', 'b2'])
dimension of input
3072
classes of output
10
Loss: 
13.16
Loss: 
10.72
Loss: 
14.145
Loss: 
12.325
Loss: 
10.945
Loss: 
13.635
Loss: 
14.065
Loss: 
10.23
Loss: 
10.715
Loss: 
12.485
Loss: 
10.97
Loss: 
12.735
Loss: 
11.59
Loss: 
13.645
Loss: 
11.525
Loss: 
11.57
Loss: 
9.53
Loss: 
12.095
Loss: 
10.305
Loss: 
12.5
Loss: 
12.88
Loss: 
13.49
Loss: 
12.02
Loss: 
14.91
Loss: 
9.985
Loss: 
11.19
Loss: 
12.425
Loss: 
10.76
Loss: 
8.74
Loss: 
10.24
Loss: 
12.085
Loss: 
10.42
Loss: 
14.035
Loss: 
12.34
Loss: 
10.655
Loss: 
11.53
Loss: 
11.25
Loss: 
9.3
Loss: 
12.02
Loss: 
10.98
Loss: 
12.765
Loss: 
11.515
Loss: 
12.045
Loss: 
13.57
Loss: 
12.08
Loss: 
12.655
Loss: 
10.245
Loss: 
10.49
Loss: 
10.87
Loss: 
10.635
Loss: 
10.445
Loss: 
11.265
Loss: 
11.375
Loss: 
9.21
Loss: 
12.455
Loss: 
8.68
Loss: 
11.09
Loss: 
10.895
Loss: 
9.825
Loss: 
7.61
Loss: 
10.165
Loss: 
10.725
Loss: 
12.41
Loss: 
15.02
Loss: 
11.12
Loss: 
9.85
Loss: 
10.

## Three-layer Relu Activation Network 

In [None]:
best_3layer_relu = None # store the best model into this 

#################################################################################
# TODO: Tune hyperparameters using the validation set. Store your best trained  #
# model in best_3layer_relu.                                                    #
#################################################################################

## Three-layer Sigmoid Activation Network 

In [None]:
best_3layer_sigmoid = None # store the best model into this 

#################################################################################
# TODO: Tune hyperparameters using the validation set. Store your best trained  #
# model in best_3layer_sigmoid.                                                    #
#################################################################################

# Run on the test set
When you are done experimenting, you should evaluate your final trained networks on the test set.

In [109]:
print('Two-layer relu')
test_acc = (best_2layer_relu.predict(X_test) == y_test).mean()
print('Test accuracy: ', test_acc)

Two-layer relu
Test accuracy:  0.1106


In [110]:
print('Two-layer sigmoid')
test_acc = (best_2layer_sigmoid.predict(X_test) == y_test).mean()
print('Test accuracy: ', test_acc)

Two-layer sigmoid
Test accuracy:  0.1064


In [None]:
print('Three-layer relu')
test_acc = (best_3layer_relu.predict(X_test) == y_test).mean()
print('Test accuracy: ', test_acc)

In [None]:
print('Three-layer sigmoid')
test_acc = (best_3layer_sigmoid.predict(X_test) == y_test).mean()
print('Test accuracy: ', test_acc)

# Kaggle output

Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 2 Neural Network. Use the following code to do so:

In [None]:
output_submission_csv('nn_2layer_relu_submission.csv', best_2layer_relu.predict(X_test))

In [None]:
output_submission_csv('nn_2layer_sigmoid_submission.csv', best_2layer_sigmoid.predict(X_test))

In [None]:
output_submission_csv('nn_3layer_relu_submission.csv', best_3layer_relu.predict(X_test))

In [None]:
output_submission_csv('nn_3layer_sigmoid_submission.csv', best_3layer_sigmoid.predict(X_test))