In [2]:
# This mounts your Google Drive to the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

# TODO: Enter the foldername in your Drive where you have saved the unzipped
# assignment folder, e.g. 'cse493g1/assignments/assignment3/'
FOLDERNAME = 'cse493g1/assignments/assignment3_colab/assignment3/'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

# This downloads the COCO dataset to your Drive
# if it doesn't already exist.
%cd /content/drive/My\ Drive/$FOLDERNAME/cse493g1/datasets/
!bash get_datasets.sh
%cd /content/drive/My\ Drive/$FOLDERNAME

Mounted at /content/drive
/content/drive/My Drive/cse493g1/assignments/assignment3_colab/assignment3/cse493g1/datasets
/content/drive/My Drive/cse493g1/assignments/assignment3_colab/assignment3


# Multi-Layer Fully Connected Network Part 2
In this exercise, you will extend your fully connected network from Assignment 2 with Dropout and Normalization Layers. First, you will copy and paste all the necessary parts from Assignment 2. Then you will re-train your model from A2 as a baseline. Next, you will complete the batchnorm and dropout notebook, and then return to this notebook and create an improved model using dropout and normalization.

In [3]:
# Setup cell.
import time
import numpy as np
import matplotlib.pyplot as plt
from cse493g1.classifiers.fc_net import *
from cse493g1.data_utils import get_CIFAR10_data
from cse493g1.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cse493g1.solver import Solver

%matplotlib inline
plt.rcParams["figure.figsize"] = (10.0, 8.0)  # Set default size of plots.
plt.rcParams["image.interpolation"] = "nearest"
plt.rcParams["image.cmap"] = "gray"

%load_ext autoreload
%autoreload 2

def rel_error(x, y):
    """Returns relative error."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

	You will need to compile a Cython extension for a portion of this assignment.
	The instructions to do this will be given in a section of the notebook below.


In [4]:
# Load the (preprocessed) CIFAR-10 data.
data = get_CIFAR10_data()
for k, v in list(data.items()):
    print(f"{k}: {v.shape}")

X_train: (49000, 3, 32, 32)
y_train: (49000,)
X_val: (1000, 3, 32, 32)
y_val: (1000,)
X_test: (1000, 3, 32, 32)
y_test: (1000,)


# Copy necessary parts from A2.
Fill in the following functions by copying and pasting your answers from A2:
`affine_forward` in `cse493g1/layers.py`
`affine_backward` in `cse493g1/layers.py`
`relu_forward` in `cse493g1/layers.py`
`relu_backward` in `cse493g1/layers.py`
`softmax_loss` in `cse493g1/layers.py`
`sgd_momentum` in `cse493g1/optim.py`
`rmsprop` in `cse493g1/optim.py`
`adam` in `cse493g1/optim.py`



# Train baseline model from A2
Copy and Paste your `FullyConnectedNet` model from `cse493g1/classifiers/fc_net.py` in Assignment 2 into `FullyConnectedNetBasic` in the file `cse493g1/classifiers/fc_net.py` in this assignment. Use the best hyperparms that you found from the previous assignment to train this model. Call this model `best_model_basic`

In [5]:
best_model_basic = None

################################################################################
# TODO: Train the best FullyConnectedNetBasic that you can on CIFAR-10. Store your best model in  #
# the best_model_basic variable.                                                     #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

params = {}
best_val = -1
iters = 30

num_train = 700
small_data = {
  'X_train': data['X_train'][:num_train],
  'y_train': data['y_train'][:num_train],
  'X_val': data['X_val'],
  'y_val': data['y_val']
}

for i in range(iters):
  lr = 10**np.random.uniform(-5, -3)
  reg = 10**np.random.uniform(-4, -2)
  weight_scale = 10**np.random.uniform(-2, -1)

  model = FullyConnectedNetBasic(
    [100, 100, 100, 100],
    weight_scale=weight_scale,
    reg=reg,
    dtype=np.float64
  )

  solver = Solver(
    model,
    small_data,
    num_epochs=15,
    batch_size=128,
    update_rule='adam',
    optim_config={'learning_rate': lr},
    verbose=False
  )
  solver.train()

  model_val = solver.best_val_acc
  if model_val > best_val:
    best_val = model_val
    best_params = {
      'lr': lr,
      'reg': reg,
      'weight_scale': weight_scale
    }
  print(f'lr: {lr}', f'reg: {reg}', f'weight_scale: {weight_scale}', f'val_acc: {model_val}')

print(f'\nbest_val: {best_val}')
print(f'best_params: {best_params}')

best_model_basic = FullyConnectedNetBasic(
  [100, 100, 100, 100],
  weight_scale=best_params['weight_scale'],
  reg=best_params['reg'],
  dtype=np.float64
)
best_solver = Solver(
  best_model_basic,
  data,
  num_epochs=15,
  batch_size=128,
  update_rule='adam',
  optim_config={'learning_rate': best_params['lr']},
  verbose=False
)
best_solver.train()

print(f'Best val accuracy: {best_solver.best_val_acc}')

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

lr: 0.0003925808783477074 reg: 0.0036575163373692636 weight_scale: 0.011202710215900405 val_acc: 0.296
lr: 0.0002274010834106172 reg: 0.0006755276708708243 weight_scale: 0.022557776416818935 val_acc: 0.346
lr: 1.0636265157462757e-05 reg: 0.00022154148812624817 weight_scale: 0.01592147230529891 val_acc: 0.147
lr: 0.0003917214003249585 reg: 0.000498429362684529 weight_scale: 0.04737336239741938 val_acc: 0.329
lr: 0.0001352076302376644 reg: 0.001330136726115873 weight_scale: 0.02140827596175686 val_acc: 0.316
lr: 7.532516217500079e-05 reg: 0.0004503689828323332 weight_scale: 0.08073300178474309 val_acc: 0.178
lr: 3.9993974349762285e-05 reg: 0.000547429121905529 weight_scale: 0.08804693282140691 val_acc: 0.117
lr: 2.0864629014289815e-05 reg: 0.0017500158909523405 weight_scale: 0.03924156965462203 val_acc: 0.196
lr: 6.843185285291299e-05 reg: 0.0001568150334451038 weight_scale: 0.011006430751742033 val_acc: 0.221
lr: 0.00012826119728256014 reg: 0.0004015391885253266 weight_scale: 0.04288120

# Evaluate baseline model from A2
Evaluate above baseline model.

In [6]:
y_test_pred = np.argmax(best_model_basic.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(best_model_basic.loss(data['X_val']), axis=1)
print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())

Validation set accuracy:  0.538
Test set accuracy:  0.49


# Train improved model
Design a new model in `FullyConnectedNetImproved` in the file `cse493g1/classifiers/fc_net.py`. You can start by having `FullyConnectedNetImproved` be the same design as `FullyConnectedNetBasic`. Next, complete the BatchNormoralization.ipynb and Dropout.ipynb notebooks. Then return to this notebook and complete `FullyConnectedNetImproved` by adding in batchnorm and dropout. Try to beat the accuracy of your baseline model! You may have to adjust your hyperparameters.

In [22]:
best_model_improved = None

################################################################################
# TODO: Train the best FullyConnectedNetImproved that you can on CIFAR-10. You might   #
# find batch/layer normalization and dropout useful. Store your best model in  #
# the best_mode_improved variable.                                                     #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

params = {}
best_val = -1
iters = 30

num_train = 700
small_data = {
  'X_train': data['X_train'][:num_train],
  'y_train': data['y_train'][:num_train],
  'X_val': data['X_val'],
  'y_val': data['y_val']
}

for i in range(iters):
  lr = 10**np.random.uniform(-2.7, -2.3)
  reg = 10**np.random.uniform(-3.9, -3.3)
  weight_scale = 10**np.random.uniform(-2, -1.5)
  dropout = np.random.uniform(.7, .9)

  model = FullyConnectedNetImproved(
    [100, 100, 100, 100],
    weight_scale=weight_scale,
    reg=reg,
    dropout_keep_ratio=dropout,
    normalization='batchnorm',
    dtype=np.float64
  )

  solver = Solver(
    model,
    small_data,
    num_epochs=15,
    batch_size=128,
    update_rule='adam',
    optim_config={'learning_rate': lr},
    verbose=False
  )
  solver.train()

  model_val = solver.best_val_acc
  if model_val > best_val:
    best_val = model_val
    best_params = {
      'lr': lr,
      'reg': reg,
      'weight_scale': weight_scale,
      'dropout': dropout
    }
  print(f'lr: {lr}', f'reg: {reg}', f'weight_scale: {weight_scale}', f'dropout: {dropout}', f'val_acc: {model_val}')

print(f'\nbest_val: {best_val}')
print(f'best_params: {best_params}')

best_model_improved = FullyConnectedNetImproved(
  [100, 100, 100, 100],
  weight_scale=best_params['weight_scale'],
  reg=best_params['reg'],
  dropout_keep_ratio=best_params['dropout'],
  normalization='batchnorm',
  dtype=np.float64
)
best_solver = Solver(
  best_model_improved,
  data,
  num_epochs=15,
  batch_size=128,
  update_rule='adam',
  optim_config={'learning_rate': best_params['lr']},
  verbose=False
)
best_solver.train()

print(f'Best val accuracy: {best_solver.best_val_acc}')

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

lr: 0.0021599778570350146 reg: 0.00023634950689347428 weight_scale: 0.030992813132862913 dropout: 0.8425779681580954 val_acc: 0.351
lr: 0.0036876238861453304 reg: 0.0002881124918408124 weight_scale: 0.017682647558089085 dropout: 0.8080849321281375 val_acc: 0.316
lr: 0.0042425833397861865 reg: 0.00041921676072849445 weight_scale: 0.03134389928124231 dropout: 0.8470505474732906 val_acc: 0.322
lr: 0.004458982491854083 reg: 0.00013565026064037608 weight_scale: 0.02189257212432032 dropout: 0.873774269986016 val_acc: 0.327
lr: 0.004994151708533769 reg: 0.0003158040562270049 weight_scale: 0.017930826460040978 dropout: 0.8330613737106323 val_acc: 0.337
lr: 0.002056333966264573 reg: 0.00033344763437836067 weight_scale: 0.02750622105613915 dropout: 0.781313795616098 val_acc: 0.343
lr: 0.003714535715181818 reg: 0.0003905114189672309 weight_scale: 0.014708678579181612 dropout: 0.7337628378504379 val_acc: 0.334
lr: 0.0038988118185525494 reg: 0.00037347624320418625 weight_scale: 0.013228284347481233

# Test Your Model!
Run your best model on the validation and test sets. Are you able to outperform the baseline model that has no Batchnorm or Dropout?

In [23]:
y_test_pred = np.argmax(best_model_improved.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(best_model_improved.loss(data['X_val']), axis=1)
print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())

Validation set accuracy:  0.529
Test set accuracy:  0.511
