You will implement batch normalization for fully connected networks.



In [1]:
from __future__ import print_function
import time
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from cs231n.classifiers.fc_net import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cs231n.solver import Solver
import os
from utils import report, run_tasks, makedirs

run the following from the cs231n directory and try again:
python setup.py build_ext --inplace
You may also need to restart your iPython kernel


In [2]:
def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  Each element's error is |x - y| / max(1e-8, |x| + |y|); the 1e-8 floor
  keeps the ratio finite when both entries are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

In [3]:
def task1():
    """
    Batch normalization: forward pass.

    Exercises batchnorm_forward (to be implemented in cs231n/layers.py) in
    training mode. Prints the per-feature means and standard deviations of
    the activations of a small simulated two-layer network, first before
    batch normalization and then after it, once with gamma=1 / beta=0 and
    once with non-trivial gamma and beta.
    """
    stars = '*' * 30
    print(stars + ' Task 1 ' + stars)

    # Simulate the forward pass for a two-layer network.
    N, D1, D2, D3 = 200, 50, 60, 3
    X = np.random.randn(N, D1)
    W1 = np.random.randn(D1, D2)
    W2 = np.random.randn(D2, D3)
    hidden = np.maximum(0, X.dot(W1))
    a = hidden.dot(W2)

    # NOTE(review): "normaliation" is a typo in the printed text; it is kept
    # verbatim because it is runtime output.
    print('Before batch normaliation:')
    print('  means: ', a.mean(axis=0))
    print('  stds: ', a.std(axis=0))

    # With gamma=1 and beta=0 the means should be close to zero and the
    # stds close to one.
    print('After batch normalization (gamma=1, beta=0)')
    unit_gamma = np.ones(D3)
    zero_beta = np.zeros(D3)
    a_norm, _ = batchnorm_forward(a, unit_gamma, zero_beta, {'mode': 'train'})
    print('  mean: ', a_norm.mean(axis=0))
    print('  std: ', a_norm.std(axis=0))

    # With non-trivial parameters the means should be close to beta and the
    # stds close to gamma. A fresh parameter dict is passed for this call,
    # as in the first call.
    gamma = np.asarray([1.0, 2.0, 3.0])
    beta = np.asarray([11.0, 12.0, 13.0])
    a_norm, _ = batchnorm_forward(a, gamma, beta, {'mode': 'train'})
    print('After batch normalization (nontrivial gamma, beta)')
    print('  means: ', a_norm.mean(axis=0))
    print('  stds: ', a_norm.std(axis=0))
    print(stars + ' Task 1 completed' + stars)

In [4]:
# Run the batch-normalization forward-pass check defined in task1.
task1()

****************************** Task 1 ******************************
Before batch normaliation:
  means:  [-12.39094475  33.15835779  22.26310147]
  stds:  [24.51509527 36.77094844 32.37334076]
After batch normalization (gamma=1, beta=0)
  mean:  [-6.55031585e-17  1.00891517e-16 -2.72004641e-17]
  std:  [0.99999999 1.         1.        ]
After batch normalization (nontrivial gamma, beta)
  means:  [11. 12. 13.]
  stds:  [0.99999999 1.99999999 2.99999999]
****************************** Task 1 completed******************************


In [5]:
def task2():
    """
    Fully connected nets with batch normalization: gradient check.

    Once batch normalization works, FullyConnectedNet in
    cs231n/classifiers/fc_net.py should be extended so that, when the
    constructor flag use_batchnorm is True, a batch normalization layer is
    inserted before each ReLU nonlinearity. The outputs of the last layer
    must not be normalized. The helper layers affine_bn_relu_forward and
    affine_bn_relu_backward in cs231n/layer_utils.py are intended to make
    this easier. Three parts of fc_net.py need updating:
      1. the initialization,
      2. the forward pass in the loss function,
      3. the backward pass in the loss function.

    This function builds a small network with two hidden layers and, for
    reg = 0 and reg = 3.14, prints the initial loss and the relative error
    between the numerical and analytic gradient of every parameter.
    """
    stars = '*' * 30
    # NOTE(review): the banner reads "Task 3" although this function is
    # task2 -- confirm the intended numbering. Kept verbatim as it is
    # runtime output.
    print(stars + ' Task 3 ' + stars)

    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N,))

    for reg in (0, 3.14):
        print('Running check with reg = ', reg)
        model = FullyConnectedNet(
            [H1, H2],
            input_dim=D,
            num_classes=C,
            reg=reg,
            weight_scale=5e-2,
            dtype=np.float64,
            use_batchnorm=True,
        )

        loss, grads = model.loss(X, y)
        print('Initial loss: ', loss)

        def loss_only(_):
            # The argument is ignored: the loss is re-read from the model
            # itself, whose parameter array is the one handed to
            # eval_numerical_gradient below.
            return model.loss(X, y)[0]

        for name in sorted(grads):
            numeric_grad = eval_numerical_gradient(
                loss_only, model.params[name], verbose=False, h=1e-5)
            err = rel_error(numeric_grad, grads[name])
            print('%s relative error: %.2e' % (name, err))

        # Blank line separating the two regularization runs.
        if reg == 0:
            print()

In [6]:
# Run the batch-normalized FullyConnectedNet gradient check defined in task2.
task2()

****************************** Task 3 ******************************
Running check with reg =  0
Initial loss:  2.2713124310221806
W1 relative error: 7.25e-05
W2 relative error: 2.91e-06
W3 relative error: 3.41e-10
b1 relative error: 2.22e-03
b2 relative error: 3.33e-08
b3 relative error: 1.67e-10
beta1 relative error: 9.18e-09
beta2 relative error: 1.06e-07
gamma1 relative error: 4.48e-09
gamma2 relative error: 3.51e-08

Running check with reg =  3.14
Initial loss:  7.18603095655944
W1 relative error: 1.10e-04
W2 relative error: 4.83e-06
W3 relative error: 2.87e-08
b1 relative error: 6.94e-10
b2 relative error: 5.55e-09
b3 relative error: 1.65e-10
beta1 relative error: 8.49e-08
beta2 relative error: 2.97e-08
gamma1 relative error: 1.07e-07
gamma2 relative error: 1.17e-07
