In [1]:
import numpy as np
from impl.solver import *
import sys

In [2]:
# --- Optimisation schedule -------------------------------------------------
solver = 'sgd'      # key used to pick an entry from the `solvers` table
alpha = 1e-3        # passed to the solver as `alpha` (presumably the learning rate)
# Passed to the solver as `n_iter`. NOTE(review): the training log prints
# "Iter-N", so this looks like a count of minibatch steps rather than epochs
# -- confirm against impl.solver.
n_iter = 1000
mb_size = 64        # minibatch size handed to the solver
print_after = 100   # the training log shows one line every `print_after` iterations
n_experiment = 1    # number of independent train/evaluate repetitions

# --- Model settings --------------------------------------------------------
loss = 'cross_ent'  # loss name
nonlin = 'relu'     # nonlinearity name
p_dropout = 0.8     # dropout setting; keep-vs-drop semantics not visible here -- TODO confirm
reg = 1e-5          # NOTE(review): no visible cell reads this value

In [3]:
# MNIST is read through the TensorFlow tutorial helper (the hipsternet
# input_data module is not used for this dataset).
# NOTE(review): `tensorflow.examples.tutorials` is a TensorFlow 1.x module --
# confirm the installed TensorFlow still ships it.
from tensorflow.examples.tutorials.mnist import input_data

# one_hot=False: the label arrays are 1-D (see the shapes displayed below).
mnist = input_data.read_data_sets('data/MNIST_data/', one_hot=False)

X_train = mnist.train.images
y_train = mnist.train.labels

X_val = mnist.validation.images
y_val = mnist.validation.labels

X_test = mnist.test.images
y_test = mnist.test.labels

# Display the label-vector shapes as a quick sanity check.
y_test.shape, y_val.shape, y_train.shape

Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz


((10000,), (5000,), (55000,))

In [4]:
# Dataset dimensions taken from the training split:
#   M -- number of training rows
#   D -- number of columns per row (flattened image length)
#   C -- largest label value + 1, used as the number of classes
M = X_train.shape[0]
D = X_train.shape[1]
C = y_train.max() + 1
M, D, C

(55000, 784, 10)

In [5]:
import impl.utils as utils

# Run the project's preprocessing routine over all three splits in one call.
# NOTE(review): what `prepro` does to the values is not visible here; the
# displayed shapes show that it leaves the array shapes unchanged.
# The split names are rebound, so re-running this cell preprocesses the
# already-preprocessed arrays again.
X_train, X_val, X_test = utils.prepro(
    X_train,
    X_val,
    X_test,
)
(X_train.shape, X_val.shape, X_test.shape)

((55000, 784), (5000, 784), (10000, 784))

In [6]:
# The convolutional network consumes image tensors, so every flat row is
# folded into (channels, height, width) = (1, 28, 28). The leading -1 lets
# NumPy infer the number of samples in each split.
img_shape = (1, 28, 28)
X_train, X_val, X_test = (
    split.reshape(-1, *img_shape) for split in (X_train, X_val, X_test)
)
X_train.shape, X_val.shape, X_test.shape

((55000, 1, 28, 28), (5000, 1, 28, 28), (10000, 1, 28, 28))

In [7]:
# Lookup table from optimiser name to the optimiser function. The functions
# themselves come from the `from impl.solver import *` at the top of the
# notebook.
solvers = {
    'sgd': sgd,
    'momentum': momentum,
    'nesterov': nesterov,
    'adagrad': adagrad,
    'rmsprop': rmsprop,
    'adam': adam,
}
solvers

{'adagrad': <function impl.solver.adagrad>,
 'adam': <function impl.solver.adam>,
 'momentum': <function impl.solver.momentum>,
 'nesterov': <function impl.solver.nesterov>,
 'rmsprop': <function impl.solver.rmsprop>,
 'sgd': <function impl.solver.sgd>}

In [17]:
# Resolve the configured optimiser name to its function.
solver_fun = solvers[solver]

# One test-accuracy slot per experiment; filled in by the experiment loop.
accs = np.zeros(n_experiment)

print('Experimenting on {}'.format(solver))

Experimenting on sgd


In [18]:
import numpy as np
import impl.loss as loss_fun
import impl.layer as l
import impl.NN as nn

class SPNN(nn.NN):
    """Small convolutional classifier built on the project's `nn.NN` base.

    Layer stack, in forward order:
        conv -> ReLU -> conv -> flatten -> batch-norm -> ReLU -> dropout (train only) -> FC

    Parameters live in ``self.model`` (keys W1, b1, W2, b2, gamma2, beta2,
    W3, b3) and the batch-norm running statistics in ``self.bn_caches``.
    """

    def __init__(self, D, C, H, lam=1e-3, p_dropout=.8, loss='cross_ent', nonlin='relu'):
        """Forward all arguments to `nn.NN` and remember the dropout setting.

        Args:
            D: flattened input size per sample (used to size the post-conv layers).
            C: number of output classes.
            H: number of filters in each convolution.
            lam, p_dropout, loss, nonlin: passed through unchanged to `nn.NN`.
        """
        super().__init__(D, C, H, lam, p_dropout, loss, nonlin)
        # Keep an instance-level copy so forward() uses the value given to
        # this constructor rather than a notebook-level global of the same name.
        self.p_dropout = p_dropout

    def forward(self, X, train=False):
        """Run the network on a batch.

        Args:
            X: input batch handed to the first convolution.
            train: forwarded to batch-norm; when true, dropout is also applied.

        Returns:
            (h3, cache): the FC layer output and the tuple of per-layer caches
            that `backward` unpacks.
        """
        # 1st layer: Input to Conv
        h1, h1_cache = l.conv_forward(X=X, W=self.model['W1'], b=self.model['b1'])
        h1, nl1_cache = l.relu_forward(h1)

        # midst layer: Convnet 1
        h2, h2_cache = l.conv_forward(X=h1, W=self.model['W2'], b=self.model['b2'])
        # Flatten to (batch, features) for batch-norm. The batch size is read
        # from the first ReLU cache -- assumes that cache is an array whose
        # leading axis is the batch axis; TODO confirm against impl.layer.
        h2 = h2.reshape([nl1_cache.shape[0], -1])
        running_stats = (self.bn_caches['bn2_mean'], self.bn_caches['bn2_var'])
        # bn_forward hands back the updated running mean/var, which are stored
        # straight back into self.bn_caches.
        h2, bn2_cache, self.bn_caches['bn2_mean'], self.bn_caches['bn2_var'] = l.bn_forward(
            h2, self.model['gamma2'], self.model['beta2'], running_stats, train=train)
        h2, nl2_cache = l.relu_forward(h2)

        # Dropout is only applied while training; the cache stays None
        # otherwise so backward() can tell the layer was skipped.
        do2_cache = None
        if train:
            h2, do2_cache = l.dropout_forward(X=h2, p_dropout=self.p_dropout)

        # last layer : FC to Output
        h3, h3_cache = l.fc_forward(X=h2, W=self.model['W3'], b=self.model['b3'])

        cache = h1_cache, nl1_cache, h2_cache, bn2_cache, nl2_cache, do2_cache, h3_cache
        return h3, cache

    def backward(self, y, y_train, cache):
        """Back-propagate through the stack and collect parameter gradients.

        Args:
            y: value passed as first argument to the loss-gradient function
               (presumably the forward output -- TODO confirm).
            y_train: targets passed as second argument to the loss-gradient function.
            cache: the cache tuple returned by `forward`.

        Returns:
            dict of gradients keyed exactly like ``self.model``.
        """
        dh3 = self.dloss_funs[self.loss](y, y_train)
        h1_cache, nl1_cache, h2_cache, bn2_cache, nl2_cache, do2_cache, h3_cache = cache

        # last layer
        dh2, dw3, db3 = l.fc_backward(dout=dh3, cache=h3_cache)

        # midst layer
        # forward() skips dropout (cache is None) when train=False; mirror
        # that here instead of handing None to dropout_backward.
        if do2_cache is not None:
            dh2 = l.dropout_backward(dout=dh2, cache=do2_cache)
        dh2 = l.relu_backward(dout=dh2, cache=nl2_cache)
        dh2, dgamma2, dbeta2 = l.bn_backward(dout=dh2, cache=bn2_cache)
        # Undo the flatten done in forward(). This reuses the first ReLU
        # cache's shape, which assumes both convolutions produce outputs of
        # the same shape -- TODO confirm against impl.layer.conv_forward.
        dh2 = dh2.reshape(nl1_cache.shape)
        dh1, dw2, db2 = l.conv_backward(dout=dh2, cache=h2_cache)

        # 1st layer
        dh1 = l.relu_backward(dout=dh1, cache=nl1_cache)
        dX, dw1, db1 = l.conv_backward(dout=dh1, cache=h1_cache)

        # grad for GD
        grad = dict(
            W1=dw1,
            b1=db1,

            W2=dw2,
            b2=db2,
            gamma2=dgamma2,
            beta2=dbeta2,

            W3=dw3,
            b3=db3
            )

        return grad

    def _init_model(self, D, C, H):
        """Create the parameter dict and the batch-norm running statistics.

        Args:
            D: flattened input size per sample.
            C: number of output classes.
            H: number of filters in each convolution.

        The post-conv feature count is taken to be H*D, i.e. each of the H
        feature maps is assumed to keep D values per sample -- TODO confirm
        that conv_forward preserves the spatial size.
        """
        self.model = dict(
            # First conv: H filters of 3x3 over a single input channel.
            W1=np.random.randn(H, 1, 3, 3) / np.sqrt(H / 2.),
            b1=np.zeros((H, 1)),

            # Second conv: H filters of 3x3 over the H channels from conv 1.
            W2=np.random.randn(H, H, 3, 3) / np.sqrt(H / 2.),
            b2=np.zeros((H, 1)),
            # Batch-norm scale/shift over the flattened conv output.
            gamma2=np.ones((1, H*D)),
            beta2=np.zeros((1, H*D)),

            # Fully connected read-out from flattened features to class scores.
            W3=np.random.randn(H*D, C) / np.sqrt(H*D / 2.),
            b3=np.zeros((1, C))
        )
        # Running mean/variance handed to and refreshed by l.bn_forward.
        self.bn_caches = dict(
            bn2_mean=np.zeros((1, H*D)),
            bn2_var=np.zeros((1, H*D))
        )

In [None]:
# Train a fresh network per experiment and record its test accuracy.
for k in range(n_experiment):
    print('Experiment-{}'.format(k + 1))

    # Pass the configured settings explicitly so that editing the config cell
    # actually changes the network instead of silently falling back to the
    # constructor defaults.
    # NOTE(review): `reg` from the config cell is intentionally NOT forwarded
    # as `lam` here -- doing so would change the regularisation strength from
    # the constructor default (1e-3) to 1e-5. Decide which one is intended.
    net = SPNN(D=D, C=C, H=8, p_dropout=p_dropout, loss=loss, nonlin=nonlin)

    # The solver returns the trained network and reports validation progress
    # every `print_after` iterations.
    net = solver_fun(nn=net, X_train=X_train, y_train=y_train, val_set=(X_val, y_val),
                     mb_size=mb_size, alpha=alpha, n_iter=n_iter, print_after=print_after)

    # Accuracy = fraction of test samples whose prediction equals the label.
    y_pred = net.predict(X_test)
    accs[k] = np.mean(y_pred == y_test)

print()
print('Test Mean accuracy: {:.4f}, std: {:.4f}'.format(accs.mean(), accs.std()))

Experiment-1
Iter-100 loss: 1.5298 validation: 0.372600
Iter-200 loss: 1.0343 validation: 0.645200
Iter-300 loss: 0.8997 validation: 0.631200
Iter-400 loss: 0.8050 validation: 0.562600
Iter-500 loss: 0.7899 validation: 0.821000
Iter-600 loss: 0.7933 validation: 0.814200


In [14]:
# Scratch check of the flatten used between the conv and batch-norm layers:
# a (t, c, h, w) tensor collapses to (t, n) with n = c*h*w.
h2 = np.ones((10, 1, 28, 28))
h2_txn = h2.reshape(len(h2), -1)
h2.shape, h2_txn.shape

((10, 1, 28, 28), (10, 784))