In [1]:
import numpy as np
from scipy.special import expit,xlogy
import math
import pickle
import numpy as np
import os
from scipy.ndimage.interpolation import *
from numpy import fliplr
from random import randint

In [2]:
def load_CIFAR_batch(filename):
  """ load single batch of cifar """
  with open(filename, 'rb') as f:
    datadict = pickle.load(f, encoding='latin1')
    X = datadict['data']
    Y = datadict['labels']
    X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float64")
    Y = np.array(Y)

    return X, Y

def load_CIFAR10(ROOT,batch_list=[1,2,3,4,5],batch_size=1000,gray_scale=False):
  """ load all of cifar """
  total_train_size = len(batch_list)*batch_size
  Xtr = np.empty([total_train_size, 32, 32, 3],dtype=np.float64)
  Ytr = np.empty([total_train_size,1],dtype=np.int32)

  start,end = 0,batch_size
  for b in batch_list:
    f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
    
    xtmp, ytmp = load_CIFAR_batch(f)
    Xtr[start:end,:],Ytr[start:end] = xtmp[:batch_size],ytmp[:batch_size].reshape(batch_size,1)

    start += batch_size
    end += batch_size

  if gray_scale is True:
    Xtr = np.mean(Xtr,axis=3)

  Xtr = Xtr.reshape(total_train_size,-1)

  return Xtr, Ytr

In [3]:
def load_test_set(ROOT,gray_scale=False):
  x,y = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
  if gray_scale is True:
    x = np.mean(x,axis=3)
  x = x.reshape(10000,-1)
  return x,y.reshape(10000,1)

In [4]:
def reshape(xtrain):
    dist_type = randint(0,4)
    xtrain = xtrain.reshape(-1,32,32,3)
    
    if dist_type is 0:
        xtrain = fliplr(xtrain)
    elif dist_type is 1:
        angle = randint(-14,14)
        xtrain = rotate(xtrain,angle,reshape=False)
    elif dist_type is 2:
        shift_amt = randint(-14,14)
        xtrain = shift(xtrain,(0,0,shift_amt,0),order=0,prefilter=False,mode='nearest')

    return xtrain.reshape(-1,3072)

In [5]:
class Normalizer:

    def norm_train(self,x):
        self.mean = np.mean(x, axis =0)
        self.std = np.std(x, axis = 0)

        x -= self.mean
        x /= self.std
        return x

    def norm_test(self,x):
        return (x - self.mean)/self.std

In [6]:
class FullyConnectedNN:

    def __init__(self,cost='square_error'):
        self.ws = []
        self.activ = []
        self.cost = cost
        self.regularization = 0
        self.dropout = []

    def set_regularization(self,regularization):
        self.regularization = regularization

    def add_layer(self,shape,act_func='sigmoid',dropout=0):
        self.ws.append(shape)
        self.dropout.append(dropout)
        if act_func is not None:
            self.activ.append(act_func)

    def make_weights(self,shape):
        return np.random.randn(shape[0]+1,shape[1])/np.sqrt(shape[0]+1)

    def train(self,xtr,ytr,vx=None,vy=None,moment=0.2,
        lr=0.9,max_epoch=1000,callback=None):
        train_error,validation_error = -1,-1
        epoch = 0
        while self.halt(train_error) and max_epoch > epoch:
            train_error = self.__train__(xtr,ytr,moment=moment,lr=lr,epoch=1)
            if vx is not None and vy is not None:
                validation_error = self.__validation_error__(vx,vy)
            epoch += 1
            callback(epoch,train_error,validation_error)

    def __train__(self,xtr,ytr,moment=0.2,lr=0.9,epoch=100):
        output_dim = self.ws[-1].shape[1]
        ytr = np.concatenate([ytr == i for i in range(output_dim)],axis=1)

        for i in range(epoch):
            for xs,ys in self.get_batch(xtr,ytr):
                xs = reshape(xs)
                error,gradients = self.back_propogation(xs,ys)
                self.update_weights(gradients,moment,lr)
        return error

    def __validation_error__(self,vx,vy):
        predict = self.predict(vx)
        predict = np.argmax(predict,axis=1).reshape(-1,1)

        return np.mean(predict == vy)

    def store_weights(self,epoch):
        print('saving weights')
        for i in range(len(self.ws)):
            file = open("weights/weight_{}".format(i), 'wb+')
            np.save(file,self.ws[i])
            file.close
        print('done saving')

    def halt(self,loss):
        prev_loss = inf,count = 0
        while True:
            if prev_loss - loss > 1e-3:
                count = 0
                prev_loss = loss
                yield False
            elif count > 2:
                yield True
            else:
                count += 1
                yield False

    def get_batch(self,xtrain,ytrain):
        slice_index,slice_size = 0,50

        while slice_index < xtrain.shape[0]:
            yield xtrain[slice_index:slice_index+slice_size], \
                ytrain[slice_index:slice_index+slice_size]
            slice_index += slice_size

    def update_weights(self,gradients,moment,lr):
        for i in range(len(gradients)):
            self.moment[i] *= moment
            self.moment[i] += lr*gradients[i]
            self.ws[i] -= self.moment[i]

    def back_propogation(self,xtr,ytr):
        fwd_a,fwd_z = self.__predict__(xtr)
        out_error, out_grad = self.out_error(fwd_a[-1],ytr,self.cost)
        out_grad = np.multiply(self.activation_gradient(fwd_z[-1],self.activ[-1]),out_grad)
        ws_grads = []

        for w in range(len(self.ws)-1,-1,-1):
            ws_grad = np.dot(out_grad.T, fwd_a[w])
            ws_grads.append(ws_grad.T)
            if w > 0:
                out_grad = np.dot(out_grad,self.ws[w][1:,:].T)
                out_grad = np.multiply(out_grad,
                    self.activation_gradient(fwd_z[w-1],self.activ[w]))

        ws_grads.reverse() 

        for i in range(len(ws_grads)):
            ws_grads[i] /= xtr.shape[0]
            ws_grads[i][:,1:] += self.regularization/xtr.shape[0]*self.ws[i][:,1:]

        return out_error,ws_grads

    def predict(self,x):
        hx = self.__predict__(x)[0][-1]
        return hx

    def __predict__(self,x):
        m = x.shape[0]
        x = np.c_[np.ones((m,1)), x]
        fwd_a,fwd_z = [x],[]
        
        for layer in range(len(self.ws)):
            z = np.dot(fwd_a[-1],self.ws[layer])
            a = np.c_[np.ones((m,1)),self.activate(z,self.activ[layer])]
            fwd_z.append(z)
            fwd_a.append(a)

        fwd_a[-1] = fwd_a[-1][:,1:]
        return fwd_a,fwd_z

    def make_dropout(self):
        self.ws_bkup = self.ws
        self.ws = []
        for i in range(len(self.ws_bkup)):
            self.ws.append(self.dropout_weight(self.ws_bkup[i],self.dropout[i]))

    def load_weights(self):
        for i in range(len(self.ws_bkup)):
            nonzero_entry = np.nonzero(self.ws[i])
            self.ws_bkup[i][nonzero_entry] = self.ws[i][nonzero_entry]
        self.ws = self.ws_bkup

    def out_error(self,hx,y,act):
        funcs = {"square_error":self.square_error,
                "cross_entropy_softmax":self.cross_entropy_softmax,
                "log_cross_entropy":self.log_cross_entropy}
        return funcs[act](hx,y)

    def square_error(self,hx,label):
        grad = hx - label
        square_diff = np.square(grad).sum()/2
        for i in range(len(self.ws)):
            square_diff += np.sum(self.regularization/(2*hx.shape[0])*np.square(self.ws[i][:,1:]))
        return square_diff, grad

    def sigmoid(self,x):
        grad = expit(x)
        return np.multiply(grad, 1 - grad)

    def activate(self,x,act):
        funcs = {"sigmoid":expit,
        "relu":self.relu,
        "leaky_relu":self.leaky_relu,
        "softmax":self.softmax,
        "elu":self.elu
        }
        return funcs[act](x)

    def activation_gradient(self,x,act):
        funcs = {"sigmoid":self.sigmoid,
                "relu":self.relu_grad,
                "leaky_relu":self.leaky_relu,
                "softmax":self.softmax_grad,
                "elu":self.elu_grad}
        return funcs[act](x)

    def leaky_relu(self,x):
        return np.maximum(x,0.01*x)

    def log_cross_entropy(self,hx,y):
        error = (-xlogy(y,hx) - xlogy(1-y,1-hx)).sum()/hx.shape[0]
        for i in range(len(self.ws)):
            error += self.regularization/(2*hx.shape[0]) \
            * np.sum(np.square(self.ws[i][:,1:]))

        y = y.astype('float')

        grad = np.divide(-y, hx, out=np.zeros_like(y), where= hx!=0)
        grad += np.divide(1-y,1-hx, out=np.zeros_like(y), where= (1-hx)!=0)
        return error,grad

    def cross_entropy_softmax(self,hx,y):
        error = -xlogy(y,hx).sum()/hx.shape[0]
        for i in range(len(self.ws)):
            error += self.regularization/(2*hx.shape[0]) \
            * np.sum(np.square(self.ws[i][:,1:]))

        grad = hx - y
        return error,grad


    def softmax(self,hx):
        nterm = np.max(hx,axis=1).reshape(-1,1)
        grad = np.exp(hx - nterm)
        return grad/grad.sum(axis=1,keepdims=True)

    def softmax_grad(self,hx):
        return np.ones(hx.shape)

    def relu(self,x):
        return np.maximum(x,0)

    def relu_grad(self,x):
        return (x > 0)

    def elu(self,x):
        alpha = 0.5
        return alpha * np.exp(x - np.max(x,axis=1).reshape(-1,1)) - alpha

    def elu_grad(self,x):
        alpha = 0.5
        return (x > 0) + (x < 0) * alpha * np.exp(x - np.max(x,axis=1).reshape(-1,1))

    def compile(self):
        self.ws,shape = [],self.ws
        self.moment = []
        for i in range(len(shape)-1):
            w = self.make_weights((shape[i],shape[i+1]))
            self.moment.append(np.zeros((shape[i]+1,shape[i+1])))
            self.ws.append(w)

    def dropout_weight(self,weight,drop_prob):
        new_weight = np.copy(weight)
        drop_prob = int(new_weight.shape[0]*drop_prob)
        zeros = np.random.choice(new_weight.shape[0],drop_prob)
        new_weight[zeros,:] = 0
        return new_weight

In [7]:
def plotLearning(epoch,terror,verror):
    print(terror,verror)

In [8]:
np.seterr(over='raise') 
batch_list = [1,2,3,4,5]
batch_size = 10000
gray_scale=False
cifar10_dir = 'cifar-10-batches-py'
X_train, y_train = load_CIFAR10(cifar10_dir,
                    batch_list,batch_size,gray_scale)

In [9]:
Normalizer = Normalizer()
X_train = Normalizer.norm_train(X_train)
X_test, y_test = load_test_set(cifar10_dir,gray_scale)
X_test = Normalizer.norm_test(X_test)
m = int(X_train.shape[0]*0.8)
X_valid,Y_valid = X_train[m:],y_train[m:]
X_train,y_train = X_train[:m],y_train[:m]

In [14]:
classifier = FullyConnectedNN(cost="cross_entropy_softmax")
classifier.add_layer(3072,None)
classifier.add_layer(1400,'relu')
classifier.add_layer(500,'relu')
classifier.add_layer(10,'softmax')

In [15]:
classifier.compile()

In [16]:
classifier.train(X_train,y_train,vx=X_valid,
    vy=Y_valid,moment=0.6,lr=1e-3,max_epoch=50,
    callback=plotLearning)

2.1845612844 0.3704
1.57483722096 0.3983
1.5205885045 0.4162
1.47711391363 0.4256
1.37509798223 0.4362
1.57394531492 0.442
1.31080961026 0.4537
1.25226199612 0.4546
1.20863445556 0.4624
1.53456747585 0.4724
1.36858301731 0.4668
1.60713252217 0.4792
1.14111242273 0.4816
1.1131972957 0.4846
1.42879895464 0.488
1.07325462647 0.4917
1.75055721842 0.494
2.25237570162 0.497
1.03703369846 0.4953
1.04396258935 0.4999
2.0036986498 0.4991
1.00250091988 0.51
1.42246715687 0.508
1.43560480594 0.5086
0.991966226145 0.5078
0.970985299151 0.5119
1.45963054082 0.5139
1.20394618954 0.5109
1.58103358549 0.5151
1.59804553608 0.5191
1.33175688103 0.5176
0.973960576991 0.5155
1.24021109318 0.5204
1.38693384686 0.53
1.24239412582 0.5208
1.25692925325 0.5242
2.29611160613 0.5182
1.1970999445 0.5177
1.67533411415 0.523
1.74915821525 0.5278
1.04684196046 0.5309
0.985060016568 0.5215
0.904312024892 0.5281
0.896095379286 0.5309
0.865612443526 0.5316
0.85338488251 0.5354
1.19865074188 0.5334
0.86854409893 0.5343


In [18]:
hx = np.argmax(classifier.predict(X_train),axis=1).reshape(-1,1)
print("training accuracy ",np.mean(hx == y_train))

hx = np.argmax(classifier.predict(X_valid),axis=1).reshape(-1,1)
print("validation accuracy ",np.mean(hx == Y_valid))

hx = np.argmax(classifier.predict(X_test),axis=1).reshape(-1,1)
print("testing accuracy ",np.mean(hx == y_test))


training accuracy  0.700725
validtion accuracy  0.5413
testing accuracy  0.5347
