In [1]:
import numpy as np
from atlas_ml import *
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import clear_output
import time

In [2]:
def zero_padding(img_matrix, padding):
    """Zero-pad the two trailing axes of a 4-D array.

    Parameters
    ----------
    img_matrix : array with four axes; the first two are left untouched.
    padding : indexable pair; ``padding[0]`` zeros are added on both sides
        of axis 2 and ``padding[1]`` zeros on both sides of axis 3.

    Returns
    -------
    A new array produced by ``np.pad`` with constant (zero) fill.
    """
    pad_rows, pad_cols = padding[0], padding[1]
    per_axis = ((0, 0), (0, 0), (pad_rows, pad_rows), (pad_cols, pad_cols))
    return np.pad(img_matrix, per_axis, mode='constant')

In [3]:

# Smoke test for zero_padding(): pad a (2, 3, 4, 4) array by two zeros on
# every side of its last two axes.
A = np.arange(2 * 3 * 4 * 4).reshape((2, 3, 4, 4))
P = zero_padding(A, [2, 2])
# print(P)

In [4]:
#https://stackoverflow.com/questions/30109068/implement-matlabs-im2col-sliding-in-python
def im2col(imgs, kernel_size, stride):
    """Gather every sliding window of a 4-D batch into columns.

    Parameters
    ----------
    imgs : array of shape (batch, depth, height, width).
    kernel_size : indexable pair (window rows, window columns).
    stride : indexable pair (row step, column step) between windows.

    Returns
    -------
    col_matrix : array of shape (batch, depth*rows*cols, n_windows) holding
        the window contents; the middle axis is ordered channel, then window
        row, then window column; windows are ordered row-major.
    act_idx : integer array of the same shape giving, for each entry of
        ``col_matrix``, its index into the flattened ``imgs``.
    """
    n_batch, n_chan, height, width = imgs.shape
    k_rows, k_cols = kernel_size[0], kernel_size[1]
    plane = height * width

    # Flat offsets of the kernel taps inside a single channel plane.
    tap_offsets = np.arange(k_rows)[:, None] * width + np.arange(k_cols)
    # Repeat the taps for each channel (channel-major ordering).
    patch_offsets = (np.arange(n_chan)[:, None] * plane + tap_offsets.ravel()).ravel()

    # Flat offsets of the top-left corner of every window, thinned by stride.
    row_starts = np.arange(height - k_rows + 1)[::stride[0]]
    col_starts = np.arange(width - k_cols + 1)[::stride[1]]
    window_offsets = (row_starts[:, None] * width + col_starts).ravel()

    # Flat offset at which every sample of the batch begins.
    sample_offsets = np.arange(n_batch) * n_chan * height * width

    act_idx = (sample_offsets[:, None, None]
               + patch_offsets[None, :, None]
               + window_offsets[None, None, :])

    # np.take without an axis indexes into the flattened input.
    col_matrix = np.take(imgs, act_idx)
    return col_matrix, act_idx

In [5]:

# Testing im2col(): gather columns from a random batch, then scatter a random
# "gradient" of the same shape back onto the input grid (the col2im direction).
A = np.random.rand(5,3,28,28) # 5 sample inputs, 3 channels, 28x28 each
kernel_size = [3,3] # Sample blocksize (rows x columns)
stride = [1,1]
out, act_idx = im2col(A,kernel_size,stride)
#print(A.shape)
#print(out.shape)
#print(act_idx.shape)
dA1 = np.random.rand(*out.shape)
dA2 = np.zeros(A.shape)
#print(dA1)
# Scatter one im2col column (one window position) at a time: within a single
# column all flat indices are distinct, so fancy-index "+=" accumulates
# correctly. The loop must cover every column of act_idx, not the image width.
for i in range(act_idx.shape[-1]):
    dA2.ravel()[act_idx[:,:,i].ravel()] += dA1[:,:,i].ravel()
#print(dA2.shape)
# Every entry of dA1 is added exactly once, so the totals must agree.
assert np.isclose(dA2.sum(), dA1.sum())


In [6]:
def init_filters(K, D, kernel_size, activation):
    """Create the weight matrix for ``K`` convolution filters.

    Each filter spans ``D`` channels and a ``kernel_size[0]`` x
    ``kernel_size[1]`` window, so the first argument passed to
    ``init_matrix`` is ``D * kernel_size[0] * kernel_size[1]`` and the second
    is ``K``. ``init_matrix`` is not defined in this notebook (presumably it
    comes from the ``atlas_ml`` star import); its result is returned as is.
    """
    kernel_rows, kernel_cols = kernel_size[0], kernel_size[1]
    weights_per_filter = D * kernel_rows * kernel_cols
    return init_matrix(weights_per_filter, K, activation)

In [7]:
class layer:
    """Fully connected layer with momentum-smoothed gradient updates.

    ``W`` is used as an (inputs, outputs) matrix and ``B`` is added to the
    (batch, outputs) pre-activation; both come from ``init_matrix``, which is
    not defined in this notebook (presumably from ``atlas_ml``).
    """

    def __init__(self, n_prev, n_next, activation):
        # init_matrix receives the activation class itself; the layer keeps
        # its own instance of it for activate()/diff().
        self.W = init_matrix(n_prev, n_next, activation)
        self.B = init_matrix(1, n_next, activation)
        self.activation = activation()

        # Running averages of the gradients, updated in step().
        self.V_dW = np.zeros(self.W.shape)
        self.V_dB = np.zeros(self.B.shape)

    def forward(self, A0):
        """Return activation(A0 . W + B) and cache ``Z`` and ``A``."""
        pre_activation = np.einsum('ln,ml-> mn', self.W, A0) + self.B
        self.Z = pre_activation
        self.A = self.activation.activate(pre_activation)
        return self.A

    def _linear_backward(self, dZ, A0, m):
        """Store dZ, dW and dB (scaled by 1/m) and return the input gradient."""
        self.dZ = dZ
        scale = 1. / m
        self.dW = scale * np.einsum('mn,ml->ln', dZ, A0)
        self.dB = scale * (np.einsum('mn->n', dZ))
        return np.einsum('ln,mn->ml', self.W, dZ)

    def grad(self, dA, A0, m):
        """Backward pass given the gradient w.r.t. this layer's activation.

        ``dA`` is multiplied element-wise by the activation derivative at the
        cached ``Z``; ``A0`` is the input that was fed to ``forward``.
        """
        local_slope = self.activation.diff(self.Z)
        return self._linear_backward(np.multiply(dA, local_slope), A0, m)

    def out_grad(self, dZ, A0, m):
        """Backward pass when the gradient w.r.t. ``Z`` is supplied directly."""
        return self._linear_backward(dZ, A0, m)

    def step(self, lr, beta):
        """Update the running gradient averages, then move W and B by ``lr``."""
        keep = 1. - beta
        self.V_dW = (beta * self.V_dW + keep * self.dW)
        self.V_dB = (beta * self.V_dB + keep * self.dB)
        # Rebind rather than update in place, so W/B become fresh arrays.
        self.W = self.W - lr * self.V_dW
        self.B = self.B - lr * self.V_dB

In [8]:
class conv_layer:
    """2-D convolution layer implemented as im2col followed by an einsum.

    Inputs to ``forward`` are (batch, channels, height, width) arrays. The
    filters live in ``W`` with one column per filter (it is contracted as
    ``'fk'`` against the im2col matrix) and ``B`` is added to the
    (batch, positions, filters) pre-activation. ``init_matrix`` and the
    activation classes are not defined in this notebook (presumably they come
    from ``atlas_ml``).
    """

    def __init__(self, kernel_size, n_channels, activation, n_filters = 1, stride =[1,1],  padding = [0,0]):
        # Copy the sequences so the shared list defaults (and the caller's
        # lists) are never aliased by this instance.
        self.kernel_size = list(kernel_size)
        self.n_channels = n_channels
        self.n_filters = n_filters
        self.stride = list(stride)
        self.padding = list(padding)
        self.activation = activation()

        # One column of D*kernel_rows*kernel_cols weights per filter.
        self.W = init_filters(self.n_filters, self.n_channels, self.kernel_size, activation)
        self.B = init_matrix( 1, n_filters, activation)

        # Running averages of the gradients, updated in step().
        self.V_dW = np.zeros(self.W.shape)
        self.V_dB = np.zeros(self.B.shape)

    def get_output_shape(self, A_prev_shape):
        """Return (batch, n_filters, H_out, W_out) for an already padded input.

        ``A_prev_shape`` is (batch, depth, height, width); height is paired
        with ``kernel_size[0]``/``stride[0]`` and width with
        ``kernel_size[1]``/``stride[1]``, matching im2col.
        """
        batch_size, D, H, W = A_prev_shape
        H_out = (H - self.kernel_size[0])//self.stride[0] + 1
        W_out = (W - self.kernel_size[1])//self.stride[1] + 1
        output_shape = (batch_size, self.n_filters, H_out, W_out)
        return output_shape

    def forward(self, A_prev):
        """Pad, convolve and activate; returns (batch, n_filters, H_out, W_out).

        Caches the padded input shape, the im2col matrix with its index map,
        and the pre-activation ``Z`` in (batch, positions, filters) layout for
        use by ``grad``.
        """
        A_prev = zero_padding(A_prev, self.padding)
        self.A_prev_shape = A_prev.shape
        self.im2cols, self.act_idx = im2col(A_prev,self.kernel_size, self.stride)
        output_shape = self.get_output_shape(A_prev.shape)

        self.Z = np.einsum('mfn,fk->mnk',self.im2cols,self.W) + self.B
        self.A = self.activation.activate(self.Z)
        # (batch, positions, filters) -> (batch, filters, H_out, W_out)
        self.A = np.transpose(self.A,(0,2,1)).reshape(output_shape)
        return self.A

    def grad(self, dA):
        """Backward pass.

        Parameters
        ----------
        dA : gradient w.r.t. this layer's activation, in the same
            (batch, positions, filters) layout as the cached ``Z``.

        Returns
        -------
        Gradient w.r.t. the *unpadded* layer input, laid out as
        (batch, height*width, channels), i.e. the layout this method itself
        expects for ``dA``. Also stores ``dZ``, ``dW`` and ``dB``.
        """
        batch_size = dA.shape[0]
        dAdZ = self.activation.diff(self.Z)
        # Element-wise product; einsum also checks both operands are 3-D.
        self.dZ = np.einsum('mjk,mjk->mjk', dA, dAdZ)

        self.dW = (1./batch_size)*np.einsum('mjk,mkl->jl', self.im2cols, self.dZ)
        self.dB = (1./batch_size)*np.einsum('mjk->k', self.dZ)

        dA_prev_cols = np.einsum('mnk,fk-> mfn',self.dZ, self.W)

        # col2im: scatter-add one window position at a time. Indices within a
        # single position are distinct, so "+=" with fancy indexing is safe;
        # overlapping windows accumulate across iterations.
        dA_prev = np.zeros(self.A_prev_shape)
        flat = dA_prev.reshape(-1)  # view onto dA_prev
        for i in range(self.act_idx.shape[-1]):
            flat[self.act_idx[:,:,i].ravel()] += dA_prev_cols[:,:,i].ravel()

        # Drop the border added by zero_padding() in forward() so the result
        # matches the input the caller actually supplied.
        pad_h, pad_w = self.padding[0], self.padding[1]
        dA_prev = dA_prev[:, :, pad_h:dA_prev.shape[2] - pad_h, pad_w:dA_prev.shape[3] - pad_w]

        dA_prev = dA_prev.reshape(dA_prev.shape[0],dA_prev.shape[1],dA_prev.shape[2]*dA_prev.shape[3])
        dA_prev = np.transpose(dA_prev,(0,2,1))

        return dA_prev

    def step(self, lr, beta, reg_lambda=0):
        """Momentum update of W and B; ``reg_lambda`` is accepted but unused."""
        self.V_dW = (beta * self.V_dW + (1. - beta) * self.dW)
        self.V_dB = (beta * self.V_dB + (1. - beta) * self.dB)
        self.W    =  self.W - lr*self.V_dW
        self.B    =  self.B - lr*self.V_dB

In [9]:
class max_pool_layer():
    """Per-channel max pooling built on im2col.

    Inputs to ``forward`` are (batch, channels, height, width) arrays. The
    layer has no trainable parameters; ``step`` is a no-op so it can sit in
    the same update loop as the other layers.
    """

    def __init__(self, kernel_size, n_channels, stride =[1,1],  padding = [0,0]):
        # Copy the sequences so the shared list defaults (and the caller's
        # lists) are never aliased by this instance.
        self.kernel_size = list(kernel_size)
        self.n_channels = n_channels
        self.stride = list(stride)
        self.padding = list(padding)

    def get_output_shape(self, A_prev_shape):
        """Return (batch, n_channels, H_out, W_out) for an already padded input.

        ``A_prev_shape`` is (batch, depth, height, width); height is paired
        with ``kernel_size[0]``/``stride[0]`` and width with
        ``kernel_size[1]``/``stride[1]``, matching im2col.
        """
        batch_size, D, H, W = A_prev_shape
        H_out = (H - self.kernel_size[0])//self.stride[0] + 1
        W_out = (W - self.kernel_size[1])//self.stride[1] + 1
        output_shape = (batch_size, self.n_channels, H_out, W_out)
        return output_shape

    def forward(self, A_prev):
        """Pad and pool; returns (batch, n_channels, H_out, W_out).

        Caches the padded input shape, the im2col index map, the windows ``Z``
        as (batch, channels, window_size, positions), and ``arg``, the index
        of the maximum inside every window.
        """
        A_prev = zero_padding(A_prev, self.padding)
        self.A_prev_shape = A_prev.shape
        self.im2cols, self.act_idx = im2col(A_prev, self.kernel_size, self.stride)

        output_shape = self.get_output_shape(A_prev.shape)

        # Split the channel-major im2col rows into one group per channel:
        # (batch, D*F, N) -> (batch, D, F, N).
        self.Z = np.split(self.im2cols,self.n_channels,axis=1)
        self.Z = np.stack(self.Z, axis=1)

        self.A = np.max(self.Z,axis=2)
        # Position of the maximum inside each window (window axis of Z).
        # Not read elsewhere in this notebook.
        self.arg = np.argmax(self.Z,axis=2)

        self.A = self.A.reshape(output_shape)
        return self.A

    def grad(self, dA):
        """Route ``dA`` back to the window entries that equal the window max.

        Parameters
        ----------
        dA : gradient w.r.t. the pooled output, laid out as
            (batch, positions, channels).

        Returns
        -------
        Gradient w.r.t. the *unpadded* layer input, laid out as
        (batch, height*width, channels). If several entries of a window tie
        for the maximum, each of them receives the full gradient.
        """
        # (batch, positions, channels) -> (batch, channels, 1, positions)
        dA = np.expand_dims(dA.transpose(0,2,1), axis=2)
        A = self.A.reshape(self.A.shape[0],self.A.shape[1],1,self.A.shape[-2]*self.A.shape[-1])
        dAdZ = (self.Z - A)==0
        self.dZ = np.multiply(dA, dAdZ)
        shape = self.dZ.shape
        # Back to im2col layout: (batch, D*F, positions).
        self.dZ = self.dZ.reshape(shape[0],shape[1]*shape[2],shape[3])

        # col2im: scatter-add one window position at a time (indices within a
        # single position are distinct, so fancy-index "+=" is safe).
        dA_prev = np.zeros(self.A_prev_shape)
        flat = dA_prev.reshape(-1)  # view onto dA_prev
        for i in range(self.act_idx.shape[-1]):
            flat[self.act_idx[:,:,i].ravel()] += self.dZ[:,:,i].ravel()

        # Drop the border added by zero_padding() in forward().
        pad_h, pad_w = self.padding[0], self.padding[1]
        dA_prev = dA_prev[:, :, pad_h:dA_prev.shape[2] - pad_h, pad_w:dA_prev.shape[3] - pad_w]

        dA_prev = dA_prev.reshape(dA_prev.shape[0],dA_prev.shape[1],dA_prev.shape[2]*dA_prev.shape[3])
        dA_prev = np.transpose(dA_prev,(0,2,1))

        return dA_prev

    def step(self, lr, beta, reg_lambda=0):
        """No parameters to update; present for interface parity."""
        return None

In [10]:
class MNIST_CNN:
    """Small CNN: conv(5x5, pad 2) -> conv(3x3) -> maxpool(2x2, stride 2)
    -> conv(3x3, one filter) -> dense(121 -> 50) -> dense(50 -> Y_size).

    The 121 inputs of the first dense layer correspond to an 11x11
    single-filter map, which is what 28x28 inputs produce with the layer
    settings below. ``leaky_relu`` and ``softmax`` are not defined in this
    notebook (presumably from ``atlas_ml``).
    """

    def __init__(self, Y_size, lossfn, n_channels=1):
        self.L1 = conv_layer([5,5], n_channels, leaky_relu, n_filters = 10,padding = [2,2])
        self.L2 = conv_layer([3,3], 10, leaky_relu, n_filters = 15)
        self.L3 = max_pool_layer([2,2],15, stride=[2,2])
        self.L4 = conv_layer([3,3], 15, leaky_relu)
        self.L5 = layer(121,50, leaky_relu)
        self.L6 = layer(50, Y_size, softmax)
        self.lossfn = lossfn()

    def f_pass(self, X):
        """Run the forward pass on ``X`` and return (and cache) the output ``H``."""
        A = self.L1.forward(X)
        A = self.L2.forward(A)
        A = self.L3.forward(A)
        A = self.L4.forward(A)
        # Flatten the single-filter map for the dense layers. The flattened
        # array is kept on the model instead of reshaping L4's cached output
        # in place, so back_prop does not depend on an aliasing side effect.
        self.L4_flat = A.reshape(A.shape[0], A.shape[2]*A.shape[3])
        A = self.L5.forward(self.L4_flat)
        A = self.L6.forward(A)
        self.H = A
        return self.H

    def back_prop(self,X,Y, batch_size,reg_lambda=0):
        """Compute the loss and all layer gradients for the last ``f_pass``.

        ``X`` and ``reg_lambda`` are accepted but not used. ``batch_size`` is
        the 1/m scale applied by the dense layers; the convolution layers
        scale by the batch dimension of the gradient they receive.
        """
        m = batch_size
        self.loss = self.lossfn.get_loss(self.H,Y)
        dZ = self.lossfn.diff(self.H,Y)
        dA = self.L6.out_grad(dZ, self.L5.A, m)
        dA = self.L5.grad(dA, self.L4_flat, m)
        # (batch, 121) -> (batch, positions, 1 filter), the layout conv_layer.grad expects
        dA = np.expand_dims(dA,axis=-1)
        dA = self.L4.grad(dA)
        dA = self.L3.grad(dA)
        dA = self.L2.grad(dA)
        dX = self.L1.grad(dA)

    def optim(self, lr, beta=0):
        """Apply one update step to every layer, in order L1..L6."""
        for lyr in (self.L1, self.L2, self.L3, self.L4, self.L5, self.L6):
            lyr.step(lr,beta)

In [11]:
# Locations of the four MNIST idx files, relative to the notebook's working
# directory. NOTE(review): the train names use "-idx3-"/"-idx1-" while the test
# names use ".idx3-"/".idx1-" — confirm both match the files on disk.
trainX_path = 'MNIST_np/data/MNIST/train/train-images-idx3-ubyte'
trainY_path = 'MNIST_np/data/MNIST/train/train-labels-idx1-ubyte'
testX_path  = 'MNIST_np/data/MNIST/test/t10k-images.idx3-ubyte'
testY_path  = 'MNIST_np/data/MNIST/test/t10k-labels.idx1-ubyte'

# load_mnist_data is not defined in this notebook (presumably provided by the
# atlas_ml star import). A later cell reads np.shape(Y)[1] as the number of
# outputs, so Y is expected to be 2-D.
X,Y,X_test,Y_test = load_mnist_data(trainX_path,trainY_path,testX_path,testY_path)

In [12]:
# Number of output units = size of the second axis of the label matrix Y.
n_out = np.shape(Y)[1]
# CE_loss is passed as a class; MNIST_CNN instantiates it itself. It is not
# defined in this notebook (presumably from atlas_ml).
mnist_cnn = MNIST_CNN(n_out,CE_loss)

In [13]:
# Mini-batch size handed to train().
batch_size = 16

# Learning rate handed to train() and used by the timing cell's optim() call.
lr = 0.005

# Number of epochs handed to train().
n_epochs = 10

# Handed to train() as lr_decay; presumably a multiplicative learning-rate
# decay — confirm against atlas_ml's train().
lr_decay = 0.9

# Number of training samples (first axis of X); not referenced again in the
# cells visible here.
data_size = X.shape[0]

# Momentum coefficient for the layers' step(); with 0 the running average
# equals the current gradient, i.e. plain gradient descent.
beta = 0

In [None]:
# Kick off training. train() and model_accuracy are not defined in this
# notebook (presumably from the atlas_ml star import).
train(
    mnist_cnn, X, Y, X_test, Y_test, model_accuracy,
    n_epochs, batch_size, lr, lr_decay, beta,
)

In [None]:
# Measure the time taken by one forward pass, one backward pass and one
# optimiser step on a 4-sample batch. time.perf_counter() is used because it
# is monotonic and has the highest available resolution for short intervals;
# time.time() can jump when the system clock is adjusted.
start = time.perf_counter()
mnist_cnn.f_pass(X[0:4])
endf = time.perf_counter()
mnist_cnn.back_prop(X[0:4],Y[0:4],4)
endb = time.perf_counter()
mnist_cnn.optim(lr,beta)
endo = time.perf_counter()
# Last expression of the cell: display the loss computed by back_prop().
mnist_cnn.loss

In [None]:
# Report the duration of each phase timed in the previous cell (seconds, 4 decimals).
print(f"f-pass:{endf-start:.4f} | b-prop:{endb-endf:.4f} | optim: {endo-endb:.4f} | total: {endo-start:.4f}")

In [None]:
import pickle

# Does not work currently

def save_model(model, filename):
    """Pickle ``model`` into ``filename`` using the highest pickle protocol.

    The file is opened in binary write mode, so any existing file at that
    path is replaced.
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(model, sink, protocol=pickle.HIGHEST_PROTOCOL)

def load_model(filename):
    """Load and return an object previously written with ``pickle.dump``.

    The file is opened in binary read mode: ``pickle.load`` needs a stream
    that yields bytes, so a text-mode handle fails.

    Raises whatever ``open`` or ``pickle.load`` raise (e.g. ``OSError`` for a
    missing file, ``pickle.UnpicklingError`` for corrupt data).
    """
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # come from a trusted source.
    with open(filename, 'rb') as f:
        return pickle.load(f)