<pre><h1>TIPR Assignment 2 Code base & Report</h1>
<h2>Neural Network Implementation</h2>
<h3><i> - Achint Chaudhary</i></h3>
<h3>15879, M.Tech (CSA)</h3>
<h5>Note:</h5> Please scroll down for the Report section, or search for "Part 1".
<img src="Images/dnn_architecture.png">

<pre>




<h3>Standard Library Imports</h3>

In [None]:
import sys, os, shutil, itertools as it
from copy import deepcopy
from datetime import datetime

import numpy as np
import pandas as pd
from scipy import ndimage
import matplotlib as mpl
import matplotlib.pyplot as plt

from skimage import io
from sklearn.metrics import f1_score, accuracy_score, classification_report

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

import warnings
warnings.filterwarnings("error")

# Remember the stream stdout pointed to the first time this cell runs, so a
# later redirection of sys.stdout can be undone. Only "name not defined yet"
# is expected here, so nothing else is swallowed.
try:
    res_stdout
except NameError:
    res_stdout = (sys.stdout if sys.stdout else sys.__stdout__)

In [None]:
# Parse the answer explicitly instead of eval()-ing whatever the user typed.
# Any integer is interpreted like bool(int), so '0' -> False and '1' -> True;
# non-numeric answers are True only for 'true', 'yes' or 'y'.
_answer = input('Do you want Verbose??: 0/1  ').strip().lower()
try:
    verbose = bool(int(_answer))
except ValueError:
    verbose = _answer in ('true', 'yes', 'y')

<h3>Changing File I/O & Matplotlib inlining if not verbose</h3>

In [None]:
# Quiet mode: everything printed from here on is appended (line-buffered) to
# the file 'stdoutbuffer', and matplotlib is switched to the 'Agg' backend.
if not verbose:
    sys.stdout = sys.__stdout__ = open('stdoutbuffer','a',buffering=1)
    mpl.use('Agg')
else:
    # Verbose mode: point stdout back at the stream saved in res_stdout and
    # enable inline figures (IPython magic, valid only inside a notebook).
    sys.stdout = sys.__stdout__ = res_stdout
    %matplotlib inline

<pre>




<h3>Activation Functions</h3>

In [None]:
class ActV:
    'Activation values. Every function takes the pre-activation array and returns an array of the same shape.'
    @staticmethod
    def sigmoid(x):
        'Logistic function 1/(1+e^-x), evaluated through tanh so that large |x| cannot overflow exp().'
        return 0.5*(1+np.tanh(0.5*x))
    @staticmethod
    def relu(x):
        'max(0, x), element-wise.'
        return np.maximum(0,x)
    @staticmethod
    def tanh(x):
        'Hyperbolic tangent. (2*sigmoid(x)-1 would be tanh(x/2), which does not match ActD.tanh.)'
        return np.tanh(x)
    @staticmethod
    def swish(x):
        'x * sigmoid(x)'
        return x*ActV.sigmoid(x)
    @staticmethod
    def softmax(x):
        'Row-wise softmax; the row maximum is subtracted first to keep exp() in range.'
        x = x-x.max(axis=1,keepdims=True)
        _ = np.exp(x)
        return _/np.sum(_,axis=1,keepdims=True)

class ActD:
    'Element-wise derivatives of the functions in ActV, evaluated at the pre-activation x.'
    @staticmethod
    def sigmoid(x):
        's * (1 - s) with s = sigmoid(x)'
        _ = ActV.sigmoid( x )
        return _ * (1-_)
    @staticmethod
    def relu(x):
        '1 for x>=0'
        return (np.sign(x)>=0)
    @staticmethod
    def tanh(x):
        '1 - tanh(x)^2'
        return 1-(ActV.tanh(x))**2
    @staticmethod
    def swish(x):
        "y' = y + sigmoid(x) * (1 - y), with y = swish(x)"
        _1 = ActV.swish(x)
        _2 = ActV.sigmoid(x)
        return _1 + _2*(1-_1)
    @staticmethod
    def softmax(x):
        # Only the diagonal of the softmax Jacobian (ds_i/dx_i); the full
        # derivative is a matrix per sample, so this is an approximation.
        _ = ActV.softmax( x )
        return _ * (1-_)

<h3>Adding "Swish" function to Keras</h3>

In [None]:
# Ref: https://stackoverflow.com/questions/43915482/how-do-you-create-a-custom-activation-function-with-keras
from keras.layers import Activation
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects

def swish2(x):
    'Swish on a Keras tensor: the input multiplied by its own sigmoid.'
    gate = K.sigmoid(x)
    return x*gate

# Make the activation available to Keras under the name 'swish'
get_custom_objects()['swish'] = Activation(swish2)

def addswish(model):
    'Append a stand-alone swish Activation layer to `model`.'
    swish_layer = Activation(swish2)
    model.add(swish_layer)

<h3>Cost Functions & Performance Metrics</h3>

In [None]:
class CostV:
    'Cost values. Both functions average over axis 0 and return a (1, n_columns) array.'
    def cross_entropy(act, pred):
        '-mean(act * log(pred)) per column, with pred adjusted so that log() never sees 0.'
        off_target = act!=1
        # Entries whose label is not 1 are shifted by e so their log is finite
        shifted = np.where(off_target, pred+np.e, pred)
        # A zero prediction exactly where the label is 1 is nudged to 1e-8
        zero_on_target = np.logical_and(~off_target, pred==0)
        shifted = np.where(zero_on_target, shifted+10**-8, shifted)
        weighted_log = act*np.log(shifted)
        return -1*np.mean(weighted_log, axis=0, keepdims=True)
    def MSE(act, pred):
        'Mean of the squared differences per column.'
        squared_error = (pred-act)**2
        return np.mean(squared_error, axis=0, keepdims=True)
    
class CostD:
    'Cost derivatives, same argument order as CostV: (actual, predicted).'
    def cross_entropy(act, pred):
        'Difference between prediction and label.'
        difference = pred - act
        return difference
    def MSE(act, pred):
        'Twice the difference between prediction and label.'
        difference = pred-act
        return 2*difference

class Metrices:
    'Performance metrics on one-hot encoded label matrices.'
    def accuracy(act, pred):
            'Fraction of rows in which every entry of `pred` equals `act`.'
            row_matches = np.all(act==pred, axis=1)
            return row_matches.mean()

def one_hot(y):
    '''Turn each row of a 2-D score array into a one-hot row marking its maximum.

    Exactly one entry per row is set: when several entries tie for the
    maximum, the first one wins (comparing against the row maximum would mark
    every tied entry and produce a multi-hot row).
    '''
    winners = np.argmax(y,axis=1)
    return 1*(np.arange(y.shape[1])==winners[:,np.newaxis])

def cattooht(Y):
    '''Categorical to one-hot.

    Flattens Y, and returns (matrix, categories): `categories` is the sorted
    list of distinct values, `matrix` is an int32 array with one row per
    element of Y and a 1 in the column of that element's category.
    '''
    flat = np.ravel(Y)
    categories = sorted(set(flat))
    column_of = {category:column for column,category in enumerate(categories)}
    encoded = np.zeros((flat.shape[0],len(categories)),dtype='int32')
    for row,value in enumerate(flat):
        encoded[row][column_of[value]] = 1
    return encoded,categories

<h3>Xavier-He Initialization</h3>
<img src="Images/XHE.png" height=450 width=600 align=left>

In [None]:
def initWB(IP,OP,function='relu',He=True,mode='gaussian'):
    '''Draw a random (IP, OP) weight matrix and a (1, OP) bias row.

    With He=True the spread depends on the activation `function` and on
    1/sqrt(IP+OP); with He=False the unit range / unit deviation is used.
    mode='uniform' samples from [-r, r), mode='gaussian' scales standard
    normal samples by s. Any other mode raises an Exception.
    '''
    if not He:
        r, s = 1, 1
    else:
        # Xavier & He initialization
        fan_scale = 1/(IP+OP)**0.5
        if function=='tanh':
            r, s = 4*6**0.5, 4*2**0.5
        elif function in ('sigmoid','softmax'):
            r, s = 6**0.5, 2**0.5
        else: # relu or swish function
            r, s = 12**0.5, 2
        r, s = r*fan_scale, s*fan_scale
    # Weights are always drawn before biases
    if mode=='gaussian':
        weights = np.random.randn(IP,OP)*s
        biases = np.random.randn(1,OP)*s
        return weights, biases
    if mode=='uniform':
        weights = 2*r*np.random.random((IP,OP))-r
        biases = 2*r*np.random.random((1,OP))-r
        return weights, biases
    raise Exception('Code should be unreachable')

<h3>Data split function family</h3>

In [None]:
def RSplit(X,Y,K=10):
    '''Random Split Function

    Shuffles the row indices of X and cuts them into consecutive chunks of
    round(n_rows/K) indices (at least 1). Returns a list with one
    (train_indices, test_indices) pair per chunk, where the test indices are
    the chunk and the train indices are all remaining rows. Y is not used.
    '''
    n_rows = X.shape[0]
    _ = list(range(n_rows))
    indxs = set(_)
    # round() can give 0 when K is much larger than n_rows; a zero step would
    # make range() raise, so every chunk holds at least one row.
    batch_size = max(1,round(n_rows/K))
    np.random.shuffle(_)
    index_set = []
    for k in range(0,n_rows,batch_size):
        test = set(_[k:k+batch_size])
        train = indxs - test
        index_set.append((list(train),list(test)))
    return index_set

def SSplit(X,Y,K=10,seed=True):
    '''Stratified Split Function

    Groups the row indices by their complete label row (every distinct row of
    Y is one class), shuffles each group, and cuts it into K test slices of
    round(group_size/K) indices. Fold i of the result joins one slice of every
    class. Returns a list of K (train_indices, test_indices) pairs.

    With seed=True the global NumPy RNG is re-seeded with 42, so the split is
    reproducible (and all later random draws are affected as well).
    Indices left over after K slices never appear in a test set; X is not used.
    '''
    if seed:
        np.random.seed(42)
    # Each class -> set of row indices. The whole label row is the key;
    # keying on the first column alone would only separate class 0 from the rest.
    c2i = {}
    for index,label in enumerate(Y):
        c2i.setdefault(tuple(label),set()).add(index)

    # Each class -> shuffled list of indices
    for cls in c2i:
        c2i[cls] = list(c2i[cls])
        np.random.shuffle(c2i[cls])

    # Each class with its K (train, test) index lists, in random fold order
    c2is = {}
    for cls in c2i:
        a = int(np.round(len(c2i[cls])/K))
        c2is[cls] = []
        for fold in range(K):
            test_indices  = c2i[cls][a*fold:a*(fold+1)]
            train_indices = c2i[cls][0:a*fold] + c2i[cls][a*(fold+1):]
            c2is[cls].append((train_indices,test_indices))
        np.random.shuffle(c2is[cls])

    # Fold i = union over classes of that class's i-th split
    index_set = []
    for i in range(K):
        train,test = set(),set()
        for cls in c2is:
            cls_train,cls_test = c2is[cls][i]
            train.update(cls_train)
            test.update(cls_test)
        index_set.append((list(train),list(test)))
    return index_set

def BSplit(X,Y,K=10):
    '''Biased Split Function

    Orders the row indices by their label row and cuts that ordering into
    consecutive chunks of ceil(n_rows/K) indices, so a chunk is dominated by
    few classes. Returns one (train_indices, test_indices) pair per chunk.
    '''
    ordered = sorted(np.arange(X.shape[0]),key = lambda i:list(Y[i]))
    everything = set(ordered)
    step = int(np.ceil(len(ordered)/K))
    held_out_sets = (set(ordered[start:start+step]) for start in range(0,len(ordered),step))
    return [(list(everything - held_out),list(held_out)) for held_out in held_out_sets]

def Split(X,Y,K=10,mode='R'):
    "Dispatch to a split function: 'S' -> SSplit, 'B' -> BSplit, anything else -> RSplit."
    if mode not in ('S','B'):
        return RSplit(X,Y,K)
    splitter = SSplit if mode=='S' else BSplit
    return splitter(X,Y,K)

<h3>Max-Pooling Code for Image Compression</h3>

In [None]:
# Ref: https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
def asStride(arr,sub_shape,stride):
    '''Return a strided view of `arr` made of sub-matrices of shape `sub_shape`.

    The view has shape (n_rows, n_cols, sub_rows, sub_cols) + arr.shape[2:];
    window [i, j] starts at row i*stride[0] and column j*stride[1] of `arr`.
    See also skimage.util.shape.view_as_windows()
    '''
    row_step,col_step = arr.strides[:2]
    height,width = arr.shape[:2]
    sub_rows,sub_cols = sub_shape
    n_rows = 1+(height-sub_rows)//stride[0]
    n_cols = 1+(width-sub_cols)//stride[1]
    window_shape = (n_rows,n_cols,sub_rows,sub_cols)+arr.shape[2:]
    window_strides = (stride[0]*row_step,stride[1]*col_step,row_step,col_step)+arr.strides[2:]
    return np.lib.stride_tricks.as_strided(arr,window_shape,strides=window_strides)

def poolingOverlap(mat,ksize,stride=None,method='max',pad=False):
    '''Pool a 2D or 3D array over (possibly overlapping) windows.

    <mat>: ndarray to pool; the first two axes are pooled.
    <ksize>: (ky, kx) window size.
    <stride>: (sy, sx) window step, or None to use <ksize> (no overlap).
    <method>: 'max' for max-pooling, anything else for mean-pooling.
    <pad>: if False, trailing rows/columns that do not fill a window are
           dropped and the output has size (n-f)//s+1 per axis;
           if True, <mat> is extended with NaN (ignored by the reduction)
           and the output has size ceil(n/s) per axis.

    Return the pooled array.
    '''
    rows, cols = mat.shape[:2]
    ky,kx = ksize
    sy,sx = (ky,kx) if stride is None else stride

    if not pad:
        # Keep only the region fully covered by windows
        mat_pad = mat[:(rows-ky)//sy*sy+ky, :(cols-kx)//sx*sx+kx, ...]
    else:
        ny = int(np.ceil(rows/float(sy)))
        nx = int(np.ceil(cols/float(sx)))
        padded_shape = ((ny-1)*sy+ky, (nx-1)*sx+kx) + mat.shape[2:]
        mat_pad = np.full(padded_shape,np.nan)
        mat_pad[:rows,:cols,...] = mat

    windows = asStride(mat_pad,ksize,(sy,sx))
    reducer = np.nanmax if method=='max' else np.nanmean
    return reducer(windows,axis=(2,3))

<h3>Global Dataset store & Dummy set generation</h3>

In [None]:
# Create the global dataset registry on first run only, so re-running this
# cell keeps datasets that were already stored.
try:
    datasets
except:
    datasets = {}

# 'Dummy' dataset: L random points in the unit square, labelled by whether
# they fall inside the quarter circle of radius `rad` around the origin.
name = 'Dummy'
L = 1000
# Two independent samples are drawn; only the first (X1, Y1) is stored below.
_1,_2 = list(np.random.random((L,2))), list(np.random.random((L,2)))
X1,X2 = [],[]
Y1,Y2 = [],[]
rad = 0.8
for i in range(L):
    a,b = _1[i][0],_1[i][1]
    if a**2+b**2<rad**2:
        Y1.append([1,0])    # inside  -> 'In'
        X1.append(_1[i])
    elif a**2+b**2>=rad**2:
        Y1.append([0,1])    # outside -> 'Out'
        X1.append(_1[i])
    a,b = _2[i][0],_2[i][1]
    if a**2+b**2<rad**2:
        Y2.append([1,0])
        X2.append(_2[i])
    elif a**2+b**2>=rad**2:
        Y2.append([0,1])
        X2.append(_2[i])
X1 = np.array(X1)
X2 = np.array(X2)
Y1 = np.array(Y1)
Y2 = np.array(Y2)
# Registry entries are (samples, one-hot labels, class names)
datasets[name] = (X1,Y1,['In','Out'])

In [None]:
# 'XOR' dataset: every (i, j) cell of an m x n grid, labelled by the parity of i+j
m, n = 5,5
grid_rows, grid_cols = np.arange(m), np.arange(n)
X = np.array( list(it.product(grid_rows,grid_cols)) )
parity = np.add.outer(grid_rows,grid_cols)%2   # parity[i][j] = (i+j) % 2
Y = np.array( cattooht( np.ravel(parity) )[0] )
datasets['XOR'] = (X,Y,['E','O'])

<h3>Loading MNIST & Cat-Dog datasets</h3>

In [None]:
# Expected layout: data/<dataset>/<category>/<image file>.
# Every image is read as grayscale and flattened; the label is the one-hot
# position of its category among the sorted category names of that dataset.
# Paths are joined explicitly instead of chdir()-ing through the tree, so the
# working directory stays untouched even if reading an image fails.
path = 'data'
res_path = os.getcwd()

for fldr in os.listdir(path):
    fldr_path = os.path.join(path,fldr)
    if fldr.startswith('.') or not os.path.isdir(fldr_path):
        continue
    _ = sorted([x for x in os.listdir(fldr_path) if not x.startswith('.')])
    name_index = {x:position for position,x in enumerate(_)}
    images, labels = [], []
    for category in _:
        label = [0]*len(_)
        label[name_index[category]] = 1
        category_path = os.path.join(fldr_path,category)
        for sample in os.listdir(category_path): #[:2000]:
            # Skip hidden files (the check has to look at the sample, not the dataset folder)
            if sample.startswith('.'):
                continue
            img_mat = io.imread(os.path.join(category_path,sample), as_gray=True)
            # Cat-Dog images are shrunk with 4x4 max-pooling
            if fldr=='Cat-Dog': img_mat = poolingOverlap(img_mat,(4,4))
            img_mat = np.ravel(img_mat)
            images.append(img_mat)
            labels.append(label)
    datasets[fldr] = (np.array(images), np.array(labels), _)

# Store the samples of every dataset (including the generated ones) as float64
for i in datasets:
    datasets[i] = np.array(datasets[i][0],dtype='float64'), datasets[i][1], datasets[i][2]

<h3>Back-propagation Algorithm for Neural Network</h3>
<img src="Images/BP.png" height=450 width=600 align=left>

<h3>Neural Network Class</h3>

In [None]:
class NN:
    def __init__(self):
        self.Num, self.fun = [], []
        self.IP, self.OP, self.W, self.B, self.delta = {}, {}, {}, {}, {}
        self.beta1, self.beta2, self.eps = 0.9, 0.999, 10**-8

    def data_feed( self, M, L, targets):
        self.raw, self.labels, self.target_names = M, L, targets

    def data_validate( self, M=np.array([]), L=np.array([]) ):
        self.vraw, self.vlabels = M, L

    def add(self,N,f='relu'):
        self.Num.append(N); self.fun.append(f)

    def data_preprocess(self,mode='standard'):
        sp = np.nan_to_num
        try:
            mode = self.preprocess_mode
        except:
            self.preprocess_mode = mode
        if mode=='scale':
            try:
                self.mn, self.mx
            except:
                self.mn, self.mx = self.raw.min(axis=0), self.raw.max(axis=0)
            mx = np.where(self.mx==self.mn,self.mx+1,self.mx)
            self.data  = sp((self.raw - self.mn)/(mx-self.mn))
            try: # If validation data is defined
                self.vdata = sp((self.vraw - self.mn)/(self.mx-self.mn))
            except:
                self.vdata = self.data
        elif mode=='standard':
            try:
                self.mean, self.std
            except:
                self.mean, self.std   = self.raw.mean(axis=0), self.raw.std(axis=0)
            std = np.where(self.std==0,1,self.std)
            self.data = sp((self.raw-self.mean)/std)
            try: # If validation data is defined
                self.vdata  =  sp((self.vraw-self.mean)/std)
            except:
                self.vdata = self.data
        else:
            raise Exception('Code should be unreachable')
    
    def initialize_layers(self,He=True,mode='gaussian'):
        '''Create weights and biases for every added layer via initWB.

        The first layer takes as many inputs as self.data has columns, each
        later layer as many as the previous layer has units.
        '''
        for i,units in enumerate(self.Num):
            inputs = self.Num[i-1] if i else self.data.shape[1]
            self.W[i],self.B[i] = initWB(inputs,units,self.fun[i],He,mode)
                
    def forward_prop(self,predict=False):
        self.IP[0] = self.fdata
        for i in range(len(self.Num)):
            wx_b = np.dot(self.IP[i],self.W[i])+self.B[i]
            if not predict:
                self.OP[i] = wx_b
            _ = eval('ActV.{0}(wx_b)'.format(self.fun[i]))
            self.IP[i+1] = _
            if predict:
                del self.IP[i]
        return self.IP[len(self.Num)]

    def back_prop(self,debug=False):
        for i in range(len(self.Num)-1,-1,-1):
            if debug: print('Layer',i)
            if i==(len(self.Num)-1):
                costD = eval('CostD.{0}(self.flabels,self.IP[len(self.Num)])'.format(self.cost))
                actvD = eval('ActD.{0}(self.OP[i])'.format(self.fun[i]))
                self.delta[i] = costD * actvD
                if debug: print('>>',self.IP[i].shape,costD.shape,actvD.shape,self.delta[i].shape)
            else:
                costD = np.dot(self.W[i+1],self.delta[i+1].T).T # ((6,2),(100,2).T).T => (100,6)
                actvD = eval('ActD.{0}(self.OP[i])'.format(self.fun[i])) #(100,6)
                self.delta[i] = costD * actvD
                if debug: print('>>',self.IP[i].shape,costD.shape,actvD.shape,self.delta[i].shape)
            uW = np.dot( self.IP[i].T , self.delta[i] ) / self.IP[i].shape[0]
            uB = np.mean( self.delta[i] ,axis=0, keepdims=True)
            if debug: print( self.W[i].shape , self.B[i].shape)
            if debug: print( uW.shape , uB.shape)
            self.W[i] -= self.learning_rate*uW
            self.B[i] -= self.learning_rate*uB
            if debug: input()

    def back_prop2(self,Iteration_Count=1,debug=False,amsgrad=False):
        if Iteration_Count==1:
            self.UW, self.UB, self.SW, self.SB =  deepcopy(self.W), deepcopy(self.B), deepcopy(self.W), deepcopy(self.B)
            for i in self.UW:
                self.UW[i], self.UB[i], self.SW[i], self.SB[i] = 0*self.UW[i], 0*self.UB[i], 0*self.SW[i], 0*self.SB[i]
        for i in range(len(self.Num)-1,-1,-1):
            if i==(len(self.Num)-1):
                costD = eval('CostD.{0}(self.flabels,self.IP[len(self.Num)])'.format(self.cost))
                actvD = eval('ActD.{0}(self.OP[i])'.format(self.fun[i]))
                self.delta[i] = costD * actvD
            else:
                costD = np.dot(self.W[i+1],self.delta[i+1].T).T
                actvD = eval('ActD.{0}(self.OP[i])'.format(self.fun[i]))
                self.delta[i] = costD * actvD
            uW = np.dot( self.IP[i].T , self.delta[i] ) / self.IP[i].shape[0]
            uB = np.mean( self.delta[i] ,axis=0, keepdims=True)
            # Eqn 1
            self.UW[i] = self.beta1*self.UW[i] + (1-self.beta1)*uW
            self.UB[i] = self.beta1*self.UB[i] + (1-self.beta1)*uB
            # Eqn 2
            self.SW[i] = self.beta2*self.SW[i] + (1-self.beta2)*uW**2
            self.SB[i] = self.beta2*self.SB[i] + (1-self.beta2)*uB**2
            # Eqn 3
            UW = self.UW[i]/(1-self.beta1**Iteration_Count)
            UB = self.UB[i]/(1-self.beta1**Iteration_Count)
            # Eqn 4
            SW = self.SW[i]/(1-self.beta2**Iteration_Count)
            SB = self.SB[i]/(1-self.beta2**Iteration_Count)
            # Eqn 5
            self.W[i] -= self.learning_rate*UW/(SW**0.5+self.eps)
            self.B[i] -= self.learning_rate*UB/(SB**0.5+self.eps)
            if np.isnan(self.W[i]).any() or np.isnan(self.B[i]).any():
                raise Exception('NAN value arises')

    def back_prop3(self,Epoch_Count=1,debug=False):
        for i in range(len(self.Num)-1,-1,-1):
            if i==(len(self.Num)-1):
                costD = eval('CostD.{0}(self.flabels,self.IP[len(self.Num)])'.format(self.cost))
                actvD = eval('ActD.{0}(self.OP[i])'.format(self.fun[i]))
                self.delta[i] = costD * actvD
            else:
                costD = np.dot(self.W[i+1],self.delta[i+1].T).T
                actvD = eval('ActD.{0}(self.OP[i])'.format(self.fun[i]))
                self.delta[i] = costD * actvD
            uW = np.dot( self.IP[i].T , self.delta[i] ) / self.IP[i].shape[0]
            uB = np.mean( self.delta[i] ,axis=0, keepdims=True)
            # Eqn 1
            _W1 = (1-self.beta1)*uW/(1-self.beta1**Epoch_Count)
            _B1 = (1-self.beta1)*uB/(1-self.beta1**Epoch_Count)
            # Eqn 2
            _W2 = (1-self.beta2)*uW**2/(1-self.beta2**Epoch_Count)
            _B2 = (1-self.beta2)*uB**2/(1-self.beta2**Epoch_Count)
            # Eqn 3
            self.W[i] -= self.learning_rate*_W1/(_W2**0.5+self.eps)
            self.B[i] -= self.learning_rate*_B1/(_B2**0.5+self.eps)
            if np.isnan(self.W[i]).any() or np.isnan(self.B[i]).any():
                raise Exception('NAN value arises')

    def feed_adam(beta1, beta2, eps):
        self.beta1, self.beta2, self.eps = beta1, beta2, eps

    def plot_feed(self,feed=True):
        '''Evaluate the network on the training data and optionally record the results.

        Returns (actual, predicted) class indices for the training data.
        With feed=True the mean cost, the metric value and the micro/macro F1
        scores on the training data, plus the metric value on the validation
        data, are appended to the history lists used for plotting.
        '''
        self.fdata,self.flabels = self.data, self.labels
        y_pred = self.forward_prop(predict=True)
        costV  = eval('CostV.{0}(self.flabels,y_pred)'.format(self.cost))
        y_pred = one_hot(y_pred)
        mvalue = eval('Metrices.{0}(self.flabels,y_pred)'.format(self.metric))
        # Class index = position of the 1 in every one-hot row
        act2 = [ list(rw).index(1) for rw in self.flabels ]
        pred2 = [ list(rw).index(1) for rw in y_pred ]
        if not feed:
            return act2, pred2

        self.costs.append( np.mean(costV) )
        self.mvalues.append( mvalue )
        self.f1m.append( f1_score(act2,pred2,average='micro') )
        self.f1M.append( f1_score(act2,pred2,average='macro') )

        # Same metric on the validation data
        self.fdata,self.flabels = self.vdata, self.vlabels
        y_pred = one_hot( self.forward_prop(predict=True) )
        vmvalue = eval('Metrices.{0}(self.flabels,y_pred)'.format(self.metric))
        self.vmvalues.append( vmvalue )
        return act2, pred2


    def train(self,epochs=1000,batchsize=30,learning_rate=0.001,\
              optimizer='adam',cost='cross_entropy',metric='accuracy',es=(True,0,True),amsgrad=False):
        '''Train the network with mini-batches and optional early stopping.

        optimizer : 'gd' -> back_prop, 'adam' -> back_prop2, anything else -> back_prop3.
        es        : (enable, patience, restore_best). The validation cost is
                    checked after every epoch; training stops at the first
                    non-improving epoch that follows `patience` earlier
                    non-improving epochs. patience=-1 disables the check.
                    With restore_best the weights of the best epoch are put
                    back before stopping.
        amsgrad   : accepted but not used.

        The histories (costs, mvalues, f1m, f1M, vmvalues) get one entry for
        the untrained network and one per completed epoch; the epoch that
        triggers early stopping is not recorded.
        Creates the file 'continue_next_epoch' in the working directory;
        removing it while training runs stops the loop before the next epoch.
        '''
        self.cost, self.metric, self.learning_rate = cost, metric, learning_rate
        self.costs, self.mvalues, self.f1m, self.f1M, self.vmvalues = [], [], [], [], []
        if es[0]:
            # Best validation cost so far, its weights, and the number of
            # non-improving epochs since then. The snapshot is only replaced
            # on improvement, so "restore" really returns the best weights.
            best_entropy, bad_epochs = np.inf, 0
            best_W, best_B = deepcopy(self.W), deepcopy(self.B)
        # Random value at starting NN
        self.plot_feed()
        with open('continue_next_epoch','w'):
            pass

        for T in range(epochs):
            if 'continue_next_epoch' not in os.listdir(): break
            init = datetime.now()
            print('Epoch {0:{1}} ['.format(T+1,int(np.log10(epochs+1))+1),end='')
            mb_indx, splits = 0, int(np.ceil(self.data.shape[0]/batchsize))
            # The "test" part of every random split serves as one mini-batch
            self.index_set = Split(self.data, self.labels, splits ,'R')
            for ln in range(len(self.index_set)):
                train_indx, test_indx = self.index_set[ln]
                self.fdata,self.flabels = self.data[test_indx],self.labels[test_indx]
                self.forward_prop()
                if optimizer=='gd':
                    self.back_prop()
                elif optimizer=='adam':
                    self.back_prop2(T*len(self.index_set)+(ln+1))
                else:
                    self.back_prop3(T+1)
                # Progress bar: one '=' for roughly every 4% of the mini-batches
                if(mb_indx>=(splits*0.04)):
                    print('=',end='')
                    mb_indx = 0
                mb_indx+=1
            # Early Stopping using Validation Set
            if es[0] and es[1]!=-1:
                delta = 0 # Exploring with compromising observed value
                self.fdata,self.flabels = self.vdata,self.vlabels
                y_pred = self.forward_prop(predict=True)
                costV  = getattr(CostV,self.cost)(self.flabels,y_pred)
                cur_entropy = np.mean(costV)
                if ( cur_entropy - best_entropy) > delta :
                    if bad_epochs==es[1]:
                        if es[2]: # Restoring Best Weights
                            self.W,self.B = best_W, best_B
                        print(']\n',best_entropy,'==>',cur_entropy)
                        break
                    bad_epochs += 1
                else:
                    best_entropy, bad_epochs = cur_entropy, 0
                    best_W, best_B = deepcopy(self.W), deepcopy(self.B)
            # To plot results for entire datasets
            self.plot_feed()
            print('] Loss {0:.6e}, Accuracy {1:.2f}%, Accuracy-V {2:.2f}%, Time {3:}'.format(self.costs[-1],self.mvalues[-1]*100,self.vmvalues[-1]*100,datetime.now()-init))

    def krs(self,epochs=1000,batchsize=30,learning_rate=0.001,\
              optimizer='adam',cost='cross_entropy',metric='accuracy',es=(True,0,True)):
        '''Build and fit a Keras model with the same layer layout; return its validation report.

        The model is always compiled with categorical cross-entropy and Adam;
        the `optimizer` and `cost` arguments are not used.
        es = (enable, patience, restore_best); patience=-1 disables early stopping.
        The fitted model is kept in self.kmodel.
        '''
        model = Sequential()
        # NOTE(review): this puts a swish Activation layer in front of the first Dense layer - confirm that is intended
        addswish(model)
        model.add(Dense(self.Num[0], activation=self.fun[0], input_dim=self.data.shape[1]))
        # Hidden layers between the first layer and the output layer
        for units,activation in zip(self.Num[1:-1],self.fun[1:-1]):
            model.add(Dense(units, activation=activation))
        model.add(Dense(self.labels.shape[1], activation='softmax'))

        if es[0] and es[1]!=-1:
            cb = [EarlyStopping(monitor='val_loss', patience=es[1], restore_best_weights=es[2])]
        else:
            cb = []
        model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=learning_rate,amsgrad=False), metrics=[metric])
        model.fit(self.data, self.labels, epochs=epochs, batch_size=batchsize,\
                  validation_data=(self.vdata, self.vlabels), callbacks=cb )
        y_pred = one_hot(model.predict(self.vdata))
        self.kmodel = model
        return classification_report(self.vlabels, y_pred, target_names=self.target_names, digits = 4 )

    def report(self,model=None):
        '''Return (training_report, validation_report) as classification-report strings.

        Predictions come from `model.predict` when a model is passed,
        otherwise from this network's own forward pass.
        '''
        if model:
            train_pred = one_hot(model.predict(self.data))
            valid_pred = one_hot(model.predict(self.vdata))
        else:
            self.fdata, self.flabels = self.data, self.labels
            train_pred = one_hot( self.forward_prop(predict=True) )
            self.fdata, self.flabels = self.vdata, self.vlabels
            valid_pred = one_hot( self.forward_prop(predict=True) )

        reports = tuple(
            classification_report(truth, guess, target_names=self.target_names, digits = 4 )
            for truth,guess in ((self.labels,train_pred),(self.vlabels,valid_pred))
        )
        return reports

    def plot(self,prms={},learning_plot=False):
        '''Plot the training histories over the epochs and save the figure as '<title>.png'.

        The cost curve is rescaled into the value range of the other curves
        so that all of them share one axis. With learning_plot=True the y axis
        is stretched logarithmically towards the best value.
        prms: hyper-parameters of the run; those named in the global
        `grid_params` are listed in the second title line.
        Does nothing when no epoch has been recorded yet.
        '''
        if len(self.costs)<2:
            # Only the entry of the untrained network exists; nothing to draw
            return
        mpl.rcParams['figure.dpi'] = 100
        plt.close()
        plt.subplot(111)

        # Entry 0 of every history belongs to the untrained network and is left out
        ls = [ self.mvalues[1:], self.f1M[1:], self.f1m[1:], self.vmvalues[1:] ]
        c1, c2 = min((min(l) for l in ls)), max((max(l) for l in ls))
        costs = np.array(self.costs[1:])
        cost_range = max(self.costs[1:])-min(self.costs[1:])
        if cost_range==0:
            cost_range = 1 # constant cost: avoid 0/0, the curve sits at c1
        _ = (costs-min(self.costs[1:])) / cost_range
        _ = list( (_*(c2-c1)+c1) )
        ls = [_]+ls
        for i in range(len(ls)):
            ls[i] = np.array(ls[i])
        s = np.exp(-5)
        if learning_plot:
            for i in range(len(ls)):
                ls[i] = -np.log((c2+s)-np.array(ls[i]))
            # Best Depiction of Learning process
            indx   = list( np.linspace(-np.log((c2+s)-c1),-np.log((c2+s)-c2),10) )
            yticks = np.round(np.linspace(c1,c2,10),3)

        _1 = plt.plot(np.arange(1,len(ls[0])+1), ls[0],'-',label=self.cost)
        _2 = plt.plot(np.arange(1,len(ls[1])+1), ls[1],'*',label='Accuracy-T')
        _3 = plt.plot(np.arange(1,len(ls[2])+1), ls[2],'-.',label='F1-Macro')
        _4 = plt.plot(np.arange(1,len(ls[3])+1), ls[3],':',label='F1-Micro')
        _5 = plt.plot(np.arange(1,len(ls[4])+1), ls[4],'--',label='Accuracy-V')

        if learning_plot:
            plt.yticks(indx,yticks)

        # Headline accuracy: 90% training + 10% validation accuracy of the last epoch
        p1 = '{0} Accuracy {1:.2f}%'.format(self.name,(self.mvalues[-1]*0.9+self.vmvalues[-1]*0.1)*100)
        # Grid Search Hyperparameters; names missing from prms are skipped instead of raising KeyError
        p2 = ', '.join(str(prms[x]) for x in grid_params if x in prms)
        title = '\n'.join((p1,p2))
        plt.title(title)
        plt.xlabel('Epochs')
        plt.legend(loc=0)
        plt.savefig(title+'.png',dpi=300,bbox_inches = 'tight')
        if verbose:
            plt.show()
        plt.close()
        
    def missed(self,diff_validation=True):
        '''Save every misclassified sample as an image inside a fresh 'missed' directory.

        Files are named '<actual>,<predicted>,<running number>.png'. The
        training data is always checked, the validation data only when
        diff_validation is true. Any error while writing is ignored
        (best effort); the working directory is restored in every case.
        NOTE(review): the running number restarts for the validation data, so
        its files can overwrite those written for the training data - confirm.
        '''
        # Start from an empty directory; a failing removal is ignored
        try:
            shutil.rmtree('missed')
        except:
            pass
        os.mkdir('missed')
        os.chdir('missed')
        try:
            batches = [(self.data,self.labels)]
            if diff_validation:
                batches.append( (self.vdata,self.vlabels) )
            for data,labels in batches:
                self.fdata, self.flabels = deepcopy(data), deepcopy(labels)
                pred = one_hot( self.forward_prop(predict=True) )
                act  = self.flabels
                count = {}
                for i in range(len(self.fdata)):
                    if (act[i]==pred[i]).all():
                        continue
                    # Position of the 1 in the one-hot row selects the class name
                    lbl_a = self.target_names[ np.sum( act[i]*np.arange(act[i].shape[0])) ]
                    lbl_p = self.target_names[ np.sum(pred[i]*np.arange(pred[i].shape[0])) ]
                    pair = (lbl_a,lbl_p)
                    count[pair] = count.get(pair,0)+1
                    mat = deepcopy( self.fdata[i] )
                    # Undo the preprocessing with whichever statistics exist
                    try:
                        mat = mat*(self.mx-self.mn)+self.mn
                    except:
                        mat = mat*self.std+self.mean
                    side = round(mat.shape[0]**0.5)
                    mat = mat.reshape(side,side)
                    mpl.image.imsave('{1},{2},{0}.png'.format(count[pair],lbl_a,lbl_p),mat)
        except:
            pass
        finally:
            os.chdir('..')
        
    def save_model(self,model_store='models'):
        if model_store not in os.listdir():
            os.mkdir(model_store)
        try:
            try:
                shutil.rmtree('{}/{}'.format(model_store,self.name))
            except:
                pass
            finally:
                os.mkdir('{}/{}'.format(model_store,self.name))
                os.chdir('{}/{}'.format(model_store,self.name))
            with open('config','w') as f:
                print(repr(self.Num) ,file=f)
                print(repr(self.fun) ,file=f)
                print(self.preprocess_mode,end = '',file=f)                
            dct = {}
            with open('parameters','wb') as f:
                if self.preprocess_mode == 'standard':
                    dct['mean'], dct['std'] = self.mean, self.std
                elif self.preprocess_mode == 'scale':
                    dct['mn'], dct['mx'] = self.mn, self.mx
                else:
                    raise Exception('Code should be unreachable')
                for i in self.W:
                    dct['W{}'.format(i)] = self.W[i]
                    dct['B{}'.format(i)] = self.B[i]
                np.savez(f,**dct)
        except Exception as exc:
            pass
        finally:
            os.chdir('../..')
    
    def load_model(self,model_store = 'models'):
        if model_store not in os.listdir():
            raise Exception("{} directory does not Exist".format(model_store))
        try:
            os.chdir('{}/{}'.format(model_store,self.name))
            with open('config') as f:
                self.Num = eval(f.readline().strip())
                self.fun = eval(f.readline().strip())
                self.preprocess_mode = f.readline().strip()
            with open('parameters','rb') as f:
                npzfile = np.load(f)
                if self.preprocess_mode == 'standard':
                    self.mean, self.std = npzfile['mean'], npzfile['std']
                elif self.preprocess_mode == 'scale':
                    self.mn, self.mx = npzfile['mn'], npzfile['mx']
                else:
                    raise Exception('Code should be unreachable')
                for i in range(len(self.Num)):
                    self.W[i] = npzfile['W{}'.format(i)]
                    self.B[i] = npzfile['B{}'.format(i)]
        except Exception as exc:
            pass
        finally:
            os.chdir('../..')
    

<h3>Dataset Evaluation Wrapper</h3>

In [None]:
# Early Stopping Parameters
# (Enable, ValidationPartition, Patience, Restore)

def evaldata(name,NumFun,prprc='standard',He=True,initmode='gaussian',\
             epochs=1000,batchsize=30,lr=0.001,opt='adam',es=(True,False,0,True),krs=True):
    '''Train and evaluate one network configuration on the dataset called `name`.

    NumFun : sequence of (units, activation) pairs, one per hidden layer;
             a softmax output layer is appended automatically.
    es     : (enable early stopping, hold out a validation fold, patience, restore best weights).
             Without a held-out fold the training data doubles as validation data.
    krs    : also fit the Keras counterpart and, afterwards, export the misclassified samples.
    Returns the training histories (accuracy, cost, F1-macro, F1-micro).
    '''
    params = locals()   # must stay first: captures exactly the call arguments
    global grid_params
    if 'grid_params' not in globals():
        grid_params = []

    def show_reports(train_report,validation_report):
        'Print the training report, and the validation report when a fold was held out.'
        print('\n\t\t\t','-'*8,'Classification Report on Training data','-'*8,'\n',train_report)
        if es[1]: print('\n\t\t','-'*8,'Classification Report on Validation data','-'*8,'\n',validation_report)

    print('Dataset under processing: ',name)
    X,Y,targets = datasets[name]
    net = NN()
    net.name = name
    if not es[1]:
        X1, Y1, X2, Y2 = X, Y, X, Y
    else:
        # One randomly chosen fold of a 10-fold stratified split is held out
        index_set = SSplit(X,Y,10)
        np.random.shuffle(index_set)
        train_index, test_index = index_set[0]
        X1, Y1 = X[train_index], Y[train_index]
        X2, Y2 = X[test_index], Y[test_index]
    net.data_feed(X1,Y1,targets) # Feeding Raw data
    net.data_validate(X2,Y2)     # Used for Early Stopping
    net.data_preprocess(prprc)
    # Hidden layers, then the output layer
    for units,activation in NumFun:
        net.add(units,activation)
    net.add(Y.shape[1],'softmax')
    net.initialize_layers(He,initmode)

    # Early-stopping settings without the "hold out a fold" flag
    es_settings = es[0:1]+es[2:]
    print('\n\t\t','#'*16,'NumPy Implementation','#'*16,'\n')
    net.train( epochs, batchsize, lr, opt, 'cross_entropy', 'accuracy', es_settings )
    show_reports(*net.report())
    if krs:
        print('\n\t\t','#'*16,'Keras Implementation','#'*16,'\n')
        net.krs( epochs, batchsize, lr, opt, 'cross_entropy', 'accuracy', es_settings )
        show_reports(*net.report(net.kmodel))

    net.plot(params)
    if krs:
        net.missed(es[1])
    net.save_model()
    net.load_model()
    return net.mvalues, net.costs, net.f1M, net.f1m

<h3>Grid Search for Hyper-parameter tuning</h3>
<pre>
<b>Worst-case sample configuration given below (9k+ executions)</b><br>
    ########################################################
    ################ DON'T TRY THIS AT HOME ################
    ########################################################

    dct = {
        'A datasets'       : ['Dummy'],
        'B units'          : list(zip((392,784,1568),(64,128,128))),
        'C functions'      : it.product(('sigmoid','tanh','relu','swish'),('sigmoid','tanh','relu','swish')),
        'D preproc'        : ['scale','standard'],
        'E He'             : [True,False],
        'F initmodes'      : ['uniform','gaussian'],
        'G epochs'         : [10,20,40],
        'H batchsize'      : [128,256,512,1024],
        'I learning_rate'  : [0.001,0.0003,0.0001],
        'J optimizer'      : ['adam'],
        'K early_stopping' : [(True,True,5,True)],
        'L keras'          : [True],
    }
    res = grid_search(dct)
    grid_plot(res, dct)


In [None]:
def grid_search(dct):
    '''Run `evaldata` once for every combination of a hyper-parameter grid.

    Parameters
    ----------
    dct : dict
        Maps a label to an iterable of candidate values. The values are read
        in the dict's insertion order and passed positionally to `evaldata`,
        so the order must be: dataset name, per-layer units, per-layer
        activation functions, followed by the remaining `evaldata` arguments.
        The units and functions entries are zipped into one per-layer config
        ((units, activation), ...) before the call.

    Returns
    -------
    dict
        {'Accuracy': {...}, 'Cost': {...}, 'F1-Macro': {...}, 'F1-Micro': {...}}
        where each inner dict maps the full parameter tuple of a run to the
        last element of the corresponding series returned by `evaldata`.
    '''
    grid_values = { 'Accuracy':{}, 'Cost':{}, 'F1-Macro':{}, 'F1-Micro':{} }
    # The series returned by evaldata are matched positionally against the
    # alphabetically sorted metric names.
    metric_names = sorted(grid_values)
    for combo in it.product(*dct.values()):
        prms = list(combo)
        # Merge the separate "units" and "functions" entries into one config.
        prms[1:3] = [tuple(zip(prms[1],prms[2]))]
        prms = tuple(prms)
        print('STARTED  ',prms)
        # Direct call instead of eval() on a repr-built string: the arguments
        # reach evaldata unchanged, even when their repr is not valid source.
        histories = evaldata(*prms)
        print('COMPLETED',prms,end='\n'*3)
        for metric, history in zip(metric_names, histories):
            grid_values[metric][prms] = history[-1]
    return grid_values

def grid_plot(res,dct=None):
    '''Plot the final value of each metric against the hyper-parameter that varies.

    Under assumption that only one quantity will be varied at a time.

    Parameters
    ----------
    res : dict
        Output of `grid_search`: {metric name: {parameter tuple: final value}}.
    dct : dict, optional
        Not used; accepted so that both `grid_plot(res)` and
        `grid_plot(res, dct)` work.

    For every metric one figure is saved as '<metric>, <parameter>.png' in the
    current working directory, and also shown when the global `verbose` is
    true. A metric is skipped when none of the inspected parameters changes
    between runs.
    '''
    # Names for the first eight positions of a parameter tuple; only these
    # positions are inspected for variation.
    tmp_grid_params = ['DataSet','Config','Preprocess','He','InitMode','Epochs','Batch_Size','Learning_Rate']

    def plot(metric,inner_dct,color):
        'Draw and save the figure for a single metric.'
        # One fixed run order shared by the x labels and the y values.
        ordered = sorted(inner_dct)
        y_values = [inner_dct[params] for params in ordered]
        param_vals = {i:set() for i in range(len(tmp_grid_params))}
        for params in ordered:
            for indx in range(len(tmp_grid_params)):
                param_vals[ indx ].add(params[indx])

        # Pick the first position that takes more than one value.
        for indx in range(len(tmp_grid_params)):
            if len(param_vals[indx])>1:
                break
        else: # No graph can be shown with no changing values
            return

        if tmp_grid_params[indx]=='Config':
            # A config is ((units, activation), ...): find which layer and
            # which field (0 = units, 1 = activation) changes between runs.
            # Assumes every config has as many layers as the first one.
            sample = next(iter(inner_dct))[indx]
            inner_indx_dct = {(i,j):set() for i in range(len(sample)) for j in range(2)}
            for params in ordered:
                for i in range(len(sample)):
                    for j in range(2):
                        inner_indx_dct[(i,j)].add(params[indx][i][j])
            for inner_indx in inner_indx_dct:
                if len(inner_indx_dct[inner_indx])>1:
                    break
            else:
                return
            i,j = inner_indx
            par_name = 'Layer {0}'.format(i+1)+' '+('Activation' if j else 'Units')
            # One label per run, in the same order as y_values, so the tick
            # count always matches the number of plotted points.
            x_values = [params[indx][i][j] for params in ordered]
        else:
            par_name = tmp_grid_params[indx]
            x_values = [params[indx] for params in ordered]

        ind = np.arange(len(y_values))
        plt.xlabel(par_name)
        plt.ylabel(metric)
        plt.xticks( ind,x_values)
        # Boolean-like (0/1) and non-numeric axes get markers; numeric axes a line.
        try:
            styl = ('*' if set(map(int,x_values))=={0,1} else '-')
        except (TypeError, ValueError):
            styl = '*'
        plt.plot(   ind,y_values,styl,color=color)
        title = ', '.join((metric,par_name))
        plt.title(title)
        plt.savefig(title+'.png',dpi=300,bbox_inches = 'tight')
        if verbose:
            plt.show()
        plt.close()

    for metric,color in zip(res,('g','r','b','y')):
        plt.close()
        plot(metric,res[metric],color)
        
def multi_grid_search(dct,plot=False):
    '''Run one grid search per entry of `dct`, each inside its own directory.

    Parameters
    ----------
    dct : dict
        Maps a directory name to a hyper-parameter grid accepted by
        `grid_search`. The special key 'next' holds a nested dict of the same
        form, whose entries are processed recursively one at a time.
    plot : bool
        When true, `grid_plot` is called on the results of every grid.

    For every grid a directory with the entry's name is deleted if present,
    re-created, and made the working directory while the grid runs. The
    previous working directory is always restored, including when the grid
    search raises. A failure in plotting is printed and otherwise ignored.
    '''
    for i in dct:
        if i=='next':
            for dct2 in dct[i]:
                multi_grid_search({dct2:dct[i][dct2]},plot)
        else:
            start_dir = os.getcwd()
            # Start from an empty output directory; a missing one is fine.
            shutil.rmtree(i, ignore_errors=True)
            os.mkdir(i)
            os.chdir(i)
            try:
                print('\n'+'#'*16+' Grid Search Started in {0} '.format(i)+'#'*16)
                res = grid_search(dct[i])
                if plot:
                    # Plotting is best-effort: report the problem and move on.
                    try:
                        grid_plot(res,dct)
                    except Exception as exc:
                        print(exc)
            finally:
                # Leave the working directory as we found it, even on failure.
                os.chdir(start_dir)

<pre>




<h1>Part 0 Pre-execution checks & Sample executions</h1>

In [None]:
# Relax the warning filter for the experiment runs below: warnings are now
# suppressed instead of being raised as errors (the import cell at the top of
# the notebook installs the "error" filter).
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Send everything printed from here on to the file 'stdoutbuffer' (append
# mode, line-buffered), regardless of the `verbose` choice made earlier.
# NOTE(review): the file handle is never closed explicitly.
sys.stdout = sys.__stdout__ = open('stdoutbuffer','a',buffering=1)

In [None]:
# Display the current working directory; the experiment cells below create
# their output directories relative to it.
# os.chdir('..')
os.getcwd()

In [None]:
# Short names of tunable hyper-parameters.
# NOTE(review): not referenced by any function visible in this part of the
# notebook - confirm it is still needed.
grid_params = ['NumFun','prprc','He', 'initmode', 'batchsize','lr']

In [None]:
# Sample run on the 'Dummy' dataset with two hidden layers (4 swish, 3 relu).
# Positional arguments after config, in the order used by grid_search's
# grids: preprocessing, He flag, init mode, epochs, batch size, learning
# rate, optimizer, early-stopping tuple, keras flag.
name,config = 'Dummy',[(4,'swish'),(3,'relu')]
_ = evaldata(name,config,'standard',True,'gaussian',100,30,0.01,'adam',(True,True,-1,True),False)

In [None]:
# Sample run on the 'XOR' dataset: two sigmoid layers of 10 units, batch size
# 1, optimizer 'gd', 1000 epochs. The second early-stopping flag is False, so
# evaldata prints no validation classification report for this run.
name,config = 'XOR',[(10,'sigmoid'),(10,'sigmoid'),]
_ = evaldata(name,config,'standard',True,'uniform',1000,1,0.001,'gd',(True,False,-1,True),False)

In [None]:
# Sample run on 'MNIST': two swish layers (1568 and 256 units), 10 epochs,
# batch size 2000, optimizer 'myopt', NumPy implementation only (keras flag False).
name,config = 'MNIST',[(1568,'swish'),(256,'swish'),]
_1 = evaldata(name,config,'standard',True,'gaussian',10,2000,0.001,'myopt',(True,True,-1,True),False)

In [None]:
# Sample run on 'Cat-Dog': three hidden layers (2048 relu, 256 relu, 64 tanh),
# 100 epochs, batch size 200, learning rate 0.0001. The second early-stopping
# flag is False, so evaldata prints no validation classification report.
name,config = 'Cat-Dog',[(2048,'relu'),(256,'relu'),(64,'tanh')]
_ = evaldata(name,config,'standard',True,'gaussian',100,200,0.0001,'myopt',(True,False,-1,True),False)

<pre>




<h1>Part 1 "MNIST" Evaluation and Experiments</h1>

In [None]:
# Short names of tunable hyper-parameters (same list as in Part 0).
# NOTE(review): not referenced by any function visible in this part of the
# notebook - confirm it is still needed.
grid_params = ['NumFun','prprc','He', 'initmode', 'batchsize','lr']

<h3>Task 1 - Varying Number of Layers</h3>

In [None]:
# Part 1 / Task 1: train MNIST networks with 1, 2 and 3 hidden layers and plot
# the final accuracy and F1 scores against the number of hidden layers.

# Work inside an empty 'Task1' directory (any previous one is discarded).
shutil.rmtree('Task1', ignore_errors=True)
os.mkdir('Task1')
os.chdir('Task1')

try:
    grid_params = ['NumFun','prprc','He', 'initmode', 'batchsize','lr']
    name,config = 'MNIST',[(1568,'relu'),]
    _1 = evaldata(name,config,'standard',True,'gaussian',20,1000,0.001,'myopt',(True,True,5,True),False)
    name,config = 'MNIST',[(1568,'relu'),(256,'tanh')]
    _2 = evaldata(name,config,'standard',True,'gaussian',20,1000,0.001,'myopt',(True,True,5,True),False)
    name,config = 'MNIST',[(1568,'relu'),(256,'tanh'),(64,'tanh')]
    _3 = evaldata(name,config,'standard',True,'gaussian',20,1000,0.001,'myopt',(True,True,5,True),False)

    ls = ['Accuracy','F1-Macro','F1-Micro']
    color = ['green','blue','red']

    # evaldata returns four series; swapping the first two puts the three
    # plotted ones at positions 1..3, in the same order as the names in `ls`.
    _ = [_1,_2,_3]
    for i in range(3):
        _[i] = list(_[i])
        _[i][:2] = _[i][:2][::-1]

    for i in range(3): #Metric
        plt.close()
        plt.title(ls[i])
        # x = number of hidden layers, y = last recorded value of the metric
        plt.plot(list(range(1,3+1)),[_[j][i+1][-1] for j in range(3)],color=color[i])
        plt.savefig(ls[i]+'1')
        plt.close()
finally:
    # Always step back out of 'Task1', even when a run or a plot fails, so
    # later cells do not silently execute inside the wrong directory.
    os.chdir('..')

<img src="output_plots/Part1/Task1/Accuracy1.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task1/F1Macro1.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task1/F1Micro1.png" height=450 width=600 align="left">

<h3>Task 2 - Trying Various number of neurons in each layer</h3>

<h5>SubTask 1 - Changing Number of Units in 1<sup>st</sup> Hidden Layer of Architecture</h5>

In [None]:
# MNIST, one hidden layer: vary the number of units (49 ... 1568) with every
# other hyper-parameter fixed. Key order matters: grid_search reads the values
# in insertion order and passes them positionally to evaldata.
# multi_grid_search runs the grid inside a freshly re-created '1 Layer' directory.
dct1 =  {
    '1 Layer' :
    {
        'A datasets'       : ['MNIST'],
        'B units'          : it.product((49,98,196,392,784,1176,1568,)),
        'C functions'      : it.product(('relu',)),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [20],
        'H batchsize'      : [1000],
        'I learning_rate'  : [0.001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,5,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct1,True)

<img src="output_plots/Part1/Task2/1Layer/CostLayer1Units.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task2/1Layer/AccuracyLayer1Units.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task2/1Layer/F1MacroLayer1Units.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task2/1Layer/F1MicroLayer1Units.png" height=450 width=600 align="left">

<h5>SubTask 2 - Changing Number of Units in 2<sup>nd</sup> Hidden Layer of Architecture</h5>

In [None]:
# MNIST, two hidden layers: first layer fixed at 1568 relu units, vary the
# units of the second (tanh) layer over 16 ... 256.
# multi_grid_search runs the grid inside a freshly re-created '2 Layer' directory.
dct2 = {
    '2 Layer' :
    {
        'A datasets'       : ['MNIST'],
        'B units'          : it.product((1568,),(16,32,64,128,256)),
        'C functions'      : it.product(('relu',),('tanh',)),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [20],
        'H batchsize'      : [1000],
        'I learning_rate'  : [0.001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,5,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct2,True)

<img src="output_plots/Part1/Task2/2Layer/CostLayer2Units.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task2/2Layer/AccuracyLayer2Units.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task2/2Layer/F1MacroLayer2Units.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task2/2Layer/F1MicroLayer2Units.png" height=450 width=600 align="left">

<h5>SubTask 3 - Changing Number of Units in 3<sup>rd</sup> Hidden Layer of Architecture</h5>

In [None]:
# MNIST, three hidden layers: first two layers fixed (1568 relu, 256 tanh),
# vary the units of the third (tanh) layer over 16, 32, 64.
# multi_grid_search runs the grid inside a freshly re-created '3 Layer' directory.
dct3 = {
    '3 Layer' :
    {
        'A datasets'       : ['MNIST'],
        'B units'          : it.product((1568,),(256,),(16,32,64)),
        'C functions'      : it.product(('relu',),('tanh',),('tanh',)),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [20],
        'H batchsize'      : [1000],
        'I learning_rate'  : [0.001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,5,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct3,True)

<img src="output_plots/Part1/Task2/3Layer/CostLayer3Units.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task2/3Layer/AccuracyLayer3Units.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task2/3Layer/F1MacroLayer3Units.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task2/3Layer/F1MicroLayer3Units.png" height=450 width=600 align="left">

<h3>Task 3 - Trying Activation Functions on each layer</h3>

<h5>SubTask 1 - Changing Activation Functions in 1<sup>st</sup> Hidden Layer of Architecture</h5>

In [None]:
# MNIST, one hidden layer of 1568 units: vary its activation function over
# sigmoid, relu, tanh and swish.
# multi_grid_search runs the grid inside a freshly re-created
# '1 Layer FUNCTIONS' directory.
dct1 =  {
    '1 Layer FUNCTIONS' :
    {
        'A datasets'       : ['MNIST'],
        'B units'          : it.product((1568,)),
        'C functions'      : it.product(('sigmoid','relu','tanh','swish')),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [20],
        'H batchsize'      : [1000],
        'I learning_rate'  : [0.001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,5,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct1,True)

<img src="output_plots/Part1/Task3/1LayerF/CostLayer1Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task3/1LayerF/AccuracyLayer1Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task3/1LayerF/F1MacroLayer1Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task3/1LayerF/F1MicroLayer1Activation.png" height=450 width=600 align="left">

<h5>SubTask 2 - Changing Activation Functions in 2<sup>nd</sup> Hidden Layer of Architecture</h5>

In [None]:
# MNIST, two hidden layers (1568, 256 units): first activation fixed to relu,
# vary the second layer's activation over sigmoid, relu, tanh and swish.
# multi_grid_search runs the grid inside a freshly re-created
# '2 Layer FUNCTIONS' directory.
dct2 = {
    '2 Layer FUNCTIONS' :
    {
        'A datasets'       : ['MNIST'],
        'B units'          : it.product((1568,),(256,)),
        'C functions'      : it.product(('relu',),('sigmoid','relu','tanh','swish')),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [20],
        'H batchsize'      : [1000],
        'I learning_rate'  : [0.001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,5,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct2,True)

<img src="output_plots/Part1/Task3/2LayerF/CostLayer2Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task3/2LayerF/AccuracyLayer2Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task3/2LayerF/F1MacroLayer2Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task3/2LayerF/F1MicroLayer2Activation.png" height=450 width=600 align="left">

<h5>SubTask 3 - Changing Activation Functions in 3<sup>rd</sup> Hidden Layer of Architecture</h5>

In [None]:
# MNIST, three hidden layers (1568, 256, 64 units): first two activations
# fixed (relu, tanh), vary the third over relu, tanh and swish.
# multi_grid_search runs the grid inside a freshly re-created
# '3 Layer FUNCTIONS' directory.
dct3 = {
    '3 Layer FUNCTIONS' :
    {
        'A datasets'       : ['MNIST'],
        'B units'          : it.product((1568,),(256,),(64,)),
        'C functions'      : it.product(('relu',),('tanh',),('relu','tanh','swish'),),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [20],
        'H batchsize'      : [1000],
        'I learning_rate'  : [0.001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,5,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct3,True)

<img src="output_plots/Part1/Task3/3LayerF/CostLayer3Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task3/3LayerF/AccuracyLayer3Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task3/3LayerF/F1MacroLayer3Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part1/Task3/3LayerF/F1MicroLayer3Activation.png" height=450 width=600 align="left">

<h3>Task 4 Initialization & Preprocessing Techniques</h3>

<h5>SubTask 1 Impact of Xavier-He weight Initialization</h5>

In [None]:
# MNIST, fixed two-layer network (1568 relu, 256 tanh): compare the He flag
# off vs on with all other hyper-parameters fixed.
# multi_grid_search runs the grid inside a freshly re-created 'Xavier-He' directory.
dct2 =  {
    'Xavier-He':
    {
        'A datasets'       : ['MNIST'],
        'B units'          : it.product((1568,),(256,)),
        'C functions'      : it.product(('relu',),('tanh',)),
        'D preproc'        : ['standard'],
        'E He'             : [False,True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [20],
        'H batchsize'      : [1000],
        'I learning_rate'  : [0.001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,5,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct2,True)

<h5>Observed learning curve for Initialization technique as "Default" vs "Xavier-He"</h5>
<br><img src="output_plots/Part1/Task4/XH/NHE.png" height=300 width=450 align="left">
<img src="output_plots/Part1/Task4/XH/YHE.png" height=300 width=450 align="right">

<h5>SubTask 2 Finding Suitable Preprocessing & Initialization Distribution</h5>

In [None]:
# MNIST, fixed two-layer network (1568 relu, 256 tanh): two grids that each
# compare 'standard' vs 'scale' preprocessing, one with gaussian and one with
# uniform weight initialization. multi_grid_search runs them one after the
# other in freshly re-created 'Init Gaussian' and 'Init Uniform' directories.
dct1 =  {
    'Init Gaussian':
    {
    'A datasets'       : ['MNIST'],
    'B units'          : it.product((1568,),(256,)),
    'C functions'      : it.product(('relu',),('tanh',)),
    'D preproc'        : ['standard','scale'],
    'E He'             : [True],
    'F initmodes'      : ['gaussian'],
    'G epochs'         : [20],
    'H batchsize'      : [1000],
    'I learning_rate'  : [0.001],
    'J optimizer'      : ['myopt'],
    'K early_stopping' : [(True,True,5,True)],
    'L keras'          : [False],
    },

    'Init Uniform':
    {
    'A datasets'       : ['MNIST'],
    'B units'          : it.product((1568,),(256,)),
    'C functions'      : it.product(('relu',),('tanh',)),
    'D preproc'        : ['standard','scale'],
    'E He'             : [True],
    'F initmodes'      : ['uniform'],
    'G epochs'         : [20],
    'H batchsize'      : [1000],
    'I learning_rate'  : [0.001],
    'J optimizer'      : ['myopt'],
    'K early_stopping' : [(True,True,5,True)],
    'L keras'          : [False],
    }
        }

multi_grid_search(dct1,True)

<h5>Observed learning curve for Preprocessing from "Scaling" vs "Standardization"</h5>
<br><img src="output_plots/Part1/Task4/IG/GraphG2.png" height=300 width=450 align="left">
<img src="output_plots/Part1/Task4/IG/GraphG.png" height=250 width=450 align="right">

<b>Note: </b>Standardization, being less sensitive to outliers than Min-Max scaling, performs better.<br>
Hence most of the experiments shown use standardization, and later ones follow the same choice.

<h5>Observed learning curve for Initialization from "Uniform" vs "Gaussian"</h5>
<br><img src="output_plots/Part1/Task4/IU/GraphU.png" height=300 width=450 align="left">
<img src="output_plots/Part1/Task4/IG/GraphG.png" height=250 width=450 align="right">

<b>General observation:</b>
Initializing the weights from a Gaussian distribution injects the least prior information into the system.<br>
Hence most of our experiments use it; you are free to experiment with other options.

<h3>Task 5 Comparing Classification Reports of Numpy & Keras implementations</h3>

In [None]:
# MNIST, fixed two-layer network (1568 relu, 256 tanh) with the keras flag on:
# evaldata then also trains the Keras model and prints its classification
# reports after those of the NumPy implementation.
# multi_grid_search runs the grid inside a freshly re-created 'Keras' directory.
dct1 =  {
    'Keras':
    {
        'A datasets'       : ['MNIST'],
        'B units'          : it.product((1568,),(256,)),
        'C functions'      : it.product(('relu',),('tanh',)),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [20],
        'H batchsize'      : [1000],
        'I learning_rate'  : [0.001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,5,True)],
        'L keras'          : [True],
    },
        }

multi_grid_search(dct1,True)

<pre>

		 ################ NumPy Implementation ################ 

        -------- Classification Report on Training data -------- 
               precision    recall  f1-score   support

           0     0.9984    0.9997    0.9991      3719
           1     1.0000    0.9983    0.9992      4212
           2     0.9989    0.9995    0.9992      3746
           3     0.9992    0.9977    0.9985      3925
           4     0.9986    0.9989    0.9988      3644
           5     0.9985    0.9985    0.9985      3438
           6     0.9992    0.9987    0.9989      3715
           7     0.9977    0.9997    0.9987      3970
           8     0.9986    0.9997    0.9992      3665
           9     0.9981    0.9968    0.9975      3766

   micro avg     0.9988    0.9988    0.9988     37800
   macro avg     0.9987    0.9988    0.9988     37800
weighted avg     0.9988    0.9988    0.9988     37800
 samples avg     0.9988    0.9988    0.9988     37800


        -------- Classification Report on Validation data -------- 
               precision    recall  f1-score   support

           0     0.9689    0.9806    0.9747       413
           1     0.9871    0.9703    0.9786       472
           2     0.9630    0.9652    0.9641       431
           3     0.9240    0.9413    0.9326       426
           4     0.9620    0.9463    0.9541       428
           5     0.9660    0.9552    0.9606       357
           6     0.9833    0.9787    0.9810       422
           7     0.9578    0.9490    0.9534       431
           8     0.9358    0.9523    0.9440       398
           9     0.9343    0.9431    0.9387       422

   micro avg     0.9583    0.9583    0.9583      4200
   macro avg     0.9582    0.9582    0.9582      4200
weighted avg     0.9585    0.9583    0.9584      4200



		 ################ Keras Implementation ################ 
         
         -------- Classification Report on Training data -------- 
               precision    recall  f1-score   support

           0     0.9995    1.0000    0.9997      3719
           1     0.9995    0.9991    0.9993      4212
           2     0.9997    1.0000    0.9999      3746
           3     0.9997    0.9987    0.9992      3925
           4     0.9997    0.9997    0.9997      3644
           5     0.9994    0.9997    0.9996      3438
           6     0.9992    0.9997    0.9995      3715
           7     0.9982    0.9995    0.9989      3970
           8     0.9995    0.9992    0.9993      3665
           9     0.9992    0.9981    0.9987      3766

   micro avg     0.9994    0.9994    0.9994     37800
   macro avg     0.9994    0.9994    0.9994     37800
weighted avg     0.9994    0.9994    0.9994     37800
 samples avg     0.9994    0.9994    0.9994     37800


		 -------- Classification Report on Validation data -------- 
               precision    recall  f1-score   support

           0     0.9806    0.9782    0.9794       413
           1     0.9850    0.9746    0.9798       472
           2     0.9501    0.9722    0.9610       431
           3     0.9307    0.9460    0.9383       426
           4     0.9553    0.9486    0.9519       428
           5     0.9624    0.9328    0.9474       357
           6     0.9833    0.9739    0.9786       422
           7     0.9471    0.9559    0.9515       431
           8     0.9340    0.9598    0.9467       398
           9     0.9444    0.9265    0.9354       422

   micro avg     0.9574    0.9574    0.9574      4200
   macro avg     0.9573    0.9569    0.9570      4200
weighted avg     0.9576    0.9574    0.9574      4200
 samples avg     0.9574    0.9574    0.9574      4200

<pre>




<h1>Part 2 "Cat-Dog" Evaluation and Experiments</h1>

In [None]:
# Short names of tunable hyper-parameters (same list as in the earlier parts).
# NOTE(review): not referenced by any function visible in this part of the
# notebook - confirm it is still needed.
grid_params = ['NumFun','prprc','He', 'initmode', 'batchsize','lr']

<h3>Task 1 - Varying Number of Layers</h3>

In [None]:
# Part 2 / Task 1: train Cat-Dog networks with 1, 2 and 3 hidden layers and
# plot the final accuracy and F1 scores against the number of hidden layers.

# Work inside an empty 'Task1' directory (any previous one is discarded).
# NOTE(review): Part 1 uses the same directory name; run this part from a
# different working directory if the Part 1 plots must be kept.
shutil.rmtree('Task1', ignore_errors=True)
os.mkdir('Task1')
os.chdir('Task1')

try:
    grid_params = ['NumFun','prprc','He', 'initmode', 'batchsize','lr']
    name,config = 'Cat-Dog',[(2048,'relu'),]
    _1 = evaldata(name,config,'standard',True,'gaussian',100,200,0.0001,'myopt',(True,True,-1,True),False)
    name,config = 'Cat-Dog',[(2048,'relu'),(256,'relu')]
    _2 = evaldata(name,config,'standard',True,'gaussian',100,200,0.0001,'myopt',(True,True,-1,True),False)
    name,config = 'Cat-Dog',[(2048,'relu'),(256,'relu'),(64,'tanh')]
    _3 = evaldata(name,config,'standard',True,'gaussian',100,200,0.0001,'myopt',(True,True,-1,True),False)

    ls = ['Accuracy','F1-Macro','F1-Micro']
    color = ['green','blue','red']

    # evaldata returns four series; swapping the first two puts the three
    # plotted ones at positions 1..3, in the same order as the names in `ls`.
    _ = [_1,_2,_3]
    for i in range(3):
        _[i] = list(_[i])
        _[i][:2] = _[i][:2][::-1]

    for i in range(3): #Metric
        plt.close()
        plt.title(ls[i])
        # x = number of hidden layers, y = last recorded value of the metric
        plt.plot(list(range(1,3+1)),[_[j][i+1][-1] for j in range(3)],color=color[i])
        plt.savefig(ls[i]+'1')
        plt.close()
finally:
    # Always step back out of 'Task1', even when a run or a plot fails, so
    # later cells do not silently execute inside the wrong directory.
    os.chdir('..')

<img src="output_plots/Part2/Task1/Accuracy1.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task1/F1Macro1.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task1/F1Micro1.png" height=450 width=600 align="left">

<h3>Task 2 - Trying Various number of neurons in each layer</h3>

<h5>SubTask 1 - Changing Number of Units in 1<sup>st</sup> Hidden Layer of Architecture</h5>

In [None]:
# Cat-Dog, one hidden relu layer: vary the number of units over 512, 1024,
# 2048 with every other hyper-parameter fixed. Key order matters: grid_search
# reads the values in insertion order and passes them positionally to evaldata.
# NOTE(review): multi_grid_search deletes and re-creates the '1 Layer'
# directory, which is also the name used by the Part 1 cell - run from a
# different working directory to keep the Part 1 output.
dct1 =  {
    '1 Layer' :
    {
        'A datasets'       : ['Cat-Dog'],
        'B units'          : it.product((512,1024,2048)),
        'C functions'      : it.product(('relu',)),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [100],
        'H batchsize'      : [200],
        'I learning_rate'  : [0.0001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,-1,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct1,True)

<img src="output_plots/Part2/Task2/1Layer/CostLayer1Units.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task2/1Layer/AccuracyLayer1Units.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task2/1Layer/F1MacroLayer1Units.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task2/1Layer/F1MicroLayer1Units.png" height=450 width=600 align="left">

<h5>SubTask 2 - Changing Number of Units in 2<sup>nd</sup> Hidden Layer of Architecture</h5>

In [None]:
# Cat-Dog, two hidden relu layers: first layer fixed at 1024 units, vary the
# units of the second layer over 64, 128, 256.
# NOTE(review): multi_grid_search deletes and re-creates the '2 Layer'
# directory, which is also the name used by the Part 1 cell.
dct2 = {
    '2 Layer' :
    {
        'A datasets'       : ['Cat-Dog'],
        'B units'          : it.product((1024,),(64,128,256)),
        'C functions'      : it.product(('relu',),('relu',)),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [100],
        'H batchsize'      : [200],
        'I learning_rate'  : [0.0001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,-1,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct2,True)

<img src="output_plots/Part2/Task2/2Layer/CostLayer2Units.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task2/2Layer/AccuracyLayer2Units.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task2/2Layer/F1MacroLayer2Units.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task2/2Layer/F1MicroLayer2Units.png" height=450 width=600 align="left">

<h5>SubTask 3 - Changing Number of Units in 3<sup>rd</sup> Hidden Layer of Architecture</h5>

In [None]:
# Cat-Dog, three hidden layers: first two fixed (1024 relu, 256 relu), vary
# the units of the third (tanh) layer over 16, 32, 64.
# NOTE(review): multi_grid_search deletes and re-creates the '3 Layer'
# directory, which is also the name used by the Part 1 cell.
dct3 = {
    '3 Layer' :
    {
        'A datasets'       : ['Cat-Dog'],
        'B units'          : it.product((1024,),(256,),(16,32,64)),
        'C functions'      : it.product(('relu',),('relu',),('tanh',)),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [100],
        'H batchsize'      : [200],
        'I learning_rate'  : [0.0001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,-1,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct3,True)

<img src="output_plots/Part2/Task2/3Layer/CostLayer3Units.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task2/3Layer/AccuracyLayer3Units.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task2/3Layer/F1MacroLayer3Units.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task2/3Layer/F1MicroLayer3Units.png" height=450 width=600 align="left">

<h3>Task 3 - Trying Activation Functions on each layer</h3>

<h5>SubTask 1 - Changing Activation Functions in 1<sup>st</sup> Hidden Layer of Architecture</h5>

In [None]:
# Cat-Dog, one hidden layer of 2048 units: vary its activation function over
# sigmoid, relu, tanh and swish.
# NOTE(review): multi_grid_search deletes and re-creates the
# '1 Layer FUNCTIONS' directory, which is also the name used by the Part 1 cell.
dct1 =  {
    '1 Layer FUNCTIONS' :
    {
        'A datasets'       : ['Cat-Dog'],
        'B units'          : it.product((2048,)),
        'C functions'      : it.product(('sigmoid','relu','tanh','swish')),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [100],
        'H batchsize'      : [200],
        'I learning_rate'  : [0.0001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,-1,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct1,True)

<img src="output_plots/Part2/Task3/1LayerF/CostLayer1Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task3/1LayerF/AccuracyLayer1Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task3/1LayerF/F1MacroLayer1Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task3/1LayerF/F1MicroLayer1Activation.png" height=450 width=600 align="left">

<h5>SubTask 2 - Changing Activation Functions in 2<sup>nd</sup> Hidden Layer of Architecture</h5>

In [None]:
# Cat-Dog, two hidden layers (1024, 256 units): first activation fixed to
# relu, vary the second layer's activation over sigmoid, relu, tanh and swish.
# NOTE(review): multi_grid_search deletes and re-creates the
# '2 Layer FUNCTIONS' directory, which is also the name used by the Part 1 cell.
dct2 = {
    '2 Layer FUNCTIONS' :
    {
        'A datasets'       : ['Cat-Dog'],
        'B units'          : it.product((1024,),(256,)),
        'C functions'      : it.product(('relu',),('sigmoid','relu','tanh','swish')),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [100],
        'H batchsize'      : [200],
        'I learning_rate'  : [0.0001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,-1,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct2,True)

<img src="output_plots/Part2/Task3/2LayerF/CostLayer2Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task3/2LayerF/AccuracyLayer2Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task3/2LayerF/F1MacroLayer2Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task3/2LayerF/F1MicroLayer2Activation.png" height=450 width=600 align="left">

<h5>SubTask 3 - Changing Activation Functions in 3<sup>rd</sup> Hidden Layer of Architecture</h5>

In [None]:
# Cat-Dog, three hidden layers (1024, 256, 64 units): first two activations
# fixed to relu, vary the third over relu, tanh and swish.
# NOTE(review): multi_grid_search deletes and re-creates the
# '3 Layer FUNCTIONS' directory, which is also the name used by the Part 1 cell.
dct3 = {
    '3 Layer FUNCTIONS' :
    {
        'A datasets'       : ['Cat-Dog'],
        'B units'          : it.product((1024,),(256,),(64,)),
        'C functions'      : it.product(('relu',),('relu',),('relu','tanh','swish'),),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [100],
        'H batchsize'      : [200],
        'I learning_rate'  : [0.0001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,-1,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct3,True)

<img src="output_plots/Part2/Task3/3LayerF/CostLayer3Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task3/3LayerF/AccuracyLayer3Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task3/3LayerF/F1MacroLayer3Activation.png" height=450 width=600 align="left">

<img src="output_plots/Part2/Task3/3LayerF/F1MicroLayer3Activation.png" height=450 width=600 align="left">

<h3>Task 4 Initialization & Preprocessing Techniques</h3>

<h5>SubTask 1 Impact of Xavier-He weight Initialization</h5>

In [None]:
# Cat-Dog, fixed two-layer relu network (1024, 256 units): compare the He
# flag off vs on with all other hyper-parameters fixed.
# NOTE(review): multi_grid_search deletes and re-creates the 'Xavier-He'
# directory, which is also the name used by the Part 1 cell.
dct2 =  {
    'Xavier-He':
    {
        'A datasets'       : ['Cat-Dog'],
        'B units'          : it.product((1024,),(256,)),
        'C functions'      : it.product(('relu',),('relu',)),
        'D preproc'        : ['standard'],
        'E He'             : [False,True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [100],
        'H batchsize'      : [200],
        'I learning_rate'  : [0.0001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,-1,True)],
        'L keras'          : [False],
    }
        }

multi_grid_search(dct2,True)

<h5>Observed learning curve for Initialization technique as "Default" vs "Xavier-He"</h5>
<br><img src="output_plots/Part2/Task4/XH/NHE.png" height=300 width=450 align="left">
<img src="output_plots/Part2/Task4/XH/YHE.png" height=300 width=450 align="left">

<h5>SubTask 2 Finding Suitable Preprocessing & Initialization Distribution</h5>

In [None]:
# Cat-Dog, fixed two-layer relu network (1024, 256 units): two grids that each
# compare 'standard' vs 'scale' preprocessing, one with gaussian and one with
# uniform weight initialization.
# NOTE(review): multi_grid_search deletes and re-creates the 'Init Gaussian'
# and 'Init Uniform' directories, which are also the names used in Part 1.
dct1 =  {
    'Init Gaussian':
    {
    'A datasets'       : ['Cat-Dog'],
    'B units'          : it.product((1024,),(256,)),
    'C functions'      : it.product(('relu',),('relu',)),
    'D preproc'        : ['standard','scale'],
    'E He'             : [True],
    'F initmodes'      : ['gaussian'],
    'G epochs'         : [100],
    'H batchsize'      : [200],
    'I learning_rate'  : [0.0001],
    'J optimizer'      : ['myopt'],
    'K early_stopping' : [(True,True,-1,True)],
    'L keras'          : [False],
    },

    'Init Uniform':
    {
    'A datasets'       : ['Cat-Dog'],
    'B units'          : it.product((1024,),(256,)),
    'C functions'      : it.product(('relu',),('relu',)),
    'D preproc'        : ['standard','scale'],
    'E He'             : [True],
    'F initmodes'      : ['uniform'],
    'G epochs'         : [100],
    'H batchsize'      : [200],
    'I learning_rate'  : [0.0001],
    'J optimizer'      : ['myopt'],
    'K early_stopping' : [(True,True,-1,True)],
    'L keras'          : [False],
    }
        }

multi_grid_search(dct1,True)

<h5>Observed learning curve for Preprocessing from "Scaling" vs "Standardization"</h5>
<br><img src="output_plots/Part2/Task4/IG/GraphG2.png" height=300 width=450 align="left">
<img src="output_plots/Part2/Task4/IG/GraphG.png" height=250 width=450 align="right">

<b>Note: </b>Standardization, being less sensitive to outliers than Min-Max scaling, performs better.<br>
Hence most of the experiments shown use standardization, and later ones follow the same choice.

<h5>Observed learning curve for Initialization from "Uniform" vs "Gaussian"</h5>
<br><img src="output_plots/Part2/Task4/IU/GraphU.png" height=300 width=450 align="left">
<img src="output_plots/Part2/Task4/IG/GraphG.png" height=250 width=450 align="right">

<b>General observation:</b>
Initializing the weights from a Gaussian distribution injects the least prior information into the system.<br>
Hence most of our experiments use it; you are free to experiment with other options.

<h3>Task 5 Comparing Classification Reports of Numpy & Keras implementations</h3>

In [None]:
# Cat-Dog, fixed three-layer network (2048 relu, 256 relu, 64 tanh) with the
# keras flag on: evaldata then also trains the Keras model and prints its
# classification reports after those of the NumPy implementation.
# NOTE(review): multi_grid_search deletes and re-creates the 'Keras'
# directory, which is also the name used by the Part 1 cell.
dct1 =  {
    'Keras':
    {
        'A datasets'       : ['Cat-Dog'],
        'B units'          : it.product((2048,),(256,),(64,)),
        'C functions'      : it.product(('relu',),('relu',),('tanh',)),
        'D preproc'        : ['standard'],
        'E He'             : [True],
        'F initmodes'      : ['gaussian'],
        'G epochs'         : [100],
        'H batchsize'      : [200],
        'I learning_rate'  : [0.0001],
        'J optimizer'      : ['myopt'],
        'K early_stopping' : [(True,True,-1,True)],
        'L keras'          : [True],
    },
        }

multi_grid_search(dct1,True)

<pre>

		 ################ NumPy Implementation ################ 

        -------- Classification Report on Training data -------- 
               precision    recall  f1-score   support

         cat     1.0000    1.0000    1.0000     11250
         dog     1.0000    1.0000    1.0000     11250

   micro avg     1.0000    1.0000    1.0000     22500
   macro avg     1.0000    1.0000    1.0000     22500
weighted avg     1.0000    1.0000    1.0000     22500
 samples avg     1.0000    1.0000    1.0000     22500


		 -------- Classification Report on Validation data -------- 
               precision    recall  f1-score   support

         cat     0.6341    0.6392    0.6367      1250
         dog     0.6363    0.6312    0.6337      1250

   micro avg     0.6352    0.6352    0.6352      2500
   macro avg     0.6352    0.6352    0.6352      2500
weighted avg     0.6352    0.6352    0.6352      2500
 samples avg     0.6352    0.6352    0.6352      2500



		 ################ Keras Implementation ################ 
         
         -------- Classification Report on Training data -------- 
               precision    recall  f1-score   support

         cat     0.9932    0.9986    0.9959     11250
         dog     0.9986    0.9932    0.9959     11250

   micro avg     0.9959    0.9959    0.9959     22500
   macro avg     0.9959    0.9959    0.9959     22500
weighted avg     0.9959    0.9959    0.9959     22500
 samples avg     0.9959    0.9959    0.9959     22500


		 -------- Classification Report on Validation data -------- 
               precision    recall  f1-score   support

         cat     0.6193    0.7224    0.6669      1250
         dog     0.6670    0.5560    0.6065      1250

   micro avg     0.6392    0.6392    0.6392      2500
   macro avg     0.6432    0.6392    0.6367      2500
weighted avg     0.6432    0.6392    0.6367      2500
 samples avg     0.6392    0.6392    0.6392      2500

<pre>




<h1>Part 3 Execution of Neural Nets on Datasets of Assignment 1</h1>

<h3>Loading Datasets & Preprocessing of Twitter data into bag-of-words</h3>

In [None]:
# Load the three Assignment-1 datasets and register them in the global
# `datasets` dict as (features, label matrix, class names as str).
# `cattooht` is defined elsewhere in this notebook; its result is indexed below
# as [0] and [1] -- presumably (one-hot labels, category list), TODO confirm.
# NOTE(review): the '/data2/...' paths are absolute and machine-specific;
# consider routing them through a configurable data directory.

# 1. Dolphins (space-separated values, no header row)
X1 = np.array(pd.read_csv('/data2/dolphins/dolphins.csv',sep=' ',header=None))
Y1 = np.array(pd.read_csv('/data2/dolphins/dolphins_label.csv',sep=' ',header=None))
Y1 = cattooht(Y1)

# 2. Twitter(bag-of-Word)
X2 = pd.read_csv('/data2/twitter/twitter.csv',header=None)
Y2 = np.array(pd.read_csv('/data2/twitter/twitter_label.csv',header=None))

# Converting into bag-of-word: one {word: count} dict per statement.
# Only column 0 of the text file is read.
all_words = set()
local_ls = []
for stmt in X2[0]:
    local = {}
    for word in stmt.strip().split():
        local[word] = local.get(word, 0) + 1
    local_ls.append(local)
    all_words.update(local)

# Sort the vocabulary so the feature columns have a fixed order. Iterating the
# set directly gives an order that changes between interpreter runs (string
# hash randomisation), which made the feature matrix irreproducible.
vocab = sorted(all_words)
mat = [[local.get(word, 0) for word in vocab] for local in local_ls]
X2 = np.array(mat)
Y2 = cattooht(Y2)

# 3. PubMed (space-separated values, no header row)
X3 = np.array(pd.read_csv('/data2/pubmed/pubmed.csv',sep=' ',header=None))
Y3 = np.array(pd.read_csv('/data2/pubmed/pubmed_label.csv',sep=' ',header=None))
Y3 = cattooht(Y3)


# Assignment into Global Datastructure
datasets['Dolphins'] = X1,Y1[0],list(map(str,Y1[1]))
datasets['Twitter']  = X2,Y2[0],list(map(str,Y2[1]))
datasets['PubMed']   = X3,Y3[0],list(map(str,Y3[1]))

<h3> Evaluation on the Dolphins dataset</h3>

In [None]:
# 1. Dolphins Execution: two ReLU hidden layers sized from the data dimensions
name = 'Dolphins'
ipdim = datasets[name][0].shape[1]      # columns of the feature matrix
opdim = datasets[name][1].shape[1]      # columns of the label matrix
s1 = 2*ipdim                            # first hidden layer: twice the input width
s2 = int(round((s1*opdim)**0.5))        # second hidden layer: geometric mean of s1 and opdim
config = [(s1,'relu'), (s2,'relu')]
ln = datasets[name][0].shape[0]         # number of rows; kept as a notebook-level name
# Positional arguments presumably follow the grid-key order used elsewhere
# (preproc, He, init mode, epochs, batch size, learning rate, optimizer,
# early stopping, keras) -- TODO confirm against evaldata's signature.
_ = evaldata(
    name, config,
    'standard', True, 'gaussian',
    100, 10, 0.001,
    'myopt', (True,True,-1,True), False,
)

<pre>Previous Bayesian Classification Metrics
Accuracy : 0.90833   F1-Micro : 0.90833    F1-Macro : 0.85333

Previous KNN Classification Metrics
Accuracy : 0.98333   F1-Micro : 0.98333    F1-Macro : 0.98222

<img src="output_plots/Part3/Dolphins.png" height=450 width=600 align="left">

<b>General observation:</b>
The neural network makes accurate predictions even on this class-imbalanced dataset.

<h3> Evaluation on the Twitter dataset</h3>

In [None]:
# Twitter Execution: two ReLU hidden layers sized from the data dimensions
name = 'Twitter'
ipdim = datasets[name][0].shape[1]      # columns of the feature matrix
opdim = datasets[name][1].shape[1]      # columns of the label matrix
s1 = 2*ipdim                            # first hidden layer: twice the input width
s2 = int(round((s1*opdim)**0.5))        # second hidden layer: geometric mean of s1 and opdim
config = [(s1,'relu'), (s2,'relu')]
ln = datasets[name][0].shape[0]         # number of rows; kept as a notebook-level name
# Positional arguments presumably follow the grid-key order used elsewhere
# (preproc, He, init mode, epochs, batch size, learning rate, optimizer,
# early stopping, keras) -- TODO confirm against evaldata's signature.
_ = evaldata(
    name, config,
    'standard', True, 'gaussian',
    10, 300, 0.0003,
    'myopt', (True,True,-1,True), False,
)

<pre>Previous Bayesian Classification Metrics
Accuracy : 0.56010   F1-Micro : 0.56010    F1-Macro : 0.34854

Previous KNN Classification Metrics
Accuracy : 0.48414   F1-Micro : 0.48414    F1-Macro : 0.41388

<img src="output_plots/Part3/Twittr.png" height=450 width=600 align="left">

<b>General observation:</b>
No improvement on the validation data is observed during training.<br>
The network is merely overfitting the training data; capturing word order would require sequence modelling,<br>
which is not possible with the plain feed-forward architecture used in this assignment.

<h3> Evaluation on the PubMed dataset</h3>

In [None]:
# PubMed Execution: two ReLU hidden layers sized from the data dimensions
name = 'PubMed'
ipdim = datasets[name][0].shape[1]      # columns of the feature matrix
opdim = datasets[name][1].shape[1]      # columns of the label matrix
s1 = 2*ipdim                            # first hidden layer: twice the input width
s2 = int(round((s1*opdim)**0.5))        # second hidden layer: geometric mean of s1 and opdim
config = [(s1,'relu'), (s2,'relu')]
ln = datasets[name][0].shape[0]         # number of rows; kept as a notebook-level name
# Positional arguments presumably follow the grid-key order used elsewhere
# (preproc, He, init mode, epochs, batch size, learning rate, optimizer,
# early stopping, keras) -- TODO confirm against evaldata's signature.
_ = evaldata(
    name, config,
    'standard', True, 'gaussian',
    100, 500, 0.001,
    'myopt', (True,True,-1,True), False,
)

<pre>Previous Bayesian Classification Metrics
Accuracy : 0.44144   F1-Micro : 0.44144    F1-Macro : 0.33277

Previous KNN Classification Metrics
Accuracy : 0.35412   F1-Micro : 0.35412    F1-Macro : 0.34554

<img src="output_plots/Part3/PubMed.png" height=450 width=600 align="left">

<b>General observation:</b>
The neural network overfits this dataset as well.<br>
Multinomial Naive Bayes performed somewhat better than the neural network.