## MNIST Dataset

Source: https://www.kaggle.com/oddrationale/mnist-in-csv

In [1]:
import pandas as pd
import numpy as np

## Import MNIST Dataset

In [2]:
# import dataset

train_df =  pd.read_csv('data/mnist/train.csv')
train_df.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
NORMALIZE = True

if NORMALIZE:
    train_df.loc[:, train_df.columns != 'label'] /= 255.0

np.unique(train_df.loc[0, train_df.columns != 'label'])

array([0.        , 0.00392157, 0.00784314, 0.01176471, 0.03529412,
       0.04313725, 0.05490196, 0.0627451 , 0.07058824, 0.09019608,
       0.09411765, 0.09803922, 0.10196078, 0.10588235, 0.11764706,
       0.1372549 , 0.14117647, 0.15294118, 0.16862745, 0.17647059,
       0.18039216, 0.19215686, 0.21568627, 0.21960784, 0.25098039,
       0.25882353, 0.2745098 , 0.30588235, 0.31372549, 0.31764706,
       0.32156863, 0.35294118, 0.36470588, 0.36862745, 0.41960784,
       0.42352941, 0.44705882, 0.46666667, 0.49411765, 0.49803922,
       0.50980392, 0.51764706, 0.52156863, 0.52941176, 0.53333333,
       0.54509804, 0.58039216, 0.58823529, 0.60392157, 0.61176471,
       0.62745098, 0.65098039, 0.66666667, 0.67058824, 0.6745098 ,
       0.68627451, 0.71372549, 0.71764706, 0.72941176, 0.73333333,
       0.74509804, 0.76470588, 0.77647059, 0.78823529, 0.80392157,
       0.81176471, 0.83137255, 0.83529412, 0.85882353, 0.86666667,
       0.88235294, 0.88627451, 0.89803922, 0.93333333, 0.94117

## Setup Neural Network weights and structure

The structure can be changed dynamically by changing the "constants" :
- INPUT_SIZE
- OUTPUT_SIZE
- HIDDEN_LAYERS_NBR
- HIDDEN_LAYERS_SIZE

Important variables:
- neurons
- weights
- biases

In [4]:
# setup neural network constants

INPUT_SIZE = 784
OUTPUT_SIZE = 10

HIDDEN_LAYERS_NBR = 1
HIDDEN_LAYERS_SIZE = 32 # same size for all layers ( for simplification purposes )


In [5]:
# Construct Neurons
def construct_neurons():
    length = HIDDEN_LAYERS_NBR+2
    ret = [None] * (length)
    for i in range(length):
        if i == 0: # First Layer is Input
            ret[i] = np.zeros(shape=(INPUT_SIZE,1))
        elif i == (length-1): # and Last layer is Output layer
            ret[i] = np.zeros(shape=(OUTPUT_SIZE,1))
        else: # Hidden Layers
            ret[i] = np.zeros(shape=(HIDDEN_LAYERS_SIZE, 1))
    return ret
    

neurons = construct_neurons()
    

print('Neurons structure: ( there are {} hidden layers)'.format(HIDDEN_LAYERS_NBR))
for layer in range(len(neurons)):
    print(neurons[layer].shape)        


Neurons structure: ( there are 1 hidden layers)
(784, 1)
(32, 1)
(10, 1)


In [6]:
# Init Weights ( Xavier Initialization )

def construct_weights(xavier_constant=2):  # When using RELU initialize weights with (rand) * sqrt(2/N)
    length = HIDDEN_LAYERS_NBR + 2
    ret = [None] * (length)
    for i in range(1, length):
        if i == 1: # the first weights between input and first hidden layer have structure : (HIDDEN_LAYERS_SIZE x INPUT_SIZE )
            ret[i] = np.random.rand(HIDDEN_LAYERS_SIZE, INPUT_SIZE)*(np.sqrt(xavier_constant/INPUT_SIZE))
        elif i == (length - 1): # last weights between last hidden layer and output have structure : (OUTPUT_SIZE x HIDDEN_LAYERS_SIZE)
            ret[i] = np.random.rand(OUTPUT_SIZE, HIDDEN_LAYERS_SIZE)*(np.sqrt(xavier_constant/HIDDEN_LAYERS_SIZE))
        else: # weights between hidden layers have structure (HIDDEN_LAYERS_SIZE x HIDDEN_LAYERS_SIZE)
            ret[i] = np.random.rand(HIDDEN_LAYERS_SIZE, HIDDEN_LAYERS_SIZE)*(np.sqrt(xavier_constant/HIDDEN_LAYERS_SIZE))
    return ret


weights = construct_weights()

print('Weights structure: ')
for w in weights:
    print(w.shape if w is not None else '(None)' )

print(weights)

Weights structure: 
(None)
(32, 784)
(10, 32)
[None, array([[0.01261317, 0.04044516, 0.03969592, ..., 0.02760895, 0.04212768,
        0.04390514],
       [0.01263063, 0.00874074, 0.02205129, ..., 0.0072362 , 0.00369698,
        0.0060419 ],
       [0.02190169, 0.01728697, 0.00762238, ..., 0.04109185, 0.02404104,
        0.00403787],
       ...,
       [0.0275615 , 0.01595086, 0.00689466, ..., 0.00214547, 0.04591417,
        0.00651962],
       [0.02317522, 0.00660563, 0.02174807, ..., 0.04529433, 0.00205456,
        0.0347554 ],
       [0.0234824 , 0.01672635, 0.04095114, ..., 0.0392324 , 0.01671102,
        0.01755148]]), array([[0.02288898, 0.16030559, 0.19943867, 0.06293118, 0.06062961,
        0.23196431, 0.02719215, 0.14442328, 0.20437581, 0.05249125,
        0.23209503, 0.05396892, 0.18167084, 0.01240049, 0.21351511,
        0.07262554, 0.23859753, 0.23743804, 0.09193241, 0.14953049,
        0.10098255, 0.04914516, 0.16935215, 0.22253735, 0.23181848,
        0.15965574, 0.2447902

In [7]:
def construct_biases():
    length = len(neurons)
    ret = [None] * (length)
    for i in range(1, length):
        ret[i] = np.random.rand(neurons[i].shape[0], 1)
    return ret

biases = construct_biases()

print('Biases structure: ')
for b in biases:
    print(b.shape if b is not None else '(None)')

print(biases)


Biases structure: 
(None)
(32, 1)
(10, 1)
[None, array([[0.38227819],
       [0.60495759],
       [0.21125631],
       [0.36084706],
       [0.97133935],
       [0.36777016],
       [0.57179479],
       [0.58634425],
       [0.61708984],
       [0.08667462],
       [0.52182288],
       [0.30551015],
       [0.22032756],
       [0.46081386],
       [0.7446659 ],
       [0.26889522],
       [0.92042552],
       [0.98697223],
       [0.15202051],
       [0.11796128],
       [0.75667718],
       [0.64591168],
       [0.17093621],
       [0.34886888],
       [0.834263  ],
       [0.5717872 ],
       [0.5140515 ],
       [0.14326781],
       [0.52028372],
       [0.63223854],
       [0.70620955],
       [0.17232377]]), array([[0.2461453 ],
       [0.47167015],
       [0.54173028],
       [0.93267508],
       [0.17721781],
       [0.00603961],
       [0.07413794],
       [0.16436659],
       [0.0564843 ],
       [0.7972318 ]])]


In [8]:
# Activation Functions

def sigmoid(x, prime=False):
    if not prime:
        return 1 / (1 + np.exp(-X))
    else:
        return sigmoid(X)*(1-sigmoid(X))

def relu(x, prime=False):
    if not prime:
        return max(0,x)
    else:
        pass

def activation(x, func='relu', prime=False, vectorized=False):
    if func == 'relu':
        return relu(x, prime) if not vectorized else np.array([relu(xi, prime) for xi in x]).reshape(-1,1)
    elif func == 'sigmoid':
        return sigmoid(x, prime) if not vectorized else np.array([sigmoid(xi, prime) for xi in x]).reshape(-1,1)
    elif func == 'softmax' and vectorized:
        exps = [np.exp(xi) for xi in x]
        s = sum(exps)
        ret = [e/s for e in exps]
        return np.array(ret).reshape(-1,1)
    else: 
        raise NameError('Unknown Activation Function Name')

#v = np.array([4,100,4,12]).reshape(-1,1)
#print(v)

#res = activation(t, func='softmax', vectorized=True)
#assert sum(res) == 1
#res
#print(np.random.rand(4,1))

## Feed-Forward

In [11]:
def feed_forward(input_vec):
    global neurons
    global t

    neurons[0] = input_vec
    for i in range(1,len(neurons)):
        last_layer = (i == len(neurons)-1)
        z = np.dot(weights[i], neurons[i-1]).reshape(-1,1) + biases[i]
        neurons[i] = activation(z, func='relu' if not last_layer else 'softmax', vectorized=True)

    return neurons[-1] # Return last layer (output)

# Test

test_vec = train_df.loc[5, train_df.columns != 'label']
test_res = feed_forward(test_vec)
print(test_res)
assert sum(test_res) == 1

[[0.173965  ]
 [0.02519079]
 [0.08182588]
 [0.22669991]
 [0.10090713]
 [0.05581168]
 [0.04683737]
 [0.03624999]
 [0.03966829]
 [0.21284395]]


## Back-Propagation

## Train Network (Online)

## Score the Network