In [1]:
import numpy as np
import pandas as pd
import struct as st
import math

def convert_to_binary(path_images, path_labels):
    filename = {'images' : path_images ,'labels' : path_labels}
    train_imagesfile = open(filename['images'],'rb')
    train_imagesfile.seek(0)
    magic = st.unpack('>4B',train_imagesfile.read(4))
    nImg = st.unpack('>I',train_imagesfile.read(4))[0]
    nR = st.unpack('>I',train_imagesfile.read(4))[0]
    nC = st.unpack('>I',train_imagesfile.read(4))[0] 
    nBytesTotal = nImg*nR*nC*1 #since each pixel data is 1 byte
    images_array = np.asarray(st.unpack('>'+'B'*nBytesTotal,train_imagesfile.read(nBytesTotal))).reshape((nImg,nR,nC))
    
    # Open labels file
    train_labelsfile = open(filename['labels'], 'rb')
    train_labelsfile.seek(0)

    # Read label metadata
    magic = st.unpack('>4B', train_labelsfile.read(4))
    nLabels = st.unpack('>I', train_labelsfile.read(4))[0]  # Number of labels

    # Read label data
    labels_array = np.asarray(
        st.unpack('>' + 'B' * nLabels, train_labelsfile.read(nLabels))
    )
    return images_array, labels_array

def normalize(X):
    X_normalize = (X - np.min(X))/(np.max(X) - np.min(X))
    return X_normalize


def one_hot(a,num_classes):
    return np.squeeze(np.eye(num_classes)[a.reshape(-1)]) 

In [2]:
images, labels = convert_to_binary('train-images.idx3-ubyte', 'train-labels.idx1-ubyte')

df = pd.DataFrame(images[0])
vector_features = pd.Series([0] * (28 * 28))

X_train = images[0:50000]
y_train = labels[0:50000]
X_test = images[50001:]
y_test = labels[50001:]

In [3]:
rng = np.random.RandomState(42)

weight = rng.standard_normal(size = (28 * 28))

X = normalize(np.array([np.ravel(x) for x in X_train]))
Y = np.array([one_hot(np.array(y,dtype=int),10) for y in y_train], dtype = int)

X_test = normalize(np.array([np.ravel(x) for x in X_test]))
y_test = np.array([one_hot(np.array(y,dtype=int),10) for y in y_test], dtype = int)

input_size = X.shape[1]
output_size = Y.shape[1]
hidden_size = 1

w_hidden = rng.standard_normal(size = (input_size, output_size))
w_output = rng.standard_normal(size = (hidden_size, output_size))

In [4]:
#define dense layer functions
def dense_initialize(input_size, output_size):
    #initalization
    #output size is the number of neurons in that layer
    weights = rng.standard_normal(size = (input_size, output_size))
    LR = 0.1
    bias = np.zeros(output_size)
    return weights, LR, bias

def dense_forward(input, weights, bias):
    return np.dot(input, weights) + bias

def dense_backward(weights, bias, gradient_output, input, LR):
    gradient_input = np.dot(gradient_output, weights.T)
    grad_weight = np.dot(input.T, gradient_output)
    grad_bias = gradient_output.mean(axis = 0) * input.shape[0]
    numpy_weight = np.array(grad_weight)
    weights -= LR * numpy_weight
    bias -= LR * grad_bias
    return weights, bias, gradient_input

In [5]:
#define ReLU layer functions
def ReLU_forward(input):
    return np.maximum(0, input)

def ReLU_backward(input, grad_output):
    return (input > 0) * grad_output

In [6]:
#loss and loss gradient functions
def softmax_crossentropy_with_logits(logits, reference_answers):
    # Compute crossentropy from logits[batch,n_classes] and ids of correct answers             
    logits_for_answers = logits[np.arange(len(logits)), reference_answers]
    xentropy = - logits_for_answers + np.log(np.sum(np.exp(logits), axis=-1))    
    return xentropy


def grad_softmax_crossentropy_with_logits(logits, y_labels):
    # Compute crossentropy gradient from logits[batch,n_classes] and ids of correct answers
    ones_for_answers = np.zeros_like(logits)
    ones_for_answers[np.arange(len(logits)), y_labels] = 1    
    softmax = np.exp(logits) / np.exp(logits).sum(axis=-1,keepdims=True)    
    return (- ones_for_answers + softmax) / logits.shape[0]

In [7]:
#Train the network
def train_network(epochs, X_train, y_train, input_size, output_size,batches = 32, layers = 3):
    weights_hidden1, LR1, bias1 = dense_initialize(input_size, 100)
    weights_hidden2, LR2, bias2 = dense_initialize(100, output_size)
    train_log = []
    for epoch in range(epochs):
        for i in range(0, X_train.shape[0], batches):
            x_batch = np.array([x.flatten() for x in X[i:i + batches]])
            y_batch = np.array([y for y in y_train[i:i+batches]])
            weights_hidden1, weights_hidden2, bias1, bias2, mean_loss = train_batch(x_batch, y_batch, 3, weights_hidden1, weights_hidden2, bias1, bias2, LR1, LR2)
        train_log.append(mean_loss)
        print(f"mean_loss = {mean_loss}")
    return weights_hidden1, weights_hidden2, bias1, bias2, mean_loss


def train_batch(x_batch, y_batch, layers, weights1, weights2, bias1, bias2, LR1, LR2):
    activations = []
    input = x_batch

    #apply forward propogation to each layer
    for i in range(layers):
        if i == 0:
            activations.append([dense_forward(input, weights1, bias1)])
        elif i == 2:
            activations.append([dense_forward(input, weights2, bias2)])
        else:
            activations.append([ReLU_forward(input)])
        input = activations[-1]
        input = np.array(input).squeeze()

    layer_inputs = [x_batch] + activations
    logits = activations[-1]
    as_numpy = np.array(logits)
    as_numpy = as_numpy.squeeze()
    logits = as_numpy
    y_hat = y_batch.argmax(axis = 1)
    loss = softmax_crossentropy_with_logits(logits, y_hat)
    grad_loss = grad_softmax_crossentropy_with_logits(logits, y_hat)

    #back propogation
    for layer_index in range(layers)[::-1]:
        to_array  = np.array(layer_inputs[layer_index])
        to_array = to_array.squeeze()
        if layer_index == 2:
            weights2, bias2, grad_loss = dense_backward(weights2, bias2, grad_loss, to_array, LR2)
        elif layer_index == 1:
            grad_loss = ReLU_backward(to_array, grad_loss)
        elif layer_index == 0:
            weights1, bias1, grad_loss = dense_backward(weights1, bias1, grad_loss, to_array, LR1)
   
    return weights1, weights2, bias1, bias2, np.mean(loss)

In [8]:
weights_hidden1, weights_hidden2, bias1, bias2, mean_loss = train_network(10, X, Y, input_size, output_size)

mean_loss = 1.335486554735048
mean_loss = 0.7007404729181874
mean_loss = 0.4839654754623377
mean_loss = 0.37237218732286076
mean_loss = 0.3056767468694743
mean_loss = 0.28084159108264506
mean_loss = 0.2629965147484114
mean_loss = 0.2518721463053924
mean_loss = 0.23033297504994904
mean_loss = 0.21660359719851804


In [9]:
print(X_test.shape)

(9999, 784)


In [10]:
#Predicting labels

def prediction(X_test, weights1, weights2, bias1, bias2):
    first_layer = dense_forward(X_test, weights1, bias1)
    second_layer = ReLU_forward(first_layer)
    third_layer = dense_forward(second_layer, weights2, bias2)
    return third_layer.argmax(axis = 1)
    
predictions = prediction(X_test, weights_hidden1, weights_hidden2, bias1, bias2)
predictions = np.array(predictions)
accuracy = np.mean(predictions == y_test)
print(accuracy)

0.0


  accuracy = np.mean(predictions == y_test)
