In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the training CSV into a DataFrame. The path is relative to the notebook's
# working directory, so the file must exist at that location for this cell to run.
data = pd.read_csv("../input/digit-recognizer/train.csv")
# Last expression of the cell: displays the first rows as a quick visual check.
data.head()

In [3]:
# Seed NumPy's global RNG so the shuffle below (and every later np.random call,
# e.g. the weight initialisation) gives the same result on a fresh
# "Restart Kernel -> Run All".
np.random.seed(42)

# np.array() accepts both the DataFrame from the load cell and an ndarray left
# over from a previous run of this cell, so re-running the cell no longer fails
# with AttributeError on `.to_numpy()`. It also returns a fresh, writable copy,
# which the in-place shuffle needs.
data = np.array(data)
samples, features = data.shape

# Shuffle the rows in place so the fixed-position train/test split taken later
# is a random split of the rows.
np.random.shuffle(data)

In [4]:
# Hold out the first 1000 (already shuffled) rows for testing. After the
# transpose every column is one sample: row 0 is read as the label and rows
# 1..features-1 as the inputs, which are divided by 255 (presumably 0-255
# pixel intensities -- TODO confirm against the CSV).
test_data = data[:1000].T
Y_test = test_data[0]
X_test = test_data[1:features] / 255.

# Every remaining row is used for training, laid out the same way.
train_data = data[1000:samples].T
Y_train = train_data[0]
X_train = train_data[1:features] / 255.
m_train = X_train.shape[1]

In [5]:
# Display the training input shape; after the transpose this is (features, samples).
X_train.shape

In [6]:
# Display the training label shape; one label per training sample.
Y_train.shape

In [54]:
def initial_parameters(n_inputs=784, n_hidden=10, n_outputs=10):
    """Draw random starting weights and biases for the two-layer network.

    Every value is sampled uniformly from [-0.5, 0.5) with NumPy's global RNG.
    The four draws happen in the same order as before, so with the default
    sizes and a given seed the result is unchanged.

    Parameters
    ----------
    n_inputs : int, default 784
        Number of input features (columns of the first weight matrix).
    n_hidden : int, default 10
        Number of hidden units.
    n_outputs : int, default 10
        Number of output classes.

    Returns
    -------
    tuple of np.ndarray
        ``(first_weights, first_bias, second_weights, second_bias)`` with
        shapes ``(n_hidden, n_inputs)``, ``(n_hidden, 1)``,
        ``(n_outputs, n_hidden)`` and ``(n_outputs, 1)``.
    """
    first_weights = np.random.rand(n_hidden, n_inputs) - .5
    first_bias = np.random.rand(n_hidden, 1) - .5
    second_weights = np.random.rand(n_outputs, n_hidden) - .5
    second_bias = np.random.rand(n_outputs, 1) - .5
    return first_weights, first_bias, second_weights, second_bias

def ReLU(Z):
    """Rectified linear unit: element-wise maximum of ``Z`` and zero."""
    floor = 0
    rectified = np.maximum(Z, floor)
    return rectified

def ReLU_deriv(Z):
    """Return a mask that is True where ``Z`` is strictly positive.

    When multiplied with a numeric array the mask acts as 1/0, i.e. the
    slope of ReLU (the value at exactly zero is taken to be 0).
    """
    is_positive = Z > 0
    return is_positive
"""
def Leaky_ReLU(step_1, alpha):
    array = [np.maximum(alpha*i, i) for i in step_1]
    return np.array(array, dtype=float)

def LR_deriv(step_1, alpha):
    array = [1 if i>0 else alpha for i in step_1]
    return np.array(array, dtype=float)
"""     
def Softmax(step_3):
    """Column-wise softmax of the output-layer pre-activations.

    Parameters
    ----------
    step_3 : np.ndarray
        Scores with one column per sample; a 1-D vector is treated as a
        single sample.

    Returns
    -------
    np.ndarray
        Array of the same shape in which every column is non-negative and
        sums to 1.
    """
    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (which would turn
    # the division into inf/inf = NaN) when scores are large.
    shifted = step_3 - np.max(step_3, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    # np.sum along axis 0 replaces the Python built-in sum(), which added the
    # rows one at a time in a Python-level loop.
    prob_array = exp_scores / np.sum(exp_scores, axis=0, keepdims=True)
    return prob_array

def One_Hot_Encoder(Y_train, num_classes=None):
    """One-hot encode integer class labels, one column per sample.

    Parameters
    ----------
    Y_train : array-like
        Non-negative class labels. Float-typed labels such as ``3.0`` are
        accepted and cast to int (previously they raised when used as
        indices).
    num_classes : int, optional
        Number of rows in the result. When omitted it is inferred as
        ``max(label) + 1``, exactly as before. Pass it explicitly when a
        batch may not contain the highest class, otherwise the result has
        too few rows.

    Returns
    -------
    np.ndarray
        Float array of shape ``(num_classes, n_samples)`` holding a single 1
        per column, in the row given by that sample's label.

    Raises
    ------
    ValueError
        If ``Y_train`` is empty and ``num_classes`` is not given.
    IndexError
        If a label is ``>= num_classes``.
    """
    labels = np.asarray(Y_train).astype(int).ravel()
    if num_classes is None:
        num_classes = labels.max() + 1
    # Fill the (classes, samples) layout directly instead of building the
    # transposed matrix and flipping it afterwards.
    binary_labels = np.zeros((num_classes, labels.size))
    binary_labels[labels, np.arange(labels.size)] = 1
    return binary_labels

def forward_pass(first_weights, first_bias, second_weights, second_bias, X_train):
    """Run the inputs through both layers of the network.

    Returns the four intermediate results in order: hidden pre-activation,
    hidden activation (ReLU), output pre-activation and output
    probabilities (Softmax).
    """
    hidden_linear = np.dot(first_weights, X_train) + first_bias
    hidden_active = ReLU(hidden_linear)
    output_linear = np.dot(second_weights, hidden_active) + second_bias
    output_probs = Softmax(output_linear)
    return hidden_linear, hidden_active, output_linear, output_probs

def backward_pass(step_1, step_2, step_3, step_4, first_weights, second_weights,
                  X_train, Y_train):
    """Back-propagate the output error and return the parameter gradients.

    Parameters
    ----------
    step_1, step_2, step_3, step_4 : np.ndarray
        Values returned by ``forward_pass`` for ``X_train``. ``step_3`` is
        accepted for signature compatibility but not used.
    first_weights : np.ndarray
        Accepted for signature compatibility but not used.
    second_weights : np.ndarray
        Output-layer weights, needed to push the error back to the hidden layer.
    X_train : np.ndarray
        Inputs with one column per sample.
    Y_train : np.ndarray
        Integer class label of each column of ``X_train``.

    Returns
    -------
    tuple of np.ndarray
        ``(d_first_weights, d_first_bias, d_second_weights, d_second_bias)``.
        Each bias gradient is a column vector with one entry per unit.
    """
    # Average over the samples actually passed in. The previous code divided
    # by the notebook-level `samples`, which counts every CSV row including
    # the held-out ones and silently tied this function to a global.
    batch_size = X_train.shape[1]

    binary_labels = One_Hot_Encoder(Y_train)
    # The factor 2 is kept from the original; it scales every gradient by the
    # same constant, which is equivalent to doubling the learning rate.
    d_step_3 = 2*(step_4 - binary_labels)
    d_second_weights = d_step_3.dot(step_2.T) / batch_size
    # Sum across samples only (axis=1) so every unit gets its own bias
    # gradient. Summing over all entries collapsed this to one scalar, and
    # for the output layer that scalar is ~0 because each softmax column and
    # each one-hot column sums to 1 -- so second_bias was never updated.
    d_second_bias = np.sum(d_step_3, axis=1, keepdims=True) / batch_size
    d_step_2 = second_weights.T.dot(d_step_3) * ReLU_deriv(step_1)
    d_first_weights = d_step_2.dot(X_train.T) / batch_size
    d_first_bias = np.sum(d_step_2, axis=1, keepdims=True) / batch_size
    return d_first_weights, d_first_bias, d_second_weights, d_second_bias

def update_parameters(first_weights, first_bias, second_weights, second_bias,
                      d_first_weights, d_first_bias, d_second_weights, d_second_bias, 
                      learning_rate):
    """Take one gradient-descent step on all four parameters.

    Each parameter is replaced by ``parameter - gradient * learning_rate``.
    New values are returned; the arguments themselves are not modified.
    """
    current = (first_weights, first_bias, second_weights, second_bias)
    gradients = (d_first_weights, d_first_bias, d_second_weights, d_second_bias)
    return tuple(param - grad*learning_rate for param, grad in zip(current, gradients))

def predictions(step_4):
    """Return, for every column of ``step_4``, the row index of its largest value."""
    winning_rows = np.argmax(step_4, axis=0)
    return winning_rows

def accuracy(predictions, Y_train):
    """Fraction of entries in ``predictions`` that equal the matching label.

    Note that the ``predictions`` parameter shadows the module-level
    ``predictions`` function inside this body.
    """
    matches = predictions == Y_train
    total = Y_train.shape[0]
    return np.sum(matches)/total


In [55]:
def gradient_descent(X_train, Y_train, learning_rate, epochs, log_every=100):
    """Train the two-layer network with full-batch gradient descent.

    Parameters
    ----------
    X_train : np.ndarray
        Inputs with one column per sample.
    Y_train : np.ndarray
        Integer class label of each column of ``X_train``.
    learning_rate : float
        Step size passed to ``update_parameters``.
    epochs : int
        Number of passes (one parameter update each) over the whole set.
    log_every : int, default 100
        Print progress every this many epochs; ``0`` or ``None`` disables
        the progress output. The default keeps the previous cadence.

    Returns
    -------
    tuple of np.ndarray
        The trained ``(first_weights, first_bias, second_weights, second_bias)``.
    """
    first_weights, first_bias, second_weights, second_bias = initial_parameters()
    for epoch in range(epochs):
        step_1, step_2, step_3, step_4 = forward_pass(
            first_weights, first_bias, second_weights, second_bias, X_train)

        d_first_weights, d_first_bias, d_second_weights, d_second_bias = backward_pass(
            step_1, step_2, step_3, step_4,
            first_weights, second_weights,
            X_train, Y_train)

        first_weights, first_bias, second_weights, second_bias = update_parameters(
            first_weights, first_bias,
            second_weights, second_bias,
            d_first_weights, d_first_bias,
            d_second_weights, d_second_bias,
            learning_rate)

        if log_every and (epoch % log_every) == 0:
            # The accuracy shown comes from this epoch's forward pass, i.e.
            # from the parameters as they were before the update above.
            print(f"Epochs: {epoch}/{epochs}")
            pred = predictions(step_4)
            print(F"{(accuracy(pred, Y_train)*100):.2f}%\n_______________")
            # The leftover debug dump of the hidden pre-activations
            # (print(step_1)) was removed; it only cluttered the cell output.
    return first_weights, first_bias, second_weights, second_bias

In [56]:
# Hyperparameters for the training run: step size and number of full-batch updates.
learning_rate = 0.01
epochs = 5000
# Train on the training split and keep the learned parameters as notebook-level
# variables so the evaluation cells can reuse them.
first_weights, first_bias, second_weights, second_bias = gradient_descent(X_train, Y_train, learning_rate, epochs)

In [35]:
def test(x, first_weights, first_bias, second_weights, second_bias):
    """Predict a class index for every column of ``x`` using the given parameters."""
    # Only the last forward_pass output (the class probabilities) is needed.
    probabilities = forward_pass(first_weights, first_bias, second_weights, second_bias, x)[-1]
    return predictions(probabilities)

def test_validation(index, first_weights, first_bias, second_weights, second_bias):
    """Print the prediction and true label for one sample and show its image.

    The sample is column ``index`` of the notebook-level ``X_train`` and its
    label comes from ``Y_train`` -- despite the name, this inspects training
    data rather than a separate validation set.
    """
    # The trailing None keeps the column two-dimensional, shape (features, 1).
    column = X_train[:, index, None]
    pred = test(column, first_weights, first_bias, second_weights, second_bias)
    label = Y_train[index]
    print(f"Model's prediction: {pred}\n Actual label: {label}")

    # Lay the column out as a 28x28 grid and undo the /255 scaling for display.
    image = column.reshape((28, 28)) * 255
    plt.gray()
    plt.imshow(image, interpolation="nearest")
    plt.show()

In [36]:
# Spot-check a handful of training samples, in the same order as before.
for sample_index in (5, 34, 56, 89, 101, 234):
    test_validation(sample_index, first_weights, first_bias, second_weights, second_bias)

In [38]:
# Predict a class for every held-out sample with the trained parameters.
Validation = test(X_test, first_weights, first_bias, second_weights, second_bias)
# Last expression of the cell: fraction of held-out samples predicted correctly.
accuracy(Validation, Y_test)