In [1]:
import pandas as pd
import numpy as np
from network import Network

## Data preparation ##

In [2]:
from sklearn.datasets import fetch_openml

# Fetch the MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
x, y = mnist["data"], mnist["target"]

In [3]:
x =  x / 255
y = y.astype("int64")

In [4]:
x.head(10)

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
data = pd.concat([x,y],axis=1)

In [6]:
data.head()

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784,class
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9


In [7]:
randomized_data = data.sample(frac=1,random_state=42)

split_index = int(0.8 * len(randomized_data))

train_data = randomized_data[:split_index]
test_data = randomized_data[split_index:]

train_x,train_y = train_data[["pixel"+str(i)for i in range(1,785)]],train_data["class"]
test_x,test_y = test_data[["pixel"+str(i)for i in range(1,785)]],test_data["class"]

In [8]:
#one hot encode the labels
train_y = pd.get_dummies(train_y)
train_y = train_y.astype("int")
train_x,train_y = train_x.to_numpy(),train_y.to_numpy()
test_y = pd.get_dummies(test_y)
test_y = test_y.astype("int")
test_x,test_y = test_x.to_numpy(),test_y.to_numpy()

## Experimenting ##

In [9]:
#create an instance of the network
nn = Network([784,10,10])

In [10]:
nn.train(train_x.T,train_y.T,learnRate=0.01,epochs=3000,batch_size=100,show_training_progress=True,show_training_progress_rate=100)

Epoch 0, Loss: 2.2833354000250083
Epoch 100, Loss: 0.6408650583528078
Epoch 200, Loss: 0.49205397775590537
Epoch 300, Loss: 0.43321928227932305
Epoch 400, Loss: 0.397949437028804
Epoch 500, Loss: 0.37604300521827944
Epoch 600, Loss: 0.3706953143013414
Epoch 700, Loss: 0.3426139405160037
Epoch 800, Loss: 0.333748741640955
Epoch 900, Loss: 0.3214545508578012
Epoch 1000, Loss: 0.31662794395738164
Epoch 1100, Loss: 0.3064145493083505
Epoch 1200, Loss: 0.29733568394139853
Epoch 1300, Loss: 0.28561285836378136
Epoch 1400, Loss: 0.2806533521849765
Epoch 1500, Loss: 0.274718080606809
Epoch 1600, Loss: 0.27233735958807703
Epoch 1700, Loss: 0.26518609198218607
Epoch 1800, Loss: 0.26126874965517644
Epoch 1900, Loss: 0.2606274677857242
Epoch 2000, Loss: 0.2543314183306489
Epoch 2100, Loss: 0.24788092355353925
Epoch 2200, Loss: 0.24780649854770986
Epoch 2300, Loss: 0.24839975271428932
Epoch 2400, Loss: 0.24905594216770754
Epoch 2500, Loss: 0.23663060114666398
Epoch 2600, Loss: 0.2338217219923983
Ep

In [11]:
def calculate_accuracy(actual, predicted):

    predicted_classes = np.argmax(predicted, axis=0)
    actual_classes = np.argmax(actual, axis=0)
    print(actual_classes)

    correct_predictions = np.sum(predicted_classes == actual_classes)
    
    accuracy = correct_predictions / len(actual_classes)
    
    return accuracy

In [12]:
print(calculate_accuracy(train_y.T,nn.feed_forward(train_x.T))*100,"%")

[8 4 8 ... 5 1 6]
93.79464285714286 %


In [13]:
print(calculate_accuracy(test_y.T,nn.feed_forward(test_x.T))*100,"%")

[3 2 1 ... 1 0 0]
92.34285714285714 %


In [14]:
nn.save_model("handwritten_digits_V1.0")