# Scratch Artificial Neural Network

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.dont_write_bytecode = True

# Import libraries
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Import custom modules
from ArtificialNeuralNetwork import ArtificialNeuralNetwork
from Layer import Layer, OutputLayer
from Function import ActivationFunction, LossFunction
from enums import InitializerType

  from .autonotebook import tqdm as notebook_tqdm


# Pipeline for testing MNIST dataset

In [3]:
input_size = 784
hidden_layers = 2
hidden_size = 128
output_size = 10
learning_rate = 0.001
param_1 = 0
param_2 = 0
batch_size = 128

In [4]:
# Load MNIST dataset using fetch_openml
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
X = X / 255.0
y = y.astype(int)


# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42,
)

In [14]:
ann = ArtificialNeuralNetwork(
    123,
    Layer(
        weight_init=InitializerType.XAVIER,
        bias_init=InitializerType.ZERO,
        input_size=input_size,
        num_neurons=hidden_size,
        param_1=param_1,
        param_2=param_2,
        activation=ActivationFunction.relu,
        layer_name=f"Hidden Layer 0"
    ),
    *[Layer(
        weight_init=InitializerType.XAVIER,
        bias_init=InitializerType.ZERO,
        input_size=hidden_size,
        num_neurons=hidden_size,
        param_1=param_1,
        param_2=param_2,
        activation=ActivationFunction.relu,
        layer_name=f"Hidden Layer {i + 1}"
    ) for i in range(hidden_layers)],
    OutputLayer(
        weight_init=InitializerType.XAVIER,
        bias_init=InitializerType.ZERO,
        input_size=hidden_size,
        num_neurons=output_size,
        param_1=param_1,
        param_2=param_2,
        activation=ActivationFunction.softmax,
        loss_funct=LossFunction.categorical_cross_entropy,
        layer_name="Output Layer"
    )
)

In [15]:
ann.train(
    x=X_train,
    y=y_train,
    loss_function=LossFunction.categorical_cross_entropy,
    lr=0.01,
    epochs=50,
    batch_size=batch_size,
    shuffle=False,
    verbose=True,
    validation_data=(X_test, y_test)
)

Training: 100%|██████████| 50/50 [00:25<00:00,  1.96it/s, Loss: 0.5647, Val Loss: 0.7453]

Total training time: 23.78s





([10.615632628077442,
  4.551571752513069,
  3.253502420811506,
  2.7337156003009677,
  2.435108478120644,
  2.22966427627728,
  2.07230022689634,
  1.9449042782707275,
  1.8375064914748551,
  1.7436726627077606,
  1.6602617844197118,
  1.5853232274117417,
  1.5167962621468347,
  1.4544230466878347,
  1.396985013333428,
  1.343964415284714,
  1.294825820011939,
  1.2487495242077926,
  1.2056798580404338,
  1.165239446033508,
  1.1277121139843245,
  1.0923951537633203,
  1.0590239644049884,
  1.027920310999982,
  0.9984632516648828,
  0.970453824586029,
  0.9442469258013892,
  0.9193163884502967,
  0.8957468694369858,
  0.8733628328880682,
  0.8517187102023168,
  0.8311725214787908,
  0.8114469036567769,
  0.7924288596432244,
  0.7741655005968429,
  0.7567700392559504,
  0.7397930822774843,
  0.7234088717159748,
  0.7077956543742288,
  0.6925346378969144,
  0.6777895044590379,
  0.6636684534465672,
  0.6497747035806389,
  0.6364423131775493,
  0.623403959994853,
  0.610935329165525,
  0

In [16]:
from sklearn.metrics import f1_score

y_pred = ann.predict(X_test)
print(f"F1 Score: {f1_score(y_test, y_pred, average='macro')}")

F1 Score: 0.9680068489737372


In [8]:
ann.save("ann_model.pkl")

Model saved to models/ann_model.pkl


In [9]:
new_model = ArtificialNeuralNetwork()

In [10]:
new_model.load("ann_model.pkl")

Layer Name: Hidden Layer 0
None
Layer Name: Hidden Layer 1
None
Layer Name: Hidden Layer 2
None
Layer Name: Output Layer
None
Model loaded from models/ann_model.pkl


In [11]:
new_model.evaluate(X_test, y_test)

np.float64(0.9662142857142857)

In [17]:
# Compare with MLP

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score


# Comparison with scikit-learn MLP (using ReLU instead of PReLU)
model = MLPClassifier(
    hidden_layer_sizes=(128, 128, 128),
    activation='relu',
    solver='sgd',
    max_iter=50,
    batch_size=batch_size,
    random_state=123,
    verbose=True,
    learning_rate_init=0.01,
    learning_rate='constant',
    tol=0.0
)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("F1 Score: ", f1_score(y_test, y_pred, average='macro'))

Iteration 1, loss = 0.50987355
Iteration 2, loss = 0.20081164
Iteration 3, loss = 0.14448322
Iteration 4, loss = 0.11279001
Iteration 5, loss = 0.09228397
Iteration 6, loss = 0.07737273
Iteration 7, loss = 0.06476378
Iteration 8, loss = 0.05590762
Iteration 9, loss = 0.04750016
Iteration 10, loss = 0.04148656
Iteration 11, loss = 0.03518707
Iteration 12, loss = 0.03031388
Iteration 13, loss = 0.02678819
Iteration 14, loss = 0.02317245
Iteration 15, loss = 0.01956633
Iteration 16, loss = 0.01767333
Iteration 17, loss = 0.01382902
Iteration 18, loss = 0.01283998
Iteration 19, loss = 0.01141277
Iteration 20, loss = 0.00909595
Iteration 21, loss = 0.00749835
Iteration 22, loss = 0.00579999
Iteration 23, loss = 0.00643335
Iteration 24, loss = 0.00454817
Iteration 25, loss = 0.00411729
Iteration 26, loss = 0.00336292
Iteration 27, loss = 0.00311689
Iteration 28, loss = 0.00250785
Iteration 29, loss = 0.00212413
Iteration 30, loss = 0.00184556
Iteration 31, loss = 0.00172927
Iteration 32, los

