## Import libraries

In [7]:
import numpy as np
from scipy.special import logsumexp
from typing import List, Tuple

In [1]:
from google.colab import drive
# Mount Google Drive at the relative path "mnt" (resolved against the current
# working directory at the time this cell runs).
drive.mount("mnt")

Mounted at mnt


In [2]:
# Switch into the notebooks folder on the mounted drive. The path is relative,
# so this cell only works while the working directory is still the one in which
# "mnt" was mounted; re-running it after the directory change will fail.
%cd "mnt/My Drive/Colab Notebooks"

/content/mnt/My Drive/Colab Notebooks


In [4]:
!pip install import-ipynb
import import_ipynb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting import-ipynb
  Downloading import_ipynb-0.1.4-py3-none-any.whl (4.1 kB)
Collecting jedi>=0.10
  Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, import-ipynb
Successfully installed import-ipynb-0.1.4 jedi-0.18.2


In [5]:
# Imports from Neural_Network_from_scratch notebook
from Neural_Network_from_scratch import Loss

importing Jupyter notebook from Neural_Network_from_scratch.ipynb



    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

Validation loss after 10 epochs is 37.226.
Validation loss after 20 epochs is 29.312.
Validation loss after 30 epochs is 26.731.
Validation loss after 40 epochs is 26.571.
Loss increased after epoch 50, the final loss was 26.571, using the model from epoch 40

Mean absolute error: 3.68

Root mean squared error 5.20
Validation loss after 10 epochs is 35.272.
Validation loss after 20 epochs is 20.043.
Validation loss after 30 epochs is 17.688.
Validation loss after 40 epochs is 15.706.
Validation loss after 50 epochs is 14.666.

Mean absolute error: 2.53

Root mean squared error 3.83
Validation loss after 10 epochs is 89.403.
Validation loss after 20 epochs is 19.193.
Validation loss after 30 epochs is 16.681.
Validation loss after 40 epochs is 14.424.
Validation loss after 50 epochs is 12.989.

Mean absolute error: 2.37

Root mean squared error 3.60


## Softmax Loss

In [8]:
# Subclass for Softmax Crossentropy Loss

def softmax(x, axis=None):
  """Compute softmax probabilities of `x` along `axis`.

  The log of the normalising constant is obtained with `logsumexp` and
  subtracted from `x` before exponentiating.

  Args:
    x: Array of scores.
    axis: Axis (or axes) forwarded to `logsumexp`; `None` normalises over
      all elements of `x`.

  Returns:
    Array with the same shape as `x` whose entries sum to 1 along `axis`.
  """
  log_normalizer = logsumexp(x, axis=axis, keepdims=True)
  return np.exp(x - log_normalizer)

def normalize(a: np.ndarray):
  """Append the complement `1 - a` to `a` as additional columns.

  Args:
    a: Array with at least two dimensions (the concatenation happens
      along axis 1).

  Returns:
    Array with twice as many columns as `a`: the columns of `a` followed
    by the columns of `1 - a`.
  """
  complement = 1 - a
  return np.concatenate((a, complement), axis=1)

def unnormalize(a: np.ndarray):
  """Undo `normalize`: keep only the first column of `a` as a column vector.

  `normalize` maps an `(n, 1)` array to `(n, 2)` by appending `1 - a`, so the
  inverse must return the first *column* with shape `(n, 1)`. Indexing with
  `a[np.newaxis, 0]` would instead return the first *row* with shape `(1, k)`.

  Args:
    a: 2-D array whose first column holds the original values.

  Returns:
    View of `a` with shape `(n, 1)` containing its first column.
  """
  # Slice with `:1` rather than index `0` so the result stays two-dimensional.
  return a[:, :1]


class Softmax(Loss):
  """Softmax cross-entropy loss.

  Row-wise softmax is applied to the predictions, the probabilities are
  clipped to `[eps, 1 - eps]`, and the loss is the sum over all entries of
  `-t * log(p) - (1 - t) * log(1 - p)` divided by the number of rows.

  When the target has exactly one column, prediction and target are expanded
  to two columns (`[a, 1 - a]`) with `normalize` before the softmax, and the
  gradient is reduced back to that single column.

  `self.prediction` and `self.target` are not assigned in this class;
  presumably the `Loss` base class stores them before calling `_output()`
  (TODO confirm against Neural_Network_from_scratch).
  """

  def __init__(self, eps: float = 1e-9) -> None:
    """Create the loss.

    Args:
      eps: Clipping margin that keeps the arguments of `log` away from
        0 and 1.
    """
    super().__init__()
    self.eps = eps # Store epsilon
    self.single_output = False # Becomes True once a one-column target is seen


  def _output(self) -> float:
    """Compute the loss for the stored prediction and target.

    Returns:
      The summed per-entry loss divided by the number of rows.
    """
    prediction, target = self.prediction, self.target

    # A network with a single output has a target with exactly ONE column
    # (the previous `== 0` test could never detect that case).
    if target.shape[1] == 1:
      self.single_output = True

    # Expand the single column to two columns. Work on local copies so that
    # self.prediction / self.target keep the shape the network produced.
    if self.single_output:
      prediction, target = normalize(prediction), normalize(target)

    softmax_preds = softmax(prediction, axis=1) # Apply softmax function to each row

    # Clip softmax output to prevent numeric instability in the logs below
    self.softmax_preds = np.clip(softmax_preds, self.eps, 1 - self.eps)
    # Target with the same shape as self.softmax_preds, reused by _input_grad()
    self._loss_target = target

    softmax_loss = (-1.0 * target * np.log(self.softmax_preds)
                    - (1.0 - target) * np.log(1 - self.softmax_preds)) # Loss computation

    return np.sum(softmax_loss) / prediction.shape[0]


  def _input_grad(self) -> np.ndarray:
    """Return `(softmax_preds - target) / batch_size` for the last `_output()` call.

    Returns:
      Array with the same shape as `self.prediction`. In the single-output
      case only the first column of the two-column difference is returned.
    """
    # Scale by the batch size in both branches, matching the averaging in _output().
    grad = (self.softmax_preds - self._loss_target) / self.prediction.shape[0]
    if self.single_output:
      # Keep only the column that corresponds to the network's single output;
      # `:1` keeps the result two-dimensional with shape (n, 1).
      return grad[:, :1]
    return grad

## MNIST dataset

In [11]:
# Import MNIST dataset - Credit: https://github.com/hsjeong5

from urllib import request
import gzip
import pickle

# [dictionary key, archive name] pairs. The first two entries are the image
# archives and the last two the label archives; save_mnist() relies on that order.
filename = [
  ["training_images", "train-images-idx3-ubyte.gz"],
  ["test_images", "t10k-images-idx3-ubyte.gz"],
  ["training_labels", "train-labels-idx1-ubyte.gz"],
  ["test_labels", "t10k-labels-idx1-ubyte.gz"],
]


def download_mnist():
  """Download every archive listed in `filename`.

  Each archive is fetched from `base_url` + archive name with
  `request.urlretrieve` and stored under the archive name. A progress line is
  printed per file and a completion message at the end.
  """
  base_url = "http://yann.lecun.com/exdb/mnist/"
  for entry in filename:
    archive = entry[1]
    print(f"Downloading {archive}...")
    request.urlretrieve(f"{base_url}{archive}", archive)
  print("Download complete.")


def save_mnist():
  """Convert the downloaded archives into a single `mnist.pkl` file.

  The first two entries of `filename` are read with a 16-byte offset and
  reshaped to rows of 28*28 values; the last two entries are read with an
  8-byte offset and kept flat (presumably the offsets skip the IDX file
  headers -- confirm against the format description). The resulting
  dictionary, keyed by each entry's first element, is pickled.
  """
  def read_payload(archive, header_bytes):
    # Decompress the archive and view everything after the header as uint8.
    with gzip.open(archive, 'rb') as stream:
      return np.frombuffer(stream.read(), np.uint8, offset=header_bytes)

  dataset = {}
  for entry in filename[:2]:   # image archives
    dataset[entry[0]] = read_payload(entry[1], 16).reshape(-1, 28*28)
  for entry in filename[-2:]:  # label archives
    dataset[entry[0]] = read_payload(entry[1], 8)

  with open("mnist.pkl", 'wb') as out:
    pickle.dump(dataset, out)
  print("Save complete.")


def init():
  """Fetch the MNIST archives and convert them into `mnist.pkl`.

  Runs `download_mnist()` followed by `save_mnist()`.
  """
  for step in (download_mnist, save_mnist):
    step()


def load():
  """Read `mnist.pkl` from the working directory and return its four entries.

  Returns:
    Tuple `(training_images, training_labels, test_images, test_labels)`
    taken from the pickled dictionary.
  """
  # NOTE: pickle.load can execute arbitrary code; only load a `mnist.pkl`
  # that was produced by save_mnist().
  with open("mnist.pkl", 'rb') as source:
    dataset = pickle.load(source)
  keys = ("training_images", "training_labels", "test_images", "test_labels")
  return tuple(dataset[key] for key in keys)

# Download and convert the dataset only when this code runs as the main module
# (e.g. executed directly as a notebook cell), not when it is imported.
if __name__ == '__main__':
  init()

Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.


In [12]:
# Load the four MNIST entries from mnist.pkl (written by save_mnist()).
X_train, y_train, X_test, y_test = load()
# Despite its name, num_labels holds the number of training samples
# (the length of y_train), not the number of distinct classes.
num_labels = len(y_train)
num_labels

60000

In [13]:
# One-hot encoding labels (both train and test)
NUM_CLASSES = 10  # Width of the one-hot vectors (matches the 10-neuron output layers below)

# Vectorised one-hot encoding: integer-array indexing sets, for every row i,
# the column given by the label of sample i. This replaces a per-sample Python
# loop and yields the same float arrays.
num_labels = len(y_train)
train_labels = np.zeros((num_labels, NUM_CLASSES))
train_labels[np.arange(num_labels), y_train] = 1

num_labels = len(y_test)
test_labels = np.zeros((num_labels, NUM_CLASSES))
test_labels[np.arange(num_labels), y_test] = 1

In [14]:
# Normalize data (mean 0, variance 1)
# Centre both splits with the mean of the training set ...
train_mean = np.mean(X_train)
X_train, X_test = X_train - train_mean, X_test - train_mean
# ... then scale both with the standard deviation of the centred training set.
train_std = np.std(X_train)
X_train, X_test = X_train / train_std, X_test / train_std

In [20]:
# Function for measuring accuracy
def model_accuracy(model, test_set, labels=None):
  """Print the classification accuracy of `model` on `test_set`.

  The predicted class of each row is the argmax of `model.forward(test_set)`
  along axis 1; the accuracy is the percentage of rows whose predicted class
  equals the corresponding label.

  Args:
    model: Object with a `forward(inputs)` method returning one row of
      class scores per sample.
    test_set: Inputs passed to `model.forward`; `test_set.shape[0]` is used
      as the number of samples.
    labels: Integer class label for each row of `test_set`. Defaults to the
      notebook-level `y_test`, so existing two-argument calls behave as
      before.

  Returns:
    None; the accuracy is only printed.
  """
  if labels is None:
    labels = y_test  # Backward-compatible default: the global test labels
  predicted = np.argmax(model.forward(test_set), axis=1)
  accuracy = np.equal(predicted, labels).sum() * 100.0 / test_set.shape[0]
  print(f"The model validation accuracy is: {accuracy:.2f}%")


In [16]:
# Imports from NN_from_scratch
from Neural_Network_from_scratch import NeuralNetwork, Dense, MeanSquaredError, SGD, Sigmoid, Tanh, Linear, ReLU, Trainer

In [21]:
# Baseline model: tanh hidden layer (89 neurons), sigmoid output layer
# (10 neurons), trained with mean squared error loss
model = NeuralNetwork(
    layers = [Dense(neurons = 89, activation = Tanh()),
              Dense(neurons = 10, activation = Sigmoid())],
    loss = MeanSquaredError()
)

# SGD optimizer with learning rate 0.1
trainer = Trainer(model, SGD(learning_rate = 0.1))

# Train for 50 epochs with batches of 60, evaluating on the test split every 10 epochs
trainer.fit(X_train, train_labels, X_test, test_labels, epochs = 50, eval_every = 10, batch_size = 60)
print()
model_accuracy(model, X_test)

Validation loss after 10 epochs is 0.622.
Validation loss after 20 epochs is 0.579.
Validation loss after 30 epochs is 0.474.
Validation loss after 40 epochs is 0.435.
Validation loss after 50 epochs is 0.376.

The model validation accuracy is: 72.19%


In [22]:
# Model with a sigmoid hidden layer (89 neurons) and a linear output layer
# (10 neurons); the softmax is applied inside the Softmax loss
model = NeuralNetwork(
    layers = [Dense(neurons = 89, activation = Sigmoid()),
              Dense(neurons = 10, activation = Linear())],
    loss = Softmax()
)

# SGD optimizer with learning rate 0.1
trainer = Trainer(model, SGD(learning_rate = 0.1))

# Train for up to 120 epochs with batches of 60, evaluating on the test split every 5 epochs
trainer.fit(X_train, train_labels, X_test, test_labels, epochs = 120, eval_every = 5, batch_size = 60)
print()
model_accuracy(model, X_test)

Validation loss after 5 epochs is 0.716.
Validation loss after 10 epochs is 0.603.
Validation loss after 15 epochs is 0.551.
Validation loss after 20 epochs is 0.523.
Validation loss after 25 epochs is 0.504.
Validation loss after 30 epochs is 0.491.
Validation loss after 35 epochs is 0.479.
Validation loss after 40 epochs is 0.472.
Validation loss after 45 epochs is 0.468.
Validation loss after 50 epochs is 0.465.
Validation loss after 55 epochs is 0.461.
Validation loss after 60 epochs is 0.460.
Validation loss after 65 epochs is 0.456.
Validation loss after 70 epochs is 0.455.
Validation loss after 75 epochs is 0.454.
Loss increased after epoch 80, the final loss was 0.454, using the model from epoch 75

The model validation accuracy is: 92.37%
