In [275]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [295]:
from typing import List
import pandas as pd
import numpy as np

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from scipy.special import softmax

from dense_layer import DenseLayer
from relu_layer import ReluLayer

In [296]:
iris = load_iris()
X, y = iris.data, iris.target

In [297]:
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, random_state=42, stratify=y, test_size=0.25)

In [298]:
def iterate_batches(X, y, batch_size) -> List:
    X_split, y_split = [], []

    for i in range(X.shape[0] // batch_size - 1): 
        X_split.append(X[(i) * batch_size:(i+1) * batch_size])
        y_split.append(y[(i) * batch_size:(i+1) * batch_size])
        
    return np.array(X_split), np.array(y_split)

In [299]:
class Network:
    def __init__(self, input_features: int) -> None:
        self.layers = np.array([])
        self.layers = np.append(self.layers, DenseLayer(input_features, 16))
        self.layers = np.append(self.layers, ReluLayer())
        self.layers = np.append(self.layers, DenseLayer(16, 8))
        self.layers = np.append(self.layers, ReluLayer())
        self.layers = np.append(self.layers, DenseLayer(8, 3))
        
    def forward(self, X: np.ndarray):
        activations = []
        entr = X.copy()
    
        for layer in self.layers:
            activations.append(layer.forward(entr))
            entr = activations[-1]
    
        return activations
    
    def predict(self, X: np.ndarray) -> np.ndarray:
        return self.forward(X)[-1].argmax(axis=-1)
    
    def fit_batch(self, X_batch: np.ndarray, y_batch: np.ndarray) -> None:
        activations = self.forward(X_batch)
        inputs = [X_batch] + activations
        
        grads = self.calculate_grads(activations[-1], y_batch)
        
        for i in reversed(range(len(self.layers))):
            layer = self.layers[i]
            grads = layer.backward(inputs[i], grads)
    
    @staticmethod
    def calculate_grads(X, y):
        probs = softmax(X)
        n = X.shape[0]
        probs[range(n), y] -= 1
        return probs / n

In [300]:
network = Network(X_train.shape[1])

for epoch in range(2):
    X_batches, y_batches = iterate_batches(X_train, y_train, batch_size=16)
    for i in range(X_batches.shape[0]):
        network.fit_batch(X_batches[i], y_batches[i])

In [301]:
network.predict(X_test)

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [302]:
y_test

array([0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0, 0, 1, 0, 1, 2, 1,
       2, 1, 2, 1, 0, 2, 0, 1, 2, 2, 0, 0, 0, 0, 2, 1])

In [293]:
network.forward(X)[-1]

array([[1.21583366e+10, 1.43778614e+10, 2.04545349e+10],
       [1.14658411e+10, 1.35589498e+10, 1.92895178e+10],
       [1.12229378e+10, 1.32717040e+10, 1.88808700e+10],
       [1.11781446e+10, 1.32187337e+10, 1.88055123e+10],
       [1.20832009e+10, 1.42890094e+10, 2.03281304e+10],
       [1.32588043e+10, 1.56792207e+10, 2.23059016e+10],
       [1.13453650e+10, 1.34164800e+10, 1.90868341e+10],
       [1.20209115e+10, 1.42153492e+10, 2.02233384e+10],
       [1.06253990e+10, 1.25650835e+10, 1.78756027e+10],
       [1.16084630e+10, 1.37276079e+10, 1.95294578e+10],
       [1.28636785e+10, 1.52119647e+10, 2.16411644e+10],
       [1.18083508e+10, 1.39639850e+10, 1.98657373e+10],
       [1.12857743e+10, 1.33460118e+10, 1.89865836e+10],
       [1.02449497e+10, 1.21151826e+10, 1.72355553e+10],
       [1.34292221e+10, 1.58807491e+10, 2.25926045e+10],
       [1.39186326e+10, 1.64595014e+10, 2.34159609e+10],
       [1.28882771e+10, 1.52410529e+10, 2.16825462e+10],
       [1.21858071e+10, 1.44103