In [1]:
from dataclasses import dataclass
from typing import Optional
import numpy as np
import pandas as pd

from data import read_ML_cup, read_monks
from nn import LossFunction, Optimizer

Test Data Modules

In [2]:
# Smoke-test the ML-cup reader on the "train" split.
# The result is only held briefly: the next cell rebinds `dataset`.
dataset = read_ML_cup("train")

In [3]:
# Smoke-test the MONK reader (presumably problem 3, "test" split -- confirm
# against data.read_monks) and display the shape of the returned dataset.
# This rebinds `dataset`, replacing the ML-cup data loaded in the previous cell.
dataset = read_monks(3, "test")
dataset.shape

(124, [6, 1])

### Trainer Class

In [4]:
class Trainer:
    """Mini-batch training loop tying together a network, a loss and an optimizer.

    Parameters
    ----------
    nn
        Network object. The trainer calls ``nn.foward(x)``,
        ``nn.backward(grad)`` and ``nn.optimize(optimizer)`` on it.
    data
        Dataset object exposing ``shape`` (``shape[0]`` is used as the number
        of samples), ``data`` and ``labels`` (both indexed with a permutation
        array).
    loss
        Loss object providing ``foward(pred, target)`` and ``backward()``.
        When omitted, a fresh ``LossFunction()`` is created for this trainer.
    optimizer
        Object handed to ``nn.optimize``. When omitted, a fresh
        ``Optimizer()`` is created for this trainer.
    batchsize
        Samples per mini-batch. ``None`` means one batch spanning the whole
        dataset.
    start_it
        Initial value of the epoch counter ``self.t``.
    seed
        Seed for the shuffling generator. When given, the same generator is
        also assigned to ``nn.rng``.

    Raises
    ------
    ValueError
        If an explicit ``batchsize`` is smaller than 1.
    """

    def __init__(self, nn, data, *,
        loss=None, optimizer=None,
        batchsize=None, start_it=0, seed=None
    ):
        if batchsize is not None and batchsize < 1:
            raise ValueError(
                f"batchsize must be a positive integer, got {batchsize!r}"
            )
        self.dataset = data
        self.nn = nn
        self.t = start_it
        # Defaults are built per instance: a default written in the signature
        # is evaluated once at definition time and would then be shared by
        # every Trainer that relies on it.
        self.loss = LossFunction() if loss is None else loss
        self.optimizer = Optimizer() if optimizer is None else optimizer
        if batchsize is None:
            # full-batch training: a single batch covering every sample
            self.batchsize = self.dataset.shape[0]
        else:
            self.batchsize = batchsize
        self.rng = np.random.default_rng(seed)
        if seed is not None:
            # Share the seeded generator with the network. This only replaces
            # the attribute; nothing here re-draws already initialised weights.
            self.nn.rng = self.rng

    @staticmethod
    def get_minibatches(x, y, batchsize):
        """Yield consecutive ``(x_batch, y_batch)`` slices of ``batchsize`` rows.

        ``x`` and ``y`` are sliced along their first axis with the same
        bounds. The final batch is shorter when ``x.shape[0]`` is not a
        multiple of ``batchsize``; nothing is yielded for empty input.

        Raises
        ------
        ValueError
            If ``batchsize`` is smaller than 1 (raised when iteration starts).
        """
        if batchsize < 1:
            raise ValueError(
                f"batchsize must be a positive integer, got {batchsize!r}"
            )
        size = x.shape[0]
        for start in range(0, size, batchsize):
            stop = start + batchsize
            # slicing clamps at the end, which produces the short last batch
            yield x[start:stop], y[start:stop]

    def train(self, n_epochs, callback=print, mb_callback=None):
        """Run ``n_epochs`` passes over the dataset, updating the network per batch.

        Each epoch reshuffles the dataset with ``self.rng``, then for every
        mini-batch runs forward, loss, backward and one optimizer step.

        Parameters
        ----------
        n_epochs
            Number of epochs to run.
        callback
            Called once per epoch with ``{"epoch": self.t, "loss": avg_loss}``,
            where ``self.t`` has already been incremented for that epoch.
        mb_callback
            Optional; called per mini-batch as ``mb_callback(epoch, batch, loss)``.
            Here ``epoch`` is ``self.t`` *before* the end-of-epoch increment,
            so it is one less than the epoch number later sent to ``callback``.

        Notes
        -----
        ``avg_loss`` is the unweighted mean of the per-batch loss values: a
        shorter final batch counts as much as a full one.
        """
        for _ in range(n_epochs):
            # permute dataset
            permutation = self.rng.permutation(self.dataset.shape[0])
            x = self.dataset.data[permutation]
            y = self.dataset.labels[permutation]
            # iterate minibatches
            avg_loss, batchcount = 0., np.ceil(x.shape[0] / self.batchsize)
            for b, (mini_x, mini_y) in enumerate(Trainer.get_minibatches(x, y, self.batchsize)):
                pred = self.nn.foward(mini_x)
                loss = self.loss.foward(pred, mini_y)
                if mb_callback is not None:
                    mb_callback(self.t, b, loss)
                avg_loss += loss
                # backward() takes no arguments; presumably it relies on state
                # cached by the preceding foward() call -- confirm in nn.LossFunction
                loss_grad = self.loss.backward()
                self.nn.backward(loss_grad)
                self.nn.optimize(self.optimizer)
            avg_loss /= batchcount
            self.t += 1
            record = {"epoch": self.t, "loss": avg_loss}
            callback(record)

In [5]:
from nn import NeuralNetwork, LinearLayer, ActivationFunction

In [6]:
# Build a three-LinearLayer network with ActivationFunction between the layers
# (tuples are presumably (inputs, outputs), i.e. 9 -> 8 -> 8 -> 2 -- confirm in
# nn.LinearLayer) and wrap it in a Trainer on the ML-cup "train" split.
# No batchsize is passed, so Trainer uses the whole dataset as a single batch.
# NOTE(review): the network is constructed before Trainer sees seed=123, and
# Trainer only assigns its generator to nn.rng afterwards; whether the initial
# weights are reproducible depends on how NeuralNetwork uses that attribute.
trainer = Trainer(
    NeuralNetwork([
        LinearLayer((9, 8)),
        ActivationFunction(),
        LinearLayer((8, 8)),
        ActivationFunction(),
        LinearLayer((8, 2))
    ]),
    read_ML_cup("train"),
    seed=123,
    optimizer=Optimizer(eta=1e-3, l2_coeff=1e-2, alpha=0.2)
)

In [7]:
# Train for 100 epochs. With the default callback (print), each epoch emits
# one {'epoch': ..., 'loss': ...} record holding the mean per-batch loss.
trainer.train(100)

{'epoch': 1, 'loss': 1371.8703763570543}
{'epoch': 2, 'loss': 451.8049967820603}
{'epoch': 3, 'loss': 448.93691363874774}
{'epoch': 4, 'loss': 444.9905358628785}
{'epoch': 5, 'loss': 440.33591419852877}
{'epoch': 6, 'loss': 435.5141989917532}
{'epoch': 7, 'loss': 430.8645304334314}
{'epoch': 8, 'loss': 426.52548849971805}
{'epoch': 9, 'loss': 422.45081912327015}
{'epoch': 10, 'loss': 418.56797116846826}
{'epoch': 11, 'loss': 414.7779776302563}
{'epoch': 12, 'loss': 410.98060966470075}
{'epoch': 13, 'loss': 407.1873551018359}
{'epoch': 14, 'loss': 403.46688734574866}
{'epoch': 15, 'loss': 399.84708701026074}
{'epoch': 16, 'loss': 396.318247518225}
{'epoch': 17, 'loss': 392.83509108044836}
{'epoch': 18, 'loss': 389.3217751224261}
{'epoch': 19, 'loss': 385.70497824556156}
{'epoch': 20, 'loss': 381.8711350394194}
{'epoch': 21, 'loss': 377.72470715544904}
{'epoch': 22, 'loss': 373.15915811432427}
{'epoch': 23, 'loss': 368.1108783944512}
{'epoch': 24, 'loss': 362.606161617119}
{'epoch': 25, 