In [1]:
# Name of the implementation package to use. The import cell's `match backend:`
# has cases for "original_backend", "np_backend" and "pt_backend".
backend = "pt_backend"

In [2]:
import time
import numpy as np

np.random.seed(0)

import warnings
warnings.filterwarnings("ignore")

match backend:
    case "original_backend":
        from lib.original_backend.linear_algebra import Matrix
        from lib.original_backend.nn import  NN, Softmax, Linear
        from lib.original_backend.processing import OneHotEncoder, ColumnNormalizer
        from lib.optimizers import SgdOptimizer, SgdWithMomentumOptimizer, AdaGradOptimizer, RmsPropOptimizer, AdamOptimizer

    case "np_backend":
        from lib.np_backend.linear_algebra import Matrix
        from lib.np_backend.nn import NN, Softmax, Linear
        from lib.np_backend.processing import OneHotEncoder, ColumnNormalizer
        from lib.optimizers import SgdOptimizer, SgdWithMomentumOptimizer, AdaGradOptimizer, RmsPropOptimizer, AdamOptimizer

    case "pt_backend":
        from lib.pt_backend.linear_algebra import Matrix
        from lib.pt_backend.nn import NN, Softmax, Linear
        from lib.pt_backend.processing import OneHotEncoder, ColumnNormalizer
        from lib.pt_backend.optimizers import SgdOptimizer, SgdWithMomentumOptimizer, AdaGradOptimizer, RmsPropOptimizer, AdamOptimizer


from lib.metrics.losses import negative_log_likelihood
from lib.gd_data_loaders import BatchDataLoader, StochasticDataLoader, MiniBatchDataLoader


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/usr/local/Cellar/python@3.10/3.10.14_1/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/Cellar/python@3.10/3.10.14_1/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/anton/repos/neural-nets/.venv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/an

In [3]:
# The Iris dataset was used in R.A. Fisher's classic 1936 paper, The Use of Multiple Measurements in Taxonomic Problems, and can also be found on the UCI Machine Learning Repository.
# It includes three iris species with 50 samples each as well as some properties about each flower. One flower species is linearly separable from the other two, but the other two are not linearly separable from each other.

# Each non-empty line is comma-separated: every field but the last is a numeric
# feature, the last field is the class label.
rows = []
labels = []
with open("data/iris.data", "rt") as f:
    for line in f:
        fields = line.strip().split(",")
        if len(fields) < 2:
            # Blank (or label-less) line, e.g. trailing empty lines at the end
            # of the file; keeping it would add an empty feature row.
            continue
        rows.append([float(v) for v in fields[:-1]])
        # The line was stripped above, so the label carries no trailing newline
        # and a final line without "\n" cannot turn into a separate class.
        labels.append(fields[-1])
data = np.array(rows)

In [4]:
# Shuffle the row positions in place, then cut them at the 80% mark:
# the first part indexes the training rows, the remainder the test rows.
indeces = list(range(len(data)))
np.random.shuffle(indeces)
split = int(len(data) * 0.8)

train_idx, test_idx = indeces[:split], indeces[split:]

# Labels stay plain Python lists here; features are wrapped in the backend Matrix.
y_train = [labels[i] for i in train_idx]
y_test = [labels[i] for i in test_idx]
X_train = Matrix(data[train_idx])
X_test = Matrix(data[test_idx])
X_train.dims(), X_test.dims()

(torch.Size([120, 4]), torch.Size([30, 4]))

In [5]:
# Fit the encoder on the full label list, then transform both splits with it.
ohe = OneHotEncoder()
ohe.fit(labels)
y_train, y_test = ohe.transform(y_train), ohe.transform(y_test)
y_train.dims(), y_test.dims()

(torch.Size([120, 3]), torch.Size([30, 3]))

In [6]:
# Fit on the training split only, then apply that same fitted normalizer to
# both the training and the test features.
normalizer = ColumnNormalizer()
normalizer.fit(X_train)
X_train, X_test = normalizer.transform(X_train), normalizer.transform(X_test)
X_train.dims(), X_test.dims()

(torch.Size([120, 4]), torch.Size([30, 4]))

In [7]:
def init_nn():
    """Return a new NN consisting of a Linear(4, 3) layer followed by Softmax."""
    layers = [Linear(4, 3), Softmax()]
    return NN(layers)

In [8]:
# Reference point for the "seconds since last report" column printed below.
time_point = time.time()

n_steps = 4001   # optimisation steps per optimizer run
log_every = 400  # report the batch loss every this many steps

# Only the mini-batch loader is active; the other two are kept as
# commented-out alternatives.
data_loaders = [
    # BatchDataLoader(X_train, y_train),
    # StochasticDataLoader(X_train, y_train),
    MiniBatchDataLoader(X_train, y_train, 4),
]

# Factories rather than instances: each optimizer is constructed around the
# freshly created network of its own run.
optimizer_creators = [
    lambda net: SgdOptimizer(net, 0.01),
    lambda net: SgdWithMomentumOptimizer(net, 0.01, 0.9),
    lambda net: AdaGradOptimizer(net, 0.1),
    lambda net: RmsPropOptimizer(net, 0.01, 0.95),
    lambda net: AdamOptimizer(net, 0.01, 0.95, 0.95),
]

for data_loader in data_loaders:
    for optimizer_creator in optimizer_creators:
        # Every loader/optimizer pairing starts from a newly built network.
        nn = init_nn()
        optimizer = optimizer_creator(nn)
        header = f"gradient descent: {data_loader.__class__} | optimizer: {optimizer.__class__}"
        print(header)

        for i in range(n_steps):
            X_b, y_b = data_loader.get_batch()
            out = nn(X_b)
            loss = negative_log_likelihood(y_b, out)

            if not i % log_every:
                # Whole seconds since the previous report, then restart the clock.
                elapsed_time = int(time.time() - time_point)
                time_point = time.time()
                print(f"{i} | {loss.data:.2f} | {elapsed_time}s")

            optimizer.step(loss)

        # After the run, score the network on the full training and test sets.
        train_out = nn(X_train)
        train_loss = negative_log_likelihood(y_train, train_out)
        test_out = nn(X_test)
        test_loss = negative_log_likelihood(y_test, test_out)
        summary = f"train loss: {train_loss.data:.2f}   test loss: {test_loss.data:.2f}"
        print(summary)

gradient descent: <class 'lib.gd_data_loaders.MiniBatchDataLoader'> | optimizer: <class 'lib.pt_backend.optimizers.SgdOptimizer'>
0 | 0.78 | 0s
400 | 0.44 | 0s
800 | 0.08 | 0s
1200 | 0.09 | 0s
1600 | 0.33 | 0s
2000 | 0.33 | 0s
2400 | 0.36 | 0s
2800 | 0.27 | 0s
3200 | 0.26 | 0s
3600 | 0.13 | 0s
4000 | 0.08 | 0s
train loss: 0.19   test loss: 0.18
gradient descent: <class 'lib.gd_data_loaders.MiniBatchDataLoader'> | optimizer: <class 'lib.pt_backend.optimizers.SgdWithMomentumOptimizer'>
0 | 0.98 | 0s
400 | 0.31 | 0s
800 | 0.34 | 0s
1200 | 0.22 | 0s
1600 | 0.23 | 0s
2000 | 0.13 | 0s
2400 | 0.11 | 0s
2800 | 0.09 | 0s
3200 | 0.13 | 0s
3600 | 0.17 | 0s
4000 | 0.16 | 0s
train loss: 0.19   test loss: 0.18
gradient descent: <class 'lib.gd_data_loaders.MiniBatchDataLoader'> | optimizer: <class 'lib.pt_backend.optimizers.AdaGradOptimizer'>
0 | 0.77 | 0s
400 | 0.35 | 0s
800 | 0.19 | 0s
1200 | 0.10 | 0s
1600 | 0.02 | 0s
2000 | 0.01 | 0s
2400 | 0.19 | 0s
2800 | 0.00 | 0s
3200 | 0.13 | 0s
3600 | 0.08 