In [1]:
import time
import numpy as np

np.random.seed(0)

from lib.value import Value
from lib.linear_algebra import Vector, Matrix
from lib.nn import NN, Softmax, Linear
from lib.processing import OneHotEncoder, ColumnNormalizer
from lib.metrics.losses import negative_log_likelihood
from lib.gd_data_loaders import BatchDataLoader, StochasticDataLoader, MiniBatchDataLoader

In [2]:
# The Iris dataset was used in R.A. Fisher's classic 1936 paper, The Use of Multiple Measurements in Taxonomic Problems, and can also be found on the UCI Machine Learning Repository.
# It includes three iris species with 50 samples each as well as some properties about each flower. One flower species is linearly separable from the other two, but the other two are not linearly separable from each other.

data = []
labels = []
with open("data/iris.data", "rt") as f:
    for line in f.readlines():
        data.append([float(v) for v in line.split(",")[:-1]])
        labels.append(line.split(",")[-1])
data = np.array(data)

In [3]:
indeces = list(range(len(data)))
np.random.shuffle(indeces)
split = int(len(data) * 0.8)

X_train = data[indeces[:split]]
X_test = data[indeces[split:]]
y_train = [labels[i] for i in indeces[:split]]
y_test = [labels[i] for i in indeces[split:]]
X_train = Matrix(X_train)
X_test = Matrix(X_test)
X_train.dims(), X_test.dims()

((120, 4), (30, 4))

In [4]:
ohe = OneHotEncoder()
ohe.fit(labels)
y_train = ohe.transform(y_train)
y_test = ohe.transform(y_test)
y_train.dims(), y_test.dims()

((120, 3), (30, 3))

In [5]:
normalizer = ColumnNormalizer()
normalizer.fit(X_train)
X_train = normalizer.transform(X_train)
X_test = normalizer.transform(X_test)
X_train.dims(), X_test.dims()

((120, 4), (30, 4))

In [6]:
nn = NN([
    Linear(4, 3),
    Softmax()
])

In [7]:
time_point = time.time()
for data_loader in [
    BatchDataLoader(X_train, y_train),
    StochasticDataLoader(X_train, y_train),
    MiniBatchDataLoader(X_train, y_train, 32)
]:
    print(data_loader.__class__)
    for i in range(4001):
        X_b, y_b = data_loader.get_batch()
        out = nn(X_b)
        loss = negative_log_likelihood(y_b, out)
        
        if i % 400 == 0:
            elapsed_time = int(time.time() - time_point)
            time_point = time.time()
            print(f"{i} | {loss.data:.2f} | {elapsed_time}s")    
    
        for p in nn.params():
            for v in p.all_values():
                v.zero_grad()
        loss.grad = 1
        loss.backward()
    
        for p in nn.params():
            for v in p.all_values():
                v.data -= 0.01 * v.grad
    
    train_out = nn(X_train) 
    train_loss = negative_log_likelihood(y_train, train_out)
    test_out = nn(X_test) 
    test_loss = negative_log_likelihood(y_test, test_out)
    print(f"train loss: {train_loss.data:.2f}   test loss: {test_loss.data:.2f}") 

<class 'lib.gd_data_loaders.BatchDataLoader'>
0 | 1.71 | 0s
400 | 0.50 | 78s
800 | 0.41 | 75s
1200 | 0.35 | 72s
1600 | 0.32 | 82s
2000 | 0.29 | 76s
2400 | 0.26 | 80s
2800 | 0.24 | 84s
3200 | 0.23 | 76s
3600 | 0.21 | 78s
4000 | 0.20 | 84s
train loss: 0.20   test loss: 0.18
<class 'lib.gd_data_loaders.StochasticDataLoader'>
0 | 0.10 | 0s
400 | 0.13 | 0s
800 | 0.02 | 0s
1200 | 0.06 | 0s
1600 | 0.10 | 0s
2000 | 0.01 | 0s
2400 | 0.04 | 0s
2800 | 0.08 | 0s
3200 | 0.01 | 0s
3600 | 0.03 | 0s
4000 | 0.07 | 0s
train loss: 0.14   test loss: 0.13
<class 'lib.gd_data_loaders.MiniBatchDataLoader'>
0 | 0.15 | 0s
400 | 0.12 | 25s
800 | 0.13 | 22s
1200 | 0.12 | 22s
1600 | 0.13 | 22s
2000 | 0.13 | 19s
2400 | 0.06 | 18s
2800 | 0.13 | 19s
3200 | 0.13 | 20s
3600 | 0.08 | 21s
4000 | 0.18 | 23s
train loss: 0.11   test loss: 0.12


In [8]:
[(round(float(v1),1), float(v2)) for v1, v2 in zip([v[0].data for v in test_out.values], [v[0].data for v in y_test])]

[(1.0, 1.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.1, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (1.0, 1.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0)]