In [1]:
import time
import numpy as np

# Fix NumPy's global RNG so the np.random.shuffle-based train/test split is
# repeatable across runs. NOTE(review): lib.* may also draw from this RNG
# (e.g. for weight initialisation) — confirm.
np.random.seed(0)

from lib.value import Value
from lib.linear_algebra import Vector, Matrix
from lib.nn import NN, Softmax, Linear
from lib.processing import OneHotEncoder, ColumnNormalizer
from lib.metrics.losses import negative_log_likelihood

In [2]:
# The Iris dataset was used in R.A. Fisher's classic 1936 paper, The Use of Multiple Measurements in Taxonomic Problems, and can also be found on the UCI Machine Learning Repository.
# It includes three iris species with 50 samples each, along with four numeric measurements per flower. One species is linearly separable from the other two, which are not linearly separable from each other.

# Parse the comma-separated Iris file: every column except the last is a
# numeric feature, the last column is the species label.
data = []
labels = []
with open("data/iris.data", "rt") as f:
    for line in f:  # iterate lazily instead of materialising readlines()
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing empty line at the end of the file)
            # would otherwise produce an empty feature row and a bogus label.
            continue
        # Stripping first keeps the newline out of the label, so the same
        # species always maps to the same string regardless of line endings.
        *features, label = line.split(",")
        data.append([float(v) for v in features])
        labels.append(label)
data = np.array(data)

In [3]:
# Shuffle the row positions once, then cut them 80/20 into train and test.
indeces = list(range(len(data)))
np.random.shuffle(indeces)
split = int(0.8 * len(data))
train_rows, test_rows = indeces[:split], indeces[split:]

# Features go into the project's Matrix type; labels stay as plain lists.
X_train = Matrix(data[train_rows])
X_test = Matrix(data[test_rows])
y_train = [labels[row] for row in train_rows]
y_test = [labels[row] for row in test_rows]
X_train.dims(), X_test.dims()

((120, 4), (30, 4))

In [4]:
# Fit the encoder on the full label list so both splits share one class
# ordering, then encode each split with that single fitted encoder.
ohe = OneHotEncoder()
ohe.fit(labels)
y_train, y_test = ohe.transform(y_train), ohe.transform(y_test)
y_train.dims(), y_test.dims()

((120, 3), (30, 3))

In [5]:
# The normalizer is fitted on the training split only; the same fitted
# transform is then applied to both splits, so the test rows never
# influence the fit.
normalizer = ColumnNormalizer()
normalizer.fit(X_train)
X_train, X_test = normalizer.transform(X_train), normalizer.transform(X_test)
X_train.dims(), X_test.dims()

((120, 4), (30, 4))

In [6]:
# Model: one Linear layer followed by Softmax, i.e. multinomial logistic
# regression. NOTE(review): Linear(4, 3) presumably means 4 inputs -> 3
# outputs, matching the (., 4) feature and (., 3) one-hot shapes — confirm.
nn = NN([
    Linear(4, 3),
    Softmax()
])

In [7]:
# Hyperparameters for plain gradient descent over the whole training set.
N_STEPS = 4001          # steps 0..4000, so the loss at step 4000 is logged too
LOG_EVERY = 100         # print the loss every LOG_EVERY steps
LEARNING_RATE = 0.01

# perf_counter() is meant for measuring durations; unlike time.time() it is
# not affected by system clock adjustments between two readings.
time_point = time.perf_counter()

for i in range(N_STEPS):
    # Forward pass on the full training matrix (no mini-batching).
    out = nn(X_train)
    loss = negative_log_likelihood(y_train, out)

    if i % LOG_EVERY == 0:
        # Read the clock once so no time is lost between "stop" and "restart".
        now = time.perf_counter()
        elapsed_time = int(now - time_point)
        time_point = now
        print(f"{i} | {loss.data:.2f} | {elapsed_time}s")

    # Reset parameter gradients before backpropagating this step's loss.
    for p in nn.params():
        p.zero_grad()
    loss.grad = 1  # seed gradient at the loss node before backward()
    loss.backward()

    # Gradient-descent update applied to every value of every parameter.
    for p in nn.params():
        for v in p.all_values():
            v.data -= LEARNING_RATE * v.grad

0 | 1.71 | 0s
100 | 0.92 | 17s
200 | 0.64 | 17s
300 | 0.55 | 17s
400 | 0.50 | 17s
500 | 0.47 | 18s
600 | 0.44 | 17s
700 | 0.42 | 18s
800 | 0.41 | 18s
900 | 0.39 | 18s
1000 | 0.38 | 17s
1100 | 0.36 | 17s
1200 | 0.35 | 17s
1300 | 0.34 | 17s
1400 | 0.33 | 17s
1500 | 0.33 | 17s
1600 | 0.32 | 16s
1700 | 0.31 | 15s
1800 | 0.30 | 15s
1900 | 0.29 | 17s
2000 | 0.29 | 17s
2100 | 0.28 | 17s
2200 | 0.28 | 17s
2300 | 0.27 | 17s
2400 | 0.26 | 16s
2500 | 0.26 | 16s
2600 | 0.25 | 17s
2700 | 0.25 | 17s
2800 | 0.24 | 16s
2900 | 0.24 | 16s
3000 | 0.24 | 16s
3100 | 0.23 | 16s
3200 | 0.23 | 17s
3300 | 0.22 | 17s
3400 | 0.22 | 17s
3500 | 0.22 | 17s
3600 | 0.21 | 16s
3700 | 0.21 | 16s
3800 | 0.21 | 17s
3900 | 0.20 | 17s
4000 | 0.20 | 17s


In [8]:
# Score the trained model on the held-out split with the same loss function
# that was used during training. `out` is reused by the comparison cell below.
out = nn(X_test) 
loss = negative_log_likelihood(y_test, out)
   
# Bare expression so the notebook displays the loss object.
loss

{8a61a53d, 0.18, 0}

In [9]:
# Pair, row by row, the model output for the first class column (rounded to
# one decimal) with the corresponding one-hot target entry.
[
    (round(float(pred_row[0].data), 1), float(true_row[0].data))
    for pred_row, true_row in zip(out.values, y_test)
]

[(1.0, 1.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.9, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.1, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (1.0, 1.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0)]