In [1]:
import time
import numpy as np

np.random.seed(0)

from lib.value import Value
from lib.linear_algebra import Vector, Matrix
from lib.nn import NN, Softmax, Linear
from lib.processing import OneHotEncoder, ColumnNormalizer

In [2]:
# The Iris dataset was used in R.A. Fisher's classic 1936 paper, The Use of Multiple Measurements in Taxonomic Problems, and can also be found on the UCI Machine Learning Repository.
# It includes three iris species with 50 samples each as well as some properties about each flower. One flower species is linearly separable from the other two, but the other two are not linearly separable from each other.

# Parse the comma-separated Iris file: every field but the last is a numeric
# feature, and the last field is the species label.
rows = []
labels = []
with open("data/iris.data", "rt") as f:
    for line in f:
        # Strip the line terminator so labels do not carry a trailing "\n"
        # (a final line without a newline would otherwise look like a
        # distinct extra class to the encoder).
        record = line.strip()
        # Skip blank lines (some copies of the file end with one); they would
        # otherwise append an empty feature row and a bogus label.
        if not record:
            continue
        fields = record.split(",")
        rows.append([float(v) for v in fields[:-1]])
        labels.append(fields[-1])
data = np.array(rows)

In [3]:
# Shuffle the row indices, then use the first 80% for training and the
# remainder for testing.
indeces = list(range(len(data)))
np.random.shuffle(indeces)
split = int(len(data) * 0.8)
train_idx, test_idx = indeces[:split], indeces[split:]

# Labels stay plain Python lists here; features are wrapped in Matrix.
y_train = [labels[k] for k in train_idx]
y_test = [labels[k] for k in test_idx]
X_train = Matrix(data[train_idx])
X_test = Matrix(data[test_idx])
X_train.dims(), X_test.dims()

((120, 4), (30, 4))

In [4]:
# Fit the encoder on the complete label list, then encode both splits.
ohe = OneHotEncoder()
ohe.fit(labels)
y_train, y_test = ohe.transform(y_train), ohe.transform(y_test)
y_train.dims(), y_test.dims()

((120, 3), (30, 3))

In [5]:
# The normalizer is fitted on the training split only and then applied to
# both the training and the test features.
normalizer = ColumnNormalizer()
normalizer.fit(X_train)
X_train, X_test = normalizer.transform(X_train), normalizer.transform(X_test)
X_train.dims(), X_test.dims()

((120, 4), (30, 4))

In [6]:
# Model: a single Linear(4, 3) layer followed by Softmax.
layers = [Linear(4, 3), Softmax()]
nn = NN(layers)

In [7]:
# Full-batch gradient descent on the training set.
N_STEPS = 4001         # number of iterations (the final one is step 4000)
LOG_EVERY = 100        # print a progress line every this many steps
LEARNING_RATE = 0.01   # step size of the plain gradient-descent update

# The number of training rows does not change between iterations.
n_train = y_train.dims()[0]

# perf_counter is a monotonic clock intended for measuring intervals.
time_point = time.perf_counter()

for i in range(N_STEPS):
    # Forward pass, then loss: the negated sum of y_train * ln(out) over all
    # entries (row_sum followed by col_sum), divided by the number of rows.
    out = nn(X_train)
    L = -(y_train * out.ln()).row_sum().col_sum()[0] / n_train

    if i % LOG_EVERY == 0:
        # Whole seconds elapsed since the previous report.
        elapsed_time = int(time.perf_counter() - time_point)
        time_point = time.perf_counter()
        print(f"{i} | {L.data:.2f} | {elapsed_time}s")

    # Clear the gradients left over from the previous step, seed the loss
    # gradient with 1 and backpropagate.
    for p in nn.params():
        p.zero_grad()
    L.grad = 1
    L.backward()

    # Gradient-descent update applied to every scalar value of every parameter.
    for p in nn.params():
        for v in p.all_values():
            v.data -= LEARNING_RATE * v.grad

0 | 1.71 | 0s
100 | 0.92 | 19s
200 | 0.64 | 20s
300 | 0.55 | 22s
400 | 0.50 | 20s
500 | 0.47 | 21s
600 | 0.44 | 24s
700 | 0.42 | 20s
800 | 0.41 | 21s
900 | 0.39 | 21s
1000 | 0.38 | 20s
1100 | 0.36 | 18s
1200 | 0.35 | 17s
1300 | 0.34 | 18s
1400 | 0.33 | 17s
1500 | 0.33 | 20s
1600 | 0.32 | 20s
1700 | 0.31 | 22s
1800 | 0.30 | 22s
1900 | 0.29 | 22s
2000 | 0.29 | 21s
2100 | 0.28 | 22s
2200 | 0.28 | 21s
2300 | 0.27 | 20s
2400 | 0.26 | 20s
2500 | 0.26 | 21s
2600 | 0.25 | 22s
2700 | 0.25 | 20s
2800 | 0.24 | 20s
2900 | 0.24 | 20s
3000 | 0.24 | 21s
3100 | 0.23 | 20s
3200 | 0.23 | 20s
3300 | 0.22 | 21s
3400 | 0.22 | 20s
3500 | 0.22 | 20s
3600 | 0.21 | 20s
3700 | 0.21 | 20s
3800 | 0.21 | 21s
3900 | 0.20 | 23s
4000 | 0.20 | 23s


In [8]:
# Evaluate the trained model on the held-out split using the same loss
# expression as in training.
out = nn(X_test)
negated_total = -(y_test * out.ln()).row_sum().col_sum()
L = negated_total[0] / y_test.dims()[0]

L

{4d66607a, 0.18, 0}

In [9]:
# Pair the first output column (rounded to one decimal) with the first
# column of the encoded test targets, row by row.
predicted_first = [row[0].data for row in out.values]
target_first = [row[0].data for row in y_test]
[(round(float(p), 1), float(t)) for p, t in zip(predicted_first, target_first)]

[(1.0, 1.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.9, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.1, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0),
 (1.0, 1.0),
 (1.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (1.0, 1.0)]