In [1]:
from sklearn.datasets import load_digits, load_iris

from sklearn.model_selection import train_test_split

import numpy as np

In [2]:
digits = load_digits()
print(digits.data.shape)
print(digits.target.shape)

(1797, 64)
(1797,)


In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    digits.data/np.max(digits.data), digits.target, test_size=0.1, random_state=42)
# X_train = X_train[:200]
# X_test = X_test[:200]

In [4]:
from nn.layer import Layer


class Model:
    def __init__(self, in_dim, out_dim):
        self.layer1 = Layer(in_dim, 5)
        self.layer2 = Layer(5, 5)
        self.layer3 = Layer(5, out_dim)
        self.in_dim = in_dim
        self.out_dim = out_dim

    def forward(self, x):
        out = self.layer1.forward(x)
        out = [x.relu() for x in out]
        out = self.layer2.forward(out)
        out = [x.relu() for x in out]
        out = self.layer3.forward(out)
        # out = [x.relu() for x in out]
        # Apply softmax activation to the output layer
        exp_out = [x.exp() for x in out]
        sum_exp = sum(exp_out)
        out = [x / sum_exp for x in exp_out]
        return out

    def zero_grad(self):
        for layer in [self.layer1, self.layer2,self.layer3]:
            for p in layer.weights:
                p.grad = 0
            for p in layer.biases:
                p.grad = 0
                
    def learn(self, learning_rate):
 
        for layer in [self.layer1, self.layer2,self.layer3]:
            for p in layer.weights:
                p.value = p.value - (learning_rate * p.grad)
          
            for p in layer.biases:
                p.value = p.value - (learning_rate * p.grad)
           


In [5]:
model = Model(len(X_train[0]), len(np.unique(y_train)))

batch_size = len(X_train)//10
epochs = 100



for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    learning_rate = .9 / (1 + 0.1 * epoch)  # Stochastic Gradient Descent (SGD) with learning rate decay
    # learning_rate = 0.3
    print(f"learning_rate: {learning_rate}")
    for i in range(0, len(X_train), batch_size):
        model.zero_grad()
        error_list = []
        for j in range(batch_size):
            if i+j >= len(X_train):
                break
            x = (X_train[i+j]/1.0).tolist()
            y = int(y_train[i+j])
            out = model.forward(x)
            # errors = [
            #     (out[i]-1 if i == y else out[i])**2 for i in range(len(out))
            # ]
            # Calculate cross-entropy error
            target = [1 if i == y else 0 for i in range(len(out))]
            errors = [-target[i] * out[i].sigmoid().log() - (1 - target[i]) * (1 - out[i].sigmoid()).log() for i in range(len(out))]
            error_list.extend(errors)
        loss = sum(error_list)/len(error_list)
        loss.grad=1
        loss.backward()
        model.learn(learning_rate)
        # print(f"loss: {loss.value}")

    # evaluate
    correct = 0
    import numpy as np
    for i in range(len(X_test)):
        x = (X_test[i]/1.0).tolist()
        y = int(y_test[i])
        out = model.forward(x)
        if np.argmax([o.value for o in out]) == y:
            correct += 1
    print(model.forward(X_test[1]))
    print(y_test[1])
    print(f"Accuracy: {correct/len(X_test):.2f}")

Epoch 1/100
learning_rate: 0.9
[Parameter(0.04779756632501065), Parameter(0.04775704759523418), Parameter(0.033561061585948314), Parameter(0.15707329860057356), Parameter(0.021471236216949057), Parameter(0.21586836628821587), Parameter(0.0333147514613562), Parameter(0.09107855944199482), Parameter(0.21568289112357136), Parameter(0.13639522136114604)]
9
Accuracy: 0.08
Epoch 2/100
learning_rate: 0.8181818181818181


In [None]:
for layer in [model.layer1, model.layer2, model.layer3]:
    print(layer.weights)
    print(layer.biases)

[Parameter(0.18654696309633526), Parameter(0.490600594419033), Parameter(-0.15887579534056473), Parameter(1.030787811385254), Parameter(0.3758973046304542), Parameter(-0.9183937614446892), Parameter(1.4036926874536162), Parameter(0.025502060608008657), Parameter(-0.0363322899519338), Parameter(-0.8481017723224151), Parameter(1.5644075803317388), Parameter(-0.02881003209777139), Parameter(-0.049863216923649346), Parameter(-0.8329875244359635), Parameter(0.5847990256351847), Parameter(-0.1949970347061038), Parameter(-0.8758314715840874), Parameter(-0.126100230675104), Parameter(-0.7121091837119458), Parameter(0.568469323334188)]
[Parameter(-0.09250598978534469), Parameter(-0.033834234976438435), Parameter(0.07622701902181019), Parameter(0.4583369051629912), Parameter(-0.4007289176745161)]
[Parameter(0.7686391472435841), Parameter(0.7677088138626311), Parameter(-0.9925691212387548), Parameter(0.6969022404989409), Parameter(-0.7827066468369663), Parameter(0.12472312727543793), Parameter(0.