In [31]:
import math
import numpy as np
from Value import Value
from MLP import *
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt
%matplotlib inline

In [22]:
# Download the MNIST table from OpenML as plain arrays (as_frame=False)
# rather than a pandas DataFrame, already split into features and labels.
X, y = fetch_openml(name='mnist_784', version=1, as_frame=False, return_X_y=True)

# Rescale the features by 1/255 (presumably raw 0-255 pixel intensities,
# which would put every feature in [0, 1]).
X = X / 255.0

# Cast the labels to integers; encode_labels uses them as column indices.
y = y.astype(int)

In [23]:
# Shuffle the sample indices reproducibly, then cut 80% / 20% into
# training and test partitions.
np.random.seed(42)
indices = np.random.permutation(len(X))
split = int(0.8 * len(X))  # number of training samples

train_idx = indices[:split]
test_idx = indices[split:]

# Features and labels are indexed with the same permutation so rows stay aligned.
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]

# Displayed as the cell output: (n_train_samples, n_features).
np.shape(X_train)

(56000, 784)

In [24]:
# ie. 4 = [0,0,0,0,1,0,0,0,0,0]
def encode_labels(y, num_classes=10):
    """One-hot encode a sequence of integer class labels.

    Parameters
    ----------
    y : sequence of int
        Class labels; each must be a valid column index for ``num_classes``.
    num_classes : int, default 10
        Width of each one-hot row (10 for the digits 0-9).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(y), num_classes)`` with a single 1 per
        row at the column given by the label and 0 everywhere else.

    Raises
    ------
    IndexError
        If a label is ``>= num_classes`` (raised by NumPy indexing).
    """
    labels = np.asarray(y)
    one_hot = np.zeros((len(labels), num_classes))
    if labels.size == 0:
        # An empty Python list becomes a float array, which NumPy refuses to
        # use as an index; there is nothing to set anyway.
        return one_hot
    # Fancy indexing: row i, column labels[i] <- 1.
    one_hot[np.arange(len(labels)), labels] = 1
    return one_hot
# One-hot versions of both label vectors, matching the 10-wide model output.
y_train_encoded, y_test_encoded = encode_labels(y_train), encode_labels(y_test)

In [25]:
# Model that will be used.
# 784 matches the feature count of X_train; the list is presumably the width
# of each successive layer (two hidden layers of 48, then 10 outputs, one per
# digit class) -- confirm against MLP's constructor.
layer_sizes = [48, 48, 10]
model = MLP(784, layer_sizes)

In [26]:
# using different loss function
def softmax(logits):
    """Convert raw scores into probabilities that sum to one.

    Parameters
    ----------
    logits : iterable of Value
        Unnormalised scores, one per class.

    Returns
    -------
    list of Value
        ``exp(logit_i) / sum_j exp(logit_j)`` for every input, in order.
        An empty input yields an empty list.

    Notes
    -----
    The largest logit is subtracted from every score before exponentiating.
    Softmax is invariant to such a shift, so the probabilities and their
    gradients are unchanged, but ``exp`` can no longer overflow on large
    scores. The shift is read from ``.data`` (the same attribute
    ``update_weights`` uses) and enters the graph as a constant.
    """
    logits = list(logits)
    if len(logits) == 0:
        return []
    # Constant node holding -max(logits); added rather than subtracted so only
    # Value + Value is required, which sum() below already relies on.
    shift = Value(-max(x.data for x in logits))
    exps = [(x + shift).exp() for x in logits]
    sum_exps = sum(exps, Value(0))
    return [e / sum_exps for e in exps]

def cross_entropy_loss(logits, true_label):
    """Cross-entropy between softmax(logits) and a one-hot target.

    Parameters
    ----------
    logits : iterable of Value
        Raw class scores; passed through ``softmax`` first.
    true_label : sequence of numbers
        Target vector aligned with ``logits``; positions equal to 1 mark the
        true class.

    Returns
    -------
    Value
        Sum of ``-log(p_i)`` over every position whose target equals 1.

    Notes
    -----
    Requires ``Value`` to provide a ``log()`` method. The recorded run of
    this notebook stopped with ``AttributeError: 'Value' object has no
    attribute 'log'``, so that operation (with derivative 1/x) still has to
    be added to the ``Value`` class before training can proceed.
    """
    probabilities = softmax(logits)
    total = Value(0)
    for prob, target in zip(probabilities, true_label):
        if target != 1:
            # Only the true-class probability contributes to the loss.
            continue
        total -= prob.log()
    return total

In [27]:
def update_weights(model, base_lr, e, decay=0.95):
    """Apply one gradient-descent step with an exponentially decayed rate.

    Parameters
    ----------
    model : object
        Exposes its trainable parameters as ``model.parameters``, either as
        an iterable attribute or as a zero-argument method returning one.
        Each parameter must have numeric ``data`` and ``grad`` attributes.
    base_lr : float
        Learning rate used at epoch 0.
    e : int
        Current epoch index; the effective rate is ``base_lr * decay ** e``.
    decay : float, default 0.95
        Per-epoch multiplicative decay factor.

    Returns
    -------
    None
        Parameters are updated in place.
    """
    lr = base_lr * (decay ** e)
    params = model.parameters
    if callable(params):
        # micrograd-style models expose parameters() as a method; iterating
        # the bound method itself would raise TypeError.
        params = params()
    for p in params:
        p.data += -lr * p.grad

In [33]:
# the training begins
# Plain per-sample SGD: every training example triggers its own forward pass,
# backward pass and weight update. The learning rate decays with the epoch
# index inside update_weights.
epochs = 40
for epoch in range(epochs):

    # Running sum of per-sample losses, used for the epoch average below.
    total_loss = 0.0
    for x, y_true in zip(X_train, y_train_encoded):
        # forward pass
        y_pred = model(x)
        loss = cross_entropy_loss(y_pred, y_true)
        # Accumulate the scalar value only, so the graph is not kept alive.
        total_loss += loss.data

        # zero gradients (must happen before backward so grads don't pile up)
        model.zero_grad()

        # backward pass
        loss.backward()

        # update weights -- the keyword must match update_weights' signature
        # (base_lr, not lr), otherwise the call raises TypeError.
        update_weights(model, base_lr=0.1, e=epoch)

    print(f"Epoch: {epoch + 1}, Loss: {total_loss / len((X_train))}")

AttributeError: 'Value' object has no attribute 'log'