In [1]:
import math
import os

import numpy as np

from lib.Tensor import Tensor
from lib.NN import ReLU, Dense, Module, CategoricalCrossEntropyLoss
from lib.Optimizers import SGD
from lib.data_utils import get_mnist

In [2]:
# Data directory handed to get_mnist. The author's original location remains
# the default, but it can be overridden with the BENS_MINI_DL_DATA environment
# variable so the notebook runs on other machines without editing this cell.
path = os.environ.get(
    "BENS_MINI_DL_DATA",
    "/Users/beneverman/Documents/Coding/bens-mini-dl/data",
)
# get_mnist hands back the training and validation arrays in this order.
x_train, y_train, x_valid, y_valid = get_mnist(path)

In [3]:
# Wrap the four raw arrays as Tensors that do not track gradients.
# Shapes noted by the author: x_train (50000, 784), y_train (50000,),
# x_valid (10000, 784), y_valid (10000,).
x_train_tensor, y_train_tensor, x_valid_tensor, y_valid_tensor = (
    Tensor(array, requires_grad=False)
    for array in (x_train, y_train, x_valid, y_valid)
)

# Derive the network's input/output sizes from the data itself.
input_dim = x_train_tensor.shape[1]  # features (pixels) per example
output_dim = len(set(y_train_tensor.data))  # number of distinct class labels

print(f"input_dim: {input_dim}", f"output_dim: {output_dim}", sep="\n")

input_dim: 784
output_dim: 10


In [4]:
class MLP(Module):
    """Small multi-layer perceptron: Dense -> ReLU -> Dense.

    The hidden layer has 64 units. The output of the second Dense layer is
    returned as-is; no activation is applied after it inside this class.
    """

    def __init__(self, input_dim: int, output_dim: int):
        """Build the three layers.

        Args:
            input_dim: size passed as the first argument of the first Dense layer.
            output_dim: size passed as the second argument of the last Dense layer.
        """
        super().__init__()
        self.fc1 = Dense(input_dim, 64)  # author's note: (784, 64)
        self.relu1 = ReLU()  # activation between the two Dense layers
        self.fc2 = Dense(64, output_dim)  # author's note: (64, 10)

    def forward(self, x):
        """Pass `x` through fc1, relu1 and fc2, in that order, and return the result."""
        for layer in (self.fc1, self.relu1, self.fc2):
            x = layer(x)
        return x

    def parameters(self):
        """Return fc1's parameters followed by fc2's parameters."""
        hidden_params = self.fc1.parameters()
        output_params = self.fc2.parameters()
        return hidden_params + output_params

# Seed NumPy's global RNG before anything random happens, so that a
# Restart & Run All draws the same mini-batch indices every time.
# NOTE(review): presumably lib's weight initialisation also draws from
# np.random and is therefore covered as well — confirm in lib.NN.
SEED = 42
np.random.seed(SEED)

model = MLP(input_dim, output_dim)

# Training hyperparameters.
EPOCHS = 20
STEPS = 100  # num of batches per epoch
BATCH_SIZE = 64
# Batches needed to cover the whole training set once; ceil keeps the
# smaller final batch instead of dropping it.
max_batches_per_epoch = math.ceil(len(x_train) / BATCH_SIZE)

criterion = CategoricalCrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001)

In [5]:
# Switch to training mode and reset gradients before building the batch.
model.train()
optimizer.zero_grad()

# Draw BATCH_SIZE random row indices in [0, len(x_train_tensor)).
samp = np.random.randint(low=0, high=len(x_train_tensor), size=BATCH_SIZE)

# Index inputs and labels with the same indices so they stay aligned.
batch = x_train_tensor[samp]
labels = y_train_tensor[samp]

In [6]:
# Run the sampled mini-batch through the model, then score the outputs
# against the sampled labels with the criterion created earlier.
out = model(batch) # forward pass
loss = criterion(out, labels) # calculate loss

In [7]:
# Debug inspection: shape of the gradient held by the first layer's biases.
# This cell runs before loss.backward(); the recorded output is (64,).
model.fc1.biases.grad.shape

(64,)

In [8]:
# Backpropagate from the scalar loss through the recorded graph.
# NOTE(review): in the recorded run this call fails with
# "ValueError: cannot reshape array of size 640 into shape (64,1)" right after
# the trace line for the (64, 10) `add` op. Presumably a backward rule inside
# lib reshapes a (64, 10) gradient incorrectly — confirm in lib; nothing in
# this cell can be changed to avoid it.
loss.backward() # backprop

Creation_op mul, shape :(), grad shape: ()
Creation_op , shape :(), grad shape: ()
Creation_op mean, shape :(), grad shape: ()
Creation_op getitem, shape :(64,), grad shape: (64,)
Creation_op add, shape :(64, 10), grad shape: (64, 10)


ValueError: cannot reshape array of size 640 into shape (64,1)

In [None]:
# Apply the optimizer update to the parameters it was constructed with.
# This cell has no recorded execution (In [None]); it is only meaningful
# after loss.backward() has completed successfully.
optimizer.step() # update params

In [None]:
# Switch to evaluation mode and push the whole validation set through the
# model in a single call.
# NOTE(review): the recorded run fails here with a broadcast error between
# shapes (10000, 64) and (64, 64). Looks like something of shape (64, 64)
# left over from the 64-example training batch is being reused on the
# 10000-example input — TODO confirm in lib.
model.eval()  # set model to eval mode
out = model(x_valid_tensor)

Self shape (10000, 64)
Other shape (64, 64)


ValueError: operands could not be broadcast together with shapes (10000,64) (64,64) 

In [None]:
# Predicted class = index of the largest score in each row of the model output.
pred = np.argmax(out.data, axis=1)
# Compare against the underlying label array (`.data`), not the Tensor
# wrapper: `ndarray == Tensor` is not guaranteed to produce an element-wise
# boolean array that can be averaged.
accuracy = (pred == y_valid_tensor.data).mean()  # fraction of correct predictions