In [1]:
import math
from time import perf_counter
import numpy as np

from lib.Tensor import Tensor, no_grad
from lib.NN import ReLU, Dense, Module, CategoricalCrossEntropyLoss, MSELoss, MLP
from lib.Optimizers import SGD
from lib.data_utils import get_mnist
from lib.dataloader import SimpleDataLoader

In [2]:
path = "/Users/beneverman/Documents/Coding/bens-mini-dl/data"
x_train, y_train, x_valid, y_valid = get_mnist(path)

In [3]:
print(x_train.shape) # (n images, n pixels)
print(y_train.shape) # (class labels,)

print(x_valid.shape) # (n images, n pixels)
print(y_valid.shape) # (class labels,)

input_dim = x_train.shape[1] # number of features (pixels)
output_dim = len(set(y_train)) # all unique class labels

x_train_tensor, y_train_tensor = Tensor(x_train, requires_grad=False), Tensor(y_train, requires_grad=False)
x_valid_tensor, y_valid_tensor = Tensor(x_valid, requires_grad=False), Tensor(y_valid, requires_grad=False)

(50000, 784)
(50000,)
(10000, 784)
(10000,)


In [4]:
train_loader = SimpleDataLoader(x_train, y_train, batch_size=64, shuffle=True)
test_loader = SimpleDataLoader(x_valid, y_valid, batch_size=64, shuffle=False)

In [5]:
class MLP(Module):
    def __init__(self, input_dim: int, output_dim: int):
        super().__init__()
        self.fc1 = Dense(input_dim, 64)
        self.relu1 = ReLU()
        self.fc2 = Dense(64, output_dim)

    def forward(self, x):   
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        return x

In [6]:
model = MLP(input_dim, output_dim)

EPOCHS = 20
STEPS = 100 # num of batches per epoch
BATCH_SIZE = 64
max_batches_per_epoch = math.ceil(len(x_train) / BATCH_SIZE) # handle smaller last batch

In [7]:
criterion = CategoricalCrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001)

In [10]:
total_time = 0.0
steps = min(STEPS, max_batches_per_epoch)
for epoch in range(EPOCHS):
    start = perf_counter()
    running_train_loss = 0.0

    for step in range(steps): # for each batch
        model.train()
        optimizer.zero_grad() # zero gradients

        samp = np.random.randint(0, len(x_train), BATCH_SIZE) # get random indices

        # get batch and labels
        batch = x_train_tensor[samp] # get batch
        labels = y_train_tensor[samp] # get labels

        print(batch.requires_grad)
        print(labels.requires_grad)

        out = model(batch) # forward pass
        loss = criterion(out, labels) # calculate loss

        loss.backward() # backprop
        optimizer.step() # update params

        running_train_loss += loss.data # add loss to running total
    
    train_loss = running_train_loss / STEPS # loss over all batches / num batches

    model.eval() # set model to eval mode

    with no_grad(): # don't track gradients
        # test accuracy
        out = model(Tensor(x_valid, requires_grad=False)) # forward pass
        pred =  np.argmax(out.data, axis=1) # get index of max value
        accuracy = (pred == y_valid).mean() # calculate accuracy, no Tensor wrap because comparing arrays directly

    elapsed = perf_counter() - start
    total_time += elapsed

    print(f"Epoch {epoch+1}/{EPOCHS}: {steps} Batches (max: {max_batches_per_epoch}) | Train Loss: {train_loss:.4f} | Test Accuracy: {accuracy:.4f} | Time: {elapsed:.2f}s")

print(f"Total training time: {total_time:.2f}s")

False
False
Creation_op NLLLoss, shape ()
Creation_op getitem, shape (64,)


TypeError: 'NoneType' object is not subscriptable

In [None]:
for epoch in range(EPOCHS):
    model.train()
    running_train_loss = 0.0
    for x, y in train_loader:
        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        running_train_loss += loss.data

    running_test_loss = 0.0
    for x, y in test_loader:
        model.eval()
        with no_grad():
            out = model(x)
            loss = criterion(out, y)
            running_test_loss += loss.data

    print(f"Epoch {epoch+1/EPOCHS} | Train Loss: {running_train_loss/len(train_loader):0.4f} | Test Loss: {running_test_loss/len(test_loader):0.4f}")

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices