In [67]:
import os
os.environ['DEBUG'] = '1'

import numpy as np
from typing import Union
import math
import torch
from time import perf_counter

from tinygrad.tensor import Tensor
from tinygrad.nn import Linear
from tinygrad.nn.state import get_parameters

from lib.utils import get_mnist
from tinygrad.nn.optim import SGD

In [37]:
X_train, Y_train, X_test, Y_test = get_mnist("../data") # these need to be tensors??

In [38]:
type(X_train)

numpy.ndarray

In [125]:
class MLP:
    def __init__(self, input_dim, hidden_dim, output_dim):
        self.l1 = Linear(input_dim, hidden_dim)
        self.l2 = Linear(hidden_dim, output_dim)

    def forward(self, x) -> Tensor:
        return self.l2(self.l1(x).relu())
    
    def parameters(self):
        return get_parameters(self.l1) + get_parameters(self.l2)
    
    def __call__(self, x):
        return self.forward(x)
    
    def __repr__(self):
        return f"MLP({self.l1}, {self.l2})"

In [62]:
class SimpleDataLoader:
    def __init__(self, X: Union[np.ndarray, Tensor], Y: Union[np.ndarray, Tensor], batch_size=64, shuffle=True):
        self.X = Tensor(X) if not isinstance(X, Tensor) else X
        self.Y = Tensor(Y) if not isinstance(Y, Tensor) else Y
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __iter__(self):
        xlen  = self.X.shape[0]
        indices = np.random.permutation(xlen) if self.shuffle else np.arange(xlen) # shuffled indices if self.shuffle else range

        for start_idx in range(0, xlen, self.batch_size):
            end_idx = min(self.batch_size + start_idx, xlen)
            batch_indices = Tensor(indices[start_idx:end_idx]) # this has to be tensor because indexing with np.ndarray or list raises error in tinygrad

            yield self.X[batch_indices], self.Y[batch_indices]

    def __len__(self): # return the number of batches
        return math.ceil(self.X.shape[0] / self.batch_size) 

In [63]:
train_loader = SimpleDataLoader(X_train, Y_train, batch_size=64, shuffle=True)
test_loader = SimpleDataLoader(X_test, Y_test, batch_size=64, shuffle=False)

In [138]:
model = MLP(784, 100, 10) # instantiate the model
optim = SGD(model.parameters(), lr=0.001) # instantiate the optimizer

EPOCHS = 10
STEPS = 1000 # num of batches per epoch
BATCH_SIZE = 64
max_batches_per_epoch = math.ceil(len(X_train) / BATCH_SIZE) # handle smaller last batch

## Train for mini-epochs

In [139]:
total_time = 0.0
steps = min(STEPS, max_batches_per_epoch)
for epoch in range(EPOCHS):
    start = perf_counter()
    running_train_loss = 0.0
    for step in range(steps):
        with Tensor.train():
            samp = np.random.randint(0, X_train.shape[0], size=(64))

            # get batch and labels
            batch = Tensor(X_train[samp], requires_grad=False)
            labels = Tensor(Y_train[samp])

            out = model(batch) # forward pass
            loss = out.sparse_categorical_crossentropy(labels) # calculate loss
            optim.zero_grad() # zero out gradients
            loss.backward() # backward pass
            optim.step() # update weights

            running_train_loss += loss.numpy()

    train_loss = running_train_loss / STEPS # loss over all batches, over num batches

    # test accuracy over the whole dataset
    out = model(Tensor(X_test))
    pred = out.argmax(axis=1) # get the index of the max value
    accuracy = (pred == Tensor(Y_test)).mean().numpy()

    elapsed = perf_counter() - start
    total_time += elapsed

    print(f"Epoch {epoch+1}/{EPOCHS}: {steps} Batches (max: {max_batches_per_epoch}) | Train Loss: {train_loss:.4f} | Test Accuracy: {accuracy:.4f} | Time: {elapsed:.2f}s")

print(f"Total training time: {total_time:.2f}s")

Epoch 1/10: 782 Batches (max: 782) | Train Loss: 1.7502 | Test Accuracy: 0.4834 | Time: 35.00s
Epoch 2/10: 782 Batches (max: 782) | Train Loss: 1.6234 | Test Accuracy: 0.6643 | Time: 35.51s
Epoch 3/10: 782 Batches (max: 782) | Train Loss: 1.4638 | Test Accuracy: 0.7223 | Time: 37.48s
Epoch 4/10: 782 Batches (max: 782) | Train Loss: 1.2776 | Test Accuracy: 0.7516 | Time: 31.91s
Epoch 5/10: 782 Batches (max: 782) | Train Loss: 1.0900 | Test Accuracy: 0.7789 | Time: 32.24s
Epoch 6/10: 782 Batches (max: 782) | Train Loss: 0.9340 | Test Accuracy: 0.8088 | Time: 32.11s
Epoch 7/10: 782 Batches (max: 782) | Train Loss: 0.8068 | Test Accuracy: 0.8272 | Time: 30.76s
Epoch 8/10: 782 Batches (max: 782) | Train Loss: 0.7138 | Test Accuracy: 0.8380 | Time: 33.51s
Epoch 9/10: 782 Batches (max: 782) | Train Loss: 0.6417 | Test Accuracy: 0.8468 | Time: 36.18s
Epoch 10/10: 782 Batches (max: 782) | Train Loss: 0.5916 | Test Accuracy: 0.8546 | Time: 33.53s
Total training time: 338.24s


**MLP 10 Epochs MNIST**

```
Epoch 1/10: 782 Batches (max: 782) | Train Loss: 1.7502 | Test Accuracy: 0.4834 | Time: 35.00s
Epoch 2/10: 782 Batches (max: 782) | Train Loss: 1.6234 | Test Accuracy: 0.6643 | Time: 35.51s
Epoch 3/10: 782 Batches (max: 782) | Train Loss: 1.4638 | Test Accuracy: 0.7223 | Time: 37.48s
Epoch 4/10: 782 Batches (max: 782) | Train Loss: 1.2776 | Test Accuracy: 0.7516 | Time: 31.91s
Epoch 5/10: 782 Batches (max: 782) | Train Loss: 1.0900 | Test Accuracy: 0.7789 | Time: 32.24s
Epoch 6/10: 782 Batches (max: 782) | Train Loss: 0.9340 | Test Accuracy: 0.8088 | Time: 32.11s
Epoch 7/10: 782 Batches (max: 782) | Train Loss: 0.8068 | Test Accuracy: 0.8272 | Time: 30.76s
Epoch 8/10: 782 Batches (max: 782) | Train Loss: 0.7138 | Test Accuracy: 0.8380 | Time: 33.51s
Epoch 9/10: 782 Batches (max: 782) | Train Loss: 0.6417 | Test Accuracy: 0.8468 | Time: 36.18s
Epoch 10/10: 782 Batches (max: 782) | Train Loss: 0.5916 | Test Accuracy: 0.8546 | Time: 33.53s
Total training time: 338.24s
```

## Train For Full Epochs

In [66]:
for epoch in range(EPOCHS):    
    with Tensor.train():
        running_train_loss = 0.0
        for x, y in train_loader:
            out = model(x)
            loss = out.sparse_categorical_crossentropy(y)
            optim.zero_grad()
            loss.backward()
            optim.step()

            running_train_loss += loss.numpy()
        
    running_test_loss = 0.0
    for x, y in test_loader:
        out = model(x)
        loss = out.sparse_categorical_crossentropy(y)
        running_test_loss += loss.numpy()

    print(f"Epoch {epoch+1/EPOCHS} | Train Loss: {running_train_loss/len(train_loader):0.4f} | Test Loss: {running_test_loss/len(test_loader):0.4f}")

Epoch 1/100 | Train Loss: 1.41102050423927 | Test Loss: 1.2729565672054413
Epoch 2/100 | Train Loss: 1.1991597955946423 | Test Loss: 1.0799631529552922
Epoch 3/100 | Train Loss: 1.035741127436728 | Test Loss: 0.9349080020455038
Epoch 4/100 | Train Loss: 0.9142745292705038 | Test Loss: 0.8270840436030346
Epoch 5/100 | Train Loss: 0.8235563360669119 | Test Loss: 0.7458942454711647


KeyboardInterrupt: 