In [1]:
import os

import numpy as np

from lib.Tensor import Tensor
from lib.NN import ReLU, Dense, Module, CategoricalCrossEntropyLoss
from lib.Optimizers import SGD
from lib.data_utils import get_mnist

In [2]:
# Directory handed to get_mnist. Set the MINI_DL_DATA_DIR environment variable
# to point the notebook at your own copy of the data; when it is unset, the
# original author's local path is used so existing setups keep working.
DEFAULT_DATA_DIR = "/Users/beneverman/Documents/Coding/bens-mini-dl/data"
path = os.environ.get("MINI_DL_DATA_DIR", DEFAULT_DATA_DIR)

# get_mnist returns four values, unpacked here as the training inputs/labels
# followed by the validation inputs/labels.
x_train, y_train, x_valid, y_valid = get_mnist(path)

In [3]:
# Layer sizes are read off the training split rather than hard-coded.
input_dim = x_train.shape[1]    # number of features (pixels)
output_dim = len(set(y_train))  # all unique class labels

# Wrap each split in a Tensor. requires_grad=False is passed for all four,
# i.e. no gradient is requested for the data itself.
x_train_tensor = Tensor(x_train, requires_grad=False)
y_train_tensor = Tensor(y_train, requires_grad=False)
x_valid_tensor = Tensor(x_valid, requires_grad=False)
y_valid_tensor = Tensor(y_valid, requires_grad=False)

In [4]:
class MLP(Module):
    """Small multi-layer perceptron: Dense -> ReLU -> Dense.

    The hidden layer is 64 units wide. The output of the second Dense layer
    is returned as-is; no activation is applied after it.
    """

    def __init__(self, input_dim: int, output_dim: int):
        """Build the layers.

        Args:
            input_dim: Input size given to the first Dense layer.
            output_dim: Output size given to the second Dense layer.
        """
        super().__init__()
        hidden_dim = 64
        self.fc1 = Dense(input_dim, hidden_dim)
        self.relu1 = ReLU()
        self.fc2 = Dense(hidden_dim, output_dim)

    def forward(self, x):
        """Pass ``x`` through fc1, relu1 and fc2 (in that order) and return the result."""
        return self.fc2(self.relu1(self.fc1(x)))

# Settings for this single forward/backward check, named so they are easy to
# find and tune.
SEED = 0
BATCH_SIZE = 64
LEARNING_RATE = 0.001

# Seed NumPy's global RNG so the batch indices drawn below are identical on
# every "Restart & Run All".
# NOTE(review): if Dense initialises its weights through np.random, this also
# makes the initial weights reproducible — confirm in lib.NN.
np.random.seed(SEED)

model = MLP(input_dim, output_dim)
criterion = CategoricalCrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=LEARNING_RATE)

model.train()
optimizer.zero_grad()

# Draw BATCH_SIZE row indices uniformly from [0, len(x_train)). randint draws
# independently, so the same row may appear more than once in the batch.
samp = np.random.randint(0, len(x_train), BATCH_SIZE)
batch, labels = x_train_tensor[samp], y_train_tensor[samp]

# Single forward pass; the backward pass is run in the next cell.
out = model(batch)
loss = criterion(out, labels)

In [5]:
# Run the backward pass starting from `loss` (computed in the previous cell).
# The output below lists, per line, a creation op, a shape and a requires_grad
# flag.
# NOTE(review): this trace is presumably debug printing inside
# Tensor.backward — confirm in lib.Tensor and remove it once debugging is done.
loss.backward()

Creation_op mul, shape :(), requires_grad: True 
Creation_op , shape :(), requires_grad: True 
Creation_op mean, shape :(), requires_grad: True 
Creation_op getitem, shape :(64,), requires_grad: True 
Creation_op add, shape :(64, 10), requires_grad: True 
Creation_op mul, shape :(64, 1), requires_grad: True 
Creation_op , shape :(), requires_grad: True 
Creation_op log, shape :(64, 1), requires_grad: True 
Creation_op sum, shape :(64, 1), requires_grad: True 
Creation_op exp, shape :(64, 10), requires_grad: True 
Creation_op add, shape :(64, 10), requires_grad: True 
Creation_op mul, shape :(64, 1), requires_grad: True 
Creation_op , shape :(), requires_grad: True 
Creation_op max, shape :(64, 1), requires_grad: True 
Creation_op add, shape :(64, 10), requires_grad: True 
Creation_op , shape :(10,), requires_grad: True 
Creation_op matmul, shape :(64, 10), requires_grad: True 
Creation_op transpose, shape :(64, 10), requires_grad: True 
Creation_op mul, shape :(10, 64), requires_grad: 

## Without grad

```
Creation_op matmul, shape (64, 64)
```

## With grad

```
Creation_op matmul, shape (64, 64) 
...
Creation_op transpose, shape (784, 64)
Creation_op mul, shape (64, 784)
Creation_op , shape ()
Creation_op , shape (64, 784)
Creation_op getitem, shape (64, 784)
Creation_op , shape (50000, 784)
```

In [6]:
# Minimal check on a single Dense layer: a hand-written 2x3 input is passed
# through Dense(3, 2) and the result is reduced with .sum() so that
# backward() can be called on `out` in the next cell.
sample_values = np.array([[1, 2, 3], [4, 5, 6]])
t = Tensor(sample_values)
test_layer = Dense(3, 2)
layer_output = test_layer(t)
out = layer_output.sum()

In [7]:
# Run the backward pass from `out`, the summed output of the single Dense
# layer built in the previous cell. The output below uses the same
# "Creation_op ..., shape ..., requires_grad ..." line format as the MLP trace.
# NOTE(review): the last trace line shows requires_grad: False with shape
# (2, 3), which matches the input tensor `t` — presumably the input; confirm.
out.backward()

Creation_op sum, shape :(), requires_grad: True 
Creation_op add, shape :(2, 2), requires_grad: True 
Creation_op , shape :(2,), requires_grad: True 
Creation_op matmul, shape :(2, 2), requires_grad: True 
Creation_op transpose, shape :(3, 2), requires_grad: True 
Creation_op mul, shape :(2, 3), requires_grad: True 
Creation_op , shape :(), requires_grad: True 
Creation_op , shape :(2, 3), requires_grad: True 
Creation_op , shape :(2, 3), requires_grad: False 
