Minigrad is an automatic tensor differentiation engine with a deep learning library on top of it.
- PyTorch-like API
- GPU accelerated (soon)
- Built on top of numpy
The gradient of a tensor function $z$ with respect to a tensor $x$ can be computed using the chain rule.
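For example (a standard statement of the chain rule, not specific to minigrad): if $z = f(y)$ and $y = g(x)$, then

$$\frac{\partial z}{\partial x} = \frac{\partial z}{\partial y} \cdot \frac{\partial y}{\partial x}$$

so each operation only needs to know the local derivative of its output with respect to its inputs.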
This property of differentiation allows us to compute the gradient by dynamically building a directed acyclic graph (DAG) of the operations and values that produced $z$, and then visiting it in reverse topological order.
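A minimal sketch of that idea for scalars (illustrative only; the `Value` class below is hypothetical and not minigrad's actual implementation): each value remembers its parents and a local backward rule, and `backward` walks the DAG in reverse topological order.

```python
class Value:
    """A scalar that remembers the operations that produced it."""
    def __init__(self, data, parents=()):
        self.data = data
        self.grad = 0.0
        self._parents = parents
        self._backward_fn = lambda: None  # pushes this node's grad to its parents

    def __mul__(self, other):
        out = Value(self.data * other.data, (self, other))
        def backward_fn():
            self.grad += other.data * out.grad   # d(out)/d(self) * upstream grad
            other.grad += self.data * out.grad   # d(out)/d(other) * upstream grad
        out._backward_fn = backward_fn
        return out

    def backward(self):
        # Topologically sort the DAG, then accumulate gradients in reverse.
        order, seen = [], set()
        def visit(v):
            if v not in seen:
                seen.add(v)
                for p in v._parents:
                    visit(p)
                order.append(v)
        visit(self)
        self.grad = 1.0  # dz/dz
        for v in reversed(order):
            v._backward_fn()

# usage:
a, b = Value(3.0), Value(4.0)
z = a * b
z.backward()
print(a.grad, b.grad)  # -> 4.0 3.0
```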
Minimizing the following real-valued function:

$$z(x) = (2x + 50)^2 + x^2$$

which has a local minimum at $x_0 = -20$ (where the derivative $z'(x) = 10x + 200$ vanishes), with $z(x_0) = 500$:
```python
import minigrad
from minigrad import Tensor
import matplotlib.pyplot as plt

minigrad.set_device("cpu")

history = []
learning_rate = 1e-4
epochs = 300
x = Tensor([2])  # initialize a 1-D tensor [2]

for i in range(epochs):
    z = (2*x + 50)**2 + x**2
    history.append(z.numpy()[0])
    z.backward()  # compute gradients
    with minigrad.no_grad():  # disable gradients globally
        x -= x.grad * learning_rate  # gradient descent step; x.grad is dz/dx
    x.zero_grad()  # reset gradients before the next iteration
    z.zero_grad()

plt.figure()
plt.plot(history)
plt.show()
```
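As a sanity check on the computed gradient (a sketch assuming `x.grad` is itself a `Tensor` exposing `.numpy()`, like `z` above), the closed form $z'(x) = 10x + 200$ gives $220$ at $x = 2$:

```python
x = Tensor([2.0])
z = (2*x + 50)**2 + x**2
z.backward()
print(x.grad.numpy()[0])  # expected: 220.0, since dz/dx = 10x + 200
```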
Solving MNIST shouldn't be a problem once you have an autograd engine, but it's even easier with a neural network library... hence `minigrad.nn`.
```python
import minigrad
from minigrad import nn
from minigrad.data import DataLoader
from sklearn import metrics, datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

n_classes = 10
digits = datasets.load_digits(n_class=n_classes)
X_train, X_test, y_train, y_test = train_test_split(digits.images, digits.target, train_size=0.8)
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, train_size=0.8)

# load_digits pixels range from 0 to 16, so this scales them to [0, 1]
X_train /= 16.
X_validation /= 16.
X_test /= 16.

class MnistClassifier(nn.Module):
    def __init__(self, input_shape, num_classes, z_size=64):
        super().__init__()
        self.input_shape = input_shape
        # child modules like these are automatically registered in the parent
        # module if they have trainable params (see the sketch after this block)
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(input_size=np.prod(input_shape), output_size=z_size)
        self.activation = nn.Tanh()
        self.linear2 = nn.Linear(input_size=z_size, output_size=z_size)
        self.linear3 = nn.Linear(input_size=z_size, output_size=num_classes)
        self.softmax = nn.Softmax()

    def forward(self, x):
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.activation(x)
        x = self.linear2(x)
        x = self.activation(x)
        x = self.linear3(x)
        x = self.softmax(x)
        return x

    def predict(self, x):
        if isinstance(x, np.ndarray):
            x = minigrad.Tensor(x, requires_grad=False)
        with minigrad.no_grad():  # disables DAG construction for gradients
            probs = self(x)
        return np.argmax(probs.data, axis=1)
```
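How that automatic registration can work (a hypothetical sketch of the common `__setattr__` pattern, not minigrad's actual source):

```python
class Module:
    def __setattr__(self, name, value):
        # Keep a registry of child modules as attributes are assigned,
        # so params() can recurse into them later.
        if isinstance(value, Module):
            self.__dict__.setdefault("_children", {})[name] = value
        object.__setattr__(self, name, value)

    def params(self):
        own = list(self.__dict__.get("_params", []))
        for child in self.__dict__.get("_children", {}).values():
            own.extend(child.params())
        return own
```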
```python
shape = X_train[0].shape
batch_size = 64
epochs = 50

model = MnistClassifier(shape, n_classes)
# instantiate our optimizer with the model params
optimizer = minigrad.optim.Adam(model.params(), learning_rate=1e-3)
# since we're not passing the ground truth as a one-hot array, we have to specify n_classes
criterion = nn.losses.MSE(n_classes)
# wraps our data in batches nicely
train_loader = DataLoader(X_train, y_train, batch_size=batch_size, tensors=True)

losses = []
for i in range(epochs):
    total_loss = 0
    for x, gt in train_loader.get():
        outputs = model(x)             # forward pass
        loss = criterion(gt, outputs)  # compute loss
        loss.backward()                # calculate the gradients
        optimizer.step()               # update weights
        optimizer.zero_grad()          # reset gradients
        total_loss += loss.data.item()
    train_preds = model.predict(X_train)
    train_acc = metrics.accuracy_score(y_train, train_preds)
    validation_preds = model.predict(X_validation)
    validation_acc = metrics.accuracy_score(y_validation, validation_preds)
    print(f"Epoch {i + 1:{len(str(epochs))}}/{epochs}, Loss: {total_loss:.3f}"
          f", Train accuracy: {train_acc*100:.1f}%"
          f", Validation accuracy: {validation_acc*100:.1f}%")
    losses.append(total_loss)  # track the epoch loss, not the last batch's Tensor

test_preds = model.predict(X_test)
test_acc = metrics.accuracy_score(y_test, test_preds)
print(f"Test accuracy: {test_acc*100:.1f}%")

plt.plot(losses)
plt.show()
```
```
Epoch 50/50, Loss: 0.023, Train accuracy: 99.7%, Validation accuracy: 95.5%
Test accuracy: 97.8%
```
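With the names defined above, classifying a single held-out image looks like this (`predict` accepts raw numpy arrays and wraps them itself):

```python
sample = X_test[:1]              # one 8x8 digit image, shape (1, 8, 8)
print(model.predict(sample)[0])  # predicted class
print(y_test[0])                 # ground truth
```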