# centigrad demo using MNIST

### Install additional dependencies
To install the `centigrad` dependencies, see `requirements.txt`.

In [None]:
!pip install tqdm
!pip install idx2numpy

### Import necessary libraries

In [2]:
import gzip
import os
import pickle
import shutil
from urllib import request

import idx2numpy
import numpy as np
from tqdm import tqdm

from centigrad.tensor import Tensor
from centigrad.layer import (
    Flatten,
    FullyConnected,
    Conv2d,
    MaxPool2d,
    Dropout2d,
    BatchNorm2d,
)
from centigrad.optimizer import GradientDescent
from centigrad.loss import cross_entropy
from centigrad.activation import relu, softmax, tanh
from centigrad.model import Model

### Download the dataset

In [3]:
base_url = "http://yann.lecun.com/exdb/mnist/"

dataset = [
    "train-images.idx3-ubyte",
    "train-labels.idx1-ubyte",
    "t10k-images.idx3-ubyte",
    "t10k-labels.idx1-ubyte",
]

# Everything is stored under this directory, relative to the working directory.
data_dir = "data"
os.makedirs(data_dir, exist_ok=True)

for file in dataset:
    # Remote archive name: dots become dashes and ".gz" is appended,
    # e.g. "train-images.idx3-ubyte" -> "train-images-idx3-ubyte.gz".
    _file = file.replace(".", "-") + ".gz"
    archive_path = os.path.join(data_dir, _file)
    extracted_path = os.path.join(data_dir, file)

    # Check inside the data directory (not the current directory) so that
    # files fetched by a previous run are not downloaded again.
    if not os.path.exists(archive_path):
        with request.urlopen(f"{base_url}{_file}") as response, open(
            archive_path, "wb"
        ) as out_file:
            shutil.copyfileobj(response, out_file)

    # Likewise, only decompress when the extracted file is not there yet.
    if not os.path.exists(extracted_path):
        with gzip.open(archive_path, "rb") as f_in, open(
            extracted_path, "wb"
        ) as f_out:
            shutil.copyfileobj(f_in, f_out)

### Function to one-hot-encode the labels

In [4]:
def label_to_onehot(index, num_of_classes=10):
    """Convert integer class labels into one-hot encoded rows.

    Args:
        index: Array-like of integer class indices; its first axis is the
            sample axis.
        num_of_classes: Width of each one-hot row.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(index), num_of_classes)``
        holding 1 at each sample's class position and 0 everywhere else.

    Raises:
        IndexError: If a label lies outside ``[-num_of_classes, num_of_classes)``
            or the labels are not of an integer type.
    """
    index = np.asarray(index)
    count = index.shape[0]
    label = np.zeros((count, num_of_classes))

    # Skip the assignment for empty input: there is nothing to set, and
    # reshape(count, -1) is ambiguous when count == 0.
    if index.size:
        # One vectorised assignment instead of a Python loop over samples.
        # The column vector of row numbers pairs row r with the label(s) of
        # sample r only.
        rows = np.arange(count)[:, None]
        label[rows, index.reshape(count, -1)] = 1
    return label

### Prepare the data

In [5]:
# load the dataset and normalize
# Read from "data/", the directory the download cell extracts the files into;
# dividing by 255.0 divides every raw pixel value by 255.
train_images = idx2numpy.convert_from_file("data/" + dataset[0]) / 255.0
test_images = idx2numpy.convert_from_file("data/" + dataset[2]) / 255.0

# the dimension of the inputs should be BxCxHxW (batch x channel x height x width)
# so insert a channel axis of size 1 right after the batch axis
train_images = np.expand_dims(train_images, axis=1)
test_images = np.expand_dims(test_images, axis=1)

# load the labels
train_labels = idx2numpy.convert_from_file("data/" + dataset[1])
test_labels = idx2numpy.convert_from_file("data/" + dataset[3])

# one-hot-encode the labels
train_labels = label_to_onehot(train_labels)
test_labels = label_to_onehot(test_labels)

### Define the model
This model showcases all the available layers and activation functions
(except the tanh activation).

In [6]:
class MnistNet(Model):
    """Small convolutional classifier used for the MNIST demo.

    The forward pass runs: convolution -> ReLU -> max-pooling -> dropout ->
    batch normalisation -> flatten -> fully connected layer -> softmax.
    """

    def __init__(self):
        """Create the layers used by ``forward``."""
        super().__init__()
        self.layerc1 = Conv2d(1, 2)
        self.maxpool = MaxPool2d()
        self.dropout = Dropout2d()
        self.batchnorm = BatchNorm2d(2)
        self.flatten = Flatten()
        # NOTE(review): 338 is presumably 2 channels x 13 x 13 after the
        # default convolution and pooling on 28x28 inputs - confirm against
        # the layer defaults.
        self.layer1 = FullyConnected(338, 10)

    def forward(self, x):
        """Run ``x`` through the network and return the softmax output."""
        activated = relu(self.layerc1(x))
        pooled = self.maxpool(activated)
        dropped = self.dropout(pooled)
        normalised = self.batchnorm(dropped)
        flat = self.flatten(normalised)
        scores = self.layer1(flat)
        return softmax(scores)

### Define the parameters for training the model

In [7]:
# Number of passes over the training set, and samples per mini-batch.
epochs, batch_size = 10, 32

# The network to train, and the optimizer that updates its parameters.
net = MnistNet()
optimizer = GradientDescent(net.parameters(), batch_size)

### Train the model

In [8]:
# Layout of the tqdm progress bar; the running loss is shown via {postfix}.
bar_format = "{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]"

# NOTE(review): presumably switches layers such as Dropout2d/BatchNorm2d to
# their training behaviour - confirm against centigrad's Model.
net.train()

# Number of batches per epoch. Ceiling division so that a trailing partial
# batch is counted; the value does not change between epochs.
no_batches = (len(train_images) + batch_size - 1) // batch_size

for e in range(epochs):
    # Sum of the per-batch mean losses seen so far in this epoch.
    r_loss = 0

    # Using the bar as a context manager closes it even if a step raises.
    with tqdm(
        total=no_batches,
        ncols=80,
        bar_format=bar_format,
        desc=f"Epochs: {e+1}/{epochs}",
    ) as pbar:
        # batch_no counts processed batches (1-based); i is the sample offset.
        for batch_no, i in enumerate(
            range(0, len(train_images), batch_size), start=1
        ):
            image = Tensor(train_images[i : i + batch_size])
            label = Tensor(train_labels[i : i + batch_size])

            x = net(image)
            loss = cross_entropy(x, label)
            loss.backward()

            r_loss += loss.item().mean()

            optimizer.step()
            optimizer.zero_grad()

            pbar.update(1)
            # Average over the batches processed so far. Dividing by the
            # sample offset (i + 1) would understate the running loss.
            pbar.set_postfix(Loss=f"{(r_loss/batch_no):04f}")

        # max(..., 1) avoids a division by zero when the training set is empty.
        pbar.set_postfix(Loss=f"{(r_loss/max(no_batches, 1)):04f}")

Epochs: 1/10: 100%|█████████████████████| 1875/1875 [00:56<00:00, Loss=0.569323]
Epochs: 2/10: 100%|█████████████████████| 1875/1875 [00:56<00:00, Loss=0.366532]
Epochs: 3/10: 100%|█████████████████████| 1875/1875 [00:56<00:00, Loss=0.336350]
Epochs: 4/10: 100%|█████████████████████| 1875/1875 [00:57<00:00, Loss=0.320589]
Epochs: 5/10: 100%|█████████████████████| 1875/1875 [00:56<00:00, Loss=0.313759]
Epochs: 6/10: 100%|█████████████████████| 1875/1875 [00:56<00:00, Loss=0.308322]
Epochs: 7/10: 100%|█████████████████████| 1875/1875 [00:56<00:00, Loss=0.306048]
Epochs: 8/10: 100%|█████████████████████| 1875/1875 [00:56<00:00, Loss=0.298902]
Epochs: 9/10: 100%|█████████████████████| 1875/1875 [00:56<00:00, Loss=0.294765]
Epochs: 10/10: 100%|████████████████████| 1875/1875 [00:56<00:00, Loss=0.296286]


### Save the model using pickle if you want

In [9]:
# Serialise the trained network to "centigrad_model.pickle" in the current
# working directory.
with open("centigrad_model.pickle", "wb") as outfile:
    pickle.dump(net, outfile)

# Remove the in-memory model so the following cell has to load it from disk.
del net

### Test the model

In [12]:
# load the model
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# a file you created yourself (here, the one written by the save cell).
with open("centigrad_model.pickle", "rb") as infile:
    net = pickle.load(infile)

# NOTE(review): presumably switches layers such as Dropout2d/BatchNorm2d to
# their inference behaviour - confirm against centigrad's Model.
net.inference()

# Number of correctly classified test samples.
acc = 0
for i in range(0, len(test_images), batch_size):
    images = Tensor(test_images[i : i + batch_size])
    labels = Tensor(test_labels[i : i + batch_size])
    inference = net(images)

    # The class of a sample is the position of the largest value in its row,
    # for both the network output and the one-hot label.
    predicted = np.argmax(inference.data, axis=-1)
    expected = np.argmax(labels.data, axis=-1)

    # Count the matches of the whole batch in one vectorised comparison.
    acc += int(np.sum(predicted == expected))

print(f"Accuracy: {(100*acc/len(test_images)):.2f}%")

Accuracy: 92.32%
