In [14]:
# import dependencies
import torch
from PIL import Image
from torch import nn, save, load
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [3]:
# get data
train = datasets.MNIST(root = 'data', download = True, train = True, transform =ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [4]:
## minibatch size = 32
## input images are of shape 1x28x28, meaning 1 channel, 28 px high, 28 px wide
## classes are 0 to 9, so 10 different classes
dataset = DataLoader(train, 32)

In [11]:
# create NN class
class ImageClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 32, (3,3)), # ip channels = 1, op channels = 32, meaning 32 filters, each of shape 3x3
            nn.ReLU(),
            nn.Conv2d(32, 64, (3,3)),
            nn.ReLU(),
            nn.Conv2d(64, 64, (3,3)),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(64*(28-6)*(28-6),10)
        )

    def forward(self, x):
        return self.model(x)

In [12]:
## instane of the above NN , loss and clf (classifier)
clf = ImageClassifier().to('cpu')
## lr = learning rate
opt = Adam(clf.parameters(), lr = 1e-3)
loss_fn = nn.CrossEntropyLoss()

In [13]:
## training flow, 
## put it under the if __name__, if using a .py file
# if __name__ == "__main__":
for epoch in range(10):
    for batch in dataset:
        # unpack the data
        X, y = batch
        X, y = X.to('cpu'), y.to('cpu')
        yhat = clf(X)
        loss = loss_fn(yhat, y)

        ## Apply backpropagation
        # Zero out any existing gradient
        opt.zero_grad()
        # calculate new gradient
        loss.backward()
        # apply gradient back
        opt.step()

    print(f'Epoch: {epoch}, loss: {loss.item()}')

Epoch: 0, loss: 0.019194109365344048
Epoch: 1, loss: 0.004327834118157625
Epoch: 2, loss: 0.0008858221117407084
Epoch: 3, loss: 0.00016197173681575805
Epoch: 4, loss: 1.5965655620675534e-05
Epoch: 5, loss: 0.00014409572759177536
Epoch: 6, loss: 6.991453847149387e-05
Epoch: 7, loss: 5.9416433941805735e-06
Epoch: 8, loss: 1.3454365216603037e-05
Epoch: 9, loss: 1.8148279195884243e-05


In [17]:
# save model as pt file
with open('model_state.pt', 'wb') as f:
    save(clf.state_dict(), f)

In [18]:
# load saved model from file
with open('model_state.pt','rb') as f:
    clf.load_state_dict(load(f))

In [21]:
# make predictions on unseen data
img = Image.open('data/img_3.jpg')
img_tensor = ToTensor()(img).unsqueeze(0).to('cpu')
print(torch.argmax(clf(img_tensor)))

tensor(9)
