In [1]:
import torch.nn as nn
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [5]:
# Read the raw MNIST CSV files into NumPy arrays (one row per image).
# NOTE(review): both arrays are read from the same file ("mnist_train_small.csv"),
# so the "test" data is identical to the training data - confirm the intended
# held-out file and point the second read at it.
# NOTE(review): read_csv treats the first row as column names by default -
# confirm the CSV really has a header row, otherwise one sample is dropped.
def _csv_to_array(csv_path):
    """Read the CSV file at `csv_path` and return its contents as a NumPy array."""
    return pd.read_csv(csv_path).to_numpy()


raw_train_data = _csv_to_array("../../../../DeepLearning/Datasets/mnist_train_small.csv")
raw_test_data = _csv_to_array("../../../../DeepLearning/Datasets/mnist_train_small.csv")

In [6]:
# Column 0 of every row is the label; the remaining columns are the pixels,
# reshaped to (samples, 1 channel, 28, 28) and divided by 255.
train_pixels = raw_train_data[:, 1:]
x_train = train_pixels.reshape(-1, 1, 28, 28) / 255
y_train = raw_train_data[:, 0]

test_pixels = raw_test_data[:, 1:]
x_test = test_pixels.reshape(-1, 1, 28, 28) / 255
y_test = raw_test_data[:, 0]

In [14]:
class ResidualBlock(nn.Sequential):
    """Residual block: two 3x3 convolutions plus a 1x1-convolution shortcut.

    The direct path is Conv3x3 -> BatchNorm -> ReLU -> Conv3x3 -> BatchNorm.
    The shortcut path is a 1x1 convolution that maps the input to
    `out_features` channels so both paths can be summed. The sum is passed
    through a final ReLU.

    Args:
        in_features: number of channels of the input feature map.
        out_features: number of channels produced by the block.

    Note:
        `forward` is overridden, so the `nn.Sequential` base is not used for
        chaining; it is kept only so existing `isinstance` checks, `len()` and
        indexing on the block keep working.
    """

    def __init__(self,in_features, out_features):
        # Zero-argument super() runs the direct parent's initializer instead of
        # skipping past nn.Sequential as `super(nn.Sequential, self)` did.
        super().__init__()
        # Both 3x3 convolutions use padding=1, so the spatial size is preserved.
        self.direct = nn.Sequential(
            nn.Conv2d(in_features,out_features,kernel_size=3,padding=1),
            nn.BatchNorm2d(out_features),
            nn.ReLU(),
            nn.Conv2d(out_features,out_features,kernel_size=3,padding=1),
            nn.BatchNorm2d(out_features)
        )
        # 1x1 convolution: changes only the channel count to match `direct`.
        self.shortcut = nn.Conv2d(in_features,out_features,kernel_size=1)
        self.relu = nn.ReLU()

    def forward(self,x):
        """Return relu(direct(x) + shortcut(x))."""
        return self.relu(self.direct(x) + self.shortcut(x))

In [15]:
# Network: three residual stages, each followed by 2x2 max-pooling
# (28 -> 14 -> 7 -> 3 pixels per side), then a linear classifier over 10 classes.
layers = [
    ResidualBlock(1, 3),
    nn.MaxPool2d(kernel_size=2),
    ResidualBlock(3, 16),
    nn.MaxPool2d(kernel_size=2),
    ResidualBlock(16, 32),
    nn.MaxPool2d(kernel_size=2),
    nn.Flatten(),
    # 32 channels of 3x3 feature maps remain after the last pooling step.
    nn.Linear(32 * 3 * 3, 10),
]
model = nn.Sequential(*layers)

# Cross-entropy over the 10 class logits.
loss_fn = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over every model parameter.
optim = torch.optim.SGD(model.parameters(), lr=0.1)

In [16]:
N = len(x_train)
epochs = 5
batch_size = 64
# Put the model in training mode explicitly so the BatchNorm layers use batch
# statistics even if an earlier cell switched the model to eval mode.
model.train()
# Start training for #epochs
for epoch in range(epochs):
    tot_loss = 0
    # Train batch by batch. Stepping the start index by batch_size never yields
    # an empty trailing batch; the previous `N // batch_size + 1` iteration did
    # whenever N was a multiple of batch_size, which BatchNorm rejects.
    for start in range(0, N, batch_size):
        stop = start + batch_size
        # Setting all grads (of all parameters) to zero
        optim.zero_grad()
        # Converting our inputs and outputs to tensor
        x_ = torch.tensor(x_train[start:stop]).float()
        y_ = torch.tensor(y_train[start:stop]).long()
        # Forward
        o = model(x_)
        # Calculate loss
        loss = loss_fn(o,y_)
        # Backward
        loss.backward()
        # Accumulate loss
        tot_loss += loss.item()
        # Update parameters using the calculated gradients
        optim.step()
    # Sum of the per-batch mean losses for this epoch
    print(tot_loss)

90.37156366184354
19.46238697785884
11.655169290024787
7.581189502845518
4.939888269000221


In [17]:
# Predict the first 20 test samples.
# Inference must run in eval mode so BatchNorm uses its running statistics
# instead of the statistics of this batch (and does not update them), and
# under no_grad so no autograd graph is built.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        o = model(torch.tensor(x_test[:20]).float())
finally:
    # Restore the previous mode so re-running the training cell is unaffected.
    model.train(was_training)

In [18]:
# Predicted class = index of the largest logit in each row.
o.argmax(dim=1)

tensor([5, 7, 9, 5, 2, 0, 6, 5, 5, 2, 7, 7, 6, 8, 9, 3, 5, 7, 6, 9])

In [19]:
# Labels of the same first 20 test samples, for comparison with the predictions.
y_test[:20]

array([5, 7, 9, 5, 2, 0, 6, 5, 5, 2, 7, 7, 6, 8, 9, 3, 5, 7, 6, 9],
      dtype=int64)