# 1.Importing Libraries 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import os 
from sklearn.metrics import confusion_matrix
import warnings 
warnings.filterwarnings("ignore")

import torch 
from torch import nn 
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from torchvision.utils import make_grid, save_image
import torchvision as tv 
from torchvision.datasets import MNIST

# 2.Importing Dataset 

In [None]:
# Read the three Kaggle "digit-recognizer" CSV files in one pass. Note that
# the competition's train.csv (which has a `label` column) is bound to
# `validation`.
validation, test, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
        '/kaggle/input/digit-recognizer/sample_submission.csv',
    )
)

## Let's take a look at some images in the dataset

In [None]:
rows = 5
fig, axes = plt.subplots(rows, 10, figsize=(10, rows))

# `axes` is a (rows, 10) grid; walking its transpose yields one column of
# axes per digit class, filled top to bottom with that digit's first samples.
for digit, column_axes in enumerate(axes.T):
    samples = validation[validation.label == digit]
    for sample_idx, ax in enumerate(column_axes):
        pixels = samples.iloc[sample_idx, 1:].values
        ax.imshow(pixels.reshape(28, 28), cmap='gray')
        ax.axis('off')

plt.show()

# 3. Preprocessing of Data
- Loading extra data from PyTorch
- Construction of data loader 
- Normalizing the data
- Converting data into tensor 

In [None]:
# `ToTensor` already rescales 8-bit pixel values (PIL images and uint8 arrays)
# from [0, 255] to floats in [0.0, 1.0], so no extra division is applied.
# Dividing by 255 a second time would squash every input into [0, 1/255].
transform = transforms.Compose([
    transforms.ToTensor(),
])


# Both official MNIST splits (fetched into /data on first use) are merged
# into one training set.
x1 = MNIST("/data", download=True, train=True, transform=transform)
x2 = MNIST("/data", train=False, transform=transform)
train = torch.utils.data.ConcatDataset([x1, x2])

The data is converted into a `tensor` and `normalized` by `dividing by 255`.

The `torch.cuda.is_available` function checks whether a GPU is available, and we use the GPU when it is. Later in the code we send the model and each batch of data to that device.

In [None]:
# Run on the GPU whenever CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

Our data is in a pandas DataFrame, so we need to convert it to tensors and also normalize it to fit the model. I think the best way to do this is with a [PyTorch Dataset](https://pytorch.org/tutorials/beginner/basics/data_tutorial.html). I am also making batched data loaders.

In [None]:
class TensorFromDataset(Dataset):
    """Expose a labelled pixel DataFrame as a PyTorch ``Dataset``.

    Column 0 of every row is read as the label; the remaining columns are
    read as the pixels of one image and reshaped to ``(28, 28)``.

    Args:
        dataFrame: Frame with the label in column 0 and 784 pixel columns
            after it.
        transform: Callable applied to each ``(28, 28)`` ``uint8`` image
            array. Pass ``None`` to get the raw NumPy array back.
    """

    def __init__(self, dataFrame, transform=transforms.ToTensor()):
        self.dataFrame = dataFrame
        # Keep the caller's transform on the instance so __getitem__ does not
        # depend on whatever module-level `transform` happens to exist.
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.dataFrame)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``."""
        label = torch.tensor(self.dataFrame.iloc[index, 0])
        image = self.dataFrame.iloc[index, 1:].values.astype(np.uint8).reshape(28, 28)
        if self.transform is not None:
            image = self.transform(image)
        return (image, label)

# The labelled Kaggle frame is wrapped as a Dataset for the validation loader.
validation_data = TensorFromDataset(dataFrame=validation, transform=transform)
# The whole test frame goes through `transform` in one call, as a single
# uint8 array.
test_data = transform(test.to_numpy().astype(np.uint8))

In [None]:
batch_size = 32 ** 2

train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_data, batch_size=batch_size, shuffle=True)
# `test_data` is one tensor holding every test row, so it is first viewed as
# N single-channel 28x28 images; otherwise the loader would iterate over its
# leading dimension instead of over images. The test set is never shuffled:
# predictions have to stay aligned with the row order of the submission file.
test_loader = DataLoader(
    test_data.reshape(-1, 1, 28, 28), batch_size=batch_size, shuffle=False
)

# 4. Making Model

In [None]:
class Model(nn.Module):
    """Small convolutional classifier for single-channel 28x28 digit images.

    The instance owns the network, its loss, its optimizer and the per-epoch
    metric history. ``forward`` returns raw class scores (logits) of shape
    ``(batch, 10)``: ``nn.CrossEntropyLoss`` applies log-softmax internally,
    so the network must not end in a softmax layer. The arg-max of the logits
    is the predicted digit.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(2),
            nn.Flatten(),
            # Two 2x2 poolings shrink 28x28 to 7x7 with 128 channels.
            nn.Linear(128 * 7 * 7, 128 * 3),
            nn.ReLU(),
            # Final layer emits logits; softmax is left to CrossEntropyLoss.
            nn.Linear(128 * 3, 10),
        )
        self.to(device)
        self.loss = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)
        # One dict of metrics per finished epoch (see training_epoch_end).
        self.history = []
        self.apply(self.init_weights)

    def init_weights(self, m):
        """Xavier-initialise the weights of conv and linear layers."""
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            torch.nn.init.xavier_uniform_(m.weight)

    def forward(self, x):
        """Return the ``(batch, 10)`` logits for a batch of images."""
        return self.main(x)

    def accuracy(self, outputs, labels):
        """Return the fraction of rows whose arg-max equals the label."""
        _, preds = torch.max(outputs, dim=1)
        return torch.tensor(torch.sum(preds == labels).item() / len(preds))

    def training_step(self, batch):
        """Run one optimisation step on ``batch``.

        Args:
            batch: ``(images, labels)`` pair as produced by a data loader.

        Returns:
            ``(loss, accuracy)`` of the batch as Python floats.
        """
        # BatchNorm must use batch statistics while training.
        self.train()
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)
        out = self(images)
        loss = self.loss(out, labels)
        accuracy = self.accuracy(out, labels)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item(), accuracy.item()

    def validation_step(self, batch):
        """Evaluate ``batch`` without touching the weights.

        Args:
            batch: ``(images, labels)`` pair as produced by a data loader.

        Returns:
            ``(loss, accuracy)`` of the batch as Python floats.
        """
        # Eval mode makes BatchNorm use (and not update) its running stats.
        self.eval()
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            out = self(images)
            loss = self.loss(out, labels)
            accuracy = self.accuracy(out, labels)

        return loss.item(), accuracy.item()

    def training_epoch_end(self, t_accuracy, t_loss, v_accuracy, v_loss):
        """Append one epoch's metrics to ``self.history`` and print them."""
        epoch = len(self.history)
        self.history.append({
            'epoch': epoch + 1,
            'training_accuracy': t_accuracy,
            'training_loss': t_loss,
            'validation_accuracy': v_accuracy,
            'validation_loss': v_loss,
        })
        print(self.history[-1])

    def plot_history(self):
        """Plot recorded loss and accuracy curves side by side."""
        df = pd.DataFrame(self.history)
        fig, axes = plt.subplots(1, 2, figsize=(15, 5))
        losses = df[['training_loss', 'validation_loss']]
        accuracies = df[['training_accuracy', 'validation_accuracy']]
        losses.plot(ax=axes[0])
        accuracies.plot(ax=axes[1])
        axes[0].set_title('Loss over epochs')
        axes[1].set_title('Accuracy over epochs')
        axes[0].set_xlabel('Epoch')
        axes[1].set_xlabel('Epoch')
        axes[0].set_ylabel('Loss')
        axes[1].set_ylabel('Accuracy')
        plt.show()

    @staticmethod
    def _run_epoch(step, batches):
        """Apply ``step`` to every batch; return sample-weighted mean metrics.

        Returns ``(nan, nan)`` when ``batches`` yields nothing.
        """
        total_loss = 0.0
        total_accuracy = 0.0
        seen = 0
        for batch in batches:
            n_samples = len(batch[1])
            loss, accuracy = step(batch)
            # Weight by batch size so a short final batch is not over-counted.
            total_loss += loss * n_samples
            total_accuracy += accuracy * n_samples
            seen += n_samples
        if seen == 0:
            return float('nan'), float('nan')
        return total_loss / seen, total_accuracy / seen

    def training_loop(self, epochs, training_batches=None, validation_batches=None):
        """Train for ``epochs`` epochs, validating after each one.

        Args:
            epochs: Number of passes over the training batches.
            training_batches: Iterable of ``(images, labels)`` batches;
                defaults to the module-level ``train_loader``.
            validation_batches: Iterable of ``(images, labels)`` batches;
                defaults to the module-level ``validation_loader``.

        Each epoch's metrics, averaged over all of its batches, are recorded
        through ``training_epoch_end``.
        """
        if training_batches is None:
            training_batches = train_loader
        if validation_batches is None:
            validation_batches = validation_loader

        for _ in range(epochs):
            t_loss, t_accuracy = self._run_epoch(self.training_step, training_batches)
            v_loss, v_accuracy = self._run_epoch(self.validation_step, validation_batches)
            self.training_epoch_end(t_accuracy, t_loss, v_accuracy, v_loss)
                    
# Build the classifier; Model.__init__ moves it to `device` and creates its
# loss and optimizer.
digitRecognizer = Model()

# 5. Training model

In [None]:
# Train for 200 epochs; each epoch's metrics are printed and recorded.
digitRecognizer.training_loop(epochs=200)

In [None]:
# Also save the model's state dict after training it for 200 epochs.
torch.save(obj=digitRecognizer.state_dict(), f='digitRecognizer_200.pth')

# 6. Looking at model performance
I am using an approach similar to my previous [keras project](https://www.kaggle.com/code/bibekbhusal0/digit-recognizer-with-keras-99-978-accurate#Looking-at-the-model-performance-and-predictions) to test the model.

In [None]:
# Show the loss and accuracy curves recorded in digitRecognizer.history.
digitRecognizer.plot_history()

In [None]:
def random_test(rows=1, cols=1):
    """Show a ``rows`` x ``cols`` grid of random validation digits with predictions.

    Rows of ``validation`` are sampled with replacement, classified by
    ``digitRecognizer`` and drawn with an ``L: <label>, P: <prediction>``
    title. Misclassified digits use the 'OrRd' colormap instead of 'gray',
    and the figure title reports how many predictions were correct.

    Args:
        rows: Number of grid rows.
        cols: Number of grid columns.
    """
    total = rows * cols
    random_indices = np.random.randint(len(validation), size=total)
    # squeeze=False keeps `axes` two-dimensional for every grid size, so no
    # special-casing of single rows or columns is needed.
    fig, axes = plt.subplots(
        rows, cols, figsize=(cols * 1.3, 1.8 * rows), squeeze=False
    )

    random_images = validation.iloc[random_indices, 1:].values.astype(np.uint8)
    labels = validation.iloc[random_indices, 0].values
    images_tensor = transform(random_images).to(device).reshape(-1, 1, 28, 28)

    # Predict in eval mode and without autograd: in training mode a forward
    # pass would normalise with this batch's statistics and also update the
    # BatchNorm running statistics. The previous mode is restored afterwards.
    was_training = digitRecognizer.training
    digitRecognizer.eval()
    try:
        with torch.no_grad():
            predictions_p = digitRecognizer(images_tensor)
    finally:
        digitRecognizer.train(was_training)
    predictions = torch.argmax(predictions_p, dim=1).cpu().numpy()

    correct = int((predictions == labels).sum())

    # axes.flat walks the grid row by row, matching the sample order.
    for ax, image, label, prediction in zip(axes.flat, random_images, labels, predictions):
        cmap = 'gray' if label == prediction else 'OrRd'
        ax.imshow(image.reshape(28, 28), cmap=cmap)
        ax.set_title(f'L: {label}, P: {prediction}')
        ax.axis(False)

    fig.suptitle(f'correct:{correct}/{total}', fontsize=16)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    # Drop the tensors before asking CUDA to release its cached memory.
    del images_tensor
    del predictions_p
    del predictions
    torch.cuda.empty_cache()
    plt.show()

# Quick look: a 3x3 grid of random validation digits with their predictions.
random_test(rows=3, cols=3)

In [None]:
# Larger sample: a 10x10 grid (100 random validation digits).
random_test(rows=10, cols=10)

# 7. submission

In [None]:
test_data = test_data.to(device)
# View the test tensor as N single-channel 28x28 images and cut it into
# chunks of at most 1000 images. The last chunk may be shorter, so this works
# for any number of test rows.
test_images = test_data.reshape(-1, 1, 28, 28)
test_data_batch = [
    test_images[start:start + 1000]
    for start in range(0, len(test_images), 1000)
]

# Predict in eval mode and without autograd: in training mode BatchNorm would
# normalise each chunk with its own statistics and keep updating its running
# statistics. The previous mode is restored afterwards.
was_training = digitRecognizer.training
digitRecognizer.eval()
try:
    with torch.no_grad():
        predictions_p = torch.cat([digitRecognizer(t).cpu() for t in test_data_batch])
finally:
    digitRecognizer.train(was_training)

# The highest-scoring class of each row is the predicted digit.
predictions = torch.argmax(predictions_p, dim=1).numpy()

sample_submission['Label'] = predictions
sample_submission.to_csv('submission.csv', index=False)

Thanks for reading till the end of this notebook. I hope you have learned something new; if so, please upvote and share. You can also check out my [previous project](https://www.kaggle.com/bibekbhusal0/digit-recognizer-with-keras-99-978-accurate), which is a [digit recognizer with keras](https://www.kaggle.com/bibekbhusal0/digit-recognizer-with-keras-99-978-accurate).

My next project will be Generative Adversarial Networks on this MNIST dataset with PyTorch; I will make it public so that others can also learn from it.