[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/biodatlab/deep-learning-skooldio/blob/master/student_notebooks/03_handwritten_recognition_explore.ipynb)

## **Thai-digit handwritten classification with PyTorch: Exploration**

This notebook explores the "Thai-digit handwritten classification" task, including:
- Customizing the model
    - Adding layers
    - Adding a dropout layer
- Adding data augmentation

## **Download and clean the dataset from the repository**

- We have downloaded the data from https://github.com/kittinan/thai-handwriting-number by cloning the repository
- We removed the files that have a character mismatch (as suggested by the creator)
- Then, we put the cleaned data at https://github.com/biodatlab/deep-learning-skooldio

In [None]:
!git clone https://github.com/biodatlab/deep-learning-skooldio

In [None]:
import os
import os.path as op
from glob import glob
from pathlib import Path
import random
from PIL import Image
from collections import Counter

In [None]:
# Collect every image file stored as <repo>/thai-handwritten-dataset/<folder>/<file>.
directory = "deep-learning-skooldio/"
paths = glob(
    op.join(directory, "thai-handwritten-dataset", "*", "*")
)
num_samples = len(paths)
print("Number of samples", num_samples)

In [None]:
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
import shutil

# Hold out 10% of the image paths for validation; the fixed random_state keeps the split stable.
train_paths, test_paths = train_test_split(paths, test_size=0.1, random_state=42)

# One sub-folder per digit class (0-9) inside each split directory.
for split_name in ("train", "validation"):
    for digit in range(10):
        os.makedirs(f"data/{split_name}/{digit}", exist_ok=True)

In [None]:
def copy_to_destination(src_paths, dst_path: str = "data/train/"):
    """Copy each source file to ``dst_path/<label>/<file name>``.

    The label is the name of the file's parent folder, except that files
    found in a folder named "10" are stored under label "0".
    """
    for src in tqdm(src_paths):
        src = Path(src)
        folder = src.parent.name
        label_dir = "0" if folder == "10" else folder
        shutil.copy(src, op.join(dst_path, label_dir, src.name))

In [None]:
# Fill the train and validation folders with the files from their respective split.
for split_paths, split_dir in ((train_paths, "data/train/"), (test_paths, "data/validation/")):
    copy_to_destination(split_paths, split_dir)

## **Create a custom dataset and a dataloader**

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt
import pandas as pd
import os

In [None]:
# Preprocessing shared by every image: resize to 28x28, convert to grayscale, convert to a tensor.
preprocessing_steps = [
    transforms.Resize((28, 28)),
    transforms.Grayscale(),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Build one dataframe per split, with one row per image file.
train_df = pd.DataFrame({"path": glob("data/train/*/*")})
val_df = pd.DataFrame({"path": glob("data/validation/*/*")})

# The label ("text") is the name of the folder that contains the image.
train_df["text"] = train_df["path"].apply(lambda x: Path(x).parent.name)
val_df["text"] = val_df["path"].apply(lambda x: Path(x).parent.name)

# Report image counts (row counts) rather than the (rows, columns) shape tuples.
print("Number of train images = {}, number of validation images = {}".format(len(train_df), len(val_df)))

In [None]:
class ThaiDigitDataset(Dataset):
    """Dataset that reads the images listed in a dataframe.

    Args:
        dataframe: Table with a "path" column (image file path) and a "text"
            column (label, converted with ``int``).
        transform: Optional callable applied to the opened image; its result
            is inverted as ``1 - transform(image)``.
    """

    def __init__(self, dataframe, transform=None):
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows (images) in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        Without a transform the opened image is returned as-is (not inverted).
        """
        # Positional lookup: ``idx`` is a position in 0..len-1, so label-based
        # access (``df['text'][idx]``) would raise KeyError on a dataframe whose
        # index is not the default 0..n-1 range (e.g. after filtering).
        row = self.dataframe.iloc[idx]
        label = int(row["text"])
        image = Image.open(row["path"])
        if self.transform:
            # presumably the transform yields values in [0, 1], so this swaps dark and light
            image = 1 - self.transform(image)
        return image, label

In [None]:
# Wrap each split's dataframe in a dataset; both use the same preprocessing.
train_thaidigit_dataset, val_thaidigit_dataset = (
    ThaiDigitDataset(split_df, transform=transform) for split_df in (train_df, val_df)
)

In [None]:
# Batches of 16; only the training loader shuffles its samples.
train_loader = DataLoader(
    dataset=train_thaidigit_dataset,
    batch_size=16,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_thaidigit_dataset,
    batch_size=16,
    shuffle=False,
)

## **Create the model**

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class ThaiDigitNet(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 100 hidden units -> 10 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        """Flatten ``x`` into rows of 28*28 values and return the 10 raw output scores."""
        flat = x.view(-1, 28 * 28)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

## **Train the model**

In [None]:
# Create training loop function
def train(model, n_epochs, loss_function, optimizer, train_loader, validation_loader):
    """Train ``model`` for ``n_epochs`` epochs and validate after every epoch.

    Args:
        model: Module called as ``model(images)``; its output is reduced with
            ``argmax(1)`` to obtain the predicted class.
        n_epochs: Number of passes over ``train_loader``.
        loss_function: Callable ``loss_function(pred, labels)`` returning a
            scalar tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        train_loader: Iterable of ``(images, labels)`` batches; must support
            ``len()`` and expose ``.dataset``.
        validation_loader: Same requirements as ``train_loader``; used for
            evaluation only.

    Returns:
        ``(model, training_logs)`` where ``training_logs`` maps "train_loss",
        "train_acc", "val_loss" and "val_acc" to lists with one entry per epoch.
        Losses are averaged over batches, accuracies over samples.
    """
    training_logs = {"train_loss": [],  "train_acc": [], "val_loss": [], "val_acc": []}
    print("-"*80)
    for epoch in range(1, n_epochs+1):
        # training
        model.train()
        train_loss, correct = 0, 0
        for images, labels in train_loader:
            pred = model(images)
            loss = loss_function(pred, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            correct += (pred.argmax(1) == labels).float().sum().item()
        # save training logs
        training_logs["train_loss"].append(train_loss/ len(train_loader))
        training_logs["train_acc"].append(correct / len(train_loader.dataset))

        # validation
        model.eval()
        val_loss, correct = 0, 0
        # No parameter update happens here, so gradient tracking is switched off.
        with torch.no_grad():
            for images, labels in validation_loader:
                pred = model(images)
                val_loss += loss_function(pred, labels).item()
                correct += (pred.argmax(1) == labels).float().sum().item()
        # save validation logs
        # Average over the loader that was passed in, not over a notebook-level global.
        training_logs["val_loss"].append(val_loss / len(validation_loader))
        training_logs["val_acc"].append(correct / len(validation_loader.dataset))

        print(f"Epochs {epoch}".ljust(10), 
              f"train loss {training_logs['train_loss'][-1]:.5f}",
              f"train acc {training_logs['train_acc'][-1]:.5f}",

              f"val loss {training_logs['val_loss'][-1]:.5f}",
              f"val acc {training_logs['val_acc'][-1]:.5f}",
              )
        print("-"*80)
    return model, training_logs
        

In [None]:
n_epoch = 50

# Baseline run: the ThaiDigitNet model, cross-entropy loss, SGD with learning rate 0.01.
net = ThaiDigitNet()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.01)
net, net_history = train(
    model=net,
    n_epochs=n_epoch,
    loss_function=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    validation_loader=val_loader,
)

In [None]:
# Accuracy per epoch of the baseline model: training curve vs. validation curve
for history_key, curve_label in (("train_acc", "Train"), ("val_acc", "Validation")):
    plt.plot(net_history[history_key], label=curve_label)
plt.title("Train accuracy vs Validation accuracy")
plt.legend()
plt.show()

## **Save and load model**



In [None]:
# Store only the model parameters (the state dict) on disk.
save_path = "thai_digit.pth"
torch.save(obj=net.state_dict(), f=save_path)

In [None]:
# Create a fresh ThaiDigitNet, then copy the saved parameters into it.
model_path = "thai_digit.pth"
model = ThaiDigitNet()
model.load_state_dict(state_dict=torch.load(f=model_path))

## **Evaluate the model**

In [None]:
# TODOs: Create evaluation function for the model
def evaluate(dataframe, model):
    """Exercise placeholder for model evaluation; currently always returns None.

    Args:
        dataframe: Table whose "path" column holds the image paths to predict on.
        model: Model whose predictions are to be evaluated.
    """
    # Predict the image class from a given dataframe
    # Assume that image is in "path" column
    return None

In [None]:
# Run the evaluation helper on the validation dataframe with the reloaded baseline model.
evaluate(val_df, model)

In [None]:
# Show a random validation image with its predicted number
# randrange excludes len(val_df); randint(0, len(val_df)) includes it and could
# therefore pick a position one past the last row.
random_number = random.randrange(len(val_df))
img = Image.open(val_df.iloc[random_number].path)
img = transform(img)
transformed_img = 1 - img  # same inversion that ThaiDigitDataset applies to its images

with torch.no_grad():  # prediction only, no gradients needed
    pred = model(transformed_img).argmax(1)
plt.title(f"Predicted class = {pred.numpy()[0]}")
plt.imshow(img.squeeze(0), cmap="gray")
plt.show()

## **Train a neural network model with more layers**

In [None]:
class ThaiDigitMoreLayers(nn.Module):
    """Exercise skeleton for a network with more layers; no layers are defined yet.

    As written, ``forward`` only flattens its input and returns it.
    """
    def __init__(self):
        super(ThaiDigitMoreLayers, self).__init__()
        # TODOs: Create more layers

    def forward(self, x):
        # Flatten the input into rows of 28 * 28 values.
        x = x.view(-1, 28 * 28)
        # TODOs: Create forward pass
        return x

In [None]:
n_epochs = 50

# Same loss, optimizer settings and loaders as the baseline run; only the model differs.
more_layers_model = ThaiDigitMoreLayers()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=more_layers_model.parameters(), lr=0.01)

more_layers_model, more_layers_model_history = train(
    model=more_layers_model,
    n_epochs=n_epochs,
    loss_function=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    validation_loader=val_loader,
)


In [None]:
# Run the evaluation helper on the validation dataframe with the model that has more layers.
evaluate(val_df, more_layers_model)

In [None]:
# Plot the training accuracy and validation accuracy of the model with more layers
# (its own history, not the baseline's net_history)
plt.plot(more_layers_model_history["train_acc"], label="Train")
plt.plot(more_layers_model_history["val_acc"], label="Validation")
plt.legend()
plt.title("Train accuracy vs Validation accuracy")
plt.show()

In [None]:
# Plot the training loss and validation loss of the model with more layers
# (its own history, not the baseline's net_history)
plt.plot(more_layers_model_history["train_loss"], label="Train")
plt.plot(more_layers_model_history["val_loss"], label="Validation")
plt.legend()
plt.title("Train loss vs Validation loss")
plt.show()

When the training accuracy of the model is noticeably higher than the validation accuracy,
the model is overfitting the training data.
We can try to reduce the overfitting by adding regularization.

## **Regularization with Dropout**

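Dropout is one of the techniques used to prevent overfitting: during training it randomly sets a fraction of the activations to zero, so the network cannot rely too heavily on any single unit.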

In [None]:
class DropoutThaiDigit(nn.Module):
    """Exercise skeleton: a 784 -> 100 -> 10 network that is meant to gain a dropout layer.

    Until the TODOs are completed there is no dropout; ``forward`` applies
    fc1, ReLU and fc2 only.
    """
    def __init__(self):
        super(DropoutThaiDigit, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 100)
        self.fc2 = nn.Linear(100, 10)
        # TODOs: Add dropout layer

    def forward(self, x):
        # Flatten the input into rows of 28 * 28 values.
        x = x.view(-1, 28 * 28)
        x = self.fc1(x)
        x = F.relu(x)
        # TODOs: Add dropout layer
        x = self.fc2(x)
        return x

In [None]:
n_epochs = 50

# Same training setup as before, applied to the dropout model.
drop_model = DropoutThaiDigit()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=drop_model.parameters(), lr=0.01)

drop_model, drop_model_history = train(
    model=drop_model,
    n_epochs=n_epochs,
    loss_function=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    validation_loader=val_loader,
)

In [None]:
# Run the evaluation helper on the validation dataframe with the dropout model.
evaluate(val_df, drop_model)

In [None]:
# Accuracy per epoch of the dropout model: training curve vs. validation curve
for history_key, curve_label in (("train_acc", "Train"), ("val_acc", "Validation")):
    plt.plot(drop_model_history[history_key], label=curve_label)
plt.title("Train accuracy vs Validation accuracy")
plt.legend()
plt.show()

In [None]:
# Loss per epoch of the dropout model: training curve vs. validation curve
for history_key, curve_label in (("train_loss", "Train"), ("val_loss", "Validation")):
    plt.plot(drop_model_history[history_key], label=curve_label)
plt.title("Train loss vs Validation loss")
plt.legend()
plt.show()

Now the gap between the training accuracy and validation accuracy is smaller than before.

## **Image Augmentation**

Image augmentation is a technique used to artificially increase the size of a training dataset by creating modified versions of existing images.

In [None]:
# train_transform is the same as val_transform plus a RandomAffine augmentation step

train_transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.Grayscale(),
    # fill=255 pads the area uncovered by the rotation/shift with white. The default
    # fill of 0 (black) would become bright patches once the dataset computes 1 - image.
    # NOTE(review): assumes dark digits on a white background - confirm against the data.
    # NOTE(review): degrees=(15, 30) always rotates by 15 to 30 degrees in one direction;
    # use degrees=15 if a symmetric range of -15..+15 degrees was intended.
    transforms.RandomAffine(degrees=(15, 30), translate=(0.05, 0.1), scale=(1, 1), fill=255),
    transforms.ToTensor(),
])

# Validation images are never augmented; they only get the basic preprocessing.
val_transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.Grayscale(),
    transforms.ToTensor(),
])

In [None]:
# Apply train_transform to a given image
# img = Image.open("../thai-handwritten-dataset/2/0098326c-aa9e-410d-b949-e13d3cd74cfd.png")
# plt.imshow((1 - train_transform(img)).squeeze(0), cmap="gray")

In [None]:
# Rebuild both datasets: augmentation for training, plain preprocessing for validation.
train_thaidigit_dataset = ThaiDigitDataset(
    dataframe=train_df,
    transform=train_transform,
)
val_thaidigit_dataset = ThaiDigitDataset(
    dataframe=val_df,
    transform=val_transform,
)

In [None]:
# Loaders over the rebuilt datasets: batches of 16, shuffling for training only.
train_dataloader = DataLoader(
    dataset=train_thaidigit_dataset,
    batch_size=16,
    shuffle=True,
)
val_dataloader = DataLoader(
    dataset=val_thaidigit_dataset,
    batch_size=16,
    shuffle=False,
)

In [None]:
# Example of augmented image: take the second image of the first training batch
first_batch = next(iter(train_dataloader))
img = first_batch[0][1]
# The dataset already returned 1 - image, so inverting again restores the original polarity.
transformed_img = 1 - img

plt.imshow(transformed_img.squeeze(0), cmap="gray")
plt.show()

In [None]:
# Train a fresh dropout model, this time feeding it the augmented training loader
n_epochs = 50
augmented_model = DropoutThaiDigit()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=augmented_model.parameters(), lr=0.01)

augmented_model, augmented_model_history = train(
    model=augmented_model,
    n_epochs=n_epochs,
    loss_function=loss_fn,
    optimizer=optimizer,
    train_loader=train_dataloader,
    validation_loader=val_dataloader,
)

In [None]:
# Evaluate the model trained on augmented data against the same validation
# dataframe that the earlier models were evaluated on.
evaluate(val_df, augmented_model)

In [None]:
# Accuracy per epoch of the model trained on augmented data: training vs. validation
for history_key, curve_label in (("train_acc", "Train"), ("val_acc", "Validation")):
    plt.plot(augmented_model_history[history_key], label=curve_label)
plt.title("Train accuracy vs Validation accuracy")
plt.legend()
plt.show()

In [None]:
# Loss per epoch of the model trained on augmented data: training vs. validation
for history_key, curve_label in (("train_loss", "Train"), ("val_loss", "Validation")):
    plt.plot(augmented_model_history[history_key], label=curve_label)
plt.title("Train loss vs Validation loss")
plt.legend()
plt.show()