In [1]:
import numpy as np
from typing import Tuple
import pandas as pd

In [2]:
# Build a 10,000-row sample of the A-Z handwritten dataset.
# `nrows` stops parsing after the first 10,000 data rows instead of loading the
# entire CSV into memory and slicing it afterwards.
alpha_data = pd.read_csv("../dataset/A_Z Handwritten Data.csv", nrows=10000)

In [None]:
# Persist the truncated sample next to the original dataset so it can be reused
# without re-reading the full file; index=False keeps the row index out of the CSV.
alpha_data.to_csv("../dataset/alpha_data_10000.csv", index=False)

In [2]:
def load_mnist_data(dataset_path: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Load the MNIST train/test splits from an ``.npz`` archive.

    Args:
        dataset_path: Path to an ``.npz`` archive holding the arrays
            ``x_train``, ``y_train``, ``x_test`` and ``y_test``.

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test)``.

    Raises:
        KeyError: If one of the four expected arrays is missing from the archive.
    """
    # SECURITY NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can
    # run arbitrary code. Only point this at archives from a trusted source.
    # The context manager closes the archive's file handle once the arrays have
    # been read; indexing the archive materialises each array in memory, so the
    # returned arrays stay valid after the archive is closed.
    with np.load(dataset_path, allow_pickle=True) as mnist_dataset:
        x_train = mnist_dataset["x_train"]
        y_train = mnist_dataset["y_train"]

        x_test = mnist_dataset["x_test"]
        y_test = mnist_dataset["y_test"]
    return x_train, y_train, x_test, y_test

In [3]:
def load_data(
    mnist_path: str = "../dataset/mnist.npz",
    alpha_path: str = "../dataset/A_Z Handwritten Data.csv",
    label_offset: int = 10,
) -> Tuple[np.ndarray, np.ndarray]:
    """Load the MNIST digits and the A-Z letters and merge them into one dataset.

    Args:
        mnist_path: Path to the MNIST ``.npz`` archive read by ``load_mnist_data``.
        alpha_path: Path to the A-Z CSV. Its ``"0"`` column is used as the label
            and the remaining columns as the flattened 28x28 pixels.
        label_offset: Value added to every letter label so that letter labels
            do not overlap with the MNIST digit labels.

    Returns:
        ``(data, labels)`` where ``data`` stacks all images along the first axis
        (digits first, then letters) and ``labels`` holds the matching labels.
    """
    # MNIST data: merge the predefined train/test splits into one pool.
    digit_image_train, digit_label_train, digit_image_test, digit_label_test = load_mnist_data(mnist_path)
    digit_label = np.hstack((digit_label_train, digit_label_test))
    digit_image = np.vstack((digit_image_train, digit_image_test))

    # A-Z data
    alpha_data = pd.read_csv(alpha_path)
    # The '0' column is the target; shift it so A-Z will not overlap with MNIST.
    alpha_label = alpha_data["0"].to_numpy() + label_offset
    # Drop the target column and restore each flattened row to a 28x28 image.
    alpha_image = alpha_data.drop(columns=["0"]).to_numpy().reshape(
        (len(alpha_data), 28, 28)
    )

    # Combine datasets
    labels = np.hstack((digit_label, alpha_label))
    data = np.vstack((digit_image, alpha_image))
    return data, labels

In [4]:
# Load data
# Build the combined digit + letter arrays and report their shapes as a quick
# sanity check that images and labels have the same number of samples.
data, labels = load_data()
print(f'Data shape: {data.shape}')
print(f'Labels shape: {labels.shape}')

Data shape: (442450, 28, 28)
Labels shape: (442450,)


In [None]:
from sklearn.model_selection import train_test_split

# Train test splits
# 80/20 split; stratify=labels keeps the class proportions the same in both
# parts, and the fixed random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.20,
    stratify=labels,
    random_state=101
)

# Delete data
# Drops the notebook's references to the full arrays now that the splits exist.
# NOTE: after this line the cell cannot be re-run on its own; re-run the
# load_data() cell first to recreate `data` and `labels`.
del data, labels

<h1>Create dataloader</h1>

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset

In [None]:
def get_dataloader(
    images: np.ndarray,
    labels: np.ndarray,
    batch_size: int = 32,
    shuffle: bool = True
) -> DataLoader:
    """Wrap image/label arrays in a batched PyTorch ``DataLoader``.

    Args:
        images: Array of images without a channel axis, e.g. ``(total, 28, 28)``.
        labels: Array with one integer class label per image.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples every epoch. Defaults to
            ``True`` (the previous hard-coded behaviour); pass ``False`` for
            validation/test data where a stable order is preferable.

    Returns:
        A ``DataLoader`` yielding ``(images, labels)`` batches, where images are
        ``float32`` with a singleton channel axis inserted at position 1 and
        labels are ``int64``.
    """
    # Add the channel axis expected by Conv2d: (total, 28, 28) -> (total, 1, 28, 28)
    image_tensor = torch.tensor(images, dtype=torch.float32).unsqueeze(1)

    # Class indices must be int64 ("long") for nn.CrossEntropyLoss
    label_tensor = torch.tensor(labels, dtype=torch.long)  # Shape: (total,)

    # Pair each image with its label
    dataset = TensorDataset(image_tensor, label_tensor)

    # Batch (and optionally shuffle) the paired samples
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

In [None]:
# Same batch size for both splits.
batch_size = 32

# NOTE: get_dataloader shuffles by default, so the test loader is shuffled as well.
train_dataloader = get_dataloader(images=x_train, labels=y_train, batch_size=batch_size)
test_dataloader = get_dataloader(images=x_test, labels=y_test, batch_size=batch_size)

<h1>Build model</h1>

In [None]:
from torch import nn


class TextRecognitionModel(nn.Module):
    """Small CNN classifier for single-channel character images.

    Architecture: three ``Conv2d -> ReLU -> MaxPool2d(2) -> Dropout`` stages
    (32, 64 and 64 channels), global average pooling, then two linear layers
    producing one raw logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 36, the value that was
            previously hard-coded (presumably 10 digits + 26 letters, matching
            the combined dataset built in this notebook).
    """

    def __init__(self, num_classes: int = 36):
        super().__init__()
        # The layer order is kept exactly as before so that state_dict keys
        # ("model.0.weight", ...) stay compatible with saved checkpoints.
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.2),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.15),

            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.1),

            # Global average pooling collapses each 64-channel map to one value,
            # so the linear head does not depend on the spatial size.
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(in_features=64, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=num_classes)
        )

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)`` for ``x`` of shape ``(batch, 1, H, W)``."""
        return self.model(x)

In [None]:
# Instantiate the network with its default configuration and keep it on the CPU.
model = TextRecognitionModel().to("cpu")

In [None]:
# Inspection only: displays the class of `model`; nothing later depends on this cell.
type(model)

In [None]:
import os
from tqdm import tqdm


def eval(
    val_dataloader: DataLoader,
    model: nn.Module,
    loss_function: nn.Module,
    device: torch.device
) -> float:
    """Compute the mean per-batch loss of ``model`` over ``val_dataloader``.

    Note: this function intentionally keeps its original name, which shadows
    Python's built-in ``eval`` inside this notebook.

    Args:
        val_dataloader: Loader yielding ``(images, labels)`` batches.
        model: Network to evaluate; it is expected to already live on ``device``.
        loss_function: Callable ``(predictions, labels) -> scalar tensor``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The sum of the batch losses divided by the number of batches.

    Raises:
        ValueError: If ``val_dataloader`` contains no batches.
    """
    num_batches = len(val_dataloader)
    if num_batches == 0:
        raise ValueError("val_dataloader yields no batches; cannot compute a mean loss")

    # Switch to inference mode so Dropout is disabled, and remember the previous
    # mode so a surrounding training loop keeps training afterwards.
    was_training = model.training
    model.eval()

    total_loss = 0.0
    try:
        with torch.no_grad():
            for images, labels in val_dataloader:
                images = images.to(device)
                labels = labels.to(device)

                pred = model(images)
                # Accumulate across batches (the previous code overwrote the value,
                # so only the last batch contributed to the result).
                total_loss += loss_function(pred, labels).item()
    finally:
        model.train(was_training)

    return total_loss / num_batches


def train(
    train_dataloader: DataLoader,
    val_dataloader: DataLoader,
    model: nn.Module,
    loss_function: nn.Module,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
    model_dir: str,
    eval_every: int = 1,
    epochs: int = 50
):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    Every ``eval_every`` epochs the model is scored on ``val_dataloader`` with the
    notebook's ``eval`` function; whenever the loss improves on the best value
    seen so far, ``model.state_dict()`` is written to ``<model_dir>/model.pt``.

    Args:
        train_dataloader: Loader yielding ``(images, labels)`` training batches.
        val_dataloader: Loader used for the periodic evaluation.
        model: Network to optimise; expected to already live on ``device``.
        loss_function: Callable ``(predictions, labels) -> scalar tensor``.
        optimizer: Optimizer constructed over ``model``'s parameters.
        device: Device the batches are moved to.
        model_dir: Directory for the checkpoint; created if it does not exist.
        eval_every: Evaluate after every this-many epochs. Values below 1
            disable evaluation (and therefore checkpointing).
        epochs: Number of passes over ``train_dataloader``.
    """
    def batch_train(
        images: torch.Tensor,
        labels: torch.Tensor,
        model: nn.Module,
        loss_function,
        optimizer,
        device: torch.device
    ):
        """Run one optimisation step on a single batch."""
        images = images.to(device)
        labels = labels.to(device)

        pred = model(images)

        loss = loss_function(pred, labels)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(model_dir, exist_ok=True)

    min_loss = None
    for epoch in tqdm(range(epochs)):
        # Make sure Dropout is active for training, whatever mode the
        # evaluation step (or the caller) left the model in.
        model.train()
        for images, labels in train_dataloader:
            batch_train(
                images=images,
                labels=labels,
                model=model,
                loss_function=loss_function,
                optimizer=optimizer,
                device=device
            )

        # Evaluate periodically. The previous `epoch + 1 == eval_every` test
        # fired at most once in the whole run.
        if eval_every > 0 and (epoch + 1) % eval_every == 0:
            test_loss = eval(
                val_dataloader=val_dataloader,
                model=model,
                loss_function=loss_function,
                device=device
            )

            # Keep only the best-scoring weights on disk.
            if min_loss is None or test_loss < min_loss:
                min_loss = test_loss
                torch.save(model.state_dict(), os.path.join(model_dir, "model.pt"))

In [None]:
# Cross-entropy over the raw logits produced by the model (no softmax layer in the network).
loss = nn.CrossEntropyLoss()

# Adam over all model parameters with a fixed learning rate.
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run a single training epoch on the CPU, evaluating once at the end of it.
# The held-out test split doubles as the validation set here, and the best
# weights are written to ../models/model.pt.
train(
    train_dataloader=train_dataloader,
    val_dataloader=test_dataloader,
    model=model,
    loss_function=loss,
    optimizer=optimizer,
    device="cpu",
    model_dir="../models/",
    eval_every=1,
    epochs=1
)