## Creating a Custom OCR Model from Scratch Using PyTorch

### Load Libraries

In [1]:
import copy
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from PIL import Image
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from torch.utils.data import ConcatDataset, Dataset, DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

from dataloader.dataset import MNISTAndKaggleAZCombined

In [None]:
# Download the Kaggle A-Z handwritten-alphabet archive (alfabeto_A-Z.zip) into the
# current working directory; the unzip cell below reads it from there.
!wget https://iaexpert.academy/arquivos/alfabeto_A-Z.zip

In [None]:
# Extract the downloaded Kaggle A-Z archive into data/.
# The context manager closes the archive even if extraction fails part-way,
# so no file handle is leaked on error.
with zipfile.ZipFile(file='alfabeto_A-Z.zip', mode='r') as zip_obj:
    zip_obj.extractall('data/')

### Load Data

In [2]:
# Locations of the two data sources that are combined below.
mnist_root = 'data'  # root directory handed to the dataset class for MNIST
az_csv_file = 'data/A_Z Handwritten Data.csv'  # Kaggle A-Z CSV; presumably produced by extracting alfabeto_A-Z.zip into data/ - confirm

# Build the combined dataset object; later cells call its class-weight and split helpers.
data = MNISTAndKaggleAZCombined(mnist_root, az_csv_file)

In [3]:
# Get class weights
# In the recorded output this is a dict keyed by class index 0-35 whose values are
# scalar tensors (smallest weight is 1.0). Presumably indices 0-9 are digits and
# 10-35 are letters A-Z - confirm against dataloader/dataset.py.
class_weights = data.get_class_weights()
print("Class Weights:", class_weights)

Class Weights: {0: tensor(8.3768), 1: tensor(7.3410), 2: tensor(8.2725), 3: tensor(8.0976), 4: tensor(8.4738), 5: tensor(9.1597), 6: tensor(8.4097), 7: tensor(7.9288), 8: tensor(8.4725), 9: tensor(8.3106), 10: tensor(4.1694), 11: tensor(6.6711), 12: tensor(2.4702), 13: tensor(5.7060), 14: tensor(5.0546), 15: tensor(49.7206), 16: tensor(10.0356), 17: tensor(8.0112), 18: tensor(51.6295), 19: tensor(6.8085), 20: tensor(10.3204), 21: tensor(4.9909), 22: tensor(4.6875), 23: tensor(3.0418), 24: tensor(1.), 25: tensor(2.9898), 26: tensor(9.9492), 27: tensor(4.9996), 28: tensor(1.1943), 29: tensor(2.5706), 30: tensor(1.9934), 31: tensor(13.8271), 32: tensor(5.3621), 33: tensor(9.2195), 34: tensor(5.3251), 35: tensor(9.5170)}


In [4]:
# Split the combined dataset into a training portion and a test portion.
# NOTE(review): the split ratio and any seeding live inside train_test_split();
# no random seed is set in the cells above, so confirm the split is reproducible.
train_dataset, test_dataset = data.train_test_split()

In [5]:
# Training-time augmentation: small random rotation, a mild random crop/rescale
# back to 28x28, and a shift of up to 10% in width and height, followed by
# tensor conversion and normalisation with mean 0.5 / std 0.5.
augmentation_transform = transforms.Compose([
    transforms.RandomRotation(degrees=10),
    transforms.RandomResizedCrop(size=28, scale=(0.95, 1.0)),
    transforms.RandomHorizontalFlip(p=0),  # p=0 disables flipping (mirrored characters would be wrong)
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Width and height shift
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize
])

# Define a simple transformation (without augmentations) for the test set
simple_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# The transform is attached to the dataset each split wraps (`<split>.dataset`).
# If both splits wrap the SAME object - which is what torch.utils.data.random_split
# produces - assigning the test transform would silently overwrite the training
# augmentation. Give each split its own shallow copy of the wrapped dataset first:
# the underlying data is still shared, only the `transform` attribute is independent.
# NOTE(review): assumes `.dataset` is a plain, assignable attribute (true for
# torch.utils.data.Subset) - confirm against data.train_test_split().
train_dataset.dataset = copy.copy(train_dataset.dataset)
train_dataset.dataset.transform = augmentation_transform

test_dataset.dataset = copy.copy(test_dataset.dataset)
test_dataset.dataset.transform = simple_transform