<h1>Imports</h1>

In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
from tqdm import tqdm 

<h1> Dataloader </h1>

In [22]:
class CustomDataset(Dataset):
    def __init__(self, dataset_folder, labels_file, transform=None):
        self.dataset_folder = dataset_folder
        self.labels_file = labels_file
        self.transform = transform
        self.data = []
        self.labels = []
        self.label_dict = {}
        self.label_to_idx = {}
        
        with open(labels_file, 'r', encoding='utf-8') as f:
            for line in f:
                img_path, label = line.strip().split('\t')
                img_name = img_path.split('/')[-1]  
                self.data.append(img_name)
                self.labels.append(label)
                
                if label not in self.label_dict:
                    self.label_dict[label] = len(self.label_dict)
                    self.label_to_idx[len(self.label_dict) - 1] = label

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = self.data[idx]
        label = self.labels[idx]
        img_path = os.path.join(self.dataset_folder, img_name)
        image = Image.open(img_path).convert("L")
        label_encoded = self.label_dict[label]
        if self.transform:
            image = self.transform(image)
        return image, label_encoded

transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

dataset_folder = r'D:\405 FOUND\Comp-Vision\OCR-With-CNN\CNN-TextRecognition\Dataset'
labels_file = r'D:\405 FOUND\Comp-Vision\OCR-With-CNN\CNN-TextRecognition\Dataset\Labels.txt'
dataset = CustomDataset(dataset_folder, labels_file, transform=transform)

train_data, val_data = train_test_split(list(zip(dataset.data, dataset.labels)), test_size=0.2, random_state=42)

train_dataset = CustomDataset(dataset_folder, labels_file, transform=transform)
train_dataset.data, train_dataset.labels = zip(*train_data)

val_dataset = CustomDataset(dataset_folder, labels_file, transform=transform)
val_dataset.data, val_dataset.labels = zip(*val_data)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

num_classes = len(dataset.label_dict)

print(f"Train dataset size: {len(train_loader.dataset)}")
print(f"Validation dataset size: {len(val_loader.dataset)}")
print(f"Number of classes: {num_classes}")
print(f"Label to index mapping: {dataset.label_dict}")
print(f"Index to label mapping: {dataset.label_to_idx}")


Train dataset size: 30936
Validation dataset size: 7734
Number of classes: 3867
Label to index mapping: {'أ': 0, 'ا': 1, 'ر': 2, 'و': 3, 'د': 4, 'کے': 5, 'کی': 6, 'ں': 7, 'میں': 8, 'ہے': 9, 'کر': 10, 'کا': 11, 'ن': 12, 'ہو': 13, 'ی': 14, 'سے': 15, 'نے': 16, 'کو': 17, 'س': 18, 'آ': 19, 'ز': 20, 'ہ': 21, 'ت': 22, 'پر': 23, '،': 24, 'م': 25, 'جا': 26, 'ہیں': 27, 'کہ': 28, 'ل': 29, 'با': 30, 'ے': 31, 'سا': 32, '0': 33, 'پا': 34, 'یہ': 35, 'بھی': 36, 'نا': 37, 'تا': 38, 'ئی': 39, 'یک': 40, 'تو': 41, 'ئے': 42, 'ما': 43, 'نہیں': 44, 'ڈ': 45, '1': 46, 'جو': 47, 'ب': 48, '2': 49, 'لا': 50, 'تی': 51, 'ف': 52, 'سر': 53, 'کیا': 54, 'گر': 55, 'نی': 56, 'شا': 57, 'نو': 58, 'مر': 59, 'ہی': 60, 'تے': 61, 'ہا': 62, 'مو': 63, 'نہ': 64, 'فر': 65, '5': 66, 'خو': 67, 'پ': 68, 'بر': 69, 'گا': 70, 'لئے': 71, 'لو': 72, 'یو': 73, 'خا': 74, 'ند': 75, 'تھا': 76, 'طر': 77, 'چا': 78, 'یں': 79, 'ہر': 80, 'کستا': 81, 'گی': 82, 'سو': 83, 'لے': 84, 'پو': 85, '3': 86, 'سی': 87, 'ہم': 88, 'ء': 89, 'قا': 90, '4': 91, '9'

<h1> Architecture </h1>
CNN-Fully Connected Layers

In [25]:
class CNNForOCR(nn.Module):
    def __init__(self, num_classes, dropout=0.7):
        super(CNNForOCR, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1), 
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(256, 256, kernel_size=1), 
            nn.ReLU(),
        )
        self.fc1 = nn.Linear(256 * 1 * 1, 1024)  
        self.fc2 = nn.Linear(1024, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

model = CNNForOCR(num_classes=num_classes, dropout=0.7)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00005)
num_epochs = 20

<h1> Training </h1>

In [None]:
for epoch in range(1, num_epochs + 1):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = torch.tensor([int(label) for label in labels]).to(device)
        optimizer.zero_grad()
        outputs = model(images)
        labels = labels.long()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    avg_train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct_train / total_train
    print(f"Epoch {epoch}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")


Epoch 1/20, Train Loss: 30.8248, Train Accuracy: 0.03%
Epoch 2/20, Train Loss: 14.3987, Train Accuracy: 0.02%
Epoch 3/20, Train Loss: 15.4684, Train Accuracy: 0.01%
Epoch 4/20, Train Loss: 16.1424, Train Accuracy: 0.02%
Epoch 5/20, Train Loss: 16.8872, Train Accuracy: 0.04%
Epoch 6/20, Train Loss: 17.3064, Train Accuracy: 0.03%
Epoch 7/20, Train Loss: 17.9182, Train Accuracy: 0.02%
Epoch 8/20, Train Loss: 18.2936, Train Accuracy: 0.02%
Epoch 9/20, Train Loss: 18.6257, Train Accuracy: 0.01%
Epoch 10/20, Train Loss: 18.9467, Train Accuracy: 0.03%
