In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold

In [32]:
class ArabicCharDataset(Dataset):
    def __init__(self, image_file, label_file, transform=None):
        self.images = pd.read_csv(image_file, header=None).values
        self.labels = pd.read_csv(label_file, header=None).values.ravel() - 1
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx].reshape(32, 32).astype(np.float32) / 255.0
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

class OptimizedArabicCharNet(nn.Module):
    def __init__(self, num_classes):
        super(OptimizedArabicCharNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [33]:
# Hyperparameters and Setup
num_classes = 28 
learning_rate = 0.001
batch_size = 64
num_epochs = 20  
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

In [34]:
# Data Augmentation
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(5),  
    transforms.RandomAffine(0, shear=5, scale=(0.9, 1.1)),  
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [35]:
# Data Loading
train_dataset = ArabicCharDataset('data/csvTrainImages 13440x1024.csv', 'data/csvTrainLabel 13440x1.csv', transform=transform)
test_dataset = ArabicCharDataset('data/csvTestImages 3360x1024.csv', 'data/csvTestLabel 3360x1.csv', transform=transform)

In [36]:
# Cross-validation setup
kfold = KFold(n_splits=3, shuffle=True, random_state=42)

def train_with_early_stopping(model, train_loader, val_loader, criterion, optimizer, scheduler, device, num_epochs, patience=5):
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    best_accuracy = 0.0
    
    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        accuracy = 100 * correct / total

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Accuracy: {accuracy:.2f}%')

        # Learning rate scheduling
        scheduler.step(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                torch.save(model.state_dict(), 'best_model.pth')
        else:
            epochs_without_improvement += 1
        
        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered after {epoch + 1} epochs")
            break

    return model, best_accuracy

def evaluate(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

# Cross-validation training
best_fold_accuracy = 0.0
best_fold_model = None

for fold, (train_ids, val_ids) in enumerate(kfold.split(train_dataset)):
    print(f'FOLD {fold}')
    print('--------------------------------')

    train_subsampler = SubsetRandomSampler(train_ids)
    val_subsampler = SubsetRandomSampler(val_ids)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_subsampler)
    val_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=val_subsampler)
    
    model = OptimizedArabicCharNet(num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1)

    # Train the model
    model, fold_accuracy = train_with_early_stopping(model, train_loader, val_loader, criterion, optimizer, scheduler, device, num_epochs)

    # Update best model if this fold's accuracy is higher
    if fold_accuracy > best_fold_accuracy:
        best_fold_accuracy = fold_accuracy
        best_fold_model = model.state_dict()

    # Evaluate on the test set
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    evaluate(model, test_loader, device)

# Save the best model from all folds
torch.save(best_fold_model, 'Arabic_OCR_PyTorch.pth')

FOLD 0
--------------------------------
Epoch [1/20], Train Loss: 1.9599, Val Loss: 0.8350, Accuracy: 73.37%
Epoch [2/20], Train Loss: 0.8391, Val Loss: 0.4944, Accuracy: 82.72%
Epoch [3/20], Train Loss: 0.6216, Val Loss: 0.3689, Accuracy: 87.48%
Epoch [4/20], Train Loss: 0.5041, Val Loss: 0.5546, Accuracy: 83.06%
Epoch [5/20], Train Loss: 0.4514, Val Loss: 0.2936, Accuracy: 89.93%
Epoch [6/20], Train Loss: 0.4172, Val Loss: 0.3375, Accuracy: 88.93%
Epoch [7/20], Train Loss: 0.3588, Val Loss: 0.9740, Accuracy: 72.52%
Epoch [8/20], Train Loss: 0.3306, Val Loss: 0.2601, Accuracy: 91.72%
Epoch [9/20], Train Loss: 0.3096, Val Loss: 0.3092, Accuracy: 90.56%
Epoch [10/20], Train Loss: 0.3047, Val Loss: 0.3544, Accuracy: 87.57%
Epoch [11/20], Train Loss: 0.2825, Val Loss: 0.4858, Accuracy: 85.85%
Epoch [12/20], Train Loss: 0.2252, Val Loss: 0.1803, Accuracy: 94.67%
Epoch [13/20], Train Loss: 0.2155, Val Loss: 0.1673, Accuracy: 94.89%
Epoch [14/20], Train Loss: 0.1840, Val Loss: 0.1660, Accura

In [37]:
# Final evaluation on the test set using the best model from the last fold
print("Final Evaluation on Test Set:")
final_model = OptimizedArabicCharNet(num_classes).to(device)
final_model.load_state_dict(torch.load('Arabic_OCR_PyTorch.pth'))
evaluate(final_model, test_loader, device)

def classify_image(model, csv_file, row_index, transform, device):
    model.eval()
    df = pd.read_csv(csv_file, header=None)
    image = df.iloc[row_index].values.reshape(32, 32).astype(np.float32) / 255.0
    image = transform(image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)
    
    return f"Detected Arabic character: {predicted.item() + 1}" 

Final Evaluation on Test Set:


  final_model.load_state_dict(torch.load('Arabic_OCR_PyTorch.pth'))


Test Accuracy: 95.12%


In [38]:
# Character mapping
arabic_chars = 'أبتثجحخدذرزسشصضطظعغفقكلمنهوي'
arabic_characters = ['alef', 'beh', 'teh', 'theh', 'jeem', 'hah', 'khah', 'dal', 'thal',
                    'reh', 'zain', 'seen', 'sheen', 'sad', 'dad', 'tah', 'zah', 'ain',
                    'ghain', 'feh', 'qaf', 'kaf', 'lam', 'meem', 'noon', 'heh', 'waw', 'yeh']

In [39]:
# Example usage
result = classify_image(model, 'data/csvTestImages 3360x1024.csv', 16, transform, device)
print(result)

# Map the result to Arabic character and English transliteration
result_index = int(result.split()[-1]) - 1
if 0 <= result_index < len(arabic_chars):
    print(f"The detected Arabic character is: {arabic_chars[result_index]}")
    print(f"The detected Arabic character (in English) is: {arabic_characters[result_index]}")
else:
    print(f"Error: Invalid index {result_index}")

Detected Arabic character: 8
The detected Arabic character is: د
The detected Arabic character (in English) is: dal
