In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from PIL import Image
import os
import pandas as pd
from tqdm import tqdm
from typing import List, Dict, Tuple

class Config:
    CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 .,-?!&"
    VOCAB_SIZE = len(CHARS) + 1
    
    BATCH_SIZE = 32
    NUM_EPOCHS = 10
    LEARNING_RATE = 1e-3
    
    IMG_HEIGHT = 64
    IMG_WIDTH = 256
    HIDDEN_SIZE = 256
    
    BASE_DIR = os.getcwd()
    CSV_DIR = os.path.join(BASE_DIR, "CSV")
    TRAIN_IMG_DIR = os.path.join(BASE_DIR, "train_v2/train")
    
    TRAIN_CSV = os.path.join(CSV_DIR, "written_name_train.csv")
    
    MINI_MODEL_PATH = os.path.join(BASE_DIR, "htr_crnn_mini.pth")


def create_char_to_int_mapping(chars: str) -> Tuple[Dict[str, int], Dict[int, str]]:
    char_to_int = {char: i + 1 for i, char in enumerate(chars)}
    int_to_char = {i + 1: char for i, char in enumerate(chars)}
    char_to_int['CTC_BLANK'] = 0
    int_to_char[0] = ''
    return char_to_int, int_to_char

CHAR_TO_INT, INT_TO_CHAR = create_char_to_int_mapping(Config.CHARS)


def load_all_annotations() -> Dict[str, Tuple[List[Tuple[str, str]], str]]:
    csv_path = Config.TRAIN_CSV
    img_dir = Config.TRAIN_IMG_DIR
    
    if not os.path.exists(csv_path):
        print(f"❌ CSV not found at {csv_path}")
        return {}
    
    df = pd.read_csv(csv_path)
    df.columns = ['image_filename', 'transcription_text']
    df.dropna(inplace=True)
    
    df['exists'] = df['image_filename'].apply(lambda x: os.path.exists(os.path.join(img_dir, x)))
    df = df[df['exists']].drop(columns=['exists'])

    annotations = list(df[['image_filename', 'transcription_text']].itertuples(index=False, name=None))
    print(f"✅ Loaded {len(annotations)} valid records for training.")
    
    return {'train': (annotations, img_dir)}


class HTRDataset(Dataset):
    def __init__(self, img_dir: str, annotations: List[Tuple[str, str]], transform=None):
        self.img_dir = img_dir
        self.annotations = annotations
        self.transform = transform
        self.char_to_int = CHAR_TO_INT

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_filename, label_text = self.annotations[idx]
        img_path = os.path.join(self.img_dir, img_filename)
        
        try:
            image = Image.open(img_path).convert('RGB')
        except Exception:
            image = Image.new('RGB', (Config.IMG_WIDTH, Config.IMG_HEIGHT), color='black')
            
        if self.transform:
            image = self.transform(image)
        
        label_encoded = [self.char_to_int.get(char, 0) for char in label_text]
        target = torch.tensor(label_encoded, dtype=torch.long)
        target_len = torch.tensor(len(target), dtype=torch.long)
        
        return image, target, target_len


def collate_fn(batch, model_cnn_output_width=None):
    images, targets, target_lengths = zip(*batch)
    images = torch.stack(images, 0)
    
    max_target_len = max(target_lengths)
    padded_targets = torch.zeros((len(targets), max_target_len), dtype=torch.long)
    for i, target in enumerate(targets):
        padded_targets[i, :target.size(0)] = target
    
    target_lengths = torch.stack(target_lengths)
    
    if model_cnn_output_width is None:
        model_cnn_output_width = Config.IMG_WIDTH // 4
    
    input_lengths = torch.full((len(batch),), model_cnn_output_width, dtype=torch.long)
    
    return images, padded_targets, input_lengths, target_lengths


class CRNN(nn.Module):
    def __init__(self, img_height, vocab_size, hidden_size):
        super(CRNN, self).__init__()
        
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.MaxPool2d((2, 1), (2, 1)),
            nn.Conv2d(256, 512, 3, padding=1), nn.BatchNorm2d(512), nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1), nn.BatchNorm2d(512), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, None))
        )
        
        self.map_to_rnn = nn.Linear(512, hidden_size)
        
        self.rnn = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=3,
            bidirectional=True,
            dropout=0.3,
            batch_first=False
        )
        
        self.linear = nn.Linear(hidden_size * 2, vocab_size)
        
        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        cnn_out = self.cnn(x)
        cnn_out = cnn_out.squeeze(2)
        cnn_out = cnn_out.permute(0, 2, 1)
        
        rnn_input = self.map_to_rnn(cnn_out)
        rnn_input = rnn_input.permute(1, 0, 2)
        
        rnn_out, _ = self.rnn(rnn_input)
        output = self.linear(rnn_out)
        output = nn.functional.log_softmax(output, dim=2)
        
        return output
    
    def get_cnn_output_width(self, batch_size=1, device='cpu'):
        with torch.no_grad():
            dummy_input = torch.zeros(batch_size, 3, Config.IMG_HEIGHT, Config.IMG_WIDTH, device=device)
            cnn_out = self.cnn(dummy_input)
            return cnn_out.size(-1)


def train_htr_model(data_loader, model, criterion, optimizer, device, scheduler=None):
    model.train()
    min_loss = float('inf')
    
    pbar = tqdm(data_loader, desc="Training")
    for images, targets, input_lengths, target_lengths in pbar:
        images, targets = images.to(device), targets.to(device)
        
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, targets, input_lengths.to(device), target_lengths.to(device))
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        
        optimizer.step()
        
        batch_loss = loss.item()
        min_loss = min(min_loss, batch_loss)
        pbar.set_postfix({'batch_loss': f'{batch_loss:.4f}', 'min_loss': f'{min_loss:.4f}'})

    return min_loss


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}\n")
    
    train_transform = transforms.Compose([
        transforms.Resize((Config.IMG_HEIGHT, Config.IMG_WIDTH)),
        transforms.RandomAffine(degrees=3, translate=(0.02, 0.02), shear=2),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    
    val_transform = transforms.Compose([
        transforms.Resize((Config.IMG_HEIGHT, Config.IMG_WIDTH)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    
    all_data = load_all_annotations()
    
    if 'train' not in all_data:
        print("❌ Training data not found!")
        exit(1)
    
    train_annotations, train_img_dir = all_data['train']
    train_dataset = HTRDataset(img_dir=train_img_dir, annotations=train_annotations, transform=train_transform)
    
    print("\n" + "="*60)
    print("MINI DATASET TRAINING (IMPROVED)")
    print("="*60)
    
    mini_size = 5000
    mini_indices = torch.randperm(len(train_dataset))[:mini_size].tolist()
    mini_dataset = Subset(train_dataset, mini_indices)
    
    mini_loader = DataLoader(
        mini_dataset,
        batch_size=Config.BATCH_SIZE,
        shuffle=True,
        collate_fn=collate_fn
    )
    
    print(f"Mini dataset size: {len(mini_dataset)} samples")
    print(f"Training mini model for {Config.NUM_EPOCHS} epochs...\n")
    
    model_mini = CRNN(Config.IMG_HEIGHT, Config.VOCAB_SIZE, Config.HIDDEN_SIZE).to(device)
    
    cnn_output_width = model_mini.get_cnn_output_width(batch_size=1, device=device)
    print(f"CNN output width: {cnn_output_width}")
    
    def collate_fn_with_width(batch):
        return collate_fn(batch, model_cnn_output_width=cnn_output_width)
    
    criterion = nn.CTCLoss(blank=0, reduction='mean', zero_infinity=True)
    optimizer_mini = optim.Adam(model_mini.parameters(), lr=Config.LEARNING_RATE)
    
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_mini, mode='min', factor=0.5, patience=3, verbose=True
    )
    
    best_loss_mini = float('inf')
    loss_history = []
    
    for epoch in range(1, Config.NUM_EPOCHS + 1):
        print(f"Epoch {epoch}/{Config.NUM_EPOCHS}")
        min_epoch_loss = train_htr_model(mini_loader, model_mini, criterion, optimizer_mini, device, scheduler)
        print(f"Lowest Loss in Epoch: {min_epoch_loss:.4f}\n")
        
        loss_history.append(min_epoch_loss)
        
        scheduler.step(min_epoch_loss)
        
        if min_epoch_loss < best_loss_mini:
            best_loss_mini = min_epoch_loss
            torch.save(model_mini.state_dict(), Config.MINI_MODEL_PATH)
            print(f"✅ Best model saved! Lowest loss: {best_loss_mini:.4f}\n")
        else:
            print(f"Loss did not improve. Best so far: {best_loss_mini:.4f}\n")
    
    print(f"\n✅ Training complete! Best loss: {best_loss_mini:.4f}")
    print(f"Model saved to: {Config.MINI_MODEL_PATH}")
    print(f"\nLoss history by epoch:")
    for epoch, loss in enumerate(loss_history, 1):
        print(f"  Epoch {epoch}: {loss:.4f}")

Using device: cpu

✅ Loaded 330396 valid records for training.

MINI DATASET TRAINING (IMPROVED)
Mini dataset size: 5000 samples
Training mini model for 10 epochs...





CNN output width: 64
Epoch 1/10


Training: 100%|██████████████████████████████████| 157/157 [07:04<00:00,  2.70s/it, batch_loss=3.2764, min_loss=2.9628]


Lowest Loss in Epoch: 2.9628

✅ Best model saved! Lowest loss: 2.9628

Epoch 2/10


Training: 100%|██████████████████████████████████| 157/157 [06:14<00:00,  2.38s/it, batch_loss=3.0321, min_loss=2.7900]


Lowest Loss in Epoch: 2.7900

✅ Best model saved! Lowest loss: 2.7900

Epoch 3/10


Training: 100%|██████████████████████████████████| 157/157 [06:57<00:00,  2.66s/it, batch_loss=2.9452, min_loss=2.7173]


Lowest Loss in Epoch: 2.7173

✅ Best model saved! Lowest loss: 2.7173

Epoch 4/10


Training: 100%|██████████████████████████████████| 157/157 [06:09<00:00,  2.35s/it, batch_loss=3.3055, min_loss=2.6923]


Lowest Loss in Epoch: 2.6923

✅ Best model saved! Lowest loss: 2.6923

Epoch 5/10


Training: 100%|██████████████████████████████████| 157/157 [06:08<00:00,  2.35s/it, batch_loss=2.7364, min_loss=2.6364]


Lowest Loss in Epoch: 2.6364

✅ Best model saved! Lowest loss: 2.6364

Epoch 6/10


Training: 100%|██████████████████████████████████| 157/157 [06:11<00:00,  2.37s/it, batch_loss=2.8129, min_loss=2.6192]


Lowest Loss in Epoch: 2.6192

✅ Best model saved! Lowest loss: 2.6192

Epoch 7/10


Training: 100%|██████████████████████████████████| 157/157 [06:10<00:00,  2.36s/it, batch_loss=2.8105, min_loss=2.5785]


Lowest Loss in Epoch: 2.5785

✅ Best model saved! Lowest loss: 2.5785

Epoch 8/10


Training: 100%|██████████████████████████████████| 157/157 [06:10<00:00,  2.36s/it, batch_loss=2.8365, min_loss=2.5585]


Lowest Loss in Epoch: 2.5585

✅ Best model saved! Lowest loss: 2.5585

Epoch 9/10


Training: 100%|██████████████████████████████████| 157/157 [06:07<00:00,  2.34s/it, batch_loss=2.9176, min_loss=2.5796]


Lowest Loss in Epoch: 2.5796

Loss did not improve. Best so far: 2.5585

Epoch 10/10


Training: 100%|██████████████████████████████████| 157/157 [06:04<00:00,  2.32s/it, batch_loss=2.8161, min_loss=2.5347]

Lowest Loss in Epoch: 2.5347

✅ Best model saved! Lowest loss: 2.5347


✅ Training complete! Best loss: 2.5347
Model saved to: C:\Users\ahmed\Desktop\Ahmed Sajid\Office - NCV\NCV - HTR\htr_crnn_mini.pth

Loss history by epoch:
  Epoch 1: 2.9628
  Epoch 2: 2.7900
  Epoch 3: 2.7173
  Epoch 4: 2.6923
  Epoch 5: 2.6364
  Epoch 6: 2.6192
  Epoch 7: 2.5785
  Epoch 8: 2.5585
  Epoch 9: 2.5796
  Epoch 10: 2.5347





In [2]:
#Continue training the model with improvements from Document 2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from PIL import Image
import os
import pandas as pd
from tqdm import tqdm
from typing import List, Dict, Tuple

# --- Configuration ---
class Config:
    CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 .,-?!&"
    VOCAB_SIZE = len(CHARS) + 1
    
    # Training Parameters
    BATCH_SIZE = 32
    NUM_EPOCHS = 50  # Additional epochs to train
    LEARNING_RATE = 1e-3
    
    # Image Size
    IMG_HEIGHT = 64
    IMG_WIDTH = 256
    HIDDEN_SIZE = 256
    
    # Directory Structure
    BASE_DIR = os.getcwd()
    CSV_DIR = os.path.join(BASE_DIR, "CSV")
    TRAIN_IMG_DIR = os.path.join(BASE_DIR, "train_v2/train")
    
    # Annotation Files
    TRAIN_CSV = os.path.join(CSV_DIR, "written_name_train.csv")
    
    # Model Paths
    MINI_MODEL_PATH = os.path.join(BASE_DIR, "htr_crnn_mini.pth")


def create_char_to_int_mapping(chars: str) -> Tuple[Dict[str, int], Dict[int, str]]:
    char_to_int = {char: i + 1 for i, char in enumerate(chars)}
    int_to_char = {i + 1: char for i, char in enumerate(chars)}
    char_to_int['CTC_BLANK'] = 0
    int_to_char[0] = ''
    return char_to_int, int_to_char

CHAR_TO_INT, INT_TO_CHAR = create_char_to_int_mapping(Config.CHARS)


def load_all_annotations() -> Dict[str, Tuple[List[Tuple[str, str]], str]]:
    csv_path = Config.TRAIN_CSV
    img_dir = Config.TRAIN_IMG_DIR
    
    if not os.path.exists(csv_path):
        print(f"❌ CSV not found at {csv_path}")
        return {}
    
    df = pd.read_csv(csv_path)
    df.columns = ['image_filename', 'transcription_text']
    df.dropna(inplace=True)
    
    df['exists'] = df['image_filename'].apply(lambda x: os.path.exists(os.path.join(img_dir, x)))
    df = df[df['exists']].drop(columns=['exists'])

    annotations = list(df[['image_filename', 'transcription_text']].itertuples(index=False, name=None))
    print(f"✅ Loaded {len(annotations)} valid records for training.")
    
    return {'train': (annotations, img_dir)}


class HTRDataset(Dataset):
    def __init__(self, img_dir: str, annotations: List[Tuple[str, str]], transform=None):
        self.img_dir = img_dir
        self.annotations = annotations
        self.transform = transform
        self.char_to_int = CHAR_TO_INT

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_filename, label_text = self.annotations[idx]
        img_path = os.path.join(self.img_dir, img_filename)
        
        try:
            image = Image.open(img_path).convert('RGB')
        except Exception:
            image = Image.new('RGB', (Config.IMG_WIDTH, Config.IMG_HEIGHT), color='black')
            
        if self.transform:
            image = self.transform(image)
        
        label_encoded = [self.char_to_int.get(char, 0) for char in label_text]
        target = torch.tensor(label_encoded, dtype=torch.long)
        target_len = torch.tensor(len(target), dtype=torch.long)
        
        return image, target, target_len


def collate_fn(batch, model_cnn_output_width=None):
    images, targets, target_lengths = zip(*batch)
    images = torch.stack(images, 0)
    
    max_target_len = max(target_lengths)
    padded_targets = torch.zeros((len(targets), max_target_len), dtype=torch.long)
    for i, target in enumerate(targets):
        padded_targets[i, :target.size(0)] = target
    
    target_lengths = torch.stack(target_lengths)
    
    # Use computed CNN output width if provided, else use approximation
    if model_cnn_output_width is None:
        model_cnn_output_width = Config.IMG_WIDTH // 4
    
    input_lengths = torch.full((len(batch),), model_cnn_output_width, dtype=torch.long)
    
    return images, padded_targets, input_lengths, target_lengths


class CRNN(nn.Module):
    def __init__(self, img_height, vocab_size, hidden_size):
        super(CRNN, self).__init__()
        
        # Improved CNN with deeper architecture and BatchNorm
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.MaxPool2d((2, 1), (2, 1)),
            nn.Conv2d(256, 512, 3, padding=1), nn.BatchNorm2d(512), nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1), nn.BatchNorm2d(512), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, None))
        )
        
        # Map from CNN output (512 channels) to RNN input
        self.map_to_rnn = nn.Linear(512, hidden_size)
        
        # Improved RNN with dropout and extra layer
        self.rnn = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=3,
            bidirectional=True,
            dropout=0.3,
            batch_first=False
        )
        
        self.linear = nn.Linear(hidden_size * 2, vocab_size)

    def forward(self, x):
        cnn_out = self.cnn(x)
        cnn_out = cnn_out.squeeze(2)
        cnn_out = cnn_out.permute(0, 2, 1)
        
        rnn_input = self.map_to_rnn(cnn_out)
        rnn_input = rnn_input.permute(1, 0, 2)
        
        rnn_out, _ = self.rnn(rnn_input)
        output = self.linear(rnn_out)
        output = nn.functional.log_softmax(output, dim=2)
        
        return output
    
    def get_cnn_output_width(self, batch_size=1, device='cpu'):
        """Compute actual CNN output width dynamically"""
        with torch.no_grad():
            dummy_input = torch.zeros(batch_size, 3, Config.IMG_HEIGHT, Config.IMG_WIDTH, device=device)
            cnn_out = self.cnn(dummy_input)
            return cnn_out.size(-1)


def train_htr_model(data_loader, model, criterion, optimizer, device, scheduler=None):
    """Training with gradient clipping - returns lowest loss from epoch"""
    model.train()
    min_loss = float('inf')
    
    pbar = tqdm(data_loader, desc="Training")
    for images, targets, input_lengths, target_lengths in pbar:
        images, targets = images.to(device), targets.to(device)
        
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, targets, input_lengths.to(device), target_lengths.to(device))
        loss.backward()
        
        # Gradient clipping to prevent explosions
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        
        optimizer.step()
        
        batch_loss = loss.item()
        min_loss = min(min_loss, batch_loss)
        pbar.set_postfix({'batch_loss': f'{batch_loss:.4f}', 'min_loss': f'{min_loss:.4f}'})

    return min_loss


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}\n")
    
    # Check if model exists
    if not os.path.exists(Config.MINI_MODEL_PATH):
        print(f"❌ No existing model found at {Config.MINI_MODEL_PATH}")
        print("Please train the model first using the training script.")
        exit(1)
    
    # Image transformations with augmentation and normalization
    train_transform = transforms.Compose([
        transforms.Resize((Config.IMG_HEIGHT, Config.IMG_WIDTH)),
        transforms.RandomAffine(degrees=3, translate=(0.02, 0.02), shear=2),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    
    # Load data
    all_data = load_all_annotations()
    
    if 'train' not in all_data:
        print("❌ Training data not found!")
        exit(1)
    
    train_annotations, train_img_dir = all_data['train']
    train_dataset = HTRDataset(img_dir=train_img_dir, annotations=train_annotations, transform=train_transform)
    
    print("\n" + "="*60)
    print("CONTINUE TRAINING FROM SAVED MODEL (IMPROVED)")
    print("="*60)
    
    mini_size = 5000
    mini_indices = torch.randperm(len(train_dataset))[:mini_size].tolist()
    mini_dataset = Subset(train_dataset, mini_indices)
    
    mini_loader = DataLoader(
        mini_dataset,
        batch_size=Config.BATCH_SIZE,
        shuffle=True,
        collate_fn=collate_fn
    )
    
    print(f"Mini dataset size: {len(mini_dataset)} samples")
    
    # Load existing model
    print(f"\nLoading existing model from: {Config.MINI_MODEL_PATH}")
    model_mini = CRNN(Config.IMG_HEIGHT, Config.VOCAB_SIZE, Config.HIDDEN_SIZE).to(device)
    model_mini.load_state_dict(torch.load(Config.MINI_MODEL_PATH, map_location=device))
    print(f"✅ Model loaded successfully!")
    
    # Compute actual CNN output width for accurate CTC loss
    cnn_output_width = model_mini.get_cnn_output_width(batch_size=1, device=device)
    print(f"CNN output width: {cnn_output_width}")
    
    # Update collate function to use actual CNN output width
    def collate_fn_with_width(batch):
        return collate_fn(batch, model_cnn_output_width=cnn_output_width)
    
    mini_loader = DataLoader(
        mini_dataset,
        batch_size=Config.BATCH_SIZE,
        shuffle=True,
        collate_fn=collate_fn_with_width
    )
    
    print(f"\nContinuing training for {Config.NUM_EPOCHS} more epochs...\n")
    
    criterion = nn.CTCLoss(blank=0, reduction='mean', zero_infinity=True)
    optimizer_mini = optim.Adam(model_mini.parameters(), lr=Config.LEARNING_RATE)
    
    # Add learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_mini, mode='min', factor=0.5, patience=3, verbose=True
    )
    
    best_loss_mini = float('inf')
    loss_history = []
    
    for epoch in range(1, Config.NUM_EPOCHS + 1):
        print(f"Epoch {epoch}/{Config.NUM_EPOCHS}")
        min_loss = train_htr_model(mini_loader, model_mini, criterion, optimizer_mini, device, scheduler)
        print(f"Minimum Loss: {min_loss:.4f}\n")
        
        loss_history.append(min_loss)
        
        # Step learning rate based on loss plateau
        scheduler.step(min_loss)
        
        if min_loss < best_loss_mini:
            best_loss_mini = min_loss
            torch.save(model_mini.state_dict(), Config.MINI_MODEL_PATH)
            print(f"✅ Best model saved! Lowest loss: {best_loss_mini:.4f}\n")
        else:
            print(f"Loss did not improve. Best so far: {best_loss_mini:.4f}\n")
    
    print(f"\n✅ Continued training complete! Best loss achieved: {best_loss_mini:.4f}")
    print(f"Model saved to: {Config.MINI_MODEL_PATH}")
    print(f"\nLoss history by epoch:")
    for epoch, loss in enumerate(loss_history, 1):
        print(f"  Epoch {epoch}: {loss:.4f}")

Using device: cpu

✅ Loaded 330396 valid records for training.

CONTINUE TRAINING FROM SAVED MODEL (IMPROVED)
Mini dataset size: 5000 samples

Loading existing model from: C:\Users\ahmed\Desktop\Ahmed Sajid\Office - NCV\NCV - HTR\htr_crnn_mini.pth
✅ Model loaded successfully!


  model_mini.load_state_dict(torch.load(Config.MINI_MODEL_PATH, map_location=device))


CNN output width: 64

Continuing training for 50 more epochs...

Epoch 1/50


Training: 100%|██████████████████████████████████| 157/157 [07:14<00:00,  2.77s/it, batch_loss=2.2780, min_loss=2.0026]


Minimum Loss: 2.0026

✅ Best model saved! Lowest loss: 2.0026

Epoch 2/50


Training: 100%|██████████████████████████████████| 157/157 [06:03<00:00,  2.31s/it, batch_loss=2.0940, min_loss=1.7183]


Minimum Loss: 1.7183

✅ Best model saved! Lowest loss: 1.7183

Epoch 3/50


Training: 100%|██████████████████████████████████| 157/157 [06:04<00:00,  2.32s/it, batch_loss=1.9384, min_loss=1.7456]


Minimum Loss: 1.7456

Loss did not improve. Best so far: 1.7183

Epoch 4/50


Training: 100%|██████████████████████████████████| 157/157 [06:10<00:00,  2.36s/it, batch_loss=1.7950, min_loss=1.6372]


Minimum Loss: 1.6372

✅ Best model saved! Lowest loss: 1.6372

Epoch 5/50


Training: 100%|██████████████████████████████████| 157/157 [06:03<00:00,  2.32s/it, batch_loss=2.1004, min_loss=1.5689]


Minimum Loss: 1.5689

✅ Best model saved! Lowest loss: 1.5689

Epoch 6/50


Training: 100%|██████████████████████████████████| 157/157 [06:02<00:00,  2.31s/it, batch_loss=1.3110, min_loss=1.3110]


Minimum Loss: 1.3110

✅ Best model saved! Lowest loss: 1.3110

Epoch 7/50


Training: 100%|██████████████████████████████████| 157/157 [05:59<00:00,  2.29s/it, batch_loss=1.0817, min_loss=1.0817]


Minimum Loss: 1.0817

✅ Best model saved! Lowest loss: 1.0817

Epoch 8/50


Training: 100%|██████████████████████████████████| 157/157 [06:04<00:00,  2.32s/it, batch_loss=0.7769, min_loss=0.7769]


Minimum Loss: 0.7769

✅ Best model saved! Lowest loss: 0.7769

Epoch 9/50


Training: 100%|██████████████████████████████████| 157/157 [06:03<00:00,  2.32s/it, batch_loss=1.2977, min_loss=1.0491]


Minimum Loss: 1.0491

Loss did not improve. Best so far: 0.7769

Epoch 10/50


Training: 100%|██████████████████████████████████| 157/157 [06:04<00:00,  2.32s/it, batch_loss=1.7001, min_loss=0.9573]


Minimum Loss: 0.9573

Loss did not improve. Best so far: 0.7769

Epoch 11/50


Training: 100%|██████████████████████████████████| 157/157 [05:58<00:00,  2.28s/it, batch_loss=0.8827, min_loss=0.8418]


Minimum Loss: 0.8418

Loss did not improve. Best so far: 0.7769

Epoch 12/50


Training: 100%|██████████████████████████████████| 157/157 [06:45<00:00,  2.58s/it, batch_loss=1.2038, min_loss=0.7306]


Minimum Loss: 0.7306

✅ Best model saved! Lowest loss: 0.7306

Epoch 13/50


Training: 100%|██████████████████████████████████| 157/157 [06:00<00:00,  2.30s/it, batch_loss=0.9055, min_loss=0.6433]


Minimum Loss: 0.6433

✅ Best model saved! Lowest loss: 0.6433

Epoch 14/50


Training: 100%|██████████████████████████████████| 157/157 [06:00<00:00,  2.29s/it, batch_loss=0.8707, min_loss=0.6023]


Minimum Loss: 0.6023

✅ Best model saved! Lowest loss: 0.6023

Epoch 15/50


Training: 100%|██████████████████████████████████| 157/157 [06:03<00:00,  2.31s/it, batch_loss=0.5231, min_loss=0.4837]


Minimum Loss: 0.4837

✅ Best model saved! Lowest loss: 0.4837

Epoch 16/50


Training: 100%|██████████████████████████████████| 157/157 [05:59<00:00,  2.29s/it, batch_loss=0.8403, min_loss=0.5146]


Minimum Loss: 0.5146

Loss did not improve. Best so far: 0.4837

Epoch 17/50


Training: 100%|██████████████████████████████████| 157/157 [05:54<00:00,  2.26s/it, batch_loss=1.1016, min_loss=0.4511]


Minimum Loss: 0.4511

✅ Best model saved! Lowest loss: 0.4511

Epoch 18/50


Training: 100%|██████████████████████████████████| 157/157 [05:50<00:00,  2.23s/it, batch_loss=0.5183, min_loss=0.3997]


Minimum Loss: 0.3997

✅ Best model saved! Lowest loss: 0.3997

Epoch 19/50


Training: 100%|██████████████████████████████████| 157/157 [05:49<00:00,  2.23s/it, batch_loss=0.5597, min_loss=0.3780]


Minimum Loss: 0.3780

✅ Best model saved! Lowest loss: 0.3780

Epoch 20/50


Training: 100%|██████████████████████████████████| 157/157 [05:50<00:00,  2.23s/it, batch_loss=0.6454, min_loss=0.3830]


Minimum Loss: 0.3830

Loss did not improve. Best so far: 0.3780

Epoch 21/50


Training: 100%|██████████████████████████████████| 157/157 [05:50<00:00,  2.23s/it, batch_loss=1.0981, min_loss=0.3639]


Minimum Loss: 0.3639

✅ Best model saved! Lowest loss: 0.3639

Epoch 22/50


Training: 100%|██████████████████████████████████| 157/157 [05:50<00:00,  2.23s/it, batch_loss=0.7336, min_loss=0.3315]


Minimum Loss: 0.3315

✅ Best model saved! Lowest loss: 0.3315

Epoch 23/50


Training: 100%|██████████████████████████████████| 157/157 [05:49<00:00,  2.23s/it, batch_loss=0.2770, min_loss=0.2770]


Minimum Loss: 0.2770

✅ Best model saved! Lowest loss: 0.2770

Epoch 24/50


Training: 100%|██████████████████████████████████| 157/157 [06:09<00:00,  2.35s/it, batch_loss=0.8712, min_loss=0.2339]


Minimum Loss: 0.2339

✅ Best model saved! Lowest loss: 0.2339

Epoch 25/50


Training: 100%|██████████████████████████████████| 157/157 [06:03<00:00,  2.31s/it, batch_loss=0.1766, min_loss=0.1766]


Minimum Loss: 0.1766

✅ Best model saved! Lowest loss: 0.1766

Epoch 26/50


Training: 100%|██████████████████████████████████| 157/157 [06:01<00:00,  2.31s/it, batch_loss=0.4016, min_loss=0.1984]


Minimum Loss: 0.1984

Loss did not improve. Best so far: 0.1766

Epoch 27/50


Training: 100%|██████████████████████████████████| 157/157 [06:00<00:00,  2.29s/it, batch_loss=0.2807, min_loss=0.1894]


Minimum Loss: 0.1894

Loss did not improve. Best so far: 0.1766

Epoch 28/50


Training: 100%|██████████████████████████████████| 157/157 [06:00<00:00,  2.30s/it, batch_loss=0.7078, min_loss=0.1822]


Minimum Loss: 0.1822

Loss did not improve. Best so far: 0.1766

Epoch 29/50


Training: 100%|██████████████████████████████████| 157/157 [06:00<00:00,  2.29s/it, batch_loss=0.3325, min_loss=0.2054]


Minimum Loss: 0.2054

Loss did not improve. Best so far: 0.1766

Epoch 30/50


Training: 100%|██████████████████████████████████| 157/157 [06:00<00:00,  2.30s/it, batch_loss=0.1278, min_loss=0.1225]


Minimum Loss: 0.1225

✅ Best model saved! Lowest loss: 0.1225

Epoch 31/50


Training: 100%|██████████████████████████████████| 157/157 [06:00<00:00,  2.29s/it, batch_loss=0.6799, min_loss=0.1119]


Minimum Loss: 0.1119

✅ Best model saved! Lowest loss: 0.1119

Epoch 32/50


Training: 100%|██████████████████████████████████| 157/157 [06:11<00:00,  2.36s/it, batch_loss=0.4329, min_loss=0.0950]


Minimum Loss: 0.0950

✅ Best model saved! Lowest loss: 0.0950

Epoch 33/50


Training: 100%|██████████████████████████████████| 157/157 [06:04<00:00,  2.32s/it, batch_loss=0.1878, min_loss=0.0927]


Minimum Loss: 0.0927

✅ Best model saved! Lowest loss: 0.0927

Epoch 34/50


Training: 100%|██████████████████████████████████| 157/157 [06:01<00:00,  2.30s/it, batch_loss=0.2607, min_loss=0.0843]


Minimum Loss: 0.0843

✅ Best model saved! Lowest loss: 0.0843

Epoch 35/50


Training: 100%|██████████████████████████████████| 157/157 [06:02<00:00,  2.31s/it, batch_loss=0.1416, min_loss=0.0920]


Minimum Loss: 0.0920

Loss did not improve. Best so far: 0.0843

Epoch 36/50


Training: 100%|██████████████████████████████████| 157/157 [06:00<00:00,  2.30s/it, batch_loss=0.3717, min_loss=0.0760]


Minimum Loss: 0.0760

✅ Best model saved! Lowest loss: 0.0760

Epoch 37/50


Training: 100%|██████████████████████████████████| 157/157 [06:00<00:00,  2.29s/it, batch_loss=0.2223, min_loss=0.0711]


Minimum Loss: 0.0711

✅ Best model saved! Lowest loss: 0.0711

Epoch 38/50


Training: 100%|██████████████████████████████████| 157/157 [06:05<00:00,  2.33s/it, batch_loss=0.1480, min_loss=0.0553]


Minimum Loss: 0.0553

✅ Best model saved! Lowest loss: 0.0553

Epoch 39/50


Training: 100%|██████████████████████████████████| 157/157 [06:02<00:00,  2.31s/it, batch_loss=0.1161, min_loss=0.0638]


Minimum Loss: 0.0638

Loss did not improve. Best so far: 0.0553

Epoch 40/50


Training: 100%|██████████████████████████████████| 157/157 [06:02<00:00,  2.31s/it, batch_loss=0.0221, min_loss=0.0221]


Minimum Loss: 0.0221

✅ Best model saved! Lowest loss: 0.0221

Epoch 41/50


Training: 100%|██████████████████████████████████| 157/157 [06:02<00:00,  2.31s/it, batch_loss=0.0407, min_loss=0.0407]


Minimum Loss: 0.0407

Loss did not improve. Best so far: 0.0221

Epoch 42/50


Training: 100%|██████████████████████████████████| 157/157 [06:02<00:00,  2.31s/it, batch_loss=0.0216, min_loss=0.0216]


Minimum Loss: 0.0216

✅ Best model saved! Lowest loss: 0.0216

Epoch 43/50


Training: 100%|██████████████████████████████████| 157/157 [06:01<00:00,  2.30s/it, batch_loss=0.0360, min_loss=0.0348]


Minimum Loss: 0.0348

Loss did not improve. Best so far: 0.0216

Epoch 44/50


Training: 100%|██████████████████████████████████| 157/157 [06:03<00:00,  2.32s/it, batch_loss=0.1306, min_loss=0.0438]


Minimum Loss: 0.0438

Loss did not improve. Best so far: 0.0216

Epoch 45/50


Training: 100%|██████████████████████████████████| 157/157 [06:03<00:00,  2.32s/it, batch_loss=0.0874, min_loss=0.0408]


Minimum Loss: 0.0408

Loss did not improve. Best so far: 0.0216

Epoch 46/50


Training: 100%|██████████████████████████████████| 157/157 [06:05<00:00,  2.32s/it, batch_loss=0.2020, min_loss=0.0418]


Minimum Loss: 0.0418

Loss did not improve. Best so far: 0.0216

Epoch 47/50


Training: 100%|██████████████████████████████████| 157/157 [06:04<00:00,  2.32s/it, batch_loss=0.3800, min_loss=0.0437]


Minimum Loss: 0.0437

Loss did not improve. Best so far: 0.0216

Epoch 48/50


Training: 100%|██████████████████████████████████| 157/157 [06:04<00:00,  2.32s/it, batch_loss=0.2015, min_loss=0.0301]


Minimum Loss: 0.0301

Loss did not improve. Best so far: 0.0216

Epoch 49/50


Training: 100%|██████████████████████████████████| 157/157 [06:04<00:00,  2.32s/it, batch_loss=0.0181, min_loss=0.0181]


Minimum Loss: 0.0181

✅ Best model saved! Lowest loss: 0.0181

Epoch 50/50


Training: 100%|██████████████████████████████████| 157/157 [06:01<00:00,  2.31s/it, batch_loss=0.1663, min_loss=0.0298]

Minimum Loss: 0.0298

Loss did not improve. Best so far: 0.0181


✅ Continued training complete! Best loss achieved: 0.0181
Model saved to: C:\Users\ahmed\Desktop\Ahmed Sajid\Office - NCV\NCV - HTR\htr_crnn_mini.pth

Loss history by epoch:
  Epoch 1: 2.0026
  Epoch 2: 1.7183
  Epoch 3: 1.7456
  Epoch 4: 1.6372
  Epoch 5: 1.5689
  Epoch 6: 1.3110
  Epoch 7: 1.0817
  Epoch 8: 0.7769
  Epoch 9: 1.0491
  Epoch 10: 0.9573
  Epoch 11: 0.8418
  Epoch 12: 0.7306
  Epoch 13: 0.6433
  Epoch 14: 0.6023
  Epoch 15: 0.4837
  Epoch 16: 0.5146
  Epoch 17: 0.4511
  Epoch 18: 0.3997
  Epoch 19: 0.3780
  Epoch 20: 0.3830
  Epoch 21: 0.3639
  Epoch 22: 0.3315
  Epoch 23: 0.2770
  Epoch 24: 0.2339
  Epoch 25: 0.1766
  Epoch 26: 0.1984
  Epoch 27: 0.1894
  Epoch 28: 0.1822
  Epoch 29: 0.2054
  Epoch 30: 0.1225
  Epoch 31: 0.1119
  Epoch 32: 0.0950
  Epoch 33: 0.0927
  Epoch 34: 0.0843
  Epoch 35: 0.0920
  Epoch 36: 0.0760
  Epoch 37: 0.0711
  Epoch 38: 0.0553
  Epoch 39: 0.0638
  Epoch 40: 0.0221
  Epoch 4




In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from PIL import Image
import os
import csv
import pandas as pd
from typing import List, Dict, Tuple
from tqdm import tqdm

# --- Configuration ---
class Config:
    CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 .,-?!&"
    VOCAB_SIZE = len(CHARS) + 1
    
    # Image Size
    IMG_HEIGHT = 64
    IMG_WIDTH = 256
    HIDDEN_SIZE = 256
    
    # Directory Structure
    BASE_DIR = os.getcwd()
    CSV_DIR = os.path.join(BASE_DIR, "CSV")
    TEST_IMG_DIR = os.path.join(BASE_DIR, "test_v2/test")
    
    # Annotation Files
    TEST_CSV = os.path.join(CSV_DIR, "written_name_test.csv")
    
    # Model Paths
    MINI_MODEL_PATH = os.path.join(BASE_DIR, "htr_crnn_mini.pth")


def create_char_to_int_mapping(chars: str) -> Tuple[Dict[str, int], Dict[int, str]]:
    char_to_int = {char: i + 1 for i, char in enumerate(chars)}
    int_to_char = {i + 1: char for i, char in enumerate(chars)}
    char_to_int['CTC_BLANK'] = 0
    int_to_char[0] = ''
    return char_to_int, int_to_char

CHAR_TO_INT, INT_TO_CHAR = create_char_to_int_mapping(Config.CHARS)


class CRNN(nn.Module):
    def __init__(self, img_height, vocab_size, hidden_size):
        super(CRNN, self).__init__()
        
        # Improved CNN with deeper architecture and BatchNorm
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.MaxPool2d((2, 1), (2, 1)),
            nn.Conv2d(256, 512, 3, padding=1), nn.BatchNorm2d(512), nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1), nn.BatchNorm2d(512), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, None))
        )
        
        # Map from CNN output (512 channels) to RNN input
        self.map_to_rnn = nn.Linear(512, hidden_size)
        
        # Improved RNN with dropout and extra layer
        self.rnn = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=3,
            bidirectional=True,
            dropout=0.3,
            batch_first=False
        )
        
        self.linear = nn.Linear(hidden_size * 2, vocab_size)
        
        self._init_weights()

    def _init_weights(self):
        """Proper weight initialization"""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        cnn_out = self.cnn(x)
        cnn_out = cnn_out.squeeze(2)
        cnn_out = cnn_out.permute(0, 2, 1)
        
        rnn_input = self.map_to_rnn(cnn_out)
        rnn_input = rnn_input.permute(1, 0, 2)
        
        rnn_out, _ = self.rnn(rnn_input)
        output = self.linear(rnn_out)
        output = nn.functional.log_softmax(output, dim=2)
        
        return output


def decode_ctc(output: torch.Tensor, int_to_char: Dict[int, str]) -> tuple:
    """
    Decodes CTC output using greedy decoding.
    
    Args:
        output: Tensor of shape [SequenceLength, VocabSize] with log probabilities
        int_to_char: Mapping from integer indices to characters
        
    Returns:
        Tuple of (decoded_text, confidence_score)
    """
    probs = output.exp()
    preds = output.argmax(dim=1)
    max_probs = probs.max(dim=1)[0]
    
    decoded_text = []
    confidence_scores = []
    prev_idx = -1
    
    for i, char_idx in enumerate(preds.cpu().numpy()):
        if char_idx != 0 and char_idx != prev_idx:
            decoded_text.append(int_to_char.get(int(char_idx), '?'))
            confidence_scores.append(max_probs[i].item())
        prev_idx = char_idx
    
    avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.0
    
    return "".join(decoded_text), avg_confidence


def predict_handwritten_text(image_path: str, model: nn.Module, device) -> tuple:
    """
    Predicts text from a handwritten image using a loaded model.
    
    Args:
        image_path: Path to the image file to predict on
        model: Already loaded CRNN model
        device: Device to run inference on
        
    Returns:
        Tuple of (predicted_text, confidence_score)
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found at: {image_path}")
    
    try:
        transform = transforms.Compose([
            transforms.Resize((Config.IMG_HEIGHT, Config.IMG_WIDTH)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])
        
        image = Image.open(image_path).convert('RGB')
        image_tensor = transform(image).unsqueeze(0).to(device)
        
        with torch.no_grad():
            output = model(image_tensor)
            output = output.squeeze(1)
            predicted_text, confidence = decode_ctc(output, INT_TO_CHAR)
        
        return predicted_text, confidence
        
    except Exception as e:
        raise RuntimeError(f"Prediction failed: {str(e)}")


def predict_batch(model_path: str, image_dir: str, output_dir: str = "output", save_csv: bool = True):
    """
    Predicts text from all images in a directory using the mini model.
    
    Args:
        model_path: Path to the saved model weights
        image_dir: Directory containing images to predict on
        output_dir: Directory to save output results
        save_csv: Whether to save results as CSV file
    """
    if not os.path.exists(image_dir):
        print(f"❌ Image directory not found: {image_dir}")
        return
    
    if not os.path.exists(model_path):
        print(f"❌ Model file not found: {model_path}")
        return
    
    os.makedirs(output_dir, exist_ok=True)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}\n")
    
    # Load model
    print("Loading CRNN model...")
    model = CRNN(Config.IMG_HEIGHT, Config.VOCAB_SIZE, Config.HIDDEN_SIZE).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    print("✅ Model loaded successfully\n")
    
    # Get all image files
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}
    image_files = sorted([f for f in os.listdir(image_dir) 
                          if os.path.splitext(f)[1].lower() in image_extensions])
    
    if not image_files:
        print(f"❌ No image files found in {image_dir}")
        return
    
    print(f"Found {len(image_files)} images. Starting predictions...\n")
    
    results = []
    
    for idx, filename in enumerate(image_files, 1):
        image_path = os.path.join(image_dir, filename)
        
        try:
            predicted_text, confidence = predict_handwritten_text(image_path, model, device)
            status = "✅"
            print(f"{status} [{idx}/{len(image_files)}] {filename}")
            print(f"   Predicted: '{predicted_text}' (Confidence: {confidence:.4f})\n")
            
            results.append({
                'filename': filename,
                'predicted_text': predicted_text,
                'confidence': f"{confidence:.4f}"
            })
            
        except (FileNotFoundError, RuntimeError) as e:
            print(f"❌ [{idx}/{len(image_files)}] {filename}")
            print(f"   Error: {e}\n")
            
            results.append({
                'filename': filename,
                'predicted_text': 'ERROR',
                'confidence': 'N/A'
            })
    
    # Save results to CSV
    if save_csv:
        csv_path = os.path.join(output_dir, "predictions.csv")
        try:
            with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = ['filename', 'predicted_text', 'confidence']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(results)
            print(f"✅ Results saved to: {csv_path}")
        except Exception as e:
            print(f"❌ Failed to save CSV: {e}")
    
    # Save results to text file
    txt_path = os.path.join(output_dir, "predictions.txt")
    try:
        with open(txt_path, 'w', encoding='utf-8') as txtfile:
            txtfile.write("HTR CRNN Model - Predictions\n")
            txtfile.write("=" * 60 + "\n\n")
            for result in results:
                txtfile.write(f"File: {result['filename']}\n")
                txtfile.write(f"Predicted Text: {result['predicted_text']}\n")
                txtfile.write(f"Confidence: {result['confidence']}\n")
                txtfile.write("-" * 60 + "\n")
        print(f"✅ Results saved to: {txt_path}")
    except Exception as e:
        print(f"❌ Failed to save TXT: {e}")
    
    print(f"\n✅ Prediction complete! Results saved in '{output_dir}' folder")


def predict_single_image(model_path: str, image_path: str):
    """
    Predict text from a single image.
    
    Args:
        model_path: Path to the saved model weights
        image_path: Path to the image file
    """
    if not os.path.exists(model_path):
        print(f"❌ Model file not found: {model_path}")
        return
    
    if not os.path.exists(image_path):
        print(f"❌ Image file not found: {image_path}")
        return
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}\n")
    
    # Load model
    print("Loading CRNN model...")
    model = CRNN(Config.IMG_HEIGHT, Config.VOCAB_SIZE, Config.HIDDEN_SIZE).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    print("✅ Model loaded successfully\n")
    
    try:
        predicted_text, confidence = predict_handwritten_text(image_path, model, device)
        print(f"Image: {os.path.basename(image_path)}")
        print(f"Predicted Text: '{predicted_text}'")
        print(f"Confidence: {confidence:.4f}")
    except Exception as e:
        print(f"❌ Prediction failed: {e}")


if __name__ == "__main__":
    print("=" * 60)
    print("CRNN MODEL - TESTING/INFERENCE")
    print("=" * 60 + "\n")
    #'''
    # Option 1: Batch prediction on test directory
    img_dir = Config.TEST_IMG_DIR
    output_folder = "output"
    
    predict_batch(
        model_path=Config.MINI_MODEL_PATH,
        image_dir=img_dir,
        output_dir=output_folder,
        save_csv=True
    )
    '''
    # Option 2: Uncomment to predict on a single image
    predict_single_image(
        model_path=Config.MINI_MODEL_PATH,
        image_path="C:/Users/ahmed/Dropbox/PC/Pictures/Screenshots/Screenshot 2025-07-04 141651.png"
    )
    '''

CRNN MODEL - TESTING/INFERENCE

Using device: cpu

Loading CRNN model...
✅ Model loaded successfully



  model.load_state_dict(torch.load(model_path, map_location=device))


Found 41370 images. Starting predictions...

✅ [1/41370] TEST_0001.jpg
   Predicted: 'KEVIN' (Confidence: 0.9917)

✅ [2/41370] TEST_0002.jpg
   Predicted: 'COIINE' (Confidence: 0.6787)

✅ [3/41370] TEST_0003.jpg
   Predicted: 'LENA' (Confidence: 0.9980)

✅ [4/41370] TEST_0004.jpg
   Predicted: 'JULES' (Confidence: 0.9995)

✅ [5/41370] TEST_0005.jpg
   Predicted: 'CHERDIN' (Confidence: 0.9070)

✅ [6/41370] TEST_0006.jpg
   Predicted: 'MARTIN' (Confidence: 0.9986)

✅ [7/41370] TEST_0007.jpg
   Predicted: 'VALENTINE' (Confidence: 0.9994)

✅ [8/41370] TEST_0008.jpg
   Predicted: 'CORAS' (Confidence: 0.8579)

✅ [9/41370] TEST_0009.jpg
   Predicted: 'THIBAULT' (Confidence: 0.9941)

✅ [10/41370] TEST_0010.jpg
   Predicted: 'AERBI' (Confidence: 0.8235)

✅ [11/41370] TEST_0011.jpg
   Predicted: 'BORT-MARFE' (Confidence: 0.6474)

✅ [12/41370] TEST_0012.jpg
   Predicted: 'MANEHINIRAN' (Confidence: 0.8750)

✅ [13/41370] TEST_0013.jpg
   Predicted: 'FANIESSRSEA' (Confidence: 0.7266)

✅ [14/41370] T

KeyboardInterrupt: 