# PARSeq Training Notebook

This notebook runs the training pipeline for the **PARSeq** text recognition model.
It uses the `train_recognition` module, which implements the training loop and validation.
Metrics tracked: Loss, Sequence Accuracy (Exact Match), and Character Error Rate (CER).

In [None]:
import os
import sys

# Make the project root importable so the `src.*` packages resolve.
# The working directory is expected to be src/rec/, so the root is two levels up.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

print(f"Project Root: {project_root}")

from src.rec.train_recognition import main as train_parseq
from src.rec.rec_dataset import RecDataset
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import numpy as np

In [None]:
# Configuration
# Directory entries are resolved against the project root; insertion order is
# kept so the printed dict lists paths first, then the numeric settings.
config = {
    name: os.path.join(project_root, relative)
    for name, relative in (
        ('train_dir', 'data/train_crop'),   # Training data (images + txt)
        ('val_dir', 'data/val_crop'),       # Validation data
        ('save_dir', 'best_model/rec'),     # Where models are saved
    )
}
# Numeric training settings.
config.update({'batch_size': 64, 'epochs': 20, 'lr': 1e-4, 'max_len': 64})

print("Configuration:", config)

In [None]:
# Start Training
# Only launch when this code is the entry point (e.g. executed directly),
# not when it is imported from elsewhere.
_is_entry_point = __name__ == '__main__'
if _is_entry_point:
    train_parseq(config)