# OCR Baseline using LPRNet (PyTorch)

This notebook implements the OCR pipeline using **LPRNet**, a lightweight Convolutional Neural Network designed for License Plate Recognition.

**Optimized for A100 GPU:**
- **Batch Size**: 1024
- **Mixed Precision**: Enabled (AMP)
- **Workers**: 8
- **Progress Tracking**: TQDM enabled
- **Metrics**: Sequence Acc, Char Acc, Edit Distance

**Steps:**
1. **Setup**: Import libraries and Modules.
2. **Data**: Load dataset using custom `LPRDataset`.
3. **Model**: Initialize `LPRNet`.
4. **Training**: Train the model using CTC Loss with AMP.
5. **Inference**: Evaluate on validation set.

## 1. Setup

In [1]:
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler # Mixed Precision
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm

# Add src to path if needed
sys.path.append('..')

from src.ocr.lprnet import build_lprnet
from src.ocr.reader import LPRDataset, collate_fn
from src.ocr.decoder import LPRLabelEncoder, CHARS
from src.evaluation.metrics import compute_ocr_metrics

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Check CUDA Capability
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

Using device: cuda
GPU: NVIDIA A100-SXM4-80GB
Memory: 85.20 GB


## 2. Dataset Preparation

In [9]:
DATASET_DIR = "../datasets/preprocessed/plate_text_cropped"
IMAGES_DIR = os.path.join(DATASET_DIR, "dataset")
LABEL_FILE = os.path.join(DATASET_DIR, "label.csv")

# HPC Optimization
BATCH_SIZE = 16 # Increased for A100
NUM_WORKERS = 8   # Parallel data loading
PIN_MEMORY = True # Speed up host-to-device transfer

IMG_SIZE = (94, 24) # LPRNet standard input size

# Initialize Dataset
dataset = LPRDataset(img_dir=IMAGES_DIR, label_file=LABEL_FILE, img_size=IMG_SIZE)

# Split Dataset
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Dataloaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, 
                          collate_fn=collate_fn, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, 
                        collate_fn=collate_fn, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}")

Train samples: 1490, Val samples: 373


## 3. Model Initialization

In [10]:
lpr_max_len = 18 # Maximum length of license plate
class_num = len(CHARS) + 1 # +1 for blank

model = build_lprnet(lpr_max_len=lpr_max_len, class_num=class_num, dropout_rate=0.5)
model.to(device)
# print(model) # Commented out to reduce log clutter

LPRNet(
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (4): SmallBasicBlock(
      (block): Sequential(
        (0): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
        (1): ReLU()
        (2): Conv2d(32, 32, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
        (3): ReLU()
        (4): Conv2d(32, 32, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
        (5): ReLU()
        (6): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))
      )
    )
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2), padding=0, dilation=1, ceil_mode=False)
    (8): SmallBasicBlock(
      (block): Sequential(
        (0): Conv2d(64, 64, kern

## 4. Training Loop (with AMP & TQDM)

In [11]:
def train_one_epoch(model, loader, optimizer, criterion, device, scaler, epoch):
    model.train()
    total_loss = 0.0
    
    # Metrics accumulator
    total_seq_acc = 0.0
    total_char_acc = 0.0
    num_batches = 0
    
    encoder = LPRLabelEncoder(CHARS) # Helper untuk decode prediksi

    pbar = tqdm(loader, desc=f"Train Epoch {epoch+1}", leave=False)

    for images, labels, lengths in pbar:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        lengths = lengths.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        with autocast():
            logits = model(images)                 # (B, C, T)
            log_probs = logits.permute(2, 0, 1)    # (T, B, C)
            log_probs = log_probs.log_softmax(2)

            input_lengths = torch.full(
                size=(images.size(0),),
                fill_value=logits.size(2),
                dtype=torch.long,
                device=device
            )

            loss = criterion(log_probs, labels, input_lengths, lengths)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        

        # --- Calculate Train Metrics on the Fly ---
        # Decode greedy untuk hitung akurasi kasar saat training
        with torch.no_grad():
            preds_decoded = encoder.decode_greedy(logits)
            
            # Reconstruct targets text
            labels_cpu = labels.cpu().numpy()
            lengths_cpu = lengths.cpu().numpy()
            targets_decoded = []
            idx = 0
            for le in lengths_cpu:
                targets_decoded.append("".join([CHARS[c] for c in labels_cpu[idx:idx+le]]))
                idx += le
                
            # Compute Batch Metrics
            batch_metrics = compute_ocr_metrics(preds_decoded, targets_decoded)
            total_seq_acc += batch_metrics["seq_acc"]
            total_char_acc += batch_metrics["char_acc"]
            num_batches += 1

        total_loss += loss.item()
        pbar.set_postfix({
            "loss": f"{loss.item():.4f}", 
            "acc": f"{batch_metrics['seq_acc']:.2%}"
        })

    avg_loss = total_loss / num_batches
    avg_seq_acc = total_seq_acc / num_batches
    avg_char_acc = total_char_acc / num_batches
    
    return avg_loss, avg_seq_acc, avg_char_acc

def evaluate(model, loader, device):
    model.eval()
    encoder = LPRLabelEncoder(CHARS)

    all_preds = []
    all_targets = []

    pbar = tqdm(loader, desc="Evaluating", leave=False)

    with torch.no_grad():
        for images, labels, lengths in pbar:
            images = images.to(device, non_blocking=True)

            with autocast():
                logits = model(images)

            preds = encoder.decode_greedy(logits)

            labels = labels.cpu().numpy()
            lengths = lengths.cpu().numpy()

            idx = 0
            for i, length in enumerate(lengths):
                true_indices = labels[idx : idx + length]
                true_text = "".join([CHARS[j] for j in true_indices])
                idx += length

                all_preds.append(preds[i])
                all_targets.append(true_text)

    metrics = compute_ocr_metrics(all_preds, all_targets)
    return metrics

In [None]:
num_epochs = 100
patience = 15
best_seq_acc = 0.0
epochs_no_improve = 0

save_dir = "../artifacts/lprnet"
os.makedirs(save_dir, exist_ok=True)

criterion = nn.CTCLoss(blank=len(CHARS), reduction='mean')

optimizer = optim.Adam(
    model.parameters(),
    lr=0.001
)

scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=20,
    gamma=0.5
)

scaler = GradScaler()


# --- History Storage ---
history = {
    "train_loss": [],
    "train_seq_acc": [],
    "val_seq_acc": [],
    "train_char_acc": [],
    "val_char_acc": [],
    "val_edit_dist": []
}

print("Starting Training...\n")

for epoch in range(num_epochs):
    # Pass scheduler ke dalam train_one_epoch agar di-step per batch
    train_loss, train_seq_acc, train_char_acc = train_one_epoch(
        model,
        train_loader,
        optimizer,
        criterion,
        device,
        scaler,
        epoch
    )
    
    scheduler.step()
    
    # ... (Evaluate) ...
    metrics = evaluate(model, val_loader, device)
    val_seq_acc = metrics["seq_acc"]
    val_char_acc = metrics["char_acc"]
    val_edit_dist = metrics["avg_edit_dist"]
    # --- SIMPAN KE HISTORY ---
    history["train_loss"].append(train_loss)
    history["train_seq_acc"].append(train_seq_acc)
    history["val_seq_acc"].append(val_seq_acc)
    history["train_char_acc"].append(train_char_acc)
    history["val_char_acc"].append(val_char_acc)
    history["val_edit_dist"].append(val_edit_dist)

    print(
        f"Epoch [{epoch+1}/{num_epochs}] "
        f"Loss: {train_loss:.4f} | "
        f"Train Acc: {train_seq_acc:.4f} | "  
        f"Val Acc: {val_seq_acc:.4f} | "      
        f"Val Char: {val_char_acc:.4f} | "
        f"Edit Dist: {val_edit_dist:.4f}"
    )

    # Save Best Model berdasarkan Val Seq Accuracy
    if val_seq_acc > best_seq_acc:
        best_seq_acc = val_seq_acc
        epochs_no_improve = 0
        torch.save(
            model.state_dict(),
            os.path.join(save_dir, "lprnet_best.pth")
        )
        print(f"Best model saved (Seq Acc: {best_seq_acc:.4f})")
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print(f"\nEarly stopping triggered after {epoch+1} epochs")
        break

# Save Last
torch.save(
    model.state_dict(),
    os.path.join(save_dir, "lprnet_last.pth")
)
print("\nTraining finished")
print(f"Best Seq Accuracy: {best_seq_acc:.4f}")
print(f"Models saved in: {save_dir}")

Starting Training...



Train Epoch 1:  18%|█▊        | 17/94 [00:18<00:31,  2.42it/s, loss=3.0548, acc=0.00%]

## 5. Inference Visualization

In [None]:
def show_results(model, dataset, num=5):
    model.eval()
    encoder = LPRLabelEncoder(CHARS)
    
    indices = np.random.randint(0, len(dataset), num)
    
    for idx in indices:
        img_tensor, _, _ = dataset[idx]
        input_img = img_tensor.unsqueeze(0).to(device)
        
        with torch.no_grad():
            logits = model(input_img)
            pred_text = encoder.decode_greedy(logits)[0]
            
        # De-normalize for plotting
        display_img = img_tensor.numpy().transpose(1, 2, 0)
        display_img = (display_img / 0.0078125) + 127.5
        display_img = display_img.astype(np.uint8)
        
        plt.figure()
        plt.imshow(cv2.cvtColor(display_img, cv2.COLOR_BGR2RGB))
        plt.title(f"Pred: {pred_text}")
        plt.axis('off')
        plt.show()

# --- Load Best Model untuk Visualisasi ---
best_model_path = os.path.join(save_dir, "lprnet_best.pth")
if os.path.exists(best_model_path):
    print(f"Loading best model from: {best_model_path}")
    model.load_state_dict(torch.load(best_model_path))
else:
    print("Best model not found, using last model.")
# Baru panggil fungsi show_results
show_results(model, val_dataset)

In [None]:
import matplotlib.pyplot as plt

def plot_training_history(history):
    epochs = range(1, len(history["train_loss"]) + 1)
    
    plt.figure(figsize=(18, 5))

    # 1. Loss Plot
    plt.subplot(1, 3, 1)
    plt.plot(epochs, history["train_loss"], label="Train Loss", color='red')
    plt.title("Training Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)

    # 2. Sequence Accuracy Plot
    plt.subplot(1, 3, 2)
    plt.plot(epochs, history["train_seq_acc"], label="Train Seq Acc", color='blue')
    plt.plot(epochs, history["val_seq_acc"], label="Val Seq Acc", color='green')
    plt.title("Sequence Accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.grid(True)

    # 3. Character Accuracy Plot
    plt.subplot(1, 3, 3)
    plt.plot(epochs, history["train_char_acc"], label="Train Char Acc", color='orange')
    plt.plot(epochs, history["val_char_acc"], label="Val Char Acc", color='purple')
    plt.title("Character Accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()

# Panggil fungsi
plot_training_history(history)