# Philippine License Plate Character Instance Segmentation with Similarity-Aware Loss

Single-stage training: YOLO11-seg with polygon masks and character labels, using a custom similarity-aware loss function to handle visually confusable characters (O/0, I/1/L, etc.) in CCTV surveillance footage.


## 0. Environment Setup

This notebook is optimized for Google Colab with a T4 GPU.


In [None]:
# Shell escape: print the GPU / driver status table to confirm a GPU runtime is attached.
!nvidia-smi


In [None]:
# Install (or upgrade) the ultralytics package with reduced installer output.
%pip install -U ultralytics --quiet

import torch
# Report the PyTorch build and whether a CUDA device is visible to it.
print('PyTorch version:', torch.__version__)
print('CUDA available:', torch.cuda.is_available())
if torch.cuda.is_available():
    # Index 0 is the first visible CUDA device.
    print('GPU:', torch.cuda.get_device_name(0))


## 1. Paths and Configuration Variables

Set these to the actual dataset and output locations before training.


In [None]:
# Dataset description file handed to `model.train(data=...)` later in the notebook.
DATA_YAML_PATH = '/content/philippine_lp_chars.yaml'
# Passed as `project=` / `name=` to the training call; RUN_NAME also prefixes the exported weights file.
RUN_PROJECT = 'philippine_lp_ocr'
RUN_NAME = 'seg_with_similarity_loss'
# Destination directory for the copied best weights.
EXPORT_DIR = '/content/exports'

# Shell escape: create the export directory (no error if it already exists).
!mkdir -p "$EXPORT_DIR"
print('DATA_YAML_PATH:', DATA_YAML_PATH)
print('EXPORT_DIR:', EXPORT_DIR)


## 2. Imports

Core dependencies for segmentation training, custom loss, and optimization.


In [None]:
from ultralytics import YOLO
from ultralytics.models.yolo.segment import SegmentationTrainer
from ultralytics.nn.tasks import SegmentationModel

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Global device string reused by later cells (loss modules, sanity check, training call).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using device:', device)


## 3. Character Set and Similarity Matrix

Define the 36-class character set (A–Z, 0–9) and visual-similarity relationships based on glyph shapes. Characters in the same group (e.g., O, 0, Q) are visually similar and should receive reduced penalties when confused during training.


In [None]:
# Class alphabet: 'A'-'Z' take indices 0-25, '0'-'9' take indices 26-35.
CHARS = [chr(code) for code in range(ord('A'), ord('Z') + 1)] + list('0123456789')
NUM_CLASSES = len(CHARS)
CHAR_TO_IDX = {char: index for index, char in enumerate(CHARS)}
IDX_TO_CHAR = dict(enumerate(CHARS))

print('Number of classes:', NUM_CLASSES)
print('Characters:', CHARS)

# Hand-crafted groups of visually confusable glyphs. A character may appear in
# more than one group ('0' is listed with O/Q and again with D); similarity is
# assigned pairwise within each group only, so D and O are NOT linked.
SIMILAR_GROUPS = [
    ['O', '0', 'Q'],
    ['I', '1', 'L'],
    ['S', '5'],
    ['Z', '2'],
    ['B', '8'],
    ['D', '0'],
    ['G', 'C'],
    ['U', 'V'],
    ['P', 'R'],
]


def create_similarity_matrix(num_classes=NUM_CLASSES, groups=SIMILAR_GROUPS, base_sim=0.6):
    """Build the class-by-class visual similarity matrix.

    Args:
        num_classes: Side length of the square matrix.
        groups: Iterable of character groups; every ordered pair of distinct
            members of a group receives ``base_sim``. Characters missing from
            ``CHAR_TO_IDX`` are ignored.
        base_sim: Similarity assigned to distinct members of the same group.

    Returns:
        A float32 ``torch.Tensor`` of shape ``(num_classes, num_classes)`` with
        1.0 on the diagonal, ``base_sim`` for grouped pairs and 0.0 elsewhere.
    """
    matrix = np.zeros((num_classes, num_classes), dtype=np.float32)
    for members in groups:
        member_ids = [CHAR_TO_IDX[ch] for ch in members if ch in CHAR_TO_IDX]
        if member_ids:
            # Fill the whole member-by-member sub-block; the diagonal entries
            # touched here are restored to 1.0 after the loop.
            matrix[np.ix_(member_ids, member_ids)] = base_sim
    np.fill_diagonal(matrix, 1.0)
    return torch.tensor(matrix, dtype=torch.float32)


similarity_matrix = create_similarity_matrix()
print('Similarity matrix shape:', similarity_matrix.shape)


### 3.1. Dynamic Similarity Matrix Updates

**Logic:** The similarity matrix is initialized with hand-crafted visual similarities, but real-world confusion patterns may differ. By tracking which characters the model actually confuses during validation, we can dynamically update the similarity matrix to better reflect learned confusion patterns. This creates an adaptive training process where the loss function becomes more intelligent over time, focusing on the model's actual weak points rather than theoretical similarities.

In [None]:
class DynamicSimilarityMatrix:
    """Keep a class-similarity matrix and adapt it to observed confusions.

    Confusion counts are accumulated with :meth:`update_confusion` and folded
    into the similarity matrix by :meth:`update_similarity_matrix` using an
    exponential moving average (EMA).

    Args:
        num_classes: Number of classes (side length of both matrices).
        initial_matrix: Optional ``torch.Tensor`` with the starting
            similarities; when omitted, ``create_similarity_matrix()`` is used.
        learning_rate: EMA weight given to the newly observed confusion rates.
    """
    def __init__(self, num_classes=NUM_CLASSES, initial_matrix=None, learning_rate=0.1):
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        # confusion_matrix[target, pred] counts how often `target` was predicted as `pred`.
        self.confusion_matrix = np.zeros((num_classes, num_classes), dtype=np.float32)
        if initial_matrix is None:
            initial_matrix = create_similarity_matrix()
        # Take a private float32 copy: `.numpy()` on a CPU tensor shares memory
        # with the tensor, and the caller's matrix must never be altered.
        self.similarity_matrix = np.array(
            initial_matrix.detach().cpu().numpy(), dtype=np.float32
        )

    def update_confusion(self, predictions, targets):
        """Accumulate confusion counts from paired predictions and targets.

        Args:
            predictions: Iterable of predicted class indices.
            targets: Iterable of ground-truth class indices, paired with
                ``predictions`` element by element.

        Pairs with an index outside ``[0, num_classes)`` are ignored.
        """
        for pred, target in zip(predictions, targets):
            # Cast so integral values stored as floats can still index the matrix.
            pred, target = int(pred), int(target)
            if 0 <= target < self.num_classes and 0 <= pred < self.num_classes:
                self.confusion_matrix[target, pred] += 1

    def compute_similarity_from_confusion(self):
        """Convert the accumulated confusion counts into similarity scores.

        Returns:
            A float32 array in which row ``t`` holds the fraction of samples of
            class ``t`` predicted as each class, with the diagonal forced to
            1.0. Rows without any observation are all zero off the diagonal.
        """
        # Normalize each row by the number of times that class appeared
        row_sums = self.confusion_matrix.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1  # Avoid division by zero
        normalized_confusion = self.confusion_matrix / row_sums

        # High confusion rate = high similarity. Row normalisation already
        # bounds every entry to [0, 1]; self-similarity is pinned to 1.0.
        similarity_from_confusion = normalized_confusion.copy()
        np.fill_diagonal(similarity_from_confusion, 1.0)

        return similarity_from_confusion

    def update_similarity_matrix(self):
        """Blend observed confusion rates into the similarity matrix (EMA).

        For every class that was observed at least once since the last update:
        ``S_new = (1 - lr) * S_old + lr * S_from_confusion``. Rows of classes
        with no observations are left untouched, so a period without evidence
        does not erode the hand-crafted prior for those classes.

        Returns:
            The updated similarity matrix as a float32 ``torch.Tensor``.
        """
        observed = self.confusion_matrix.sum(axis=1) > 0
        if observed.any():
            new_similarity = self.compute_similarity_from_confusion()
            blended = (1 - self.learning_rate) * self.similarity_matrix + \
                      self.learning_rate * new_similarity
            self.similarity_matrix = np.where(
                observed[:, None], blended, self.similarity_matrix
            ).astype(np.float32)

        # Reset confusion matrix for next validation period
        self.confusion_matrix.fill(0)

        return self.get_similarity_matrix()

    def get_similarity_matrix(self):
        """Return a float32 ``torch.Tensor`` copy of the current similarity matrix."""
        return torch.tensor(self.similarity_matrix, dtype=torch.float32)

# Initialize dynamic similarity matrix manager.
# It starts from the hand-crafted `similarity_matrix`; learning_rate is the
# EMA weight given to newly observed confusion rates on each update.
dynamic_sim_matrix = DynamicSimilarityMatrix(
    num_classes=NUM_CLASSES,
    initial_matrix=similarity_matrix,
    learning_rate=0.1
)

print('Dynamic similarity matrix manager initialized.')
print('Will update every validation epoch based on actual confusion patterns.')

## 4. Custom Similarity-Aware Loss Function

Similarity-aware top-k loss directly rewards the model when visually similar characters appear in the top-2 predictions. If the model is uncertain between O and 0, having both in the top-2 with high confidence is acceptable and should be penalized less than confidently predicting X when the answer is O. This matches the requirement of considering "top-K outputs (e.g., top-2) rather than only the single best prediction." [https://openaccess.thecvf.com/content_cvpr_2016/papers/Lapin_Loss_Functions_for_CVPR_2016_paper.pdf](https://openaccess.thecvf.com/content_cvpr_2016/papers/Lapin_Loss_Functions_for_CVPR_2016_paper.pdf)


In [None]:
class SimilarityAwareTopKLoss(nn.Module):
    """Cross-entropy blended with a similarity-discounted top-k penalty.

    Per sample::

        loss = base_weight * CE(logits, target)
             + topk_weight * sum_{j in top-k} p_j * (1 - S[target, j])

    where ``p`` is the temperature-scaled softmax and ``S`` the similarity
    matrix. Probability mass placed on the target itself (S = 1) costs
    nothing, mass on visually similar classes costs little, and mass on
    unrelated classes costs the most.

    Args:
        num_classes: Number of classes (stored; not used by ``forward``).
        similarity_matrix: Square similarity tensor; defaults to
            ``create_similarity_matrix()``.
        k: Number of top predictions that enter the penalty.
        temperature: Softmax temperature for the penalty term only; the
            cross-entropy term always uses the raw logits.
        base_weight: Weight of the cross-entropy term.
        topk_weight: Weight of the similarity penalty term.
    """
    def __init__(self, num_classes=NUM_CLASSES, similarity_matrix=None,
                 k=2, temperature=1.0, base_weight=0.7, topk_weight=0.3):
        super().__init__()
        self.num_classes = num_classes
        self.k = k
        self.temperature = temperature
        self.base_weight = base_weight
        self.topk_weight = topk_weight
        if similarity_matrix is None:
            similarity_matrix = create_similarity_matrix()
        # Registered as a buffer so it follows the module across `.to(device)`.
        self.register_buffer('similarity_matrix', similarity_matrix)

    def forward(self, logits, targets):
        """Return the mean blended loss over the batch.

        Args:
            logits: Class scores of shape ``(B, C)``.
            targets: Integer class indices of shape ``(B,)``.

        Returns:
            Scalar tensor: mean over the batch of the per-sample loss.
        """
        ce_loss = F.cross_entropy(logits, targets, reduction='none')
        probs = F.softmax(logits / self.temperature, dim=1)

        # Never request more candidates than there are classes.
        k = min(self.k, probs.size(1))
        topk_probs, topk_indices = torch.topk(probs, k, dim=1)

        # Vectorised lookup of S[target_b, topk_b_j] for the whole batch; this
        # avoids a Python loop with one host synchronisation per sample.
        sims = self.similarity_matrix[targets].gather(1, topk_indices)
        sim_loss = (topk_probs * (1.0 - sims)).sum(dim=1)

        total = self.base_weight * ce_loss + self.topk_weight * sim_loss
        return total.mean()

print('Similarity-aware loss defined.')


### 4.1. Loss Function Refinements: Temperature Annealing & Adaptive Weighting

**Logic:** Temperature scheduling helps the model transition from exploration to exploitation. Early in training (high temperature), the model explores various character hypotheses with softer penalties. As training progresses (lower temperature), the model commits to more confident predictions. This is crucial for OCR where early confusion helps learn feature relationships, but later training needs sharp decisions.

Adaptive weighting based on prediction confidence dynamically balances between base cross-entropy and similarity-aware loss. When the model is uncertain (low confidence), we rely more on similarity-aware loss to guide learning with soft constraints. When confident, we trust the model's strong predictions and rely more on standard cross-entropy. This creates a self-regulating loss that adapts to the model's learning stage.

In [None]:
class ImprovedSimilarityAwareTopKLoss(nn.Module):
    """Similarity-aware top-k loss with temperature annealing and adaptive weights.

    The per-sample loss mixes cross-entropy with the similarity-discounted
    top-k penalty ``sum_j p_j * (1 - S[target, j])``. The mix depends on the
    model's confidence (largest top-k probability): confident samples lean on
    cross-entropy, uncertain samples lean on the similarity penalty.

    Args:
        num_classes: Number of classes (stored; not used by ``forward``).
        similarity_matrix: Square similarity tensor; defaults to
            ``create_similarity_matrix()``.
        k: Number of top predictions that enter the penalty.
        initial_temperature: Softmax temperature at epoch 0.
        base_weight: Cross-entropy weight at full confidence.
        topk_weight: Similarity-penalty weight at full confidence.
        epochs: Total number of training epochs, used to measure progress.
    """
    def __init__(self, num_classes=NUM_CLASSES, similarity_matrix=None,
                 k=2, initial_temperature=1.0, base_weight=0.7, topk_weight=0.3,
                 epochs=300):
        super().__init__()
        self.num_classes = num_classes
        self.k = k
        self.initial_temperature = initial_temperature
        self.base_weight = base_weight
        self.topk_weight = topk_weight
        self.epochs = epochs
        self.current_epoch = 0

        if similarity_matrix is None:
            similarity_matrix = create_similarity_matrix()
        # Registered as a buffer so it follows the module across `.to(device)`.
        self.register_buffer('similarity_matrix', similarity_matrix)

    def update_epoch(self, epoch):
        """Update current epoch for temperature annealing."""
        self.current_epoch = epoch

    def get_temperature(self):
        """Return the softmax temperature for the current epoch.

        The temperature falls linearly from ``initial_temperature`` with a
        fixed slope of 0.8 per unit of progress (``current_epoch / epochs``)
        and is floored at 0.5. With the default ``initial_temperature=1.0``
        the floor is reached at 62.5% of training; an initial temperature
        above 1.3 never reaches the floor within ``epochs``.
        """
        progress = self.current_epoch / max(self.epochs, 1)
        return max(0.5, self.initial_temperature - progress * 0.8)

    def forward(self, logits, targets):
        """Return the mean adaptively weighted loss over the batch.

        Args:
            logits: Class scores of shape ``(B, C)``.
            targets: Integer class indices of shape ``(B,)``.

        Returns:
            Scalar tensor: mean over the batch of the per-sample loss.
        """
        # Get current temperature for this epoch
        temperature = self.get_temperature()

        ce_loss = F.cross_entropy(logits, targets, reduction='none')
        probs = F.softmax(logits / temperature, dim=1)

        # Never request more candidates than there are classes.
        k = min(self.k, probs.size(1))
        topk_probs, topk_indices = torch.topk(probs, k, dim=1)

        # Vectorised lookup of S[target_b, topk_b_j] for the whole batch; this
        # avoids a Python loop with host synchronisations for every sample.
        sims = self.similarity_matrix[targets].gather(1, topk_indices)
        sim_loss = (topk_probs * (1.0 - sims)).sum(dim=1)

        # Adaptive weighting based on confidence
        # Low confidence: rely more on similarity-aware loss (exploratory)
        # High confidence: rely more on standard CE loss (exploitation)
        # Detached: confidence only steers the mixing weights and must not
        # receive gradients of its own.
        confidence = topk_probs.max(dim=1).values.detach().float()
        adaptive_base_weight = self.base_weight * confidence + self.topk_weight * (1 - confidence)
        adaptive_topk_weight = self.topk_weight * confidence + self.base_weight * (1 - confidence)

        # Normalize weights so the two always sum to 1 (their raw sum is
        # base_weight + topk_weight for every sample).
        total_weight = adaptive_base_weight + adaptive_topk_weight
        adaptive_base_weight = adaptive_base_weight / total_weight
        adaptive_topk_weight = adaptive_topk_weight / total_weight

        total = adaptive_base_weight * ce_loss + adaptive_topk_weight * sim_loss
        return total.mean()

print('Improved similarity-aware loss with temperature annealing and adaptive weighting defined.')

## 5. Sanity Check for Custom Loss

Verify that confusing similar characters (O vs 0) incurs lower penalty than confusing very different characters (O vs X).


In [None]:
# Sanity check: a confident wrong prediction of a look-alike ('0' for 'O')
# must cost less than a confident wrong prediction of an unrelated glyph ('X').
loss_fn = SimilarityAwareTopKLoss(num_classes=NUM_CLASSES, similarity_matrix=similarity_matrix, k=2).to(device)
target_O = torch.tensor([CHAR_TO_IDX['O']], device=device)


def _confident_logits(predicted_char):
    """Return 1 x NUM_CLASSES logits that are zero except 5.0 at `predicted_char`."""
    scores = torch.zeros(1, NUM_CLASSES, device=device)
    scores[0, CHAR_TO_IDX[predicted_char]] = 5.0
    return scores


logits_similar = _confident_logits('0')
loss_similar = loss_fn(logits_similar, target_O)

logits_diff = _confident_logits('X')
loss_diff = loss_fn(logits_diff, target_O)

print(f'Loss (O vs 0): {loss_similar.item():.4f}')
print(f'Loss (O vs X): {loss_diff.item():.4f}')
assert loss_similar < loss_diff, 'Expected O/0 confusion < O/X confusion'


## 6. Custom Segmentation Trainer with Similarity-Aware Character Loss

Override YOLO's segmentation trainer to inject the similarity-aware loss into the character classification head. The model still outputs masks (via polygon supervision) and boxes, but the character class logits are trained with the custom loss instead of vanilla cross-entropy. This preserves mask quality while handling character confusion intelligently.


### 6.1. OCR-Specific Validation Metrics

**Logic:** Standard classification metrics (accuracy, precision, recall) don't capture OCR-specific challenges. Character Error Rate (CER) measures individual character mistakes, while Word Error Rate (WER) captures full plate correctness—critical for real applications where partial plate reads are often useless. Top-2/3 accuracy shows if the correct character is among top predictions, indicating "close but not quite" scenarios. Similarity-aware accuracy gives partial credit for confusing similar characters (O vs 0), providing a more nuanced view of model performance that aligns with the similarity-aware loss. These metrics together give a complete picture of OCR quality beyond simple accuracy.

In [None]:
class OCRMetrics:
    """Accumulate character-level and plate-level OCR statistics across batches."""
    def __init__(self, similarity_matrix=None):
        if similarity_matrix is None:
            similarity_matrix = create_similarity_matrix()
        self.similarity_matrix = similarity_matrix
        self.reset()

    def reset(self):
        """Zero every running counter."""
        self.total_chars = 0
        self.correct_chars = 0
        self.total_plates = 0
        self.correct_plates = 0
        self.top2_correct = 0
        self.top3_correct = 0
        self.similarity_score = 0.0

    def update(self, predictions, targets, top_k_preds=None):
        """
        Fold one batch of character predictions into the running counters.

        Args:
            predictions: Tensor of predicted class indices [B]
            targets: Tensor of ground truth class indices [B]
            top_k_preds: Optional tensor of ranked candidates [B, k]; top-2 and
                top-3 hits are only counted when k is at least 2 and 3.
        """
        pred_ids = predictions.cpu().numpy()
        true_ids = targets.cpu().numpy()

        # Exact-match character counts.
        self.total_chars += len(true_ids)
        self.correct_chars += (pred_ids == true_ids).sum()

        # Partial credit: add S[target, pred] for every pair whose indices fall
        # inside the similarity matrix.
        n_classes = len(self.similarity_matrix)
        for guess, truth in zip(pred_ids, true_ids):
            if not (0 <= truth < n_classes and 0 <= guess < n_classes):
                continue
            self.similarity_score += self.similarity_matrix[truth][guess].item()

        if top_k_preds is None:
            return

        # Top-k hits: the target appears among the first 2 / first 3 candidates.
        ranked = top_k_preds.cpu().numpy()
        for row, truth in enumerate(true_ids):
            if ranked.shape[1] >= 2 and truth in ranked[row, :2]:
                self.top2_correct += 1
            if ranked.shape[1] >= 3 and truth in ranked[row, :3]:
                self.top3_correct += 1

    def update_plate(self, predicted_plate, target_plate):
        """
        Record one whole-plate comparison (feeds WER / plate accuracy).

        Args:
            predicted_plate: String of predicted plate characters
            target_plate: String of ground truth plate characters
        """
        self.total_plates += 1
        if predicted_plate == target_plate:
            self.correct_plates += 1

    def compute(self):
        """Return the metrics as a dict; empty when no characters were seen.

        CER is reported as ``1 - char_accuracy`` and WER as
        ``1 - plate_accuracy`` (exact-match based); the plate-level entries are
        present only after at least one ``update_plate`` call.
        """
        n_chars = self.total_chars
        if n_chars == 0:
            return {}

        char_accuracy = self.correct_chars / n_chars
        metrics = {
            'CER': 1.0 - char_accuracy,  # Character Error Rate
            'char_accuracy': char_accuracy,
            'top2_accuracy': self.top2_correct / n_chars,
            'top3_accuracy': self.top3_correct / n_chars,
            'similarity_aware_accuracy': self.similarity_score / n_chars,
        }

        n_plates = self.total_plates
        if n_plates > 0:
            plate_accuracy = self.correct_plates / n_plates
            metrics['WER'] = 1.0 - plate_accuracy  # Word Error Rate
            metrics['plate_accuracy'] = plate_accuracy

        return metrics

# Initialize OCR metrics tracker
ocr_metrics = OCRMetrics(similarity_matrix=similarity_matrix)

# Test metrics with dummy data: two look-alike confusions (O for 0, 1 for I)
# and one exact match (A).
test_preds = torch.tensor([CHAR_TO_IDX['O'], CHAR_TO_IDX['1'], CHAR_TO_IDX['A']])
test_targets = torch.tensor([CHAR_TO_IDX['0'], CHAR_TO_IDX['I'], CHAR_TO_IDX['A']])
# Only two ranked candidates per sample are supplied, so `update` counts
# top-2 hits but never top-3 hits here (top3_accuracy stays 0).
test_topk = torch.tensor([
    [CHAR_TO_IDX['O'], CHAR_TO_IDX['0']],
    [CHAR_TO_IDX['1'], CHAR_TO_IDX['I']],
    [CHAR_TO_IDX['A'], CHAR_TO_IDX['B']],
])

ocr_metrics.update(test_preds, test_targets, test_topk)
test_metrics = ocr_metrics.compute()

print('OCR Metrics Test Results:')
for key, value in test_metrics.items():
    print(f'  {key}: {value:.4f}')

print('\nOCR metrics module ready for validation.')

### 6.2. Multi-Task Loss Weights

**Logic:** The model performs three distinct tasks: segmentation (mask generation), localization (bounding boxes), and classification (character recognition). Default YOLO weighting may not be optimal for OCR, where classification accuracy is paramount. By explicitly balancing these losses (mask_weight=0.4, box_weight=0.3, cls_weight=0.3), we ensure the model doesn't over-prioritize segmentation quality at the expense of character recognition. These weights are tunable based on application needs—surveillance may prioritize localization, while data entry prioritizes classification.

In [None]:
# Multi-task loss weights configuration.
# These are copied onto CustomSegmentationTrainer (mask_weight / box_weight /
# cls_weight) and consumed in its compute_loss method.
MASK_WEIGHT = 0.4  # Segmentation mask loss weight
BOX_WEIGHT = 0.3   # Bounding box loss weight
CLS_WEIGHT = 0.3   # Character classification loss weight

print(f'Multi-task loss weights configured:')
print(f'  Mask (segmentation): {MASK_WEIGHT:.1f}')
print(f'  Box (localization): {BOX_WEIGHT:.1f}')
print(f'  Class (recognition): {CLS_WEIGHT:.1f}')
print(f'  Total: {MASK_WEIGHT + BOX_WEIGHT + CLS_WEIGHT:.1f}')

print('\nThese weights will be applied in the custom trainer to balance multi-task learning.')

### 6.3. Custom Segmentation Trainer with Enhanced Features

Integrates all improvements: dynamic similarity matrix updates, temperature annealing, adaptive weighting, OCR metrics, and multi-task loss balancing.

In [None]:
class CustomSegmentationTrainer(SegmentationTrainer):
    """
    Segmentation trainer carrying the similarity-aware character-loss machinery.

    Custom trainer with:
    - Dynamic similarity matrix updates
    - Temperature annealing
    - Adaptive loss weighting
    - OCR-specific metrics
    - Multi-task loss balancing

    The ``on_*`` hook methods are registered with the trainer's callback
    registry in ``__init__``; defining a method with an event's name is not
    enough for it to be invoked.

    NOTE(review): ``compute_loss`` relies on the parent class exposing and
    calling a method of that name. In Ultralytics the training loss is
    normally produced by the model's criterion, so confirm this override is
    actually reached; if it is not, the similarity-aware loss, the confusion
    statistics and the OCR metrics below are never updated.
    """
    def __init__(self, cfg=None, overrides=None, _callbacks=None):
        """Build the trainer, the character loss, the metrics tracker and the hooks.

        Args:
            cfg: Trainer configuration; ``None`` selects the Ultralytics
                default configuration instead of being forwarded as-is.
            overrides: Optional dict of configuration overrides.
            _callbacks: Optional callback registry supplied by the caller.
        """
        if cfg is None:
            # The parent constructors expect a real configuration object.
            from ultralytics.utils import DEFAULT_CFG
            cfg = DEFAULT_CFG
        super().__init__(cfg, overrides, _callbacks)

        # Prefer the values resolved by the trainer itself over notebook
        # globals, which may not be defined yet (or may disagree) at this point.
        total_epochs = getattr(self.args, 'epochs', None) or EPOCHS
        loss_device = getattr(self, 'device', None) or device

        # Initialize improved loss function
        self.character_loss_fn = ImprovedSimilarityAwareTopKLoss(
            num_classes=NUM_CLASSES,
            similarity_matrix=dynamic_sim_matrix.get_similarity_matrix(),
            k=2,
            initial_temperature=1.0,
            base_weight=0.7,
            topk_weight=0.3,
            epochs=total_epochs
        ).to(loss_device)

        # Initialize OCR metrics tracker
        self.ocr_metrics = OCRMetrics(similarity_matrix=similarity_matrix)

        # Multi-task loss weights
        self.mask_weight = MASK_WEIGHT
        self.box_weight = BOX_WEIGHT
        self.cls_weight = CLS_WEIGHT

        # Lifecycle events are dispatched through the callback registry, and
        # each callback receives the object that fired the event; the hooks
        # below only need this trainer, so that argument is ignored.
        self.add_callback('on_train_epoch_start', lambda _caller: self.on_train_epoch_start())
        self.add_callback('on_val_start', lambda _caller: self.on_val_start())
        self.add_callback('on_val_end', lambda _caller: self.on_val_end())

    def on_train_epoch_start(self):
        """Push the current epoch into the loss so its temperature is annealed."""
        self.character_loss_fn.update_epoch(self.epoch)

    def on_val_start(self):
        """Clear the OCR metric counters before a validation pass."""
        self.ocr_metrics.reset()

    def on_val_end(self):
        """Refresh the similarity matrix (every 10th epoch) and print OCR metrics."""
        # Update dynamic similarity matrix every 10 epochs, but only when
        # confusion statistics were actually collected since the last update.
        has_observations = dynamic_sim_matrix.confusion_matrix.sum() > 0
        if self.epoch % 10 == 0 and self.epoch > 0 and has_observations:
            new_similarity = dynamic_sim_matrix.update_similarity_matrix()
            buffer_device = self.character_loss_fn.similarity_matrix.device
            self.character_loss_fn.similarity_matrix = new_similarity.to(buffer_device)
            print(f'[Epoch {self.epoch}] Similarity matrix updated from validation confusion patterns.')

        # Compute and log OCR metrics
        ocr_results = self.ocr_metrics.compute()
        if ocr_results:
            print(f'\n[Epoch {self.epoch}] OCR Metrics:')
            for key, value in ocr_results.items():
                print(f'  {key}: {value:.4f}')

    def compute_loss(self, preds, batch):
        """Compute multi-task loss with balanced weights.

        Args:
            preds: Model outputs; element 3, when present, is treated as the
                character class logits.
            batch: Batch dict; ``batch['cls']`` supplies the class targets.

        Returns:
            The scaled base loss, plus ``cls_weight`` times the
            similarity-aware character loss when class logits and at least one
            non-negative target are available.

        NOTE(review): assumes ``preds[3]`` flattens to one row per entry of
        ``batch['cls']``; confirm the two really align one-to-one.
        """
        # Get base YOLO losses (box, mask, class)
        base_loss = super().compute_loss(preds, batch)

        # Apply multi-task weights to base loss components
        # Note: This is a simplified approach. It rescales the combined base
        # loss by the mean of the mask and box weights; in practice you'd
        # decompose base_loss into its components and weight them individually.
        weighted_base_loss = base_loss * (self.mask_weight + self.box_weight) / 2

        # Add custom similarity-aware character classification loss
        if len(preds) <= 3:
            return weighted_base_loss

        cls_logits = preds[3]
        cls_targets = batch['cls'].long()
        if cls_logits is None or cls_targets is None:
            return weighted_base_loss

        cls_logits_flat = cls_logits.view(-1, NUM_CLASSES)
        cls_targets_flat = cls_targets.view(-1)

        # Negative targets are treated as "no label" and skipped.
        valid_mask = cls_targets_flat >= 0
        if valid_mask.sum() == 0:
            return weighted_base_loss

        valid_logits = cls_logits_flat[valid_mask]
        valid_targets = cls_targets_flat[valid_mask]

        # Compute similarity-aware classification loss and apply its weight
        char_loss = self.character_loss_fn(valid_logits, valid_targets)
        weighted_char_loss = self.cls_weight * char_loss

        with torch.no_grad():
            # Update confusion matrix for dynamic similarity updates
            preds_cls = valid_logits.argmax(dim=1)
            dynamic_sim_matrix.update_confusion(
                preds_cls.cpu().numpy(),
                valid_targets.cpu().numpy()
            )

            # Update OCR metrics
            top_k_preds = torch.topk(valid_logits, k=3, dim=1)[1]
            self.ocr_metrics.update(preds_cls, valid_targets, top_k_preds)

        # Combine losses
        return weighted_base_loss + weighted_char_loss

print('Custom segmentation trainer with all improvements defined.')

## 7. Training Configuration (Hyperparameters & Augmentations)

Configure training hyperparameters tuned for character-level OCR on CCTV footage.


In [None]:
# Run length, batch size and input resolution (passed as epochs / batch / imgsz).
EPOCHS = 300
BATCH_SIZE = 16
IMG_SIZE = 224

# Optimizer and warmup settings (passed as lr0 / lrf / momentum / weight_decay /
# warmup_epochs / warmup_momentum / warmup_bias_lr to the training call).
LR0 = 0.01
LRF = 0.01
MOMENTUM = 0.937
WEIGHT_DECAY = 5e-4
WARMUP_EPOCHS = 3.0
WARMUP_MOMENTUM = 0.8
WARMUP_BIAS_LR = 0.1

# Augmentation settings (passed as hsv_h / hsv_s / hsv_v / erasing / fliplr /
# mosaic / mixup / copy_paste). Flip, mosaic, mixup and copy-paste are set to
# 0.0, i.e. disabled.
AUG_HSV_H = 0.015
AUG_HSV_S = 0.7
AUG_HSV_V = 0.4
AUG_ERASING = 0.4
AUG_FLIPLR = 0.0
AUG_MOSAIC = 0.0
AUG_MIXUP = 0.0
AUG_COPY_PASTE = 0.0

print('Hyperparameters configured.')


### 7.1. Hyperparameter and Augmentation Rationale

These settings aim to balance robustness, stability, and efficiency for text-level OCR on pre‑augmented character crops. SGD with momentum and weight decay, combined with a decaying learning rate and brief warmup (LR0 = 0.01, LRF = 0.01, MOMENTUM = 0.937, WEIGHT_DECAY = 5e-4, WARMUP_EPOCHS = 3), follows recommended YOLO training practice. Cosine annealing is known to improve convergence and final accuracy over simple step schedules in vision models, but note that the training call in Section 10 does not pass `cos_lr=True`; enable it there explicitly if a cosine‑annealed schedule is intended. [https://docs.ultralytics.com/guides/hyperparameter-tuning/](https://docs.ultralytics.com/guides/hyperparameter-tuning/)

Moderate HSV jitter and random erasing (AUG_HSV_*, AUG_ERASING = 0.4) extend lighting and occlusion variability to better match CCTV conditions while preserving character structure. [https://arxiv.org/abs/1902.07296](https://arxiv.org/abs/1902.07296)

Horizontal flips and detection-style augmentations (Mosaic, MixUp, Copy-Paste) are disabled because mirrored or composited text does not occur in the target domain and can degrade OCR performance. [https://home.nr.no/~eikvil/OCR.pdf](https://home.nr.no/~eikvil/OCR.pdf)


## 8. Initialize Model and Attach Custom Trainer

Load YOLO11-seg as the backbone and plug in the custom trainer with similarity-aware character loss.


In [None]:
# Load the pretrained YOLO11 nano segmentation checkpoint.
model = YOLO('yolo11n-seg.pt')
# NOTE(review): Ultralytics documents custom trainers being supplied as
# `model.train(trainer=CustomSegmentationTrainer, ...)`. Confirm that assigning
# the class to `model.trainer` is honoured by `train()`; if `train()` builds its
# own trainer, the custom loss and hooks are silently unused.
model.trainer = CustomSegmentationTrainer

print('Segmentation model initialized with custom trainer.')
print('Includes: temperature annealing, adaptive weighting, dynamic similarity updates, OCR metrics, and multi-task loss balancing.')

## 9. Optional: Early Stopping Callback

Halt training if validation loss stalls for a prolonged period to prevent overfitting and wasted compute.


In [None]:
# Module-level state shared across callback invocations.
best_val_loss = float('inf')   # lowest validation loss seen so far
no_improve_epochs = 0          # consecutive evaluations without >= 1% improvement
EARLY_STOP_PATIENCE = 50       # evaluations without improvement tolerated before stopping


def _extract_val_loss(metrics):
    """Return a scalar validation loss from a metrics dict, or None if unavailable.

    An explicit 'loss' entry wins; otherwise every entry whose key starts with
    'val/' and ends with 'loss' (e.g. 'val/box_loss', 'val/seg_loss') is summed.
    Anything that is not a dict yields None.
    """
    if not isinstance(metrics, dict):
        return None
    if metrics.get('loss') is not None:
        return float(metrics['loss'])
    parts = [
        float(value)
        for key, value in metrics.items()
        if str(key).startswith('val/') and str(key).endswith('loss')
    ]
    return sum(parts) if parts else None


def early_stopping_callback(trainer):
    """Request a training stop when the validation loss stops improving.

    An evaluation counts as an improvement when the loss drops by at least 1%
    relative to the best value seen so far. After ``EARLY_STOP_PATIENCE``
    consecutive evaluations without improvement, ``trainer.stop`` is set to True.

    Args:
        trainer: Object exposing ``metrics`` (a dict of metric values),
            ``epoch`` and a writable ``stop`` attribute. When ``metrics`` is
            missing, not a dict, or carries no validation loss, the call is a
            no-op, so the callback is harmless if attached to an event whose
            caller is not the trainer.
    """
    global best_val_loss, no_improve_epochs
    val_loss = _extract_val_loss(getattr(trainer, 'metrics', None))
    if val_loss is None:
        return

    # NaN (val_loss != val_loss) or infinite losses can never be an
    # improvement and must not become the reference value.
    is_finite = val_loss == val_loss and abs(val_loss) != float('inf')

    if best_val_loss == float('inf'):
        if is_finite:
            # First usable measurement: just record it.
            best_val_loss = val_loss
            no_improve_epochs = 0
            return
        no_improve_epochs += 1
    else:
        improvement = (
            (best_val_loss - val_loss) / max(best_val_loss, 1e-8) * 100.0
            if is_finite else float('-inf')
        )
        if improvement >= 1.0:
            best_val_loss = val_loss
            no_improve_epochs = 0
        else:
            no_improve_epochs += 1

    if no_improve_epochs >= EARLY_STOP_PATIENCE:
        print(f'Early stopping at epoch {trainer.epoch}')
        trainer.stop = True

# Attach to 'on_fit_epoch_end': that event is fired by the trainer once per
# epoch after validation, so the callback receives the trainer (with its
# `metrics`, `epoch` and `stop` attributes). 'on_val_end' is fired by the
# validator, which passes itself instead of the trainer.
model.add_callback('on_fit_epoch_end', early_stopping_callback)
print('Early stopping callback registered.')


## 10. Train Segmentation Model with Similarity-Aware Character Loss

Train YOLO11-seg on polygon annotations with the custom trainer. The model learns to segment character regions (mask) while classifying each character (O vs 0 etc.) with reduced penalties for visually similar confusions. Make sure `DATA_YAML_PATH` points to your dataset.


In [None]:
# Launch training with the hyperparameters and augmentation settings defined above.
# NOTE(review): no `trainer=` argument is passed here. Ultralytics documents
# custom trainers being supplied as `model.train(trainer=...)`; confirm that the
# `model.trainer = CustomSegmentationTrainer` assignment made when the model was
# created is actually used, otherwise this run trains with the stock loss.
results = model.train(
    data=DATA_YAML_PATH,
    epochs=EPOCHS,
    batch=BATCH_SIZE,
    imgsz=IMG_SIZE,
    optimizer='SGD',
    lr0=LR0,
    lrf=LRF,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
    warmup_epochs=WARMUP_EPOCHS,
    warmup_momentum=WARMUP_MOMENTUM,
    warmup_bias_lr=WARMUP_BIAS_LR,
    hsv_h=AUG_HSV_H,
    hsv_s=AUG_HSV_S,
    hsv_v=AUG_HSV_V,
    erasing=AUG_ERASING,
    fliplr=AUG_FLIPLR,
    mosaic=AUG_MOSAIC,
    mixup=AUG_MIXUP,
    copy_paste=AUG_COPY_PASTE,
    project=RUN_PROJECT,
    name=RUN_NAME,
    exist_ok=True,
    val=True,
    save=True,
    save_period=10,
    amp=True,
    # GPU index 0 when CUDA was detected, otherwise CPU.
    device=0 if device == 'cuda' else 'cpu',
    seed=42,
    deterministic=True,
)

print('Training completed.')
print('Results directory:', results.save_dir)


## 11. Export Best Model

Copy the best weights to the export directory for inference and deployment.


In [None]:
import os, shutil

# Source: best checkpoint inside the run directory. Destination: run-named copy in EXPORT_DIR.
best_weights_path = os.path.join(str(results.save_dir), 'weights', 'best.pt')
export_path = os.path.join(EXPORT_DIR, f'{RUN_NAME}_best.pt')

if not os.path.exists(best_weights_path):
    print('best.pt not found.')
else:
    # copy2 copies the file contents together with its metadata.
    shutil.copy2(best_weights_path, export_path)
    print('Best weights exported to:', export_path)


## 12. Inference on Test Images

Load the trained model and run inference to validate segmentation and character classification.


In [None]:
# Prefer the exported copy of the weights; fall back to the run directory's best.pt.
weights_for_inference = export_path if os.path.exists(export_path) else best_weights_path
inference_model = YOLO(weights_for_inference)

# Fill in image paths to try the model; the list is empty by default, so the
# loop below does nothing until entries are added.
TEST_IMAGE_PATHS = [
    # '/content/test_plate_1.jpg',
    # '/content/test_plate_2.jpg',
]

for img_path in TEST_IMAGE_PATHS:
    if os.path.exists(img_path):
        results_inf = inference_model(img_path, imgsz=IMG_SIZE)
        print(f'\nInference on {img_path}')
        for r in results_inf:
            # Report the instance count and the predicted class ids when present.
            if r.masks is not None:
                print(f'Detected {len(r.masks)} character instances')
            if r.boxes is not None:
                print(f'Boxes: {r.boxes.cls}')
    else:
        print(f'Test image not found: {img_path}')
