In [None]:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
import cv2
import kagglehub
from tqdm import tqdm
import matplotlib.pyplot as plt
from PIL import Image
from torch.nn import functional as F

In [None]:
# Select the compute device: CUDA when one is visible, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Fetch the dataset through kagglehub; the returned value is used below as the
# dataset root directory
print("Downloading dataset...")
dataset_path = kagglehub.dataset_download('fareselmenshawii/large-license-plate-dataset')
print(f"Dataset downloaded to: {dataset_path}")

# Image / label directories for every split, all rooted at dataset_path.
# Iteration order (split outer, kind inner) matches the unpacking order.
(train_images_path, train_labels_path,
 val_images_path, val_labels_path,
 test_images_path, test_labels_path) = (
    os.path.join(dataset_path, f'{kind}/{split}')
    for split in ('train', 'val', 'test')
    for kind in ('images', 'labels')
)


In [None]:
# Data augmentation and preprocessing.
#
# Only photometric augmentation is applied to training images. These pipelines
# are applied to the image alone (the dataset does not transform the box
# labels), so a geometric augmentation such as RandomAffine would rotate /
# shift / scale the pixels while the target box stays where it was, teaching
# the regressor from misaligned labels. Resize is safe because the labels are
# normalized to [0, 1] relative to the image size.
transform_train = transforms.Compose([
    transforms.Resize((320, 320)),  # Larger input size
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Validation / test preprocessing: deterministic resize + normalization only.
transform_val = transforms.Compose([
    transforms.Resize((320, 320)),  # Larger input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class LicensePlateDataset(Dataset):
    """Images paired with a single normalized bounding box read from YOLO label files.

    Each item is ``(image, boxes, metadata)``:
      * ``image``    - the RGB image after ``transform`` (a PIL image if no
                       transform is given).
      * ``boxes``    - float32 tensor of shape ``(1, 4)`` holding
                       ``[x_min, y_min, x_max, y_max]`` clipped to ``[0, 1]``.
                       Only the first valid label line is used; when no label
                       is available the box is all zeros.
      * ``metadata`` - dict with ``img_name``, ``orig_width``, ``orig_height``.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        label_dir: Directory containing ``<image stem>.txt`` YOLO label files.
        transform: Optional callable applied to the PIL image only.
        is_training: Stored on the instance; not read anywhere in this class.
    """

    def __init__(self, image_dir, label_dir, transform=None, is_training=False):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        self.is_training = is_training

        # os.listdir returns entries in arbitrary order; sort so that sample
        # indices (and anything that takes "the first N samples") are
        # reproducible across runs and machines.
        self.image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))
        print(f"Found {len(self.image_files)} images in {image_dir}")

    def __len__(self):
        """Number of images found in ``image_dir``."""
        return len(self.image_files)

    @staticmethod
    def _yolo_line_to_box(line):
        """Convert one YOLO label line into a clipped ``[x_min, y_min, x_max, y_max]`` list.

        The line is expected to hold ``class_id x_center y_center width height``;
        the class id is ignored. Returns ``None`` when the line has fewer than
        five whitespace-separated fields.
        """
        parts = line.strip().split()
        if len(parts) < 5:
            return None

        x_center, y_center, width, height = (float(v) for v in parts[1:5])
        corners = (
            x_center - width / 2,
            y_center - height / 2,
            x_center + width / 2,
            y_center + height / 2,
        )
        # Clip values to ensure they are within [0, 1]
        return [min(1.0, max(0.0, c)) for c in corners]

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_name)

        # convert() returns a new in-memory image, so the file handle can be
        # released as soon as the with-block ends.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Original dimensions, kept for denormalization later
        orig_width, orig_height = image.size

        label_name = os.path.splitext(img_name)[0] + '.txt'
        label_path = os.path.join(self.label_dir, label_name)

        # For simplicity and consistent batch shapes only the first valid box
        # is used, so reading stops as soon as one is found.
        box = None
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    box = self._yolo_line_to_box(line)
                    if box is not None:
                        break

        # No label file / no valid line: fall back to a dummy all-zero box
        if box is None:
            box = [0.0, 0.0, 0.0, 0.0]

        # The transform only sees the image; the box is left untouched
        if self.transform:
            image = self.transform(image)

        boxes = torch.tensor([box], dtype=torch.float32)

        # Metadata for visualization and denormalization
        metadata = {
            'img_name': img_name,
            'orig_width': orig_width,
            'orig_height': orig_height
        }

        return image, boxes, metadata

# Define Squeeze-and-Excitation Block
class SEBlock(nn.Module):
    """Channel attention: rescales each channel by a learned gate in (0, 1).

    The input is globally average-pooled to one value per channel, passed
    through a two-layer bottleneck MLP ending in a sigmoid, and the resulting
    per-channel gate multiplies the input.

    Args:
        channel: Number of input (and output) channels.
        reduction: Bottleneck reduction ratio. The hidden width is
            ``channel // reduction`` but never less than 1, so small channel
            counts do not produce a zero-width (degenerate) bottleneck.
    """

    def __init__(self, channel, reduction=16):
        super(SEBlock, self).__init__()
        # Guard against channel < reduction, which would otherwise create
        # Linear layers with 0 features and a constant 0.5 gate.
        hidden = max(1, channel // reduction)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise; output has the same shape as ``x``."""
        b, c, _, _ = x.size()
        # Squeeze: (b, c, h, w) -> (b, c)
        y = self.avg_pool(x).view(b, c)
        # Excite: per-channel gate reshaped for broadcasting over h, w
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

# ResNeXt-style bottleneck (1x1 -> grouped 3x3 -> 1x1) with SE channel attention
class SEResNeXtBlock(nn.Module):
    """Residual bottleneck block with a grouped 3x3 convolution and an SE gate.

    Output has ``planes * expansion`` channels; spatial size is divided by
    ``stride``. When the input and output shapes differ, the skip path is a
    strided 1x1 convolution followed by batch norm.

    Args:
        inplanes: Input channel count.
        planes: Width of the bottleneck (before expansion).
        cardinality: Number of groups in the 3x3 convolution.
        stride: Stride of the 3x3 convolution (and of the skip projection).
        reduction: Reduction ratio forwarded to ``SEBlock``.
    """

    expansion = 2  # Output channels = planes * expansion

    def __init__(self, inplanes, planes, cardinality=32, stride=1, reduction=16):
        super(SEResNeXtBlock, self).__init__()
        out_planes = planes * self.expansion

        # 1x1 reduce
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        # Grouped 3x3 (the ResNeXt part); carries the block's stride
        self.conv2 = nn.Conv2d(
            planes, planes,
            kernel_size=3, stride=stride, padding=1,
            groups=cardinality, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(planes)

        # 1x1 expand
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Channel attention applied to the expanded features
        self.se = SEBlock(out_planes, reduction)

        self.relu = nn.ReLU(inplace=True)

        # Projection on the skip path only when shapes would not match
        needs_projection = stride != 1 or inplanes != out_planes
        self.downsample = nn.Sequential(
            nn.Conv2d(inplanes, out_planes, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_planes),
        ) if needs_projection else None

    def forward(self, x):
        """Run the bottleneck, gate it with SE, add the skip path, apply ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.se(self.bn3(self.conv3(out)))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

# Feature Pyramid Network module for multi-scale feature extraction
class FPN(nn.Module):
    """Top-down feature pyramid over a list of backbone feature maps.

    Every input level gets a 1x1 lateral convolution that maps it to
    ``out_channels``. Starting from the deepest level, the merged map is
    upsampled (nearest neighbour) and added to the next shallower lateral map.
    Each merged map is then smoothed by its own 3x3 convolution to produce the
    output for that level.

    Args:
        in_channels_list: Channel count of each input level, shallow to deep.
        out_channels: Channel count of every output level.
    """

    def __init__(self, in_channels_list, out_channels):
        super(FPN, self).__init__()

        # Lateral connections (1x1 convolutions to reduce channel dimensions)
        self.lateral_convs = nn.ModuleList([
            nn.Conv2d(in_channels, out_channels, kernel_size=1)
            for in_channels in in_channels_list
        ])

        # Output smoothing (3x3 convolutions applied to each merged map)
        self.fpn_convs = nn.ModuleList([
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
            for _ in range(len(in_channels_list))
        ])

    def forward(self, features):
        """Build the pyramid.

        Args:
            features: Sequence of feature maps ordered shallow to deep, one per
                entry of ``in_channels_list``.

        Returns:
            List of tensors in the same order as ``features``, each with
            ``out_channels`` channels and the spatial size of its input level.

        Raises:
            ValueError: If the number of feature maps does not match the
                number of levels this module was built for.
        """
        if len(features) != len(self.lateral_convs):
            raise ValueError(
                f"FPN expected {len(self.lateral_convs)} feature maps, got {len(features)}"
            )

        # Deepest level: no top-down input, just lateral + smoothing
        last_inner = self.lateral_convs[-1](features[-1])
        results = [self.fpn_convs[-1](last_inner)]

        # Walk from the second-deepest level towards the shallowest
        for idx in range(len(features) - 2, -1, -1):
            lateral = self.lateral_convs[idx](features[idx])

            # Propagate the *merged* (pre-smoothing) map downwards. The 3x3
            # convolutions only produce the per-level outputs; feeding their
            # output back into the top-down path would stack smoothing
            # convolutions from level to level.
            top_down = F.interpolate(last_inner, size=lateral.shape[-2:], mode='nearest')
            last_inner = lateral + top_down

            results.append(self.fpn_convs[idx](last_inner))

        # Collected deep -> shallow; callers expect shallow -> deep
        results.reverse()
        return results

# Single-box license plate regressor: SE-ResNeXt backbone + FPN + conv head
class ImprovedLicensePlateDetector(nn.Module):
    """Predicts one box per image as four sigmoid-squashed values.

    Pipeline: 7x7 stem convolution and max-pool, four SE-ResNeXt stages, an
    FPN over the four stage outputs, a small convolutional head with global
    average pooling, and a final linear layer with 4 outputs.

    NOTE(review): ``forward`` consumes only the last (deepest) FPN output;
    the shallower pyramid levels are computed but not used by the head.

    Args:
        cardinality: Group count for the grouped convolutions in every stage.
    """

    def __init__(self, cardinality=32):
        super(ImprovedLicensePlateDetector, self).__init__()

        # Stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Backbone stages; each outputs planes * SEResNeXtBlock.expansion channels
        self.layer1 = self._make_layer(inplanes=64, planes=64, blocks=3,
                                       cardinality=cardinality, stride=1)
        self.layer2 = self._make_layer(inplanes=128, planes=128, blocks=4,
                                       cardinality=cardinality, stride=2)
        self.layer3 = self._make_layer(inplanes=256, planes=256, blocks=6,
                                       cardinality=cardinality, stride=2)
        self.layer4 = self._make_layer(inplanes=512, planes=512, blocks=3,
                                       cardinality=cardinality, stride=2)

        # FPN over the four stage outputs (channel counts after expansion)
        self.fpn = FPN([128, 256, 512, 1024], out_channels=256)

        # Regression head: two conv-BN-ReLU units, then global average pooling
        self.bbox_head = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1))
        )

        # Pooled 256-d descriptor -> 4 box values
        self.fc = nn.Linear(256, 4)

        self._initialize_weights()

    def _make_layer(self, inplanes, planes, blocks, cardinality, stride=1):
        """Build one stage: a (possibly strided) first block plus ``blocks - 1`` more."""
        widened = planes * SEResNeXtBlock.expansion
        stage = [SEResNeXtBlock(inplanes, planes, cardinality, stride)]
        # Remaining blocks keep stride 1 and take the widened channel count
        stage.extend(
            SEResNeXtBlock(widened, planes, cardinality) for _ in range(blocks - 1)
        )
        return nn.Sequential(*stage)

    def _initialize_weights(self):
        """Kaiming-normal (fan_out, relu) for conv/linear weights; BN to weight 1, bias 0."""
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                # Many layers here are created with bias=False
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return a ``(batch, 4)`` tensor of values in (0, 1)."""
        # Stem
        feat = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        # Backbone: keep every stage output for the FPN
        stage_outputs = []
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feat = stage(feat)
            stage_outputs.append(feat)

        # Only the deepest pyramid level feeds the regression head
        deepest = self.fpn(stage_outputs)[-1]

        pooled = torch.flatten(self.bbox_head(deepest), 1)

        # Sigmoid constrains the outputs to the [0, 1] range of the labels
        return torch.sigmoid(self.fc(pooled))

# IoU-based regression loss: 1 - mean IoU over the batch
class IoULoss(nn.Module):
    """Loss equal to ``1 - mean(IoU(pred, target))``.

    Both inputs are indexed as ``[:, 0..3]`` and interpreted as
    ``[x1, y1, x2, y2]`` per row.
    """

    def __init__(self):
        super(IoULoss, self).__init__()

    def forward(self, pred, target):
        # Per-row corner coordinates
        p_x1, p_y1, p_x2, p_y2 = (pred[:, i] for i in range(4))
        t_x1, t_y1, t_x2, t_y2 = (target[:, i] for i in range(4))

        # Box areas
        area_pred = (p_x2 - p_x1) * (p_y2 - p_y1)
        area_target = (t_x2 - t_x1) * (t_y2 - t_y1)

        # Overlap extents, floored at zero for disjoint boxes
        overlap_w = (torch.min(p_x2, t_x2) - torch.max(p_x1, t_x1)).clamp(min=0)
        overlap_h = (torch.min(p_y2, t_y2) - torch.max(p_y1, t_y1)).clamp(min=0)
        overlap = overlap_w * overlap_h

        # Union via inclusion-exclusion; epsilon keeps the division finite
        total = area_pred + area_target - overlap
        ratios = overlap / (total + 1e-6)

        return 1 - ratios.mean()

# Weighted blend of IoU loss and MSE loss
class CombinedLoss(nn.Module):
    """Computes ``alpha * IoULoss + (1 - alpha) * MSELoss``.

    Args:
        alpha: Weight given to the IoU term; the MSE term gets ``1 - alpha``.
    """

    def __init__(self, alpha=0.5):
        super(CombinedLoss, self).__init__()
        self.alpha = alpha
        self.iou_loss = IoULoss()
        self.mse_loss = nn.MSELoss()

    def forward(self, pred, target):
        """Return the blended scalar loss for ``pred`` against ``target``."""
        iou_term = self.iou_loss(pred, target)
        mse_term = self.mse_loss(pred, target)
        return self.alpha * iou_term + (1 - self.alpha) * mse_term

# Row-wise IoU used for evaluation metrics
def calculate_iou(boxes1, boxes2):
    """
    Intersection-over-union between corresponding rows of two box tensors.

    Args:
        boxes1: tensor of shape [batch_size, 4] - [x_min, y_min, x_max, y_max]
        boxes2: tensor of shape [batch_size, 4] - [x_min, y_min, x_max, y_max]

    Returns:
        IoU: tensor of shape [batch_size]
    """
    def _area(b):
        # width * height from corner coordinates
        return (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # Corners of the overlap rectangle
    left = torch.max(boxes1[:, 0], boxes2[:, 0])
    top = torch.max(boxes1[:, 1], boxes2[:, 1])
    right = torch.min(boxes1[:, 2], boxes2[:, 2])
    bottom = torch.min(boxes1[:, 3], boxes2[:, 3])

    # Negative extents mean the boxes do not overlap along that axis
    overlap = (right - left).clamp(min=0) * (bottom - top).clamp(min=0)

    union_area = _area(boxes1) + _area(boxes2) - overlap

    # Small epsilon avoids division by zero for degenerate boxes
    return overlap / (union_area + 1e-6)

# Training function with additional validation checks
def train(model, train_loader, optimizer, criterion, epoch, scheduler=None):
    """Run one training epoch and return ``(mean_loss, mean_iou)``.

    Both returned values are averaged per sample over the batches that were
    actually used for an optimizer step. Batches whose loss is not finite are
    skipped and do not enter the averages.

    Args:
        model: Network mapping an image batch to ``(batch, 4)`` boxes.
        train_loader: Yields ``(images, targets, metadata)``; ``targets`` is
            squeezed on dim 1 to match the model output.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, boxes)``.
        epoch: Zero-based epoch index (used only for the progress-bar label).
        scheduler: Optional LR scheduler, stepped once at the end of the
            epoch. ``ReduceLROnPlateau`` receives the epoch training loss.

    Returns:
        Tuple ``(epoch_loss, epoch_iou)`` of Python floats.
    """
    model.train()
    loss_sum = 0.0      # sum over samples of the (batch-mean) loss
    iou_sum = 0.0       # sum over samples of IoU
    samples_seen = 0    # samples from batches that were not skipped

    progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch+1} Train")

    for batch_idx, (images, targets, _) in progress_bar:
        # The dataset yields exactly one box per image: drop the singleton
        # dimension so targets match the model output.
        boxes = targets.squeeze(1)

        images = images.to(device)
        boxes = boxes.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, boxes)

        # Skip NaN *and* inf losses: back-propagating either would poison the
        # weights through the optimizer step.
        if not torch.isfinite(loss).all():
            print(f"Warning: non-finite loss detected in batch {batch_idx}. Skipping batch.")
            continue

        loss.backward()

        # Gradient clipping to prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        # Weight by batch size so a short final batch does not count as much
        # as a full one.
        batch_size = images.size(0)
        loss_sum += loss.item() * batch_size
        with torch.no_grad():
            iou_sum += calculate_iou(outputs, boxes).sum().item()
        samples_seen += batch_size

        progress_bar.set_postfix({
            'loss': loss_sum / samples_seen,
            'iou': iou_sum / samples_seen
        })

    # max(..., 1) keeps the division defined for an empty loader or when
    # every batch was skipped.
    denom = max(samples_seen, 1)
    epoch_loss = loss_sum / denom
    epoch_iou = iou_sum / denom

    # Step the scheduler if provided
    if scheduler is not None:
        if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(epoch_loss)
        else:
            scheduler.step()

    return epoch_loss, epoch_iou

# Validation function
def validate(model, val_loader, criterion):
    """Evaluate ``model`` on ``val_loader`` and return ``(mean_loss, mean_iou)``.

    Metrics are averaged per sample rather than per batch, so a short final
    batch (e.g. a single leftover image) is not over-weighted.

    Args:
        model: Network mapping an image batch to ``(batch, 4)`` boxes.
        val_loader: Yields ``(images, targets, metadata)``; ``targets`` is
            squeezed on dim 1 to match the model output.
        criterion: Loss called as ``criterion(outputs, boxes)``.

    Returns:
        Tuple ``(val_loss, val_iou)`` of Python floats.
    """
    model.eval()
    loss_sum = 0.0
    iou_sum = 0.0
    samples_seen = 0

    with torch.no_grad():
        progress_bar = tqdm(enumerate(val_loader), total=len(val_loader), desc="Validation")

        for batch_idx, (images, targets, _) in progress_bar:
            # One box per image: drop the singleton dimension
            boxes = targets.squeeze(1)

            images = images.to(device)
            boxes = boxes.to(device)

            outputs = model(images)
            loss = criterion(outputs, boxes)

            # criterion returns a batch mean; re-weight it by the batch size
            batch_size = images.size(0)
            loss_sum += loss.item() * batch_size
            iou_sum += calculate_iou(outputs, boxes).sum().item()
            samples_seen += batch_size

            progress_bar.set_postfix({
                'loss': loss_sum / samples_seen,
                'iou': iou_sum / samples_seen
            })

    # Guard against an empty loader
    denom = max(samples_seen, 1)
    val_loss = loss_sum / denom
    val_iou = iou_sum / denom

    return val_loss, val_iou

# Inference function to visualize predictions
def inference(model, test_loader, num_samples=5, input_size=320):
    """Predict boxes for the first ``num_samples`` images and print a report.

    Args:
        model: Network mapping an image batch to ``(batch, 4)`` boxes.
        test_loader: Yields ``(images, targets, metadata)`` where ``metadata``
            is a dict with ``img_name``, ``orig_width`` and ``orig_height``
            entries indexable per sample.
        num_samples: Maximum number of images to process; a value <= 0 means
            process every image in the loader.
        input_size: Side length used to convert normalized boxes to pixel
            coordinates in the printed report. Defaults to 320.

    Returns:
        List of dicts with keys ``img_name``, ``orig_width``, ``orig_height``,
        ``gt_box``, ``pred_box`` (numpy arrays of 4 normalized values) and
        ``iou`` (float).
    """
    model.eval()
    results = []
    limited = num_samples > 0

    with torch.no_grad():
        progress_bar = tqdm(enumerate(test_loader), total=len(test_loader), desc="Inference")

        for batch_idx, (images, targets, metadata) in progress_bar:
            # Stop as soon as enough samples were collected, so no further
            # batches are loaded needlessly.
            if limited and len(results) >= num_samples:
                break

            # For simplicity, process one image at a time
            for i in range(images.size(0)):
                if limited and len(results) >= num_samples:
                    break

                image = images[i].unsqueeze(0).to(device)

                # The dataset provides exactly one box per image
                gt_box = targets[i].squeeze().cpu().numpy()

                # Batched metadata may arrive as tensors; convert to plain
                # Python values so the report prints cleanly.
                img_name = metadata['img_name'][i]
                orig_width = int(metadata['orig_width'][i])
                orig_height = int(metadata['orig_height'][i])

                pred_box = model(image).squeeze().cpu().numpy()

                iou = calculate_iou(
                    torch.tensor(pred_box).unsqueeze(0),
                    torch.tensor(gt_box).unsqueeze(0)
                ).item()

                results.append({
                    'img_name': img_name,
                    'orig_width': orig_width,
                    'orig_height': orig_height,
                    'gt_box': gt_box,
                    'pred_box': pred_box,
                    'iou': iou
                })

    # Print results
    print("\nInference Results:")
    for i, result in enumerate(results):
        print(f"\nImage {i+1}: {result['img_name']}")
        print(f"Original dimensions: {result['orig_width']}x{result['orig_height']}")
        print(f"Ground truth box (normalized): {result['gt_box']}")
        print(f"Predicted box (normalized): {result['pred_box']}")
        print(f"IoU: {result['iou']:.4f}")

        # Pixel coordinates in the resized (input_size x input_size) image
        gt_box_pixels = [float(v) * input_size for v in result['gt_box'][:4]]
        pred_box_pixels = [float(v) * input_size for v in result['pred_box'][:4]]

        print(f"Ground truth box (pixels): {gt_box_pixels}")
        print(f"Predicted box (pixels): {pred_box_pixels}")

    return results

# Visualize predictions
def visualize_predictions(results, test_loader, input_size=320):
    """Plot ground-truth (green) and predicted (red) boxes for each result.

    The loader is scanned once to collect the images named in ``results``;
    figures are then shown in the order of ``results``. Results whose image is
    not found in the loader are skipped. Any exception is reported and
    swallowed so that a plotting problem does not abort the surrounding run.

    Args:
        results: List of dicts as produced by ``inference`` (uses the keys
            ``img_name``, ``gt_box``, ``pred_box``, ``iou``).
        test_loader: Yields ``(images, targets, metadata)`` with normalized
            image tensors and ``metadata['img_name']`` per sample.
        input_size: Side length of the displayed image, used to scale the
            normalized boxes to pixels. Defaults to 320.
    """
    try:
        import matplotlib.pyplot as plt

        # Single pass over the loader instead of restarting the scan for
        # every result.
        wanted = {result['img_name'] for result in results}
        found = {}
        if wanted:
            for images, _, metadata in test_loader:
                for k, name in enumerate(metadata['img_name']):
                    if name in wanted and name not in found:
                        found[name] = images[k]
                if len(found) == len(wanted):
                    break

        # Constants used to undo the Normalize step of the input pipeline
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])

        for result in results:
            tensor = found.get(result['img_name'])
            if tensor is None:
                continue

            # CHW -> HWC, then denormalize back to [0, 1] for display
            image = tensor.permute(1, 2, 0).cpu().numpy()
            image = np.clip(image * std + mean, 0, 1)

            plt.figure(figsize=(10, 10))
            plt.imshow(image)

            overlays = (
                (result['gt_box'], 'g', 'Ground Truth'),
                (result['pred_box'], 'r', 'Prediction'),
            )
            for box, color, label in overlays:
                x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
                plt.gca().add_patch(plt.Rectangle(
                    (x_min * input_size, y_min * input_size),
                    (x_max - x_min) * input_size,
                    (y_max - y_min) * input_size,
                    fill=False, edgecolor=color, linewidth=2, label=label))

            plt.title(f"Image: {result['img_name']}, IoU: {result['iou']:.4f}")
            # Labels are attached to the patches, so the legend cannot be
            # mismatched with the drawing order.
            plt.legend()
            plt.show()
    except Exception as e:
        # Deliberately best-effort: visualization must not break training/evaluation
        print(f"Error in visualization: {e}")
        print("Skipping visualization. This doesn't affect model training/evaluation.")

# Main function
def main():
    """Train the detector, keep the best-validation weights, and report results.

    Steps: build the three datasets and loaders, train for a fixed number of
    epochs with AdamW and a warmup + cosine LR schedule, snapshot the weights
    whenever validation IoU improves (also written to
    ``best_license_plate_detector.pth``), plot the metric curves to
    ``training_metrics.png``, restore the best snapshot, re-validate, and run
    inference and visualization on a few test images.

    Returns:
        Tuple ``(model, results)``: the model loaded with the best weights and
        the list returned by ``inference``.
    """
    # Create datasets with different transforms for training and validation
    print("Loading datasets...")
    train_dataset = LicensePlateDataset(train_images_path, train_labels_path, transform=transform_train, is_training=True)
    val_dataset = LicensePlateDataset(val_images_path, val_labels_path, transform=transform_val)
    test_dataset = LicensePlateDataset(test_images_path, test_labels_path, transform=transform_val)

    # Create data loaders
    batch_size = 16  # Smaller batch size for more iterations and stable convergence
    # Reduce num_workers if running into memory issues
    num_workers = 2 if torch.cuda.is_available() else 0
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=num_workers)

    # Initialize model
    print("Initializing improved model...")
    model = ImprovedLicensePlateDetector(cardinality=16).to(device)

    # Define loss function
    criterion = CombinedLoss(alpha=0.7)  # Higher weight to IoU loss

    # Define optimizer with weight decay
    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

    # Learning rate scheduler with warmup
    num_epochs = 20
    warmup_epochs = 2

    def lr_lambda(epoch):
        """LR multiplier: linear warmup for ``warmup_epochs``, then cosine decay."""
        if epoch < warmup_epochs:
            # Linear warmup
            return (epoch + 1) / warmup_epochs
        else:
            # Cosine annealing
            return 0.5 * (1 + np.cos(np.pi * (epoch - warmup_epochs) / (num_epochs - warmup_epochs)))

    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda)

    # Initialize best model state and metrics
    best_model_state = None
    best_val_iou = 0.0

    # Lists to store metrics for plotting
    train_losses = []
    train_ious = []
    val_losses = []
    val_ious = []

    # Training loop
    print(f"Starting training for {num_epochs} epochs...")
    for epoch in range(num_epochs):
        # Train for one epoch (the scheduler is stepped inside train)
        epoch_loss, epoch_iou = train(model, train_loader, optimizer, criterion, epoch, scheduler)
        train_losses.append(epoch_loss)
        train_ious.append(epoch_iou)

        # Validate model
        val_loss, val_iou = validate(model, val_loader, criterion)
        val_losses.append(val_loss)
        val_ious.append(val_iou)

        # Print epoch results
        print(f"Epoch {epoch+1}/{num_epochs} - "
              f"Train Loss: {epoch_loss:.4f}, Train IoU: {epoch_iou:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val IoU: {val_iou:.4f}")

        # Save best model
        if val_iou > best_val_iou:
            best_val_iou = val_iou
            # state_dict() returns references to the live parameter tensors and
            # dict.copy() is shallow, so the snapshot must clone every tensor;
            # otherwise it keeps changing as training continues and the
            # "best" weights end up being the last-epoch weights.
            best_model_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }
            print(f"  ↳ New best model saved with validation IoU: {best_val_iou:.4f}")

            # Save checkpoint
            checkpoint = {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_iou': val_iou,
                'val_loss': val_loss,
            }
            torch.save(checkpoint, 'best_license_plate_detector.pth')

    # Plot training and validation metrics
    plt.figure(figsize=(12, 5))

    # Plot losses
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()

    # Plot IoUs
    plt.subplot(1, 2, 2)
    plt.plot(train_ious, label='Train IoU')
    plt.plot(val_ious, label='Validation IoU')
    plt.xlabel('Epoch')
    plt.ylabel('IoU')
    plt.title('Training and Validation IoU')
    plt.legend()

    plt.tight_layout()
    plt.savefig('training_metrics.png')
    plt.show()

    # Load best model for final evaluation
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Final validation
    final_val_loss, final_val_iou = validate(model, val_loader, criterion)
    print(f"\nFinal Validation - Loss: {final_val_loss:.4f}, IoU: {final_val_iou:.4f}")

    # Run inference on test set
    print("\nRunning inference on test set...")
    results = inference(model, test_loader, num_samples=5)

    # Visualize predictions
    print("\nVisualizing predictions...")
    visualize_predictions(results, test_loader)

    print("\nTraining complete!")

    return model, results

if __name__ == "__main__":
    main()

Using device: cuda
Downloading dataset...
Dataset downloaded to: /kaggle/input/large-license-plate-dataset
Loading datasets...
Found 25470 images in /kaggle/input/large-license-plate-dataset/images/train
Found 1073 images in /kaggle/input/large-license-plate-dataset/images/val
Found 386 images in /kaggle/input/large-license-plate-dataset/images/test
Initializing improved model...
Starting training for 20 epochs...


Epoch 1 Train: 100%|██████████| 1592/1592 [09:51<00:00,  2.69it/s, loss=0.61, iou=0.153]
Validation: 100%|██████████| 68/68 [00:19<00:00,  3.43it/s, loss=0.681, iou=0.0538]


Epoch 1/20 - Train Loss: 0.6103, Train IoU: 0.1531, Val Loss: 0.6811, Val IoU: 0.0538
  ↳ New best model saved with validation IoU: 0.0538


Epoch 2 Train: 100%|██████████| 1592/1592 [09:23<00:00,  2.82it/s, loss=0.516, iou=0.279]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.92it/s, loss=0.673, iou=0.0669]


Epoch 2/20 - Train Loss: 0.5157, Train IoU: 0.2785, Val Loss: 0.6735, Val IoU: 0.0669
  ↳ New best model saved with validation IoU: 0.0669


Epoch 3 Train: 100%|██████████| 1592/1592 [09:21<00:00,  2.83it/s, loss=0.453, iou=0.367]
Validation: 100%|██████████| 68/68 [00:18<00:00,  3.65it/s, loss=0.648, iou=0.102]


Epoch 3/20 - Train Loss: 0.4529, Train IoU: 0.3667, Val Loss: 0.6480, Val IoU: 0.1017
  ↳ New best model saved with validation IoU: 0.1017


Epoch 4 Train: 100%|██████████| 1592/1592 [09:21<00:00,  2.84it/s, loss=0.421, iou=0.411]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.84it/s, loss=0.627, iou=0.128]


Epoch 4/20 - Train Loss: 0.4215, Train IoU: 0.4110, Val Loss: 0.6268, Val IoU: 0.1282
  ↳ New best model saved with validation IoU: 0.1282


Epoch 5 Train: 100%|██████████| 1592/1592 [09:22<00:00,  2.83it/s, loss=0.398, iou=0.444]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.87it/s, loss=0.623, iou=0.151]


Epoch 5/20 - Train Loss: 0.3983, Train IoU: 0.4435, Val Loss: 0.6227, Val IoU: 0.1511
  ↳ New best model saved with validation IoU: 0.1511


Epoch 6 Train: 100%|██████████| 1592/1592 [09:16<00:00,  2.86it/s, loss=0.386, iou=0.462]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.89it/s, loss=0.629, iou=0.145]

Epoch 6/20 - Train Loss: 0.3856, Train IoU: 0.4616, Val Loss: 0.6288, Val IoU: 0.1449



Epoch 7 Train: 100%|██████████| 1592/1592 [09:19<00:00,  2.85it/s, loss=0.368, iou=0.486]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.94it/s, loss=0.588, iou=0.191]


Epoch 7/20 - Train Loss: 0.3684, Train IoU: 0.4857, Val Loss: 0.5879, Val IoU: 0.1906
  ↳ New best model saved with validation IoU: 0.1906


Epoch 8 Train: 100%|██████████| 1592/1592 [09:23<00:00,  2.82it/s, loss=0.357, iou=0.502]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.94it/s, loss=0.614, iou=0.156]

Epoch 8/20 - Train Loss: 0.3569, Train IoU: 0.5019, Val Loss: 0.6136, Val IoU: 0.1562



Epoch 9 Train: 100%|██████████| 1592/1592 [09:15<00:00,  2.87it/s, loss=0.355, iou=0.504]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.88it/s, loss=0.568, iou=0.21]


Epoch 9/20 - Train Loss: 0.3554, Train IoU: 0.5041, Val Loss: 0.5680, Val IoU: 0.2098
  ↳ New best model saved with validation IoU: 0.2098


Epoch 10 Train: 100%|██████████| 1592/1592 [09:17<00:00,  2.86it/s, loss=0.348, iou=0.513]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.89it/s, loss=0.558, iou=0.221]


Epoch 10/20 - Train Loss: 0.3483, Train IoU: 0.5133, Val Loss: 0.5580, Val IoU: 0.2210
  ↳ New best model saved with validation IoU: 0.2210


Epoch 11 Train: 100%|██████████| 1592/1592 [09:16<00:00,  2.86it/s, loss=0.34, iou=0.525]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.81it/s, loss=0.561, iou=0.218]

Epoch 11/20 - Train Loss: 0.3400, Train IoU: 0.5250, Val Loss: 0.5609, Val IoU: 0.2178



Epoch 12 Train: 100%|██████████| 1592/1592 [09:19<00:00,  2.84it/s, loss=0.332, iou=0.537]
Validation: 100%|██████████| 68/68 [00:17<00:00,  3.91it/s, loss=0.56, iou=0.223]


Epoch 12/20 - Train Loss: 0.3316, Train IoU: 0.5370, Val Loss: 0.5596, Val IoU: 0.2231
  ↳ New best model saved with validation IoU: 0.2231


Epoch 13 Train: 100%|██████████| 1592/1592 [09:22<00:00,  2.83it/s, loss=0.325, iou=0.546]
Validation: 100%|██████████| 68/68 [00:18<00:00,  3.62it/s, loss=0.55, iou=0.231]


Epoch 13/20 - Train Loss: 0.3251, Train IoU: 0.5462, Val Loss: 0.5500, Val IoU: 0.2310
  ↳ New best model saved with validation IoU: 0.2310


Epoch 14 Train:  26%|██▌       | 408/1592 [02:24<06:51,  2.88it/s, loss=0.321, iou=0.552]