In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import cv2
import numpy as np
import os
import kagglehub
from glob import glob
from tqdm import tqdm
import torchvision.transforms as transforms

# Fetch the dataset through kagglehub and derive the image/label directories
# for the train and val splits from the returned root path.
print("Downloading dataset...")
dataset_path = kagglehub.dataset_download('fareselmenshawii/large-license-plate-dataset')
img_train_dir, img_val_dir = (
    os.path.join(dataset_path, 'images', split) for split in ('train', 'val')
)
label_train_dir, label_val_dir = (
    os.path.join(dataset_path, 'labels', split) for split in ('train', 'val')
)
print("Dataset downloaded successfully")

In [None]:

# CNN Architecture
class LicensePlateCNN(nn.Module):
    """Convolutional regressor that maps an RGB image batch to 4 values per image.

    Layout: a 7x7 strided stem with max-pooling, one residual stage
    (64 -> 128 channels, with a 1x1 convolution on the shortcut), three further
    conv/BN/ReLU layers (two of them strided), global average pooling and a
    linear head whose output is squashed with a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Stem: 3 -> 64 channels, downsampled by the strided conv and the pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(3, stride=2, padding=1)
        # Residual stage: two 3x3 convs on the main path ...
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        # ... and a 1x1 projection so the shortcut matches the 128 channels.
        self.conv_residual = nn.Conv2d(64, 128, kernel_size=1)
        # Plain conv stack: 128 -> 256 -> 256 -> 512.
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(256)
        self.conv6 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)
        self.bn6 = nn.BatchNorm2d(512)
        # Head: collapse the spatial dims, then regress 4 numbers.
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512, 4)
        print("Model initialized")

    def forward(self, x):
        """Return a ``(batch, 4)`` tensor of sigmoid outputs for image batch ``x``."""
        act = torch.relu

        stem = self.pool1(act(self.bn1(self.conv1(x))))

        # Main path of the residual stage (no activation before the sum).
        branch = act(self.bn2(self.conv2(stem)))
        branch = self.bn3(self.conv3(branch))
        shortcut = self.conv_residual(stem)
        feats = act(branch + shortcut)

        for conv, bn in ((self.conv4, self.bn4),
                         (self.conv5, self.bn5),
                         (self.conv6, self.bn6)):
            feats = act(bn(conv(feats)))

        pooled = torch.flatten(self.global_pool(feats), 1)
        # Sigmoid keeps every predicted bbox coordinate in the 0-1 range.
        return torch.sigmoid(self.fc(pooled))

# Custom Dataset
class LicensePlateDataset(Dataset):
    """Images paired with a single bounding-box target each.

    Every ``*.jpg`` in ``img_dir`` is one sample. Its label is the ``*.txt``
    file in ``label_dir`` with the same base name; each label line is read as
    ``class x_center y_center width height`` (presumably normalised YOLO
    format — confirm against the dataset) and only the first non-blank line
    is used. Images whose label file is missing or empty get an all-zero box.

    Args:
        img_dir: Directory containing the ``.jpg`` images.
        label_dir: Directory containing the ``.txt`` label files.
        augment: When true (the default, matching the previous behaviour),
            apply colour jitter and a random horizontal flip. Pass ``False``
            for validation/test data to get deterministic samples.

    Each item is ``(image, bbox)``: ``image`` is the transformed 3x224x224
    float tensor and ``bbox`` a float32 tensor ``[x_min, y_min, x_max, y_max]``.
    """

    def __init__(self, img_dir, label_dir, augment=True):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.augment = augment
        self.img_files = sorted(glob(os.path.join(img_dir, '*.jpg')))
        self.label_files = sorted(glob(os.path.join(label_dir, '*.txt')))
        print(f"Loaded {len(self.img_files)} images and {len(self.label_files)} labels")

        # Pair labels with images by base name. Pairing two independently
        # sorted lists by position silently misaligns every later sample as
        # soon as one image has no label file.
        self._label_by_stem = {self._stem(path): path for path in self.label_files}

        # The horizontal flip is NOT part of this pipeline: it has to be
        # applied to the image and the box together (see __getitem__).
        steps = [transforms.ToPILImage()]
        if augment:
            steps.append(
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2)
            )
        steps += [transforms.Resize((224, 224)), transforms.ToTensor()]
        self.transform = transforms.Compose(steps)

    @staticmethod
    def _stem(path):
        """Return the file name of ``path`` without directory or extension."""
        return os.path.splitext(os.path.basename(path))[0]

    @staticmethod
    def _hflip_bbox(bbox):
        """Mirror a normalised ``[x_min, y_min, x_max, y_max]`` box left-to-right."""
        x_min, y_min, x_max, y_max = bbox
        return np.array([1.0 - x_max, y_min, 1.0 - x_min, y_max], dtype=np.float32)

    def _load_bbox(self, img_path):
        """Return the corner-format box for ``img_path``, or ``None`` if unlabeled."""
        label_path = self._label_by_stem.get(self._stem(img_path))
        if label_path is None:
            return None
        with open(label_path, 'r') as f:
            # Skip blank lines so a stray newline cannot break the unpack below.
            lines = [line.strip() for line in f if line.strip()]
        if not lines:
            return None

        _, x_center, y_center, width, height = map(float, lines[0].split())
        return np.array(
            [
                x_center - width / 2,
                y_center - height / 2,
                x_center + width / 2,
                y_center + height / 2,
            ],
            dtype=np.float32,
        )

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, idx):
        img_path = self.img_files[idx]
        image = cv2.imread(img_path)
        if image is None:
            raise ValueError(f"Failed to load image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        bbox = self._load_bbox(img_path)

        # Flip image and box together; flipping only the image (as a
        # torchvision transform would) leaves the target pointing at the
        # wrong side of the picture.
        if self.augment and torch.rand(1).item() < 0.5:
            image = cv2.flip(image, 1)
            if bbox is not None:
                bbox = self._hflip_bbox(bbox)

        if bbox is None:
            # All-zero target marks "no plate"; it is deliberately not mirrored.
            bbox = np.zeros(4, dtype=np.float32)

        image = self.transform(image)
        return image, torch.tensor(bbox)

# IoU Calculation

def calculate_iou(pred, target):
    """Intersection-over-union of two ``(x_min, y_min, x_max, y_max)`` boxes.

    Negative widths/heights are clamped to zero, so a box whose corners are
    out of order contributes no area. Returns 0 when the union is empty.
    """
    px1, py1, px2, py2 = pred
    tx1, ty1, tx2, ty2 = target

    def _area(x_lo, y_lo, x_hi, y_hi):
        # Area of the rectangle, with each side clamped at zero.
        return max(0, x_hi - x_lo) * max(0, y_hi - y_lo)

    inter_area = _area(max(px1, tx1), max(py1, ty1), min(px2, tx2), min(py2, ty2))
    union_area = _area(px1, py1, px2, py2) + _area(tx1, ty1, tx2, ty2) - inter_area

    if union_area > 0:
        return inter_area / union_area
    return 0

# Training and Evaluation

def _run_epoch(model, loader, criterion, device, optimizer=None, desc=""):
    """Run one pass over ``loader`` and return ``(mean_loss, mean_iou)``.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one it is put in eval mode and gradients are
    disabled. The loss is averaged over the batches processed and the IoU
    over the samples processed; both are NaN if the loader yields nothing.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    iou_sum = 0.0
    n_batches = 0
    n_samples = 0
    with torch.set_grad_enabled(training):
        for images, bboxes in tqdm(loader, desc=desc):
            images, bboxes = images.to(device), bboxes.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, bboxes)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            n_batches += 1
            for pred, true in zip(outputs.detach().cpu().numpy(), bboxes.cpu().numpy()):
                iou_sum += calculate_iou(pred, true)
                n_samples += 1

    # Divide by what was actually seen. len(loader.dataset) overstates the
    # sample count whenever the loader drops or subsamples items, and an
    # empty loader would otherwise divide by zero.
    mean_loss = loss_sum / n_batches if n_batches else float("nan")
    mean_iou = iou_sum / n_samples if n_samples else float("nan")
    return mean_loss, mean_iou


def train_model(model, train_loader, val_loader, epochs=10):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Uses Adam (lr=0.001) with a SmoothL1 loss on the predicted boxes and runs
    on CUDA when available, otherwise on the CPU. The model is moved to that
    device in place and is left in eval mode after the final validation pass.

    Args:
        model: Module mapping an image batch to a ``(batch, 4)`` box tensor.
        train_loader: Iterable of ``(images, bboxes)`` training batches.
        val_loader: Iterable of ``(images, bboxes)`` validation batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        A dict with the keys ``train_loss``, ``train_iou``, ``val_loss`` and
        ``val_iou``, each holding one float per epoch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Training on {device}")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.SmoothL1Loss()

    history = {"train_loss": [], "train_iou": [], "val_loss": [], "val_iou": []}

    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs} started")
        train_loss, train_iou = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer, desc="Training"
        )
        val_loss, val_iou = _run_epoch(
            model, val_loader, criterion, device, desc="Validation"
        )

        history["train_loss"].append(train_loss)
        history["train_iou"].append(train_iou)
        history["val_loss"].append(val_loss)
        history["val_iou"].append(val_iou)

        print(f"Epoch {epoch+1}/{epochs} completed")
        print(f"Train Loss: {train_loss:.4f}, Train IoU: {train_iou:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val IoU: {val_iou:.4f}")

    return history

# Inference

def infer(model, image_path, device="cuda" if torch.cuda.is_available() else "cpu",
          input_size=(224, 224)):
    """Predict one bounding box for the image at ``image_path``.

    Args:
        model: Module mapping a ``(1, 3, H, W)`` float tensor in the 0-1 range
            to a ``(1, 4)`` tensor of normalised box coordinates.
        image_path: Path of the image to read with OpenCV.
        device: Device to run on. The model is moved there in place so it can
            never sit on a different device than its input.
        input_size: ``(height, width)`` the image is resized to before being
            fed to the model; defaults to the 224x224 used by the dataset.

    Returns:
        A float numpy array of 4 values: the model output with the
        even-indexed entries scaled by the original image width and the
        odd-indexed entries by the original height.

    Raises:
        ValueError: If the image cannot be read.
    """
    print(f"Running inference on {image_path}")
    # Move the model as well as the input; moving only the input fails with a
    # device mismatch when the model still lives on another device.
    model.to(device)
    model.eval()

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Failed to load image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    orig_h, orig_w = image.shape[:2]

    in_h, in_w = input_size
    # cv2.resize takes (width, height), the reverse of the (height, width) pair above.
    image_resized = cv2.resize(image, (in_w, in_h))
    image_tensor = torch.from_numpy(image_resized.transpose(2, 0, 1)).float() / 255.0
    image_tensor = image_tensor.unsqueeze(0).to(device)

    with torch.no_grad():
        bbox = model(image_tensor)[0].cpu().numpy()

    # Normalised -> pixel coordinates of the original image.
    bbox[0::2] *= orig_w
    bbox[1::2] *= orig_h
    return bbox

In [None]:
# Main Execution: build the data pipeline, train the model, then run a single
# prediction on the first training image as a smoke test.
if __name__ == "__main__":
    print("Loading datasets...")
    train_dataset = LicensePlateDataset(img_dir=img_train_dir, label_dir=label_train_dir)
    val_dataset = LicensePlateDataset(img_dir=img_val_dir, label_dir=label_val_dir)

    print("Creating data loaders...")
    # Only the training data is shuffled; validation keeps the dataset order.
    train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=False)

    print("Initializing model...")
    model = LicensePlateCNN()

    print("Starting training...")
    train_model(model=model, train_loader=train_loader, val_loader=val_loader, epochs=10)

    print("Performing inference...")
    sample_image = train_dataset.img_files[0]
    bbox = infer(model=model, image_path=sample_image)
    print(f"Predicted Bounding Box: {bbox}")
