In [None]:
#!rm -rf /kaggle/working/PDLPR-algorithm

In [None]:
!git clone https://github.com/giankev/PDLPR-algorithm.git

## IMPORT

In [1]:
# standard library
import os
import sys
import math
import time
import shutil
import tarfile
import warnings
from pathlib import Path

# utility
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import time

#PyTorch & torchvision
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms as T
from torchvision.ops import box_iou
import torchvision.transforms as T

#Albumentations
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

#Custom repo modules 
repo_path = "/kaggle/working/PDLPR-algorithm/baseline_scr/detection"
sys.path.insert(0, repo_path)
from model import LPDetectorFPN
from trainer import set_seed, train, train_one_epoch, evaluate, ciou_loss, cxcywh_to_xyxy
sys.path.remove(repo_path)

warnings.filterwarnings("ignore")
set_seed(42)

## SETUP ENVIRONMENT

In [None]:
#downloading 50 imgs for train and 8k for test
!gdown --folder https://drive.google.com/drive/folders/143HxhUrqkFIdfCzZQ3dA4Mqt8cjARCxx?usp=sharing -O datasets
#https://drive.google.com/drive/u/1/folders/1Qirh0lsjdsroLHEmJDtS6sVXPQKalW6j

In [None]:
# extracting the .tar archive.
def extract_tar_archive(archive_path, destination_path):

    print(f"Extracting the tar archive in:{archive_path}")
    with tarfile.open(archive_path, "r") as tar:
        tar.extractall(path=destination_path)
        
    print(f"Archive extracted in: {destination_path}")

#delete the .tar archive which now is useless.
def delete_tar_archive(path_tar_archive):
    
    if os.path.exists(path_tar_archive):
        shutil.rmtree(path_tar_archive)
        print(f"Folder eliminated: {path_tar_archive}")
    else:
        print(f"Folder not found: {path_tar_archive}")

In [None]:
archive_path_train = "/kaggle/working/datasets/ccpd_train.tar"
archive_path_test = "/kaggle/working/datasets/ccpd_test.tar"
extract_path = "/kaggle/working/"

#when extracting the files, is important to eliminate the .tar archive which now occupy /kaggle/working space.
extract_tar_archive(archive_path_train, extract_path)
extract_tar_archive(archive_path_test, extract_path)
delete_tar_archive("/kaggle/working/datasets/")

## FUNCTION

In [2]:
#extracting the metadata from each img in this format (image_path,x1_bbox,y1_bbox,x2_bbox,y2_bbox)
def split_bbox(bbox_str):
    "extracting x1,y1,x2,y2, ex. '283___502_511___591'  →  ['283','502','511','591']"
    tokens = []
    for seg in bbox_str.split("___"):
        tokens.extend(seg.split("_"))
    if len(tokens) == 4 and all(t.isdigit() for t in tokens):
        return map(int, tokens)
    return (None,)*4

def count_parameters(model):
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total parameters:     {total:,}")
    print(f"Trainable parameters: {trainable:,}")

In [3]:
def preprocess_resize_bbox(df: pd.DataFrame,
                           out_dir: str = "preproc224",
                           img_size: int = 224,
                           save_as_pt: bool = False,
                           quality: int = 95) -> pd.DataFrame:
    """
    Resize images and bounding boxes, saving the output and returning an updated DataFrame.

     df         : DataFrame containing columns: image_path, x1_bbox, y1_bbox, x2_bbox, y2_bbox
     out_dir    : output folder to save resized images or tensors
     img_size   : target size (square, e.g. 224)
     save_as_pt : if True, saves images as .pt tensors; else as JPEG
     quality    : JPEG quality (only used if save_as_pt is False)
    """
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    records = []
    to_tensor = torch.from_numpy  # shortcut for converting numpy to tensor

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Pre‑resize"):
        # Load image and convert to RGB
        img = Image.open(row.image_path).convert("RGB")
        w0, h0 = img.size  # original dimensions

        # Resize image to target size (e.g., 224x224) using bilinear interpolation
        img = img.resize((img_size, img_size), Image.BILINEAR)

        # Scale bounding box coordinates to the new image size
        sx, sy = img_size / w0, img_size / h0
        x1 = row.x1_bbox * sx
        y1 = row.y1_bbox * sy
        x2 = row.x2_bbox * sx
        y2 = row.y2_bbox * sy

        # Get filename without extension
        stem = Path(row.image_path).stem

        if save_as_pt:
            # Convert image to tensor and normalize to [0,1]
            tensor = to_tensor(np.array(img)).permute(2, 0, 1).float() / 255
            path_out = out_dir / f"{stem}.pt"
            torch.save(tensor, path_out)
        else:
            # Save image as JPEG with given quality
            path_out = out_dir / f"{stem}.jpg"
            img.save(path_out, format="JPEG", quality=quality, optimize=True)

        # Store updated info (new image path and scaled bbox) in the new DataFrame
        records.append({
            "image_path": str(path_out),
            "x1_bbox": x1, "y1_bbox": y1,
            "x2_bbox": x2, "y2_bbox": y2,
            **{c: row[c] for c in df.columns if c not in (
               "image_path", "x1_bbox", "y1_bbox", "x2_bbox", "y2_bbox")}
        })

    return pd.DataFrame.from_records(records)

In [4]:
folder = "/kaggle/working/ccpd_subset_base/train"
rows   = []

for fname in os.listdir(folder):
    if not fname.endswith(".jpg"): continue

    parts = fname[:-4].split("-")           
    if len(parts) < 6:
        continue #the ccpd file name is wrong           

    x1,y1,x2,y2 = split_bbox(parts[2])          
    
    rows.append({
        "image_path": os.path.join(folder, fname),
        "x1_bbox": x1, "y1_bbox": y1,
        "x2_bbox": x2, "y2_bbox": y2
    })

df = pd.DataFrame(rows)
print("Rows number:", len(df))         
print("Columns numner:", df.shape[1])
print("Shape:", df.shape)
df.head()

Rows number: 50000
Columns numner: 5
Shape: (50000, 5)


Unnamed: 0,image_path,x1_bbox,y1_bbox,x2_bbox,y2_bbox
0,/kaggle/working/ccpd_subset_base/train/0254310...,182,424,500,530
1,/kaggle/working/ccpd_subset_base/train/0188362...,220,544,456,637
2,/kaggle/working/ccpd_subset_base/train/0263146...,211,483,460,600
3,/kaggle/working/ccpd_subset_base/train/0320761...,187,520,478,639
4,/kaggle/working/ccpd_subset_base/train/0393247...,147,463,509,573


## TRAIN PHASE

In [None]:
class PlateDatasetFastAug(Dataset):
    def __init__(self, df, augment=True, img_size=224):
        self.df = df.reset_index(drop=True)
        self.augment = augment
        self.img_size = img_size

        if augment:
            self.pipeline = A.Compose([
                # Blurring
                A.OneOf([
                    A.GaussianBlur(blur_limit=3, p=0.3),
                    A.MotionBlur(blur_limit=5, p=0.2),
                ], p=0.6),
                
                # Brightness & contrast variation
                A.OneOf([
                    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.7),
                    A.RandomBrightnessContrast(brightness_limit=(-0.6, -0.3), contrast_limit=0.2, p=0.6),
                ], p=0.5),
                
                A.RandomGamma(gamma_limit=(80, 120), p=0.3),
                A.ColorJitter(0.2, 0.2, 0.2, 0.1, p=0.4),
                
                # Random crop or zoom-in
                A.OneOf([
                    A.Compose([
                        A.LongestMaxSize(max_size=int(img_size * 0.6)),
                        A.PadIfNeeded(img_size, img_size,
                                      border_mode=cv2.BORDER_REPLICATE, value=(0, 0, 0)),
                    ], p=0.5),
                    A.RandomResizedCrop(size=(img_size, img_size), scale=(0.8, 1), ratio=(1.0, 1.0)),
                ], p=0.35),

                A.Perspective(scale=(0.03, 0.06), p=0.4),

                # CLAHE or shadow simulation
                A.OneOf([
                    A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.5),
                    A.RandomShadow(num_shadows_lower=1, num_shadows_upper=2,
                                   shadow_dimension=4, shadow_roi=(0, 0.4, 1, 1), p=0.5),
                ], p=0.3),

                # Downscale → simulates low resolution
                A.OneOf([
                    A.Downscale(scale_min=0.3, scale_max=0.5, interpolation=cv2.INTER_LINEAR, p=1.0)
                ], p=0.25),

                ToTensorV2()
            ], bbox_params=A.BboxParams(format="yolo", label_fields=["labels"]))
        
        else:
            self.pipeline = A.Compose([
                ToTensorV2()
            ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # Load and convert image to RGB
        img = cv2.cvtColor(cv2.imread(row.image_path), cv2.COLOR_BGR2RGB)

        # Normalize bounding box to [0, 1]
        x1, y1, x2, y2 = row.x1_bbox, row.y1_bbox, row.x2_bbox, row.y2_bbox
        cx = (x1 + x2) / 2 / self.img_size
        cy = (y1 + y2) / 2 / self.img_size
        w  = (x2 - x1) / self.img_size
        h  = (y2 - y1) / self.img_size
        bbox_yolo = [cx, cy, w, h]

        # Apply augmentation (or just transform)
        transformed = self.pipeline(
            image=img,
            bboxes=[bbox_yolo],
            labels=[0]
        )

        img_tensor = transformed["image"].float() / 255.0
        bbox = torch.tensor(transformed["bboxes"][0], dtype=torch.float32)

        return img_tensor, bbox

In [None]:
# split the dataset for training phase.
df_train, df_val = train_test_split(df, test_size=0.02, shuffle=True, random_state=42)

print(f"Train set: {len(df_train)} img")
print(f"Val set:   {len(df_val)} img")

In [None]:
#  Prepocess imgs into 224x224
df_train_224 = preprocess_resize_bbox(df_train, "train224", img_size=224, save_as_pt=False)
df_val_224   = preprocess_resize_bbox(df_val,   "val224",   img_size=224, save_as_pt=False)

In [None]:
train_ds = PlateDatasetFastAug(df_train_224, augment=True)
val_ds   = PlateDatasetFastAug(df_val_224, augment=False)

dl_train = DataLoader(train_ds, batch_size=128, shuffle=True,
                      num_workers=4, pin_memory=True, persistent_workers=True)
dl_val   = DataLoader(val_ds, batch_size=128, shuffle=False,
                      num_workers=4, pin_memory=True, persistent_workers=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = LPDetectorFPN()      
count_parameters(model)
model  = train(model, dl_train, dl_val,
               epochs=20, lr=1e-4, device=device)

torch.save(model.state_dict(), "/kaggle/working/lp2_detectorr.pt")

## TEST

In [5]:
root   = Path("/kaggle/working/ccpd_test")   
rows   = []

for jpg in root.rglob("*.jpg"):
    fname = jpg.name
    parts = fname[:-4].split("-")            
    if len(parts) < 6:
        continue  #wrong name file                         

    try:
        x1, y1, x2, y2 = split_bbox(parts[2])
    except Exception as e:
        print("skip", jpg, e)
        continue

    rows.append({
        "subset": jpg.parent.name,    
        "image_path": str(jpg),
        "x1_bbox": x1, "y1_bbox": y1,
        "x2_bbox": x2, "y2_bbox": y2,
    })

df = pd.DataFrame(rows)

print("Rows:",   len(df))
print("Cols:",   df.shape[1])
print("Shape:",  df.shape)
display(df.head())

Rows: 8000
Cols: 6
Shape: (8000, 6)


Unnamed: 0,subset,image_path,x1_bbox,y1_bbox,x2_bbox,y2_bbox
0,weather,/kaggle/working/ccpd_test/weather/0304-9_19-23...,230,484,457,596
1,weather,/kaggle/working/ccpd_test/weather/0295-4_1-253...,253,428,490,532
2,weather,/kaggle/working/ccpd_test/weather/0322-12_12-3...,304,384,502,520
3,weather,/kaggle/working/ccpd_test/weather/0455-18_17-4...,438,508,659,680
4,weather,/kaggle/working/ccpd_test/weather/0140-0_0-134...,134,387,326,448


In [6]:
# parameters setup
IOU_THR  = 0.7            
IMG_SIZE = 224            
device   = "cuda" if torch.cuda.is_available() else "cpu"

#model init
#otherwise you can use the weights from the previous training phase 
weights_github = "/kaggle/working/PDLPR-algorithm/baseline_scr/detection/detec_weights.pt" 
detector = LPDetectorFPN().to(device)
detector.load_state_dict(torch.load(weights_github, map_location=device))
detector.eval()

#utils
tfm = T.Compose([
    T.ToPILImage(),
    T.Resize((IMG_SIZE, IMG_SIZE), interpolation=T.InterpolationMode.BILINEAR),
    T.ToTensor()
])

def bbox_iou(pred, tgt, eps=1e-7):
    px1, py1 = pred[:, 0] - pred[:, 2] / 2, pred[:, 1] - pred[:, 3] / 2
    px2, py2 = pred[:, 0] + pred[:, 2] / 2, pred[:, 1] + pred[:, 3] / 2
    tx1, ty1 = tgt[:, 0] - tgt[:, 2] / 2, tgt[:, 1] - tgt[:, 3] / 2
    tx2, ty2 = tgt[:, 0] + tgt[:, 2] / 2, tgt[:, 1] + tgt[:, 3] / 2

    inter_w = (torch.min(px2, tx2) - torch.max(px1, tx1)).clamp(min=0)
    inter_h = (torch.min(py2, ty2) - torch.max(py1, ty1)).clamp(min=0)
    inter   = inter_w * inter_h
    area_p  = (px2 - px1) * (py2 - py1)
    area_t  = (tx2 - tx1) * (ty2 - ty1)
    union   = area_p + area_t - inter + eps
    return inter / union

loss_fn = torch.nn.SmoothL1Loss(reduction="mean")

#evaluation subset
@torch.no_grad()
def eval_subset(df_sub, name):
    iou_list, loss_list = [], []
    correct, t_forward, n_imgs = 0, 0.0, 0

    for _, row in tqdm(df_sub.iterrows(), total=len(df_sub),
                       desc=f"{name:10}", leave=False):

        img_bgr = cv2.imread(row.image_path);  n_imgs += 1
        if img_bgr is None: continue
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        h0, w0  = img_rgb.shape[:2]

        # preprocessing + forward (timed)
        t0 = time.time()
        img_t = tfm(img_rgb).unsqueeze(0).to(device)
        pred  = detector(img_t)[0].cpu()              # cxcywh ∈ [0,1]
        t_forward += (time.time() - t0)

        # ground truth scale to IMGSIZE
        sx, sy = IMG_SIZE / w0, IMG_SIZE / h0
        x1, y1, x2, y2 = row.x1_bbox, row.y1_bbox, row.x2_bbox, row.y2_bbox
        x1_r, y1_r, x2_r, y2_r = x1*sx, y1*sy, x2*sx, y2*sy
        tgt = torch.tensor([(x1_r + x2_r) / (2*IMG_SIZE),
                            (y1_r + y2_r) / (2*IMG_SIZE),
                            (x2_r - x1_r) / IMG_SIZE,
                            (y2_r - y1_r) / IMG_SIZE])

        #metrics
        iou  = bbox_iou(pred.unsqueeze(0), tgt.unsqueeze(0)).item()
        loss = loss_fn(pred, tgt).item()
        iou_list.append(iou);  loss_list.append(loss)
        if iou >= IOU_THR: correct += 1

    acc   = correct / n_imgs
    m_iou = np.mean(iou_list)
    m_l1  = np.mean(loss_list)
    fps   = n_imgs / t_forward if t_forward else 0.0

    print(f"CCPD_{name:<9} | Acc@{IOU_THR}: {acc:.4f} | IoU: {m_iou:.3f} "
          f"| L1: {m_l1:.4f} | img: {n_imgs} | FPS: {fps:.1f}")
    return n_imgs, acc, m_iou, m_l1, t_forward

#subset loop
df["subset"] = df.image_path.apply(lambda p: Path(p).parts[-2])

g_imgs = g_acc = g_iou = g_l1 = total_time = 0.0
for sub in sorted(df.subset.unique()):
    n, acc, miou, mloss, t = eval_subset(df[df.subset == sub], sub)
    g_imgs += n;  g_acc += acc*n;  g_iou += miou*n;  g_l1 += mloss*n;  total_time += t

if g_imgs:
    print(f"\n🔹 GLOBAL | Acc@{IOU_THR}: {g_acc/g_imgs:.4f} "
          f"| IoU: {g_iou/g_imgs:.3f} "
          f"| L1: {g_l1/g_imgs:.4f} "
          f"| FPS: {g_imgs/total_time:.1f}")

base      :   0%|          | 0/1000 [00:00<?, ?it/s]

CCPD_base      | Acc@0.7: 0.9820 | IoU: 0.863 | L1: 0.0001 | img: 1000 | FPS: 87.9


blur      :   0%|          | 0/1000 [00:00<?, ?it/s]

CCPD_blur      | Acc@0.7: 0.8310 | IoU: 0.794 | L1: 0.0001 | img: 1000 | FPS: 94.1


challenge :   0%|          | 0/1000 [00:00<?, ?it/s]

CCPD_challenge | Acc@0.7: 0.8630 | IoU: 0.798 | L1: 0.0001 | img: 1000 | FPS: 96.1


db        :   0%|          | 0/1000 [00:00<?, ?it/s]

CCPD_db        | Acc@0.7: 0.7470 | IoU: 0.765 | L1: 0.0003 | img: 1000 | FPS: 93.6


fn        :   0%|          | 0/1000 [00:00<?, ?it/s]

CCPD_fn        | Acc@0.7: 0.7400 | IoU: 0.758 | L1: 0.0006 | img: 1000 | FPS: 94.8


rotate    :   0%|          | 0/1000 [00:00<?, ?it/s]

CCPD_rotate    | Acc@0.7: 0.9340 | IoU: 0.810 | L1: 0.0002 | img: 1000 | FPS: 96.5


tilt      :   0%|          | 0/1000 [00:00<?, ?it/s]

CCPD_tilt      | Acc@0.7: 0.8170 | IoU: 0.780 | L1: 0.0003 | img: 1000 | FPS: 99.6


weather   :   0%|          | 0/1000 [00:00<?, ?it/s]

CCPD_weather   | Acc@0.7: 0.9850 | IoU: 0.867 | L1: 0.0001 | img: 1000 | FPS: 96.0

🔹 GLOBAL | Acc@0.7: 0.8624 | IoU: 0.804 | L1: 0.0002 | FPS: 94.7
