In [None]:
# Recursively delete the Kaggle working directory before anything else runs.
# NOTE(review): destructive — every previous output under /kaggle/working/ is lost;
# presumably intended to give the notebook a clean starting state.
!rm -rf /kaggle/working/

In [None]:
import tarfile
import shutil
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import yaml
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Download the training dataset and the test dataset (dim(train) = 50k, dim(test) = 8k)
# from a shared Google Drive folder; `-O datasets` sets the output location.
!gdown --folder https://drive.google.com/drive/u/1/folders/1Qirh0lsjdsroLHEmJDtS6sVXPQKalW6j -O datasets

In [None]:
# extracting the .tar archive
def extract_tar_archive(archive_path, destination_path):
    """Extract every member of a tar archive into ``destination_path``.

    Args:
        archive_path: Path of the ``.tar`` file to read.
        destination_path: Directory the members are extracted into.

    Raises:
        tarfile.TarError / OSError: if the archive cannot be read, or (when the
            extraction filter is active) if a member would be written outside
            ``destination_path``.
    """
    print(f"Extracting the tar archive in:{archive_path}")
    with tarfile.open(archive_path, "r") as tar:
        # The archive is downloaded from the network, so prefer the "data"
        # extraction filter, which rejects members that escape the destination
        # (absolute paths, "..", unsafe links). Older Python builds do not
        # know the `filter` argument, hence the feature check.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=destination_path, filter="data")
        else:
            tar.extractall(path=destination_path)

    print(f"Archive extracted in: {destination_path}")

# delete the .tar archive (or the folder holding it), which is no longer needed
def delete_tar_archive(path_tar_archive):
    """Delete a downloaded archive, given either the file or its folder.

    Args:
        path_tar_archive: Path of a single archive file, or of a directory
            that is removed together with everything inside it.

    A missing path is reported with a message and is not an error.
    """
    if os.path.isdir(path_tar_archive) and not os.path.islink(path_tar_archive):
        shutil.rmtree(path_tar_archive)
        print(f"Folder eliminated: {path_tar_archive}")
    elif os.path.lexists(path_tar_archive):
        # shutil.rmtree() refuses plain files and symlinks, so remove those directly.
        os.remove(path_tar_archive)
        print(f"File eliminated: {path_tar_archive}")
    else:
        print(f"Folder not found: {path_tar_archive}")

In [None]:
# Locations of the two downloaded archives and the directory they are extracted into.
archive_path_train = "/kaggle/working/datasets/ccpd_train.tar"
archive_path_test = "/kaggle/working/datasets/ccpd_test.tar"
extract_path = "/kaggle/working/"
# NOTE(review): presumably the extracted training images; this name is not
# referenced anywhere else in the visible notebook.
folder_path = "/kaggle/working/ccpd_subset_base/train"

In [None]:
# Unpack the training archive first, then the test archive, into the same
# destination; afterwards drop the download folder, which is no longer needed.
for tar_path in (archive_path_train, archive_path_test):
    extract_tar_archive(tar_path, extract_path)

delete_tar_archive("/kaggle/working/datasets/")

In [None]:
# Clone the YOLOv5 repository, install its requirements from inside the clone,
# then step back to the parent directory so later relative paths (yolov5/...) resolve.
# NOTE(review): the clone is not pinned to a tag or commit, so a re-run may pick up
# a different upstream revision.
!git clone https://github.com/ultralytics/yolov5  
%cd yolov5
%pip install -qr requirements.txt  #dependencies
%cd ..

In [None]:
# Folder holding the extracted training images.
# NOTE(review): DATA_PATH is not referenced elsewhere in the visible notebook.
DATA_PATH = "/kaggle/working/ccpd_subset_base/train"
# Dataset description that a later cell serialises to yolov5/ccpd.yaml:
# image folders of the train/val splits, the number of classes and the class names.
CONTENT = {
    'train': '/kaggle/working/ccpd_yolo_dataset/images/train',
    'val': '/kaggle/working/ccpd_yolo_dataset/images/val',
    'nc': 1,
    'names': ['plate']
}

In [None]:
ccpd_path = "yolov5/ccpd.yaml"
yaml_parent = os.path.dirname(ccpd_path)
os.makedirs(yaml_parent, exist_ok=True)

# ----------- write the file ------------------------------------------
# Serialise the dataset description keeping the dictionary's key order,
# then store the resulting text.
yaml_text = yaml.dump(CONTENT, sort_keys=False)
with open(ccpd_path, 'w') as f:
    f.write(yaml_text)

print(f"file added in: {os.getcwd()}/{ccpd_path}")

In [None]:
# Extract the metadata encoded in each image file name into rows of the form
# (image_path, x1_bbox, y1_bbox, x2_bbox, y2_bbox, plate_number).
#
# Lookup tables used by decode_plate(): the first plate index selects from
# PROVINCES, the second from ALPHA and every remaining index from ADS.
# NOTE(review): the trailing "O"/'O' entry of each table is presumably a
# "no character" placeholder — confirm against the CCPD documentation.
PROVINCES = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",
             "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",
             "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",
             "新", "警", "学", "O"]

# Letters only (the table has no 'I').
ALPHA = ['A','B','C','D','E','F','G','H','J','K',
             'L','M','N','P','Q','R','S','T','U','V',
             'W','X','Y','Z','O'] 

# Letters followed by the digits 0-9.
ADS = ['A','B','C','D','E','F','G','H','J','K',
       'L','M','N','P','Q','R','S','T','U','V',
       'W','X','Y','Z','0','1','2','3','4','5',
       '6','7','8','9','O']

def decode_plate(s):
    """Decode a plate-index string from a file name into the plate text.

    Args:
        s: Integer indices joined by "_". The first index selects from
            PROVINCES, the second from ALPHA, all remaining ones from ADS.

    Returns:
        The decoded plate string, or None when ``s`` cannot be decoded
        (non-numeric token, index outside a table, ...).
    """
    try:
        # Parse inside the try block: a non-numeric token must yield None
        # instead of raising ValueError and aborting the whole folder scan.
        idx = list(map(int, s.split("_")))
        return PROVINCES[idx[0]] + ALPHA[idx[1]] + "".join(ADS[i] for i in idx[2:])
    except Exception:
        return None

def split_bbox(bbox_str):
    """Parse the bounding-box field of a file name into four integers.

    Args:
        bbox_str: Field such as '283___502_511___591', i.e. values separated
            by "___" and "_".

    Returns:
        A 4-tuple of ints in the order they appear in ``bbox_str``, or
        ``(None, None, None, None)`` when the field does not hold exactly
        four decimal numbers.
    """
    # '283___502_511___591'  →  ['283','502','511','591']
    tokens = [tok for seg in bbox_str.split("___") for tok in seg.split("_")]
    # isdecimal() only accepts characters that int() can parse; isdigit()
    # would also accept e.g. superscript digits and then fail in int().
    if len(tokens) == 4 and all(tok.isdecimal() for tok in tokens):
        # Return a real tuple (not a lazy map) so both branches share one type.
        return tuple(int(tok) for tok in tokens)
    return (None,) * 4

folder = "/kaggle/working/ccpd_subset_base/train"
rows   = []

# os.listdir() returns entries in arbitrary order. Sorting fixes the row order
# of the DataFrame, which the seeded train/val split needs to be reproducible.
for fname in sorted(os.listdir(folder)):
    if not fname.endswith(".jpg"):
        continue

    # The annotation is read from the "-"-separated fields of the file name
    # (extension stripped); names with fewer than six fields are ignored.
    parts = fname[:-4].split("-")
    if len(parts) < 6:
        continue

    # Field 2 holds the bounding box, field 4 the encoded plate characters.
    x1, y1, x2, y2 = split_bbox(parts[2])
    plate = decode_plate(parts[4])

    rows.append({
        "image_path": os.path.join(folder, fname),
        "x1_bbox": x1, "y1_bbox": y1,
        "x2_bbox": x2, "y2_bbox": y2,
        "plate_number": plate
    })

df = pd.DataFrame(rows)

In [None]:
# Report the size of the metadata table, then preview its first rows.
n_rows, n_cols = df.shape
print("Rows number:", n_rows)
print("Columns numner:", n_cols)
print("Shape:", (n_rows, n_cols))
df.head()

In [None]:
# Split the training metadata 80/20 into train and validation subsets.
# The rows are shuffled with a fixed seed (random_state=42).
df_train, df_val = train_test_split(df, test_size=0.2, shuffle=True, random_state=42)

print(f"Train set: {len(df_train)} img")
print(f"Val set:   {len(df_val)} img")

In [None]:
# Delete any previously exported YOLO dataset folder.
# NOTE(review): presumably so that a re-run does not mix stale images/labels
# with the new export.
!rm -rf /kaggle/working/ccpd_yolo_dataset/

In [None]:
# Image parameters (use the actual resolution of your images).
# They are used to normalise the bounding boxes written to the YOLO labels.
IMG_W, IMG_H = 720, 1160
# Class index written at the start of every YOLO label line.
CLASS_ID = 0

# Input/output paths.
# NOTE(review): SRC_IMG_DIR is not referenced elsewhere in the visible notebook.
SRC_IMG_DIR = "/kaggle/working/ccpd_subset_base/train"
OUT_BASE = "/kaggle/working/ccpd_yolo_dataset"

# Export helper for the train/val splits
def export_yolo(df_split, split_name, img_w, img_h, out_base=None, class_id=None):
    """Copy the images of one split and write one YOLO label file per image.

    Images go to ``<out_base>/images/<split_name>`` and labels to
    ``<out_base>/labels/<split_name>``. Each label file holds a single line
    ``class x_center y_center width height`` with the box normalised by the
    image size.

    Args:
        df_split: DataFrame with the columns ``image_path``, ``x1_bbox``,
            ``y1_bbox``, ``x2_bbox`` and ``y2_bbox``.
        split_name: Name of the split sub-folder (e.g. "train", "val").
        img_w: Image width in pixels used for normalisation.
        img_h: Image height in pixels used for normalisation.
        out_base: Output root; defaults to the module-level ``OUT_BASE``.
        class_id: Class index to write; defaults to the module-level ``CLASS_ID``.

    Rows whose bounding box is missing are skipped. A row that raises during
    copy/write is reported and skipped as well.
    """
    out_base = OUT_BASE if out_base is None else out_base
    class_id = CLASS_ID if class_id is None else class_id

    img_dir = os.path.join(out_base, "images", split_name)
    lbl_dir = os.path.join(out_base, "labels", split_name)
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(lbl_dir, exist_ok=True)

    bbox_cols = ["x1_bbox", "y1_bbox", "x2_bbox", "y2_bbox"]
    exported = 0
    skipped = 0

    for _, row in df_split.iterrows():
        try:
            # pandas stores a missing coordinate as NaN (not None) once the
            # column also holds numbers. Arithmetic on NaN does not raise, so
            # without this guard a "nan nan nan nan" label would be written.
            if row[bbox_cols].isna().any():
                skipped += 1
                continue

            x_center = (row["x1_bbox"] + row["x2_bbox"]) / 2 / img_w
            y_center = (row["y1_bbox"] + row["y2_bbox"]) / 2 / img_h
            width = (row["x2_bbox"] - row["x1_bbox"]) / img_w
            height = (row["y2_bbox"] - row["y1_bbox"]) / img_h

            yolo_line = f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"

            base_name = os.path.basename(row["image_path"])
            name_no_ext = os.path.splitext(base_name)[0]

            dst_img_path = os.path.join(img_dir, base_name)
            shutil.copy2(row["image_path"], dst_img_path)

            # YOLO label: same base name as the image, .txt extension
            label_path = os.path.join(lbl_dir, f"{name_no_ext}.txt")
            with open(label_path, "w") as f:
                f.write(yolo_line)

            exported += 1

        except Exception as e:
            # Best effort: report the failing file and continue with the next row.
            print(f"Errore su file {row['image_path']}: {e}")

    # Report what was actually written rather than the size of the input frame.
    print(f"{split_name.upper()} completato  {exported} esempi")
    if skipped:
        print(f"{split_name.upper()} saltati {skipped} esempi senza bounding box valida")

# Export both splits, one after the other, with the shared image resolution.
for split_frame, split_label in ((df_train, "train"), (df_val, "val")):
    export_yolo(split_frame, split_label, IMG_W, IMG_H)

In [None]:
# Hyperparameters for training phase A; written below to
# yolov5/data/hyps/my_ccpd.yaml and passed to train.py through --hyp.
my_hyp = {
    'lr0': 0.001,        # initial learning rate (fine-tuning)
    'lrf': 0.10,          # lr_final = lr0 * lrf  (NOTE(review): the original note says "cosine scheduler", but the training command in this notebook does not pass --cos-lr — confirm which schedule applies)
    'momentum': 0.937,
    'weight_decay': 0.0002,

    # warm-up
    'warmup_epochs': 3.0,
    'warmup_momentum': 0.8,
    'warmup_bias_lr': 0.1,

    # loss balance
    'box': 0.05,
    'cls': 0.20,          # a single class
    'cls_pw': 1.0,
    'obj': 0.90,
    'obj_pw': 1.0,
    'iou_t': 0.20,
    'anchor_t': 4.0,
    'fl_gamma': 0.0,

    # augmentation - colour / geometry
    'hsv_h': 0.15,
    'hsv_s': 0.50,
    'hsv_v': 0.8,

    'degrees': 7.5,
    'translate': 0.10,
    'scale': 0.40,
    'shear': 5.0,
    'perspective': 0.0,

    # flip & mix
    'flipud': 0.0,
    'fliplr': 0.0,

    'mosaic': 0.0,
    'mixup': 0.0,
    'copy_paste': 0.20
}

hyp_path = "yolov5/data/hyps/my_ccpd.yaml"   
os.makedirs(os.path.dirname(hyp_path), exist_ok=True)

# ----------- write the file ------------------------------------------
# sort_keys=False keeps the keys in the order declared above.
with open(hyp_path, 'w') as f:
    yaml.dump(my_hyp, f, sort_keys=False)

print(f"file added in: {os.getcwd()}/{hyp_path}")

In [None]:
# Training phase A on the CCPD base training data: start from yolov5s.pt,
# train for 5 epochs with --freeze 10, using the dataset YAML and the
# hyperparameter YAML written by the earlier cells. W&B logging is disabled first.
!wandb disabled
!python  -W ignore yolov5/train.py \
  --weights yolov5s.pt \
  --data yolov5/ccpd.yaml \
  --hyp yolov5/data/hyps/my_ccpd.yaml \
  --batch 32 \
  --epochs 5 \
  --freeze 10 \
  --name ccpd_ftA \
  --cache

In [None]:
# New hyperparameters to try (kept for reference only, not executed).
# The original cell opened a triple-quoted string that was never closed,
# which is a SyntaxError; it is kept here as a plain comment block instead.
#
# my_hyp = {
#     # Optimisation
#     'lr0': 0.001,         # initial learning rate (↑)
#     'lrf': 0.05,          # final lr as a fraction of lr0 (↓)
#     'momentum': 0.937,
#     'weight_decay': 0.0002,
#
#     # Warm-up
#     'warmup_epochs': 1.5,     # shorter warm-up, faster
#     'warmup_momentum': 0.8,
#     'warmup_bias_lr': 0.1,
#
#     # Loss balance
#     'box': 0.07,          # ↑ improves localisation of small boxes
#     'cls': 0.20,          # single class
#     'cls_pw': 1.0,
#     'obj': 1.1,           # ↑ more emphasis on object detection
#     'obj_pw': 1.0,
#     'iou_t': 0.25,        # more relaxed anchor-assignment threshold
#     'anchor_t': 4.0,
#     'fl_gamma': 0.0,      # no focal loss
#
#     # Colour augmentation
#     'hsv_h': 0.20,
#     'hsv_s': 0.60,
#     'hsv_v': 0.90,
#
#     # Geometric augmentation
#     'degrees': 25.0,       # ↑ for tilt/rotate
#     'translate': 0.12,
#     'scale': 0.50,         # ↑ for FN (near/far)
#     'shear': 8.0,
#     'perspective': 0.001,
#
#     # Flip
#     'flipud': 0.0,         # vertical flip disabled
#     'fliplr': 0.5,         # enabled: useful on blur/weather
#
#     # Mix techniques
#     'mosaic': 0.8,         # ↓ fewer artefacts
#     'mixup': 0.2,          # added
#     'copy_paste': 0.35     # ↑ useful on small objects such as plates
# }


In [None]:
# Hyperparameters for training phase B; written below to
# yolov5/data/hyps/my_ccpd_B.yaml and passed to train.py through --hyp.
# This cell rebinds the name my_hyp.
my_hyp = {
    'lr0': 0.0005,        # initial learning rate (fine-tuning)
    'lrf': 0.10,          # lr_final = lr0 * lrf  (NOTE(review): the original note says "cosine scheduler", but the training command in this notebook does not pass --cos-lr — confirm which schedule applies)
    'momentum': 0.937,
    'weight_decay': 0.0002,

    # warm-up
    'warmup_epochs': 3.0,
    'warmup_momentum': 0.8,
    'warmup_bias_lr': 0.1,

    # loss balance
    'box': 0.05,
    'cls': 0.20,          # a single class
    'cls_pw': 1.0,
    'obj': 0.90,
    'obj_pw': 1.0,
    'iou_t': 0.20,
    'anchor_t': 4.0,
    'fl_gamma': 0.0,

    # augmentation - colour / geometry
    'hsv_h': 0.15,
    'hsv_s': 0.50,
    'hsv_v': 0.8,

    'degrees': 7.5,
    'translate': 0.10,
    'scale': 0.40,
    'shear': 5.0,
    'perspective': 0.0,

    # flip & mix
    'flipud': 0.0,
    'fliplr': 0.0,

    'mosaic': 1.0,
    'mixup': 0.0,
    'copy_paste': 0.20
}

hyp_path = "yolov5/data/hyps/my_ccpd_B.yaml"   
os.makedirs(os.path.dirname(hyp_path), exist_ok=True)

# ----------- write the file ------------------------------------------
# sort_keys=False keeps the keys in the order declared above.
with open(hyp_path, 'w') as f:
    yaml.dump(my_hyp, f, sort_keys=False)

print(f"file added in: {os.getcwd()}/{hyp_path}")

In [None]:
# UNFREEZE (phase B): train for 40 epochs without --freeze, using the phase-B
# hyperparameter file. stdbuf -oL/-eL and python -u keep the log output unbuffered.
# NOTE(review): the starting weights are read from /kaggle/input/weights-train-a/best_A.pt,
# presumably the phase-A checkpoint uploaded as a Kaggle dataset — confirm it matches
# the run produced by the phase-A cell.
!wandb disabled
!stdbuf -oL -eL python -u -W ignore yolov5/train.py \
  --weights /kaggle/input/weights-train-a/best_A.pt \
  --data  yolov5/ccpd.yaml \
  --hyp   yolov5/data/hyps/my_ccpd_B.yaml \
  --batch 32 \
  --epochs 40  \
  --name  ccpd_ftB \
  --cache

**TEST PHASE**

In [None]:
# === CCPD MAPPING ===
# Lookup tables used by decode_plate(): the first plate index selects from
# PROVINCES, the second from ALPHA and every remaining index from ADS.
# NOTE(review): these tables repeat the ones defined earlier in the notebook.
PROVINCES = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",
             "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",
             "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",
             "新", "警", "学", "O"]

# Letters only (the table has no 'I').
ALPHA = ['A','B','C','D','E','F','G','H','J','K',
         'L','M','N','P','Q','R','S','T','U','V',
         'W','X','Y','Z','O'] 

# Letters followed by the digits 0-9.
ADS = ['A','B','C','D','E','F','G','H','J','K',
       'L','M','N','P','Q','R','S','T','U','V',
       'W','X','Y','Z','0','1','2','3','4','5',
       '6','7','8','9','O']

# === DECODING FUNCTIONS ===
def decode_plate(s):
    """Turn a "_"-joined string of table indices into the plate text.

    The first index is looked up in PROVINCES, the second in ALPHA and all
    following ones in ADS. Any failure while decoding yields None.
    """
    try:
        indices = [int(token) for token in s.split("_")]
        province = PROVINCES[indices[0]]
        letter = ALPHA[indices[1]]
        remainder = "".join(ADS[i] for i in indices[2:])
        return province + letter + remainder
    except Exception:
        return None

def split_bbox(bbox_str):
    """Parse the bounding-box field of a file name into four integers.

    Args:
        bbox_str: Field such as '283___502_511___591', i.e. values separated
            by "___" and "_".

    Returns:
        A 4-tuple of ints in the order they appear in ``bbox_str``, or
        ``(None, None, None, None)`` when the field does not hold exactly
        four decimal numbers.
    """
    tokens = [tok for seg in bbox_str.split("___") for tok in seg.split("_")]
    # isdecimal() only accepts characters that int() can parse; isdigit()
    # would also accept e.g. superscript digits and then fail in int().
    if len(tokens) == 4 and all(tok.isdecimal() for tok in tokens):
        # Return a real tuple (not a lazy map) so both branches share one type.
        return tuple(int(tok) for tok in tokens)
    return (None,) * 4

# === MAIN EXECUTION ===
# Walk the test folder recursively and build one metadata row per .jpg file
# whose name carries at least six "-"-separated fields.
folder = "/kaggle/working/ccpd_test"
rows = []

for root, _, files in os.walk(folder):
    for fname in (name for name in files if name.endswith(".jpg")):
        parts = fname[:-4].split("-")
        if len(parts) >= 6:
            # Field 2 is the bounding box, field 4 the encoded plate.
            x1, y1, x2, y2 = split_bbox(parts[2])
            record = {
                "image_path": os.path.join(root, fname),
                "x1_bbox": x1,
                "y1_bbox": y1,
                "x2_bbox": x2,
                "y2_bbox": y2,
                "plate_number": decode_plate(parts[4]),
            }
            rows.append(record)

df = pd.DataFrame(rows)
print(f"Dataset creato con {len(df)} righe")
df.head()

In [None]:
#!rm -rf /kaggle/working/ccpd_yolo_dataset/images/test
#!rm -rf /kaggle/working/ccpd_yolo_dataset/labels/test

In [None]:
# Image size used to normalise the bounding boxes, and the class index
# written at the start of every YOLO label line.
IMG_W, IMG_H = 720, 1160
CLASS_ID = 0

# Root folder of the exported YOLO dataset (images/ and labels/ are created below it).
OUT_BASE = "/kaggle/working/ccpd_yolo_dataset"

def export_yolo(df_split, split_name, img_w, img_h, out_base=None, class_id=None):
    """Copy the images of one split and write one YOLO label file per image.

    The name of each image's parent folder is preserved: an image is copied to
    ``<out_base>/images/<split_name>/<parent>/`` and its label is written to
    ``<out_base>/labels/<split_name>/<parent>/``. Each label file holds a
    single line ``class x_center y_center width height`` with the box
    normalised by the image size.

    Args:
        df_split: DataFrame with the columns ``image_path``, ``x1_bbox``,
            ``y1_bbox``, ``x2_bbox`` and ``y2_bbox``.
        split_name: Name of the split sub-folder (e.g. "test").
        img_w: Image width in pixels used for normalisation.
        img_h: Image height in pixels used for normalisation.
        out_base: Output root; defaults to the module-level ``OUT_BASE``.
        class_id: Class index to write; defaults to the module-level ``CLASS_ID``.

    Rows whose bounding box is missing are skipped. A row that raises during
    copy/write is reported and skipped as well.
    """
    out_base = OUT_BASE if out_base is None else out_base
    class_id = CLASS_ID if class_id is None else class_id

    base_img_dir = os.path.join(out_base, "images", split_name)
    base_lbl_dir = os.path.join(out_base, "labels", split_name)
    bbox_cols = ["x1_bbox", "y1_bbox", "x2_bbox", "y2_bbox"]

    count = 0
    skipped = 0

    for _, row in df_split.iterrows():
        try:
            # pandas stores a missing coordinate as NaN once the column also
            # holds numbers, so a `None in (...)` test never matches; isna()
            # covers both None and NaN.
            if row[bbox_cols].isna().any():
                skipped += 1
                continue

            # Bounding box in YOLO format (normalised)
            x_center = (row["x1_bbox"] + row["x2_bbox"]) / 2 / img_w
            y_center = (row["y1_bbox"] + row["y2_bbox"]) / 2 / img_h
            width    = (row["x2_bbox"] - row["x1_bbox"]) / img_w
            height   = (row["y2_bbox"] - row["y1_bbox"]) / img_h

            yolo_line = f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"

            img_path = row["image_path"]
            base_name = os.path.basename(img_path)
            name_no_ext = os.path.splitext(base_name)[0]

            # Name of the folder that directly contains the image.
            rel_subfolder = os.path.basename(os.path.dirname(img_path))

            # Final image and label folders, created on first use
            img_dir = os.path.join(base_img_dir, rel_subfolder)
            lbl_dir = os.path.join(base_lbl_dir, rel_subfolder)
            os.makedirs(img_dir, exist_ok=True)
            os.makedirs(lbl_dir, exist_ok=True)

            dst_img_path = os.path.join(img_dir, base_name)
            shutil.copy2(img_path, dst_img_path)

            label_path = os.path.join(lbl_dir, f"{name_no_ext}.txt")
            with open(label_path, "w") as f:
                f.write(yolo_line)

            count += 1

        except Exception as e:
            # Best effort: report the failing file and continue with the next row.
            print(f"Errore su file {row['image_path']}: {e}")

    print(f"{split_name.upper()} completato: {count} esempi salvati")
    if skipped:
        print(f"{split_name.upper()} saltati {skipped} esempi senza bounding box valida")


In [None]:
# Export the test metadata frame as the "test" split of the YOLO dataset.
# Images and labels keep the name of each image's parent folder as a sub-folder.
export_yolo(df, "test", IMG_W, IMG_H)

In [None]:
# Evaluate the phase-B weights separately on every sub-folder of the exported test split.
base_dir = "/kaggle/working/ccpd_yolo_dataset"
img_root = os.path.join(base_dir, "images", "test")
# NOTE(review): lbl_root is not used further in this cell.
lbl_root = os.path.join(base_dir, "labels", "test")
# One dataset YAML is rewritten for each subset before val.py is launched.
template_yaml_path = "/kaggle/working/yolov5/ccpd_temp.yaml"
# NOTE(review): assumes the phase-B run was saved under the name ccpd_ftB — confirm
# the run folder after training.
weights_path = "/kaggle/working/yolov5/runs/train/ccpd_ftB/weights/best.pt"

# List of sub-folders such as 'blur', 'tilt', ...
subdirs = [d for d in os.listdir(img_root) if os.path.isdir(os.path.join(img_root, d))]

for sub in subdirs:
    img_dir = os.path.join("images/test", sub)  # relative path in YAML
    # NOTE(review): lbl_dir is not used further in this cell.
    lbl_dir = os.path.join("labels/test", sub)

    
    # Dataset YAML for this subset: `path` is the dataset root and `val`
    # points at the subset's image folder relative to it.
    yaml_content = f"""\
                    path: {base_dir}
                    train: {img_dir}  # unused
                    val: {img_dir}
                    nc: 1
                    names: ['plate']
                    """
    with open(template_yaml_path, "w") as f:
        f.write(yaml_content)

    print(f" Valutazione subset: {sub}")
    # Run val.py on the subset; outputs go to runs/test/test_<sub>, with the
    # predictions and their confidences saved as text (--save-txt, --save-conf).
    !python /kaggle/working/yolov5/val.py \
        --weights "{weights_path}" \
        --data {template_yaml_path} \
        --task val \
        --save-txt \
        --save-conf \
        --name test_{sub} \
        --project /kaggle/working/yolov5/runs/test \
        --exist-ok


In [None]:
# Pack the whole evaluation output folder recursively into ccpd_results.zip.
!zip -r ccpd_results.zip /kaggle/working/yolov5/runs/test