In [None]:
# Download the training dataset (a 50k-image subset of ccpd_base) from Google Drive into ./datasets
!gdown --folder https://drive.google.com/drive/u/1/folders/1Qirh0lsjdsroLHEmJDtS6sVXPQKalW6j -O datasets

In [None]:
import tarfile
import shutil
import os

# Unpack the downloaded .tar archive into the Kaggle working directory.
archive_path = "/kaggle/working/datasets/ccpd_subset_base.tar"
extract_path = "/kaggle/working/"

with tarfile.open(archive_path, "r") as tar:
    # The archive comes from a shared Drive folder, so prefer the "data"
    # extraction filter when this Python version provides it: it refuses
    # members that would be written outside extract_path (absolute paths,
    # ".." components, links pointing outside the destination).
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=extract_path, filter="data")
    else:
        # Older interpreters have no extraction filters; fall back to the
        # plain extraction the notebook used originally.
        tar.extractall(path=extract_path)

print("Archive extracted in:", extract_path)

In [None]:
# Remove the download folder: the .tar archive inside it is no longer needed
# once its content has been extracted.
folder_to_delete = "/kaggle/working/datasets/"

if not os.path.exists(folder_to_delete):
    print(f"Folder not found: {folder_to_delete}")
else:
    shutil.rmtree(folder_to_delete)
    print(f"Folder eliminated: {folder_to_delete}")

In [None]:
# Count the regular files directly inside the training folder; the expected
# total is 50k images.
folder_path = "/kaggle/working/ccpd_subset_base/train"
num_files = sum(
    os.path.isfile(os.path.join(folder_path, entry))
    for entry in os.listdir(folder_path)
)

print(f" Number of images in '{folder_path}': {num_files}")

In [None]:
# Clone the YOLOv5 repo, make it the working directory, and install its requirements.
# NOTE(review): after the %cd below, relative paths used by later cells resolve inside yolov5/.
!git clone https://github.com/ultralytics/yolov5  
%cd yolov5
%pip install -qr requirements.txt  #dependencies

In [None]:
# Folder holding the extracted CCPD training images.
DATA_PATH = "/kaggle/working/ccpd_subset_base/train"
# Text of the dataset config file: train/val image folders, class count (nc)
# and class names. The literal keeps its leading indentation, so whatever
# writes CONTENT to disk produces an indented file as well.
CONTENT =  """
            train: /kaggle/working/ccpd_yolo_dataset/images/train
            val:  /kaggle/working/ccpd_yolo_dataset/images/val
            
            nc: 1
            names: ['plate']
           """

In [None]:
import pandas as pd
# Build one metadata record per image from its file name, in the format (image_path, x1_bbox, y1_bbox, x2_bbox, y2_bbox, plate_number)
# Lookup tables used to decode the plate field of a file name.
# Position 0 of the code indexes PROVINCES, position 1 indexes ALPHA and every
# remaining position indexes ADS.
PROVINCES = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",
             "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",
             "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",
             "新", "警", "学", "O"]

ALPHA = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
         'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
         'W', 'X', 'Y', 'Z', 'O']

ADS = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
       'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
       'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5',
       '6', '7', '8', '9', 'O']


def decode_plate(s):
    """Decode an underscore-separated index string into a plate number.

    Args:
        s: Plate code such as ``"0_0_22_27_27_33_16"``. The first index selects
            from PROVINCES, the second from ALPHA, the rest from ADS.

    Returns:
        The decoded plate string, or ``None`` when the code cannot be decoded
        (non-integer token, fewer than two indices, or an index outside its
        table).
    """
    try:
        idx = [int(token) for token in s.split("_")]
    except (AttributeError, ValueError):
        # Not a string, or a token that is not an integer.
        return None

    # Negative indices would silently wrap around to the end of a table and
    # produce a plausible-looking but wrong plate, so reject them explicitly.
    if len(idx) < 2 or any(i < 0 for i in idx):
        return None

    try:
        return PROVINCES[idx[0]] + ALPHA[idx[1]] + "".join(ADS[i] for i in idx[2:])
    except IndexError:
        return None

def split_bbox(bbox_str):
    """Parse a bounding-box field into four integers.

    The field is split on ``"___"`` and then on ``"_"``, e.g.
    ``'283___502_511___591'`` -> ``(283, 502, 511, 591)``.

    Args:
        bbox_str: Bounding-box part of an image file name.

    Returns:
        A 4-tuple of ints in the order they appear in the field, or
        ``(None, None, None, None)`` when the field does not contain exactly
        four numeric tokens.
    """
    tokens = []
    for seg in bbox_str.split("___"):
        tokens.extend(seg.split("_"))

    if len(tokens) == 4 and all(t.isdigit() for t in tokens):
        try:
            # Convert eagerly and return a real tuple so both outcomes have the
            # same type and a bad token fails here, not later at the caller.
            return tuple(int(t) for t in tokens)
        except ValueError:
            # str.isdigit() accepts characters (e.g. superscript digits) that
            # int() cannot parse; treat those as an unparseable field.
            pass
    return (None,) * 4

folder = "/kaggle/working/ccpd_subset_base/train"
rows   = []

# os.listdir returns entries in arbitrary order; sorting makes the row order --
# and therefore any seeded shuffle/split of these rows -- the same on every run.
for fname in sorted(os.listdir(folder)):
    if not fname.endswith(".jpg"):
        continue

    # Drop the ".jpg" suffix and split the name into its '-'-separated fields;
    # field 2 holds the bounding box and field 4 the encoded plate.
    parts = fname[:-4].split("-")
    if len(parts) < 6:
        continue

    x1, y1, x2, y2 = split_bbox(parts[2])
    plate = decode_plate(parts[4])

    rows.append({
        "image_path": os.path.join(folder, fname),
        "x1_bbox": x1, "y1_bbox": y1,
        "x2_bbox": x2, "y2_bbox": y2,
        "plate_number": plate
    })

In [None]:
# Turn the list of per-image records into a table with one row per image.
df = pd.DataFrame(data=rows)

In [None]:
# Quick sanity check of the metadata table: its size, then the first rows.
print("Rows number:", len(df))
print("Columns number:", df.shape[1])
print("Shape:", df.shape)
df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed random_state makes the
# shuffle repeatable for a given row order.
df_train, df_val = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

print(f"Train set: {len(df_train)} img")
print(f"Val set:   {len(df_val)} img")

In [None]:
# Remove any YOLO dataset folder left by a previous run before it is rebuilt.
!rm -rf /kaggle/working/ccpd_yolo_dataset/

In [None]:
import os
import shutil
import pandas as pd

# Image parameters (use the actual resolution of your images)
IMG_W, IMG_H = 720, 1160
# Class index written at the start of every label line
CLASS_ID = 0

# Input/output paths
SRC_IMG_DIR = "/kaggle/working/ccpd_subset_base/train"
OUT_BASE = "/kaggle/working/ccpd_yolo_dataset"

def export_yolo(df_split, split_name, img_w, img_h, out_base=None, class_id=None):
    """Write one dataset split in YOLO layout (``images/<split>`` + ``labels/<split>``).

    For every row the image is copied to ``<out_base>/images/<split_name>/`` and
    a one-line label file ``<class> <x_center> <y_center> <width> <height>``
    (box normalised by ``img_w`` / ``img_h``) is written to
    ``<out_base>/labels/<split_name>/``.

    Args:
        df_split: DataFrame with columns ``image_path``, ``x1_bbox``,
            ``y1_bbox``, ``x2_bbox``, ``y2_bbox``.
        split_name: Sub-folder name for this split, e.g. ``"train"`` or ``"val"``.
        img_w: Image width in pixels used to normalise x values.
        img_h: Image height in pixels used to normalise y values.
        out_base: Root output folder; defaults to the module-level ``OUT_BASE``.
        class_id: Class index for the label line; defaults to the module-level
            ``CLASS_ID``.

    Returns:
        None. Progress and per-file problems are reported with ``print``.
    """
    if out_base is None:
        out_base = OUT_BASE
    if class_id is None:
        class_id = CLASS_ID

    img_dir = os.path.join(out_base, "images", split_name)
    lbl_dir = os.path.join(out_base, "labels", split_name)
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(lbl_dir, exist_ok=True)

    exported = 0
    skipped = 0

    for _, row in df_split.iterrows():
        src_path = row["image_path"]
        x1, y1, x2, y2 = (row[col] for col in ("x1_bbox", "y1_bbox", "x2_bbox", "y2_bbox"))

        # Rows whose file name could not be parsed carry None/NaN coordinates.
        # Arithmetic on NaN does not raise, so without this check a label full
        # of "nan" values would be written. Empty or inverted boxes are
        # rejected for the same reason: they are not usable annotations.
        if any(pd.isna(v) for v in (x1, y1, x2, y2)) or x2 <= x1 or y2 <= y1:
            skipped += 1
            print(f"Errore su file {src_path}: bounding box mancante o non valido")
            continue

        try:
            x_center = (x1 + x2) / 2 / img_w
            y_center = (y1 + y2) / 2 / img_h
            width = (x2 - x1) / img_w
            height = (y2 - y1) / img_h

            yolo_line = f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"

            base_name = os.path.basename(src_path)
            name_no_ext = os.path.splitext(base_name)[0]

            # Write the YOLO label first, then copy the image: if either step
            # fails, no image is left in the split without its label.
            label_path = os.path.join(lbl_dir, f"{name_no_ext}.txt")
            with open(label_path, "w") as f:
                f.write(yolo_line)

            shutil.copy2(src_path, os.path.join(img_dir, base_name))
        except (OSError, TypeError, ValueError) as e:
            # Best effort: report the problematic file and keep going.
            skipped += 1
            print(f"Errore su file {src_path}: {e}")
            continue

        exported += 1

    print(f"{split_name.upper()} completato → {exported} esempi")
    if skipped:
        print(f"{split_name.upper()}: {skipped} righe saltate")

# Export both splits with the configured image size
for split_label, split_frame in (("train", df_train), ("val", df_val)):
    export_yolo(split_frame, split_label, IMG_W, IMG_H)

In [None]:
# The training cell passes --data /kaggle/working/ccpd.yaml, but the notebook's
# working directory is yolov5/ after the earlier %cd, so a relative "ccpd.yaml"
# would land in a different folder. Write the config to the exact path that
# train.py is asked to read.
with open("/kaggle/working/ccpd.yaml", "w") as file:
    file.write(CONTENT)

In [None]:
# Turn off Weights & Biases logging before launching training.
!wandb disabled

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Training phase on the ccpd_base training data: start from the yolov5s.pt
# weights and train for 10 epochs (image size 640, batch size 16, --cache on),
# using the dataset config at /kaggle/working/ccpd.yaml and the run name
# ccpd_yolo_finetune2.
!python -W ignore train.py \
  --img 640 \
  --batch 16 \
  --epochs 10 \
  --data /kaggle/working/ccpd.yaml \
  --weights yolov5s.pt \
  --name ccpd_yolo_finetune2 \
  --cache

In [None]:
#testing phase on test dataset
!python detect.py \
  --weights runs/train/ccpd_yolo_finetune22/weights/best.pt \
  --img 640 \
  --conf 0.25 \
  --source /kaggle/input/ccpd-dataset/images/val