## Imports and utils

In [None]:
# Fetch the YOLOv5 repository, make it the working directory and install its
# requirements.  The later `models.yolo` / `utils.loss` imports and the
# relative 'models/...' and 'data/...' paths are resolved from inside this
# checkout.
!git clone https://github.com/ultralytics/yolov5.git
%cd yolov5
!pip install -r requirements.txt --quiet

In [None]:
import os
import urllib
import urllib.request
from typing import List

import cv2
import torch
import yaml
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from models.yolo import Model
from utils.loss import ComputeLoss

In [None]:
# utils for decoding the labels

# Lookup tables for the index string stored in each file name.  Every table
# ends with an 'O' entry that is left out of the recognition charset below
# (presumably a "no character" placeholder -- TODO confirm).
provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "警", "学", "O"]
alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
             'X', 'Y', 'Z', 'O']
ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
       'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'O']


# decodes the plate from the file name
def decode_plate(label_str):
    """Decode the plate text from the index string found in a file name.

    Args:
        label_str: Underscore-separated integer indices, e.g.
            ``"0_0_22_27_27_33_16"``.  The first index selects from
            ``provinces``, the second from ``alphabets`` and every remaining
            one from ``ads``.

    Returns:
        The decoded plate as a single string.

    Raises:
        ValueError: If a field is not an integer.
        IndexError: If an index falls outside its table, or fewer than two
            indices are given.
    """
    indices = [int(field) for field in label_str.split('_')]
    province = provinces[indices[0]]
    alphabet = alphabets[indices[1]]
    ad = ''.join(ads[i] for i in indices[2:])

    return province + alphabet + ad


# Recognition charset: every table without its trailing 'O' entry.  The
# letters occur in both `alphabets` and `ads`, so duplicates are dropped
# (first occurrence wins).  Without this, char_to_idx would skip a whole range
# of indices and len(char_to_idx) would no longer match the largest index.
full_charset = list(dict.fromkeys(provinces[:-1] + alphabets[:-1] + ads[:-1]))
char_to_idx = {char: idx for idx, char in enumerate(full_charset, start=1)}  # leave 0 for CTC blank
idx_to_char = {idx: char for char, idx in char_to_idx.items()}


def encode_plate(text: str) -> List[int]:
    """Encode plate text as a list of charset indices (1-based, 0 is unused).

    Characters that are not part of ``char_to_idx`` (for example the 'O'
    entries excluded above) are skipped, so the result may be shorter than
    ``text``.
    """
    return [char_to_idx[c] for c in text if c in char_to_idx]


In [None]:
# torch dataset
class LicensePlateCCPDDataset(Dataset):
    """License-plate dataset whose labels are encoded in the image file names.

    File names are split on ``-``; field 2 holds the bounding box as
    ``x1~y1_x2~y2`` (pixel coordinates in the original image) and field 4
    holds the plate as an index string understood by ``decode_plate``.

    Each item is a tuple ``(image_tensor, plate_text, bbox)``:

    * ``image_tensor`` -- float32 tensor, channels first, RGB, resized to
      ``img_size`` and divided by 255.
    * ``plate_text`` -- decoded plate string.
    * ``bbox`` -- float32 tensor ``[x1, y1, x2, y2]`` expressed as fractions
      of the image width / height.

    Args:
        image_dir: Directory containing the ``.jpg`` files.
        transform: Stored on the instance; NOTE it is not applied by
            ``__getitem__``.
        img_size: Target size as ``(H, W)``.
    """

    def __init__(self, image_dir, transform=None, img_size=(640, 640)):
        self.image_dir = image_dir
        # os.listdir order is arbitrary; sort so that index -> file is reproducible
        self.image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))
        self.transform = transform
        self.img_size = img_size  # (H, W)

    def __len__(self):
        return len(self.image_files)

    @staticmethod
    def _parse_point(point_str):
        """Parse one ``x~y`` corner into ``(x, y)`` ints.

        ``&`` is accepted as well, since that is the separator used by the
        original CCPD naming scheme.
        """
        x, y = point_str.replace('&', '~').split('~')
        return int(x), int(y)

    def __getitem__(self, idx):
        filename = self.image_files[idx]
        path = os.path.join(self.image_dir, filename)

        # load image; cv2.imread signals failure by returning None
        image = cv2.imread(path)
        if image is None:
            raise OSError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # needed to normalise the bounding box
        original_h, original_w = image.shape[:2]

        # bounding box and plate are both taken from the file name
        parts = filename.split('-')
        if len(parts) < 5:
            raise ValueError(f"Unexpected file name format: {filename}")
        x1y1, x2y2 = parts[2].split('_')
        x1, y1 = self._parse_point(x1y1)
        x2, y2 = self._parse_point(x2y2)

        # resize to the target size (cv2 expects (W, H)) and scale intensities to [0, 1]
        target_h, target_w = self.img_size
        resized_image = cv2.resize(image, (target_w, target_h))
        image_tensor = torch.tensor(resized_image, dtype=torch.float32).permute(2, 0, 1) / 255.0

        # Scaling a coordinate to the resized image and then dividing by the
        # resized dimension is the same as dividing by the original dimension.
        bbox = torch.tensor(
            [x1 / original_w, y1 / original_h, x2 / original_w, y2 / original_h],
            dtype=torch.float32,
        )

        # plate text
        plate_text = decode_plate(parts[4])

        return image_tensor, plate_text, bbox

In [None]:
# creates the dataset and dataloader
# The dataset reads the Kaggle input directory with its default arguments
# (no transform, 640x640 images); the loader shuffles and yields batches of 8.
dataset = LicensePlateCCPDDataset("/kaggle/input/ccpd-weather/ccpd_weather")
loader = DataLoader(dataset, batch_size=8, shuffle=True)

## Detection (YOLO)

In [None]:
# utils
num_classes = 1  # just detecting plates (needed as YOLO is also a classifier)

# use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# build the network from the yolov5s YAML config for 3-channel input and
# `num_classes` classes, then move it to `device`
model = Model(cfg='models/yolov5s.yaml', ch=3, nc=num_classes).to(device)

In [None]:
# downloads pretrained weights
# The v6.0 release checkpoint is saved as 'yolov5s.pt' in the current working
# directory.
url = 'https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.pt'
output_path = 'yolov5s.pt'

# requires the urllib.request submodule to have been imported
urllib.request.urlretrieve(url, output_path)

In [None]:
# loads the weights to the model structure
# The checkpoint stores a pickled model object under 'model', so it cannot be
# read with weights_only=True (the torch.load default since PyTorch 2.6).
# NOTE: unpickling can execute arbitrary code -- only load checkpoints from a
# trusted source.
try:
    checkpoint = torch.load('yolov5s.pt', map_location=device, weights_only=False)
except TypeError:  # older PyTorch releases do not know the weights_only argument
    checkpoint = torch.load('yolov5s.pt', map_location=device)
weights = checkpoint['model'].float().state_dict()
model_dict = model.state_dict()

# skip last layer (avoid classifier) and any tensor the freshly built model
# does not have, or has with a different shape, so that load_state_dict cannot
# fail on a size mismatch
filtered_weights = {
    k: v
    for k, v in weights.items()
    if not k.startswith('model.24.')
    and k in model_dict
    and v.shape == model_dict[k].shape
}

# loads updated weights
model_dict.update(filtered_weights)
model.load_state_dict(model_dict, strict=False)

In [None]:
# bbox conversion function to YOLO format
def convert_to_yolo_format_normalized(boxes, image_index, default_label=0):
    """Convert corner-format boxes into YOLO-style target rows.

    Args:
        boxes: Tensor of ``[x1, y1, x2, y2]`` boxes, either a single box of
            shape ``(4,)`` or a batch of shape ``(N, 4)``.  Coordinates are
            not rescaled here, so they must already be normalised if
            normalised targets are wanted.
        image_index: Position of the image inside the batch; written to
            column 0 of every row.
        default_label: Class id written to column 1 of every row.

    Returns:
        Tensor of shape ``(N, 6)`` with rows
        ``[image_index, label, x_center, y_center, width, height]`` on the
        same device as ``boxes``; an empty ``(0, 6)`` tensor when ``boxes``
        has no elements.
    """
    if boxes.ndim == 1:
        boxes = boxes.unsqueeze(0)

    # Keep every helper tensor on the input's device so the final stack also
    # works for boxes that do not live on the CPU.
    device = boxes.device

    if boxes.numel() == 0:
        return torch.empty((0, 6), device=device)

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    count = boxes.size(0)
    image_idx = torch.full((count,), image_index, dtype=torch.float32, device=device)
    labels = torch.full((count,), default_label, dtype=torch.float32, device=device)

    # same format as YOLO predictions
    return torch.stack(
        [image_idx, labels, (x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1],
        dim=1,
    )


In [None]:
# loads the YOLO loss function (based on IoU)
# read the hyperparameter file that ships inside the cloned repository
with open('data/hyps/hyp.scratch-low.yaml') as f:
    hyp = yaml.safe_load(f)

# the hyperparameters are attached to the model before the loss is built from
# it (presumably ComputeLoss reads them from model.hyp -- TODO confirm)
model.hyp = hyp
compute_loss = ComputeLoss(model)

In [None]:
# utils
# Adam over every model parameter with a fixed learning rate of 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
model = model.to(device)

# training loop
# 15 passes over `loader`; every batch is converted to YOLO targets, run
# through the model and optimised against `compute_loss`.
for epoch in range(15):
    model.train()
    running_loss = 0

    # the second item of each batch (the plate text) is not needed for detection
    for i, (imgs, _, annotations) in enumerate(tqdm(loader)):
        imgs = imgs.to(device)

        # need to convert the labels with the function
        # one target tensor per image; j is the image's position in the batch
        targets = []
        for j in range(len(imgs)):
            boxes = annotations[j]
            yolo_target = convert_to_yolo_format_normalized(boxes, image_index=j)
            targets.append(yolo_target)

        if len(targets) == 0: # just in case
            continue

        # merge the per-image targets into one tensor and move it to `device`
        targets = torch.cat(targets, dim=0).to(device)

        outputs = model(imgs)  # forward pass only; the weights change later, in optimizer.step()
        loss, _ = compute_loss(outputs, targets) # using personalized loss; the second return value is ignored

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()  # clear the gradients before the next batch

        running_loss += loss.item()

    # average of the per-batch loss values over the epoch
    print(f"Epoch {epoch} - Loss: {running_loss / len(loader)}")

In [None]:
# saves the model
# only the state dict (parameters and buffers) is written, not the model
# object, so the architecture has to be rebuilt before loading it again
torch.save(model.state_dict(), "my_yolov5.pth")

In [None]:
# check that loading the saved weights works
# rebuild the same architecture (one class), replacing the `model` trained above
model = Model('models/yolov5s.yaml', ch=3, nc=1).to(device)

# read the state dict saved earlier and load it into the new model
state_dict = torch.load("my_yolov5.pth", map_location=device)
model.load_state_dict(state_dict) 
model.eval()  # switch to inference mode