In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import shutil


In [None]:
import torch

# Report whether PyTorch can see a CUDA device and how many devices are visible.
print(f'{torch.cuda.is_available() = }')
print(f'{torch.cuda.device_count() = }')

## Preprocessing dataset


In [None]:
# Wipe everything under /kaggle/working so the notebook starts from an empty output directory.
!rm -rf /kaggle/working/*
# Read the GreenParking annotation file into a list of lines.
# NOTE(review): the label-conversion cell below reads the same file again, so this read looks redundant.
with open ("/kaggle/input/training-dataset/GreenParking/location.txt", "r") as file:
    lines = file.readlines()
# print(lines[:5])

In [None]:
# Working copy of the merged dataset: images and their YOLO label files live in sibling folders.
path_dataset = "/kaggle/working/dataset"
labels_data = os.path.join(path_dataset, "labels")
images_data = os.path.join(path_dataset, "images")
# makedirs(..., exist_ok=True) also creates the parent folder and keeps this cell re-runnable.
for _dataset_dir in (labels_data, images_data):
    os.makedirs(_dataset_dir, exist_ok=True)

### Processing motorbike plates from: training-dataset (over 1700 images)

In [None]:
# labels
# Convert each GreenParking annotation row into a YOLO label file.
# The code below reads fields 2..5 of a row as left x, top y, box width and box height in
# pixels and writes "class x_center y_center width height", normalised by the image size.
# NOTE(review): only one box per row is converted - confirm rows never describe several plates.
input_path = "/kaggle/input/training-dataset/GreenParking"
with open(os.path.join(input_path, "location.txt"), "r") as file:
    lines = file.readlines()
for line in lines:
    line = line.strip().split()
    if len(line) < 6:
        # Blank or truncated row: there is no complete box to convert.
        continue
    img_name = line[0]
    line[2:] = map(float, line[2:])
    img = cv2.imread(os.path.join(input_path, img_name))
    if img is None:
        # cv2.imread returns None (it does not raise) when a file is missing or unreadable.
        print(f"Skipping {img_name}: image could not be read")
        continue
    height, width = img.shape[:2]
    # format bounding box for yolov12
    x_center = round((line[2] + line[4] / 2) / width, 6)   # x_center
    y_center = round((line[3] + line[5] / 2) / height, 6)  # y_center
    wid = round(line[4] / width, 6)    # width
    hei = round(line[5] / height, 6)   # height
    # splitext swaps the real extension, whatever its case, instead of replacing a ".jpg" substring.
    label_path = os.path.join(labels_data, os.path.splitext(img_name)[0] + ".txt")
    with open(label_path, "w") as label_file:
        label_file.write(f'{0} {x_center} {y_center} {wid} {hei}\n')

In [None]:
# images
# shutil.copytree(..., dirs_exist_ok=True) merges a tree into an existing folder, which is what
# distutils' copy_tree did here; distutils is deprecated (PEP 632) and removed in Python 3.12.
shutil.copytree("/kaggle/input/training-dataset/GreenParking", images_data, dirs_exist_ok=True)
# The annotation file is copied along with the images; remove it from the image folder.
os.remove("/kaggle/working/dataset/images/location.txt")
shutil.copytree("/kaggle/input/number-plate-dataset-3/images", images_data, dirs_exist_ok=True)
shutil.copytree("/kaggle/input/number-plate-dataset-3/labels", labels_data, dirs_exist_ok=True)
# Drop two samples (image and matching label) that came from number-plate-dataset-3.
os.remove("/kaggle/working/dataset/images/2.1.png")
os.remove("/kaggle/working/dataset/images/1.1.PNG")
os.remove("/kaggle/working/dataset/labels/2.1.txt")
os.remove("/kaggle/working/dataset/labels/1.1.txt")



### Processing car plate from: car-plate-dataset (over 1000 images)

In [None]:
input_path = "/kaggle/input/car-plate/car-plate"
def rename(name_file):
    """Return ``name_file`` with everything from the first ``.rf.`` of its stem removed.

    The extension is kept as is; names whose stem has no ``.rf.`` marker come back unchanged.
    """
    stem, extension = os.path.splitext(name_file)
    base, _, _ = stem.partition(".rf.")
    return f"{base}{extension}"
    
def copy_file(src, dst):
    """Copy every entry of ``src`` into ``dst``, naming each copy with ``rename``."""
    for entry in os.listdir(src):
        source_path = os.path.join(src, entry)
        target_path = os.path.join(dst, rename(entry))
        shutil.copy(source_path, target_path)

# Copy the car-plate images and labels into the merged dataset folders; copy_file shortens
# each file name with rename() on the way.
copy_file("/kaggle/input/car-plate/car-plate/images", images_data)
copy_file("/kaggle/input/car-plate/car-plate/labels", labels_data)

### Split data

In [None]:
def clamp_yolo_bbox(bbox):
    """Return a normalised YOLO box moved/shrunk so that it lies fully inside the image.

    Args:
        bbox: ``[x_center, y_center, width, height]`` relative to the image size.

    Returns:
        A new list in the same layout: width and height are capped at 1 and the center is
        shifted so that neither edge of the box falls below 0 or above 1.
    """
    x_center, y_center, width, height = bbox
    width = min(width, 1.0)
    height = min(height, 1.0)
    x_center = min(max(x_center, width / 2), 1 - width / 2)    # w/2 <= x_center <= 1 - w/2
    y_center = min(max(y_center, height / 2), 1 - height / 2)  # h/2 <= y_center <= 1 - h/2
    return [x_center, y_center, width, height]


# Rewrite every label file in place so that all boxes stay inside the image and
# every row carries class id 0 (single-class dataset).
label_dir = "/kaggle/working/dataset/labels"
list_labels = [f for f in os.listdir(label_dir) if f.endswith(".txt")]
for label in list_labels:
    label_path = os.path.join(label_dir, label)
    with open(label_path, "r") as f:
        lines = f.readlines()
    new_lines = []
    for line in lines:
        values = line.strip().split()
        if len(values) < 5:
            # Blank or incomplete row: there is no full box to keep.
            continue
        # A YOLO detection row is "class x_center y_center width height".
        bbox = clamp_yolo_bbox(list(map(float, values[1:5])))
        new_lines.append(f"0 {' '.join(map(str, bbox))}\n")

    with open(label_path, "w") as f:
        f.writelines(new_lines)

In [None]:
from sklearn.model_selection import train_test_split

# Gets list of labels and images name.
# os.listdir returns entries in arbitrary order, so sort them first: otherwise the fixed
# random_state below would not produce the same split on every run.
list_images = sorted(os.listdir("/kaggle/working/dataset/images"))
list_labels = sorted(os.listdir("/kaggle/working/dataset/labels"))

assert len(list_images) == len(list_labels), (
    f"{len(list_images)} images but {len(list_labels)} label files"
)

# train, val: 80/20
train, val = train_test_split(list_images, test_size=1/5, random_state=42)

print(f'''
      len(train) = {len(train)}
      len(val) = {len(val)}
''')
# print(train)
# print(train)

In [None]:
os.mkdir("data_training")
def move_data(list_data, dst,
              images_src="/kaggle/working/dataset/images",
              labels_src="/kaggle/working/dataset/labels"):
    """Copy the listed images and their label files into ``dst/images`` and ``dst/labels``.

    Despite the name, the files are copied; the source folders are left untouched.

    Args:
        list_data: Image file names (not paths) to copy.
        dst: Destination root; it and its ``images`` / ``labels`` sub-folders are created
            when missing, so the call can be repeated.
        images_src: Folder holding the source images.
        labels_src: Folder holding the label files, named ``<image stem>.txt``.

    Raises:
        FileNotFoundError: From ``shutil.copy`` when an image or its label file is missing.
    """
    images_dst = os.path.join(dst, "images")
    labels_dst = os.path.join(dst, "labels")
    os.makedirs(images_dst, exist_ok=True)
    os.makedirs(labels_dst, exist_ok=True)
    for image_name in list_data:
        shutil.copy(os.path.join(images_src, image_name), os.path.join(images_dst, image_name))
        # splitext handles any image extension (.png, .JPG, ...), not only lowercase ".jpg".
        label_name = os.path.splitext(image_name)[0] + ".txt"
        shutil.copy(os.path.join(labels_src, label_name), os.path.join(labels_dst, label_name))

move_data(train, "/kaggle/working/data_training/train")
move_data(val, "/kaggle/working/data_training/val")

### Data Augmentation

In [None]:
!pip install albumentations==1.3.0

In [None]:
import albumentations as A
from tqdm import tqdm

# Augmentation pipeline. Every step is applied with probability 0.3, in the order listed.
# Boxes are passed in YOLO format and their class ids travel in the `class_labels` field.
_bbox_params = A.BboxParams(format='yolo', label_fields=['class_labels'])

_augmentation_steps = [
    A.RandomBrightnessContrast(p=0.3),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=15, val_shift_limit=10, p=0.3),
    A.RGBShift(r_shift_limit=20, g_shift_limit=20, b_shift_limit=20, p=0.3),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=30, p=0.3),
    A.MotionBlur(blur_limit=5, p=0.3),
    A.GaussNoise(p=0.3),
    A.Perspective(scale=(0.05, 0.15), p=0.3),
    A.RandomSizedBBoxSafeCrop(height=256, width=256, p=0.3),
    A.MedianBlur(blur_limit=3, p=0.3),
    A.RandomRain(drop_length=5, drop_width=1, blur_value=3, p=0.3),
    A.RandomFog(fog_coef_lower=0.1, fog_coef_upper=0.3, p=0.3),
    A.RandomShadow(shadow_roi=(0, 0.5, 1, 1), num_shadows_lower=1, num_shadows_upper=2, p=0.3),
]

transform = A.Compose(_augmentation_steps, bbox_params=_bbox_params)

def read_yolo_labels(label_path):
    """Read the bounding boxes stored in a YOLO label file.

    Args:
        label_path: Path to a text file with one ``class x_center y_center width height``
            row per object.

    Returns:
        A list with one list of floats per row, holding every field after the class id.
        Blank or incomplete rows (fewer than five fields) are skipped, so no empty box
        is handed to the augmentation pipeline.
    """
    bboxes = []
    with open(label_path, "r") as file:
        for line in file:
            parts = line.split()
            if len(parts) < 5:
                continue
            bboxes.append([float(value) for value in parts[1:]])
    return bboxes

def save_yolo_labels(label_path, bboxes):
    """Write ``bboxes`` to ``label_path`` as YOLO rows, all with class id 0.

    Each box becomes one line: ``0`` followed by the box values separated by single spaces.
    An existing file is overwritten.
    """
    with open(label_path, "w") as file:
        file.writelines(
            "0 " + " ".join(str(value) for value in bbox) + "\n"
            for bbox in bboxes
        )
        
def augment_data(image_folder, label_folder, num_aug=4):
    """Write ``num_aug`` augmented copies of every ``.jpg`` image next to the original.

    For each ``<stem>.jpg`` in ``image_folder`` (label file ``<stem>.txt`` in
    ``label_folder``) the module-level ``transform`` is applied ``num_aug`` times and the
    results are saved as ``<stem>_aug_<i>.jpg`` and ``<stem>_aug_<i>.txt`` in the same folders.

    Files whose stem already ends in ``_aug_<number>`` are skipped, so running the function
    again does not augment earlier augmentations. Images OpenCV cannot read are skipped too.

    Args:
        image_folder: Folder containing the source images; augmented images are written here.
        label_folder: Folder containing the YOLO label files; augmented labels are written here.
        num_aug: Number of augmented copies to create per source image.
    """
    def _is_augmented(stem):
        # True for stems produced by this function, e.g. "plate_aug_3".
        _, marker, index = stem.rpartition("_aug_")
        return bool(marker) and index.isdigit()

    image_files = [f for f in os.listdir(image_folder) if f.endswith(".jpg")]
    for img_file in tqdm(image_files):
        stem = os.path.splitext(img_file)[0]
        if _is_augmented(stem):
            continue

        image = cv2.imread(os.path.join(image_folder, img_file))
        if image is None:
            # cv2.imread returns None (it does not raise) for unreadable files.
            print(f"Skipping {img_file}: image could not be read")
            continue

        bboxes = read_yolo_labels(os.path.join(label_folder, stem + ".txt"))
        class_labels = ["0"] * len(bboxes)

        # Create augmented
        for i in range(num_aug):
            augmented = transform(image=image, bboxes=bboxes, class_labels=class_labels)
            cv2.imwrite(os.path.join(image_folder, f"{stem}_aug_{i}.jpg"), augmented["image"])
            save_yolo_labels(os.path.join(label_folder, f"{stem}_aug_{i}.txt"), augmented["bboxes"])

# Augment the training split; the augmented images and labels are written into these same folders.
image_folder = "/kaggle/working/data_training/train/images"
label_folder =  "/kaggle/working/data_training/train/labels"
augment_data(image_folder, label_folder)

### Check the dataset by creating a folder of images with their labels drawn on them

In [None]:
# Visual sanity check: draw every labelled box on its training image and save the result.
image_dir = '/kaggle/working/data_training/train/images'
label_dir = '/kaggle/working/data_training/train/labels'
label_img_dir = '/kaggle/working/data_training/labeled_image'
os.makedirs(label_img_dir, exist_ok=True)  # exist_ok keeps the cell re-runnable

for img_name in tqdm(os.listdir(image_dir)):
    img_path = os.path.join(image_dir, img_name)
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None (it does not raise) for unreadable files.
        print(f"Skipping {img_name}: image could not be read")
        continue
    img_height, img_width = image.shape[:2]

    label_path = os.path.join(label_dir, os.path.splitext(img_name)[0] + '.txt')
    with open(label_path, 'r') as f:
        lines = f.readlines()

    # Plot the bounding box on the image
    for line in lines:
        fields = line.split()
        if not fields:
            continue  # blank row
        class_id, x_center, y_center, width, height = map(float, fields)

        # Convert the normalised center/size to pixel corner coordinates.
        x_center *= img_width
        y_center *= img_height
        width *= img_width
        height *= img_height
        x1 = int(x_center - width / 2)
        y1 = int(y_center - height / 2)
        x2 = int(x_center + width / 2)
        y2 = int(y_center + height / 2)
        # Draw bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # Save once per image, after all boxes are drawn. Images whose label file is empty are
    # saved too, so samples without any box show up in the check.
    cv2.imwrite(os.path.join(label_img_dir, img_name), image)
        

In [16]:
# !zip -r /kaggle/working/data_training.zip /kaggle/working/data_training
# NOTE(review): these two commands delete the prepared dataset, including the
# /kaggle/working/data_training folder that datasets.yaml points to. They must not run
# before training if the (currently commented-out) model.train call is re-enabled.
!rm -rf /kaggle/working/data_training
!rm -rf /kaggle/working/dataset


In [None]:
# Define datasets.yaml file: dataset root, train/val sub-folders and a single class named '0'.
import yaml

datasets_yaml = dict(
    path='/kaggle/working/data_training',
    train='train',
    val='val',
    nc=1,
    names=['0'],
)

with open('datasets.yaml', 'w') as yaml_file:
    yaml.dump(datasets_yaml, stream=yaml_file)

# rm /kaggle/working/datasets.yaml

## Training

In [None]:
!pip install -U ultralytics 
!pip install -U ipywidgets -q

In [None]:
# shutil.rmtree("/kaggle/working/runs")

In [None]:
from ultralytics import YOLO
# model = YOLO('yolo12n.pt')
# Load the weights stored at /kaggle/input/model/best.pt instead of the yolo12n.pt
# checkpoint commented out above.
model = YOLO("/kaggle/input/model/best.pt")

In [None]:
!nvidia-smi

In [None]:
# model.train(
#     data='datasets.yaml',  
#     epochs=100,           
#     batch=16,             
#     device='cuda',         
#     imgsz=640,  
#     iou = 0.75,
#     # workers=4,
#     # cache=True            
#     # iou - lr0 - momentum - dropout 
# )

In [None]:
# Save the trained model
# NOTE(review): the model.train(...) call in the previous cell is commented out, so as written
# this saves the weights that were loaded from /kaggle/input/model/best.pt - confirm this is intended.
model.save('best_model.pt')

In [None]:
def load_training_results(csv_path):
    """Load a training ``results.csv`` and strip whitespace from its column names.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        The CSV as a DataFrame with stripped column names, or ``None`` when the file does
        not exist (for example when no training run happened in this session).
    """
    if not os.path.isfile(csv_path):
        return None
    frame = pd.read_csv(csv_path)
    frame.columns = frame.columns.str.strip()
    return frame


# Load the training results
results = load_training_results(os.path.join("/kaggle/working/runs/detect/train/", 'results.csv'))

if results is None:
    # Training is commented out above, so there may be no run folder; skip instead of crashing.
    print("No results.csv found under /kaggle/working/runs/detect/train/ - skipping the metrics plot.")
else:
    # Extract epochs and accuracy metrics
    epochs = results.index + 1
    mAP_0_5 = results['metrics/mAP50(B)']  # Mean Average Precision at IoU=0.5
    mAP_0_5_0_95 = results['metrics/mAP50-95(B)']  # MAP at IoU=0.5:0.95

    plt.figure(figsize=(10, 5))
    plt.plot(epochs, mAP_0_5, label='mAP_0.5')
    plt.plot(epochs, mAP_0_5_0_95, label='mAP_0.5:0.95')
    plt.xlabel('Epoch');   plt.ylabel('Accuracy')
    plt.title('Accuracy Over Epochs')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()


## Prediction and Text Extraction Using PaddleOCR

In [None]:
!pip install paddleocr
# !pip uninstall paddlepaddle-gpu
!pip install paddlepaddle

In [None]:
# !rm -rf /kaggle/working/results
# Scratch example of a nested result structure: r[0] is a list of entries, each made of
# four corner points followed by a (text, score) tuple.
# NOTE(review): presumably a captured OCR result used to work out the indexing - confirm.
r = [[[[[162.0, 60.0], [193.0, 69.0], [189.0, 80.0], [158.0, 70.0]], 
       ('TaJ]m-~P\\L6*XMV5<O_-}:IPL0LE0c0%Dv8i2:&1', 1702.5013427734375)],
      [[[264.0, 65.0], [285.0, 61.0], [288.0, 73.0], [266.0, 77.0]], 
       ('i\\EtFV*:,90&g3n7:A6Z>^3#(2P  B*', 914.424072265625)], 
      [[[515.0, 172.0], [528.0, 172.0], [528.0, 178.0], [515.0, 178.0]], 
       (' J+~H\'-&I"h}r|^|py\\orsv0o=. -3hkpiZ0uN#', 1416.0770263671875)]]]
# Display the third entry of the first (only) group.
r[0][2]

In [None]:
from paddleocr import PaddleOCR, draw_ocr
import cv2
import re
# Folder that receives the annotated prediction images; it is deleted and recreated here.
test_folder = '/kaggle/working/results'
!rm -rf /kaggle/working/results
os.mkdir(test_folder)
# Detections must have a confidence above this value to be kept by predict_ocr.
CONFIDENCE_THRESHOLD = 0.5

# https://paddlepaddle.github.io/PaddleOCR/latest/en/quick_start.html#use-by-code
# Shared OCR engine, created once with use_gpu = False.
ocr = PaddleOCR(use_angle_cls = True, use_gpu = False)

def paddle_ocr(image, x1, y1, x2, y2, ocr_engine=None, min_score=60):
    """Recognise the text inside a rectangular region of ``image``.

    Args:
        image: NumPy image array indexed as ``image[row, column]``.
        x1, y1, x2, y2: Corners of the region to read. Negative values are clamped to 0 so
            they cannot wrap around to the other side of the image.
        ocr_engine: Object exposing ``ocr(image, det=..., rec=..., cls=...)``. Defaults to
            the module-level ``ocr`` instance.
        min_score: Recognition score in percent that a result must exceed to be used.

    Returns:
        The recognised text as ``str`` with non-word characters removed and the letter "O"
        replaced by the digit "0". Returns an empty string when the region is empty or no
        result exceeds ``min_score``.
    """
    engine = ocr if ocr_engine is None else ocr_engine

    x1, y1, x2, y2 = (max(0, int(value)) for value in (x1, y1, x2, y2))
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        # Nothing to recognise; do not pass an empty array to the OCR engine.
        return ""

    result = engine.ocr(crop, det=False, rec=True, cls=False)
    text = ""
    for r in result or []:
        if not r:
            continue
        candidate, score = r[0][0], r[0][1]
        # A NaN score counts as 0; otherwise compare as a whole percentage.
        percent = 0 if np.isnan(score) else int(score * 100)
        if percent > min_score:
            text = candidate

    # Raw string: '\W' in a normal string literal is an invalid escape sequence.
    text = re.sub(r'[\W]', '', text)
    text = text.replace("???", "")
    text = text.replace("O", "0")
    text = text.replace("粤", "")
    return str(text)

def predict_ocr(image):
    """Detect plates in ``image``, read their text and draw both on a copy of the image.

    Uses the module-level ``model`` for detection, ``CONFIDENCE_THRESHOLD`` to filter
    boxes and ``paddle_ocr`` to read each remaining box.

    Args:
        image: BGR image array as produced by ``cv2.imread`` or ``VideoCapture.read``.

    Returns:
        An annotated copy of ``image``. It stays in BGR channel order because the callers
        hand it to ``cv2.imwrite`` / ``cv2.imshow``, which expect BGR; converting to RGB
        here would save files with red and blue swapped. The input array is not modified.
    """
    results = model.predict(image, device='cpu')
    annotated = image.copy()

    for result in results:
        for box in result.boxes:
            confidence = float(box.conf[0])
            if confidence <= CONFIDENCE_THRESHOLD:
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # Read the text from the untouched input so annotations drawn for earlier
            # boxes cannot leak into this crop.
            text = paddle_ocr(image, x1, y1, x2, y2)

            print(f"Detected text: {confidence:.2f}_{text}")
            # Keep the caption origin inside the image for boxes near the top/left border.
            caption_origin = (max(x1 - 30, 0), max(y1 - 10, 20))
            cv2.putText(annotated, f'{confidence:.2f}_{text}', caption_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
            cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 3)

    return annotated

def predict_folder(path_test):
    """Run ``predict_ocr`` on every readable image in ``path_test``.

    Each annotated result is written to the module-level ``test_folder`` under the same
    file name. Entries that OpenCV cannot decode (non-image files, sub-folders) are skipped.
    """
    for img in sorted(os.listdir(path_test)):
        image = cv2.imread(os.path.join(path_test, img))
        if image is None:
            # cv2.imread returns None (it does not raise) for anything it cannot decode.
            print(f"Skipping {img}: not a readable image")
            continue
        image = predict_ocr(image)
        cv2.imwrite(os.path.join(test_folder, img), image)
        
# predict_folder('/kaggle/input/demo-ttt')

### Capture frames from a video and predict

In [None]:
import cv2
count = 0
!rm -rf /kaggle/working/vid_results
os.mkdir("/kaggle/working/vid_results")
while True:
    vidcap = cv2.VideoCapture('/kaggle/input/data-vid-vietnam/vid2.mp4')
    # fps = vidcap.get(cv2.CAP_PROP_FPS)  # Get FPS of vid
    # frame_skip = int(fps * 1)   # 1s
    # vidcap.set(cv2.CAP_PROP_POS_MSEC, time_ms)
    success, frame = vidcap.read()
    if success:
        count += 1
        if count%30==0:
            frame = predict_ocr(frame) 
            cv2.imshow("Video", frame)
            cv2.imwrite(os.path.join("/kaggle/working/vid_results", f"frame_{count}.jpg"), frame)
            cv2.waitKey()
            cv2.destroyAllWindows()
            
        if count == 1000:
            break
    else:
        break