# End-to-End Pipeline with YOLO + OCR Engines

## Utility functions for evaluation

In [None]:
import pandas as pd
import numpy as np
from torchmetrics.text import CharErrorRate
import jiwer
# Thai colour names. postprocess_df uses this list as the template
# vocabulary when snapping the OCR'd "color" field to a known value.
THAI_COLORS = [
    "แดง",    # Red
    "น้ำเงิน", # Blue
    "เขียว",  # Green
    "เหลือง", # Yellow
    "ส้ม",    # Orange
    "ชมพู",   # Pink
    "ม่วง",   # Purple
    "น้ำตาล", # Brown
    "ขาว",    # White
    "ดำ",     # Black
    "เทา",    # Gray
    "ฟ้า",    # Light Blue
    "ทอง",    # Gold
    "เงิน",   # Silver
]
# Thai province names (77 entries). postprocess_df uses this list as the
# template vocabulary when snapping the OCR'd "car_province" field to a
# known value.
THAI_PROVINCES = [
    "กรุงเทพมหานคร",
    "สมุทรปราการ",
    "นนทบุรี",
    "ปทุมธานี",
    "พระนครศรีอยุธยา",
    "อ่างทอง",
    "ลพบุรี",
    "สิงห์บุรี",
    "ชัยนาท",
    "สระบุรี",
    "ชลบุรี",
    "ระยอง",
    "จันทบุรี",
    "ตราด",
    "ฉะเชิงเทรา",
    "ปราจีนบุรี",
    "นครนายก",
    "สระแก้ว",
    "นครราชสีมา",
    "บุรีรัมย์",
    "สุรินทร์",
    "ศรีสะเกษ",
    "อุบลราชธานี",
    "ยโสธร",
    "ชัยภูมิ",
    "อำนาจเจริญ",
    "หนองบัวลำภู",
    "ขอนแก่น",
    "อุดรธานี",
    "เลย",
    "หนองคาย",
    "มหาสารคาม",
    "ร้อยเอ็ด",
    "กาฬสินธุ์",
    "สกลนคร",
    "นครพนม",
    "มุกดาหาร",
    "เชียงใหม่",
    "ลำพูน",
    "ลำปาง",
    "อุตรดิตถ์",
    "แพร่",
    "น่าน",
    "พะเยา",
    "เชียงราย",
    "แม่ฮ่องสอน",
    "นครสวรรค์",
    "อุทัยธานี",
    "กำแพงเพชร",
    "ตาก",
    "สุโขทัย",
    "พิษณุโลก",
    "พิจิตร",
    "เพชรบูรณ์",
    "ราชบุรี",
    "กาญจนบุรี",
    "สุพรรณบุรี",
    "นครปฐม",
    "สมุทรสาคร",
    "สมุทรสงคราม",
    "เพชรบุรี",
    "ประจวบคีรีขันธ์",
    "นครศรีธรรมราช",
    "กระบี่",
    "พังงา",
    "ภูเก็ต",
    "สุราษฎร์ธานี",
    "ระนอง",
    "ชุมพร",
    "สงขลา",
    "สตูล",
    "ตรัง",
    "พัทลุง",
    "ปัตตานี",
    "ยะลา",
    "นราธิวาส",
    "บึงกาฬ"
]


def calculate_cer(predictions: list[str], labels: list[str]) -> float:
    """Return the corpus-level Character Error Rate (CER) of the predictions.

    The two sequences are paired element by element and handed to
    torchmetrics' ``CharErrorRate`` in a single call, so the result is one
    aggregate score for the whole collection, not one score per pair.

    Args:
        predictions: Predicted strings.
        labels: Reference strings, aligned with ``predictions``.

    Returns:
        The aggregate CER as a plain Python float.
    """
    cer = CharErrorRate()
    # The metric comes back as a 0-d tensor; unwrap it to a Python scalar.
    return cer(predictions, labels).item()

def evaluate(label_df: pd.DataFrame, prediction_df: pd.DataFrame) -> pd.DataFrame:
    """Score OCR predictions against annotations, one column at a time.

    The annotation frame is first reshaped to the prediction schema:
    ``plate1``/``plate2`` are joined into ``registration_no``;
    ``axles_wheels_no``/``wheels``/``tires`` are joined into a single
    ``axles_wheels_no`` phrase; several columns are renamed. Rows of the two
    frames are then paired purely by position (row 0 with row 0, ...), so
    both frames must already be in the same document order.

    Side effect: the paired frame is written to ``merged_df.csv`` in the
    current working directory.

    Args:
        label_df: Ground-truth annotations (string-valued columns). Not
            modified by this function.
        prediction_df: OCR predictions, already using the final column
            names. Not modified by this function.

    Returns:
        A DataFrame with one row per evaluated column and the fields
        ``column_name``, ``cer`` and ``accuracy`` (exact-match percentage).
    """
    import warnings

    # Work on a copy so the column assignments below never leak into the
    # caller's annotation frame.
    label_df = label_df.copy()

    # Merge 'plate1' and 'plate2' into 'registration_no'.
    label_df["registration_no"] = label_df["plate1"] + " " + label_df["plate2"]
    # Merge 'axles_wheels_no', 'wheels', 'tires' into 'axles_wheels_no',
    # e.g. 'x', 'y', 'z' -> 'x เพลา y ล้อ ยาง z เส้น'.
    label_df["axles_wheels_no"] = (
        label_df["axles_wheels_no"] + " เพลา "
        + label_df["wheels"] + " ล้อ ยาง "
        + label_df["tires"] + " เส้น"
    )
    label_df = label_df.drop(columns=["plate1", "plate2", "wheels", "tires"])

    # Rename annotation columns to the names used by the predictions.
    rename_dict = {
        "province": "car_province",
        "type_car": "vehicle_use",
        "kind": "body_style",
        "num_body": "chassis_number",
        "brand": "manufacturer",
        "num_engine": "engine_number",
    }
    # fillna returns new frames, so the prediction frame is decoupled from
    # the caller's object as well.
    label_df = label_df.rename(columns=rename_dict).fillna("")
    prediction_df = prediction_df.fillna("")

    # Treat the 'ไม่พบข้อมูล' placeholder in 'year' as an empty value.
    def strip_placeholder(value):
        return value.replace("ไม่พบข้อมูล", "")

    prediction_df["year"] = prediction_df["year"].map(strip_placeholder)
    label_df["year"] = label_df["year"].map(strip_placeholder)

    columns_of_interest = [
        'date_of_registration', 'registration_no', 'car_province', 'vehicle_use', 'type', 'body_style',
        'manufacturer', 'model', 'year', 'color', 'chassis_number', 'chassis_location', 'engine_manufacturer',
        'engine_number', 'engine_location', 'fuel_type', 'fuel_tank_number', 'cylinders', 'cubic_capacity',
        'horse_power', 'axles_wheels_no', 'unladen_weight', 'load_capacity', 'gross_weight', 'seats'
    ]
    # Explicit copies: adding the "index" column to a column-sliced frame
    # would otherwise trigger pandas' SettingWithCopyWarning.
    label_df = label_df[columns_of_interest].copy()
    prediction_df = prediction_df[columns_of_interest].copy()

    if len(label_df) != len(prediction_df):
        # The inner merge below keeps only the overlapping positions, which
        # would hide missing documents; make that visible.
        warnings.warn(
            f"evaluate: {len(label_df)} annotation rows vs {len(prediction_df)} "
            "prediction rows; only the overlapping positions are scored.",
            stacklevel=2,
        )

    # Pair rows by position through a shared running index.
    label_df["index"] = range(len(label_df))
    prediction_df["index"] = range(len(prediction_df))
    merged_df = pd.merge(label_df, prediction_df, on="index", suffixes=('_annotation', '_prediction'))
    merged_df.to_csv("merged_df.csv")

    # Both frames were restricted to columns_of_interest above, so every
    # suffixed column is guaranteed to exist in merged_df.
    eval_list = []
    for col in columns_of_interest:
        predicted = merged_df[f"{col}_prediction"]
        expected = merged_df[f"{col}_annotation"]
        eval_list.append({
            "column_name": col,
            "cer": np.mean(calculate_cer(predicted, expected)),
            "accuracy": (predicted == expected).mean() * 100,
        })
    return pd.DataFrame(eval_list)

def map_with_cer(input_text: str, templates: list[str], cer_threshold: float = 0.5) -> str:
    """Snap ``input_text`` to the closest template, if one is close enough.

    Each template is scored with ``jiwer.cer(template, input_text)``. Among
    the templates scoring strictly below ``cer_threshold`` the lowest-scoring
    one is returned (the earliest wins a tie). When no template qualifies the
    text is returned unchanged; an empty text always maps to ``""``.
    """
    if input_text == "":
        return ""

    scored = [(jiwer.cer(candidate, input_text), candidate) for candidate in templates]
    # Keep only the candidates that beat the threshold.
    accepted = [pair for pair in scored if pair[0] < cer_threshold]
    if not accepted:
        return input_text

    # min() returns the first of equally good candidates.
    _, best_match = min(accepted, key=lambda pair: pair[0])
    return best_match
    
def postprocess_df(df: pd.DataFrame) -> pd.DataFrame:
    """Snap closed-vocabulary columns of ``df`` to their known values.

    Every listed column is passed value by value through ``map_with_cer``
    with that column's own template list and CER threshold. The frame is
    updated in place and also returned.
    """
    # (column, allowed values, CER threshold) - processed in this order.
    snapping_rules = (
        ("color", THAI_COLORS, 0.4),
        ("fuel_type", ["ดีเซล", "เบนซิน"], 0.8),
        ("body_style", ['กระบะบรรทุก', 'กระบะบรรทุก (ไม่มีหลังคา)', 'รถจักรยานยนต์', 'นั่งสองตอนท้ายบรรทุก', 'กระบะบรรทุก (ติดตั้งโครงเหล็ก)', 'กระบะบรรทุก  (ไม่มีหลังคา)', 'เก๋งสองตอน', 'รถแทรกเตอร์ที่ใช้ในการเกษตร', 'นั่งสามตอน'], 0.8),
        ("vehicle_use", ['รถยนต์บรรทุกส่วนบุคคล', 'รถจักรยานยนต์', 'รถยนต์นั่งส่วนบุคคลไม่เกิน 7 คน', 'รถแทรกเตอร์', 'รถจักรยานยนต์สาธารณะ'], 0.5),
        ("car_province", THAI_PROVINCES, 0.7),
        ("engine_location", ['ขวาเครื่อง', 'ซ้ายเครื่อง', 'ใต้ที่นั่ง', 'ซ้ายเรื่อง', 'ล่างเครื่อง'], 0.6),
        ("chassis_location", ['กลางขวา', 'หลังขวา', 'หน้าขวา', 'ใต้ที่นั่ง', 'หน้าซ้าย', 'หลัง', 'คอบังคับเลี้ยว', 'กระจังหน้าตอนใน'], 0.8),
    )
    for column, vocabulary, threshold in snapping_rules:
        # The lambda is consumed immediately by apply, so it always sees the
        # current rule's vocabulary and threshold.
        df[column] = df[column].apply(
            lambda raw: map_with_cer(raw, vocabulary, cer_threshold=threshold)
        )
    return df

## Determine dataset directory

Location where the dataset is stored

In [None]:
from pathlib import Path

# Folder that holds the document scans.
dataset_dir = Path("../datasets/Data/Srisawad_Dataset_100")

# Collect every JPEG in a deterministic, sorted order. The evaluation cells
# pair predictions with the annotations (sorted by document_name) purely by
# row position, so this ordering matters.
image_paths = sorted(dataset_dir.glob("*.jpg"))
# Peek at the first path as a sanity check.
image_paths[0]

## Get bboxes from YOLO first

In [None]:
from ultralytics import YOLO

# Load the YOLO detector from the weights file shipped in ../assets.
# NOTE(review): presumably fine-tuned to locate the registration-book fields
# listed in convert_column_name_dict further down - confirm.
model = YOLO("../assets/best.pt")

In [None]:
# Run detection over all image paths in one call; the OCR cells below iterate
# over yolo_results once per document. Requires a CUDA device (device="cuda").
# NOTE(review): see the ultralytics predict docs for imgsz / conf / half.
yolo_results = model.predict(image_paths, imgsz=640, conf=0.25, half=True, device="cuda")

## Crop textboxes function from YOLO results

In [None]:
from PIL import Image

def get_textboxes(result) -> list[Image.Image]:
    """Cut every detected box out of a YOLO result's source image.

    Returns one PIL crop per row of ``result.boxes.data``, in row order, so
    callers can pair the crops positionally with ``result.boxes.cls``.
    """
    # NOTE(review): ultralytics presumably provides orig_img in OpenCV's BGR
    # channel order while Image.fromarray assumes RGB - confirm whether the
    # channels should be reversed for the OCR engines that consume the crops.
    page = Image.fromarray(result.orig_img)
    # The first four columns of each row are x1, y1, x2, y2; round them to
    # whole pixels before cropping.
    corners = result.boxes.data[:, :4].round().cpu().int().numpy().tolist()
    return [page.crop(tuple(corner)) for corner in corners]

## 1. Surya

In [None]:
from surya.model.recognition.model import load_model as load_recognizer
from surya.model.recognition.processor import load_processor as load_recognizer_processor

# Instantiate Surya's text-recognition model and its matching processor once;
# both are passed to every batch_recognition call in the loop below.
recognizer = load_recognizer()
recognizer_processor = load_recognizer_processor()

In [None]:
from tqdm import tqdm
from surya.recognition import batch_recognition

surya_document_data = []
# Defined up front so the fill-in step below still works when YOLO returned
# no results at all.
class_names = {}
# Run OCR on the detected fields of each document
for result in tqdm(yolo_results):
    # One crop per detected box, in the same order as result.boxes
    textbox_images = get_textboxes(result)

    # Class id -> class name mapping, and the class id of every box
    class_names = result.names
    class_predictions = result.boxes.cls.cpu().int().numpy()

    # Get all text predictions (skip the OCR call when nothing was detected)
    if textbox_images:
        texts = batch_recognition(
            images=textbox_images,
            # Weird, but we need to tell the model that every
            # image is in Thai and English
            languages=[["th", "en"]] * len(textbox_images),
            model=recognizer,
            processor=recognizer_processor,
            batch_size=1
        )[0]
    else:
        texts = []

    document_info = {}
    for text, class_prediction in zip(texts, class_predictions):
        predicted_class = class_names[int(class_prediction)]
        # Collapse whitespace runs; split/join also trims both ends.
        # NOTE: if a class is detected more than once in a document, the
        # last box overwrites the earlier ones.
        document_info[predicted_class] = " ".join(text.split())

    surya_document_data.append(document_info)

# Fill in missing keys. result.names maps class id -> class name, so the
# *names* (the dict values) are the keys every document must carry; iterating
# the mapping itself would add integer ids as bogus keys instead.
expected_fields = list(class_names.values()) if isinstance(class_names, dict) else list(class_names)
for document_info in surya_document_data:
    for field_name in expected_fields:
        document_info.setdefault(field_name, "")

In [None]:
import pandas as pd

# Mapping from YOLO class names to the column names used for evaluation
convert_column_name_dict = {
    'จำนวน (cylinders)': 'cylinders',
    'น้ำหนักรถ (unladen_weight)': 'unladen_weight',
    'เลขถังแก๊ส (fuel_tank_number)': 'fuel_tank_number',
    'แรงม้า (horse_power)': 'horse_power',
    'สี (color)': 'color',
    'แบบ (model)': 'model',
    'น้ำหนักบรรทุก/น้ำหนักเพลา (load_capacity)': 'load_capacity',
    'เลขตัวรถ (chassis_number)': 'chassis_number',
    'ยี่ห้อเครื่องยนต์ (engine_manufacturer)': 'engine_manufacturer',
    'ยี่ห้อรถ (manufacturer)': 'manufacturer',
    'ซีซี (cubic_capacity)': 'cubic_capacity',
    'อยู่ที่ (chassis_location)': 'chassis_location',
    'จังหวัด (car_province)': 'car_province',
    'ลักษณะ (body_style)': 'body_style',
    'รย (type)': 'type',
    'วันจดทะเบียน (date_of_registration)': 'date_of_registration',
    'ประเภท (vehicle_use)': 'vehicle_use',
    'น้ำหนักรวม (gross_weight)': 'gross_weight',
    'เลขเครื่องยนต์ (engine_number)': 'engine_number',
    'รุ่นปี คศ (year)': 'year',
    'ที่นั่ง (seats)': 'seats',
    'เลขทะเบียน (registration_no)': 'registration_no',
    'อยู่ที่ (engine_location)': 'engine_location',
    'จำนวนเพลาและล้อ (axles_wheels_no)': 'axles_wheels_no',
    'เชื้อเพลิง (fuel_type)': 'fuel_type'
}

# One row per document: build the frame, switch to the evaluation column
# names and blank out any cell that has no value.
surya_prediction_df = (
    pd.DataFrame(surya_document_data)
    .rename(columns=convert_column_name_dict)
    .fillna("")
)
# Snap the closed-vocabulary fields to their known values
surya_prediction_df = postprocess_df(surya_prediction_df)

In [None]:
# Show the post-processed Surya predictions in the notebook output.
display(surya_prediction_df)

In [None]:
# Load the ground truth as strings, order it by document name so rows line up
# with the predictions, then drop the identifier columns that are not scored.
annotated_df = (
    pd.read_excel('../datasets/Data/annotation_sawad_100.xlsx', dtype=str)
    .fillna("")
    .sort_values("document_name")
    .drop(columns=["document_id", "document_name"])
)
# Per-column CER and exact-match accuracy for Surya
eval_df = evaluate(annotated_df, surya_prediction_df)
eval_df

## 2. EasyOCR

In [None]:
from easyocr import Reader

# EasyOCR reader configured for Thai and English; reused for every crop below.
reader = Reader(["th", "en"])

In [None]:
easyocr_document_data = []
# Defined up front so the fill-in step below still works when YOLO returned
# no results at all.
class_names = {}
# Run OCR on the detected fields of each document
for result in tqdm(yolo_results):
    # One crop per detected box, in the same order as result.boxes
    textbox_images = get_textboxes(result)

    # Class id -> class name mapping, and the class id of every box
    class_names = result.names
    class_predictions = result.boxes.cls.cpu().int().numpy()

    # Recognise each crop on its own; every call returns a list of
    # (box, text, confidence) entries.
    recognitions = [reader.recognize(np.array(textbox)) for textbox in textbox_images]

    document_info = {}
    for recognition, class_prediction in zip(recognitions, class_predictions):
        predicted_class = class_names[int(class_prediction)]
        # Use the first entry's text; fall back to "" instead of crashing
        # when the reader returned nothing for this crop.
        text = recognition[0][1] if recognition else ""
        # Collapse whitespace runs; split/join also trims both ends.
        # NOTE: if a class is detected more than once in a document, the
        # last box overwrites the earlier ones.
        document_info[predicted_class] = " ".join(text.split())

    easyocr_document_data.append(document_info)

# Fill in missing keys. result.names maps class id -> class name, so the
# *names* (the dict values) are the keys every document must carry; iterating
# the mapping itself would add integer ids as bogus keys instead.
expected_fields = list(class_names.values()) if isinstance(class_names, dict) else list(class_names)
for document_info in easyocr_document_data:
    for field_name in expected_fields:
        document_info.setdefault(field_name, "")

In [None]:
import pandas as pd

# Mapping from YOLO class names to the column names used for evaluation
convert_column_name_dict = {
    'จำนวน (cylinders)': 'cylinders',
    'น้ำหนักรถ (unladen_weight)': 'unladen_weight',
    'เลขถังแก๊ส (fuel_tank_number)': 'fuel_tank_number',
    'แรงม้า (horse_power)': 'horse_power',
    'สี (color)': 'color',
    'แบบ (model)': 'model',
    'น้ำหนักบรรทุก/น้ำหนักเพลา (load_capacity)': 'load_capacity',
    'เลขตัวรถ (chassis_number)': 'chassis_number',
    'ยี่ห้อเครื่องยนต์ (engine_manufacturer)': 'engine_manufacturer',
    'ยี่ห้อรถ (manufacturer)': 'manufacturer',
    'ซีซี (cubic_capacity)': 'cubic_capacity',
    'อยู่ที่ (chassis_location)': 'chassis_location',
    'จังหวัด (car_province)': 'car_province',
    'ลักษณะ (body_style)': 'body_style',
    'รย (type)': 'type',
    'วันจดทะเบียน (date_of_registration)': 'date_of_registration',
    'ประเภท (vehicle_use)': 'vehicle_use',
    'น้ำหนักรวม (gross_weight)': 'gross_weight',
    'เลขเครื่องยนต์ (engine_number)': 'engine_number',
    'รุ่นปี คศ (year)': 'year',
    'ที่นั่ง (seats)': 'seats',
    'เลขทะเบียน (registration_no)': 'registration_no',
    'อยู่ที่ (engine_location)': 'engine_location',
    'จำนวนเพลาและล้อ (axles_wheels_no)': 'axles_wheels_no',
    'เชื้อเพลิง (fuel_type)': 'fuel_type'
}

# One row per document: build the frame, switch to the evaluation column
# names and blank out any cell that has no value.
easyocr_prediction_df = (
    pd.DataFrame(easyocr_document_data)
    .rename(columns=convert_column_name_dict)
    .fillna("")
)
# Snap the closed-vocabulary fields to their known values
easyocr_prediction_df = postprocess_df(easyocr_prediction_df)

In [None]:
# Load the ground truth as strings, order it by document name so rows line up
# with the predictions, then drop the identifier columns that are not scored.
annotated_df = (
    pd.read_excel('../datasets/Data/annotation_sawad_100.xlsx', dtype=str)
    .fillna("")
    .sort_values("document_name")
    .drop(columns=["document_id", "document_name"])
)
# Per-column CER and exact-match accuracy for EasyOCR
eval_df = evaluate(annotated_df, easyocr_prediction_df)
eval_df