In [None]:
!pip install ultralytics
!pip install -U ipywidgets
from ultralytics import YOLO
import torch

In [None]:
from ultralytics import YOLO

model = YOLO('yolov8n.pt')

model.train(
    data='/kaggle/input/cv-project-files2/dataset.yaml',
    epochs=75,                            # Number of training epochs
    imgsz=640,                            # Image size
    batch=32,                             # batch size
    lr0=0.001,                             # initial learning rate
    augment=True,                         # no data augmentation
    dropout=0.2       ,                    # dropout to reduce overfitting
    device=[0,1]
)


In [None]:
#from ultralytics import YOLO
# resume training
#model = YOLO('/kaggle/working/runs/detect/train/weights/last.pt')
#model.train(resume=True)

In [None]:
image_folder = '/kaggle/input/large-license-plate-dataset/images/test/'
model = YOLO('/kaggle/working/runs/detect/train/weights/best.pt')

results = model(image_folder,stream=False)
output_folder = '/kaggle/working/test'
ok = 0
skip = 0
for image in results:
    device = image.boxes.xywh.device
    suitable_crops = torch.empty((0,6), device=device)
    for i in range(len(image)):
        x, y, w, h = image.boxes.xywh[i].tolist() 
        if w > 50 and h > 20:
            conf = image.boxes.conf[i]
            keep_crop = torch.tensor([[x-w/2,y-h/2,x+w/2,y+h/2,conf,0]],device=device)
            suitable_crops = torch.cat((suitable_crops,keep_crop))
            ok+=1
        else:
            skip+=1
    image.update(suitable_crops)
    image.save_crop(output_folder)

print("ok",ok,"skip",skip)

In [None]:
# to save the run : zip the folder from this command and then download the zip file
#import shutil
#shutil.make_archive("train_20", 'zip', "/kaggle/working/runs/detect/train")

## PaddleOCR

In [None]:
!pip install paddlepaddle-gpu
!pip install paddleocr

In [3]:
import pandas as pd
import os
from paddleocr import PaddleOCR
from PIL import Image
import pytesseract
import cv2
import matplotlib.pyplot as plt
import numpy as np
import re
from tqdm import tqdm

In [4]:
def process_image(image):
    scaled_image = cv2.resize(image, (400, 300))
    gray_image = cv2.cvtColor(scaled_image, cv2.COLOR_BGR2GRAY)
    gaussian_image = cv2.GaussianBlur(gray_image, (23, 23), 0)
    
    process_path = '/kaggle/working/PaddleOCR/preprocessed_license_plate.jpg'
    cv2.imwrite(process_path, gaussian_image)
    

def display_image(image, title="Image", cmap='gray'):
    plt.figure(figsize=(2, 2))  
    plt.imshow(image, cmap=cmap) 
    plt.title(title) 
    plt.axis('off')
    plt.show()

In [7]:
csv_path = '/kaggle/input/license-plate-text-recognition-dataset/lpr.csv'  
images_folder = '/kaggle/input/license-plate-text-recognition-dataset/cropped_lps/cropped_lps'  
process_path = '/kaggle/working/PaddleOCR/preprocessed_license_plate.jpg'
ocr = PaddleOCR(use_gpu=True,lang="en",show_log=False, use_angle_cls=True)

data = pd.read_csv(csv_path)

download https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar to /root/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer/en_PP-OCRv3_det_infer.tar


100%|██████████| 3910/3910 [00:06<00:00, 603.07it/s]


download https://paddleocr.bj.bcebos.com/PP-OCRv4/english/en_PP-OCRv4_rec_infer.tar to /root/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer/en_PP-OCRv4_rec_infer.tar


100%|██████████| 10000/10000 [00:05<00:00, 1963.53it/s]


download https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar to /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.tar


100%|██████████| 2138/2138 [00:15<00:00, 139.71it/s]








In [8]:
correct_count = 0
recognized_count = 0
total_count = 0
almost_count = 0
i = 0
for index, row in tqdm(data.iterrows(), total=len(data), mininterval=2):    
    if i < 10000:
        i+=1
        image_path = os.path.join(images_folder, row['images'])
        ground_truth = row['labels']
        image = cv2.imread(image_path)
        
        process_image(image)
        result = ocr.ocr(process_path, rec=True)
        if result and result[0]:
            recognized_text = result[0][0][1][0]
            recognized_text = re.sub(r'[^a-zA-Z0-9]', '', recognized_text)
        else:
            recognized_text = ""
        
        if recognized_text == ground_truth:
            correct_count += 1
            
        total_count+=1
    else:
        break
        
accuracy = correct_count / total_count * 100
print(f"Accuracy: {accuracy:.2f}% ")

 50%|█████     | 10000/20000 [06:10<06:10, 27.02it/s]

Accuracy: 63.04% 





# EasyOCR

In [12]:
!pip install numpy
!pip install easyocr
!pip install opencv-python
!pip install pandas
!pip install numpy



In [13]:
import os
import easyocr
import cv2
import pandas as pd
import numpy as np

In [14]:
images_path = '/kaggle/input/license-plate-text-recognition-dataset/cropped_lps/cropped_lps'
csv_path = '/kaggle/input/license-plate-text-recognition-dataset/lpr.csv' 

reader = easyocr.Reader(['en'], gpu=True)

data = pd.read_csv(csv_path)

In [15]:
def resize_image(image, target_height=200, target_width=600):
    original_height, original_width = image.shape[:2]

    scale_height = target_height / original_height
    scale_width = target_width / original_width

    scale_factor = min(scale_height, scale_width)

    new_width = int(original_width * scale_factor)
    new_height = int(original_height * scale_factor)

    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    return resized_image


# def sharpen_image(image):
#    kernel = np.array([[0, -1, 0],
#                      [-1,  9, -1],
#                      [0, -1, 0]])
#    return cv2.filter2D(image, -1, kernel)

#def denoise_image(image):
#    return cv2.GaussianBlur(image, (5, 5), 0)


def process_image(image_path):
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Image {image_path} not found or could not be loaded.")
        return None
        
    resized_img = resize_image(img)
    
    # gray_immg = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
    # gaussian_img = cv2.GaussianBlur(gray_image, (3, 3), 0)
    # enhanced_img = enhance_contrast(resized_img)
    # sharpened_img = sharpen_image(resized_img)
    # denoised_img = denoise_image(img)
    # gray_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
    
    return resized_img

def calculate_cer(recognized_text, ground_truth):
    incorrect_count = 0;

    for i in range(min(len(recognized_text), len(ground_truth))):
        if recognized_text[i] != ground_truth[i]:
            incorrect_count += 1
            
    # counting any extra recognised characters as incorrect
    incorrect_count += abs(len(recognized_text) - len(ground_truth))
    return incorrect_count


In [16]:
# initialising counters
correct_count = 0
recognized_count = 0
total_count = 0
almost_count = 0
total_incorrect_chars = 0
total_groundtruth_chars = 0

for index, row in data.iterrows():
    if  total_count < 5000:
        image_path = os.path.join(images_path, row['images'])
        ground_truth = row['labels']

        processed_img = process_image(image_path)
        if processed_img is not None:
            ocr_results_processed = reader.readtext(processed_img, detail=1, decoder='beamsearch')

        recognized_text = ""
        if ocr_results_processed:
            for result in ocr_results_processed:
                text, confidence = result[1], result[2]
                if confidence > 0.5:  
                    recognized_text = text.replace(" ", "").replace("-", "")
                    recognized_count += 1

        # normalising recognized text and ground truth for comparison
        recognized_text = recognized_text.strip().lower()
        ground_truth = ground_truth.strip().lower()

        incorrect_chars = calculate_cer(recognized_text, ground_truth)
        total_incorrect_chars += incorrect_chars
        total_groundtruth_chars += len(ground_truth)
        
        if recognized_text == ground_truth:
            correct_count += 1
        elif recognized_text in ground_truth and recognized_text != "":
            almost_count += 1

        total_count += 1
    else:
        break

accuracy = (correct_count / total_count) * 100
cer = (total_incorrect_chars / total_groundtruth_chars) * 100 
print("Total: ", total_count)
print(f"Accuracy: {correct_count} = {accuracy:.2f}%")
print(f"Character Error Rate (CER): {cer:.2f}%")
print(f"Recognized something: {recognized_count}")
print(f"Almost recognized: {almost_count} out of incorrects {total_count - correct_count}")

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Total:  5000
Accuracy: 618 = 12.36%
Character Error Rate (CER): 78.63%
Recognized something: 1780
Almost recognized: 145 out of incorrects 4382
