In [10]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET
import os 
import shutil
from tqdm import tqdm
import yaml
import matplotlib.pyplot as plt 
import torch
import cv2
import pytesseract as pt

In [11]:
# Accumulators filled by the annotation-parsing cell below; every list gets
# one entry per parsed annotation file.
filenames = list()

# Values read from the <size> element of each annotation.
size_props = {dimension: [] for dimension in ('height', 'width')}

# Values read from the <bndbox> element of each annotation.
bounding_box_props = {corner: [] for corner in ('xmin', 'ymin', 'xmax', 'ymax')}

In [12]:
annotations_path = './dataset/annotations/'

# Start from empty accumulators so that re-running this cell does not append
# every annotation a second time.
filenames.clear()
for prop_list in (*size_props.values(), *bounding_box_props.values()):
    prop_list.clear()

# os.listdir returns entries in arbitrary order, so sort for a deterministic
# row order, and skip anything in the folder that is not an XML annotation.
annotation_files = sorted(
    name for name in os.listdir(annotations_path) if name.lower().endswith('.xml')
)

for file in tqdm(annotation_files):
    annotation_file = os.path.join(annotations_path, file)
    annotation = ET.parse(annotation_file)
    filenames.append(annotation_file)

    # Image dimensions: one integer per key of size_props.
    size = annotation.find('size')
    for name, prop_list in size_props.items():
        prop_list.append(int(size.find(name).text))

    # Only the first <object> element is read, i.e. one box per image.
    bounding_box = annotation.find('object').find('bndbox')
    for name, prop_list in bounding_box_props.items():
        prop_list.append(int(bounding_box.find(name).text))

100%|██████████| 433/433 [00:00<00:00, 11699.40it/s]


In [13]:
# One row per annotation file: its path, the image size, and the corners of
# the bounding box collected by the parsing cell.
df = pd.DataFrame({
    'file': filenames,
    **{column: size_props[column] for column in ('width', 'height')},
    **{column: bounding_box_props[column] for column in ('xmin', 'ymin', 'xmax', 'ymax')},
})

In [14]:
# Preview the first rows of the parsed annotation table.
df.head()

Unnamed: 0,file,width,height,xmin,ymin,xmax,ymax
0,./dataset/annotations/Cars0.xml,500,268,226,125,419,173
1,./dataset/annotations/Cars1.xml,400,248,134,128,262,160
2,./dataset/annotations/Cars10.xml,400,225,140,5,303,148
3,./dataset/annotations/Cars100.xml,400,267,175,114,214,131
4,./dataset/annotations/Cars101.xml,400,300,167,202,240,220


In [15]:
# Express the box as centre + extent, each divided by the matching image
# dimension so the values are fractions of the image size.
image_w, image_h = df['width'], df['height']

# Centre: sum of the two corners over twice the image dimension.
df['center_x'] = (df['xmin'] + df['xmax']) / (image_w * 2)
df['center_y'] = (df['ymin'] + df['ymax']) / (image_h * 2)

# Extent: corner difference over the image dimension.
df['bb_width'] = (df['xmax'] - df['xmin']) / image_w
df['bb_height'] = (df['ymax'] - df['ymin']) / image_h

In [16]:
# Preview the table again, now including the center_x/center_y/bb_width/bb_height columns.
df.head()

Unnamed: 0,file,width,height,xmin,ymin,xmax,ymax,center_x,center_y,bb_width,bb_height
0,./dataset/annotations/Cars0.xml,500,268,226,125,419,173,0.645,0.55597,0.386,0.179104
1,./dataset/annotations/Cars1.xml,400,248,134,128,262,160,0.495,0.580645,0.32,0.129032
2,./dataset/annotations/Cars10.xml,400,225,140,5,303,148,0.55375,0.34,0.4075,0.635556
3,./dataset/annotations/Cars100.xml,400,267,175,114,214,131,0.48625,0.458801,0.0975,0.06367
4,./dataset/annotations/Cars101.xml,400,300,167,202,240,220,0.50875,0.703333,0.1825,0.06


In [17]:
# Keep only the columns needed to write the YOLO label files.
yolo_df = df[['file', 'center_x', 'center_y', 'bb_width', 'bb_height']]

# 70-15-15 split: hold out 15% of all rows for the test set, then the same
# number of rows from the remainder for the validation set.
test_size = int(0.15 * len(df))

# Fixed seed so the split (and therefore every downstream metric) is
# reproducible across kernel restarts.
split_seed = 42

df_train, df_test = train_test_split(yolo_df, test_size=test_size, random_state=split_seed)
df_train, df_val = train_test_split(df_train, test_size=test_size, random_state=split_seed)

In [18]:
# Source folder holding the images that belong to the annotations.
images_path = './dataset/images/'

# Destination folders for the three dataset splits.
train_path = os.path.join('Images', 'train')
val_path = os.path.join('Images', 'val')
test_path = os.path.join('Images', 'test')

# Create every folder that is missing and report the ones that were created.
for split_dir, created_message in (
    (train_path, 'Made folder for train set'),
    (val_path, 'Made folder for val set'),
    (test_path, 'Made folder for test set'),
):
    if os.path.exists(split_dir):
        continue
    os.makedirs(split_dir)
    print(created_message)

Made folder for train set
Made folder for val set
Made folder for test set


In [19]:
def export_split(split_df, dst_dir, split_name):
    """Copy the images of one split into ``dst_dir`` and write their YOLO labels.

    For every row the image ``<stem>.png`` is copied from ``images_path`` and a
    ``<stem>.txt`` label file is written next to it, where ``<stem>`` is the
    annotation file name without its extension.

    Args:
        split_df: DataFrame with the columns ``file``, ``center_x``,
            ``center_y``, ``bb_width`` and ``bb_height``.
        dst_dir: Existing directory that receives the images and label files.
        split_name: Name used in the progress messages (e.g. ``'train'``).
    """
    print(f'Moving images for {split_name} set')
    # iterrows() is a generator, so pass the length explicitly to let tqdm
    # show a real progress bar instead of a bare counter.
    for _, row in tqdm(split_df.iterrows(), total=len(split_df)):
        # splitext removes only the trailing extension; str.replace('.xml', '')
        # would also strip the substring from the middle of a name.
        image_name = os.path.splitext(os.path.basename(row['file']))[0]
        image_file = f'{image_name}.png'
        shutil.copy2(os.path.join(images_path, image_file), os.path.join(dst_dir, image_file))
        # Label line: class index 0 followed by the four normalised box values.
        label_text = f"0 {row['center_x']} {row['center_y']} {row['bb_width']} {row['bb_height']}"
        with open(os.path.join(dst_dir, f'{image_name}.txt'), 'w') as f:
            f.write(label_text)
    print(f'Done moving images for {split_name} set')


export_split(df_train, train_path, 'train')
export_split(df_val, val_path, 'val')
export_split(df_test, test_path, 'test')

Moving images for train set


305it [00:01, 224.38it/s]


Done moving images for train set
Moving images for val set


64it [00:00, 227.56it/s]


Done moving images for val set
Moving images for test set


64it [00:00, 232.37it/s]

Done moving images for test set





In [1]:
# Create the data.yaml file that is used to train YOLO on the custom data.
data = {
    'names':['License Plate'],
    'nc':1,
    'train':os.path.abspath(train_path),
    'val':os.path.abspath(val_path),
    # The test split is exported alongside train/val, so register it as well;
    # otherwise it cannot be selected through the dataset file.
    'test':os.path.abspath(test_path)
}

with open('data.yaml', 'w') as f:
    yaml.dump(data, f)

NameError: name 'train_path' is not defined

In [3]:
import cv2 
import os
import torch 
import numpy as np 
import datetime as dt 
import seaborn

In [4]:
runs_path = os.path.join('yolov5', 'runs', 'train')

# Pick the most recently modified run directory. os.listdir returns entries in
# arbitrary order, and even a sorted listing would rank 'exp10' before 'exp2',
# so indexing the listing with [-1] does not reliably give the latest run.
run_dirs = [
    entry for entry in os.listdir(runs_path)
    if os.path.isdir(os.path.join(runs_path, entry))
]
if not run_dirs:
    raise FileNotFoundError(f'No training runs found in {runs_path!r}')
latest_run = max(run_dirs, key=lambda entry: os.path.getmtime(os.path.join(runs_path, entry)))

# Weights file of that run, consumed by the model-loading cell.
path = os.path.join(runs_path, latest_run, 'weights', 'best.pt')

In [5]:
# Load the custom weights found at `path` through torch.hub.
model = torch.hub.load(
    'ultralytics/yolov5',
    'custom',
    path=path,
    trust_repo='check',
)

Using cache found in C:\Users\admin/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2022-10-17 Python-3.7.9 torch-1.12.1+cpu CPU

Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients
Adding AutoShape... 


In [6]:
image_file = './front.jpg'
img = cv2.imread(image_file)
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError(f'Could not read image: {image_file}')

# OpenCV loads images in BGR channel order, while the YOLOv5 hub model expects
# RGB for numpy inputs, so convert for inference only. `img` itself stays BGR
# because the later crop is written back with cv2.imwrite.
results = model(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [11]:
# Inspect which object type the model call returned.
type(results)

models.common.Detections

In [8]:
# Getting co ordinates of license plate
results_df = results.pandas().xyxy[0].loc[0]
x_min = int(results_df['xmin'])
x_max = int(results_df['xmax'])
y_min = int(results_df['ymin'])
y_max = int(results_df['ymax'])
# Cropping license plate from image
number_plate = img[y_min:y_max, x_min:x_max]
cv2.imwrite('plate.png',number_plate)


True

In [7]:
import matplotlib.pyplot as plt
def plot_images(img1, img2, title1="", title2=""):
    """Show two images side by side in one figure.

    Both images are drawn with the "gray" colormap and without axis ticks;
    ``title1`` and ``title2`` are used as the titles of the left and right
    panel. Nothing is returned.
    """
    fig = plt.figure(figsize=[15,15])
    panels = ((img1, title1), (img2, title2))
    # Panels sit on a 1x2 grid, numbered from 1 left to right.
    for position, (image, title) in enumerate(panels, start=1):
        ax = fig.add_subplot(1, 2, position)
        ax.imshow(image, cmap="gray")
        ax.set(xticks=[], yticks=[], title=title)

In [9]:
# find the white rectangle
th = number_plate.copy()
th[th<200] = 0

bbox = np.where(th>0)
y0 = bbox[0].min()
y1 = bbox[0].max()
x0 = bbox[1].min()
x1 = bbox[1].max()

# crop the region of interest (ROI)
img = number_plate[y0:y1, x0:x1]
cv2.imwrite('ROI.png', img)

True

In [12]:
# Grayscale conversion followed by histogram equalization.
gray = cv2.cvtColor(number_plate, cv2.COLOR_BGR2GRAY)
equ = cv2.equalizeHist(gray)

# Gaussian blur of the equalized image.
# NOTE(review): `blur` is not read again in this cell; the threshold below is
# applied to `equ`. Confirm whether the blurred image was meant to be used.
blur = cv2.GaussianBlur(equ, (5, 5), 1)

# Manual thresholding: values at or above th2 become 255, the rest 0.
th2 = 120 # this threshold might vary!
is_foreground = equ >= th2
equ[is_foreground] = 255
equ[~is_foreground] = 0

cv2.imwrite('equ.png', equ)

True

In [13]:
# For long plate

import pytesseract

# Options handed to Tesseract: OCR engine mode (--oem) and page segmentation
# mode (--psm).
tesseract_config = '--oem 3 --psm 6'
text = pytesseract.image_to_string(equ, lang='eng', config=tesseract_config)

print(text)


51H-683.29]



In [11]:
# For short plate
# NOTE(review): `reader` is not defined anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel. The recorded output (a list of
# (box, text, confidence) tuples) looks like an EasyOCR reader - TODO confirm
# and create it in an earlier cell.
text = reader.readtext(equ)
text
#print(text[0][-2] + '-' + text[1][-2])

[([[0, 6], [585, 6], [585, 89], [0, 89]], '51H-68329', 0.745216901640605)]

In [14]:
from paddleocr import PaddleOCR

# OCR engine configured with lang='en'.
# NOTE(review): the recorded run of this cell failed with
# "TypeError: ... duplicate file name (framework.proto)"; presumably an
# environment/package conflict - verify the cell runs on a fresh kernel.
ocr = PaddleOCR(lang='en')

TypeError: Couldn't build proto file into descriptor pool: duplicate file name (framework.proto)

In [19]:
# Run PaddleOCR on the plate crop. The output gets its own name so that the
# YOLOv5 `results` object used by the cropping cell is not overwritten.
ocr_results = ocr.ocr(number_plate)

# ocr_results[0] holds one entry per detected text line; the recognised string
# is the first item of each entry's last element. Joining every line (instead
# of indexing exactly two) also handles one-line and three-line plates.
# `or []` covers a None entry - presumably returned when no text is found;
# TODO confirm against the installed PaddleOCR version.
detected_lines = ocr_results[0] or []
result = '-'.join(line[-1][0] for line in detected_lines)
result = result.replace('.','')
result

[2022/10/19 15:54:36] ppocr DEBUG: dt_boxes num : 2, elapse : 0.05919623374938965
[2022/10/19 15:54:36] ppocr DEBUG: rec_res num  : 2, elapse : 0.16908025741577148


'51H-68329'