In [None]:
from __future__ import print_function, division
import os
import pandas as pd
import numpy as np
import re
import requests
from tqdm import tqdm_notebook, tnrange
import json
from copy import deepcopy
from ast import literal_eval
from PIL import Image
from io import StringIO, BytesIO
import matplotlib.pyplot as plt
import pytesseract as tess
import cv2
import math

In [None]:
# Read the annotation dump line by line: every entry of `d` is one raw text
# line of the file, still including its trailing newline.
with open("../../Downloads/Indian_Number_plates.json", "r") as f:
    d = list(f)

In [None]:
# Each non-empty line of the dump is parsed as its own JSON document.
# Whitespace-only lines (e.g. a trailing newline at the end of the file) are
# dropped first, because json.loads raises on them; filtering before
# enumerate keeps the keys contiguous (0..n-1), which the batch loop relies on
# when it iterates over range(len(image_dict)).
annotation_lines = [line for line in d if line.strip()]

# enumerate() hides the length of the list, so pass `total` explicitly to let
# the progress bar show a percentage / ETA.
image_dict = {
    i: json.loads(record)
    for i, record in tqdm_notebook(enumerate(annotation_lines), total=len(annotation_lines))
}

In [None]:
def get_image(image_dict, idx, crop = True, plot = True, timeout = 30):
    """
    Download one annotated image and optionally draw and/or crop its
    license plate bounding box.

    The record's 'content' field is used as the download URL. The first
    annotation's two corner points are multiplied by the image width/height
    to obtain pixel coordinates of the bounding box.

    Arguments:
        image_dict: mapping from image id to the parsed JSON record; each
            record is read through its 'content' and 'annotation' keys
        idx: key of the record in image_dict
        crop: bool, whether to return only the bounding-box region
        plot: bool, whether to show the full image with the box drawn on it
        timeout: seconds to wait for the server before giving up

    Returns:
        3-channel RGB numpy array: the cropped plate region when crop is
        True, otherwise the whole downloaded image.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-success HTTP status.
    """
    record = image_dict[idx]
    url = record['content']

    # Without a timeout a single stalled server blocks the whole run forever.
    response = requests.get(url, timeout = timeout)
    # Fail with a clear HTTP error instead of an obscure image-decoding error
    # when the server answers with an error page.
    response.raise_for_status()

    # Force three channels: grayscale / palette images would otherwise decode
    # to a 2-D array and break both the crop below and the colour conversions
    # applied to the result later in the notebook.
    img = np.array(Image.open(BytesIO(response.content)).convert("RGB"))
    img_h, img_w = img.shape[:2]
    bbox = record['annotation'][0]['points']

    # Sort each axis so that a box whose corners are listed in reverse order
    # still yields a non-empty slice.
    xmin, xmax = sorted((int(bbox[0]['x'] * img_w), int(bbox[1]['x'] * img_w)))
    ymin, ymax = sorted((int(bbox[0]['y'] * img_h), int(bbox[1]['y'] * img_h)))

    if plot:
        plt.figure(figsize = (12, 10))
        # Draw on a copy so the returned image does not carry the rectangle.
        plt.imshow(cv2.rectangle(img.copy(), (xmin, ymin), (xmax, ymax), (255, 255, 0), 2))

    if crop:
        img = img[ymin:ymax, xmin:xmax, :]
    return img

In [None]:
def ratioCheck(area, width, height):
    """
    Check whether a rectangle has a plate-like area and aspect ratio.

    The aspect ratio is orientation independent: it is always computed as
    long side / short side.

    Arguments:
        area: area of the rectangle
        width: width of the rectangle
        height: height of the rectangle

    Returns:
        True when the area lies within [15*15*4.7272, 125*125*4.7272] and the
        aspect ratio lies within [3, 6]; False otherwise, including for
        rectangles with a non-positive side.
    """
    # A degenerate rectangle can never be a plate; bail out before dividing
    # (the original raised ZeroDivisionError for a zero width or height).
    if width <= 0 or height <= 0:
        return False

    ratio = float(width) / float(height)
    if ratio < 1:
        ratio = 1 / ratio

    aspect = 4.7272
    # Named *_area / *_ratio so the builtins min() and max() are not shadowed.
    min_area = 15 * aspect * 15
    max_area = 125 * aspect * 125

    min_ratio = 3
    max_ratio = 6

    return bool(min_area <= area <= max_area and min_ratio <= ratio <= max_ratio)

def isMaxWhite(plate):
    """Return True when the mean value of `plate` is at least 115, else False."""
    mean_intensity = np.mean(plate)
    return True if mean_intensity >= 115 else False

def validateRotationAndRatio(rect):
    """
    Decide whether a rotated rectangle is an acceptable plate candidate.

    Arguments:
        rect: ((x, y), (width, height), angle) tuple; get_min passes the
            result of cv2.minAreaRect here

    Returns:
        False when the derived tilt exceeds 15, when either side is zero, or
        when ratioCheck rejects the rectangle; True otherwise.
    """
    (_cx, _cy), (width, height), rect_angle = rect

    # The tilt is derived differently depending on which side is the longer one.
    angle = -rect_angle if width > height else 90 + rect_angle

    # Too tilted, or degenerate: reject before looking at area / aspect ratio.
    if angle > 15 or height == 0 or width == 0:
        return False

    return True if ratioCheck(height * width, width, height) else False

def preprocess(img):
    """
    Convert a colour image into a binary edge map.

    Steps: 5x5 Gaussian blur, grayscale conversion, first-order Sobel
    derivative along x (3x3 kernel, 8-bit output), then binarisation with an
    Otsu-selected threshold.

    Returns:
        The thresholded single-channel image.
    """
    blurred = cv2.GaussianBlur(img, (5, 5), 0)
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

    grad_x = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3)

    # With THRESH_OTSU the threshold is chosen by OpenCV; the 0 passed here is
    # a placeholder and the computed threshold value itself is discarded.
    _, threshold_img = cv2.threshold(grad_x, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return threshold_img

def extract_contours(threshold_img):
    """
    Close horizontal gaps in a binary image and return its outer contours.

    A morphological closing with a 17x3 rectangular kernel is applied to a
    new array (the input is not modified), then only the external contours of
    the result are extracted, keeping every contour point.

    Arguments:
        threshold_img: binary single-channel image, e.g. from preprocess

    Returns:
        The contour sequence produced by cv2.findContours.
    """
    element = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(17, 3))
    morph_img_threshold = cv2.morphologyEx(src=threshold_img, op=cv2.MORPH_CLOSE, kernel=element)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy). The contours are the second-to-last item in both
    # layouts, so index from the end instead of unpacking a fixed arity.
    found = cv2.findContours(morph_img_threshold, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE)
    return found[-2]

def get_min(img):
    """
    Narrow an image down to the first contour that looks like a plate.

    The image is run through preprocess and extract_contours; the contours are
    then visited in order and the upright bounding box of the first one whose
    minimum-area rectangle passes validateRotationAndRatio is cropped out.

    Returns:
        The cropped region, or `img` unchanged when no contour qualifies.
    """
    candidates = extract_contours(preprocess(img))
    for contour in candidates:
        if not validateRotationAndRatio(cv2.minAreaRect(contour)):
            continue
        x, y, w, h = cv2.boundingRect(contour)
        return img[y:y + h, x:x + w]
    return img

In [None]:
def reduce_colors(img, n):
    """
    Quantise a 3-channel image down to `n` colours using k-means.

    Arguments:
        img: image whose data can be viewed as rows of 3 values
        n: number of clusters, i.e. colours in the result

    Returns:
        uint8 array with the same shape as `img` in which every pixel is
        replaced by the centre of the cluster it was assigned to.
    """
    # One row per pixel; cv2.kmeans needs float32 samples.
    pixels = np.float32(img.reshape((-1, 3)))

    # Each attempt stops after 10 iterations or once the accuracy reaches 1.0;
    # 10 attempts are made, each starting from random centres.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, labels, centers = cv2.kmeans(pixels, n, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    # Map every pixel to its cluster centre and restore the image layout.
    palette = np.uint8(centers)
    return palette[labels.flatten()].reshape((img.shape))

def cut_sorroundings(img):
    """
    Crop a mask to the bounding box of its first external contour.

    The mask is binarised (values above 1 become 255), external contours are
    extracted, and the upright bounding box of the first contour returned by
    OpenCV is used as the crop window.

    Arguments:
        img: single-channel mask image

    Returns:
        The cropped mask, or `img` unchanged when no contour is found or when
        the bounding box covers less than 30% of the image area.
    """
    orig_h, orig_w = img.shape[:2]
    _, thresh = cv2.threshold(img, 1, 255, cv2.THRESH_BINARY)

    # Index from the end: OpenCV 3.x returns (image, contours, hierarchy),
    # 2.x and 4.x return (contours, hierarchy).
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # An all-black mask has no contours; the original raised IndexError here.
    if len(contours) == 0:
        return img

    x, y, w, h = cv2.boundingRect(contours[0])

    # Fraction of the image covered by the box; a small box is treated as
    # noise rather than as the plate, so the mask is left untouched.
    pct = float(w * h) / (orig_h * orig_w)
    if pct < 0.3:
        return img
    return img[y:y + h, x:x + w]

def get_text(img):
    """
    Run OCR on a plate image and return the recognised characters.

    Pipeline: grayscale -> 5x5 Gaussian blur -> histogram equalisation ->
    quantisation to 8 colours -> binary threshold at 100 -> one 3x3 erosion ->
    cut_sorroundings -> pytesseract.

    Returns:
        The OCR output with every run of non-word characters removed.
    """
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred_img = cv2.GaussianBlur(gray_img, (5, 5), 0)
    equalized_img = cv2.equalizeHist(blurred_img)

    # reduce_colors reshapes its input into rows of 3 values, so the gray
    # image takes a round trip through a 3-channel representation.
    equalized_bgr = cv2.cvtColor(equalized_img, cv2.COLOR_GRAY2BGR)
    quantized_bgr = reduce_colors(equalized_bgr, 8)
    reduced = cv2.cvtColor(quantized_bgr, cv2.COLOR_BGR2GRAY)

    _, mask = cv2.threshold(reduced, 100, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    mask = cut_sorroundings(cv2.erode(mask, kernel, iterations = 1))

    raw_text = tess.image_to_string(mask)
    return re.sub(r'\W+', '', raw_text)

In [None]:
# Run the full pipeline (download + crop -> plate localisation -> OCR) over
# every record and collect one (url, recognised text) pair per image.
image_urls, lics = [], []
for idx in tnrange(len(image_dict)):
    plate_bbox = get_image(image_dict, idx, crop = True, plot = False)
    processed = get_min(plate_bbox)
    # Fixed: this call used to pass `processed_img`, a name that is never
    # defined in the notebook, so the cell raised NameError on a fresh kernel.
    license_number = get_text(processed)
    image_urls.append(image_dict[idx]['content'])
    lics.append(license_number)

# Persist the results next to the notebook, without the DataFrame index column.
outputs = pd.DataFrame({"urls": image_urls, "license_plate": lics})
outputs.to_csv("license_plate_output.csv", index =False)