In [1]:
#For Video and Image Processing
import cv2
#For Mathematical Operations
import numpy as np
#For Image to text Conversion
import pytesseract
#NOTE: the former "import tesseract-ocr" line was dropped: a hyphenated module name is a
#SyntaxError in Python, so this cell could never run. Tesseract-OCR is the external OCR engine
#that pytesseract invokes; it has to be installed on the machine, not imported.
#For Data Manipulation
import pandas as pd
#For Regular Expression Operations
import re
#For Image Operations
from PIL import Image
import os

In [2]:
#Path to current working directory
cwd = os.getcwd()
#Create the working directories. os.mkdir raises if the directory already exists, which made
#this cell fail on every re-run of the notebook, so only create what is missing.
for _subdir in ('\\datasets', '\\img'):
    if not os.path.isdir(cwd + _subdir):
        os.mkdir(cwd + _subdir)

E:\VIVEK\GitHubRepos\tensorzone


In [3]:
#Regular expression describing a license plate: 2 letters, 2 digits, 1-2 letters and
#3-4 digits separated by '-', e.g. "MH-12-AB-1234"
search_pattern_re = r'[A-Z]{2}-[0-9]{2}-[A-Z]{1,2}-[0-9]{3,4}'
#Video of vehicles at the toll (path is appended to the working directory)
videofile = "\\datasets\\test.mp4"
#CSV input, one row per box: {frame,minX,maxX,minY,maxY,text} (path is appended to the working directory)
csvfile = '\\datasets\\testcsv.csv'

In [5]:
#Class VideoProcessor takes two input files, the csvfile and the videofile, plus the regular
#expression of the pattern to search for. The basic purpose of the class is to process the videofile
#frame by frame. For each frame it detects objects (contours). The contour search space is refined by
#the mid points computed from the csvfile: for the (possibly multiple) entries of each frame in the
#csvfile, a list of mid points is computed. If any of these mid points lies within a specific contour,
#then that contour is a valid contour to search for license plate numbers. The idea is similar to the
#region proposals method used in R-CNN to define search spaces for object detection. The valid contour
#then undergoes an affine transformation (a rotation of -12.25 degrees) to straighten text that appears
#tilted due to the camera angle. This angle was fixed through experimentation. Text is then detected
#in the transformed contour and searched for a match of the regular expression. If a match is
#obtained, it is appended to the list of license numbers detected in that frame =>
#{frame:[list of license nos]}




class VideoProcessor(object):
    """Extracts license plate numbers from a video, frame by frame.

    For every frame, contours are detected and only those containing the mid
    point of at least one box listed for that frame in the CSV are kept. Each
    kept region is rotated, passed through OCR, and every text line is searched
    for the license plate regular expression. Results are collected in
    ``final_dict`` as ``{frame index: [license numbers]}``.

    File paths are built as ``cwd + <relative path>``, where ``cwd`` is a
    module-level name defined elsewhere in the notebook.

    Parameters:
        csvfile: path of the input CSV, appended to ``cwd``. The columns
            ``frame``, ``minX``, ``maxX``, ``minY`` and ``maxY`` are read.
        videofile: path of the input video, appended to ``cwd``.
        search_pattern_re: regular expression (string) of a license plate.
        rotation_angle: angle in degrees applied to every candidate region
            before OCR. Defaults to -12.25, the value previously hard-coded.
    """

    def __init__(self,csvfile,videofile,search_pattern_re,rotation_angle=-12.25):
        self.csv_file = csvfile
        self.video_file = videofile
        self.reg_ex = search_pattern_re
        self.rotation_angle = rotation_angle
        self.final_dict = {}

    #Reads csv file
    def _read_csv_data(self):
        """Loads the CSV into ``self.data``."""
        data = pd.read_csv(cwd + self.csv_file, index_col=0)
        #index_col=0 turns the first column into the index. If that column happens to be
        #'frame' itself, restore it as a regular column so the per-frame lookup works.
        if 'frame' not in data.columns and data.index.name == 'frame':
            data = data.reset_index()
        self.data = data

    #Compiles RegEx object
    def _reg_ex_compile(self):
        """Compiles ``self.reg_ex`` into ``self.reg``."""
        self.reg = re.compile(self.reg_ex)

    #Detects contours in image
    def get_contours(self,image):
        """Returns the contours found on the Canny edges of a BGR image."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray,100,200)
        #OpenCV 3 returns (image, contours, hierarchy) while other major versions return
        #(contours, hierarchy); the contours are the second-to-last item in both layouts.
        return cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    #Detects midpoints for data for each frame in csv
    def get_mids(self,data):
        """Returns ``[[mid_x, mid_y], ...]`` for every row of ``data``.

        ``data`` must expose ``minX``, ``maxX``, ``minY`` and ``maxY``. The rows
        are walked positionally, so the index of ``data`` does not matter.
        Mid points are truncated to ``int``.
        """
        return [[int((max_x + min_x) / 2), int((max_y + min_y) / 2)]
                for min_x, max_x, min_y, max_y
                in zip(data.minX, data.maxX, data.minY, data.maxY)]

    #Gets box coordinates of each contour
    def get_box(self,contour):
        """Returns the axis-aligned bounding box of a contour as a dict with
        keys ``min_x``, ``min_y``, ``max_x`` and ``max_y``.

        ``contour`` is indexed as ``contour[j][0][0]`` (x) and
        ``contour[j][0][1]`` (y).
        """
        points = np.asarray(contour)
        x_axes = points[:, 0, 0]
        y_axes = points[:, 0, 1]
        return {'min_x':x_axes.min(),
                'min_y':y_axes.min(),
                'max_x':x_axes.max(),
                'max_y':y_axes.max()
        }

    #Calculates affine transformation for input image
    def affine_transform(self,img,row,column,angle):
        """Rotates ``img`` by ``angle`` degrees around its centre, keeping the
        output size at ``(column, row)``."""
        M = cv2.getRotationMatrix2D((column/2,row/2),angle,1)
        trans_img = cv2.warpAffine(img,M,(column,row))
        return trans_img

    #Returns text in image
    def convert_to_text(self,img_arr):
        """Runs OCR on an image array and returns the recognised text."""
        inp = Image.fromarray(img_arr)
        text = pytesseract.image_to_string(inp)
        return text

    #True if any mid point lies inside the box (borders included)
    @staticmethod
    def _box_contains_any(box,mids):
        return any(box['min_x']<=elem[0]<=box['max_x'] and box['min_y']<=elem[1]<=box['max_y']
                   for elem in mids)

    #Returns the license numbers found in a (possibly multi-line) text
    def _find_plates(self,text):
        """Searches every line of ``text`` for the compiled regular expression.

        Each line is stripped and its inner spaces are replaced with '-' before
        searching. At most one match per line is returned, in the normalised
        (hyphenated) form it was matched in. Requires ``_reg_ex_compile`` to
        have been called.
        """
        plates = []
        for line in text.split('\n'):
            match = self.reg.search(line.strip().replace(' ','-'))
            if match is not None:
                #group(0) is the matched text. The former slice built from span(0)/span(1)
                #always raised and was silently swallowed, so nothing was ever stored.
                plates.append(match.group(0))
        return plates

    #Collects the license numbers of a single frame into final_dict
    def _process_frame(self,frame,frame_idx):
        rows = self.data[self.data['frame']==frame_idx] #Slices dataframe where frame == frame_idx
        mids = self.get_mids(rows) #Computes mid points of sliced dataframe data
        if not mids:
            return #No box for this frame: no contour can be selected
        for cont in self.get_contours(frame):
            box = self.get_box(cont) #Box coordinates of each contour
            #The condition either selects or rejects a contour
            if not self._box_contains_any(box,mids):
                continue
            #Crop in memory instead of writing a jpg and reading it back
            region = frame[box['min_y']:box['max_y'],box['min_x']:box['max_x']]
            if region.size == 0:
                continue #Degenerate contour (zero width or height)
            gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
            r,c = gray.shape #Shape of image
            trans_image = self.affine_transform(gray,r,c,self.rotation_angle) #Image transformation
            text = self.convert_to_text(trans_image) #Return text in image
            self.final_dict[frame_idx].extend(self._find_plates(text))

    #Process video frame by frame for final output
    def _process_video(self):
        """Reads the video frame by frame and fills ``self.final_dict``.

        Requires ``_read_csv_data`` and ``_reg_ex_compile`` to have been called.
        """
        cap = cv2.VideoCapture(cwd + self.video_file)
        try:
            i=0 #Frame index
            while (cap.isOpened()):
                ret, frame = cap.read() #Reading video frame by frame
                if not ret:
                    break #End of video (or read failure): frame is not usable
                self.final_dict.setdefault(i,[]) #Setting final data structure
                self._process_frame(frame,i)
                i+=1
        finally:
            cap.release() #Release the capture even if a frame fails to process

    #Builds the output table: one column per frame, one row per detection
    def _build_output_frame(self):
        #Frames can hold different numbers of detections. Wrapping each list in a Series
        #pads the shorter columns with NaN instead of raising on unequal lengths.
        return pd.DataFrame({frame_idx: pd.Series(plates, dtype=object)
                             for frame_idx, plates in self.final_dict.items()})

    #Create csv of final data structure
    def _create_csv(self):
        """Writes ``self.final_dict`` to ``\\datasets\\output.csv`` under ``cwd``."""
        self._build_output_frame().to_csv(cwd+"\\datasets\\output.csv")
                                            

In [6]:
def main():
    """Runs the whole pipeline: compile the regular expression, read the CSV,
    process the video and write the output CSV, in that order."""
    processor = VideoProcessor(csvfile,videofile,search_pattern_re)
    pipeline = (
        processor._reg_ex_compile,
        processor._read_csv_data,
        processor._process_video,
        processor._create_csv,
    )
    for step in pipeline:
        step()

In [None]:
#Entry point of the notebook: runs main()
main()

![](architecture.jpg)

# Alternate Architecture

If one has decent GPU access, there is a similar alternative. One can train a Haar Cascade classifier for license plate detection. Following the same thread of video processing, once the classifier detects a license plate, the particular contour in which it was detected can be passed on for a regular expression match. If that matches, the region proposals from the csv file can be used to increase the confidence that the correct license plate was detected.