# OCR Applications (on Image and Video)
- Using OpenCV and Google's tesseract
- Practitioner: Debabrata Doloi

## 1) ON IMAGE

In [3]:
import cv2
import pytesseract

#defining path to tesseract OCR engine command line executable
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\Tesseract.exe"

### Thresholding

i'hv used Simple Binary Thresholding and OTSU binarization

*Simple Binary thresholding*: finds a threshold and set the image color white if < threshold and black if > threshold

*OTSU thresholding*: automatically picks a threshold for image 

In [4]:
img=cv2.imread('sample4.jpg')                                                   #reading image
gray_img=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)                                   #Converting to GRAY scale as
binary_img=cv2.threshold(gray_img,0,255,cv2.THRESH_OTSU | cv2.THRESH_BINARY)[1] #Performing simple thresholding with OTSU binarization
cv2.imshow('threshold image',binary_img)                                        #Opening binary image
cv2.waitKey(0)                                                                  #Exit at pressing any key
cv2.destroyAllWindows()                                                         #destroying windows

### Custom configurations

* --psm: Specify page segmentation mode.
* --oem: Specify OCR Engine mode.

In [12]:
custom_config = r'--oem 3 --psm 6'                                              #Addind oem and psm to custom config
img_info=pytesseract.image_to_data(binary_img,output_type=pytesseract.Output.DICT,config=custom_config,lang='eng')  #Getting image data from tesseract
print(img_info['text'])                                                         #getting text info

['', '', '', '', 'This', 'is', 'SAMPLE', 'TEXT', '', 'Text', 'is', 'at', 'different', 'regions']


### Identifying text in image

In [13]:
im=img.copy()
total_boxes = len(img_info['text'])    #length of total no of blocks detected
for sequence_number in range(total_boxes):                                        #Looping through blocks
    if float(img_info['conf'][sequence_number])>30:                               #if confidence of box being text if greater than 30 (30-40 is optimal limit)
        (x, y, w, h) = (img_info['left'][sequence_number], img_info['top'][sequence_number], img_info['width'][sequence_number],  img_info['height'][sequence_number])  #get the coordinates of confident blocks
        im = cv2.rectangle(im, (x, y), (x + w, y + h), (0,255, 0), 2)     #Drawing a rectangle box over confident word
cv2.imshow('identified text',im)                                         #Showing final image
cv2.waitKey(0)                                                                   #Press any key to exit
cv2.destroyAllWindows()                                                          #destroy windows

### Defining Parsing function

In [14]:
def parse(data):
    '''Function to parse data from detected text'''
    parsed=[]
    last_word=''
    for word in data:
        if word!='':
            parsed.append(word)
            last_word=word
        if last_word!='' and word=='':
            parsed.append('\n')

    return " ".join(parsed)

### Saving Data

In [15]:
data=parse(img_info['text'])
file=open('./image_data.txt','a')
file.write(data)
data

'This is SAMPLE TEXT \n Text is at different regions'

# OCR Applications (on Image and Video)
- Using OpenCV and Google's tesseract
- Practitioner: Debabrata Doloi

## 2) ON VIDEO 

### imports

In [1]:
import time  
import pandas as pd
import cv2
import pytesseract



In [2]:
#defining path to tesseract OCR engine command line executable
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\Tesseract.exe"

### Video instance

In [3]:
#video=cv2.VideoCapture('video name with path ')

video=cv2.VideoCapture('video001.mp4')



### Defining Parser

In [4]:
parsed_data={}
def video_parser(time,data):
    '''Parse Data'''
    if data!='' and time not in parsed_data.keys():
        if data not in parsed_data.values():
            parsed_data[time]=data

def rescale_frame(frame, percent=95):
    width = int(frame.shape[1] * percent/ 100)
    height = int(frame.shape[0] * percent/ 100)
    dim = (width, height)
    return cv2.resize(frame, dim, interpolation =cv2.INTER_AREA)
def parse(data):
    '''Function to parse data from detected text'''
    not_nes=['\\','/','!','~','`','|','-','=']
    parsed=[]
    last_word=''
    for word in data:
        if word!='' and word not in not_nes:
            parsed.append(word)
            last_word=word

    return " ".join(parsed)           

### Extraction

In [5]:
custom_config = r'--oem 1 --psm 6' 
start=time.time()# Start Time

while True:
    ret,frame=video.read()                               #Getting video frame
    now=round(time.time()-start)                         #Getting current time
    if ret:
        gray_frame=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY) #Converting to GRAY scale as
        binary_frame=cv2.threshold(gray_frame,0,255,cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)[1] #Performing simple thresholding with OTSU binarization
        frame_info=pytesseract.image_to_data(binary_frame,output_type=pytesseract.Output.DICT,config=custom_config,lang='eng')
        fm=frame.copy()
        total_boxes = len(frame_info['text'])    #length of total no of blocks detected
        for sequence_number in range(total_boxes):                                        #Looping through blocks
            if float((frame_info['conf'][sequence_number]))>30:                               #if confidence of box being text if greater than 30 (30-40 is optimal limit)
                (x, y, w, h) = (frame_info['left'][sequence_number], frame_info['top'][sequence_number], frame_info['width'][sequence_number],  frame_info['height'][sequence_number])  #get the coordinates of confident blocks
                fm = cv2.rectangle(fm, (x, y), (x + w, y + h), (0,255, 0), 1)     #Drawing a rectangle box over confident word
        parsed=parse(frame_info['text'])
        video_parser(now,parsed)
        cv2.imshow('identified text ~Exit:esc~',fm)
        if cv2.waitKey(1)==27:
            break
    else:
        break
        
video.release()
cv2.destroyAllWindows()

### Exporting Data

In [6]:
df=pd.DataFrame(parsed_data,index=['Data']).T
df

Unnamed: 0,Data
0,—<~s
1,—<s
2,eld
3,=} [ele] 5-3)
4,BIGGEST
5,MOTION
6,DESIGN
7,DESIGN TRENDS
8,OF
9,THIS


In [21]:
df.to_csv('./video_data.csv')

In [8]:

# reading csv file
text = open("video_data.csv", "r")
  
# joining with space content of text
text = ' '.join([i for i in text])  
  
# replacing ',' by space
text = text.replace(",", " ")  
  
#displaying result
print(text)

 Data
 0 â€”<~s
 1 â€”<s
 2 THE
 3 BIGGEST
 4 MOTION
 5 DESIGN
 7 DESIGN TRENDS
 8 OF
 9 THIS
 10 YEAR
 11 LBS)
 12 FAST
 14 FAST TYPOGRAPHY
 18 FAST TYPOGRAP
 19 FAS TYPOGR
 20 _
 21 HE
 22 7
 23 a Apple's
 24 Apple's Big
 25 Apple's Big Launch
 26 ap Apple's Big Launch
 27 Launch
 28 in
 29 in 107
 30 in 107 sec
 31 in 107 seconds
 33 in seconds
 34 Read ?
 35 Ready?
 36 Try
 37 not
 38 blink



In [9]:
file=open('./image_data002.txt','a')
file.write(text)
text

" Data\n 0 â€”<~s\n 1 â€”<s\n 2 THE\n 3 BIGGEST\n 4 MOTION\n 5 DESIGN\n 7 DESIGN TRENDS\n 8 OF\n 9 THIS\n 10 YEAR\n 11 LBS)\n 12 FAST\n 14 FAST TYPOGRAPHY\n 18 FAST TYPOGRAP\n 19 FAS TYPOGR\n 20 _\n 21 HE\n 22 7\n 23 a Apple's\n 24 Apple's Big\n 25 Apple's Big Launch\n 26 ap Apple's Big Launch\n 27 Launch\n 28 in\n 29 in 107\n 30 in 107 sec\n 31 in 107 seconds\n 33 in seconds\n 34 Read ?\n 35 Ready?\n 36 Try\n 37 not\n 38 blink\n"