In [16]:
import json
import os
import pickle
import random
from datetime import datetime

import cv2
import numpy
import numpy as np
import progressbar
from pdf2docx import parse
from reportlab.pdfgen import canvas
from tensorflow import device as model_device
from tensorflow.keras.models import load_model

In [23]:
from image_preprocessing.scan import *
from image_preprocessing.helper import resize,get_sorted_contours_bounding_box,skew_correction
# Sentinel used by the classification cells for a character whose prediction is
# not confident enough.
# `np.nan` is the canonical spelling; the `np.NaN` alias was removed in NumPy 2.0.
# NaN never compares equal to anything (itself included), so it cannot be
# detected with `==`; use `np.isnan(value)` or `value != value` instead.
UNKNOWN = np.nan

In [21]:
def make_results_dir(dir):
    """Make sure the OCR results directory ``dir`` exists.

    If the directory is already present a notice is printed. Otherwise it is
    created, together with any missing parent directories.

    Args:
        dir: Path of the directory to create. (The name shadows the ``dir``
            builtin; it is kept so existing keyword callers keep working.)

    Raises:
        OSError: If the directory cannot be created, e.g. the path exists but
            is a regular file, or permission is denied. Previously every
            ``OSError`` was reported as "already exists", which hid these
            failures until a later cell tried to write into the directory.
    """
    if os.path.isdir(dir):
        print('OCR Results Directory exists in the system already.')
        return
    # exist_ok=True tolerates another process creating the directory between
    # the check above and this call.
    os.makedirs(dir, exist_ok=True)

In [3]:
# Image to run OCR on.
# NOTE(review): machine-specific absolute path; change it for your environment.
IMAGE_PATH = r'C:\Users\Avinash\Desktop\New folder\OCR\test ocr\hello_world.png'

img = cv2.imread(IMAGE_PATH)
# cv2.imread reports failure by returning None rather than raising. Stop here
# with a clear message instead of an AttributeError on `img.copy()` below.
if img is None:
    raise FileNotFoundError('Could not read image: {}'.format(IMAGE_PATH))
orig = img.copy()  # untouched copy of the loaded image

# Try document_warper first; when it raises FourPointException fall back to
# skew_correction on the unmodified image.
try:
    img = document_warper(img)
except FourPointException:
    print("Couldn't find the edges of the current doucment.\nAt present, to improve accuracy we are working on skew correctrion on the present document")
    img = skew_correction(img)

# Blur with a 7x7 Gaussian, then sharpen with a 3x3 kernel whose coefficients
# sum to 1 (centre 9, neighbours -1), then apply non-local-means denoising.
img = cv2.GaussianBlur(img,(7,7),0)
filter2d = np.array([[-1,-1,-1],[-1,9,-1],[-1,-1,-1]],dtype='int8')
sharpen = cv2.filter2D(img,-1,filter2d)

img = cv2.fastNlMeansDenoisingColored(sharpen)


Couldn't find the edges of the current doucment.
At present, to improve accuracy we are working on skew correctrion on the present document
Best angle: -1


In [4]:
print(img.shape)
# cv2.imread returns pixels in BGR order, so the BGR conversion code is the one
# that applies the intended per-channel weights (RGB2GRAY swaps the red and
# blue weights).
# NOTE(review): assumes document_warper / skew_correction keep the BGR channel
# order of their input — confirm against image_preprocessing.
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

(577, 700, 3)


In [5]:
# Edge map of the preprocessed page (edge_detection comes from the project's
# image_preprocessing package).
edged = edge_detection(img)
# cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; taking the last two items works on all.
cnts,heirarchy = cv2.findContours(edged.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)[-2:]


In [6]:
# Order the contours, and get their bounding boxes, using the helper's
# 'top-to-bottom' method.
cnts,bounding_box = get_sorted_contours_bounding_box(cnts,method='top-to-bottom')
# Preview the grayscale page in a window titled 'w'. cv2.waitKey(0) blocks this
# cell until a key is pressed in that window.
cv2.imshow('w',gray)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [13]:
# Bounding-box size limits (pixels) for a contour to be kept as a character.
MIN_CHAR_W, MAX_CHAR_W = 7, 150
MIN_CHAR_H, MAX_CHAR_H = 15, 150
# Side length (pixels) of the square images appended to `chars`.
CHAR_SIDE = 128

chars=[]
boxs=[]
for _contour,(x,y,w,h) in zip(cnts,bounding_box):
    # Skip boxes outside the accepted character size range.
    if not (MIN_CHAR_W <= w <= MAX_CHAR_W and MIN_CHAR_H <= h <= MAX_CHAR_H):
        continue

    roi = gray[y:y+h,x:x+w]
    # Otsu picks the binarisation threshold automatically (the 0 is ignored).
    thresh = cv2.threshold(roi,0,255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # NOTE(review): the glyph size is drawn at random, so results differ between
    # runs unless `random` is seeded beforehand.
    dims = random.randint(45,55)
    # `interpolation` must be passed by keyword: the third positional parameter
    # of cv2.resize is `dst`, so a positional flag never selected cubic
    # interpolation.
    thresh = cv2.resize(thresh,(dims,dims),interpolation=cv2.INTER_CUBIC)
    tH,tW = thresh.shape
    # Pad with white on every side towards CHAR_SIDE. An odd remainder leaves
    # the result one pixel short, which the final resize corrects.
    dX = int(max(0,CHAR_SIDE-tW)/2.0)
    dY = int(max(0,CHAR_SIDE-tH)/2.0)
    padded = cv2.copyMakeBorder(thresh,top=dY,bottom=dY,right=dX,left=dX,borderType=cv2.BORDER_CONSTANT,value=(255,255,255))
    padded = cv2.resize(padded,(CHAR_SIDE,CHAR_SIDE),interpolation=cv2.INTER_CUBIC)
    # Scale to [0, 1] and append a trailing channel axis.
    padded = padded.astype('float32')/255.
    padded = np.expand_dims(padded,axis=-1)
    boxs.append((x,y,w,h))
    chars.append(padded)

In [8]:

# Load the digit model (`nums`) and the lowercase-letter model (`lowercase`)
# inside a tensorflow.device('/cpu:0') scope.
with model_device('/cpu:0'):
    nums = load_model('models/alex_net nums.h5')
    lowercase = load_model('models/incep lower.h5')

In [9]:
# Load the uppercase-letter model inside a tensorflow.device('/gpu:1') scope.
# NOTE(review): '/gpu:1' addresses the GPU with index 1, so this presumably
# needs a machine with at least two GPUs — confirm before running elsewhere.
with model_device('/gpu:1'):
    uppercase = load_model('models/incep upper.h5')

In [10]:

# Restore the pickled random-forest model whose predictions route each
# character to the letter models (class 0) or the digit model (class 1).
# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open('final rf 0.90.sav','rb') as f:
    random_forest = pickle.load(f)

In [14]:
# Stack the list of per-character images into one float32 array.
# Note that this rebinds `chars` from a list to an ndarray.
chars = np.array(chars,dtype='float32')

In [15]:
# Lookup tables for the two letter models: keys are the stringified argmax
# index, values are hexadecimal character codes (decoded with int(..., 16)).
# The `with` blocks close the files on exit, so no explicit close() is needed.
with open('classes/lower_classes.json','r') as f:
    lower_classes = json.load(f)
with open('classes/upper_classes.json','r') as f:
    upper_classes = json.load(f)

In [42]:
def rf_classify(rf_preds,uc,lc,nums,chars,bar,show_chars=True):
    """Label every character image with the model chosen by the random forest.

    For a sample whose ``rf_preds`` entry is 0, both letter models are run and
    the more confident of the two wins. For an entry of 1, or when the winning
    letter probability is below 0.5, the digit model is used instead.

    Letter indices are decoded through the notebook-level ``upper_classes`` /
    ``lower_classes`` tables.

    Args:
        rf_preds: Per-sample class from the random forest (0 = letter,
            1 = digit), aligned with ``chars``.
        uc: Model exposing ``predict`` for uppercase letters.
        lc: Model exposing ``predict`` for lowercase letters.
        nums: Model exposing ``predict`` for digits; argmax ``k`` is decoded as
            ``chr(48 + k)``.
        chars: Sequence of character images, one per entry of ``rf_preds``.
        bar: Progress bar object. Currently unused: the per-sample update call
            is disabled.
        show_chars: When true (the default, matching the previous behaviour),
            every letter candidate is shown in a blocking OpenCV window that
            waits for a key press. Pass ``False`` for unattended runs.

    Returns:
        Tuple ``(letter_class, probs)``: the predicted character and the
        probability of that prediction for every sample, in input order.

    Raises:
        ValueError: If an entry of ``rf_preds`` is neither 0 nor 1. Previously
            such a sample silently reused the previous sample's result, or
            failed with an unbound-variable error on the first sample.
    """
    letter_class = []
    probs = []
    for i,c in enumerate(chars):
        route = rf_preds[i]
        if route != 0 and route != 1:
            raise ValueError('Unexpected random-forest class {!r} for sample {}'.format(route,i))

        # Add a leading batch axis so a single image can be passed to predict().
        tmp = np.reshape(c,((1,)+c.shape))

        if route == 0:
            # Use the models passed in by the caller (the original ignored the
            # `uc`/`lc` parameters and read the notebook globals instead).
            upper_pred = uc.predict(tmp)
            lower_pred = lc.predict(tmp)
            idx1,idx2 = np.argmax(upper_pred),np.argmax(lower_pred)
            final_class = chr(int(upper_classes[str(idx1)],16))
            prob = upper_pred[0][idx1]
            if show_chars:
                cv2.imshow('c',c)
                cv2.waitKey(0)
                cv2.destroyAllWindows()
            print("upper,prob {},{:.2f} lower,prob {},{:.2f}".format(idx1,prob,idx2,lower_pred[0][idx2]))
            # Prefer the lowercase reading only when it is strictly more confident.
            if(upper_pred[0][idx1]<lower_pred[0][idx2]):
                final_class = chr(int(lower_classes[str(idx2)],16))
                prob = lower_pred[0][idx2]

        # Digits: either routed here directly, or as a fallback for a weak letter.
        if route == 1 or prob < 0.5:
            preds = nums.predict(tmp)
            idx1 = np.argmax(preds)
            final_class = chr(48+idx1)
            prob = preds[0][idx1]

        # NOTE(review): the original assigned the UNKNOWN sentinel to an unused
        # local when prob < 0.5, so a low-confidence prediction is still
        # returned as the best guess. That behaviour is kept because the cells
        # consuming these labels pass them straight to cv2.putText and
        # pdf.drawString, which need strings.

        print('letter {}:prob {}'.format(final_class,prob))
        letter_class.append(final_class)
        probs.append(prob)
        # bar.update(i)
    return letter_class,probs


In [44]:

# Flatten every character image into one feature row (rows * columns values
# per sample) before handing the batch to the random forest.
flat_shape = (chars.shape[0], chars.shape[1] * chars.shape[2])
rf_preds = random_forest.predict(chars.reshape(flat_shape))

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 170 out of 170 | elapsed:    0.1s finished


In [45]:

  
# Layout of the progress bar line: label, elapsed time, bar and ETA.
widgets = ['Classifying images [',
         progressbar.Timer(format= 'elapsed time: %(elapsed)s'),
         '] ',
           progressbar.Bar('='),' (',
           progressbar.ETA(), ') ',
          ]


progressbar.streams.flush()
bar = progressbar.ProgressBar(max_value=chars.shape[0],
                              widgets=widgets).start()


labels,probs = rf_classify(rf_preds,uppercase,lowercase,nums,chars,bar)
bar.finish()

# Draw every recognised character and its bounding box on a copy of the page.
overview = img.copy()
for (label,(x,y,w,h)) in zip(labels,boxs):
    # UNKNOWN is NaN, and NaN never compares equal to itself, so the former
    # `label==UNKNOWN` test could never be true. Labels are strings; anything
    # else (i.e. the NaN sentinel) is rendered as "?" so cv2.putText always
    # receives text.
    if not isinstance(label,str):
        label="?"
    cv2.rectangle(overview,(x,y),(x+w,y+h),(0,255,0),2)
    cv2.putText(overview,label,(x-10,y-10),cv2.FONT_HERSHEY_SIMPLEX,1.2,(0,255,0),2)


# Show the annotated page; cv2.waitKey(0) blocks until a key is pressed.
cv2.imshow('final',overview)
cv2.waitKey(0)
cv2.destroyAllWindows()


Classifying images [elapsed time: 0:00:00] |                | (ETA:  --:--:--) upper,prob 7,1.00 lower,prob 24,0.99
letter H:prob 1.0
upper,prob 4,1.00 lower,prob 2,0.78
letter E:prob 1.0
upper,prob 11,1.00 lower,prob 10,0.97
letter L:prob 0.997218132019043
upper,prob 11,0.99 lower,prob 10,0.97
letter L:prob 0.9925963878631592
upper,prob 14,0.96 lower,prob 14,1.00
letter o:prob 0.9956271648406982
upper,prob 3,0.99 lower,prob 3,0.94
letter D:prob 0.9927777647972107
upper,prob 22,1.00 lower,prob 22,1.00
letter W:prob 1.0
upper,prob 22,1.00 lower,prob 10,0.98
letter W:prob 1.0
upper,prob 11,1.00 lower,prob 7,0.96
letter L:prob 0.9999949932098389
upper,prob 14,0.48 lower,prob 14,0.70
letter o:prob 0.7043538689613342


In [68]:
result_dir = os.path.join(os.getcwd(),r'OCR Results')
make_results_dir(result_dir)

# One sub-directory per run, named after the current local time to the second
# (':' is replaced because it is not allowed in Windows file names).
curr_result_dir = os.path.join(result_dir,'results-{}'.format(str(datetime.now()).replace(':','-')[:19]))
try:
    os.mkdir(curr_result_dir)
except FileExistsError:
    # Only "already there" is benign. Any other OSError (missing parent,
    # permissions) now propagates instead of being reported as "exists".
    print(curr_result_dir+" exists in the system")

OCR Results Directory exists in the system already.


In [76]:
# Build the path with os.path.join instead of a hard-coded '\' so the file
# lands inside the run directory on every platform (same result on Windows).
pdf_path=os.path.join(curr_result_dir,'ocr.pdf')

In [77]:
# The PDF page has the pixel dimensions of the processed image; bottomup=0
# puts the origin at the top-left so image coordinates can be used directly.
img_height, img_width = img.shape[:2]
pdf = canvas.Canvas(pdf_path, bottomup=0, pagesize=(img_width, img_height))
pdf.setTitle('OCR Results' + str(datetime.now())[:10])

# (px, py, ph) describe the box that opened the current text line; every
# character on that line shares its baseline (py + ph) and font size (ph).
px = py = ph = None
for label, (x, y, w, h) in zip(labels, boxs):
    if px is None and py is None:
        px, py, pw, ph = x, y, w, h

    # A box whose top is at or below the bottom of the line's first box
    # starts a new line and becomes the new reference box.
    starts_new_line = not (py + ph > y)
    if starts_new_line:
        px, py, ph = x, y, h

    pdf.setFont('Times-Bold', ph)
    pdf.drawString(x, py + ph, label)
pdf.save()

In [78]:
# Convert the generated PDF to a Word document in the same run directory.
# os.path.join replaces the hard-coded '\' separator (same result on Windows).
parse(pdf_path,os.path.join(curr_result_dir,'ocr.docx'),start=0,end=None)

Parsing Page 1: 1/1...
Creating Page 1: 1/1...
--------------------------------------------------
Terminated in 0.15901759999906062s.


In [70]:
# Display the directory this run's outputs are written to.
curr_result_dir

'c:\\Users\\Avinash\\Desktop\\New folder\\OCR\\OCR Results\\results-2021-05-14 16-53-03'

In [72]:
# Save the annotated page next to the other outputs of this run. cv2.imwrite
# returns True on success, which the notebook displays as the cell result.
# os.path.join replaces the hard-coded '\' separator (same result on Windows).
cv2.imwrite(os.path.join(curr_result_dir,'overview.png'),overview)

True