<a href="https://colab.research.google.com/github/aakashagarwal6898/Ocular/blob/master/Ocular_Tesseract_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Install the pyocr Python package and the `tesseract-ocr-eng` apt package that this notebook relies on.
!pip install pyocr  
!sudo apt-get install tesseract-ocr-eng

In [0]:
from PIL import Image    
import sys              
import pyocr             
import pyocr.builders
from google.colab.patches import cv2_imshow 
import cv2
import numpy as np  
import os
import tempfile
import json
import spacy

In [0]:
# Ask pyocr which OCR backends it can drive; stop if there are none.
tools = pyocr.get_available_tools()
if not tools:
    print("No OCR tool found")
    sys.exit(1)
tool = tools[0]  # use the first backend pyocr reports

# Threshold value passed to cv2.threshold in image_smoothening.
BINARY_THREHOLD = 180
# Language code handed to the OCR tool.
LANG = "eng"

# Placeholder path: point this at the saved spaCy model directory.
model_dir = "path/to/saved_model"
nlpModel = spacy.load(model_dir)

In [0]:
def remove_noise_and_smooth(file_name):
  """Load an image in grayscale and binarise it for OCR.

  The image is adaptively thresholded, passed through a morphological
  open/close, and OR-ed with the output of image_smoothening.

  Args:
    file_name: path of the image file to read.

  Returns:
    The bitwise OR of the smoothened image and the open/closed adaptive
    threshold, as produced by cv2.bitwise_or.

  Raises:
    OSError: if OpenCV could not read an image from ``file_name``.
  """
  img = cv2.imread(file_name, 0)
  if img is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising, so fail here with a message that names the file.
    raise OSError("Could not read image file: {!r}".format(file_name))

  filtered = cv2.adaptiveThreshold(img.astype(np.uint8), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 41, 3)
  kernel = np.ones((1, 1), np.uint8)
  opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
  closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
  smoothed = image_smoothening(img)

  return cv2.bitwise_or(smoothed, closing)

def image_smoothening(img):
  """Binarise ``img`` with a fixed threshold followed by two Otsu passes.

  The sequence is: fixed threshold at BINARY_THREHOLD, Otsu threshold,
  Gaussian blur with a (1, 1) kernel, then a second Otsu threshold.
  Returns the image from the final threshold call.
  """
  otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU

  _, fixed_binary = cv2.threshold(img, BINARY_THREHOLD, 255, cv2.THRESH_BINARY)
  _, otsu_first = cv2.threshold(fixed_binary, 0, 255, otsu_mode)
  blurred = cv2.GaussianBlur(otsu_first, (1, 1), 0)
  _, otsu_second = cv2.threshold(blurred, 0, 255, otsu_mode)

  return otsu_second

def process_image(doc_img):
  """Clean the image at ``doc_img`` and return the text the OCR tool reads.

  Args:
    doc_img: path of the image file to OCR.

  Returns:
    The string produced by ``tool.image_to_string`` with a TextBuilder.
  """
  intermediate_image = remove_noise_and_smooth(doc_img)

  # Hand the cleaned array to the OCR tool as an in-memory PIL image.
  # Round-tripping through "<tmpname>.png" left that PNG on disk after every
  # call, because closing the NamedTemporaryFile only removes "<tmpname>".
  ocr_input = Image.fromarray(intermediate_image)

  txt = tool.image_to_string(
      ocr_input,
      lang=LANG,
      builder=pyocr.builders.TextBuilder()
  )

  return txt

def ocr_predict(input_img):
  """Return the OCR text for the image at ``input_img`` (delegates to process_image)."""
  return process_image(input_img)


def ner_predict(textToPredict):
  """Run the NER model over ``textToPredict`` and return its entities as JSON.

  Entities are collected into a dictionary keyed by entity label:
    * "Total bill amount" keeps the largest positive amount that parses as
      a float; unparseable amounts are ignored.
    * "Items" entities are gathered, in order, into a list under "Items"
      (the key is always present, possibly as an empty list).
    * For any other label, the first occurrence is stored under the label;
      later occurrences are stored under "<label>-<n>", where n is a counter
      shared by all labels, so earlier values are never overwritten.

  Args:
    textToPredict: the text to run through ``nlpModel``.

  Returns:
    A JSON string (indent=2) of the dictionary with keys sorted.
  """
  doc = nlpModel(textToPredict)
  max_amt = 0
  duplicate_index = 1
  data = {}
  items_list = []

  for ent in doc.ents:
    label = ent.label_
    if label == "Total bill amount":
      try:
        amt = float(ent.text)
      except ValueError:
        # The entity text is not a plain number; skip it rather than
        # aborting the whole prediction.
        continue
      if amt > max_amt:
        # Remember the running maximum so a later, smaller amount cannot
        # replace the largest one seen so far.
        max_amt = amt
        data["Total bill amount"] = amt
    elif label == "Items":
      items_list.append(ent.text)
    elif label in data:
      # Label already stored: write under a suffixed key instead of
      # overwriting the earlier value.
      data[label + "-" + str(duplicate_index)] = ent.text
      duplicate_index += 1
    else:
      data[label] = ent.text

  # Store the collected items under the "Items" key.
  data["Items"] = items_list
  # Sort the dictionary by key before serialising.
  data = dict(sorted(data.items()))

  return json.dumps(data, indent=2)

def get_prediction(input_img):
  """OCR the image at ``input_img`` and return ner_predict's JSON for that text."""
  return ner_predict(ocr_predict(input_img))

In [25]:
# Run the OCR + NER pipeline on one image (placeholder path) and print the resulting JSON string.
json_data = get_prediction("path/to/input_image")
print(json_data)

{
  "Date": "02/02/19",
  "Items": [
    "Little Juicy Pork Buns",
    "Beef w Bak Choy (L)",
    "Hunan Beef"
  ],
  "Store address": "1902 Jericho Turnpike\nNew Hyde Park NY 11040",
  "Store name": "Chef Wang",
  "Time": "02:14 PM"
}
