In [130]:
import cv2
import pytesseract
import pandas as pd
import os
import fastwer
import Levenshtein
import numpy as np
from PIL import Image
import tempfile

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

In [131]:
df_output = pd.DataFrame(columns = ['img_filename', 'ocr_output'])

In [132]:
def image_v1(df_output):    
    for img in os.listdir('images'):
        im = cv2.imread(f'./images/{img}')
        custom_config = ''
        if img == 'test1.png':
            custom_config = '--oem 3 --psm 6 -l ron -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        else:
            custom_config = '--oem 1 --psm 4 -l ron -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        
        output = pytesseract.image_to_string(im, config=custom_config)
        
        # print(output)
        dictionary = {'img_filename': img, 'ocr_output': str(output)}
        df_output = pd.concat([df_output, pd.DataFrame([dictionary])], ignore_index=True)

        # Inlocuiest '\n' cu ' '
        df_output['ocr_output'] = df_output['ocr_output'].apply(lambda x: x.replace('\n',' '))

    # Adauga raspunsurile corecte
    df_output.loc[df_output['img_filename'] == 'test1.png', 'gtruth'] = "Google Cloud Platform"
    df_output.loc[df_output['img_filename'] == 'test2.jpeg', 'gtruth'] = "Succes in rezolvarea tEMELOR la LABORAtoarele de Inteligenta Artificiala!"
    return df_output

    
    # img = cv2.imread(f'./images/{img_name}')
    # 
    # text = str(pytesseract.image_to_string(img_rgb, config=conf_set))
    # print(text)

In [133]:
def cer_levenshtein(ocr_output, gtruth):
    # Distanta Levenshtein
    distance = Levenshtein.distance(gtruth, ocr_output)

    # Character Error Rate (CER):
    cer = distance / len(gtruth)
    return cer

In [134]:
def wer_levenshtein(ocr_output, gtruth):
    # Obtine cuvintele
    gtruth_tokens = gtruth.split()
    ocr_output_tokens = ocr_output.split()

    # Distanta Levenshtein
    distance = Levenshtein.distance(gtruth_tokens, ocr_output_tokens)

    # Word Error Rate (WER)
    wer = distance / len(gtruth_tokens)
    return wer

In [135]:
def cer_jaccard(ocr_output, gtruth):
    set_output = set(ocr_output)
    set_gtruth = set(gtruth)
    intersection = len(set_output.intersection(set_gtruth))
    union = len(set_output.union(set_gtruth))
    cer = 1 - intersection / union
    return cer

In [136]:
def wer_jaccard(ocr_output, gtruth):
    set_output = set(ocr_output.split())
    set_gtruth = set(gtruth.split())
    intersection = len(set_output.intersection(set_gtruth))
    union = len(set_output.union(set_gtruth))
    wer = 1 - intersection / union
    return wer

In [137]:
def cer_wer(df_output):
    for index, row in df_output.iterrows():
        filename = row['img_filename']
        gtruth = row['gtruth']
        output = row['ocr_output']
        cer_lev = cer_levenshtein(output, gtruth)
        wer_lev = wer_levenshtein(output, gtruth)
        # cer = fastwer.score_sent(output, ref, char_level=True)
        # wer = fastwer.score_sent(output, ref, char_level=False)
        df_output.loc[df_output['img_filename'] == filename, 'cer_lev'] = round(cer_lev,2) 
        df_output.loc[df_output['img_filename'] == filename, 'wer_lev'] = round(wer_lev,2)

        cer_jac = cer_jaccard(output, gtruth)
        wer_jac = wer_jaccard(output, gtruth)
        df_output.loc[df_output['img_filename'] == filename, 'cer_jaccard'] = round(cer_jac,2) 
        df_output.loc[df_output['img_filename'] == filename, 'wer_jaccard'] = round(wer_jac,2)

    return df_output

In [138]:
cer_wer(image_v1(df_output))

Unnamed: 0,img_filename,ocr_output,gtruth,cer_lev,wer_lev,cer_jaccard,wer_jaccard
0,test1.png,I AW l rgmtolv loud I Of VIIII,Google Cloud Platform,1.14,2.67,0.52,1.0
1,test2.jpeg,ncca b Aesotvatza MELO LAEORA A E,Succes in rezolvarea tEMELOR la LABORAtoarele ...,0.67,1.0,0.43,1.0


In [139]:
def location(image, custom_config):    
    boxes = pytesseract.image_to_boxes(image, config=custom_config)

    for box in boxes.splitlines():
        box = box.split()
        x, y, w, h = map(int, box[1:5])
        cv2.rectangle(image, (x, image.shape[0] - y), (w, image.shape[0] - h), (0, 255, 0), 2)
    
    cv2.imshow('Bounding Boxes', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [140]:
for img in os.listdir('images'):
    image = cv2.imread(f'./images/{img}')
    custom_config = ''
    if img == 'test1.png':
        custom_config = '--oem 3 --psm 6 -l ron -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    else:
        custom_config = '--oem 1 --psm 4 -l ron -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    location(image, custom_config)

In [141]:
IMAGE_SIZE = 1800
BINARY_THREHOLD = 180

In [142]:
def set_image_dpi(file_path):
    im = Image.open(file_path)
    length_x, width_y = im.size
    factor = max(1, int(IMAGE_SIZE / length_x))
    size = factor * length_x, factor * width_y
    # size = (1800, 1800)
    im_resized = im.resize(size, Image.LANCZOS)
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
    temp_filename = temp_file.name
    im_resized.save(temp_filename, dpi=(300, 300))
    return temp_filename

In [143]:
def image_smoothening(img):
    ret1, th1 = cv2.threshold(img, BINARY_THREHOLD, 255, cv2.THRESH_BINARY)
    ret2, th2 = cv2.threshold(th1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    blur = cv2.GaussianBlur(th2, (1, 1), 0)
    ret3, th3 = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return th3

def remove_noise_and_smooth(file_name):
    img = cv2.imread(file_name, 0)
    filtered = cv2.adaptiveThreshold(img.astype(np.uint8), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 41,
                                     3)
    kernel = np.ones((1, 1), np.uint8)
    opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
    closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
    img = image_smoothening(img)
    or_image = cv2.bitwise_or(img, closing)
    return or_image

In [144]:
def process_image_for_ocr(file_path):
    # TODO : Implement using opencv
    temp_filename = set_image_dpi(file_path)
    im_new = remove_noise_and_smooth(temp_filename)
    return im_new
    # return temp_filename

In [145]:
def image_v2(df_output):    
    for im in os.listdir('images'):
        # img = cv2.imread(process_image_for_ocr(f'./images/{im}'))
        img = process_image_for_ocr(f'./images/{im}')

        custom_config = ""
        if im == 'test1.png':
            custom_config = '--oem 3 --psm 6 -l ron -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        else:
            custom_config = '--oem 1 --psm 4 -l ron -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
            
        pil_image = Image.fromarray(img)
        pil_image.show()
        location(image, custom_config)
        
        output = pytesseract.image_to_string(img, config=custom_config)
        
        # print(output)
        dictionary = {'img_filename': im, 'ocr_output': str(output)}
        df_output = pd.concat([df_output, pd.DataFrame([dictionary])], ignore_index=True)

        # Inlocuiest '\n' cu ' '
        df_output['ocr_output'] = df_output['ocr_output'].apply(lambda x: x.replace('\n',' '))

    # Adauga raspunsurile corecte
    df_output.loc[df_output['img_filename'] == 'test1.png', 'gtruth'] = "Google Cloud Platform"
    df_output.loc[df_output['img_filename'] == 'test2.jpeg', 'gtruth'] = "Succes in rezolvarea tEMELOR la LABORAtoaree de Inteligenta Artificiala!"
    return df_output

In [None]:
cer_wer(image_v2(df_output))

In [126]:
for img in os.listdir('images'):
    image = process_image_for_ocr(f'./images/{img}')
    
    custom_config = ''
    if img == 'test1.png':
        custom_config = '--oem 3 --psm 6 -l ron -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    else:
        custom_config = '--oem 1 --psm 4 -l ron -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    location(image, custom_config)

In [127]:
def bb_intersection_over_union(boxA, boxB):
	xA = max(boxA[0], boxB[0])
	yA = max(boxA[1], boxB[1])
	xB = min(boxA[2], boxB[2])
	yB = min(boxA[3], boxB[3])

	interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
	
	boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
	boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
	
	iou = interArea / float(boxAArea + boxBArea - interArea)
    
	return iou

In [129]:
def location_quality(image, custom_config):    
    boxes = pytesseract.image_to_boxes(image, config=custom_config)

    i = 0
    s = 0
    for box in boxes.splitlines():
        i += 1
        if i < 9:
            box = box.split()
            s = 5
            x, y, w, h = map(int, box[1:5])
            cv2.rectangle(image, (x, image.shape[0] - y), (w, image.shape[0] - h), (0, 255, 0), 2)
            if i == 6:
                cv2.rectangle(image, (700, 340), (770, 430), (255, 0, 0), 2)
                s += bb_intersection_over_union([x, image.shape[0] - y, w, image.shape[0] - h], [700, 340, 770, 430])
            if i == 7:
                cv2.rectangle(image, (770, 370), (820, 420), (255, 0, 0), 2)
                s += bb_intersection_over_union([x, image.shape[0] - y, w, image.shape[0] - h], [770, 370, 820, 420])
            if i == 8:
                cv2.rectangle(image, (835, 320), (1300, 425), (255, 0, 0), 2)
                s += bb_intersection_over_union([x, image.shape[0] - y, w, image.shape[0] - h], [835, 320, 1300, 425])
                
    print(s/8)
    cv2.imshow('Bounding Boxes', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

custom_config = '--oem 3 --psm 6 -l ron -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
image = cv2.imread('images/test2.jpeg')
location_quality(image, custom_config)

0.625
