In [1]:
import json
import re
import tempfile

from PIL import ImageDraw
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
from array import array
import os
from PIL import Image
import sys
from dotenv import load_dotenv
import time
import numpy as np
import matplotlib.pyplot as plt
import cv2

In [2]:
def authenticate():
    """Create a Computer Vision client from environment configuration.

    Calls ``load_dotenv()`` and then reads the ``VISION_KEY`` and
    ``VISION_ENDPOINT`` environment variables.

    Returns:
        ComputerVisionClient: a client built from the endpoint and key.

    Raises:
        KeyError: if either environment variable is not set.
    """
    load_dotenv()
    key = os.environ["VISION_KEY"]
    service_url = os.environ["VISION_ENDPOINT"]
    return ComputerVisionClient(service_url, CognitiveServicesCredentials(key))

In [3]:
# Module-level client; get_image_ocr_result reads this global for every OCR request.
computervision_client = authenticate()

OCR returns a result describing what it recognised in an image.
    The result contains, among other things, the text of each line and its location.

In [4]:
def get_image_ocr_result(image_path, language=None):
    """Submit a local image to the Read (OCR) operation and wait for the result.

    Args:
        image_path: path of the image file to upload.
        language: optional language hint forwarded to ``read_in_stream``.

    Returns:
        The object returned by ``get_read_result`` once its status is no
        longer ``'notStarted'`` or ``'running'`` (it may still be a failure;
        callers check ``status`` themselves).
    """
    # The context manager releases the file handle even if the request raises;
    # previously the handle was opened and never closed.
    with open(image_path, "rb") as img:
        read_response = computervision_client.read_in_stream(
            image=img,
            mode="Printed",
            raw=True,
            language=language
        )
    # The operation id is the last path segment of the Operation-Location header.
    operation_id = read_response.headers['Operation-Location'].split('/')[-1]
    # Poll once per second until the operation leaves the pending states.
    while True:
        read_result = computervision_client.get_read_result(operation_id)
        if read_result.status not in ['notStarted', 'running']:
            break
        time.sleep(1)
    return read_result

### Getting text from file with OCR
    After getting the OCR result we can extract the text in the image.
    

In [5]:
def get_text_of_file(image_path, language=None):
    """Return all text recognised in an image as a single string.

    Every recognised line is followed by one space (so a non-empty result
    ends with a trailing space). An empty string is returned when the OCR
    operation did not succeed.
    """
    ocr = get_image_ocr_result(image_path, language)
    if ocr.status == OperationStatusCodes.succeeded:
        return "".join(
            row.text + " "
            for page in ocr.analyze_result.read_results
            for row in page.lines
        )
    return ""


In [6]:
# Quick check: OCR one sample image and show the recognised text as the cell output.
get_text_of_file('test2.jpeg')

'Lucces in resolvarea TEMELOR la LABORA toarele de Inteligenta ArtificialÃ ! '

### Getting the location of text in image
    After getting the OCR result we can get the actual location of the text in the image.

In [7]:
def get_location_of_text_file(image_path, language=None):
    """Return one ``[x1, y1, x3, y3]`` box per text line recognised by OCR.

    Each line's ``bounding_box`` is unpacked as eight numbers (four x/y
    pairs); only the first and third pairs are kept.
    NOTE(review): presumably these are opposite corners (top-left and
    bottom-right) of the quadrilateral - confirm against the service docs.

    Returns an empty list when the OCR operation did not succeed.
    """
    ocr = get_image_ocr_result(image_path, language)
    if not (ocr.status == OperationStatusCodes.succeeded):
        return []
    quads = (
        row.bounding_box
        for page in ocr.analyze_result.read_results
        for row in page.lines
    )
    return [[x1, y1, x3, y3] for x1, y1, _, _, x3, y3, _, _ in quads]


Analizam marja de eroare

Pentru acest lucru am pus intr-un fisier JSON datele despre textele noastre

In [8]:
# Ground-truth annotations; each entry is read elsewhere in this notebook through
# the keys "image_path", "actual_text" and "locations".
# The encoding is explicit because the texts contain diacritics and the platform
# default encoding is not guaranteed to be UTF-8.
with open("texts.json", "r", encoding="utf-8") as file:
    data = json.load(file)

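## CER - Character Error Rate
    calculez numarul de caractere diferite, comparand textele pozitie cu pozitie
    CER = numarul total de caractere gresite / numarul total de caractere din textul real
    caracterele gresite = diferentele de pe primele lung_minima=min(len(actual_text),len(predicted_text)) pozitii + diferenta de lungime dintre cele doua texte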

In [9]:
def cer_character_error_rate(predicted_text, actual_text):
    """Position-wise character error rate of ``predicted_text`` vs ``actual_text``.

    Errors are the characters that differ at the same index, plus the
    difference in length (characters left unread or read in excess). The
    total is divided by the length of ``actual_text``.

    This is a positional comparison, not an edit distance: one inserted or
    dropped character shifts every following position.

    Args:
        predicted_text: text produced by OCR.
        actual_text: reference text.

    Returns:
        float: the error rate. For an empty reference, 0.0 when the
        prediction is empty as well and 1.0 otherwise (previously this case
        raised ZeroDivisionError).
    """
    if not actual_text:
        # Nothing to divide by: an empty prediction is perfect, anything else is all error.
        return 0.0 if not predicted_text else 1.0
    length_gap = abs(len(actual_text) - len(predicted_text))
    # zip stops at the shorter text, i.e. it covers the common prefix only.
    mismatches = sum(1 for got, expected in zip(predicted_text, actual_text) if got != expected)
    return (length_gap + mismatches) / len(actual_text)


### Aplic cer pe toate imaginiile

In [None]:
def perform_cer_on_all_images():
    """Compute the CER for every entry of ``data``.

    Returns:
        dict: image path -> character error rate of the OCR text against
        the entry's ``actual_text``.
    """
    cers = {}
    for entry in data:
        path, reference = entry["image_path"], entry["actual_text"]
        cers[path] = cer_character_error_rate(get_text_of_file(path), reference)
    return cers
print(perform_cer_on_all_images())

In [None]:
Preia toate cuvintele dintr-un text

In [None]:
def get_all_words(text):
    """Return every maximal run of word characters (``\\w+``) in ``text``, in order."""
    return re.compile(r'\w+').findall(text)

## Distanta Levenshtein
Numarul minim de elemente inlocuite, sarite (sterse) sau inserate!
=> cu ea pot calcula WER
Pentru fiecare celula verific daca elementele comparate sunt egale: daca da, preiau valoarea de pe diagonala;
daca nu, celula devine minimul dintre vecinii din stanga, de sus si de pe diagonala, plus 1
=> distanta este exact ultima valoare din matrice

In [None]:
def levenshtein_distance(s1, s2):
    """Levenshtein (edit) distance between two sequences.

    Works on any pair of sized, iterable sequences whose items can be
    compared with ``==`` (strings, lists of words, ...).

    Returns:
        int: minimum number of substitutions, insertions and deletions.
    """
    # Only the previous row of the DP table is needed to build the next one.
    previous = list(range(len(s2) + 1))
    for row, left_item in enumerate(s1, start=1):
        current = [row]  # first column: distance to the empty prefix of s2
        for col, top_item in enumerate(s2, start=1):
            if left_item == top_item:
                # Equal items cost nothing: carry the diagonal value over.
                current.append(previous[col - 1])
            else:
                # Otherwise take the cheapest neighbour and pay one edit.
                cheapest = min(current[col - 1], previous[col], previous[col - 1])
                current.append(cheapest + 1)
        previous = current
    return previous[len(s2)]

## WER - Word Error Rate
Calculeaza calitatea la nivelul cuvintelor prin intermediul distantei Levenshtein

In [None]:
def wer_word_error_rate(predicted_text, actual_text):
    """Word error rate of ``predicted_text`` against ``actual_text``.

    Both texts are split into words with ``get_all_words``; the Levenshtein
    distance between the two word lists is divided by the number of
    reference words.

    Returns:
        float: the word error rate. For a reference without words, 0.0 when
        the prediction has no words either and 1.0 otherwise.
    """
    predicted_words = get_all_words(predicted_text)
    actual_words = get_all_words(actual_text)

    if not actual_words:
        # Avoid dividing by zero when the reference contains no words.
        return 0.0 if not predicted_words else 1.0

    # The distance must be taken over the word lists. Passing the raw strings
    # (as before) yields a character-level distance divided by a word count.
    dist = levenshtein_distance(predicted_words, actual_words)
    return dist / len(actual_words)

Fac pe toate Wer

In [None]:
def perform_wer_on_all_images():
    """Compute the WER for every entry of ``data``.

    Returns:
        dict: image path -> word error rate of the OCR text against the
        entry's ``actual_text``.
    """
    wers = {}
    for entry in data:
        path = entry["image_path"]
        reference = entry["actual_text"]
        recognised = get_text_of_file(path)
        wers[path] = wer_word_error_rate(recognised, reference)
    return wers
print(perform_wer_on_all_images())

Pe toate levensthein

In [None]:
def perform_levensthin_on_all_images():
    """Compute the character-level Levenshtein distance for every entry of ``data``.

    Returns:
        dict: image path -> edit distance between the OCR text and the
        entry's ``actual_text``.
    """
    levs = {}
    for entry in data:
        path, reference = entry["image_path"], entry["actual_text"]
        recognised = get_text_of_file(path)
        levs[path] = levenshtein_distance(recognised, reference)
    return levs
print(perform_levensthin_on_all_images())

## Distanta Hamming
Numarul de pozitii pe care cele doua texte difera (DOAR DACA TEXTELE AU ACEEASI LUNGIME)

In [None]:
def hamming_distance(s1, s2):
    """Number of positions at which two equal-length sequences differ.

    Raises:
        ValueError: if the sequences do not have the same length.
    """
    if len(s1) == len(s2):
        differing = 0
        for left, right in zip(s1, s2):
            if left != right:
                differing += 1
        return differing
    raise ValueError("Nu au aceeeasi lungime")

In [None]:
Hamming pe toate

In [None]:
def perform_hamming_on_all_images():
    """Compute the Hamming distance for every entry of ``data``.

    Returns:
        dict: image path -> Hamming distance between the OCR text and the
        entry's ``actual_text``, or ``None`` when the two texts differ in
        length and the distance is therefore undefined.
    """
    hams = {}
    for file in data:
        image_path = file["image_path"]
        actual_text = file["actual_text"]
        predicted_text = get_text_of_file(image_path)
        try:
            hams[image_path] = hamming_distance(predicted_text, actual_text)
        except ValueError:
            # hamming_distance rejects texts of different length; record that
            # explicitly instead of aborting the run on the first such image.
            hams[image_path] = None
    return hams
print(perform_hamming_on_all_images())

Acum distantele

## LA NIVEL LOCATIE

In [None]:
### Desenez sa vad ce identifica OCR

In [None]:
def image_with_box_surrounding_text(image_path,language=None):
    """Display an image with a red rectangle around every text line found by OCR.

    Args:
        image_path: path of the image to analyse and display.
        language: optional language hint forwarded to the OCR helper.
    """
    locations = get_location_of_text_file(image_path, language)
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for x1, y1, x3, y3 in locations:
        # The two corners are not guaranteed to be ordered (e.g. rotated text);
        # Pillow requires x1 >= x0 and y1 >= y0, so sort each axis first.
        left, right = sorted((x1, x3))
        top, bottom = sorted((y1, y3))
        draw.rectangle([left, top, right, bottom], outline=(255, 0, 0), width=3)
    display(image)
image_with_box_surrounding_text('test1.png')
image_with_box_surrounding_text('test2.jpeg')

## IOU- Intersection Over Union. Pentru a calcula calitatea localizarii
Calculeaza overlap ul dintre locatii
Asa aflu si eu cat e overlapping ul . cu cat mai mare cu atat mai bine!
Am salvat in json si localizarea randuriilor. 

In [None]:
def calculate_iou(box1,box2):
    """Intersection over Union of two boxes given as ``[x1, y1, x2, y2]``.

    Returns:
        The ratio intersection / union, or 0 when the union area is 0.
    """
    def _area(box):
        return (box[2]-box[0])*(box[3]-box[1])

    # Width and height of the overlap, clamped at 0 for disjoint boxes.
    overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    intersection = overlap_w * overlap_h

    # union = area1 + area2 - intersection
    union = _area(box1) + _area(box2) - intersection
    return intersection / union if union != 0 else 0

## Quality localization for one image
### Primesc path ul la imagine si locatiile corecte
### Returnez IOU pt fiecare rand

In [None]:
def quality_of_localization_one_image(image_path,actual_location,language=None):
    """IoU of every expected text line against the box OCR found at the same index.

    Args:
        image_path: image to run OCR on.
        actual_location: list of ground-truth boxes ``[x1, y1, x2, y2]``.
        language: optional language hint forwarded to the OCR helper.

    Returns:
        list: one IoU per pair of boxes sharing an index, followed by a 0 for
        every expected line that OCR did not return.
    """
    # Forward the language hint (it was accepted but ignored before).
    locations_from_ocr = get_location_of_text_file(image_path, language)
    # zip stops at the shorter list; iterating up to the longer length raised
    # IndexError whenever the two counts differed.
    ious = [
        calculate_iou(expected, found)
        for expected, found in zip(actual_location, locations_from_ocr)
    ]
    # Expected lines that OCR did not find score 0.
    missed = len(actual_location) - len(locations_from_ocr)
    ious.extend([0] * max(0, missed))
    return ious


## Quality for each file!
### O face pe fiecare rand

In [None]:
def quality_localization_for_files():
    """Per-line localization quality for every entry of ``data``.

    Returns:
        dict: image path -> list of IoU values, computed from the entry's
        ``locations`` by ``quality_of_localization_one_image``.
    """
    return {
        entry["image_path"]: quality_of_localization_one_image(
            entry["image_path"], entry["locations"]
        )
        for entry in data
    }
quality_localization_for_files()

## Average Quality

Testez pe fisierele de test calitatea overall de localizare!

In [None]:
def quality_mean():
    """Overall localization quality: the mean of the per-image mean IoU values."""
    per_image_means = [
        np.mean(line_ious)
        for line_ious in quality_localization_for_files().values()
    ]
    return np.mean(per_image_means)
print(quality_mean())

## Show one image
As dori sa pot vizualiza anumite imagini

In [None]:
def show_one_image(image_to_plot,cmap=None):
    """Render one image with matplotlib, with the axes hidden.

    Args:
        image_to_plot: image data passed straight to ``plt.imshow``.
        cmap: optional colormap forwarded to ``plt.imshow``.

    NOTE(review): elsewhere in this notebook arrays loaded with
    ``cv2.imread`` are passed in directly; if those are in BGR order the
    colours will look swapped here - confirm.
    """
    plt.imshow(image_to_plot,cmap=cmap)
    plt.axis('off')
    plt.show()

Pentru exemple salvez o imagine pentru a arata ce face fiecare functie

In [None]:
# Sample image reused by the demo calls below. The notebook converts it with
# COLOR_BGR2GRAY, i.e. it is treated as a BGR array.
# NOTE(review): preprocessing() checks its own imread result for None; this cell does not - confirm the file exists.
imagePrev=cv2.imread('test2.jpeg')

In [None]:
# cv2.imread yields BGR channel order while matplotlib interprets arrays as RGB;
# convert before displaying so the colours are not swapped.
show_one_image(cv2.cvtColor(imagePrev, cv2.COLOR_BGR2RGB))

## Imbunatatire calitate
### Exista mai multe metode de imbunatatire a calitatii. La nivel de cod noi putem preprocesa fiecare imagine
Astfel, declar urmatoarele functii pe care le voi folosi pentru a scade WER- ul!

### Normalizare
Normalizez imaginea printr-un min max!

In [None]:
def normalization(image):
    """Convert an image to grayscale and min-max normalise it to the 0-255 range.

    Args:
        image: BGR image, or an image that is already single-channel.

    Returns:
        The normalised grayscale image.
    """
    # Only convert multi-channel input: cvtColor with COLOR_BGR2GRAY fails on an
    # image that is already grayscale (same guard as in thresholding()).
    if len(image.shape) > 2 and image.shape[2] > 1:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image
    return cv2.normalize(gray, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
show_one_image(normalization(imagePrev),cmap='gray')

## Image scaling

Documentandu-ma, am aflat ca OCR-ul functioneaza cel mai bine pe imagini de peste 300 PPI, dar nici cu un PPI prea mare. Asa ca transform, daca e nevoie, imaginea intr-una cu PPI de 300.
PPI - Pixels Per Inch!

In [None]:
def image_scaling(image, min_size=300):
    """Upscale a small image so that its longest side is ``min_size`` pixels.

    Images whose longest side already reaches ``min_size`` are returned
    unchanged. Previously every image was resized to 300 px, which shrank
    large images and threw away detail the OCR needs.

    :param image: image array; only ``image.shape[:2]`` (height, width) is read.
    :param min_size: minimum length, in pixels, of the longest side.
    :return: the resized image, or the input itself when no resize is needed.
    """
    height, width = image.shape[:2]
    longest_side = max(width, height)
    # Nothing to do for images that are large enough (or empty, which would
    # otherwise divide by zero below).
    if longest_side == 0 or longest_side >= min_size:
        return image
    factor = min_size / longest_side
    # Keep the aspect ratio; never let a side collapse to 0 pixels.
    new_width = max(1, int(width * factor))
    new_height = max(1, int(height * factor))
    return cv2.resize(image, (new_width, new_height))
show_one_image(image_scaling(imagePrev))

## binarization
Nu am mai folosit-o, fiindca este asemanatoare cu thresholding-ul! Efectiv fac cam aceleasi lucruri! Thresholding-ul pot zice ca merge chiar mai bine!

In [None]:
def binarization(image):
    """Binarise an image with a fixed threshold of 127 (values above become 255)."""
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    return cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)[1]
show_one_image(binarization(imagePrev))

## Noise Removal

Unele imagini pot avea mult noise ceea ce poate compromite evaluarea textului. 
Blurez inca un pic imaginea si sterg noise-ul!

Dupa aceea revin la imaginea neblurata!

In [None]:
def noise_removal(image):
    """Smooth an image and remove small specks.

    Applies a 5x5 Gaussian blur followed by a morphological opening with a
    5x5 rectangular kernel. Thresholding is expected to be applied later by
    the caller.
    """
    smoothed = cv2.GaussianBlur(image, (5, 5), 0)
    # Opening (erode then dilate) removes small noise.
    square_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    return cv2.morphologyEx(smoothed, cv2.MORPH_OPEN, square_kernel)
show_one_image(noise_removal(imagePrev))

## Thresholding

Thresholding poate ajuta enorm in cresterea performantei. 

In [None]:

def thresholding(image):
    """Binarise an image with Otsu's automatic threshold.

    Multi-channel input is converted from BGR to grayscale first;
    single-channel input is used as is.

    Returns:
        The thresholded image, or ``None`` (after printing an error) when
        ``image`` is ``None``.
    """
    if image is None:
        print("Error: Input image is None.")
        return None

    has_colour_channels = len(image.shape) > 2 and image.shape[2] > 1
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if has_colour_channels else image

    # With THRESH_OTSU the threshold argument (0) is ignored and computed from the data.
    return cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

show_one_image(thresholding(normalization(imagePrev)))

## DESKEW

Daca o imagine e rotita, acest lucru de obicei incurca OCR-ul in determinarea textului! Voi incerca sa elimin o parte din aceasta inclinare (skew)!

In [None]:
import cv2

def deskew(image):
    """Rotate an image by an angle estimated from one of its contours.

    The image is Otsu-thresholded, external contours are extracted, and the
    angle of the minimum-area rectangle around the FIRST contour is used to
    build a rotation about the image centre. The output keeps the input size
    (w, h); borders are filled by replicating edge pixels.

    Returns:
        The rotated image; the input image unchanged when no contour is
        found; ``None`` (after printing an error) when ``image`` is ``None``.
    """
    # Guard against a failed load upstream
    if image is None:
        print("Error: Input image is None.")
        return None

    # Convert the image to grayscale if it has more than two dimensions
    if len(image.shape) > 2:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Threshold the image to get a binary image (Otsu picks the threshold)
    _, binary_image = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Find the external contours in the binary image
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Nothing to measure: hand the input back untouched
    if not contours:
        print("No contours found.")
        return image  # Return the original image
    
    # NOTE(review): only contours[0] is measured, not the largest contour or all
    # foreground pixels - confirm that the first contour is representative.
    rect = cv2.minAreaRect(contours[0])
    angle = rect[-1]
    
    # Map the rectangle angle to a rotation angle.
    # NOTE(review): when angle == 90 it is left at 90, so the image is rotated by
    # 90 degrees inside the same (w, h) canvas - confirm this is intended.
    # NOTE(review): the `< -45` branch presumably targets a [-90, 0) angle range;
    # verify against the angle range of the installed OpenCV version.
    if angle < -45:
        angle = -(90 + angle)
    elif angle == 90:
        angle = 90
    else:
        angle = -angle
    
    # Rotate about the image centre, keeping the original canvas size
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    
    return rotated


## CONTRAST ENHANCING

    Pentru imbunatatirea recunoasterii, o alta metoda foarte buna este cresterea contrastului! Textul poate deveni mai lizibil!

In [None]:
def enhance_contrast(image):
    """Increase contrast through histogram equalization.

    Single-channel input is equalized and then converted to a 3-channel BGR
    image. Three-channel input has each channel equalized independently and
    merged back in the same order.
    """
    is_single_channel = len(image.shape) == 2 or image.shape[2] == 1
    if is_single_channel:
        return cv2.cvtColor(cv2.equalizeHist(image), cv2.COLOR_GRAY2BGR)

    # Exactly three channels are expected here.
    b, g, r = cv2.split(image)
    return cv2.merge([cv2.equalizeHist(channel) for channel in (b, g, r)])
show_one_image(enhance_contrast(imagePrev))

Asta o fac pentru a salva progres!

In [None]:
def rename_file(image_path,toadd):
    """Return ``image_path`` with ``toadd`` inserted just before the file extension.

    No file is renamed on disk; only the new path string is built.
    """
    stem, suffix = os.path.splitext(image_path)
    return "{}{}{}".format(stem, toadd, suffix)

In [None]:
def get_actual_text(image_path):
    """Look up the reference text for an image in ``data``.

    Returns:
        The ``actual_text`` of the first entry whose ``image_path`` matches,
        or ``None`` when there is no such entry.
    """
    matches = (
        entry["actual_text"]
        for entry in data
        if entry["image_path"] == image_path
    )
    return next(matches, None)

## CUM VAD DACA procesarea curenta este buna

Salvand versiunea anterioara a imaginii, cea care are cel mai mic WER, o pot compara cu cea care a fost prelucrata din nou!
RETURNEZ PE ACEEA CARE FUNCTIONEAZA CEL MAI BINE

In [None]:
def get_better_ver(original_image,first_image,second_image,cv2f_image,cv2s_image):
    """Pick whichever of two image versions yields the lower WER.

    Args:
        original_image: path used to look up the reference text.
        first_image: path of the first candidate file (sent to OCR).
        second_image: path of the second candidate file (sent to OCR).
        cv2f_image: in-memory image corresponding to ``first_image``.
        cv2s_image: in-memory image corresponding to ``second_image``.

    Returns:
        tuple: ``(image, path)`` of the second candidate when its WER is
        strictly lower, otherwise of the first candidate (ties keep the first).
    """
    reference = get_actual_text(original_image)
    # Run both OCR requests first, then score them in the same order.
    recognised = [get_text_of_file(path) for path in (first_image, second_image)]
    wer_first, wer_second = [wer_word_error_rate(text, reference) for text in recognised]
    if wer_second < wer_first:
        return cv2s_image, second_image
    return cv2f_image, first_image
    

In [None]:
import os

def delete_files_with_name(directory, name):
    """Delete the intermediate ``<base>_proc<N><ext>`` files derived from ``name``.

    Args:
        directory: directory the image path ``name`` is relative to.
        name: original image path, e.g. ``"test2.jpeg"``; every file named
            ``test2_proc<digits>.jpeg`` next to it is removed.

    Other files (the original, ``_final`` versions, other images) are kept.
    """
    base_name, extension = os.path.splitext(os.path.basename(name))
    # Intermediate files are written next to the image, so honour any
    # sub-directory contained in `name`.
    target_dir = os.path.join(directory, os.path.dirname(name))
    # Match on the file name itself. The previous version advanced a counter once
    # per directory entry and compared against "_proc<counter>", so a file was only
    # found if it happened to sit at the matching position in the listing.
    proc_file = re.compile(re.escape(base_name) + r"_proc\d+" + re.escape(extension))
    for filename in os.listdir(target_dir):
        if proc_file.fullmatch(filename):
            # Remove via the full path; a bare file name only works when the
            # target directory is the current working directory.
            os.remove(os.path.join(target_dir, filename))


## PRIMA PRE PROCESARE!
Incerc prin mai multe etape sa prelucrez imaginea si sa ajung la cel mai bun WER!

In [None]:
def preprocessing(image_path):
    """Greedy preprocessing pipeline that keeps a stage only when it lowers the WER.

    Stage order: normalization, noise removal, contrast enhancement,
    deskewing, scaling, thresholding. Each stage is applied to the best
    image so far, saved as ``<name>_proc<k><ext>``, and compared through
    ``get_better_ver`` with the current best; the winner carries on.

    The final winner is saved as ``<name>_final<ext>`` and
    ``delete_files_with_name`` is then called for the working directory.

    Returns:
        The best image array, or ``None`` (after printing an error) when the
        image cannot be loaded.
    """
    image = cv2.imread(image_path)
    if image is None:
        print("Error: Unable to load image.")
        return None

    stages = [
        (normalization, '_proc1'),
        (noise_removal, '_proc2'),
        (enhance_contrast, '_proc3'),
        (deskew, '_proc4'),
        (image_scaling, '_proc5'),
        (thresholding, '_proc6'),
    ]

    best_image, best_path = image, image_path
    for transform, suffix in stages:
        candidate = transform(best_image)
        candidate_path = rename_file(image_path, suffix)
        cv2.imwrite(candidate_path, candidate)
        # Keep whichever of (current best, candidate) OCRs with the lower WER.
        best_image, best_path = get_better_ver(
            image_path, best_path, candidate_path, best_image, candidate
        )

    # Persist the winner, then clean up the intermediate files.
    cv2.imwrite(rename_file(image_path, '_final'), best_image)
    delete_files_with_name(os.getcwd(), image_path)

    return best_image





In [None]:
def preprocessing2(image_path):
    """Variant of the greedy pipeline with a different stage order.

    Stage order: normalization, deskewing, scaling, noise removal, contrast
    enhancement, thresholding. Each stage is applied to the best image so
    far, saved as ``<name>_proc<k><ext>``, and compared through
    ``get_better_ver`` with the current best; the winner carries on.

    The final winner is saved as ``<name>_final<ext>`` and
    ``delete_files_with_name`` is then called for the working directory.

    Returns:
        The best image array, or ``None`` (after printing an error) when the
        image cannot be loaded.
    """
    image = cv2.imread(image_path)
    if image is None:
        print("Error: Unable to load image.")
        return None

    ordered_steps = (
        normalization,
        deskew,
        image_scaling,
        noise_removal,
        enhance_contrast,
        thresholding,
    )

    winner, winner_path = image, image_path
    for index, step in enumerate(ordered_steps, start=1):
        attempt = step(winner)
        attempt_path = rename_file(image_path, '_proc' + str(index))
        cv2.imwrite(attempt_path, attempt)
        # Keep whichever of (current winner, attempt) OCRs with the lower WER.
        winner, winner_path = get_better_ver(
            image_path, winner_path, attempt_path, winner, attempt
        )

    # Persist the winner, then clean up the intermediate files.
    cv2.imwrite(rename_file(image_path, '_final'), winner)
    delete_files_with_name(os.getcwd(), image_path)

    return winner




    

In [None]:
# Run the first pipeline on test2.jpeg and display the image it returns.
preprocessed_image = preprocessing('test2.jpeg')
show_one_image(preprocessed_image)

In [None]:
# Run the second pipeline (different stage order) on test2.jpeg and display the result.
preprocessed_image = preprocessing2('test2.jpeg')
show_one_image(preprocessed_image)

In [None]:
# Run the second pipeline on test1.png and display the result.
preprocessed_image = preprocessing2('test1.png')
show_one_image(preprocessed_image)

In [None]:
# Run the first pipeline on test1.png; this overwrites test1_final.png written by the previous cell.
preprocessed_image = preprocessing('test1.png')
show_one_image(preprocessed_image)

In [None]:
# OCR the '_final' image written by the preprocessing cells and print the recognised text.
print(get_text_of_file('test1_final.png'))

In [None]:
# OCR the '_final' image written by the preprocessing cells and print the recognised text.
print(get_text_of_file('test2_final.jpeg'))