In [1]:
from dotenv import load_dotenv
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials
import os
import time

# Called before the environment lookups below (python-dotenv; presumably loads
# a local .env file into the process environment - confirm against its docs).
load_dotenv()

# Connection settings for the Computer Vision resource; either value is None
# when the corresponding environment variable is not set.
endpoint = os.getenv('AZURE_ENDPOINT')
subscription_key = os.getenv('AZURE_SUBSCRIPTION_KEY')

# Shared client used by get_text_locations below.
cv_client = ComputerVisionClient(
    endpoint,
    CognitiveServicesCredentials(subscription_key),
)


def get_text_locations(image_path, mode="Handwritten", poll_interval=1.0, timeout=None):
    """Run a Read (OCR) operation on a local image and collect the detected lines.

    The image is submitted through the module-level ``cv_client``; the function
    then polls the service until the operation is no longer ``notStarted`` or
    ``running``. Every detected line is also printed to stdout.

    Args:
        image_path: Path of the image file; it is opened in binary mode.
        mode: Forwarded unchanged to ``cv_client.read_in_stream`` as ``mode``.
        poll_interval: Seconds to sleep between two status checks.
        timeout: Maximum number of seconds to keep polling. ``None`` (the
            default) polls until the operation finishes, however long it takes.

    Returns:
        A list of dicts, one per detected line, each with the keys ``'text'``
        and ``'bounding_box'`` taken from the service result. The list is empty
        when the operation ends in any status other than succeeded.

    Raises:
        TimeoutError: If ``timeout`` is given and the operation is still
            pending once that many seconds have elapsed.
    """
    with open(image_path, "rb") as image_file:
        read_response = cv_client.read_in_stream(
            image=image_file,
            mode=mode,
            raw=True
        )

    # The operation id is the last path segment of the Operation-Location URL.
    operation_id = read_response.headers['Operation-Location'].split('/')[-1]

    pending_statuses = ('notStarted', 'running')
    # Monotonic clock so the deadline is unaffected by system clock changes.
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        read_result = cv_client.get_read_result(operation_id)
        if read_result.status not in pending_statuses:
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Read operation {operation_id} did not finish within {timeout} seconds"
            )
        time.sleep(poll_interval)

    results = []
    # Any terminal status other than succeeded yields an empty result list.
    if read_result.status != OperationStatusCodes.succeeded:
        return results

    for text_result in read_result.analyze_result.read_results:
        for line in text_result.lines:
            results.append({
                'text': line.text,
                'bounding_box': line.bounding_box
            })
            print(f"Text detectat: '{line.text}'")
            print(f"Bounding box: {line.bounding_box}")
    return results


def is_box_inside(detected_box, general_box):
    """Tell whether every vertex of ``detected_box`` lies within ``general_box``.

    ``detected_box`` is a flat sequence of alternating x and y values
    (x0, y0, x1, y1, ...). ``general_box`` is indexed as
    [x_min, y_min, x_max, y_max]. Points exactly on the border count as inside,
    and a box with no vertices is reported as inside.
    """
    for pos in range(0, len(detected_box), 2):
        px, py = detected_box[pos], detected_box[pos + 1]
        # Horizontal check first, then vertical, stopping at the first miss.
        if px < general_box[0] or px > general_box[2]:
            return False
        if py < general_box[1] or py > general_box[3]:
            return False
    return True


def evaluate_localization(image_path, box_general, mode="Handwritten"):
    """Print how many detected text boxes fall inside a reference box.

    Runs ``get_text_locations`` on the image, counts the bounding boxes that
    ``is_box_inside`` accepts against ``box_general``, and prints the number of
    detected boxes, the number inside, and their ratio. The ratio is 0 when
    nothing was detected. The function returns nothing.

    Args:
        image_path: Path of the image, passed on to ``get_text_locations``.
        box_general: Reference box, passed on to ``is_box_inside``.
        mode: Passed on to ``get_text_locations``.
    """
    found_boxes = [entry['bounding_box'] for entry in get_text_locations(image_path, mode)]
    total_boxes = len(found_boxes)
    boxes_inside = sum(
        1 for candidate in found_boxes if is_box_inside(candidate, box_general)
    )

    # Guard against division by zero when no text was detected.
    if total_boxes > 0:
        quality = boxes_inside / total_boxes
    else:
        quality = 0

    print(f"Nr boxuri detectae: {total_boxes}")
    print(f"Inside box: {boxes_inside}")
    print(f"Calitate localizare: {quality:.4f}")


# Sample run on one image. The reference region uses the
# [x_min, y_min, x_max, y_max] layout that is_box_inside indexes.
box = [50, 280, 1500, 1300]
image = "test2.jpeg"
evaluate_localization(image_path=image, box_general=box)

Text detectat: 'Lucces in resolvarea'
Bounding box: [86.0, 314.0, 1335.0, 287.0, 1336.0, 443.0, 86.0, 478.0]
Text detectat: 'TEMELOR la'
Bounding box: [140.0, 590.0, 1045.0, 587.0, 1046.0, 723.0, 140.0, 727.0]
Text detectat: 'LABORA toarele de'
Bounding box: [81.0, 915.0, 1007.0, 926.0, 1004.0, 1039.0, 78.0, 1014.0]
Text detectat: 'Inteligenta Artificialà!'
Bounding box: [108.0, 1129.0, 1450.0, 1151.0, 1446.0, 1293.0, 105.0, 1259.0]
Nr boxuri detectae: 4
Inside box: 4
Calitate localizare: 1.0000
