💡 Probleme
Sa se identifice textul scris [de mana] intr-o imagine:

- locatia textului
- textul propriu-zis

📝 Cerinte
Specificaţi, implementaţi și testați subalgoritmi pentru problema enuntata. Sa se determine:

- calitatea procesului de recunoastere a textului, atat la nivel de caracter, cat si la nivel de cuvant.
            a. prin folosirea unei metrici de distanta sau
            b. prin folosirea mai multor metrici de distanta.
- calitatea localizarii corecte a textului in imagine
- posibilitati de imbunatatire a recunoasterii textului

In [12]:
from enum import verify

from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
from array import array
import os
from PIL import Image
import sys
import time
import Levenshtein
from scipy.spatial.distance import hamming


'''
Authenticate
Authenticates your credentials and creates a client.
'''
subscription_key = os.environ["VISION_KEY"]
endpoint = os.environ["VISION_ENDPOINT"]
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
'''
END - Authenticate
'''

image = "poza_eu.jpg"
# img = open("test1.png", "rb")
# img = open("test2.jpeg", "rb")
img = open(image, "rb")
read_response = computervision_client.read_in_stream(
    image=img,
    mode="Printed",
    raw=True
)
# print(read_response.as_dict())

operation_id = read_response.headers['Operation-Location'].split('/')[-1]
while True:
    read_result = computervision_client.get_read_result(operation_id)
    if read_result.status not in ['notStarted', 'running']:
        break
    time.sleep(1)

# Print the detected text, line by line
result = []
boxes = []
if read_result.status == OperationStatusCodes.succeeded:
    for text_result in read_result.analyze_result.read_results:
        for line in text_result.lines:
            print(line.text)
            result.append(line.text)
            boxes.append(line.bounding_box)

print()

# get/define the ground truth
# groundTruth = ["Google Cloud", "Platform"]
# groundTruth = ["Succes in rezolvarea", "tEMELOR la", "LABORAtoaree de", "Inteligenta Artificiala!"]
groundTruth = ["În Neamț a nins", "În Cluj e frig!"]

# compute the performance
noOfCorrectLines = sum(i == j for i, j in zip(result, groundTruth))
print(noOfCorrectLines)

In Neamt a mins
In Elyj e frig !

0


In [13]:
# 1. Hamming

def hamming_distance(detected, truth):
    detected_list = list(" ".join(detected))
    truth_list = list(" ".join(truth))

    length = max(len(detected_list), len(truth_list))
    detected_list.extend(" " * (length - len(detected_list)))
    truth_list.extend(" " * (length - len(truth_list)))

    return hamming(detected_list, truth_list) * length

if len(result) == len(groundTruth):
    print(f"Hamming Distance: {hamming_distance(result, groundTruth)}")
else:
    print("Different text lengths !!")


Hamming Distance: 8.0


In [14]:
# 2. Jacard

def jaccard_similarity(detected, truth):
    detected_set = set(" ".join(detected).split())
    truth_set = set(" ".join(truth).split())
    C = detected_set.intersection(truth_set)
    D = detected_set.union(truth_set)
    return len(C)/len(D)

print(f"Jaccard Similarity: {jaccard_similarity(result, groundTruth):.2f}")


Jaccard Similarity: 0.15


In [23]:
# 3. Levenshtein

def levenshtein_accuracy(detected, truth):
    detected_words = [word for line in detected for word in line.split()]
    truth_words = [word for line in truth for word in line.split()]

    length = max(len(detected_words), len(truth_words))
    detected_words.extend(" " * (length - len(detected_words)))
    truth_words.extend(" " * (length - len(truth_words)))

    total_chars = sum(len(word) for word in truth_words)
    total_words = len(truth_words)

    char_errors = sum(Levenshtein.distance(d, t) for d, t in zip(detected_words, truth_words))

    return  (1 - char_errors / total_chars) * 100 if total_chars > 0 else 0

print(f"Character accuracy (Levenshtein): {levenshtein_accuracy(result, groundTruth):.2f}%")



Character accuracy (Levenshtein): 68.00%


In [22]:
# Text position

def text_position(bounding_box, img_width, img_height):

    center_x = (bounding_box[0] + bounding_box[2] + bounding_box[4] + bounding_box[6]) / 4
    center_y = (bounding_box[1] + bounding_box[3] + bounding_box[5] + bounding_box[7]) / 4


    if center_y < img_height / 3:
        vertical = "Top"
    elif center_y < 2 * img_height / 3:
        vertical = "Middle"
    else:
        vertical = "Bottom"

    if center_x < img_width / 3:
        horizontal = "Left"
    elif center_x < 2 * img_width / 3:
        horizontal = "Center"
    else:
        horizontal = "Right"

    return f"{vertical}-{horizontal}"


with Image.open(image) as img:
    img_width, img_height = img.size


for line, box in zip(result, boxes):
    region = text_position(box, img_width, img_height)
    print(f"'{line}' : {region} ")

contor = 0
verify = 0
test = ["Top-Left", "Middle-Center"]
for line, box in zip(result, boxes):
    region = text_position(box, img_width, img_height)
    if region == test[contor]:
        verify += 1
    contor += 1
print(f" Text position accuracy: {verify/contor * 100} %")




'In Neamt a mins' : Top-Left 
'In Elyj e frig !' : Middle-Center 
 Text position accuracy: 100.0 %


In [None]:
# Posibilitati de imbunatatire a recunoasterii textului:

# - sa prelucram imaginea pentru a mari distanta intre litere ( a confundat uj cu y)
# - sa antrenam modelul pe mai multe exemple de scris de mana (a confundat n -ul cu m pentru ca e scris diferit fata de 'standard'
# - prelucrare imagine la nivel de luminozitate, claritate