In [1]:
import os
import cv2
import time

from dotenv import load_dotenv
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials

In [2]:
# --- Authenticate -----------------------------------------------------------
# Read the Azure credentials from `azure.env` and create the Computer Vision
# client used by the OCR helpers below.

load_dotenv('azure.env')

subscription_key = os.getenv("AZURE_SUBSCRIPTION_KEY")
endpoint = os.getenv("AZURE_ENDPOINT")

# Fail fast and say exactly which setting is absent (unset or empty).
missing_vars = [
    var_name
    for var_name, var_value in (
        ("AZURE_SUBSCRIPTION_KEY", subscription_key),
        ("AZURE_ENDPOINT", endpoint),
    )
    if not var_value
]
if missing_vars:
    raise ValueError(
        "Environment variables are missing! Not set: " + ", ".join(missing_vars)
    )

cognitive_services_credentials = CognitiveServicesCredentials(subscription_key)

computervision_client = ComputerVisionClient(endpoint, cognitive_services_credentials)

# --- END - Authenticate -----------------------------------------------------

In [5]:
def preprocess_image(img_path, block_size=11, c=2):
    """Load an image as grayscale and binarize it with adaptive thresholding.

    Args:
        img_path: Path of the image file to read.
        block_size: Size of the pixel neighbourhood used to compute each
            local threshold (OpenCV requires an odd value greater than 1).
        c: Constant subtracted from the Gaussian-weighted neighbourhood mean.

    Returns:
        The binarized image produced by ``cv2.adaptiveThreshold`` (pixels are
        either 0 or 255).

    Raises:
        FileNotFoundError: If OpenCV could not read ``img_path``.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread does not raise on failure; it returns None, which would
    # otherwise surface as an opaque error inside adaptiveThreshold.
    if img is None:
        raise FileNotFoundError(
            f"Could not read image {img_path!r} (missing file or unsupported format)"
        )

    return cv2.adaptiveThreshold(
        img,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        c,
    )

def remove_noise(binary_img):
    """Return a copy of ``binary_img`` smoothed by a median blur of aperture size 3."""
    return cv2.medianBlur(binary_img, 3)

def save_image(image, save_path):
    """Write ``image`` to ``save_path`` using OpenCV.

    Args:
        image: Image array to write.
        save_path: Destination file path; the extension selects the format.

    Raises:
        OSError: If OpenCV reports that the image was not written.
    """
    # cv2.imwrite signals failure by returning False instead of raising, so
    # an unchecked call can leave later steps reading a stale or absent file.
    if not cv2.imwrite(save_path, image):
        raise OSError(f"Failed to write image to {save_path!r}")

def ocr_image(img_path, language="en", timeout=120.0, poll_interval=1.0):
    """Submit an image to the Azure Read API and wait for the result.

    Args:
        img_path: Path of the image file to upload.
        language: Language hint forwarded to ``read_in_stream``.
        timeout: Maximum number of seconds to wait for the operation to leave
            the not-started/running states. ``None`` waits indefinitely.
        poll_interval: Seconds to sleep between status polls.

    Returns:
        The final object returned by ``get_read_result`` (its ``status`` may
        be succeeded or failed; callers should check it).

    Raises:
        TimeoutError: If the operation is still pending after ``timeout``
            seconds.
    """
    with open(img_path, "rb") as image_stream:
        submit_response = computervision_client.read_in_stream(
            image_stream, language=language, raw=True
        )

    # The operation ID is the last path segment of the Operation-Location URL.
    op_location = submit_response.headers["Operation-Location"]
    op_id = op_location.rstrip("/").rsplit("/", 1)[-1]

    # Use the SDK enum (as extract_text_from_result does) rather than raw strings.
    pending_states = (OperationStatusCodes.not_started, OperationStatusCodes.running)
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        ocr_result = computervision_client.get_read_result(op_id)
        if ocr_result.status not in pending_states:
            return ocr_result
        # Bound the wait so a stuck operation cannot hang the notebook forever.
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"OCR operation {op_id} did not finish within {timeout} seconds"
            )
        time.sleep(poll_interval)

def extract_text_from_result(ocr_result):
    """Collect the recognized lines of a Read result into one string.

    Every recognized line is followed by a newline. An empty string is
    returned when the operation status is anything other than succeeded.
    """
    if ocr_result.status != OperationStatusCodes.succeeded:
        return ""

    return "".join(
        recognized.text + "\n"
        for page in ocr_result.analyze_result.read_results
        for recognized in page.lines
    )

In [6]:
# End-to-end run: binarize -> denoise -> save -> OCR -> print recognized text.
# Input image and the location where the preprocessed copy is written.
image_path = 'data/images2/test2.png'
output_path = 'data/images2/test2_processed.png'

# Preprocess: adaptive threshold first, then median blur on the binary image.
binary_image = preprocess_image(image_path)
noise_removed_img = remove_noise(binary_image)

# ocr_image reads from disk, so the preprocessed image must be saved first.
save_image(noise_removed_img, output_path)

# OCR is run on the preprocessed file, not on the original image.
result = ocr_image(output_path)

# Empty string if the OCR operation did not succeed.
extracted_text = extract_text_from_result(result)

print(extracted_text)

Succes
in Rezolvaria
TEMELOR la
LABORA toarele de
Inteligenta Artificiala

