# OCR Models Evaluation on French and English Datasets


In [8]:
import os
import json
import random
import xml.etree.ElementTree as ET
from pathlib import Path
import pandas as pd
import numpy as np
from PIL import Image
import cv2
import time
# import matplotlib.pyplot as plt
# import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# # OCR Libraries
# import pytesseract
# import easyocr

# from google.cloud import vision


In [None]:
pip install python-doctr[torch]  

In [None]:

from doctr.io import DocumentFile
from doctr.models import ocr_predictor
doctr_available = True


In [None]:
!pip install surya-ocr

In [None]:
pip install --upgrade surya-ocr torch torchvision transformers

In [None]:
!pip install torchvision

In [None]:
!pip install surya-ocr torch torchvision

In [1]:
from PIL import Image
from surya.recognition import RecognitionPredictor
from surya.detection import DetectionPredictor

image = Image.open('/kaggle/input/ocr-dataset/English_OCR_dataset/English_OCR_dataset/images/0001123541.png')
recognition_predictor = RecognitionPredictor()
detection_predictor = DetectionPredictor()

predictions = recognition_predictor([image], det_predictor=detection_predictor)

2025-07-09 18:37:20.465173: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752086240.644285     151 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752086240.696560     151 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Downloading text_recognition model to /root/.cache/datalab/models/text_recognition/2025_05_16: 100%|██████████| 10/10 [00:18<00:00,  1.83s/it]
Downloading text_detection model to /root/.cache/datalab/models/text_detection/2025_05_07: 100%|██████████| 6/6 [00:02<00:00,  2.08it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  2.28it/s]
Recognizing Text: 100%|██████████| 43/43 [00:03<00:00, 11.27it/s]


In [None]:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/kaggle/input/json-data/active-sun-455914-a1-141238abf96c.json'


## Dataset Configuration

In [None]:
ENGLISH_DATASET_PATH = "/kaggle/input/ocr-dataset/English_OCR_dataset/English_OCR_dataset"
FRENCH_DATASET_PATH = "/kaggle/input/ocr-dataset/French_OCR_dataset/French_OCR_dataset"

SAMPLES_PER_DATASET = 10
TOTAL_SAMPLES = SAMPLES_PER_DATASET * 2

## Data Loading 

In [3]:
def get_french_samples(dataset_path, num_samples):
    samples = []
    
    image_files = [f for f in os.listdir(dataset_path) if f.endswith('.jpg')]
    
    valid_pairs = []
    
    for img_file in image_files:
        base_name = img_file.replace('_default.jpg', '')  
        xml_file = None
        
        possible_xml = [
            f"{base_name}_default.xml",  
            f"{base_name[:-1]}g_default.xml",
        ]
        
        for xml_name in possible_xml:
            if os.path.exists(os.path.join(dataset_path, xml_name)):
                xml_file = xml_name
                break
        
        if xml_file:
            valid_pairs.append((img_file, xml_file))
    
    
    selected_pairs = random.sample(valid_pairs, min(num_samples, len(valid_pairs)))
    
    for img_file, xml_file in selected_pairs:
        samples.append({
            'dataset': 'French',
            'image_path': os.path.join(dataset_path, img_file),
            'annotation_path': os.path.join(dataset_path, xml_file),
            'image_name': img_file,
            'annotation_name': xml_file
        })
    
    return samples

def get_english_samples(dataset_path, num_samples):
    samples = []
    
    images_path = os.path.join(dataset_path, 'images')
    annotations_path = os.path.join(dataset_path, 'annotations')
    
    # Get all image files
    image_files = [f for f in os.listdir(images_path) if f.endswith('.png')]
    selected_files = random.sample(image_files, min(num_samples, len(image_files)))
    
    for img_file in selected_files:
        # Find corresponding json
        base_name = img_file.replace('.png', '')
        json_file = f"{base_name}.json"
        
        json_path = os.path.join(annotations_path, json_file)
        if os.path.exists(json_path):
            samples.append({
                'dataset': 'English',
                'image_path': os.path.join(images_path, img_file),
                'annotation_path': json_path,
                'image_name': img_file,
                'annotation_name': json_file
            })
    
    return samples

In [5]:
def extract_text_from_french_xml(xml_path):
    """Extract ground truth text from French dataset XML file"""
    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()
        
        # Find all String elements with CONTENT attribute
        texts = []
        for string_elem in root.findall('.//{http://www.loc.gov/standards/alto/ns-v4#}String'):
            content = string_elem.get('CONTENT')
            if content:
                texts.append(content)
        
        return ' '.join(texts)
    except Exception as e:
        print(f"Error parsing XML {xml_path}: {e}")
        return ""

def extract_text_from_english_json(json_path):
    """Extract ground truth text from English dataset JSON file"""
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        texts = []
        for form_item in data.get('form', []):
            text = form_item.get('text', '').strip()
            if text:
                texts.append(text)
        
        return ' '.join(texts)
    except Exception as e:
        print(f"Error parsing JSON {json_path}: {e}")
        return ""

In [6]:


def extract_boxes_from_french_xml(xml_path):
    """Extract bounding boxes and text from French dataset XML file"""
    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()
        
        boxes = []
        for string_elem in root.findall('.//{http://www.loc.gov/standards/alto/ns-v4#}String'):
            content = string_elem.get('CONTENT')
            hpos = string_elem.get('HPOS')
            vpos = string_elem.get('VPOS')
            width = string_elem.get('WIDTH')
            height = string_elem.get('HEIGHT')
            
            if content and hpos and vpos and width and height:
                x1 = int(float(hpos))
                y1 = int(float(vpos))
                x2 = x1 + int(float(width))
                y2 = y1 + int(float(height))
                
                boxes.append({
                    'text': content,
                    'box': [x1, y1, x2, y2]
                })
        
        return boxes
    except Exception as e:
        print(f"Error parsing XML boxes {xml_path}: {e}")
        return []

def extract_boxes_from_english_json(json_path):
    """Extract bounding boxes and text from English dataset JSON file"""
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        boxes = []
        for form_item in data.get('form', []):
            text = form_item.get('text', '').strip()
            box = form_item.get('box', [])
            
            if text and len(box) == 4:
                boxes.append({
                    'text': text,
                    'box': box  # Already in [x1, y1, x2, y2] format
                })
        
        return boxes
    except Exception as e:
        print(f"Error parsing JSON boxes {json_path}: {e}")
        return []



## Dataset and Load Ground Truth

In [None]:


french_samples = get_french_samples(FRENCH_DATASET_PATH, SAMPLES_PER_DATASET)
english_samples = get_english_samples(ENGLISH_DATASET_PATH, SAMPLES_PER_DATASET)

all_samples = french_samples + english_samples

print(f"French samples: {len(french_samples)}")
print(f"English samples: {len(english_samples)}")
print(f"Total samples: {len(all_samples)}")

for sample in all_samples:
    if sample['dataset'] == 'French':
        sample['ground_truth'] = extract_text_from_french_xml(sample['annotation_path'])
        sample['ground_truth_boxes'] = extract_boxes_from_french_xml(sample['annotation_path'])
    else:
        sample['ground_truth'] = extract_text_from_english_json(sample['annotation_path'])
        sample['ground_truth_boxes'] = extract_boxes_from_english_json(sample['annotation_path'])



## OCR Model

In [17]:
easyocr_reader = None
google_client = None
doctr_model = None
surya_recognition_predictor = None
surya_detection_predictor = None

models_available = {
    'tesseract': True,
    'easyocr': True,
    'surya': True,
    'doctr': True,
    'google_vision':True
}

def initialize_ocr_models():
    global easyocr_reader, google_client, doctr_model, surya_recognition_predictor, surya_detection_predictor
    
    print("Initializing models")
    
    try:
        if models_available['easyocr']:
            easyocr_reader = easyocr.Reader(['en', 'fr'])
    except Exception as e:
        print(f"EasyOCR initialization failed: {e}")
        models_available['easyocr'] = False
    
    #DocTR
    try:
        if models_available['doctr']:
            doctr_model = ocr_predictor(pretrained=True)
    except Exception as e:
        print(f"DocTR initialization failed: {e}")
        models_available['doctr'] = False
    #Surya
    try:
        if models_available['surya']:
            surya_recognition_predictor = RecognitionPredictor()
            surya_detection_predictor = DetectionPredictor()
    except Exception as e:
        print(f"Surya OCR initialization failed: {e}")
        models_available['surya'] = False
        
    # Google Cloud Vision
    try:
        if models_available['google_vision']:
            google_client = vision.ImageAnnotatorClient()
    except Exception as e:
        print(f"Google Cloud Vision initialization failed: {e}")
        models_available['google_vision'] = False
    
    available_models = [k for k, v in models_available.items() if v]
    return available_models



In [None]:
def tesseract_ocr(image_path):
    try:
        image = Image.open(image_path)
        text = pytesseract.image_to_string(image)
        return text.strip()
    except Exception as e:
        print(f"Tesseract error on {image_path}: {e}")
        return ""

def easyocr_ocr(image_path):
    global easyocr_reader
    try:
        if not easyocr_reader:
            print("easyocr not initialized.")
            return ""
        results = easyocr_reader.readtext(image_path)
        text = ' '.join([result[1] for result in results])
        return text.strip()
    except Exception as e:
        print(f"easyocr error on {image_path}: {e}")
        return ""
        
def surya_ocr(image_path):
    global surya_recognition_predictor, surya_detection_predictor
    try:
        if not surya_recognition_predictor or not surya_detection_predictor:
            print("Surya not initialized")
            return ""
        
        image = Image.open(image_path)
        predictions = surya_recognition_predictor([image], det_predictor=surya_detection_predictor)
        
        # Extract text from predictions
        text_parts = []
        for text_line in predictions[0].text_lines:
            text_parts.append(text_line.text)
        
        return ' '.join(text_parts).strip()
    except Exception as e:
        print(f"Surya error: {e}")
        return ""
        
def doctr_ocr(image_path):
    global doctr_model
    try:
        if not doctr_model:
            print("DocTR not initialized")
            return ""
        
        # Load document
        doc = DocumentFile.from_images(image_path)
        
        # Run OCR
        result = doctr_model(doc)
        
        # Extract text
        text_parts = []
        for page in result.pages:
            for block in page.blocks:
                for line in block.lines:
                    for word in line.words:
                        text_parts.append(word.value)
        
        return ' '.join(text_parts).strip()
        
    except Exception as e:
        print(f"doctr error on {image_path}: {e}")
        return ""


def google_vision_ocr(image_path):
    global google_client
    try:
        if not google_client:
            print("gg vsion not initialized")
            return ""
        
        with open(image_path, 'rb') as image_file:
            content = image_file.read()
        
        image = vision.Image(content=content)
        response = google_client.text_detection(image=image)
        texts = response.text_annotations
        
        if texts:
            return texts[0].description.strip()
        return ""
    except Exception as e:
        print(f"gg vision error on {image_path}: {e}")
        return ""



## Run OCR eval

In [None]:

def run_single_ocr_evaluation(samples, ocr_model_name):
    results = []
    
    ocr_functions = {
        'tesseract': tesseract_ocr,
        'easyocr': easyocr_ocr,
        'doctr':doctr_ocr,
        'surya':surya_ocr,
        'google_vision': google_vision_ocr
    }
    
    ocr_function = ocr_functions[ocr_model_name]
    
    print(f"Running {ocr_model_name} on {len(samples)} imgs")
    
    for i, sample in enumerate(samples):
        print(f"Processing {i+1}/{len(samples)}: {sample['image_name']}")
        
        sample_result = {
            'sample_id': i,
            'dataset': sample['dataset'],
            'image_name': sample['image_name'],
            'ground_truth': sample['ground_truth'],
            'ocr_result': {}
        }
        
        start_time = time.time()
        
        predicted_text = ocr_function(sample['image_path'])
        processing_time = time.time() - start_time
        
        # Evaluate results
        metrics = evaluate_ocr_result(predicted_text, sample['ground_truth'])
        
        sample_result['ocr_result'] = {
            'model': ocr_model_name,
            'predicted_text': predicted_text,
            'processing_time': processing_time,
            'metrics': metrics
        }
        print(f"  Similarity: {metrics['similarity']:.3f}, P: {metrics['precision']:.3f}, R: {metrics['recall']:.3f}, F1: {metrics['f1']:.3f}, Time: {processing_time:.2f}s")
        

        results.append(sample_result)
    
    return results

In [33]:
def run_single_ocr_evaluation_with_spatial(samples, ocr_model_name):
    results = []
    
    ocr_functions = {
        'easyocr': easyocr_ocr,
        'doctr':doctr_ocr,
        'google_vision': google_vision_ocr,
        'surya':surya_ocr
    }
    
    ocr_function = ocr_functions[ocr_model_name]
    
    spatial_supported = ocr_model_name in ['easyocr', 'doctr', 'google_vision','surya']
    
    print(f"Running {ocr_model_name} on {len(samples)} samples")
    
    for i, sample in enumerate(samples):
        print(f"Processing {i+1}/{len(samples)}: {sample['image_name']}")
        
        sample_result = {
            'sample_id': i,
            'dataset': sample['dataset'],
            'image_name': sample['image_name'],
            'ground_truth': sample['ground_truth'],
            'ground_truth_boxes': sample.get('ground_truth_boxes', []),
            'ocr_result': {}
        }
        
        start_time = time.time()
        
        # Get OCR text
        predicted_text = ocr_function(sample['image_path'])
        processing_time = time.time() - start_time
        
        # Get OCR boxes
        predicted_boxes = []
        if spatial_supported:
            predicted_boxes = get_ocr_boxes_with_text(ocr_model_name, sample['image_path'])
        
        if spatial_supported and sample.get('ground_truth_boxes'):
            metrics = evaluate_ocr_with_spatial(
                predicted_text, 
                sample['ground_truth'],
                predicted_boxes,
                sample['ground_truth_boxes']
            )
            evaluation_type = 'spatial'
        else:
            metrics = evaluate_ocr_result(predicted_text, sample['ground_truth'])
            evaluation_type = 'text_only'
        
        sample_result['ocr_result'] = {
            'model': ocr_model_name,
            'predicted_text': predicted_text,
            'predicted_boxes': predicted_boxes,
            'processing_time': processing_time,
            'metrics': metrics,
            'evaluation_type': evaluation_type
        }
        
        if evaluation_type == 'spatial':
            print(f"  Text F1: {metrics['text_f1']:.3f}, Detection F1: {metrics['detection_f1']:.3f}, Combined: {metrics['combined_f1']:.3f}, IoU: {metrics['avg_iou']:.3f}")
        else:
            print(f"  Similarity: {metrics['similarity']:.3f}, F1: {metrics['f1']:.3f}, Time: {processing_time:.2f}s")
        
        results.append(sample_result)
    
    return results

## Evaluate

In [None]:
def calculate_iou(box1, box2):
    x1_inter = max(box1[0], box2[0])
    y1_inter = max(box1[1], box2[1])
    x2_inter = min(box1[2], box2[2])
    y2_inter = min(box1[3], box2[3])
    
    if x1_inter >= x2_inter or y1_inter >= y2_inter:
        return 0.0
    
    intersection = (x2_inter - x1_inter) * (y2_inter - y1_inter)
    
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    
    union = area1 + area2 - intersection
    
    return intersection / union if union > 0 else 0.0

In [None]:

def get_ocr_boxes_with_text(ocr_model_name, image_path):
    boxes = []
    
    try:
        if ocr_model_name == 'easyocr':
            global easyocr_reader
            if easyocr_reader:
                results = easyocr_reader.readtext(image_path)
                for result in results:
                    bbox_points = result[0]
                    text = result[1]
                    # Convert bbox points 
                    x_coords = [point[0] for point in bbox_points]
                    y_coords = [point[1] for point in bbox_points]
                    x1, x2 = min(x_coords), max(x_coords)
                    y1, y2 = min(y_coords), max(y_coords)
                    boxes.append({'text': text, 'box': [x1, y1, x2, y2]})
        
        elif ocr_model_name == 'doctr':  
            global doctr_model
            if doctr_model:
                # Load document
                doc = DocumentFile.from_images(image_path)
                result = doctr_model(doc)
                
                from PIL import Image
                img = Image.open(image_path)
                img_width, img_height = img.size
                
                for page in result.pages:
                    for block in page.blocks:
                        for line in block.lines:
                            for word in line.words:
                                # Convert 
                                bbox = word.geometry
                                x1 = int(bbox[0][0] * img_width)
                                y1 = int(bbox[0][1] * img_height)
                                x2 = int(bbox[1][0] * img_width)
                                y2 = int(bbox[1][1] * img_height)
                                
                                boxes.append({'text': word.value,'box': [x1, y1, x2, y2]})

        elif ocr_model_name == 'surya':  
            from PIL import Image
            global surya_recognition_predictor, surya_detection_predictor
            if surya_recognition_predictor and surya_detection_predictor:
                image = Image.open(image_path)
                predictions = surya_recognition_predictor([image], det_predictor=surya_detection_predictor)
                
                for text_line in predictions[0].text_lines:
                    text = text_line.text
                    bbox = text_line.bbox
                    boxes.append({'text': text, 'box': bbox})
                    
        elif ocr_model_name == 'google_vision':
            global google_client
            if google_client:
                with open(image_path, 'rb') as image_file:
                    content = image_file.read()
                
                image = vision.Image(content=content)
                response = google_client.text_detection(image=image)
                texts = response.text_annotations
                
                for annotation in texts[1:]:
                    text = annotation.description
                    vertices = annotation.bounding_poly.vertices
                    
                    # Convert vertices to [x1, y1, x2, y2]
                    x_coords = [vertex.x for vertex in vertices]
                    y_coords = [vertex.y for vertex in vertices]
                    x1, x2 = min(x_coords), max(x_coords)
                    y1, y2 = min(y_coords), max(y_coords)
                    boxes.append({'text': text, 'box': [x1, y1, x2, y2]})

    except Exception as e:
        print(f"Error getting boxes from {ocr_model_name}: {e}")
    
    return boxes

In [None]:
from PIL import Image


def evaluate_spatial_accuracy(predicted_boxes, ground_truth_boxes, iou_threshold=0.5):
    """Evaluate spatial accuracy using bounding box matching"""
    if not predicted_boxes or not ground_truth_boxes:
        return {
            'detection_precision': 0.0,
            'detection_recall': 0.0,
            'detection_f1': 0.0,
            'avg_iou': 0.0,
            'matched_pairs': 0,
            'total_predicted': len(predicted_boxes),
            'total_ground_truth': len(ground_truth_boxes)
        }
    
    # Find best matches between predicted and ground truth boxes
    matched_pairs = []
    used_gt_indices = set()
    
    for pred_idx, pred_box in enumerate(predicted_boxes):
        best_iou = 0.0
        best_gt_idx = -1
        
        for gt_idx, gt_box in enumerate(ground_truth_boxes):
            if gt_idx in used_gt_indices:
                continue
            
            iou = calculate_iou(pred_box['box'], gt_box['box'])
            if iou > best_iou and iou >= iou_threshold:
                best_iou = iou
                best_gt_idx = gt_idx
        
        if best_gt_idx != -1:
            matched_pairs.append({
                'pred_idx': pred_idx,
                'gt_idx': best_gt_idx,
                'iou': best_iou,
                'pred_text': pred_box['text'],
                'gt_text': ground_truth_boxes[best_gt_idx]['text']
            })
            used_gt_indices.add(best_gt_idx)
    
    # Calculate metrics
    num_matches = len(matched_pairs)
    detection_precision = num_matches / len(predicted_boxes) if predicted_boxes else 0.0
    detection_recall = num_matches / len(ground_truth_boxes) if ground_truth_boxes else 0.0
    detection_f1 = 2 * (detection_precision * detection_recall) / (detection_precision + detection_recall) if (detection_precision + detection_recall) > 0 else 0.0
    avg_iou = sum(pair['iou'] for pair in matched_pairs) / num_matches if num_matches > 0 else 0.0
    
    return {
        'detection_precision': detection_precision,
        'detection_recall': detection_recall,
        'detection_f1': detection_f1,
        'avg_iou': avg_iou,
        'matched_pairs': num_matches,
        'total_predicted': len(predicted_boxes),
        'total_ground_truth': len(ground_truth_boxes),
        'matches': matched_pairs
    }

def evaluate_ocr_with_spatial(predicted_text, ground_truth_text, predicted_boxes, ground_truth_boxes):
    # Text based evaluation 
    text_metrics = evaluate_ocr_result(predicted_text, ground_truth_text)
    
    # Spatial evaluation 
    spatial_metrics = evaluate_spatial_accuracy(predicted_boxes, ground_truth_boxes)
    
    # Combined metrics
    combined_metrics = {
        # Text metrics
        'text_similarity': text_metrics['similarity'],
        'text_precision': text_metrics['precision'],
        'text_recall': text_metrics['recall'],
        'text_f1': text_metrics['f1'],
        
        # Spatial metrics
        'detection_precision': spatial_metrics['detection_precision'],
        'detection_recall': spatial_metrics['detection_recall'],
        'detection_f1': spatial_metrics['detection_f1'],
        'avg_iou': spatial_metrics['avg_iou'],
        'matched_boxes': spatial_metrics['matched_pairs'],
        'total_predicted_boxes': spatial_metrics['total_predicted'],
        'total_ground_truth_boxes': spatial_metrics['total_ground_truth'],
        
        # Combined score (average of text F1 and detection F1)
        'combined_f1': (text_metrics['f1'] + spatial_metrics['detection_f1']) / 2,
        
        # Additional info
        'predicted_length': text_metrics.get('predicted_length', 0),
        'ground_truth_length': text_metrics.get('ground_truth_length', 0),
        'spatial_matches': spatial_metrics.get('matches', [])
    }
    
    return combined_metrics

In [None]:
from difflib import SequenceMatcher
import re

def clean_text(text):
    text = re.sub(r'\s+', ' ', text.strip())
    return text.lower()

def calculate_similarity(text1,text2):
    clean1 = clean_text(text1)
    clean2 = clean_text(text2)
    return SequenceMatcher(None, clean1, clean2).ratio()

def calculate_word_accuracy(predicted, ground_truth):
    pred_words = set(clean_text(predicted).split())
    gt_words = set(clean_text(ground_truth).split())
    
    if not gt_words:
        return {'precision': 0.0, 'recall': 0.0, 'f1': 0.0}
    
    if not pred_words:
        return {'precision': 0.0, 'recall': 0.0, 'f1': 0.0}
    
    intersection = pred_words.intersection(gt_words)
    
    precision = len(intersection) / len(pred_words) if pred_words else 0.0
    recall = len(intersection) / len(gt_words) if gt_words else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    
    return {
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

def evaluate_ocr_result(predicted, ground_truth):
    similarity = calculate_similarity(predicted, ground_truth)
    word_metrics = calculate_word_accuracy(predicted, ground_truth)
    
    return {
        'similarity': similarity,
        'precision': word_metrics['precision'],
        'recall': word_metrics['recall'],
        'f1': word_metrics['f1'],
        'predicted_length': len(predicted),
        'ground_truth_length': len(ground_truth)
    }

In [21]:
initialize_ocr_models()

Initializing OCR models...
✗ EasyOCR initialization failed: name 'easyocr' is not defined
✗ DocTR initialization failed: name 'ocr_predictor' is not defined
✓ Surya OCR initialized
✗ Google Cloud Vision initialization failed: name 'vision' is not defined

Available models: ['tesseract', 'surya']


['tesseract', 'surya']

In [31]:
surya_results = run_single_ocr_evaluation(all_samples,'surya')

Running surya on 20 imgs
Processing 1/20: 12_0bb90_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.26s/it]
Recognizing Text: 100%|██████████| 122/122 [00:06<00:00, 17.64it/s] 


  Similarity: 0.335, P: 0.817, R: 0.801, F1: 0.809, Time: 8.89s
Processing 2/20: 14_e26ee_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.22s/it]
Recognizing Text: 100%|██████████| 127/127 [00:07<00:00, 16.49it/s] 


  Similarity: 0.140, P: 0.976, R: 0.971, F1: 0.974, Time: 9.24s
Processing 3/20: 2_f1a3f_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.09s/it]
Recognizing Text: 100%|██████████| 22/22 [00:03<00:00,  5.83it/s]


  Similarity: 0.272, P: 0.557, R: 0.576, F1: 0.567, Time: 5.05s
Processing 4/20: 8_5eeab_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.28s/it]
Recognizing Text: 100%|██████████| 120/120 [00:07<00:00, 16.95it/s] 


  Similarity: 0.347, P: 0.777, R: 0.770, F1: 0.774, Time: 8.66s
Processing 5/20: 10_10fe5_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
Recognizing Text: 100%|██████████| 125/125 [00:07<00:00, 17.59it/s] 


  Similarity: 0.366, P: 0.821, R: 0.802, F1: 0.811, Time: 8.60s
Processing 6/20: 16_e8fb7_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.18s/it]
Recognizing Text: 100%|██████████| 115/115 [00:08<00:00, 13.46it/s] 


  Similarity: 0.228, P: 0.937, R: 0.952, F1: 0.944, Time: 10.07s
Processing 7/20: 17_24e64_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.21s/it]
Recognizing Text: 100%|██████████| 123/123 [00:08<00:00, 14.95it/s]


  Similarity: 0.168, P: 0.944, R: 0.959, F1: 0.952, Time: 9.78s
Processing 8/20: 11_ef202_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.21s/it]
Recognizing Text: 100%|██████████| 124/124 [00:06<00:00, 18.50it/s] 


  Similarity: 0.233, P: 0.814, R: 0.812, F1: 0.813, Time: 8.20s
Processing 9/20: 5_1df9d_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.21s/it]
Recognizing Text: 100%|██████████| 122/122 [00:08<00:00, 14.61it/s] 


  Similarity: 0.225, P: 0.837, R: 0.812, F1: 0.824, Time: 9.94s
Processing 10/20: 18_ac1da_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.26s/it]
Recognizing Text: 100%|██████████| 122/122 [00:09<00:00, 13.03it/s] 


  Similarity: 0.165, P: 0.918, R: 0.931, F1: 0.925, Time: 11.39s
Processing 11/20: 0060080406.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.67it/s]
Recognizing Text: 100%|██████████| 76/76 [00:07<00:00,  9.86it/s]


  Similarity: 0.062, P: 0.662, R: 0.718, F1: 0.689, Time: 8.04s
Processing 12/20: 0001477983.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.85it/s]
Recognizing Text: 100%|██████████| 42/42 [00:03<00:00, 10.73it/s]


  Similarity: 0.185, P: 0.856, R: 0.896, F1: 0.876, Time: 4.22s
Processing 13/20: 88547278_88547279.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  5.01it/s]
Recognizing Text: 100%|██████████| 56/56 [00:08<00:00,  6.49it/s]


  Similarity: 0.471, P: 0.545, R: 0.808, F1: 0.651, Time: 8.92s
Processing 14/20: 92094746.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.90it/s]
Recognizing Text: 100%|██████████| 35/35 [00:02<00:00, 13.46it/s]


  Similarity: 0.060, P: 0.703, R: 0.735, F1: 0.719, Time: 2.89s
Processing 15/20: 0001239897.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  5.01it/s]
Recognizing Text: 100%|██████████| 56/56 [00:03<00:00, 17.61it/s]


  Similarity: 0.048, P: 0.494, R: 0.472, F1: 0.483, Time: 3.46s
Processing 16/20: 00836816.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  5.12it/s]
Recognizing Text: 100%|██████████| 36/36 [00:04<00:00,  8.53it/s]


  Similarity: 0.238, P: 0.699, R: 0.711, F1: 0.705, Time: 4.50s
Processing 17/20: 00860012_00860014.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.83it/s]
Recognizing Text: 100%|██████████| 68/68 [00:04<00:00, 15.13it/s]


  Similarity: 0.024, P: 0.722, R: 0.712, F1: 0.717, Time: 4.81s
Processing 18/20: 71108371.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.79it/s]
Recognizing Text: 100%|██████████| 63/63 [00:04<00:00, 14.25it/s]


  Similarity: 0.236, P: 0.915, R: 0.910, F1: 0.912, Time: 4.74s
Processing 19/20: 01122115.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.41it/s]
Recognizing Text: 100%|██████████| 62/62 [00:03<00:00, 16.34it/s]


  Similarity: 0.124, P: 0.683, R: 0.658, F1: 0.670, Time: 4.14s
Processing 20/20: 00836244.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.23it/s]
Recognizing Text: 100%|██████████| 87/87 [00:04<00:00, 19.52it/s]

  Similarity: 0.094, P: 0.532, R: 0.517, F1: 0.524, Time: 4.80s





In [40]:
spatial_surya_results = run_single_ocr_evaluation_with_spatial(all_samples,'surya')

Running surya on 20 samples...
Processing 1/20: 12_0bb90_default.jpg




Detecting bboxes:   0%|          | 0/1 [00:00<?, ?it/s][A[A

Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.24s/it][A[A


Recognizing Text:   0%|          | 0/122 [00:00<?, ?it/s][A[A

Recognizing Text:   1%|          | 1/122 [00:04<09:39,  4.79s/it][A[A

Recognizing Text:   6%|▌         | 7/122 [00:05<01:01,  1.88it/s][A[A

Recognizing Text:   7%|▋         | 9/122 [00:05<00:45,  2.51it/s][A[A

Recognizing Text:  12%|█▏        | 15/122 [00:05<00:19,  5.38it/s][A[A

Recognizing Text:  15%|█▍        | 18/122 [00:05<00:14,  6.95it/s][A[A

Recognizing Text:  17%|█▋        | 21/122 [00:05<00:11,  8.81it/s][A[A

Recognizing Text:  20%|█▉        | 24/122 [00:05<00:08, 10.91it/s][A[A

Recognizing Text:  22%|██▏       | 27/122 [00:05<00:07, 13.13it/s][A[A

Recognizing Text:  25%|██▌       | 31/122 [00:05<00:05, 16.83it/s][A[A

Recognizing Text:  31%|███       | 38/122 [00:05<00:03, 25.50it/s][A[A

Recognizing Text:  35%|███▌      | 43/122 [00:06<00:02, 26.36

  Text F1: 0.809, Detection F1: 0.343, Combined: 0.576, IoU: 0.569
Processing 2/20: 14_e26ee_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.23s/it]
Recognizing Text: 100%|██████████| 127/127 [00:07<00:00, 16.52it/s] 
Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.21s/it]
Recognizing Text: 100%|██████████| 127/127 [00:07<00:00, 16.57it/s] 


  Text F1: 0.974, Detection F1: 0.768, Combined: 0.871, IoU: 0.622
Processing 3/20: 2_f1a3f_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.09s/it]
Recognizing Text: 100%|██████████| 22/22 [00:03<00:00,  5.80it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.10s/it]
Recognizing Text: 100%|██████████| 22/22 [00:03<00:00,  5.79it/s]


  Text F1: 0.567, Detection F1: 0.622, Combined: 0.594, IoU: 0.594
Processing 4/20: 8_5eeab_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
Recognizing Text: 100%|██████████| 120/120 [00:07<00:00, 16.89it/s] 
Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
Recognizing Text: 100%|██████████| 120/120 [00:07<00:00, 16.90it/s] 


  Text F1: 0.774, Detection F1: 0.492, Combined: 0.633, IoU: 0.569
Processing 5/20: 10_10fe5_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.24s/it]
Recognizing Text: 100%|██████████| 125/125 [00:07<00:00, 17.59it/s] 
Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
Recognizing Text: 100%|██████████| 125/125 [00:07<00:00, 17.47it/s] 


  Text F1: 0.811, Detection F1: 0.258, Combined: 0.535, IoU: 0.551
Processing 6/20: 16_e8fb7_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.22s/it]
Recognizing Text: 100%|██████████| 115/115 [00:08<00:00, 13.33it/s] 
Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.22s/it]
Recognizing Text: 100%|██████████| 115/115 [00:08<00:00, 13.29it/s] 


  Text F1: 0.944, Detection F1: 0.678, Combined: 0.811, IoU: 0.558
Processing 7/20: 17_24e64_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.25s/it]
Recognizing Text: 100%|██████████| 123/123 [00:08<00:00, 14.96it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.26s/it]
Recognizing Text: 100%|██████████| 123/123 [00:08<00:00, 14.94it/s]


  Text F1: 0.952, Detection F1: 0.854, Combined: 0.903, IoU: 0.619
Processing 8/20: 11_ef202_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.25s/it]
Recognizing Text: 100%|██████████| 124/124 [00:06<00:00, 18.34it/s] 
Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.21s/it]
Recognizing Text: 100%|██████████| 124/124 [00:06<00:00, 18.37it/s] 


  Text F1: 0.813, Detection F1: 0.178, Combined: 0.495, IoU: 0.557
Processing 9/20: 5_1df9d_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
Recognizing Text: 100%|██████████| 122/122 [00:08<00:00, 14.57it/s] 
Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.21s/it]
Recognizing Text: 100%|██████████| 122/122 [00:08<00:00, 14.57it/s] 


  Text F1: 0.824, Detection F1: 0.813, Combined: 0.818, IoU: 0.628
Processing 10/20: 18_ac1da_default.jpg


Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.26s/it]
Recognizing Text: 100%|██████████| 122/122 [00:09<00:00, 13.02it/s] 
Detecting bboxes: 100%|██████████| 1/1 [00:01<00:00,  1.25s/it]
Recognizing Text: 100%|██████████| 122/122 [00:09<00:00, 13.02it/s] 


  Text F1: 0.925, Detection F1: 0.959, Combined: 0.942, IoU: 0.662
Processing 11/20: 0060080406.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.67it/s]
Recognizing Text: 100%|██████████| 76/76 [00:07<00:00,  9.80it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.68it/s]
Recognizing Text: 100%|██████████| 76/76 [00:07<00:00,  9.84it/s]


  Text F1: 0.689, Detection F1: 0.622, Combined: 0.656, IoU: 0.704
Processing 12/20: 0001477983.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.92it/s]
Recognizing Text: 100%|██████████| 42/42 [00:03<00:00, 10.74it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.86it/s]
Recognizing Text: 100%|██████████| 42/42 [00:03<00:00, 10.71it/s]


  Text F1: 0.876, Detection F1: 0.447, Combined: 0.661, IoU: 0.731
Processing 13/20: 88547278_88547279.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.96it/s]
Recognizing Text: 100%|██████████| 56/56 [00:08<00:00,  6.50it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.66it/s]
Recognizing Text: 100%|██████████| 56/56 [00:08<00:00,  6.49it/s]


  Text F1: 0.651, Detection F1: 0.521, Combined: 0.586, IoU: 0.733
Processing 14/20: 92094746.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.94it/s]
Recognizing Text: 100%|██████████| 35/35 [00:02<00:00, 13.39it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.97it/s]
Recognizing Text: 100%|██████████| 35/35 [00:02<00:00, 13.44it/s]


  Text F1: 0.719, Detection F1: 0.194, Combined: 0.456, IoU: 0.657
Processing 15/20: 0001239897.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  5.17it/s]
Recognizing Text: 100%|██████████| 56/56 [00:03<00:00, 17.64it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  5.11it/s]
Recognizing Text: 100%|██████████| 56/56 [00:03<00:00, 17.57it/s]


  Text F1: 0.483, Detection F1: 0.621, Combined: 0.552, IoU: 0.771
Processing 16/20: 00836816.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  5.21it/s]
Recognizing Text: 100%|██████████| 36/36 [00:04<00:00,  8.59it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  5.16it/s]
Recognizing Text: 100%|██████████| 36/36 [00:04<00:00,  8.58it/s]


  Text F1: 0.705, Detection F1: 0.585, Combined: 0.645, IoU: 0.729
Processing 17/20: 00860012_00860014.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.82it/s]
Recognizing Text: 100%|██████████| 68/68 [00:04<00:00, 15.14it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.50it/s]
Recognizing Text: 100%|██████████| 68/68 [00:04<00:00, 15.04it/s]


  Text F1: 0.717, Detection F1: 0.744, Combined: 0.731, IoU: 0.717
Processing 18/20: 71108371.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.79it/s]
Recognizing Text: 100%|██████████| 63/63 [00:04<00:00, 14.21it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.64it/s]
Recognizing Text: 100%|██████████| 63/63 [00:04<00:00, 14.21it/s]


  Text F1: 0.912, Detection F1: 0.694, Combined: 0.803, IoU: 0.757
Processing 19/20: 01122115.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.43it/s]
Recognizing Text: 100%|██████████| 62/62 [00:03<00:00, 16.21it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.38it/s]
Recognizing Text: 100%|██████████| 62/62 [00:03<00:00, 16.31it/s]


  Text F1: 0.670, Detection F1: 0.179, Combined: 0.425, IoU: 0.557
Processing 20/20: 00836244.png


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.73it/s]
Recognizing Text: 100%|██████████| 87/87 [00:04<00:00, 19.56it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  4.71it/s]
Recognizing Text: 100%|██████████| 87/87 [00:04<00:00, 19.42it/s]

  Text F1: 0.524, Detection F1: 0.859, Combined: 0.692, IoU: 0.699





In [None]:
doctr_results = run_single_ocr_evaluation(all_samples, 'doctr')

In [None]:
spatial_doctr_results = run_single_ocr_evaluation_with_spatial(all_samples,'doctr')

In [None]:
easyocr_results = run_single_ocr_evaluation(all_samples, 'easyocr')

In [None]:
spatial_easyocr_results = run_single_ocr_evaluation_with_spatial(all_samples, 'easyocr')

In [None]:
tesseract_results = run_single_ocr_evaluation(all_samples, 'tesseract')

In [None]:
gg_vision_results = run_single_ocr_evaluation(all_samples,'google_vision')


In [None]:
spatial_gg_vision_results = run_single_ocr_evaluation_with_spatial(all_samples,'google_vision')


In [41]:
import json
import numpy as np

def convert_numpy_types(obj):
    if isinstance(obj, dict):
        return {key: convert_numpy_types(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    else:
        return obj

# Convert all results
all_ocr_results = {
    # 'easyocr': easyocr_results,
    # 'tesseract': tesseract_results,
    # 'google_vision': gg_vision_results, 
    # 'spatial_easyocr': spatial_easyocr_results,
    # 'spatial_google_vision': spatial_gg_vision_results,
    # 'doctr': doctr_results,
    # 'spatial_doctr': spatial_doctr_results
    'surya':surya_results,
    'spatial_surya':spatial_surya_results
}

converted_results = convert_numpy_types(all_ocr_results)

with open('all_ocr_results_surya.json', 'w', encoding='utf-8') as f:
    json.dump(converted_results, f, indent=2, ensure_ascii=False)

## Display resuls

In [28]:
def calculate_average_metrics(json_file_path, output_csv_path=None):
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    results = []
    
    for model_name, samples in data.items():
        print(f"Processing {model_name}...")
        
        # Initialize metric lists
        metrics_lists = {
            'similarity': [],
            'precision': [],
            'recall': [],
            'f1': [],
            'predicted_length': [],
            'ground_truth_length': [],
            'processing_time': [],
            'detection_precision': [],
            'detection_recall': [],
            'detection_f1': [],
            'avg_iou': []
        }
        
        # Collect metrics
        for sample in samples:
            metrics = sample['ocr_result']['metrics']
            processing_time = sample['ocr_result']['processing_time']
            
            # Add processing time
            metrics_lists['processing_time'].append(processing_time)
            
            # text metrics 
            similarity = metrics.get('similarity', metrics.get('text_similarity'))
            precision = metrics.get('precision', metrics.get('text_precision'))
            recall = metrics.get('recall', metrics.get('text_recall'))
            f1 = metrics.get('f1', metrics.get('text_f1'))
            
            if similarity is not None:
                metrics_lists['similarity'].append(similarity)
            if precision is not None:
                metrics_lists['precision'].append(precision)
            if recall is not None:
                metrics_lists['recall'].append(recall)
            if f1 is not None:
                metrics_lists['f1'].append(f1)
            
            if 'predicted_length' in metrics:
                metrics_lists['predicted_length'].append(metrics['predicted_length'])
            if 'ground_truth_length' in metrics:
                metrics_lists['ground_truth_length'].append(metrics['ground_truth_length'])
            
            # Detection metrics 
            for metric_name in ['detection_precision', 'detection_recall', 'detection_f1', 'avg_iou']:
                if metric_name in metrics:
                    metrics_lists[metric_name].append(metrics[metric_name])
        
        # Calculate averages 
        result = {'model': model_name, 'sample_count': len(samples)}
        
        for metric_name, values in metrics_lists.items():
            if len(values) > 0:  
                avg_value = sum(values) / len(values)
                result[f'avg_{metric_name}'] = round(avg_value, 4)
        
        results.append(result)
    
    df = pd.DataFrame(results)
    
    df = df.sort_values('model').reset_index(drop=True)
    
    if output_csv_path:
        df.to_csv(output_csv_path, index=False)
        print(f"Results saved to {output_csv_path}")
    
    return df

In [53]:
def display_summary_table(df):
    text_df = df[~df['model'].str.contains('spatial', case=False)].copy()
    text_cols = ['model', 'avg_similarity', 'avg_precision', 'avg_recall', 'avg_f1', 'avg_processing_time']
    available_text_cols = [col for col in text_cols if col in text_df.columns]
    text_table = text_df[available_text_cols].copy()
    
    spatial_df = df[df['model'].str.contains('spatial', case=False)].copy()
    
    if len(spatial_df) > 0:
        detection_cols = ['model', 'avg_detection_precision', 'avg_detection_recall', 'avg_detection_f1', 'avg_avg_iou']
        available_detection_cols = [col for col in detection_cols if col in spatial_df.columns]
        spatial_detection_df = spatial_df[available_detection_cols].copy()
    else:
        spatial_detection_df = None
    
    return text_table, spatial_detection_df

In [54]:
df = calculate_average_metrics("/home/duyle/Documents/Case-Study2/combined_ocr_results.json", "ocr_average_metrics.csv")

# Display summary
text, spatial = display_summary_table(df)

Processing easyocr...
Processing tesseract...
Processing google_vision...
Processing spatial_easyocr...
Processing spatial_google_vision...
Processing doctr...
Processing spatial_doctr...
Processing surya...
Processing spatial_surya...
Results saved to ocr_average_metrics.csv


In [55]:
text

Unnamed: 0,model,avg_similarity,avg_precision,avg_recall,avg_f1,avg_processing_time
0,doctr,0.2471,0.7736,0.7857,0.778,11.3139
1,easyocr,0.1811,0.5926,0.5945,0.593,5.1605
2,google_vision,0.456,0.8065,0.8015,0.8025,0.6742
7,surya,0.2011,0.7606,0.7762,0.7669,7.0168
8,tesseract,0.3203,0.6325,0.6487,0.638,4.3286


In [56]:
spatial

Unnamed: 0,model,avg_detection_precision,avg_detection_recall,avg_detection_f1,avg_avg_iou
3,spatial_doctr,0.1034,0.3042,0.1501,0.6706
4,spatial_easyocr,0.2321,0.4128,0.286,0.69
5,spatial_google_vision,0.0405,0.1588,0.0635,0.3493
6,spatial_surya,0.5751,0.5777,0.5716,0.6491
