In [41]:
import random
import numpy as np
from glob import glob
from tqdm import tqdm
from PIL import Image, ImageOps
import easyocr

# Initialize easyocr reader
easy_model = easyocr.Reader(['en'])

results = []
img_dir = '/playpen-storage/levlevi/player-re-id/src/testing/ocr_model_comparisons/text_recognition/nba_100_test_set'
img_fps = glob(f'{img_dir}/*.jpg')

def augment_image(image):
    angle = random.uniform(-1, 1)
    image = image.rotate(angle)
    max_translate = 5
    translate_x = random.uniform(-max_translate, max_translate)
    translate_y = random.uniform(-max_translate, max_translate)
    image = image.transform(
        image.size, Image.AFFINE,
        (1, 0, translate_x, 0, 1, translate_y)
    )
    scale = random.uniform(0.99, 1.01)
    new_size = (int(image.size[0] * scale), int(image.size[1] * scale))
    image = image.resize(new_size, Image.BICUBIC)
    if scale < 1:
        pad_x = (image.size[0] - new_size[0]) // 2
        pad_y = (image.size[1] - new_size[1]) // 2
        image = ImageOps.expand(image, border=(pad_x, pad_y, pad_x, pad_y))
    else:
        crop_x = (new_size[0] - image.size[0]) // 2
        crop_y = (new_size[1] - image.size[1]) // 2
        image = image.crop((crop_x, crop_y, crop_x + image.size[0], crop_y + image.size[1]))
    return image

def is_valid_jersey_number(text):
    if text.isdigit():
        number = int(text)
        return 0 <= number <= 99
    return False

file_paths = []
for fp in tqdm(img_fps):
    file_paths.append(fp)
    image = Image.open(fp)
    temp_results = []
    for bootstrap in range(10):
        augmented_image = augment_image(image)
        augmented_image_np = np.array(augmented_image)
        results_texts = easy_model.readtext(augmented_image_np, detail=1)
        
        valid_results = [(text, confidence) for (bbox, text, confidence) in results_texts if is_valid_jersey_number(text)]
        temp_results.extend(valid_results)
    
    if temp_results:
        best_result = max(temp_results, key=lambda x: x[1])
        results.append(best_result)
    else:
        results.append(None)

# Print or save the final results
for i, result in enumerate(results):
    print(f"Image {i}: {result}")

100%|██████████| 100/100 [00:13<00:00,  7.51it/s]

Image 0: None
Image 1: None
Image 2: None
Image 3: None
Image 4: None
Image 5: None
Image 6: None
Image 7: None
Image 8: None
Image 9: None
Image 10: ('22', 0.997989934655515)
Image 11: None
Image 12: None
Image 13: None
Image 14: None
Image 15: ('93', 0.9821293278442225)
Image 16: None
Image 17: None
Image 18: None
Image 19: None
Image 20: None
Image 21: None
Image 22: ('16', 0.9853786492296873)
Image 23: None
Image 24: ('15', 0.993182072638799)
Image 25: None
Image 26: None
Image 27: None
Image 28: None
Image 29: None
Image 30: ('34', 0.9996507047897645)
Image 31: None
Image 32: None
Image 33: None
Image 34: None
Image 35: None
Image 36: None
Image 37: None
Image 38: None
Image 39: ('32', 0.9974679143668232)
Image 40: None
Image 41: None
Image 42: None
Image 43: None
Image 44: None
Image 45: None
Image 46: None
Image 47: None
Image 48: ('12', 0.6426323476911264)
Image 49: None
Image 50: None
Image 51: None
Image 52: None
Image 53: None
Image 54: None
Image 55: None
Image 56: None
Ima




In [None]:
import pandas as pd

predictions = []
confs = []
for result in results:
    if result is not None:
        predictions.append(result[0])
        confs.append(result[1])
    else:
        predictions.append(None)
        confs.append(None)
        
ground_truth_labels = [f.split('/')[-1].split('_')[1].split('.')[0] for f in file_paths]
df = pd.DataFrame()
df['ground_truth'] = ground_truth_labels
df['prediction'] = predictions
df['confidence'] = confs

In [40]:
not_null_df = df[df['prediction'].notnull()]
correct = not_null_df['ground_truth'] == not_null_df['prediction']
not_null_df['correct'] = correct
sum(not_null_df['correct'])

8