# Character Error Rate (OCR vs Object Detection)

In [1]:
from datasets import load_metric
# Module-level "cer" metric object; the scoring functions in this notebook
# call metric.compute(predictions=..., references=...) on it.
# NOTE(review): load_metric is deprecated in recent `datasets` releases in
# favour of the separate `evaluate` package -- confirm the pinned version.
metric = load_metric("cer")

In [2]:
import os

def get_cer_score(predictions_folder, gt_folder, filenames=None, cer_metric=None):
    """Score a pipeline's predicted strings against the reference strings.

    Each selected file is opened under the same name in both folders and read
    as one string per line. The strings of all files are collected and scored
    with a single ``compute`` call.

    Args:
        predictions_folder: Directory with the predicted strings.
        gt_folder: Directory with the reference strings. It must contain a
            file with the same name for every selected prediction file,
            otherwise ``open`` raises.
        filenames: Optional iterable of file names to score. When omitted,
            every entry of ``predictions_folder`` is used.
        cer_metric: Optional object exposing
            ``compute(predictions=..., references=...)``. When omitted, the
            notebook-level ``metric`` is used.

    Returns:
        Whatever ``compute`` returns for the collected strings.
    """
    if filenames is None:
        filenames = os.listdir(predictions_folder)
    scorer = metric if cer_metric is None else cer_metric

    predictions, ground_truth = [], []
    for filename in filenames:
        with open(os.path.join(predictions_folder, filename)) as f:
            curr_preds = [line.rstrip('\n') for line in f]
        with open(os.path.join(gt_folder, filename)) as f:
            curr_gt = [line.rstrip('\n') for line in f]

        # Keep both lists the same length by giving the shorter side '-'
        # placeholders. They are added *before* this file's own lines, so the
        # file's lines end up paired starting from the end of the file.
        # NOTE(review): confirm that this alignment is the intended one.
        shortfall = len(curr_gt) - len(curr_preds)
        if shortfall > 0:
            predictions.extend(['-'] * shortfall)
        elif shortfall < 0:
            ground_truth.extend(['-'] * -shortfall)
        predictions.extend(curr_preds)
        ground_truth.extend(curr_gt)

    return scorer.compute(predictions=predictions, references=ground_truth)

Сравним результаты работы 2 пайплайнов: OCR и детекции отдельных цифр. Для этого посчитаем Character Error Rate (CER) для обоих подходов.

Начнём с OCR:

При запуске пайплайна на 71 фотографии было отобрано и размечено 103 торца

In [3]:
# Input folders for the OCR pipeline. Files are matched between the two
# folders by file name.
# NOTE(review): absolute, machine-specific paths -- the notebook only runs
# where this directory layout exists.
OCR_PREDICTIONS_FOLDER = '/home/asya/studying/Проекты/serial_number_recognition/copy_OCR_pipeline/jet-project/strings'
OCR_GROUND_TRUTH_FOLDER = '/home/asya/studying/Проекты/serial_number_recognition/copy_OCR_pipeline/jet-project/ground_truth'

In [4]:
# Score the OCR pipeline on every file found in its predictions folder;
# the bare name on the last line displays the value as the cell output.
ocr_cer_score = get_cer_score(OCR_PREDICTIONS_FOLDER, OCR_GROUND_TRUTH_FOLDER)
ocr_cer_score

0.2749003984063745

Теперь пайплайн с детекцией отдельных цифр:

При запуске пайплайна на 71 фотографии было отобрано и размечено 129 торцов

In [5]:
# Input folders for the digit-detection (OD) pipeline. Files are matched
# between the two folders by file name.
# NOTE(review): absolute, machine-specific paths -- the notebook only runs
# where this directory layout exists.
OD_PREDICTIONS_FOLDER = '/home/asya/studying/Проекты/serial_number_recognition/repo/jet-project/strings'
OD_GROUND_TRUTH_FOLDER = '/home/asya/studying/Проекты/serial_number_recognition/repo/jet-project/ground_truth'

In [6]:
# Score the digit-detection pipeline on every file found in its predictions
# folder; the bare name on the last line displays the value.
od_cer_score = get_cer_score(OD_PREDICTIONS_FOLDER, OD_GROUND_TRUTH_FOLDER)
od_cer_score

0.35773026315789475

Получаем, что на всех отобранных алгоритмами фотографиях значение ошибки меньше для OCR-пайплайна. Но это может быть связано ещё и с тем, что модель находит строчки с номерами не на всех фотографиях торцов (пайплайн с детекцией разметил больше прутков, чем OCR).

Поэтому для полноты оценки дополнительно посчитаем значение CER:
1. только на общих (пересекающихся) фотографиях
2. на фотографиях хорошего качества (на взгляд) из первоначальной разбивки
3. на фотографиях плохого качества

## На общих фотографиях

In [7]:
# File names present in the predictions folders of both pipelines.
# Sorted so that the list order is the same on every kernel run (plain set
# iteration order of strings is not stable between interpreter sessions).
IMG_INTERSECTION = sorted(set(os.listdir(OCR_PREDICTIONS_FOLDER)) & set(os.listdir(OD_PREDICTIONS_FOLDER)))

In [8]:
def get_intersection_cer_score(predictions_folder, gt_folder, filenames=None, cer_metric=None):
    """Score a pipeline on a fixed list of files shared by both pipelines.

    Each listed file is opened under the same name in both folders and read
    as one string per line. The strings of all files are collected and scored
    with a single ``compute`` call.

    Args:
        predictions_folder: Directory with the predicted strings.
        gt_folder: Directory with the reference strings; every listed file
            must exist in both folders, otherwise ``open`` raises.
        filenames: Optional iterable of file names to score. When omitted,
            the notebook-level ``IMG_INTERSECTION`` list is used.
        cer_metric: Optional object exposing
            ``compute(predictions=..., references=...)``. When omitted, the
            notebook-level ``metric`` is used.

    Returns:
        Whatever ``compute`` returns for the collected strings.
    """
    if filenames is None:
        filenames = IMG_INTERSECTION
    scorer = metric if cer_metric is None else cer_metric

    predictions, ground_truth = [], []
    for filename in filenames:
        with open(os.path.join(predictions_folder, filename)) as f:
            curr_preds = [line.rstrip('\n') for line in f]
        with open(os.path.join(gt_folder, filename)) as f:
            curr_gt = [line.rstrip('\n') for line in f]

        # Keep both lists the same length by giving the shorter side '-'
        # placeholders. They are added *before* this file's own lines, so the
        # file's lines end up paired starting from the end of the file.
        # NOTE(review): confirm that this alignment is the intended one.
        shortfall = len(curr_gt) - len(curr_preds)
        if shortfall > 0:
            predictions.extend(['-'] * shortfall)
        elif shortfall < 0:
            ground_truth.extend(['-'] * -shortfall)
        predictions.extend(curr_preds)
        ground_truth.extend(curr_gt)

    return scorer.compute(predictions=predictions, references=ground_truth)

In [9]:
# Score the OCR pipeline only on the files listed in IMG_INTERSECTION.
ocr_inter_cer_score = get_intersection_cer_score(OCR_PREDICTIONS_FOLDER, OCR_GROUND_TRUTH_FOLDER)
ocr_inter_cer_score

0.25311203319502074

In [10]:
# Score the digit-detection pipeline only on the files listed in
# IMG_INTERSECTION.
od_inter_cer_score = get_intersection_cer_score(OD_PREDICTIONS_FOLDER, OD_GROUND_TRUTH_FOLDER)
od_inter_cer_score

0.26997840172786175

На общих фотографиях OCR чуть лучше.

## На фотографиях хорошего качества

In [11]:
def get_appropriate_cer_score(predictions_folder, gt_folder, prefix='IMG_17', cer_metric=None):
    """Score a pipeline on the prediction files whose name starts with ``prefix``.

    Each selected file is opened under the same name in both folders and read
    as one string per line. The strings of all files are collected and scored
    with a single ``compute`` call. Files that do not match ``prefix`` are
    never opened.

    Args:
        predictions_folder: Directory with the predicted strings; its
            listing determines which files are considered.
        gt_folder: Directory with the reference strings; must contain every
            selected file, otherwise ``open`` raises.
        prefix: File-name prefix to select. The default ``'IMG_17'`` is the
            subset the notebook's narrative treats as good-quality photos.
        cer_metric: Optional object exposing
            ``compute(predictions=..., references=...)``. When omitted, the
            notebook-level ``metric`` is used.

    Returns:
        Whatever ``compute`` returns for the collected strings.
    """
    scorer = metric if cer_metric is None else cer_metric
    selected = [name for name in os.listdir(predictions_folder) if name.startswith(prefix)]

    predictions, ground_truth = [], []
    for filename in selected:
        with open(os.path.join(predictions_folder, filename)) as f:
            curr_preds = [line.rstrip('\n') for line in f]
        with open(os.path.join(gt_folder, filename)) as f:
            curr_gt = [line.rstrip('\n') for line in f]

        # Keep both lists the same length by giving the shorter side '-'
        # placeholders. They are added *before* this file's own lines, so the
        # file's lines end up paired starting from the end of the file.
        # NOTE(review): confirm that this alignment is the intended one.
        shortfall = len(curr_gt) - len(curr_preds)
        if shortfall > 0:
            predictions.extend(['-'] * shortfall)
        elif shortfall < 0:
            ground_truth.extend(['-'] * -shortfall)
        predictions.extend(curr_preds)
        ground_truth.extend(curr_gt)

    return scorer.compute(predictions=predictions, references=ground_truth)

In [12]:
# Score the OCR pipeline only on files whose name starts with 'IMG_17'.
ocr_good_cer_score = get_appropriate_cer_score(OCR_PREDICTIONS_FOLDER, OCR_GROUND_TRUTH_FOLDER)
ocr_good_cer_score

0.043795620437956206

In [13]:
# Score the digit-detection pipeline only on files whose name starts with
# 'IMG_17'.
od_good_cer_score = get_appropriate_cer_score(OD_PREDICTIONS_FOLDER, OD_GROUND_TRUTH_FOLDER)
od_good_cer_score

0.0072992700729927005

На "хороших" изображениях оба подхода отрабатывают почти идеально, но детекция отдельных цифр чуть лучше.

## На фотографиях плохого качества

In [14]:
def get_bad_cer_score(predictions_folder, gt_folder, prefix='IMG_6', cer_metric=None):
    """Score a pipeline on the prediction files whose name starts with ``prefix``.

    Each selected file is opened under the same name in both folders and read
    as one string per line. The strings of all files are collected and scored
    with a single ``compute`` call. Files that do not match ``prefix`` are
    never opened.

    Args:
        predictions_folder: Directory with the predicted strings; its
            listing determines which files are considered.
        gt_folder: Directory with the reference strings; must contain every
            selected file, otherwise ``open`` raises.
        prefix: File-name prefix to select. The default ``'IMG_6'`` is the
            subset the notebook's narrative treats as poor-quality photos.
        cer_metric: Optional object exposing
            ``compute(predictions=..., references=...)``. When omitted, the
            notebook-level ``metric`` is used.

    Returns:
        Whatever ``compute`` returns for the collected strings.
    """
    scorer = metric if cer_metric is None else cer_metric
    selected = [name for name in os.listdir(predictions_folder) if name.startswith(prefix)]

    predictions, ground_truth = [], []
    for filename in selected:
        with open(os.path.join(predictions_folder, filename)) as f:
            curr_preds = [line.rstrip('\n') for line in f]
        with open(os.path.join(gt_folder, filename)) as f:
            curr_gt = [line.rstrip('\n') for line in f]

        # Keep both lists the same length by giving the shorter side '-'
        # placeholders. They are added *before* this file's own lines, so the
        # file's lines end up paired starting from the end of the file.
        # NOTE(review): confirm that this alignment is the intended one.
        shortfall = len(curr_gt) - len(curr_preds)
        if shortfall > 0:
            predictions.extend(['-'] * shortfall)
        elif shortfall < 0:
            ground_truth.extend(['-'] * -shortfall)
        predictions.extend(curr_preds)
        ground_truth.extend(curr_gt)

    return scorer.compute(predictions=predictions, references=ground_truth)

In [15]:
# Score the OCR pipeline only on files whose name starts with 'IMG_6'.
ocr_bad_cer_score = get_bad_cer_score(OCR_PREDICTIONS_FOLDER, OCR_GROUND_TRUTH_FOLDER)
ocr_bad_cer_score

0.4350758853288364

In [16]:
# Score the digit-detection pipeline only on files whose name starts with
# 'IMG_6'.
od_bad_cer_score = get_bad_cer_score(OD_PREDICTIONS_FOLDER, OD_GROUND_TRUTH_FOLDER)
od_bad_cer_score

0.5366459627329192

На изображениях плохого качества пайплайны отрабатывают заметно хуже. В этом случае OCR справляется лучше.