In [1]:
import os
import warnings
from difflib import SequenceMatcher

import fastwer # Character Error rate (CER), Word error rate (WER)
import pandas as pd
from natsort import natsorted

In [2]:
def character_level_accuracy(ocr_text, ground_truth):
    """Return the character-level similarity between OCR output and its reference.

    The score is ``difflib.SequenceMatcher(...).ratio()``, i.e. ``2 * M / T``
    where ``M`` is the number of matching characters and ``T`` is the combined
    length of both strings.

    Args:
        ocr_text: Text produced by the OCR pipeline.
        ground_truth: Reference transcription to compare against.

    Returns:
        A float in ``[0, 1]``; ``1.0`` means the strings are identical
        (two empty strings also score ``1.0``).
    """
    # autojunk=False: by default difflib ignores characters that occur
    # frequently once the second sequence has 200+ items, which can collapse
    # the score of long transcriptions towards 0 even when they nearly match.
    matcher = SequenceMatcher(None, ocr_text, ground_truth, autojunk=False)
    return matcher.ratio()

In [3]:
def evaluation_dataframe(ground_truth_dir: str, pred_dir: str, base_dir: str = ".."):
    """Build a per-file OCR evaluation table (CER, WER, character accuracy).

    The ``.txt`` files found in ``{base_dir}/{ground_truth_dir}`` and
    ``{base_dir}/{pred_dir}`` are natural-sorted and paired **by position**,
    so both folders are expected to contain the same number of files in
    corresponding order.

    Args:
        ground_truth_dir: Folder holding the reference transcriptions,
            relative to ``base_dir``.
        pred_dir: Folder holding the OCR predictions, relative to ``base_dir``.
        base_dir: Prefix prepended to both folders. Defaults to ``".."``.

    Returns:
        A DataFrame with one row per paired file and the columns
        ``image_file`` (ground-truth file name), ``ground_truth``,
        ``pred_ocr``, ``cer`` and ``wer`` (``fastwer.score_sent`` results
        rounded to 2 decimals) and ``character_accuracy``
        (``character_level_accuracy(pred, truth) * 100``). The columns are
        present even when no files are found.

    Warns:
        UserWarning: If the two folders contain a different number of
            ``.txt`` files; unmatched trailing files are ignored.
    """
    gt_root = f"{base_dir}/{ground_truth_dir}"
    pred_root = f"{base_dir}/{pred_dir}"

    gt_files = natsorted([f for f in os.listdir(gt_root) if f.endswith('.txt')])
    pred_files = natsorted([f for f in os.listdir(pred_root) if f.endswith('.txt')])

    # zip() below stops at the shorter list; make that visible instead of
    # silently dropping (and possibly misaligning) files.
    if len(gt_files) != len(pred_files):
        warnings.warn(
            f"Found {len(gt_files)} ground-truth files but {len(pred_files)} "
            "prediction files; files are paired by sorted position, so "
            "unmatched trailing files are ignored and pairs may be misaligned.",
            stacklevel=2,
        )

    # Collect one record per file pair and build the frame once, rather than
    # filling metric columns row by row through boolean masks.
    records = []
    for gt_name, pred_name in zip(gt_files, pred_files):
        with open(os.path.join(gt_root, gt_name), 'r') as f_gt:
            ref = f_gt.read().strip()  # drop leading/trailing whitespace
        with open(os.path.join(pred_root, pred_name), 'r') as f_pred:
            output = f_pred.read().strip()

        records.append({
            'image_file': gt_name,
            'ground_truth': ref,
            'pred_ocr': output,
            'cer': round(fastwer.score_sent(output, ref, char_level=True), 2),
            'wer': round(fastwer.score_sent(output, ref, char_level=False), 2),
            'character_accuracy': character_level_accuracy(output, ref) * 100,
        })

    # Explicit columns keep the schema stable even for an empty result.
    return pd.DataFrame(
        records,
        columns=['image_file', 'ground_truth', 'pred_ocr',
                 'cer', 'wer', 'character_accuracy'],
    )


# CER/WER for individual pipelines

In [4]:
# Score the per-image tuned OCR outputs against the ground-truth transcriptions.
df_individualized = evaluation_dataframe(
    ground_truth_dir="ground_truth",
    pred_dir="output/tuned_output_text",
)

df_individualized

Unnamed: 0,image_file,ground_truth,pred_ocr,cer,wer,character_accuracy
0,img1.txt,Made in 13502577 China 139BF7A4 B 0451 AB,Made in 13502577 China 139BF7A4 B 0451 AB,0.0,0.0,100.0
1,img2.txt,054.129.808 KYS Germany >PA6-GF30<,054. 129 KyS 808 GERMANY yPA6-GF30,47.06,150.0,61.764706
2,img3.txt,VW AG 038 907 281 D Made in Germany WLO 51299 ...,VW AG 030 907 781 0 Made in Germany #LO 512990...,27.94,60.0,77.697842
3,img4.txt,>PA6-GF15 < 3 1662797 5,PAc-GF15 < 81662797=,26.09,80.0,79.069767
4,img5.txt,4RA 007 791-20 >PBT-GF20<,6 2100 1 791 007 4RRA,76.0,150.0,21.73913
5,img6.txt,1K0 907 655 B ESP-DUOSENSOR,1KO 907 655 B ESP_ DUCQENs~r,29.63,60.0,72.727273
6,img7.txt,VW AG Brown Black 03S 10/2014 5Q0.035.570 IHU ...,AG WW Brown 3 3 IBlack 10/2014 035,62.26,88.89,59.770115
7,img8.txt,5M0.035.570.B Made inTunisia BBT 23/10/09,5M0.035.570.6 Made inTunisia BBT 23/10/09,2.44,20.0,97.560976
8,img9.txt,8E0 035 456 C FKW 0681 021498 19/07 091 0411 2...,8E0 035 456 C QQD FKW le C€ 0681 021498 yp: 09...,29.9,44.44,84.90566
9,img10.txt,BOSCH 8R0 907 637 B 0101 005 ESP-Sensor MM3.R8...,BOSCH 8RO 907 637 B 0101 005 ESP-Sensor MM3.RS...,8.4,17.39,94.957983


In [5]:
# Overall performance: average each metric over all evaluated files
mean_cer = df_individualized['cer'].mean()
mean_wer = df_individualized['wer'].mean()
mean_acc = df_individualized['character_accuracy'].mean()

# All three metrics are percentages; two decimals avoids printing float noise
# such as 31.461999999999993.
print(f'Mean CER = {mean_cer:.2f}%, Mean WER = {mean_wer:.2f}%, Mean Acc = {mean_acc:.2f}%')

Mean CER = 31.461999999999993%, Mean WER = 63.099999999999994%, Mean Acc = 76.14532701933354


# CER/WER for generalized pipeline

In [7]:
# Score the generalized EasyOCR pipeline outputs against the same ground truth.
df_generalized = evaluation_dataframe(
    ground_truth_dir="ground_truth",
    pred_dir="output/output_text_easyocr",
)

df_generalized

Unnamed: 0,image_file,ground_truth,pred_ocr,cer,wer,character_accuracy
0,img1.txt,Made in 13502577 China 139BF7A4 B 0451 AB,n B 435025 4139BF7A4 Made 0451 China AB IG,68.29,100.0,60.240964
1,img2.txt,054.129.808 KYS Germany >PA6-GF30<,"054.129, KyS L808 GERMAnY ~PA6-GF30 <",47.06,150.0,64.788732
2,img3.txt,VW AG 038 907 281 D Made in Germany WLO 51299 ...,%] 1 8 2 818 4,88.24,100.0,7.317073
3,img4.txt,>PA6-GF15 < 3 1662797 5,FA6-GF15 < 316627975 3,21.74,80.0,84.444444
4,img5.txt,4RA 007 791-20 >PBT-GF20<,"5 0 1 221r"" =20 791 007 A4RA",92.0,200.0,18.867925
5,img6.txt,1K0 907 655 B ESP-DUOSENSOR,IKO 907 655 B ESP _ DUCSENS~R 0 L0,44.44,120.0,72.131148
6,img7.txt,VW AG Brown Black 03S 10/2014 5Q0.035.570 IHU ...,AG Brown VW 8 2 8 3 2 Black 10/2014 035,71.7,111.11,56.521739
7,img8.txt,5M0.035.570.B Made inTunisia BBT 23/10/09,5mO.035.570.B Made inTunisia BBT 23/10/09,4.88,20.0,95.121951
8,img9.txt,8E0 035 456 C FKW 0681 021498 19/07 091 0411 2...,8EO 035 456 C FKW C€ 0681 021498 0 7 yp: 091 0...,25.77,44.44,86.538462
9,img10.txt,BOSCH 8R0 907 637 B 0101 005 ESP-Sensor MM3.R8...,BOSCH 8RO 907 637 B 0101 005 ESP-Sensor MM3 R8...,8.4,17.39,94.957983


In [8]:
# Overall performance: average each metric over all evaluated files
mean_cer = df_generalized['cer'].mean()
mean_wer = df_generalized['wer'].mean()
mean_acc = df_generalized['character_accuracy'].mean()

# All three metrics are percentages; two decimals avoids printing float noise
# such as 45.26866666666667.
print(f'Mean CER = {mean_cer:.2f}%, Mean WER = {mean_wer:.2f}%, Mean Acc = {mean_acc:.2f}%')

Mean CER = 45.26866666666667%, Mean WER = 86.488%, Mean Acc = 66.69944032706246
