# Plotting the results

In [1]:
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.graph_objects as go

In [2]:
path = os.path.dirname(os.getcwd())

# Perline (Simple prompt LLMs vs. OCR/HTR)

In [3]:
# Files
bleu_scores = {}
cer_scores = {}
for file in os.listdir(path + '/results/scores_comparisons/eval'):
    if file.endswith('.csv'):
        read_file = pd.read_csv(path + '/results/scores_comparisons/eval/' + file, index_col=0)
        name = file.split('.')[0]
        if file.startswith('bleu'):
            name = name[5:]
            bleu_scores[name] = read_file
        elif file.startswith('cer'):
            name = name[4:]
            cer_scores[name] = read_file

In [4]:
bleu_df = pd.DataFrame()  # Initialize df before the loop
for key in bleu_scores.keys():
    if bleu_scores[key].keys().isin(['bleu', 'id', 'file']).all():
        temp = bleu_scores[key][['bleu', 'id']].reset_index(drop=True)
        name = key.split('_perline')[0]
        temp['model'] = name
    else:
        temp = bleu_scores[key].reset_index(drop=False)
        temp = temp[['bleu', 'id']].reset_index(drop=True)
        name = key.split('_perline')[0]
        temp['model'] = name
    bleu_df = pd.concat([bleu_df, temp], axis=0, ignore_index=True)  # Use ignore_index=True to avoid reindexing issues


In [5]:
cer_scores
cer_df = pd.DataFrame()  # Initialize df before the loop
for key in cer_scores.keys():
    if cer_scores[key].keys().isin(['bleu', 'id', 'file']).all():
        temp = cer_scores[key][['cer', 'id']].reset_index(drop=True)
        name = key.split('_perline')[0]
        temp['model'] = name
    else:
        temp = cer_scores[key].reset_index(drop=False)
        temp = temp[['cer', 'id']].reset_index(drop=True)
        name = key.split('_perline')[0]
        temp['model'] = name
    cer_df = pd.concat([cer_df, temp], axis=0, ignore_index=True) 

In [6]:
cer_df['file'] = cer_df['id'].astype(str).apply(lambda x: x.split('_')[0])
cer_df['file'] = cer_df['file'].astype(int)
cer_df

Unnamed: 0,cer,id,model,file
0,0.857319,1_0,claude_two_text_example,1
1,0.024390,1_2,claude_two_text_example,1
2,0.586207,1_4,claude_two_text_example,1
3,0.270270,1_5,claude_two_text_example,1
4,0.025000,1_6,claude_two_text_example,1
...,...,...,...,...
5151,0.284553,20_9,gpt_one_text_example,20
5152,0.310345,20_10,gpt_one_text_example,20
5153,0.000000,20_11,gpt_one_text_example,20
5154,0.027778,20_12,gpt_one_text_example,20


In [7]:
fig = go.Figure()
for j in cer_df['model'].unique():
    cer_mean = cer_df[(cer_df['model'] == j)]['cer'].mean()
    cer_std = cer_df[(cer_df['model'] == j)]['cer'].std()

    fig.add_trace(go.Scatter(x=[j], y=[cer_mean], 
                            mode='markers', name=f'{j}', 
                            error_y=dict(type='data', array=[cer_std], visible=True)))

# fig.add_trace(go.Violin(x=cer_df['model'], y=cer_df['cer'],
#                         points='all'))
fig.show()

In [8]:
bleu_df['model'].unique()

array(['claude_one_text_example', 'claude_one_example', 'gpt_complex',
       'gpt_two_example', 'gpt_two_text_example', 'gpt_refine', 'TrOCR',
       'claude_refine', 'claude_two_text_example', 'TrOCR20_whole_scan',
       'TrOCR20', 'TrOCR50', 'claude_simple', 'claude_complex', 'easyOCR',
       'gpt_simple', 'gpt_one_text_example', 'pytesseractOCR',
       'gpt_one_example', 'claude_two_example'], dtype=object)

In [22]:
if bleu_df['model'].str.contains('gpt').any():
        print(bleu_df['model'].unique())

In [25]:
llm_order = ['gpt_simple', 'claude_simple', 'gpt_complex', 'claude_complex',
             'gpt_one_example', 'claude_one_example', 'gpt_two_example', 'claude_two_example', 
             'gpt_one_text_example', 'claude_one_text_example', 'gpt_two_text_example', 'claude_two_text_example', 
             'gpt_refine', 'claude_refine']
ocr_order = ['easyOCR', 'pytesseractOCR', 'TrOCR'] # Still need Keras
ocr_ft_order = ['TrOCR20', 'TrOCR50']


gpt_color = px.colors.qualitative.Set2[0]
claude_color = px.colors.qualitative.Set2[1]
ocr_color = px.colors.qualitative.Set2[2]
ocr_ft_color = px.colors.qualitative.Set2[3]


fig = go.Figure()

for model in llm_order:
    if 'gpt' in model:
        fig.add_trace(go.Box(
            x=bleu_df[bleu_df['model'] == model]['model'], 
            y=bleu_df[bleu_df['model'] == model]['bleu'],
            name=model,
            boxmean=True,
            marker=dict(color=gpt_color) 
        ))
    else:
        fig.add_trace(go.Box(
            x=bleu_df[bleu_df['model'] == model]['model'], 
            y=bleu_df[bleu_df['model'] == model]['bleu'],
            name=model,
            boxmean=True,
            marker=dict(color=claude_color) 
        ))
# Add traces for OCR models with a different color
for model in ocr_order:
    fig.add_trace(go.Box(
        x=bleu_df[bleu_df['model'] == model]['model'], 
        y=bleu_df[bleu_df['model'] == model]['bleu'],
        name=model,
        boxmean=True,
        marker=dict(color=ocr_color)  # Assign OCR color
    ))

# Add traces for OCR fine-tuned models with another color
for model in ocr_ft_order:
    fig.add_trace(go.Box(
        x=bleu_df[bleu_df['model'] == model]['model'], 
        y=bleu_df[bleu_df['model'] == model]['bleu'],
        name=model,
        boxmean=True,
        marker=dict(color=ocr_ft_color)  # Assign OCR fine-tuned color
    ))

# Get the start and midpoint of each group
ocr_start_index = len(llm_order)  # The first position of ocr_order
ocr_ft_start_index = ocr_start_index + len(ocr_order)  # Start of ocr_ft_order
llm_midpoint = ocr_start_index / 2  # Midpoint of LLM models for placing the text
ocr_midpoint = ocr_start_index + (len(ocr_order) / 2)  # Midpoint of OCR models for placing the text

# Update the layout with custom category ordering, vertical line, and annotations
fig.update_layout(
    title='BLEU Scores',
    xaxis_title='Model',
    yaxis_title='BLEU Score',
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis=dict(
        categoryorder='array',  # Set ordering to be custom
        categoryarray=llm_order + ocr_order + ocr_ft_order  # Concatenate the model orders
    ),
    showlegend=False,
    shapes=[
        # Add a vertical line between LLM and OCR models
        dict(
            type="line",
            x0=ocr_start_index - 0.5,  # Place the line between the two groups
            x1=ocr_start_index - 0.5,
            y0=0,
            y1=1,
            xref="x",
            yref="paper",  # Stretch the line across the plot's full height
            line=dict(color="black", width=2)
        )
    ],
    annotations=[
        # Add annotation for LLMs above LLM models
        dict(
            x=llm_midpoint,  # Midpoint of LLM models
            y=1.2,  # Position above the plot
            xref='x',
            yref='paper',
            text='<LLMs>',
            showarrow=False,
            font=dict(size=14)
        ),
        # Add annotation for OCRs above OCR models
        dict(
            x=ocr_midpoint,  # Midpoint of OCR models
            y=1.2,  # Position above the plot
            xref='x',
            yref='paper',
            text='<OCRs>',
            showarrow=False,
            font=dict(size=14)
        )
    ]
)

fig.show()

