In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


# Base LLM identifiers. A result row is attributed to the first of these names
# that occurs in its `model` string, and the result tables list them in this order.
llms = ["gemma3n", "qwen2.5", "llama3.1", "orca2"]


# Short code -> value that the result CSVs carry in their `examples` column.
examples = dict(
    FIC="join_examples_dict_1",
    CIC="join_examples_dict_2",
    FSC="vector_based_examples_dict_1",
    CSC="vector_based_examples_dict_2",
)

# Four of the labels that `extract_model_type` can produce (with an empty suffix).
model_types = ["TF", "FT", "UN", "IN"]


def extract_model_type(model, suffix=""):
    """Classify a model string into a short model-type label.

    Args:
        model: Model identifier as stored in the `model` column of a results file.
        suffix: Text appended to the "UN" / "IN" labels only.

    Returns:
        "P1" when `model` mentions none of the names in `llms`, or when none of
        the markers below is present; otherwise the label of the first marker
        found: "UN" + suffix, "IN" + suffix, "TF", "FT" or "ZS".
    """
    if not any(name in model for name in llms):
        return "P1"

    # Combination markers take precedence over the per-model "-..-p2" markers,
    # because a combined model string may contain those markers as well.
    for marker, tag in ((" U ", "UN"), (" ∩ ", "IN")):
        if marker in model:
            return f"{tag}{suffix}"

    for marker, tag in (("-tf-p2", "TF"), ("-ft-p2", "FT"), ("-z-p2", "ZS")):
        if marker in model:
            return tag

    return "P1"
    
def models_from(model):
    """Return the first entry of `llms` that occurs in `model`, or None if none does."""
    return next((name for name in llms if name in model), None)


# def df_plot_show(groups, datasets): 
#     prompt_types = list(groups.keys())
#     average_f1 = np.array(list(groups.values()))

#     bar_width = 0.2                   # width of each bar
#     group_spacing = 0.5  # extra space between groups

#     group_width = len(datasets) * bar_width + group_spacing
#     x = np.arange(len(prompt_types)) * group_width

#     # Create the bar plot
#     fig, ax = plt.subplots(figsize=(14, 6))
#     for i, dataset in enumerate(datasets):
#         ax.bar(x + i*bar_width, average_f1[:, i], width=bar_width, label=dataset)
        
#     # Labels and styling
#     ax.set_xlabel("Prompt Type", fontsize=12)
#     ax.set_ylabel("F1 Score", fontsize=12)
#     ax.set_title("Average F1 Score per Prompt Type and Dataset (Standard Blocking)", fontsize=14)
#     ax.set_xticks(x + bar_width*(len(datasets)-1)/2)
#     ax.set_xticklabels(prompt_types)
#     ax.legend(title="Datasets")

    
#     plt.tight_layout()

#     plt.show()
    




In [12]:


def _load_un_fsc_results(candidate_pairs, dataset, results_dir='../results'):
    """Load one dataset's result rows for union ('UN') models run with the 'FSC' examples.

    For 'original' candidate pairs the rows come from `<dataset>_clustering.csv`;
    for any other candidate-pair source they come from `<dataset>_ui.csv`, whose
    precision/recall/f1 columns are multiplied by 100.
    """
    if candidate_pairs == 'original':
        results = pd.read_csv(f'{results_dir}/{candidate_pairs}/{dataset}_clustering.csv')
    else:
        results = pd.read_csv(f'{results_dir}/{candidate_pairs}/{dataset}_ui.csv')
        # NOTE(review): presumably the *_ui.csv files store fractions in [0, 1]
        # while the clustering files store percentages - confirm.
        for col in ["precision", "recall", "f1"]:
            results[col] = results[col] * 100

    results['model_type'] = results['model'].apply(lambda x: extract_model_type(x, ''))
    results['_from'] = results['model'].apply(models_from)
    return results[(results['model_type'] == 'UN') & (results['examples'] == examples['FSC'])]


def results_extraction_main(results_dir='../results', datasets=None):
    """Collect per-LLM mean recall / precision / f1 for the 'UN' models with 'FSC' examples.

    Each results file is read exactly once per (candidate_pairs, dataset) and only
    the file that is actually used is opened.

    Args:
        results_dir: Directory containing the `original/` and `standard_blocking/`
            result folders. Defaults to '../results'.
        datasets: Dataset identifiers to process, in row order. Defaults to
            ['D2', 'D5', 'D6', 'D7', 'D8'].

    Returns:
        (groups, indexes): `groups` maps (candidate_pairs, metric) to a list of
        means ordered by dataset and then by `llms`; `indexes` holds the matching
        "<dataset>-<llm>" row labels. Key insertion order is
        original/standard_blocking x recall/precision/f1.

    Raises:
        FileNotFoundError: if a required results CSV is missing.
        KeyError: if an LLM has no matching rows for some dataset.
    """
    metrics = ['recall', 'precision', 'f1']
    if datasets is None:
        datasets = ['D2', 'D5', 'D6', 'D7', 'D8']

    groups = dict()
    indexes = list()
    for candidate_pairs in ['original', 'standard_blocking']:
        means = {metric: list() for metric in metrics}
        for dataset in datasets:
            results = _load_un_fsc_results(candidate_pairs, dataset, results_dir)
            # Scalar grouping key, so get_group() accepts the plain LLM name.
            groupby = results.groupby('_from')

            for ll in llms:
                label = f'{dataset}-{ll}'
                if label not in indexes:
                    indexes.append(label)
                rows = groupby.get_group(ll)
                for metric in metrics:
                    means[metric].append(rows[metric].mean())

        # Register the columns in metric order so the resulting DataFrame keeps
        # the recall / precision / f1 layout per candidate-pair source.
        for metric in metrics:
            groups[(candidate_pairs, metric)] = means[metric]

    return groups, indexes


def results_extraction_ComEM(results_path='../results/com_em.csv', datasets=None):
    """Collect per-LLM mean recall / precision / f1 of the Com-EM results.

    The CSV is read and grouped once, before the metric loop, because neither
    depends on the metric being extracted.

    Args:
        results_path: CSV with `dataset`, `model`, `recall`, `precision` and
            `f1-score` columns. Defaults to '../results/com_em.csv'.
        datasets: Dataset identifiers to process, in row order. Defaults to
            ['D2', 'D5', 'D6', 'D7', 'D8'].

    Returns:
        (groups, indexes): `groups` maps ('Com-EM', metric) to a list of means
        ordered by dataset and then by `llms` (the 'f1-score' column is stored
        under the key 'f1'); `indexes` holds the "<dataset>-<llm>" row labels.

    Raises:
        FileNotFoundError: if `results_path` does not exist.
        KeyError: if a (dataset, llm) pair has no rows; the `model` column must
            equal the LLM name exactly.
    """
    if datasets is None:
        datasets = ['D2', 'D5', 'D6', 'D7', 'D8']

    groupby = pd.read_csv(results_path).groupby(by=['dataset', 'model'])

    groups = dict()
    indexes = list()
    for candidate_pairs in ['Com-EM']:
        for i in ['recall', 'precision', 'f1-score']:
            # Normalise the 'f1-score' column name to the 'f1' key.
            key = (candidate_pairs, 'f1') if 'f1' in i else (candidate_pairs, i)
            groups[key] = list()

            for dataset in datasets:
                for ll in llms:
                    label = f'{dataset}-{ll}'
                    if label not in indexes:
                        indexes.append(label)
                    groups[key].append(groupby.get_group((dataset, ll))[i].mean())

    return groups, indexes


In [8]:

# Collect the per-LLM mean recall / precision / f1 for both candidate-pair
# sources ('original' and 'standard_blocking').
groups, indexes = results_extraction_main()

# The keys of `groups` are (candidate_pairs, metric) tuples, so the columns form
# a two-level MultiIndex; the rows are labelled "<dataset>-<llm>".
df = pd.DataFrame(groups, index=indexes)

# display(df)

# Disabled styling experiment, kept for reference.
# NOTE(review): `highlight_max` compares against `s.min()`, so despite its name
# it would bold the minimum of each column. The ("kNN-Join", "D2") and
# ("blocking workflows", "D2") keys also do not match the columns of `df` above.
# # Style function to bold maximum values in each column
# def highlight_max(s):
#     is_max = s == s.min()
#     return ["font-weight: bold" if v else "" for v in is_max]

# styled_df = df.style.apply(highlight_max, axis=0).set_table_styles(
#     {
#         ("kNN-Join", "D2"): [{"selector": "th", "props": "border: 1px solid black;"}],
#         ("blocking workflows", "D2"): [{"selector": "th", "props": "border: 1px solid black;"}],
#     },
#     overwrite=False,
# )

# Export the table to an Excel workbook in the current working directory.
df.to_excel("FSC_UN_metrics.xlsx", engine='openpyxl')
# styled_df
# The bare expression makes the frame the cell's displayed output.
df

Unnamed: 0_level_0,original,original,original,standard_blocking,standard_blocking,standard_blocking
Unnamed: 0_level_1,recall,precision,f1,recall,precision,f1
D2-gemma3n,82.330827,94.150035,87.843891,90.509761,90.328898,90.418816
D2-qwen2.5,78.477444,97.722765,87.048187,87.635575,96.481293,91.845542
D2-llama3.1,78.806391,87.127743,82.757742,87.689805,82.408619,84.957603
D2-orca2,76.080827,89.702441,82.331715,86.388286,90.171275,88.232037
D5-gemma3n,92.188295,83.958437,87.879828,95.24464,82.425695,88.348387
D5-qwen2.5,90.508906,84.528102,87.416246,93.870258,84.292413,88.814819
D5-llama3.1,91.603053,77.330621,83.862948,93.347993,72.477758,81.468793
D5-orca2,62.849873,81.674227,71.03171,70.560748,79.056699,74.506787
D6-gemma3n,86.490683,68.479982,76.43811,90.401786,66.991496,76.914027
D6-qwen2.5,72.981366,69.151754,71.014964,72.433036,66.447023,69.30103


In [14]:
# Collect the per-LLM mean recall / precision / f1 of the Com-EM results.
# NOTE: this rebinds `groups`, `indexes` and `df`, the same names the
# FSC/UN metrics cell assigns, so whichever cell ran last determines their values.
groups, indexes = results_extraction_ComEM()

# The keys of `groups` are ('Com-EM', metric) tuples, so the columns form a
# two-level MultiIndex; the rows are labelled "<dataset>-<llm>".
df = pd.DataFrame(groups, index=indexes)
# Export the table to an Excel workbook in the current working directory.
df.to_excel("Com_EM_metrics.xlsx", engine='openpyxl')
# The bare expression makes the frame the cell's displayed output.
df

Unnamed: 0_level_0,Com-EM,Com-EM,Com-EM
Unnamed: 0_level_1,recall,precision,f1
D2-gemma3n,79.35368,25.802685,38.942731
D2-qwen2.5,35.72711,33.166667,34.399309
D2-llama3.1,92.998205,6.648697,12.410158
D2-orca2,0.0,0.0,0.0
D5-gemma3n,90.690691,5.462109,10.303651
D5-qwen2.5,52.252252,13.975904,22.053232
D5-llama3.1,87.987988,0.891417,1.764954
D5-orca2,6.606607,10.328638,8.058608
D6-gemma3n,44.444444,0.143678,0.28643
D6-qwen2.5,55.555556,0.455373,0.903342
