In [1]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import pathlib

# Functions
def create_datasets(dataset_names, base_path=None):
    """Load one CSV file per dataset name into a dict of DataFrames.

    Args:
        dataset_names: Iterable of dataset names. Each name is read from
            ``<base_path>/<name>.csv``.
        base_path: Directory (string or path-like) that holds the CSV files.
            ``None`` (the default) reads ``<name>.csv`` relative to the
            current working directory.

    Returns:
        dict mapping each dataset name to the DataFrame returned by
        ``pd.read_csv`` for its file.
    """
    # Without this guard the default None would be formatted into the path
    # and the function would look for the literal directory "None/".
    prefix = '' if base_path is None else f'{base_path}/'
    return {name: pd.read_csv(f'{prefix}{name}.csv') for name in dataset_names}

def acc_pre_rec_f1(df):
    """Compute accuracy, precision, recall and F1 for one prediction table.

    Rows whose ``pred`` equals -1 are dropped before scoring
    (NOTE(review): -1 presumably marks an unusable model answer - confirm).

    Args:
        df: DataFrame with a ``label`` column (ground truth) and a ``pred``
            column (predictions).

    Returns:
        dict with the keys ``'Accuracy'``, ``'Precision'``, ``'Recall'`` and
        ``'F1'``, each holding the value of the matching sklearn scorer.
    """
    kept = df.loc[df['pred'] != -1]
    y_true, y_pred = kept['label'], kept['pred']
    scorers = {
        'Accuracy': accuracy_score,
        'Precision': precision_score,
        'Recall': recall_score,
        'F1': f1_score,
    }
    return {metric: scorer(y_true, y_pred) for metric, scorer in scorers.items()}

def f1_group(df):
    """Compute F1 separately for each (general_or_domain, simple_or_complex) group.

    Rows whose ``pred`` equals -1 are dropped before grouping.

    Args:
        df: DataFrame with ``label``, ``pred``, ``general_or_domain`` and
            ``simple_or_complex`` columns.

    Returns:
        dict keyed by ``'<general_or_domain>_<simple_or_complex>'``; each
        value is a dict ``{'F1': <f1_score of that group>}``. Only the
        combinations present in the filtered data appear.
    """
    kept = df.loc[df['pred'] != -1]
    grouped = kept.groupby(['general_or_domain', 'simple_or_complex'])
    return {
        f'{scope}_{complexity}': {'F1': f1_score(rows['label'], rows['pred'])}
        for (scope, complexity), rows in grouped
    }

def calculate_results(datasets):
    """Build one row of per-prompt-group F1 scores for every dataset.

    Args:
        datasets: dict mapping ``'<type>_<dataset>'`` names to the
            prediction DataFrame accepted by ``f1_group``.

    Returns:
        list of rows, one per dataset, in the iteration order of
        ``datasets``. Each row is
        ``[type, dataset, domain_complex, domain_simple, general_simple,
        general_complex]`` where the last four entries are the F1 values
        reported by ``f1_group``, or ``None`` when that group is absent.

    Raises:
        ValueError: if a dataset name contains no underscore.
    """
    # Order matters: it defines the column order of every returned row.
    group_order = ('domain_complex', 'domain_simple', 'general_simple', 'general_complex')

    results = []
    for name, df in datasets.items():
        # Split on the first underscore only, so a dataset whose own name
        # contains underscores no longer breaks the two-way unpacking.
        type_, dataset = name.split('_', 1)
        metrics = f1_group(df)
        # `group_f1`, not `f1_score`: avoid shadowing the sklearn scorer name.
        group_f1 = [metrics.get(group, {}).get('F1') for group in group_order]
        results.append([type_, dataset, *group_f1])
    return results

  from pandas.core import (


In [2]:
# Where the per-model prediction CSVs live and where the summaries are written.
gemini_results_path = pathlib.Path('generative', 'gemini_predictions')
llama3_results_path = pathlib.Path('generative', 'llama3_predictions')
results_dir = pathlib.Path('results')

dataset_names = ['dirty_DBLP-ACM', 'dirty_iTunes-Amazon', 'dirty_Walmart-Amazon', 
                 'structured_DBLP-ACM', 'structured_iTunes-Amazon', 'structured_Walmart-Amazon', 
                 'structured_Beer', 'structured_Fodors-Zagats', 'structured_Amazon-Google', 
                 'textual_Abt-Buy']


def _metrics_table(results):
    """Turn ``{'<type>_<dataset>': {metric: value}}`` into a flat table.

    Returns a new DataFrame with the columns
    ``Type, Dataset, Accuracy, Precision, Recall, F1`` (one row per dataset).
    """
    table = pd.DataFrame(results).T
    # Split the index into 'Type' and 'Dataset'. n=1 splits on the first
    # underscore only, so a dataset name that itself contains underscores
    # still yields exactly two index levels.
    table.index = pd.MultiIndex.from_tuples(
        table.index.str.split('_', n=1).tolist(), names=['Type', 'Dataset'])
    # reset_index() returns a new frame (no in-place mutation); the column
    # selection fixes the column order.
    return table.reset_index()[['Type', 'Dataset', 'Accuracy', 'Precision', 'Recall', 'F1']]


llama3_datasets = create_datasets(dataset_names, base_path=llama3_results_path)
gemini_datasets = create_datasets(dataset_names, base_path=gemini_results_path)

llama3_results = {name: acc_pre_rec_f1(df) for name, df in llama3_datasets.items()}
gemini_results = {name: acc_pre_rec_f1(df) for name, df in gemini_datasets.items()}

llama3_results_df = _metrics_table(llama3_results)
gemini_results_df = _metrics_table(gemini_results)

# Save to csv. Create the output directory first so the cell also works on a
# checkout where 'results/' does not exist yet.
results_dir.mkdir(parents=True, exist_ok=True)
llama3_results_df.to_csv(results_dir / 'llama3_results.csv', index=False)
gemini_results_df.to_csv(results_dir / 'gemini_results.csv', index=False)

# Print the dataframes
print("Llama3")
print(llama3_results_df.to_string(index=False), "\n")

print("Gemini")
print(gemini_results_df.to_string(index=False))

Llama3
      Type        Dataset  Accuracy  Precision   Recall       F1
     dirty       DBLP-ACM  0.483080   0.236651 0.847285 0.369968
     dirty  iTunes-Amazon  0.373853   0.270195 0.898148 0.415418
     dirty Walmart-Amazon  0.635387   0.185585 0.847150 0.304469
structured       DBLP-ACM  0.438621   0.232828 0.931445 0.372536
structured  iTunes-Amazon  0.403670   0.273810 0.851852 0.414414
structured Walmart-Amazon  0.565955   0.167740 0.910622 0.283296
structured           Beer  0.708791   0.343750 0.982143 0.509259
structured  Fodors-Zagats  0.748677   0.294355 0.829545 0.434524
structured  Amazon-Google  0.679463   0.203083 0.731838 0.317939
   textual        Abt-Buy  0.716077   0.260376 0.891859 0.403075 

Gemini
      Type        Dataset  Accuracy  Precision   Recall       F1
     dirty       DBLP-ACM  0.928730   0.736215 0.939752 0.825625
     dirty  iTunes-Amazon  0.717890   0.462687 0.861111 0.601942
     dirty Walmart-Amazon  0.926672   0.612648 0.602332 0.607446
structure

In [3]:
# Column labels shared by both per-prompt tables; the order matches the row
# layout produced by calculate_results.
_prompt_columns = ['Type', 'Dataset', 'Domain Complex', 'Domain Simple', 'General Simple', 'General Complex']

# Per-dataset F1 for each prompt group, one table per model.
llama3_results = calculate_results(llama3_datasets)
df_llama3 = pd.DataFrame(llama3_results, columns=_prompt_columns)

gemini_results = calculate_results(gemini_datasets)
df_gemini = pd.DataFrame(gemini_results, columns=_prompt_columns)

# Persist both tables as CSV (row index omitted).
df_llama3.to_csv('results/llama3_results_prompt.csv', index=False)
df_gemini.to_csv('results/gemini_results_prompt.csv', index=False)

# Show both tables as plain text.
print("Llama3")
print(df_llama3.to_string(index=False), "\n")

print("Gemini")
print(df_gemini.to_string(index=False))

Llama3
      Type        Dataset  Domain Complex  Domain Simple  General Simple  General Complex
     dirty       DBLP-ACM        0.512350       0.334229        0.306267         0.380211
     dirty  iTunes-Amazon        0.454545       0.442308        0.372881         0.400000
     dirty Walmart-Amazon        0.376321       0.396301        0.209827         0.309108
structured       DBLP-ACM        0.568846       0.330146        0.323681         0.349310
structured  iTunes-Amazon        0.527473       0.440000        0.330579         0.393939
structured Walmart-Amazon        0.358923       0.438320        0.197876         0.254081
structured           Beer        0.903226       0.337662        0.373333         0.848485
structured  Fodors-Zagats        0.816327       0.388350        0.209790         0.878049
structured  Amazon-Google        0.279461       0.383167        0.264463         0.383315
   textual        Abt-Buy        0.410811       0.570978        0.306452         0.397104 

G