In [2]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

def calculate_metrics(df):
    """Score one dataset's predictions against its labels.

    Rows whose ``label`` equals -1 are dropped before scoring
    (presumably rows without a usable gold label -- confirm with the
    prediction-file producer).

    Args:
        df: DataFrame exposing ``label`` and ``pred`` columns.

    Returns:
        dict with the keys 'Accuracy', 'Precision', 'Recall' and 'F1'
        (in that order). Precision, recall and F1 are computed with
        ``average='weighted'`` and ``zero_division=0``.
    """
    scored_rows = df[df['label'] != -1]
    y_true = scored_rows['label']
    y_pred = scored_rows['pred']

    # The same averaging options are applied to every per-class scorer.
    weighted = {'average': 'weighted', 'zero_division': 0}

    scores = {'Accuracy': accuracy_score(y_true, y_pred)}
    scores['Precision'] = precision_score(y_true, y_pred, **weighted)
    scores['Recall'] = recall_score(y_true, y_pred, **weighted)
    scores['F1'] = f1_score(y_true, y_pred, **weighted)
    return scores

# Load every prediction file once; the key is the dataset name used in
# the result tables, the value is the parsed CSV.
datasets = {
    dataset_key: pd.read_csv(csv_path)
    for dataset_key, csv_path in (
        ('dirty_DBLP_ACM', 'llama3_predictions/dirty_DBLP-ACM.csv'),
        ('dirty_iTunes_Amazon', 'llama3_predictions/dirty_iTunes-Amazon.csv'),
        ('dirty_Walmart_Amazon', 'llama3_predictions/dirty_Walmart-Amazon.csv'),
        ('structured_DBLP_ACM', 'llama3_predictions/structured_DBLP-ACM.csv'),
        ('structured_iTunes_Amazon', 'llama3_predictions/structured_iTunes-Amazon.csv'),
        ('structured_Walmart_Amazon', 'llama3_predictions/structured_Walmart-Amazon.csv'),
        ('structured_Beer', 'llama3_predictions/structured_Beer.csv'),
        ('structured_Fodors_Zagats', 'llama3_predictions/structured_Fodors-Zagats.csv'),
        ('structured_Amazon_Google', 'llama3_predictions/structured_Amazon-Google.csv'),
        ('textual_Abt_Buy', 'llama3_predictions/textual_Abt-Buy.csv'),
    )
}

# One metrics dict per dataset, in the same order as `datasets`.
results = dict(zip(datasets.keys(), map(calculate_metrics, datasets.values())))

# Transpose so that datasets become rows and metric names become columns.
df_all = pd.DataFrame(results).transpose()
df_all.style.background_gradient(cmap='RdYlGn')

  from pandas.core import (


Unnamed: 0,Accuracy,Precision,Recall,F1
dirty_DBLP_ACM,0.48186,0.800409,0.48186,0.526598
dirty_iTunes_Amazon,0.373853,0.711752,0.373853,0.348094
dirty_Walmart_Amazon,0.63531,0.900406,0.63531,0.710637
structured_DBLP_ACM,0.437424,0.826858,0.437424,0.469969
structured_iTunes_Amazon,0.40367,0.699751,0.40367,0.397946
structured_Walmart_Amazon,0.565886,0.905998,0.565886,0.650477
structured_Beer,0.708791,0.894891,0.708791,0.749321
structured_Fodors_Zagats,0.748677,0.891771,0.748677,0.791419
structured_Amazon_Google,0.679241,0.879788,0.679241,0.742128
textual_Abt_Buy,0.715423,0.904061,0.715423,0.769204


In [3]:
def calculate_metrics(df):
    """Score predictions separately for each (general_or_domain, simple_or_complex) group.

    Note: this redefines ``calculate_metrics``; any earlier definition in
    the notebook is shadowed once this cell has run.

    Rows whose ``label`` equals -1 are dropped before grouping.

    Args:
        df: DataFrame exposing ``label``, ``pred``, ``general_or_domain``
            and ``simple_or_complex`` columns.

    Returns:
        dict keyed by the groupby key of the two grouping columns; each
        value is a dict with 'Accuracy', 'Precision', 'Recall' and 'F1'.
        Precision, recall and F1 use ``average='weighted'`` and
        ``zero_division=0``.
    """
    scored_rows = df[df['label'] != -1]
    weighted = {'average': 'weighted', 'zero_division': 0}

    def _score(group):
        # Same four metrics, in the same order, for a single group.
        y_true, y_pred = group['label'], group['pred']
        return {
            'Accuracy': accuracy_score(y_true, y_pred),
            'Precision': precision_score(y_true, y_pred, **weighted),
            'Recall': recall_score(y_true, y_pred, **weighted),
            'F1': f1_score(y_true, y_pred, **weighted),
        }

    grouped = scored_rows.groupby(['general_or_domain', 'simple_or_complex'])
    return {group_key: _score(group) for group_key, group in grouped}

# Flatten the per-dataset, per-group metrics into one dict whose keys are
# (dataset, general_or_domain, simple_or_complex) tuples.
results = {}
for name, df in datasets.items():
    results.update(
        ((name,) + group_key, scores)
        for group_key, scores in calculate_metrics(df).items()
    )

# Tuple keys become column levels; transposing turns them into the row index.
df_all = pd.DataFrame(results).transpose()
df_all.index.names = ['Dataset', 'GeneralOrDomain', 'SimpleOrComplex']
df_all.style.background_gradient(
    subset=['Accuracy', 'Precision', 'Recall', 'F1'],
    cmap='RdYlGn',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Accuracy,Precision,Recall,F1
Dataset,GeneralOrDomain,SimpleOrComplex,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
dirty_DBLP_ACM,domain,complex,0.720178,0.843108,0.720178,0.751518
dirty_DBLP_ACM,domain,simple,0.394503,0.776919,0.394503,0.42649
dirty_DBLP_ACM,general,complex,0.452708,0.830943,0.452708,0.486814
dirty_DBLP_ACM,general,simple,0.360146,0.733763,0.360146,0.389502
dirty_iTunes_Amazon,domain,complex,0.449541,0.769035,0.449541,0.446947
dirty_iTunes_Amazon,domain,simple,0.46789,0.732247,0.46789,0.47911
dirty_iTunes_Amazon,general,complex,0.256881,0.81422,0.256881,0.11721
dirty_iTunes_Amazon,general,simple,0.321101,0.603208,0.321101,0.287961
dirty_Walmart_Amazon,domain,complex,0.712055,0.91759,0.712055,0.771703
dirty_Walmart_Amazon,domain,simple,0.776964,0.90463,0.776964,0.819235


In [4]:
def calculate_metrics(df):
    """Compute weighted F1 per value of each grouping column, taken one column at a time.

    Note: this redefines ``calculate_metrics``; any earlier definition in
    the notebook is shadowed once this cell has run.

    Rows whose ``label`` equals -1 are dropped first. The frame is then
    grouped by ``general_or_domain`` and, independently, by
    ``simple_or_complex``; all group values share one result dict, so a
    value appearing in both columns would keep only the later entry.

    Args:
        df: DataFrame exposing ``label``, ``pred``, ``general_or_domain``
            and ``simple_or_complex`` columns.

    Returns:
        dict mapping each group value to ``{'F1': <weighted F1>}``.
    """
    scored_rows = df[df['label'] != -1]
    metrics = {}
    for category in ('general_or_domain', 'simple_or_complex'):
        metrics.update(
            (
                group_value,
                {'F1': f1_score(rows['label'], rows['pred'], average='weighted', zero_division=0)},
            )
            for group_value, rows in scored_rows.groupby(category)
        )
    return metrics

# One row per dataset: its name followed by the F1 of each slice.
# A slice missing from the metrics yields None for that cell.
results = []
for name, df in datasets.items():
    metrics = calculate_metrics(df)
    results.append((
        name,
        *(
            metrics.get(slice_key, {}).get('F1', None)
            for slice_key in ('domain', 'general', 'simple', 'complex')
        ),
    ))

df_all = pd.DataFrame(
    results,
    columns=['Dataset', 'Domain F1', 'General F1', 'Simple F1', 'Complex F1'],
).set_index('Dataset')

# After moving 'Dataset' into the index, the remaining columns are exactly the four F1 columns.
df_all.style.background_gradient(subset=df_all.columns.tolist(), cmap='RdYlGn')



Unnamed: 0_level_0,Domain F1,General F1,Simple F1,Complex F1
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dirty_DBLP_ACM,0.604904,0.438771,0.408071,0.631277
dirty_iTunes_Amazon,0.463546,0.211055,0.388684,0.302084
dirty_Walmart_Amazon,0.796029,0.615483,0.6798,0.740776
structured_DBLP_ACM,0.592196,0.322768,0.326133,0.592276
structured_iTunes_Amazon,0.56762,0.180515,0.367076,0.42895
structured_Walmart_Amazon,0.791829,0.476231,0.632057,0.668557
structured_Beer,0.744591,0.754051,0.509663,0.958205
structured_Fodors_Zagats,0.839533,0.74307,0.611793,0.963319
structured_Amazon_Google,0.751703,0.732426,0.730564,0.753485
textual_Abt_Buy,0.82365,0.712356,0.776739,0.761485


In [8]:
def calculate_metrics(df):
    """Compute weighted F1 for each (general_or_domain, simple_or_complex) combination.

    Note: this redefines ``calculate_metrics``; any earlier definition in
    the notebook is shadowed once this cell has run.

    Rows whose ``label`` equals -1 are dropped before grouping.

    Args:
        df: DataFrame exposing ``label``, ``pred``, ``general_or_domain``
            and ``simple_or_complex`` columns.

    Returns:
        dict keyed by ``'<general_or_domain>_<simple_or_complex>'``; each
        value is ``{'F1': <weighted F1>}`` computed with
        ``zero_division=0``.
    """
    scored_rows = df[df['label'] != -1]
    grouped = scored_rows.groupby(['general_or_domain', 'simple_or_complex'])
    return {
        f'{general_or_domain}_{simple_or_complex}': {
            'F1': f1_score(group['label'], group['pred'], average='weighted', zero_division=0)
        }
        for (general_or_domain, simple_or_complex), group in grouped
    }

# One row per dataset: its name followed by the F1 of each combined slice.
# A combination missing from the metrics yields None for that cell.
results = []
for name, df in datasets.items():
    metrics = calculate_metrics(df)
    results.append((
        name,
        *(
            metrics.get(slice_key, {}).get('F1', None)
            for slice_key in ('domain_simple', 'domain_complex', 'general_simple', 'general_complex')
        ),
    ))

df_all = (
    pd.DataFrame(
        results,
        columns=['Dataset', 'Domain Simple F1', 'Domain Complex F1', 'General Simple F1', 'General Complex F1'],
    )
    # Row-wise mean over the four slice columns, added before 'Dataset' becomes the index.
    .assign(**{
        'Mean F1': lambda frame: frame[
            ['Domain Simple F1', 'Domain Complex F1', 'General Simple F1', 'General Complex F1']
        ].mean(axis=1)
    })
    .set_index('Dataset')
)

# axis=1 applies the colour gradient across each row instead of down each column.
df_all.style.background_gradient(subset=df_all.columns.tolist(), cmap='RdYlGn', axis=1)

Unnamed: 0_level_0,Domain Simple F1,Domain Complex F1,General Simple F1,General Complex F1,Mean F1
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dirty_DBLP_ACM,0.42649,0.751518,0.389502,0.486814,0.513581
dirty_iTunes_Amazon,0.47911,0.446947,0.287961,0.11721,0.332807
dirty_Walmart_Amazon,0.819235,0.771703,0.509201,0.708998,0.702284
structured_DBLP_ACM,0.357289,0.777154,0.29339,0.351407,0.44481
structured_iTunes_Amazon,0.504264,0.628238,0.205976,0.150067,0.372136
structured_Walmart_Amazon,0.830575,0.75143,0.361504,0.575704,0.629803
structured_Beer,0.487113,0.968301,0.531915,0.948296,0.733906
structured_Fodors_Zagats,0.726378,0.954449,0.483139,0.972695,0.784165
structured_Amazon_Google,0.80097,0.700264,0.654146,0.804281,0.739915
textual_Abt_Buy,0.876911,0.769331,0.669826,0.753567,0.767409
