In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import os 

def calculate_metrics(df):
    """
    Score the predictions stored in ``df`` against its labels.

    Rows whose 'pred' value is -1 are not predictions, so they are removed
    before any metric is computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'label' and 'pred'.

    Returns
    -------
    dict
        The keys 'Accuracy', 'Precision', 'Recall' and 'F1', each mapped to
        the corresponding score over the remaining rows.
    """
    scored = df.loc[df['pred'] != -1]
    y_true, y_pred = scored['label'], scored['pred']

    # (result key, scoring function) pairs, in the order they are reported
    scorers = (
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1', f1_score),
    )
    return {metric: scorer(y_true, y_pred) for metric, scorer in scorers}

# Names of the datasets to evaluate. Each name is also the stem of the CSV
# file that is loaded for it ('<name>.csv'), so it must match the file name
# exactly.
dataset_names = [
    'dirty_DBLP-ACM',
    'dirty_iTunes-Amazon',
    'dirty_Walmart-Amazon',
    'structured_DBLP-ACM',
    'structured_iTunes-Amazon',
    'structured_Walmart-Amazon',
    # Underscore-separated like every other entry; without the underscore the
    # file lookup fails and the dataset is skipped.
    'structured_Beer',
    'structured_Fodors-Zagats',
    'structured_Amazon-Google',
    'textual_Abt-Buy'
]

# Load each dataset's CSV into `datasets`, keyed by dataset name.
# A dataset whose file is missing is reported and left out of the dictionary.
datasets = {}
for name in dataset_names:
    try:
        frame = pd.read_csv(os.path.join('gemini_predictions', f'{name}.csv'))
    except FileNotFoundError:
        print(f"File {name}.csv not found in the directory 'gemini_predictions'.")
    else:
        datasets[name] = frame

# Score every loaded dataset, keeping the dataset name as the key
results = dict(zip(datasets, map(calculate_metrics, datasets.values())))

# One row per dataset and one column per metric, colour-graded across each row
df_all = pd.DataFrame(results).transpose()
df_all.style.background_gradient(axis=1, cmap='RdYlGn')

Unnamed: 0,Accuracy,Precision,Recall,F1
dirty_DBLP-ACM,0.92873,0.736215,0.939752,0.825625
dirty_iTunes-Amazon,0.71789,0.462687,0.861111,0.601942
dirty_Walmart-Amazon,0.926672,0.612648,0.602332,0.607446
structured_DBLP-ACM,0.905884,0.658656,0.987613,0.790268
structured_iTunes-Amazon,0.642202,0.398305,0.87037,0.546512
structured_Walmart-Amazon,0.944241,0.775832,0.573834,0.659717
structured_Beer,0.928375,0.916667,0.589286,0.717391
structured_Fodors-Zagats,0.948413,0.728972,0.886364,0.8
structured_Amazon-Google,0.894352,0.4811,0.448718,0.464345
textual_Abt-Buy,0.938592,0.768645,0.612864,0.681972


In [50]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def calculate_metrics(df):
    """
    Score predictions separately for every combination of the two grouping columns.

    Rows whose 'pred' value is -1 are dropped first. The remaining rows are
    grouped by the pair ('general_or_domain', 'simple_or_complex').

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'label', 'pred', 'general_or_domain' and
        'simple_or_complex'.

    Returns
    -------
    dict
        Maps each group key to a dict with the keys 'Accuracy', 'Precision',
        'Recall' and 'F1'.
    """
    valid = df.loc[df['pred'] != -1]

    # These three scorers are called with zero_division=0; accuracy is not.
    ratio_scorers = (
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1', f1_score),
    )

    metrics = {}
    for group_key, rows in valid.groupby(['general_or_domain', 'simple_or_complex']):
        y_true, y_pred = rows['label'], rows['pred']
        scores = {'Accuracy': accuracy_score(y_true, y_pred)}
        for metric, scorer in ratio_scorers:
            scores[metric] = scorer(y_true, y_pred, zero_division=0)
        metrics[group_key] = scores
    return metrics

# Flatten the per-dataset metrics into a single dict whose keys are the
# dataset name followed by the group key returned by calculate_metrics
results = {}
for name, df in datasets.items():
    results.update(
        ((name, *group_key), scores)
        for group_key, scores in calculate_metrics(df).items()
    )

# One row per key of `results`, one column per metric
df_all = pd.DataFrame(results).transpose()
df_all.index = df_all.index.set_names(['Dataset', 'GeneralOrDomain', 'SimpleOrComplex'])
df_all.style.background_gradient(cmap='RdYlGn', subset=['Accuracy', 'Precision', 'Recall', 'F1'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Accuracy,Precision,Recall,F1
Dataset,GeneralOrDomain,SimpleOrComplex,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
dirty_DBLP_ACM,domain,complex,0.945815,0.770035,0.995495,0.868369
dirty_DBLP_ACM,domain,simple,0.939749,0.819957,0.851351,0.835359
dirty_DBLP_ACM,general,complex,0.907804,0.660714,1.0,0.795699
dirty_DBLP_ACM,general,simple,0.921553,0.723214,0.912162,0.806773
dirty_iTunes_Amazon,domain,complex,0.633028,0.402985,1.0,0.574468
dirty_iTunes_Amazon,domain,simple,0.926606,0.952381,0.740741,0.833333
dirty_iTunes_Amazon,general,complex,0.449541,0.305882,0.962963,0.464286
dirty_iTunes_Amazon,general,simple,0.862385,0.714286,0.740741,0.727273
dirty_Walmart_Amazon,domain,complex,0.937042,0.610345,0.917098,0.732919
dirty_Walmart_Amazon,domain,simple,0.923377,0.928571,0.202073,0.331915


In [51]:
def calculate_metrics(df):
    """
    Compute the F1 score for each value of each grouping column.

    Rows whose 'pred' value is -1 are dropped first. The remaining rows are
    grouped by 'general_or_domain' and then, independently, by
    'simple_or_complex'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'label', 'pred', 'general_or_domain' and
        'simple_or_complex'.

    Returns
    -------
    dict
        Maps each group value found in either column to ``{'F1': score}``.
        Both groupings write into the same dict, so a value occurring in both
        columns would keep only the 'simple_or_complex' entry.
    """
    df = df[df['pred'] != -1]
    metrics = {}
    for category in ['general_or_domain', 'simple_or_complex']:
        for name, group in df.groupby(category):
            metrics[name] = {
                # zero_division=0 matches the per-group metrics computed by
                # the earlier two-column version of this function.
                'F1': f1_score(group['label'], group['pred'], zero_division=0)
            }
    return metrics

# One row per dataset: its name followed by the F1 of each group.
# A group that is absent from a dataset is recorded as None.
results = []
for name, df in datasets.items():
    metrics = calculate_metrics(df)
    results.append((
        name,
        *(metrics.get(group, {}).get('F1')
          for group in ('domain', 'general', 'simple', 'complex')),
    ))

df_all = pd.DataFrame(
    results,
    columns=['Dataset', 'Domain F1', 'General F1', 'Simple F1', 'Complex F1'],
).set_index('Dataset')

# Colour-grade the four F1 columns across each row
df_all.style.background_gradient(
    axis=1,
    cmap='RdYlGn',
    subset=['Domain F1', 'General F1', 'Simple F1', 'Complex F1'],
)



Unnamed: 0_level_0,Domain F1,General F1,Simple F1,Complex F1
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dirty_DBLP_ACM,0.852834,0.800943,0.820325,0.830366
dirty_iTunes_Amazon,0.661972,0.550898,0.776699,0.514563
dirty_Walmart_Amazon,0.601671,0.612546,0.416988,0.704837
structured_DBLP_ACM,0.816954,0.765271,0.781433,0.799099
structured_iTunes_Amazon,0.619718,0.49505,0.636364,0.490566
structured_Walmart_Amazon,0.647975,0.670471,0.446602,0.792271
structured_Beer,0.711111,0.723404,0.619048,0.8
structured_Fodors_Zagats,0.847059,0.763636,0.864198,0.754386
structured_Amazon_Google,0.467153,0.462006,0.193772,0.591389
textual_Abt_Buy,0.670391,0.69281,0.424581,0.82839


In [52]:
def calculate_metrics(df):
    """
    Compute the F1 score for every combination of the two grouping columns.

    Rows whose 'pred' value is -1 are dropped first. The remaining rows are
    grouped by the pair ('general_or_domain', 'simple_or_complex').

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'label', 'pred', 'general_or_domain' and
        'simple_or_complex'.

    Returns
    -------
    dict
        Maps '<general_or_domain>_<simple_or_complex>' (for example
        'domain_simple') to ``{'F1': score}`` for each combination present
        in the data.
    """
    df = df[df['pred'] != -1]
    metrics = {}
    for (general_or_domain, simple_or_complex), group in df.groupby(['general_or_domain', 'simple_or_complex']):
        name = f'{general_or_domain}_{simple_or_complex}'
        metrics[name] = {
            # zero_division=0 matches the multi-metric version of this
            # function, which scores the same groups.
            'F1': f1_score(group['label'], group['pred'], zero_division=0)
        }
    return metrics

# One row per dataset: its name followed by the F1 of each group combination.
# A combination that is absent from a dataset is recorded as None.
results = []
for name, df in datasets.items():
    metrics = calculate_metrics(df)
    results.append((
        name,
        *(metrics.get(combo, {}).get('F1')
          for combo in ('domain_simple', 'domain_complex', 'general_simple', 'general_complex')),
    ))

df_all = pd.DataFrame(
    results,
    columns=['Dataset', 'Domain Simple F1', 'Domain Complex F1', 'General Simple F1', 'General Complex F1'],
)
# Row-wise average of the four group scores
df_all['Mean F1'] = df_all.loc[
    :, ['Domain Simple F1', 'Domain Complex F1', 'General Simple F1', 'General Complex F1']
].mean(axis='columns')
df_all = df_all.set_index('Dataset')

# Colour-grade the four group scores and their mean across each row
df_all.style.background_gradient(
    axis=1,
    cmap='RdYlGn',
    subset=['Domain Simple F1', 'Domain Complex F1', 'General Simple F1', 'General Complex F1', 'Mean F1'],
)

Unnamed: 0_level_0,Domain Simple F1,Domain Complex F1,General Simple F1,General Complex F1,Mean F1
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dirty_DBLP_ACM,0.835359,0.868369,0.806773,0.795699,0.82655
dirty_iTunes_Amazon,0.833333,0.574468,0.727273,0.464286,0.64984
dirty_Walmart_Amazon,0.331915,0.732919,0.487633,0.679245,0.557928
structured_DBLP_ACM,0.820416,0.81359,0.745909,0.785146,0.791265
structured_iTunes_Amazon,0.734694,0.55914,0.578313,0.436975,0.57728
structured_Walmart_Amazon,0.383333,0.80597,0.501818,0.779343,0.617616
structured_Beer,0.6,0.8,0.636364,0.8,0.709091
structured_Fodors_Zagats,0.810811,0.875,0.909091,0.666667,0.815392
structured_Amazon_Google,0.176471,0.610909,0.20915,0.575624,0.393039
textual_Abt_Buy,0.362205,0.839827,0.480565,0.817427,0.625006
