In [1]:
import os
import pandas as pd
import numpy as np
import json
from unidecode import unidecode

In [2]:
import warnings
# Silence every warning category for the rest of the session. This also hides
# pandas deprecation and chained-assignment warnings raised by the cells below.
warnings.filterwarnings('ignore')

In [3]:
def _to_number(text):
    """Convert numeric-looking text to ``int``/``float``; return ``None`` otherwise.

    Thousands separators (``,``) are dropped. Recognised shapes are
    ``"1,234"``, ``"-1,234"``, ``"=42"``, ``"3.14"`` and ``"-3.14"``.
    """
    # Literal written with a leading '=' sign, e.g. "=42".
    if text.startswith('=') and text[1:].isdigit():
        return int(text[1:])

    # Only a single leading minus sign is a sign; "1-2" is not a number.
    unsigned = text[1:] if text.startswith('-') else text
    digits = unsigned.replace(',', '')
    if digits.isdigit():
        convert = int
    elif digits.replace('.', '', 1).isdigit():
        # At most one decimal point, so "1.2.3" is not treated as a number.
        convert = float
    else:
        return None

    try:
        return convert(text.replace(',', ''))
    except ValueError:
        # str.isdigit() accepts characters (e.g. superscripts) that int() rejects.
        return None


def normalize_value(value, is_date=False):
    """Normalize a single cell so two tables can be compared cell by cell.

    Args:
        value: Raw cell content (string, Python/numpy scalar, NaN, ...).
        is_date: When true, first try to parse ``value`` as a date.

    Returns:
        * ``''`` for NaN and for the placeholders ``none``, ``n/a``, ``nan``, ``-``;
        * a parsed date object when ``is_date`` is true and parsing succeeds;
        * an ``int``/``float`` for numeric-looking strings (see ``_to_number``);
        * ``'usa'`` / ``'uk'`` for ``united states`` / ``united kingdom``;
        * otherwise, for strings, the lower-cased, transliterated text reduced
          to its alphanumeric characters;
        * non-string values unchanged (numpy scalars become Python scalars).
    """
    # NaN is not equal to itself.
    if value != value:
        return ''

    if is_date:
        # NOTE(review): ``date_parser`` is not imported in the visible cells; if it
        # is undefined the NameError is caught here and pd.to_datetime is used.
        try:
            return date_parser.parse(value)
        except Exception:
            try:
                return pd.to_datetime(value)
            except Exception:
                # Not a date after all: fall through to the generic handling.
                pass

    if type(value).__module__ == 'numpy':
        value = value.item()

    if not isinstance(value, str):
        return value

    # Strip before the numeric check so that " 12 " is recognised as a number.
    value = value.strip()
    number = _to_number(value)
    if number is not None:
        return number

    value = value.lower()
    if value in ('none', 'n/a', 'nan', '-'):
        return ''

    value = value.replace('&', 'and')

    if value == 'united states':
        return 'usa'
    if value == 'united kingdom':
        return 'uk'

    value = unidecode(value)
    return ''.join(c for c in value if c.isalnum())

def normalize_key(value, is_date=False):
    """Normalize a primary-key cell into a canonical string.

    Args:
        value: Raw key content (string, number, NaN, ...).
        is_date: When true, first try to parse ``value`` as a date and return
            the string form of the parsed date.

    Returns:
        ``''`` for NaN and for the placeholders ``none``, ``n/a``, ``nan``,
        ``-``, ``--`` and ``unknown``; ``'usa'`` / ``'uk'`` for
        ``united states`` / ``united kingdom``; for other strings the
        lower-cased, transliterated text reduced to its alphanumeric
        characters; for non-strings ``str(value)``.
    """
    # NaN is not equal to itself.
    if value != value:
        return ''

    if is_date:
        # ``except Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt / SystemExit from being swallowed.
        # NOTE(review): ``date_parser`` is not imported in the visible cells; if it
        # is undefined the NameError is caught here and pd.to_datetime is used.
        try:
            return str(date_parser.parse(value))
        except Exception:
            try:
                return str(pd.to_datetime(value))
            except Exception:
                # Not a date after all: fall through to the generic handling.
                pass

    if isinstance(value, str):
        value = value.strip().lower()

        if value in ('none', 'n/a', 'nan', '-', '--', 'unknown'):
            return ''

        value = value.replace('&', 'and')

        if value == 'united states':
            return 'usa'
        if value == 'united kingdom':
            return 'uk'

        value = unidecode(value)
        value = ''.join(c for c in value if c.isalnum())
        return value

    return str(value)

def normalize_primary_columns(df, norm_columns, date_columns, primary_columns, keys_type):
    """Normalize the key columns of ``df`` in place and return its key tuples.

    Args:
        df: Table whose key columns are rewritten in place.
        norm_columns: Columns to pass through ``normalize_key``.
        date_columns: Columns whose values should be parsed as dates.
        primary_columns: Ordered key columns that make up a row's identity.
        keys_type: One type label per entry of ``primary_columns``; the label
            ``'year'`` makes the column go through float -> int before it is
            converted to string.

    Returns:
        A list with one tuple of key strings per row of ``df``, in row order.
    """
    for col in norm_columns:
        # ``is_date`` has to be a keyword argument: Series.apply forwards extra
        # keyword arguments to the function, whereas its second positional
        # parameter is an option of ``apply`` itself and never reaches
        # ``normalize_key``.
        df[col] = df[col].apply(normalize_key, is_date=col in date_columns)

    for col, key_type in zip(primary_columns, keys_type):
        if key_type == 'year':
            # Going through float first lets "2010.0" collapse to 2010.
            df[col] = df[col].astype(float).astype(int)

        df[col] = df[col].astype(str)

    return [tuple(r) for r in df[primary_columns].to_numpy()]

def find_row(df, columns, values):
    """Return the rows of ``df`` whose ``columns`` equal ``values`` pairwise.

    Each value is compared through its string form, as the former
    string-built ``DataFrame.query`` expression did. Filtering with plain
    equality means quotes or other special characters in a value or column
    name cannot break or alter the lookup.

    Args:
        df: Table to search.
        columns: Column names to match on.
        values: One value per column, in the same order.

    Returns:
        A DataFrame with the matching rows (possibly empty).
    """
    matches = df
    for col, value in zip(columns, values):
        matches = matches[matches[col] == str(value)]
    return matches

def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _f1(recall, precision):
    """Harmonic mean of recall and precision; 0.0 when both are zero."""
    total = recall + precision
    return 2 * recall * precision / total if total > 0 else 0.0


def _within_relative_tolerance(reference, candidate, rel_tol=0.001):
    """True when ``candidate`` is strictly within ``rel_tol`` of ``reference``.

    Using absolute values keeps the band valid for negative references, for
    which ``ref * 0.999 < x < ref * 1.001`` can never hold.
    """
    return abs(candidate - reference) < abs(reference) * rel_tol


def evaluate_table(df_fetched, df_ref, primary_columns, keys_type, date_columns, epsilons):
    """Score a generated table against a reference table.

    Rows are paired through their normalized primary key. For every paired
    row each column is normalized with ``normalize_value`` and counted as a
    match when the values are equal or, for columns listed in ``epsilons``,
    when they are within a 0.1 % relative tolerance.

    Side effects: the columns of the caller's ``df_fetched`` are renamed to
    those of ``df_ref``, and the key columns of ``df_ref`` are normalized in
    place.

    Args:
        df_fetched: Generated table; must have as many columns as ``df_ref``.
        df_ref: Reference table.
        primary_columns: Ordered key columns.
        keys_type: One type label per key column (see
            ``normalize_primary_columns``).
        date_columns: Columns to compare as dates.
        epsilons: Container of column names compared with tolerance. Only
            membership is tested here -- TODO confirm it carries no
            per-column tolerance values.

    Returns:
        Tuple ``(keys_recall, keys_precision, keys_f1, non_keys_recall,
        non_keys_precision, non_keys_f1, recall, precision, f1,
        relative_non_key_accuracy)``. A ratio whose denominator is zero (no
        rows, no key matches, or no non-key columns) is reported as 0.0
        instead of raising ``ZeroDivisionError``.
    """
    columns = df_ref.columns
    df_fetched.columns = columns
    df_fetched = df_fetched.drop_duplicates(subset=primary_columns)

    norm_columns = set(primary_columns)
    for pc in primary_columns:
        df_fetched = df_fetched[df_fetched[pc].notna()]
    # Work on an explicit copy so the in-place key normalization below never
    # writes to a view of the caller's frame.
    df_fetched = df_fetched.copy()

    fetched_entities = normalize_primary_columns(df_fetched, norm_columns, date_columns, primary_columns, keys_type)
    ref_entities = normalize_primary_columns(df_ref, norm_columns, date_columns, primary_columns, keys_type)

    # Set membership avoids rescanning the reference key list for every row.
    ref_lookup = set(ref_entities)

    total_matches = 0
    key_matches = 0

    for fetched_entity in fetched_entities:
        if fetched_entity not in ref_lookup:
            continue

        row_fetched = find_row(df_fetched, primary_columns, fetched_entity)
        row_ref = find_row(df_ref, primary_columns, fetched_entity)
        key_matches += 1

        for column in columns:
            is_date = column in date_columns
            try:
                norm_value_fetched = normalize_value(row_fetched[column].values[0], is_date)
                norm_value_ref = normalize_value(row_ref[column].values[0], is_date)

                if norm_value_fetched == norm_value_ref:
                    total_matches += 1
                elif column in epsilons and norm_value_ref != '' and norm_value_fetched != '':
                    if _within_relative_tolerance(norm_value_ref, norm_value_fetched):
                        total_matches += 1
            except Exception:
                # Best effort: a cell that cannot be normalized or compared
                # (e.g. text in a tolerance column) counts as a mismatch.
                continue

    n_ref_rows, n_ref_cols = df_ref.shape
    n_fetched_rows, n_fetched_cols = df_fetched.shape

    recall = _safe_ratio(total_matches, n_ref_rows * n_ref_cols)
    precision = _safe_ratio(total_matches, n_fetched_rows * n_fetched_cols)
    f1_score = _f1(recall, precision)

    keys_recall = _safe_ratio(key_matches, len(ref_entities))
    keys_precision = _safe_ratio(key_matches, len(fetched_entities))
    keys_f1_score = _f1(keys_recall, keys_precision)

    nk = len(primary_columns)
    # Key cells of a paired row are assumed to have matched, so they are
    # subtracted to obtain the number of matching non-key cells.
    non_key_matches = total_matches - key_matches * nk

    non_keys_recall = _safe_ratio(non_key_matches, n_ref_rows * (n_ref_cols - nk))
    non_keys_precision = _safe_ratio(non_key_matches, n_fetched_rows * (n_fetched_cols - nk))
    non_keys_f1_score = _f1(non_keys_recall, non_keys_precision)

    relative_non_key_accuracy = _safe_ratio(non_key_matches, key_matches * (n_ref_cols - nk))

    return keys_recall, keys_precision, keys_f1_score, non_keys_recall, non_keys_precision, non_keys_f1_score, recall, precision, f1_score, relative_non_key_accuracy

## JSON

In [5]:
# Location of the benchmark configuration file (relative to the working directory).
metadata_path="DATA/Benchmark/cfg.json"

# Parse the JSON configuration into `metadata`; the evaluation loop looks
# entries up by stringified index ("0", "1", ...).
with open(metadata_path, "rb") as f:
    metadata = json.load(f)

In [None]:
# Placeholders: replace both values with real paths before running the evaluation.
# `tables_folder` is where the evaluation loop reads the generated "<name>.csv" files.
tables_folder = '<folder_with_generated_tables>'
output_folder = '<folder_to_save_results>'

In [7]:
# Per-table score accumulators; index i of every list belongs to tables[i].
tables = []

keys_recall_scores = []
keys_precision_scores = []
keys_f1_scores = []
non_keys_recall_scores = []
non_keys_precision_scores = []
non_keys_f1_scores = []
recall_scores = []
precision_scores = []
f1_scores = []
rel_nk_acc_scores = []

for i in range(100):
    idx = "%d" % i
    md = metadata[idx]
    print(md['name'])
    try:
        fetched_table = pd.read_csv(os.path.join(tables_folder, "%s.csv" % md['name']))
        gt_table = pd.read_csv(md['path'])
        primary_columns = md['keys']
        keys_type = md['keys_type']
        date_columns = md['dateColumns']
        epsilons = md['epsilons']
        kr, kp, kf1, nkr, nkp, nkf1, r, p, f1, rnka  = evaluate_table(fetched_table, 
                                                                       gt_table, 
                                                                       primary_columns, 
                                                                       keys_type,
                                                                       date_columns, 
                                                                       epsilons)
    except Exception as exc:
        # A table that cannot be read or evaluated scores zero everywhere, but
        # the reason is reported instead of being silently discarded.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        print(f"Evaluation failed ({type(exc).__name__}): {exc}")
        kr, kp, kf1, nkr, nkp, nkf1, r, p, f1, rnka = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        
    tables.append(md['name'])
    
    keys_recall_scores.append(kr)
    keys_precision_scores.append(kp)
    keys_f1_scores.append(kf1)
    non_keys_recall_scores.append(nkr)
    non_keys_precision_scores.append(nkp)
    non_keys_f1_scores.append(nkf1)
    recall_scores.append(r)
    precision_scores.append(p)
    f1_scores.append(f1)
    rel_nk_acc_scores.append(rnka)
    
    print("====================")
    print("Keys Recall: %.4f" % kr)
    print("Keys Precision: %.4f" % kp)
    print("Keys F1: %.4f" % kf1)
    
    print("Non Keys Recall: %.4f" % nkr)
    print("Non Keys Precision: %.4f" % nkp)
    print("Non Keys F1: %.4f" % nkf1)
    
    print("Relative Non Keys Accuracy: %.4f" % rnka)
    
    print("Recall: %.4f" % r)
    print("Precision: %.4f" % p)
    print("F1: %.4f" % f1)    
    
    print("====================")    
    
# One row per table; the list order below must match the column names.
res_df = pd.DataFrame([tables, 
                       keys_recall_scores,
                       keys_precision_scores,
                       keys_f1_scores,
                       non_keys_recall_scores,
                       non_keys_precision_scores,
                       non_keys_f1_scores,
                       rel_nk_acc_scores,
                       recall_scores,
                       precision_scores, 
                       f1_scores                       
                       ]).T
res_df.columns = ['Table', 
                  'Keys_Recall', 'Keys_Precision', 'Keys_F1_Score',
                  'Non_Keys_Recall', 'Non_Keys_Precision', 'Non_Keys_F1_Score', 'Rel_Non_Keys_Accuracy',
                  'Recall', 'Precision', 'F1_Score'
                  ]

# The transposed frame holds objects; cast every score column (all but
# 'Table') to float and round to four decimals.
for score_column in res_df.columns[1:]:
    res_df[score_column] = res_df[score_column].astype(float).round(4)

# Append a final 'All' row with the mean of every score column.
means = pd.DataFrame(['All'] + res_df.mean(axis=0, numeric_only=True).tolist()).T
means.columns = res_df.columns        
res_df = pd.concat([res_df, means], axis=0)

# Save into the folder configured as `output_folder`; the name previously
# used here, `result_folder`, is not defined in the visible cells.
res_df.to_csv(os.path.join(output_folder, "scores.csv"), index=False)

republican_straw_polls_2012
Keys Recall: 0.0000
Keys Precision: 0.0000
Keys F1: 0.0000
Non Keys Recall: 0.0000
Non Keys Precision: 0.0000
Non Keys F1: 0.0000
Relative Non Keys Accuracy: 0.0000
Recall: 0.0000
Precision: 0.0000
F1: 0.0000
russia_demographics_1946_2012
Keys Recall: 0.3134
Keys Precision: 1.0000
Keys F1: 0.4773
Non Keys Recall: 0.0117
Non Keys Precision: 0.0374
Non Keys F1: 0.0179
Relative Non Keys Accuracy: 0.0374
Recall: 0.0318
Precision: 0.1016
F1: 0.0485
belgium_demographics_1900_2011
Keys Recall: 0.2589
Keys Precision: 1.0000
Keys F1: 0.4113
Non Keys Recall: 0.0000
Non Keys Precision: 0.0000
Non Keys F1: 0.0000
Relative Non Keys Accuracy: 0.0000
Recall: 0.0288
Precision: 0.1111
F1: 0.0457
australia_demographics_1900_2010
Keys Recall: 0.1081
Keys Precision: 1.0000
Keys F1: 0.1951
Non Keys Recall: 0.0011
Non Keys Precision: 0.0104
Non Keys F1: 0.0020
Relative Non Keys Accuracy: 0.0104
Recall: 0.0130
Precision: 0.1204
F1: 0.0235
new_brunswick_parishes_2006_2011
Keys Reca

Keys Recall: 0.1349
Keys Precision: 0.8947
Keys F1: 0.2345
Non Keys Recall: 0.1349
Non Keys Precision: 0.8947
Non Keys F1: 0.2345
Relative Non Keys Accuracy: 1.0000
Recall: 0.1349
Precision: 0.8947
F1: 0.2345
ramsar_convention_parties
Keys Recall: 0.6988
Keys Precision: 0.9280
Keys F1: 0.7973
Non Keys Recall: 0.0422
Non Keys Precision: 0.0560
Non Keys F1: 0.0481
Relative Non Keys Accuracy: 0.0603
Recall: 0.2610
Precision: 0.3467
F1: 0.2978
guitar_hero_5_songs
Keys Recall: 0.6824
Keys Precision: 0.7945
Keys F1: 0.7342
Non Keys Recall: 0.2918
Non Keys Precision: 0.3397
Non Keys F1: 0.3139
Relative Non Keys Accuracy: 0.4276
Recall: 0.3569
Precision: 0.4155
F1: 0.3840
south_cambridgeshire_district_council_1973_2012
Keys Recall: 1.0000
Keys Precision: 1.0000
Keys F1: 1.0000
Non Keys Recall: 0.4028
Non Keys Precision: 0.4028
Non Keys F1: 0.4028
Relative Non Keys Accuracy: 0.4028
Recall: 0.4625
Precision: 0.4625
F1: 0.4625
dublin_maternity_hospital_mortality_rates_1784_1849
Keys Recall: 1.000

Keys Recall: 0.8824
Keys Precision: 0.5172
Keys F1: 0.6522
Non Keys Recall: 0.1765
Non Keys Precision: 0.1034
Non Keys F1: 0.1304
Relative Non Keys Accuracy: 0.2000
Recall: 0.2941
Precision: 0.1724
F1: 0.2174
mongolia_provinces_population_79_89_00_09
Keys Recall: 0.9091
Keys Precision: 0.9091
Keys F1: 0.9091
Non Keys Recall: 0.0114
Non Keys Precision: 0.0114
Non Keys F1: 0.0114
Relative Non Keys Accuracy: 0.0125
Recall: 0.1909
Precision: 0.1909
F1: 0.1909
tulsa_shock_2010
Keys Recall: 0.2222
Keys Precision: 0.4000
Keys F1: 0.2857
Non Keys Recall: 0.0000
Non Keys Precision: 0.0000
Non Keys F1: 0.0000
Relative Non Keys Accuracy: 0.0000
Recall: 0.0185
Precision: 0.0333
F1: 0.0238
london_heathrow_busiest_routes_2012
Keys Recall: 0.4000
Keys Precision: 0.4000
Keys F1: 0.4000
Non Keys Recall: 0.0222
Non Keys Precision: 0.0222
Non Keys F1: 0.0222
Relative Non Keys Accuracy: 0.0556
Recall: 0.1167
Precision: 0.1167
F1: 0.1167
hungarian_grand_prix_qualifying_2012
Keys Recall: 1.0000
Keys Precisi

Keys Recall: 0.7000
Keys Precision: 0.5385
Keys F1: 0.6087
Non Keys Recall: 0.3500
Non Keys Precision: 0.2692
Non Keys F1: 0.3043
Relative Non Keys Accuracy: 0.5000
Recall: 0.5250
Precision: 0.4038
F1: 0.4565
bifa_british_independent_film_2010_2012
Keys Recall: 0.9333
Keys Precision: 0.9333
Keys F1: 0.9333
Non Keys Recall: 0.9333
Non Keys Precision: 0.9333
Non Keys F1: 0.9333
Relative Non Keys Accuracy: 1.0000
Recall: 0.9333
Precision: 0.9333
F1: 0.9333
through_the_wormhole_season_4
Keys Recall: 0.4000
Keys Precision: 0.4000
Keys F1: 0.4000
Non Keys Recall: 0.2500
Non Keys Precision: 0.2500
Non Keys F1: 0.2500
Relative Non Keys Accuracy: 0.6250
Recall: 0.3000
Precision: 0.3000
F1: 0.3000
un_habitat_scroll_of_honour_award_1991
Keys Recall: 0.0000
Keys Precision: 0.0000
Keys F1: 0.0000
Non Keys Recall: 0.0000
Non Keys Precision: 0.0000
Non Keys F1: 0.0000
Relative Non Keys Accuracy: 0.0000
Recall: 0.0000
Precision: 0.0000
F1: 0.0000
miss_universe_semifinal_scores_1993
Keys Recall: 0.9000