In [14]:
import os
import sys
import csv
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

repo_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(repo_root)

from src.utils.file_handlers import group_dataset_files
from src.utils.c_operation_helpers import  run_simulation_convergence
from src.utils.file_handlers import process_directory_real_data


from datasets.utils.extract_ordered_games import read_data_so
from src.models.BradleyTerry import compute_predicted_ratings_HO_BT
from src.models.zermelo import compute_predicted_ratings_plackett_luce




In [15]:
# Root directory of experiment ex04, located under <repo_root>/exp.
EXPERIMENT_PATH = os.path.join(repo_root, 'exp', 'ex04')

In [16]:

# Root folder of the datasets bundled with the repository.
DATASET_PATH = os.path.join(repo_root, 'datasets')
print(DATASET_PATH)

# Node and edge files of real dataset 00002.
node_path = os.path.join(DATASET_PATH, 'Real_Data/00002_nodes.txt')
edge_path = os.path.join(DATASET_PATH, 'Real_Data/00002_edges.txt')

# is_synthetic=0 presumably marks the input as real (non-synthetic) data --
# TODO confirm against run_simulation_convergence.
results = run_simulation_convergence(node_path, edge_path, is_synthetic=0)

# Report, per group, how many entries its 'rms_convergence_criteria' holds.
for group in results:
    n_entries = len(results[group]['rms_convergence_criteria'])
    print(group, n_entries)

/home/jackyeung99/senior_thesis/higher_order_ranking/datasets
HO 11
Z 59
BIN 11
BINZ 117


In [17]:
def file_handlers(data_path=None):
    """
    Summarise the per-dataset result files named ``<id>_data.csv``.

    Every file in ``data_path`` whose name ends with ``_data.csv`` is read
    and reduced to one row holding the column means of ``Ours``,
    ``Zermelo``, ``Ours_bin`` and ``Zermelo_bin`` plus two ratios
    (``Zermelo / Ours`` and ``Zermelo_bin / Ours_bin``). Any other
    directory entry is skipped.

    Parameters
    ----------
    data_path : str, optional
        Directory to scan. Defaults to ``<EXPERIMENT_PATH>/data``.

    Returns
    -------
    pandas.DataFrame
        One row per result file, sorted by ``dataset``. The ``dataset``
        column is the filename prefix converted with ``pd.to_numeric``
        (non-numeric prefixes become NaN). An empty frame with the same
        columns is returned when no result file is found.
    """
    suffix = '_data.csv'
    columns = [
        'dataset',
        'Ours', 'Zermelo', 'Higher_Order_Speedup',
        'Ours_BIN', 'Zermelo_BIN', 'Binary_Speedup',
    ]

    if data_path is None:
        data_path = os.path.join(EXPERIMENT_PATH, 'data')

    all_results = []

    for file in os.listdir(data_path):
        # Filter BEFORE parsing the name: unrelated entries (hidden files,
        # other CSVs) must be ignored rather than break the id extraction.
        if not file.endswith(suffix):
            continue

        dataset_id = file[:-len(suffix)]
        df = pd.read_csv(os.path.join(data_path, file))

        # Compute each mean once and reuse it for the ratios.
        ours = df['Ours'].mean()
        zermelo = df['Zermelo'].mean()
        ours_bin = df['Ours_bin'].mean()
        zermelo_bin = df['Zermelo_bin'].mean()

        all_results.append({
            'dataset': dataset_id,
            'Ours': ours,
            'Zermelo': zermelo,
            'Higher_Order_Speedup': zermelo / ours,
            'Ours_BIN': ours_bin,
            'Zermelo_BIN': zermelo_bin,
            'Binary_Speedup': zermelo_bin / ours_bin,
        })

    # Passing `columns` keeps the schema stable even when nothing was found,
    # so the 'dataset' lookups below cannot raise KeyError.
    final_df = pd.DataFrame(all_results, columns=columns)
    final_df['dataset'] = pd.to_numeric(final_df['dataset'], errors='coerce')
    return final_df.sort_values(by='dataset')


def visualize_df(df, title):
    """
    Print ``title`` and render ``df`` as a uniformly styled table.

    Every cell gets a light-gray background, black text, a solid 1px black
    border and left-aligned content. The index is hidden. The styled table
    is shown through ``display``; nothing is returned.
    """
    print(f"\n{title}\n")

    # CSS properties applied identically to every data cell.
    cell_css = {
        'background-color': 'LightGray',
        'color': 'black',
        'border-color': 'black',
        'border-style': 'solid',
        'border-width': '1px',
        'text-align': 'left',
    }

    # No table-level styles are passed; the call is kept as a placeholder.
    table = df.style.set_table_styles()
    table = table.set_properties(**cell_css)
    table = table.hide(axis='index')  # Hide the index
    display(table)

In [19]:
# Aggregate the per-dataset result files and persist the summary table.
df = file_handlers()
table_path = os.path.join(EXPERIMENT_PATH, 'results/Convergence_Table.csv')
df.to_csv(table_path)

In [20]:
# Distinct dataset ids present in the summary table.
unique_datasets = df['dataset'].unique()

# Dataset metadata table shipped with the repository.
info_path = os.path.join(repo_root, 'datasets', 'dataset_info.csv')
dataset_info = pd.read_csv(info_path)

In [21]:
# Attach the dataset name to every summary row (inner join on the id),
# then drop both id columns so only the name identifies a row.
id_to_name = dataset_info[['Name', 'Dataset_ID']]
merged_df = (
    df.merge(id_to_name, left_on='dataset', right_on='Dataset_ID', how='inner')
      .drop(columns=['Dataset_ID', 'dataset'])
)

# Move 'Name' to the front, keeping the remaining columns in their order.
other_cols = [col for col in merged_df.columns if col != 'Name']
merged_df = merged_df[['Name'] + other_cols]

In [22]:
# Render the per-dataset summary as a styled table.
visualize_df(merged_df, 'Convergence on Real Datasets')


Conbergence on Real Datasets



Name,Ours,Zermelo,Higher_Order_Speedup,Ours_BIN,Zermelo_BIN,Binary_Speedup
FIFA_World_Cup,9.7,50.4,5.195876,9.08,87.2,9.603524
UCL,11.34,59.94,5.285714,10.86,126.22,11.622468
Sushi_10,6.92,13.42,1.939306,7.12,51.16,7.185393
Sushi_100,7.02,21.18,3.017094,7.22,78.84,10.919668
AGH_Course_Selection_2004,6.98,533.98,76.501433,7.06,2313.2,327.648725
APA_Election_2009,7.42,17.72,2.38814,7.28,36.56,5.021978
Network_Science,24.2,143.74,5.939669,26.12,301.72,11.551302
