In [12]:
import os
import sys
import csv
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

repo_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(repo_root)

from src.utils.file_handlers import group_dataset_files
from src.utils.c_operation_helpers import  run_simulation_convergence
from src.utils.file_handlers import process_directory_real_data


from datasets.utils.extract_ordered_games import read_data_so
from src.models.BradleyTerry import compute_predicted_ratings_HO_BT
from src.models.zermello import compute_predicted_ratings_plackett_luce




In [13]:
# Directory of this experiment: <repo_root>/exp/ex04.
EXPERIMENT_PATH = os.path.join(repo_root, 'exp', 'ex04')

In [74]:
# Compare both solvers on a single PrefLib dataset.
DATASET_PATH = os.path.join(repo_root, 'datasets')
edge_path = os.path.join(DATASET_PATH, 'Raw_Data/preflib/00058-00000218.soi')
data, pi_values = read_data_so(edge_path)

# The second return value of each solver is kept under a descriptive name.
# Binding it to `iter` would shadow the builtin `iter()` for every later cell
# run in the same kernel.
# NOTE(review): presumably this is a per-iteration history, so its length is
# the number of iterations until convergence - confirm against the solvers.
_, ho_bt_iterations = compute_predicted_ratings_HO_BT(data, pi_values, verbose=True)
print(len(ho_bt_iterations))
_, plackett_luce_iterations = compute_predicted_ratings_plackett_luce(data, pi_values, verbose=True)
print(len(plackett_luce_iterations))

# Ratio of the two lengths (Plackett-Luce solver over higher-order BT solver).
print(len(plackett_luce_iterations) / len(ho_bt_iterations))



8
14
1.75


In [8]:

# Run the convergence simulation on one real dataset (id 00010), given as a
# node file and an edge file under datasets/Real_Data.
DATASET_PATH = os.path.join(repo_root, 'datasets')
print(DATASET_PATH)
node_path = os.path.join(DATASET_PATH, 'Real_Data/00010_nodes.txt')
edge_path = os.path.join(DATASET_PATH, 'Real_Data/00010_edges.txt')


results = run_simulation_convergence(node_path, edge_path, is_synthetic=0)

# For every group key in `results`, print the key and the length of its
# 'rms_convergence_criteria' entry.
# NOTE(review): presumably that length is the number of iterations the model
# needed to converge - confirm against run_simulation_convergence.
for group in results:
    print(group, len(results[group]['rms_convergence_criteria']))

/home/jackyeung99/senior_thesis/higher_order_ranking/datasets
HO 7
Z 33
BIN 7
BINZ 54


In [16]:
def file_handlers(data_path=None):
    """
    Summarise every ``<id>_data.csv`` file in a directory as one table row.

    For each matching file the columns 'Ours', 'Zermello', 'Ours_bin' and
    'Zermello_bin' are averaged, and two speed-up ratios (Zermello mean
    divided by the corresponding Ours mean) are derived from those averages.

    Parameters
    ----------
    data_path : str, optional
        Directory to scan. Defaults to the 'data' folder inside the current
        working directory.

    Returns
    -------
    pandas.DataFrame
        One row per file with columns 'dataset', 'Ours', 'Zermello',
        'Higher_Order_Speedup', 'Ours_BIN', 'Zermello_BIN', 'Binary_Speedup',
        sorted by 'dataset'. The dataset id is converted to a number; ids
        that are not numeric become NaN. The frame is empty (but keeps these
        columns) when no file matches.
    """
    suffix = '_data.csv'
    columns = [
        'dataset',
        'Ours',
        'Zermello',
        'Higher_Order_Speedup',
        'Ours_BIN',
        'Zermello_BIN',
        'Binary_Speedup',
    ]

    if data_path is None:
        data_path = os.path.join(os.getcwd(), 'data')

    all_results = []

    # Sorted so the row order before the final sort does not depend on the
    # filesystem's listing order.
    for file in sorted(os.listdir(data_path)):
        # Filter first: entries without the suffix (checkpoint folders, other
        # CSVs, hidden files) are skipped rather than breaking the id parsing.
        if not file.endswith(suffix):
            continue

        dataset_id = file[:-len(suffix)]
        df = pd.read_csv(os.path.join(data_path, file))

        # Compute each mean once and reuse it for the ratio.
        ours = df['Ours'].mean()
        zermello = df['Zermello'].mean()
        ours_bin = df['Ours_bin'].mean()
        zermello_bin = df['Zermello_bin'].mean()

        all_results.append({
            'dataset': dataset_id,
            'Ours': ours,
            'Zermello': zermello,
            'Higher_Order_Speedup': zermello / ours,
            'Ours_BIN': ours_bin,
            'Zermello_BIN': zermello_bin,
            'Binary_Speedup': zermello_bin / ours_bin,
        })

    # Passing `columns` keeps the schema intact even when nothing matched.
    final_df = pd.DataFrame(all_results, columns=columns)
    final_df['dataset'] = pd.to_numeric(final_df['dataset'], errors='coerce')
    return final_df.sort_values(by='dataset')


def visualize_df(df, title):
    """
    Print a title and render a DataFrame as a uniformly styled table.

    Every cell gets a light-gray background, black text, a solid 1px black
    border and left-aligned content. The index is hidden in the rendered
    table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render.
    title : str
        Heading printed (surrounded by blank lines) above the table.

    Notes
    -----
    Relies on the `display` function that IPython/Jupyter makes available in
    notebook sessions. Nothing is returned.
    """
    print(f"\n{title}\n")

    cell_properties = {
        'background-color': 'LightGray',
        'color': 'black',
        'border-color': 'black',
        'border-style': 'solid',
        'border-width': '1px',
        'text-align': 'left',
    }

    styled_df = df.style.set_properties(**cell_properties)
    styled_df = styled_df.hide(axis='index')  # Hide the index
    display(styled_df)

In [7]:
# Build the per-dataset summary table and save it as CSV.
# The output path is relative to the current working directory and the
# DataFrame index is written as the first (unnamed) CSV column.
df = file_handlers()
df.to_csv('results/Convergence_Table.csv')

In [8]:
# Distinct dataset ids present in the summary table.
# NOTE(review): `unique_datasets` is not referenced by any visible cell -
# confirm whether it is still needed.
unique_datasets = df['dataset'].unique()
# Dataset metadata table; its 'Name' and 'Dataset_ID' columns are used by the merge below.
dataset_info = pd.read_csv(os.path.join(repo_root, 'datasets', 'dataset_info.csv'))

In [9]:
# Attach the dataset name to each summary row by matching `dataset` with
# `Dataset_ID`, then drop both id columns. The join is an inner join, so
# rows whose id has no match in dataset_info are dropped.
merged_df = df.merge(dataset_info[['Name', 'Dataset_ID']], left_on='dataset', right_on='Dataset_ID', how='inner').drop(columns=['Dataset_ID','dataset'])
# Move 'Name' to the first column and keep the remaining columns in their existing order.
merged_df = merged_df[['Name'] + [col for col in merged_df.columns if col != 'Name']]

In [11]:
# Render the final table; the title is printed above it.
visualize_df(merged_df, 'Convergence on Real Datasets')


Conbergence on Real Datasets



Name,Ours,Zermello,Higher_Order_Speedup,Ours_BIN,Zermello_BIN,Binary_Speedup
FIFA_World_Cup,9.62,50.52,5.251559,9.04,87.44,9.672566
UCL,11.34,58.78,5.183422,10.88,123.08,11.3125
Sushi_10,6.92,13.32,1.924855,7.1,50.32,7.087324
Sushi_100,6.98,20.66,2.959885,7.18,76.52,10.657382
AGH_Course_Selection_2004,6.94,534.38,77.0,7.22,2316.62,320.861496
APA_Election_2009,7.34,17.6,2.39782,7.3,35.82,4.906849
Network_Science,24.2,144.56,5.973554,25.84,300.92,11.645511
