In [116]:
import os
import sys
import csv
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from pandas.plotting import table

repo_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
sys.path.append(repo_root)

from src import *

In [117]:
process_directory_real_data(os.getcwd())

In [118]:


def process_results(base_path):
    # Read the summary CSV files
    log_likelihood_df = pd.read_csv(os.path.join(base_path, 'log_likelihood_summary.csv')).groupby(by=['dataset']).mean().reset_index()
    leadership_log_likelihood_df = pd.read_csv(os.path.join(base_path, 'leadership_log_likelihood_summary.csv')).groupby(by=['dataset']).mean().reset_index()

    filtered_log_like = log_likelihood_df.drop(columns=['rep'])
    filtered_leader_like = leadership_log_likelihood_df.drop(columns=['rep'])
    
    log_like = subtract_columns(filtered_log_like, 3)
    leadership_log = subtract_columns(filtered_leader_like, 4)

    return log_like, leadership_log

def subtract_columns(df, compared_col):
    columns = df.columns.tolist()
    base_column = columns[compared_col] 
    print(base_column)
    for col in columns[2:]:
        
        df[col] = df[col] - df[base_column] 
    
    return df


def visualize_df(df, title):
    print(f"\n{title}\n")
    styled_df = df.style.set_table_styles(
       
    ).set_properties(**{
        'background-color': 'LightGray',
        'color': 'black',
        'border-color': 'black',
        'border-style': 'solid',
        'border-width': '1px',
        'text-align': 'left'
    })
    
    display(styled_df)

In [119]:
unique_datasets = pd.read_csv(os.path.join(os.path.join(os.getcwd(), 'results'), 'log_likelihood_summary.csv'))['dataset'].unique()
dataset_info = pd.read_csv(os.path.join(repo_root, 'datasets', 'dataset_info.csv'))
filtered_dataset_info = dataset_info[dataset_info['dataset_id'].isin(unique_datasets)].set_index('dataset_id')

visualize_df(filtered_dataset_info, 'Datasets Tested')



Datasets Tested



Unnamed: 0_level_0,name,N,M,K1,K2
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,irish_election,36,138011,1,14
2,debian,36,3352,1,9
4,netflix_prize,196,163759,3,4
6,skate,249,174,14,30
7,electoral_reform_society,30,35610,1,29
8,glasgow_city_council,209,188376,1,13
9,AGH_course_selection,10,299,7,9
12,t_shirt,12,30,11,11
14,sushi,103,10000,10,10
18,minneapolis_election,778,137482,1,3


In [120]:

log_like, leadership_log = process_results(os.path.join(os.getcwd(), 'results'))

def rename_df(df):
    merged_df = df.merge(dataset_info, left_on='dataset', right_on='dataset_id').drop(columns=['dataset_id', 'N', 'M', 'K1', 'K2', 'dataset'])
    merged_df = merged_df.rename(columns={'name': 'dataset_name'}).set_index('dataset_name')

    return merged_df




HO_BT
HOL_BT


In [121]:

log_like = rename_df(log_like)
visualize_df(log_like, 'log likelihood')


log likelihood



Unnamed: 0_level_0,BT,BT_leadership,HO_BT,HOL_BT,Spring_Rank,Spring_Rank_Leadership,Page_Rank,Page_Rank_Leadership,Point_Wise
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
irish_election,-5.044229,-0.133888,0.0,-5.143064,-5.625426,-5.717296,-5.196946,-5.077089,-5.082346
debian,-4.672714,-0.381944,0.0,-4.985351,-5.455923,-5.495389,-6.983254,-6.086496,-5.014611
netflix_prize,-1.929228,-0.059703,0.0,-1.98513,-2.0083,-2.036996,-2.861661,-2.702381,-2.090668
skate,-20.047749,-22.152391,0.0,-42.288676,-38.298251,-52.1519,-66.932374,-44.982715,-35.446929
electoral_reform_society,-6.177104,-0.028019,0.0,-6.195116,-6.307919,-6.431429,-6.236753,-6.274201,-6.18484
glasgow_city_council,-3.158094,-0.108939,0.0,-3.258237,-3.273359,-3.494488,-3.266435,-3.19952,-3.193524
AGH_course_selection,-9.075683,-2.162426,0.0,-9.655373,-22.409253,-56.516805,-11.97639,-10.724481,-9.951059
t_shirt,-15.781606,-6.304894,0.0,-18.417697,-28.668071,-81.344196,-19.486445,-17.614313,-16.341428
sushi,-14.458811,-1.206657,0.0,-15.260622,-14.519396,-15.89553,-15.793643,-15.325292,-14.603172
minneapolis_election,-1.427299,-0.003589,0.0,-1.430777,-2.783171,-3.002387,-1.504377,-1.490002,-1.472192


In [122]:
leadership_log = rename_df(leadership_log)
visualize_df(leadership_log, 'Leadership log likelihood')


Leadership log likelihood



Unnamed: 0_level_0,BT,BT_leadership,HO_BT,HOL_BT,Spring_Rank,Spring_Rank_Leadership,Page_Rank,Page_Rank_Leadership,Point_Wise
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
irish_election,-1.413646,-0.001634,-0.020173,0.0,-1.433378,-1.419383,-1.471437,-1.440121,-1.432245
debian,-1.249969,-0.000879,-0.04265,0.0,-1.368773,-1.272094,-2.00442,-1.719453,-1.403802
netflix_prize,-0.960354,-0.000101,-0.020483,0.0,-1.015259,-1.001538,-1.512117,-1.402239,-1.095738
skate,-0.745733,0.097293,-0.018711,0.0,-2.550378,-1.226761,-4.375815,-3.057249,-2.419413
electoral_reform_society,-1.521415,-0.000959,-0.003062,0.0,-1.53838,-1.544886,-1.535947,-1.538078,-1.526059
glasgow_city_council,-1.145459,-0.001661,-0.032338,0.0,-1.158389,-1.141853,-1.228527,-1.19476,-1.174335
AGH_course_selection,-0.798906,-1.147674,0.280547,0.0,-1.349901,-0.311867,-3.065081,-2.215976,-1.679889
t_shirt,-2.293822,0.000527,-0.110854,0.0,-2.713953,-2.90972,-2.619781,-2.425969,-2.290493
sushi,-2.089955,-0.00656,-0.084965,0.0,-2.134286,-2.100719,-2.32733,-2.237109,-2.196671
minneapolis_election,-0.926821,-3.9e-05,-0.002397,0.0,-1.529747,-1.610487,-0.988544,-0.975298,-0.968206
