In [7]:
import os
import sys

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib

# Make the parent of the notebook's working directory importable so the
# `src.*` imports below resolve.
repo_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard the append: re-running this cell must not pile duplicate entries
# onto sys.path.
if repo_root not in sys.path:
    sys.path.append(repo_root)

from src.utils.file_handlers import group_dataset_files, read_dataset_files
from src.models.BradleyTerry import compute_predicted_ratings_HO_BT
from src.models.zermello import compute_predicted_ratings_plackett_luce
from src.utils.metrics import measure_likelihood, measure_leadership_likelihood, measure_tau
from src.utils.operation_helpers import run_models_synthetic, run_models, split_games
from sklearn.model_selection import train_test_split
from src.utils.c_operation_helpers import run_simulation_synthetic, run_simulation, run_simulation_convergence

## Synthetic data

In [8]:
# Fit `compute_predicted_ratings_HO_BT` on a random split of a synthetic
# dataset and print the likelihood / tau metrics for the held-out part.
data_dir = os.path.join(repo_root, 'datasets', 'Synthetic_Data')
grouped = group_dataset_files(data_dir)

# Fixed seed so the random split (and therefore the metrics printed below)
# is reproducible across kernel restarts.
SPLIT_SEED = 0

for dataset in grouped:
    print(dataset)
    data, pi_values = read_dataset_files(grouped[dataset], data_dir, is_synthetic=True)

    # NOTE(review): test_size=.8 holds out 80% of `data` and trains on the
    # remaining 20% -- confirm this ratio is intended.
    train, test = train_test_split(data, test_size=.8, random_state=SPLIT_SEED)
    # The second return value is bound to `iteration_log` rather than `iter`
    # so the builtin `iter` is not shadowed for the rest of the session.
    predicted, iteration_log = compute_predicted_ratings_HO_BT(train, pi_values, verbose=True)

    print(f"Iterations: {len(iteration_log)}")
    print(f"HO Likelihood: {measure_likelihood(predicted, test)}")
    print(f"HOL Likelihood: {measure_leadership_likelihood(predicted, test)}")
    print(f"TAU: {measure_tau(predicted, pi_values)}")

    # Only the first dataset yielded by `grouped` is evaluated.
    break

N-1000_M-1000_K-5_L-0
Iterations: 52
HO Likelihood: -4.40120956415431
HOL Likelihood: -1.4358037845255924
TAU: 0.422441601354638


In [9]:
# Call `run_simulation_synthetic` on every synthetic dataset and print the
# result it returns for each one.
data_dir = os.path.join(repo_root, 'datasets', 'Synthetic_Data')
grouped = group_dataset_files(data_dir)

for dataset in grouped:
    print(dataset)

    # Each entry exposes its file names under the 'edges' and 'nodes' keys;
    # join them onto the data directory to get full paths.
    edge_file, node_file = grouped[dataset]['edges'], grouped[dataset]['nodes']
    edge_path, node_path = (
        os.path.join(data_dir, file_name) for file_name in (edge_file, node_file)
    )

    # NOTE(review): 0.8 is presumably a split ratio -- confirm against
    # run_simulation_synthetic's signature.
    results = run_simulation_synthetic(node_path, edge_path, 0.8)
    print(results)

N-1000_M-1000_K-5_L-0
  model  av_error  spearman   kendall     prior   HO_Like  HOL_Like iterations
0    HO  0.142812  0.870621  0.688136  -1.75393  -10.1199  -1.10181         17
1   HOL   0.21433  0.634838  0.455556  -1.57324  -11.4286  -1.27797         13
2   BIN  0.147937  0.863436  0.680601  -1.85909  -10.2599  -1.10535         13
3  BINL  0.220474  0.630873  0.452761  -1.68693  -11.6189  -1.28272         10


## Real data

In [33]:
# On real dataset '00106': print the length of the second return value of the
# HO_BT and Plackett-Luce fits (presumably their iteration logs -- confirm),
# then print the table returned by `run_models`.
data_dir = os.path.join(repo_root, 'datasets', 'Real_Data')
grouped = group_dataset_files(data_dir)

data, pi_values = read_dataset_files(grouped['00106'], data_dir, is_synthetic=False)

# The split uses the project's `split_games` helper instead of sklearn's
# `train_test_split`.
train, test = split_games(data, .8)

# Only the second return value is used here. The first is bound to a named
# throwaway instead of `_`, because `_` is IPython's automatic last-output
# variable and assigning to it stops IPython from updating it.
_ho_bt_first, info = compute_predicted_ratings_HO_BT(train, pi_values, verbose=True)
print(len(info))
_plackett_luce_first, info = compute_predicted_ratings_plackett_luce(train, pi_values, verbose=True)
print(len(info))

df = run_models(train, test, pi_values)
print(df)


410
522
         model  log-likelihoods  leadership-log-likelihood
0          BIN        -2.347709                  -0.704776
1         BINL        -3.647801                  -0.548435
2        HO_BT        -2.318407                  -0.748193
3       HOL_BT        -3.404857                  -0.560377
4  Spring_Rank        -2.079217                  -0.752441
5    Page_Rank        -2.556301                  -1.108194
6   Point_Wise        -2.360322                  -0.957587


In [31]:
# Call `run_simulation` on real dataset '00106' and print what it returns.
data_dir = os.path.join(repo_root, 'datasets', 'Real_Data')
grouped = group_dataset_files(data_dir)

# Pull the dataset's edge/node file names and join them onto the data
# directory to get full paths.
edge_file, node_file = grouped['00106']['edges'], grouped['00106']['nodes']
edge_path, node_path = (
    os.path.join(data_dir, file_name) for file_name in (edge_file, node_file)
)

# NOTE(review): 0.8 is presumably a split ratio -- confirm against
# run_simulation's signature.
results = run_simulation(node_path, edge_path, 0.8)
print(results)

  model  av_error    spearman     kendall     prior   HO_Like   HOL_Like  \
0    HO  0.421094  -0.0857143  -0.0666667  -1.54455  -6.36203  -0.775713   
1   HOL  0.437927    0.142857   0.0666667  -1.59171  -4.06419  -0.194339   
2   BIN  0.430317   0.0857143   0.0666667  -1.55203  -5.93088  -0.644032   
3  BINL  0.447308   0.0845154  -0.0666667  -1.66226   -3.9374  -0.126713   

  iterations  
0          8  
1         15  
2          8  
3         15  


In [38]:
# Call `run_simulation_convergence` on real dataset '00106' and print how many
# 'rms_convergence_criteria' entries were recorded under the 'HO' and 'Z' keys.
data_dir = os.path.join(repo_root, 'datasets', 'Real_Data')
grouped = group_dataset_files(data_dir)

# Pull the dataset's edge/node file names and join them onto the data
# directory to get full paths.
edge_file, node_file = grouped['00106']['edges'], grouped['00106']['nodes']
edge_path, node_path = (
    os.path.join(data_dir, file_name) for file_name in (edge_file, node_file)
)

# NOTE(review): the meaning of the positional arguments 1 and 0.8 is not
# visible here -- confirm against run_simulation_convergence's signature.
results = run_simulation_convergence(node_path, edge_path, 1, 0.8)
for model_key in ('HO', 'Z'):
    print(len(results[model_key]['rms_convergence_criteria']))

9
28
