In [12]:
import os
import sys

import pandas as pd
import numpy as np
from collections import defaultdict

repo_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(repo_root)

from src.models import compute_predicted_ratings_HOL_BT, compute_predicted_ratings_HO_BT
from src.utils.file_handlers import group_dataset_files, read_dataset_files
from src.utils.c_operation_helpers import run_simulation
from src.utils.graph_tools import create_hypergraph_from_data

In [13]:
def read_node_mappings(file_path):
    """Read a whitespace-separated ``<id> <label>`` file into a dict.

    Every non-blank line is split on whitespace. The first token is parsed
    as the integer node id and the second token is stored as its label;
    any further tokens on the line are ignored.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict mapping ``int`` node id -> ``str`` label. If an id occurs on
        several lines, the last occurrence wins.

    Raises:
        ValueError: If the first token of a line is not an integer.
        IndexError: If a non-blank line has no second token.
    """
    pi_values = {}
    # Decode explicitly instead of relying on the platform default: labels
    # can contain accented characters.
    # NOTE(review): assumes the file is UTF-8 encoded -- confirm.
    with open(file_path, encoding='utf-8') as f:
        # Iterate lazily; no need to load the whole file with readlines().
        for raw_line in f:
            tokens = raw_line.split()
            if not tokens:
                # Blank / whitespace-only line (e.g. a trailing newline):
                # skip it instead of failing on tokens[0].
                continue
            pi_values[int(tokens[0])] = str(tokens[1])

    return pi_values

def read_node_netsci(file_path):
    """Read a whitespace-separated ``<id> <name words...>`` file into a dict.

    Every non-blank line is split on whitespace. The first token is parsed
    as the integer node id; all remaining tokens are joined with ``'_'`` to
    form the name, so a multi-word name becomes a single underscore-joined
    string. A line holding only an id maps to the empty string.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict mapping ``int`` node id -> ``str`` underscore-joined name. If
        an id occurs on several lines, the last occurrence wins.

    Raises:
        ValueError: If the first token of a line is not an integer.
    """
    pi_values = {}
    # Decode explicitly instead of relying on the platform default: names
    # can contain accented / non-ASCII characters.
    # NOTE(review): assumes the file is UTF-8 encoded -- confirm.
    with open(file_path, encoding='utf-8') as f:
        # Iterate lazily; no need to load the whole file with readlines().
        for raw_line in f:
            tokens = raw_line.split()
            if not tokens:
                # Blank / whitespace-only line (e.g. a trailing newline):
                # skip it instead of failing on tokens[0].
                continue
            pi_values[int(tokens[0])] = '_'.join(tokens[1:])

    return pi_values


def replace_keys_with_mapping(pred_rankings, mapping):
    """Relabel the keys of a score dict and rank the result, best first.

    Args:
        pred_rankings: dict of key -> score.
        mapping: dict of key -> replacement label.

    Returns:
        List of ``(label, score)`` pairs sorted by score in descending
        order. Keys of ``pred_rankings`` that are missing from ``mapping``
        are dropped. When several keys share a label, the label keeps a
        single entry holding the score of the last such key.
    """
    relabelled = {
        mapping[node]: score
        for node, score in pred_rankings.items()
        if node in mapping
    }

    ranked = list(relabelled.items())
    # Stable sort: equal scores keep their insertion order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked


def count_games(data, pi_values):
    """Count, for each player, the number of games that contain the player.

    Args:
        data: Iterable of games; each game must support ``player in game``.
        pi_values: Iterable of players (iterating a dict yields its keys).

    Returns:
        ``defaultdict(int)`` mapping player -> number of games containing
        that player. Players found in no game get no entry; looking them up
        yields 0. A game counts once per player even if it lists the player
        several times.
    """
    game_count = defaultdict(int)

    for player in pi_values:
        appearances = sum(1 for game in data if player in game)
        if appearances:
            # Only players seen at least once are stored.
            game_count[player] = appearances

    return game_count




In [14]:
# NS column: rate the nodes of dataset 00007 with the HOL-BT model and keep
# the names of the ten best-rated authors that have more than ten papers.
data_dir = os.path.join(repo_root, 'datasets', 'Real_Data')

filein_idx, filein_data = '00007_nodes.txt', '00007_edges.txt'
dataset_files = {'nodes': filein_idx, 'edges': filein_data}

data, pi_values = read_dataset_files(dataset_files, data_dir)

# Ratings ordered from highest to lowest score.
pred_rankings = compute_predicted_ratings_HOL_BT(data, pi_values)
sorted_rankings = list(pred_rankings.items())
sorted_rankings.sort(key=lambda x: x[1], reverse=True)

# Author names come from a separate names file, not from the nodes file.
mapping = read_node_netsci(os.path.join(repo_root, 'datasets', 'names_netsci.dat'))
game_counts = count_games(data, pi_values)

# Filter to authors with over ten papers first, then keep the first ten.
authors = [
    mapping[node_id]
    for node_id, _score in sorted_rankings
    if game_counts.get(node_id, 0) > 10
][:10]


In [15]:
# UCL column: rate the nodes of dataset 00002 with the HO-BT model and keep
# the names of the ten best-rated entries.
data_dir = os.path.join(repo_root, 'datasets', 'Real_Data')

filein_idx, filein_data = '00002_nodes.txt', '00002_edges.txt'
dataset_files = {'nodes': filein_idx, 'edges': filein_data}

data, pi_values = read_dataset_files(dataset_files, data_dir)
pred_rankings = compute_predicted_ratings_HO_BT(data, pi_values)

# The nodes file is read a second time as the id -> name lookup.
mapping = read_node_mappings(os.path.join(data_dir, filein_idx))
replaced_w_names = replace_keys_with_mapping(pred_rankings, mapping)

# replaced_w_names is already sorted best-first; keep the names only.
ucl = [name for name, _score in replaced_w_names[:10]]



In [16]:
# FWC column: rate the nodes of dataset 00001 with the HO-BT model and keep
# the names of the ten best-rated entries.
data_dir = os.path.join(repo_root, 'datasets', 'Real_Data')

filein_idx, filein_data = '00001_nodes.txt', '00001_edges.txt'
dataset_files = {'nodes': filein_idx, 'edges': filein_data}

data, pi_values = read_dataset_files(dataset_files, data_dir)
pred_rankings = compute_predicted_ratings_HO_BT(data, pi_values)

# The nodes file is read a second time as the id -> name lookup.
mapping = read_node_mappings(os.path.join(data_dir, filein_idx))
replaced_w_names = replace_keys_with_mapping(pred_rankings, mapping)

# replaced_w_names is already sorted best-first; keep the names only.
wc = [name for name, _score in replaced_w_names[:10]]


In [17]:
# Side-by-side top-10 table: one column per dataset, one row per rank.
# The three lists must have equal length, otherwise DataFrame construction
# raises ValueError.
df = pd.DataFrame({
    'FWC (Fifa World Cup)': wc,
    'UCL (Champions League)': ucl,
    'NS (Network Science)': authors
    })
# Show underscores inside names as spaces.
df = df.replace('_', ' ', regex=True)
# Label rows with 1-based ranks instead of the default 0-based index.
df.index = range(1, len(df) + 1)
df.index.name = 'Rank'
display(df)

Unnamed: 0_level_0,FWC (Fifa World Cup),UCL (Champions League),NS (Network Science)
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Brazil,Real Madrid,Alessandro Vespignani
2,Germany,Bayern Munich,Hernán A. Makse
3,Italy,Barcelona,Luı́s A. Nunes Amaral
4,Argentina,Liverpool,Romualdo Pastor‐Satorras
5,Netherlands,Chelsea,Kim Sneppen
6,France,Manchester City,Albert‐László Barabási
7,Croatia,Juventus,Lenka Zdeborová
8,England,Milan,Shlomo Havlin
9,Sweden,Paris Saint-Germain,J. F. F. Mendes
10,Czechoslovakia,Atlético Madrid,H. Eugene Stanley
