In [None]:
# Enable the autoreload extension; mode 2 reloads imported modules before each
# cell runs, so edits to the project's .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Inject a CSS rule that stretches elements with the `.container` class to the
# full browser width. HTML is imported from the public `IPython.display` API
# instead of the internal `IPython.core.display` module.
from IPython.display import HTML
HTML("<style>.container { width:100% !important; }</style>")

In [None]:
import sys, os
import seaborn as sns

In [None]:
# Make the project's Python sources importable. The path is relative, so it is
# resolved against the notebook's current working directory.
sys.path.append('../python/')
import experiment_utils.player_prediction_visualization as ppv
import experiment_utils.player_prediction_performance as ppp
from data_processing.tennis_player_processing import load_dataset_parameters

# 1. Set parameters

## a.) Choose dataset

In [None]:
# Identifier of the dataset to evaluate. Only "rg17" and "uo17" are accepted by
# the day-selection cell further down; swap the commented line to switch.
dataset_id = "rg17"
#dataset_id = "uo17"

In [None]:
# Load the parameters of the selected dataset. The last two returned values are
# discarded here.
min_epoch, num_of_days, dates, missing_dates, _, _ = load_dataset_parameters(dataset_id)

## b.) Select centrality measures (with configuration) to evaluate

In [None]:
# One entry per centrality-measure configuration to evaluate. The prefix before
# the first underscore ("tk", "ttk", "tpr", "hc", "indeg", "nbm", "spr", "did")
# matches the identifier string passed to
# ppp.calculate_metrics_for_prediction in section 2.
# NOTE(review): presumably these are sub-folder names under prediction_path --
# confirm against the ppp implementation.
score_folders = [
    "tk_b1.00_Exp(b:0.500,n:7200.000)",
    "ttk_b1.00_Exp(b:0.500,n:7200.000)_length_limit_2",
    "tpr_a0.85_b0.50",
    "hc_snapshot_10",
    "indeg_snapshot_10",
    "nbm_snapshot_10",
    "spr_snapshot_10_a0.85_i100",
    "did_Exp(b:0.500,n:7200.000)"
]
print(score_folders)

## c.) Other parameters


In [None]:
# --- Evaluation settings -----------------------------------------------------
N_THREADS = 1              # forwarded as n_threads to the metric computations
lookback_size = 24         # used to derive the interval bounds
first_snapshot = -24       # snapshot range shown in the mean-behaviour plot
last_snapshot = 0
use_binary_labels = True   # only affects the directory postfix in this cell
num_of_intervals = num_of_days*24  # 24 intervals per day (presumably hourly)

# --- Input / output locations ------------------------------------------------
# NOTE: root_dir ends with "/" and the sub-paths start with "/", so the joined
# strings contain "//". They are kept unchanged so that the resulting path
# strings are identical to what downstream code received before.
root_dir = "../results/%s/predict_tennis_players_experiment/" % dataset_id
dir_postfix = "_binary%s" % use_binary_labels
tennis_players_source_path = root_dir + "/tennis_player_labels%s/" % dir_postfix
sim_res_folder = root_dir + "/metrics%s" % dir_postfix
# The cell previously assigned `experiment_path = root_dir` first and then
# overwrote it unconditionally; only this effective assignment is kept.
experiment_path = sim_res_folder + "/tennis_player_labels%s_tmp" % dir_postfix
prediction_path = "../data/%s_data/centrality_measures/original/" % dataset_id
img_dir = sim_res_folder + "/img_from%i_to%i" % (first_snapshot, last_snapshot)

In [None]:
# Show the resolved label, experiment and prediction paths as a sanity check.
print(tennis_players_source_path,experiment_path,prediction_path)

In [None]:
# Ensure the output directories exist, reporting only the ones created now.
for p in (root_dir, sim_res_folder, img_dir):
    if os.path.exists(p):
        # Already present: nothing to create, nothing to report.
        continue
    os.makedirs(p)
    print("Directory was created: %s" % p)

# 2. Normalized Discounted Cumulative Gain (NDCG)

In [None]:
# Metric identifier handed to every metric computation below; its upper-cased
# form is passed to the plotting helpers in section 3.
metric_id = "ndcg@50"

## i.) Adjust labels for lookback

In [None]:
# Interval bounds derived from the number of days and the lookback size. The
# result is reused for label duplication, metric computation and the per-day
# plot.
interval_bounds = ppp.get_interval_bounds(num_of_days, lookback_size)

In [None]:
# Prepare the label files for the computed interval bounds.
# NOTE(review): presumably copies labels from tennis_players_source_path into
# experiment_path -- the exact file layout is defined in ppp; confirm there.
ppp.duplicate_label_files(experiment_path, tennis_players_source_path, interval_bounds)

## ii.) Calculate NDCG

In [None]:
# Label and prediction locations, passed together to every metric computation.
paths = [experiment_path, prediction_path]
# Shared container handed to each calculate_metrics_for_prediction call and
# later to the plotting helpers; starts out empty.
prediction_results = dict()

### Temporal Katz centrality

In [None]:
%%time
ppp.calculate_metrics_for_prediction(prediction_results, "tk", metric_id, score_folders, interval_bounds, paths, sim_res_folder, n_threads=N_THREADS)

### Truncated Temporal Katz centrality

In [None]:
%%time
ppp.calculate_metrics_for_prediction(prediction_results, "ttk", metric_id, score_folders, interval_bounds, paths, sim_res_folder, n_threads=N_THREADS)

### Decayed Indegree

In [None]:
%%time
ppp.calculate_metrics_for_prediction(prediction_results, "did", metric_id, score_folders, interval_bounds, paths, sim_res_folder, n_threads=N_THREADS)

### Temporal PageRank

In [None]:
%%time
ppp.calculate_metrics_for_prediction(prediction_results, "tpr", metric_id, score_folders, interval_bounds, paths, sim_res_folder, n_threads=N_THREADS)

### Static PageRank

In [None]:
%%time
ppp.calculate_metrics_for_prediction(prediction_results, "spr", metric_id, score_folders, interval_bounds, paths, sim_res_folder, n_threads=N_THREADS)

### Static Indegree

In [None]:
%%time
ppp.calculate_metrics_for_prediction(prediction_results, "indeg", metric_id, score_folders, interval_bounds, paths, sim_res_folder, n_threads=N_THREADS)

### Static Negative beta-measure

In [None]:
%%time
ppp.calculate_metrics_for_prediction(prediction_results, "nbm", metric_id, score_folders, interval_bounds, paths, sim_res_folder, n_threads=N_THREADS)

### Static Harmonic centrality

In [None]:
%%time
ppp.calculate_metrics_for_prediction(prediction_results, "hc", metric_id, score_folders, interval_bounds, paths, sim_res_folder, n_threads=N_THREADS)

# 3. Visualization

#### Setting custom color palette

In [None]:
# Plot styling for the mean-behaviour figure: thick lines, large markers and a
# large font scale on a white grid.
paper_rc = {'lines.linewidth': 5, 'lines.markersize': 20}
sns.set_context("paper", rc=paper_rc, font_scale=4.25)
sns.set_style("whitegrid")

# Palette: three hand-picked colours first, followed by eight "Set2" colours.
m_palette = sns.color_palette(['#5cd65c','#ff6666','#ff944d'])
custom_palette = m_palette + sns.color_palette("Set2", 8)

# Fix: the default colour cycle used to be set to m_palette + custom_palette.
# custom_palette already starts with m_palette, so the three hand-picked colours
# appeared twice in a row and several series shared a colour in plots that rely
# on the default cycle. The cycle now equals custom_palette.
sns.set_palette(custom_palette)

## i.) Select subset of days for evaluation

In [None]:
# Pick the day indexes to evaluate for the selected dataset.
if dataset_id == "rg17":
    day_indexes = list(range(4,num_of_days)) # from 2017-05-28 to 2017-06-11
elif dataset_id == "uo17":
    day_indexes = list(range(7,num_of_days)) # from 2017-08-28 to 2017-09-10
else:
    raise RuntimeError("Invalid 'dataset_id'!")

# Fix: `dates` used to be filtered from itself (dates = [dates[i] ...]), so
# running this cell a second time indexed into the already shortened list and
# either raised IndexError or silently selected the wrong dates. The full date
# list is re-read from the dataset parameters instead, which makes the cell
# idempotent while still exposing the filtered list under the name `dates`.
_, _, all_dates, _, _, _ = load_dataset_parameters(dataset_id)
dates = [all_dates[i] for i in day_indexes]
print(day_indexes)
print(dates)

## ii.) Mean hourly NDCG (over the days)

In [None]:
# Plot the metric averaged over the selected days for the snapshot range
# first_snapshot..last_snapshot, using the custom palette.
# NOTE(review): the meaning of the "mixed" argument is defined in ppv -- confirm.
ppv.visu_mean_behaviour(
    prediction_results, score_folders, day_indexes,
    first_snapshot, last_snapshot, "mixed",
    metric_id.upper(), img_dir, custom_palette,
)

## iii.) NDCG timeseries for every day

In [None]:
# Thinner lines, smaller markers and a smaller font scale than the settings used
# for the mean-behaviour figure.
paper_rc = {
    'lines.linewidth': 3,
    'lines.markersize': 10,
}
sns.set_context("paper", rc=paper_rc, font_scale=3)
sns.set_style("whitegrid")

In [None]:
# Plot the metric time series for the selected days; `dates` is the list already
# restricted to day_indexes.
ppv.visu_pred_perf_per_day(
    prediction_results, score_folders, interval_bounds,
    day_indexes, dates, metric_id.upper(), img_dir,
)