In [None]:
from IPython.core.display import HTML
# Inject CSS that stretches the notebook's `.container` element to the full
# browser width. As the last expression of the cell, the HTML object is rendered.
HTML("<style>.container { width:100% !important; }</style>")

In [None]:
import sys
sys.path.insert(0,"../../python/")
from parametrization import ParamHelper

In [None]:
import os, shutil
import numpy as np
import pandas as pd

In [None]:
import prediction_utils.rg_prediction_sim as rgps
import prediction_utils.rg_prediction_visu as rgpv

In [None]:
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Global plot styling used for the summary figures: seaborn's "paper" context
# with thick lines, large markers and a large font scale, on a white grid.
paper_rc = {'lines.linewidth': 5,'lines.markersize': 20}              
sns.set_context("paper", rc = paper_rc, font_scale = 4.25)
sns.set_style("whitegrid")

In [None]:
# Colour palette shared by all figures: three hand-picked colours first,
# followed by the eight colours of seaborn's "Set2" palette.
m_palette = sns.color_palette(['#5cd65c','#ff6666','#ff944d'])
custom_palette = m_palette + sns.color_palette("Set2", 8)
# Register exactly this palette as the default colour cycle. Previously
# m_palette was prepended a second time here, so the default cycle repeated
# the first three colours and no longer matched custom_palette.
sns.set_palette(custom_palette)

# 1. Load experiment parameters

In [None]:
# Parameter accessor for this notebook, built from the evaluation pipeline file.
# NOTE(review): presumably the second argument selects this notebook's entry in
# the pipeline configuration — confirm against ParamHelper.
ph = ParamHelper("../../pipelines/Evaluation.json", "ipython/experiments/roland_garros_predict_player.ipynb")

In [None]:
# Experiment root folder and image sub-folder name, both read from the parameter file.
rg_root_dir = ph.get("rg_root_dir")
img_dir = ph.get("img_dir")
# Working directory of this experiment; figures are saved under <img_prefix>/<img_dir>.
experiment_path = rg_root_dir + "/daily_in_advance/"
img_prefix = "%s/img/" % experiment_path
# Folder holding the per-day players_<day>.csv files that duplicate_label_files copies from.
tennis_players_source_path =  ph.get("tennis_players_source_path")
# Destination of the duplicated files (one sub-folder per day index).
original_experiment_path = experiment_path + "/tennis_players_copied"
# Hard-coded location passed to the metric computation as the second experiment path.
prediction_experiment_path = "../../data/centrality_scores/rg17_epoch_t457_d3600/original/"
# Window length in intervals, consumed by get_interval_bounds (24 intervals make up one day).
lookback_size = 30
num_of_days = 19
num_of_intervals = num_of_days*24
# Thread count forwarded to rgps.calculate_metrics_for_prediction via `n_threads`.
N_THREADS = ph.get("num_of_threads")

In [None]:
# Echo the three key paths so the configuration can be checked before running the experiment.
print(tennis_players_source_path,original_experiment_path,prediction_experiment_path)

### Load included nodes (accounts of professional tennis players)

In [None]:
# Entries of the recoded player-account file; passed below as `restricted_indices`
# to every rgps.calculate_metrics_for_prediction call.
# NOTE(review): np.recfromtxt is deprecated in recent NumPy releases — confirm
# that the NumPy version used for this notebook still provides it.
included_accounts = list(np.recfromtxt("../../data/preprocessed/recoded_player_accounts.txt"))
# Folder handed to rgps.calculate_metrics_for_prediction for its results.
sim_res_folder = experiment_path + "similarity_metrics/"

In [None]:
# Make sure both output directories exist, reporting every one that had to be created.
for p in (experiment_path, sim_res_folder):
    if os.path.exists(p):
        continue
    os.makedirs(p)
    print("Directory was created: %s" % p)

### Set other parameters for similarity computation

In [None]:
# Additional settings read from the parameter file.
# NOTE(review): neither name is referenced again in the visible cells — confirm they are still needed.
norm_factors = ph.get("norm_factor")
static_lookbacks = ph.get("static_lookbacks")

In [None]:
# Names of the score variants to evaluate. The part before the first "_" is used
# later as the key of the score family (e.g. "olr", "tpr") in prediction_results.
score_folders = ph.get("score_folders")
print(score_folders)

In [None]:
# Index ranges over all intervals (24 per day) and over all days of the experiment.
intervals = range(0,num_of_intervals)
days = range(0,num_of_days)
# A single pre-formatted string keeps the cell valid on both Python 2 and
# Python 3 and prints the same text as the former Python 2 print statement.
print("%i %i" % (num_of_intervals, num_of_days))

### Duplicate tennis player label files into per-day directories

In [None]:
def get_interval_bounds(lookback_size=2*24, day_indices=None):
    """Compute, for each day, the window of interval indices that ends with that day.

    A day is made up of 24 intervals, so day ``d`` ends at interval
    ``(d + 1) * 24``. The window reaches ``lookback_size`` intervals back from
    that point and is clipped so it never starts before interval 0.

    Parameters
    ----------
    lookback_size : int
        Length of the window, in intervals.
    day_indices : iterable of int, optional
        Day indices to compute windows for. When omitted, the notebook-level
        ``days`` range is used, which is what the function always did before
        this parameter existed.

    Returns
    -------
    list of (int, [int, int])
        One ``(day_idx, [lower_bound, upper_bound])`` pair per day, in input order.
    """
    if day_indices is None:
        # Backward-compatible default: fall back to the notebook-level range.
        day_indices = days
    intervals_per_day = 24
    interval_bounds = []
    for day_idx in day_indices:
        upper_bound = (day_idx + 1) * intervals_per_day
        # Early days have less history than lookback_size: clip at interval 0.
        lower_bound = max(0, upper_bound - lookback_size)
        interval_bounds.append((day_idx, [lower_bound, upper_bound]))
    return interval_bounds

def duplicate_label_files(interval_bounds):
    """Copy each day's label file once for every interval of that day's window.

    For every ``(day_idx, [lower, upper])`` entry, the file
    ``<tennis_players_source_path>/players_<day_idx>.csv`` is copied to
    ``<original_experiment_path>/<day_idx>/players_<i>.csv`` for each ``i`` in
    ``range(lower, upper)``.

    Only missing destination files are copied. The earlier version skipped a
    day entirely as soon as its folder existed, so a run interrupted in the
    middle of copying left an incomplete folder that was never filled in.
    Already complete folders are still left untouched.

    Parameters
    ----------
    interval_bounds : list of (int, [int, int])
        Output of ``get_interval_bounds``.
    """
    for day_idx, bounds in interval_bounds:
        target_folder = "%s/%i" % (original_experiment_path, day_idx)
        if not os.path.exists(target_folder):
            os.makedirs(target_folder)
        full_src_file = "%s/players_%i.csv" % (tennis_players_source_path, day_idx)
        copied_any = False
        for i in range(bounds[0], bounds[1]):
            dest = "%s/players_%i.csv" % (target_folder, i)
            if os.path.exists(dest):
                # Already duplicated by a previous run.
                continue
            shutil.copy(full_src_file, dest)
            copied_any = True
        if copied_any:
            print("Labels for the %ith day were duplicated!" % day_idx)

In [None]:
# Per-day interval windows for the configured lookback; indexed by day in the plotting helpers.
interval_bounds = get_interval_bounds(lookback_size)

In [None]:
# Populate <original_experiment_path>/<day>/ with one label file per interval of each window.
duplicate_label_files(interval_bounds)

# 2. Calculate similarity metrics

In [None]:
# Identifier of the metric passed to the computation; also used as y-axis label and value column name.
metric_id = "ndcg"

In [None]:
# Result containers. prediction_results is passed to every metric computation below and
# read by the plots as prediction_results[family][day_idx][score]. Re-running this cell resets it.
# NOTE(review): score_stat_results is not used in the visible cells.
prediction_results, score_stat_results = {}, {}

In [None]:
# The two locations handed to the metric computation: duplicated label files first, score files second.
experiment_paths = [original_experiment_path, prediction_experiment_path]

## OnlineRank

In [None]:
# Passed as `filter_keys` to the "olr" computation; None presumably disables filtering — confirm in rgps.
olr_filters = None

In [None]:
%%time
# Evaluate the "olr" score family with `metric_id` over every day window.
# Results are presumably stored in prediction_results["olr"], which the plots read later.
rgps.calculate_metrics_for_prediction(prediction_results, "olr", metric_id, score_folders, interval_bounds, experiment_paths, sim_res_folder, filter_keys=olr_filters, restricted_indices=included_accounts, n_threads=N_THREADS)

## Temporal PageRank

In [None]:
%%time
# Same evaluation for the "tpr" score family (no filter_keys passed).
rgps.calculate_metrics_for_prediction(prediction_results, "tpr", metric_id, score_folders, interval_bounds, experiment_paths, sim_res_folder, restricted_indices=included_accounts, n_threads=N_THREADS)

## Static PageRank

In [None]:
%%time
# Same evaluation for the "spr" score family.
rgps.calculate_metrics_for_prediction(prediction_results, "spr", metric_id, score_folders, interval_bounds, experiment_paths, sim_res_folder, restricted_indices=included_accounts, n_threads=N_THREADS)

## Static Indegree

In [None]:
%%time
# Same evaluation for the "indeg" score family.
rgps.calculate_metrics_for_prediction(prediction_results, "indeg", metric_id, score_folders, interval_bounds, experiment_paths, sim_res_folder, restricted_indices=included_accounts, n_threads=N_THREADS)

## Static Negative beta-measure

In [None]:
%%time
# Same evaluation for the "nbm" score family.
rgps.calculate_metrics_for_prediction(prediction_results, "nbm", metric_id, score_folders, interval_bounds, experiment_paths, sim_res_folder, restricted_indices=included_accounts, n_threads=N_THREADS)

## Static Harmonic centrality

In [None]:
%%time
# Same evaluation for the "hc" score family.
rgps.calculate_metrics_for_prediction(prediction_results, "hc", metric_id, score_folders, interval_bounds, experiment_paths, sim_res_folder, restricted_indices=included_accounts, n_threads=N_THREADS)

# 3. Visualization

In [None]:
# Matplotlib marker codes cycled through (index modulo length) so each plotted score gets its own marker.
markers = ["s","*","o","^","v",">","D",]

def pred_perf_plot(score_visu_list,day_idx):
    """Plot one line per score for a single day on the current matplotlib axes.

    The x axis covers the snapshots of the day's interval window. Ticks are
    placed on every fifth snapshot and labelled with the snapshot's offset
    from the end of the window, so the first snapshot of an ``n``-wide window
    is labelled ``-n``.

    Tick positions and labels are derived from the same list, so they always
    have equal length. The earlier version built them from two different
    ranges, which produced one label too many for a 30-wide window and shifted
    labels for windows whose width is not a multiple of 5.

    Parameters
    ----------
    score_visu_list : list of str
        Score names; the part before the first "_" selects the score family
        in ``prediction_results``.
    day_idx : int
        Index into ``interval_bounds`` and key of the day in ``prediction_results``.

    Returns
    -------
    list
        The line objects returned by ``plt.plot`` (empty when no score is given).
    """
    l_bound, u_bound = interval_bounds[day_idx][1]
    n_points = u_bound - l_bound
    x = range(0, n_points)
    visu_args = []
    for i, score in enumerate(score_visu_list):
        score_pref = score.split("_")[0]
        m = markers[i % len(markers)]
        y = prediction_results[score_pref][day_idx][score]
        # plt.plot accepts repeated (x, y, format) triples; "<marker>-" draws a solid line with markers.
        visu_args += [x, y, "%s-" % m]
    res = plt.plot(*visu_args)
    tick_positions = list(range(0, n_points, 5))
    tick_labels = [pos - n_points for pos in tick_positions]
    plt.xticks(tick_positions, tick_labels)
    return res
    
def visu_pred_perf_per_day(score_visu_list, day_index_bounds):
    """Draw one subplot per day with the per-snapshot performance of every score.

    The subplots are arranged in two columns, a shared legend is added to the
    figure, and the result is saved as ``<img_prefix>/<img_dir>/detailed.png``.

    Compared with the earlier version, the output directory is created when it
    is missing (as ``visu_mean_behaviour`` already does), the legend position
    is passed by keyword, and the legend is skipped when nothing was plotted.

    Parameters
    ----------
    score_visu_list : list of str
        Score names to plot in every subplot.
    day_index_bounds : (int, int)
        First day index (inclusive) and last day index (exclusive).
    """
    days = range(day_index_bounds[0],day_index_bounds[1])
    num_plots = len(days)
    # One extra row is always reserved; NOTE(review): the legend position below
    # appears to rely on the free space at the bottom of the grid — confirm before changing.
    n_rows, n_cols = num_plots // 2 + 1, 2
    print(n_rows, n_cols, num_plots)
    fig = plt.figure(figsize=(n_cols*10,n_rows*5))
    lines = None
    for i in range(num_plots):
        plt.subplot(n_rows,n_cols,i+1)
        lines = pred_perf_plot(score_visu_list,days[i])
        plt.ylim((0.0,1.0))
        plt.ylabel(metric_id)
        # `dates` is already sliced to start at the first selected day, hence index i.
        plt.title(dates[i])
    if lines:
        detailed_relabel = ph.get("is_detailed_relabel")
        labels = tuple(rgpv.relabel(score, detailed_relabel) for score in score_visu_list)
        # Keyword form of `loc`: newer matplotlib releases deprecate passing it positionally.
        fig.legend(lines, labels, loc=(0.55,0.065))
    out_dir = "%s/%s" % (img_prefix, img_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    plt.savefig("%s/detailed.png" % out_dir)
    
def visu_mean_behaviour(visu_index_list,day_index_bounds,pref="mixed",metric=metric_id, title_text=""):
    """Plot, per score, the performance over snapshots aggregated across days.

    Values are collected from ``prediction_results`` for every day in the
    notebook-level ``days`` range, restricted to the requested day window, and
    drawn with ``sns.tsplot`` (``unit="day"``, ``ci=95``). The figure is saved
    as ``<img_prefix>/<img_dir>/mean_<pref>_perf.png``; the directory is
    created when missing.

    Parameters
    ----------
    visu_index_list : list of str
        Score names; the part before the first "_" selects the score family.
    day_index_bounds : (int, int)
        Day window; both ends are treated as inclusive here.
    pref : str
        "mixed" keeps every score, any other value keeps only the scores whose
        name contains it.
    metric : str
        Name of the value column.
    title_text : str
        Accepted for the callers' sake; it is not used inside the function.
    """
    records = []
    for score in visu_index_list:
        if pref == "mixed" or pref in score:
            score_pref = score.split("_")[0]
            for day_idx in days:
                perf_values = prediction_results[score_pref][day_idx][score]
                # Snapshots are numbered -n, ..., -1 in the order of the values.
                snapshot_ids = list(-np.arange(len(perf_values), 0, -1))
                records.extend(
                    (score, day_idx, snap, val)
                    for snap, val in zip(snapshot_ids, perf_values)
                )
    if not records:
        print("No data to visualize!")
        return
    frame = pd.DataFrame(records, columns=["score","day","snapshot",metric])
    detailed_relabel = False
    frame["score"] = frame["score"].apply(lambda name: rgpv.relabel(name, detailed_relabel))
    print(len(frame))
    first_day, last_day = day_index_bounds[0], day_index_bounds[1]
    in_window = (frame["day"] >= first_day) & (frame["day"] <= last_day)
    frame = frame[in_window]
    print(len(frame))
    plt.figure(figsize=(22,14))
    for i, label in enumerate(frame["score"].unique()):
        colour = custom_palette[i % len(custom_palette)]
        marker = markers[i % len(markers)]
        one_score = frame[frame["score"]==label]
        sns.tsplot(data=one_score, time="snapshot", unit="day", condition="score", value=metric, ci=95, color=colour, marker=marker)
    dir_name = img_prefix + "/" + img_dir
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    plt.savefig("%s/mean_%s_perf.png" % (dir_name,pref))

## Select subset of days for evaluation

In [None]:
# Day-index window used by all visualisations below.
first_day_idx = 4 # 2017-05-28
# Upper bound of the window: visu_pred_perf_per_day treats it as exclusive (range),
# visu_mean_behaviour as inclusive (<=). Both select the same days here because
# day indices stop at num_of_days - 1, which corresponds to 2017-06-11.
last_day_index = num_of_days # 2017-06-11
day_index_bounds = (first_day_idx,last_day_index)

In [None]:
# Calendar dates of the experiment days (2017-05-24 .. 2017-06-11), cut down to
# the selected day window; used as subplot titles in the per-day figure.
dates = ["2017-05-%02d" % i for i in range(24, 32)] + ["2017-06-%02d" % i for i in range(1, 12)]
dates = dates[first_day_idx:last_day_index]
dates

## OnlineRank

In [None]:
# Fall back to an empty title when the parameter cannot be read.
try:
    title_text = ph.get("title_text")
except Exception:
    # Narrower than the former bare `except:`, so KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    title_text = ""
# Aggregated plot restricted to the score names containing "olr".
visu_mean_behaviour(score_folders, day_index_bounds, pref="olr", title_text=title_text)

## Temporal PageRank

In [None]:
# Aggregated plot restricted to the score names containing "tpr".
visu_mean_behaviour(score_folders, day_index_bounds, pref="tpr")

## PageRank

In [None]:
# Aggregated plot restricted to the score names containing "spr".
# NOTE(review): title_text is accepted by visu_mean_behaviour but not used inside it.
visu_mean_behaviour(score_folders, day_index_bounds, pref="spr", title_text="of Static PageRank models")

## Indegree

In [None]:
# Aggregated plot restricted to the score names containing "indeg".
# NOTE(review): title_text is accepted by visu_mean_behaviour but not used inside it.
visu_mean_behaviour(score_folders, day_index_bounds, pref="indeg", title_text="of Static Indegree models")

## Negative beta-measure

In [None]:
# Aggregated plot restricted to the score names containing "nbm".
# NOTE(review): title_text is accepted by visu_mean_behaviour but not used inside it.
visu_mean_behaviour(score_folders, day_index_bounds, pref="nbm", title_text="of Static Negative beta-measure models")

## Harmonic centrality

In [None]:
# Aggregated plot restricted to the score names containing "hc".
# NOTE(review): title_text is accepted by visu_mean_behaviour but not used inside it.
visu_mean_behaviour(score_folders, day_index_bounds, pref="hc", title_text="of Static Harmonic centrality models")

## Mixed

In [None]:
# Default pref="mixed": every configured score is drawn in a single figure.
visu_mean_behaviour(score_folders, day_index_bounds)

### By Day

In [None]:
# Switch to thinner lines, smaller markers and a smaller font scale for the per-day figure.
# This overrides the styling configured at the top of the notebook for all later plots.
paper_rc = {'lines.linewidth': 3,'lines.markersize': 10}              
sns.set_context("paper", rc = paper_rc, font_scale = 3)
sns.set_style("whitegrid")

In [None]:
# One subplot per selected day with every configured score; saved as detailed.png.
visu_pred_perf_per_day(score_folders, day_index_bounds)