In [1]:
import pandas as pd
import numpy as np
from os.path import join as opj
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter

# Configurations

In [8]:
# Project root on the shared filesystem; per-model sub-folders are read from
# its `models/` directory by the cells below.
base_folder = '/shared/3/projects/relationship-aware-networks-wikipedia'
model_folder = opj(base_folder, 'models')

# Use case to report on. The cells below select the '10.*' model series when
# this is 'fa' and the '20.*' series for any other value (e.g. 'ga').
usecase = 'ga'

# Evaluation measures shown in the results table, in column order.
required_measures = [
    'binary_prec_at_2_perc',
    'binary_prec_at_10_perc',
    'binary_precision_optimal_th',
    'binary_recall_optimal_th',
    'binary_f1_optimal_th',
    'macro_f1_opt_th',
    'macro_auc',
]

## Data Statistics Table

In [3]:
# The modeling data frame is read from model folder '10.2' for the 'fa' use
# case and from '20.2' for any other use case.
if usecase == 'fa':
    rel_model = '10.2'
else:
    rel_model = '20.2'

modeling_df_path = opj(model_folder, str(rel_model), 'modeling_df.csv')
modeling_df = pd.read_csv(modeling_df_path)

# Displayed as the cell output: (rows, columns) of the loaded frame.
modeling_df.shape

(7199, 332)

In [95]:
# Columns summarised in the data-statistics table, and the statistics
# reported for each of them (in table-column order).
rel_columns = [
    'is_sustainable_conservative_approach',
    'EDIT_time_to_promotion',
    'EDIT_num_revisions',
    'EDIT_num_authors',
    'DISCUSSIONS_num_authors',
]
rel_stats = ['sum', 'mean', 'median', 'std']

# Work on a copy so the summary rows appended below never touch modeling_df.
stats_df = modeling_df[rel_columns].copy()

# Compute every aggregate first, while stats_df still holds data rows only;
# appending a summary row before this point would skew the later aggregates.
stats_df_sum, stats_df_mean = stats_df.sum(), stats_df.mean()
stats_df_median, stats_df_std = stats_df.median(), stats_df.std()

# Append one labelled row per statistic ('sum', 'mean', 'median', 'std').
summary_rows = (stats_df_sum, stats_df_mean, stats_df_median, stats_df_std)
for stat_label, stat_row in zip(rel_stats, summary_rows):
    stats_df.loc[stat_label] = stat_row

In [96]:
# Print one LaTeX table row per column in the form
# "& <sum>& <mean>& <median>& <std>".
# The summary rows do not depend on the column, so look them up once.
sum_row = stats_df.loc[rel_stats[0]]
mean_row = stats_df.loc[rel_stats[1]]
median_row = stats_df.loc[rel_stats[2]]
std_row = stats_df.loc[rel_stats[3]]

for rc in rel_columns:
    cells = [
        str(int(sum_row[rc])),        # sum is truncated to an integer
        str(round(mean_row[rc], 2)),
        str(int(median_row[rc])),     # median is truncated to an integer
        str(round(std_row[rc], 2)),
    ]
    print('& ' + '& '.join(cells))

& 38801& 0.94& 1& 0.25
& 83519644& 2013.64& 1524& 1869.3
& 20169570& 486.28& 159& 1048.59
& 3724692& 89.8& 32& 176.69
& 364106& 8.78& 3& 26.12


## Results Table Printing

In [9]:
concise_eval_measures = {}

# Model ids whose evaluation results are reported: the '10.*' series for the
# 'fa' use case, the '20.*' series otherwise. Both series share the same
# variant suffixes. (A single-digit '1.*' / '2.*' id scheme used to be listed
# here in a commented-out block; it is not used.)
model_series = '10' if usecase == 'fa' else '20'
model_variants = ('01', '1', '11', '12', '13', '14', '15', '21')
models_to_import = [model_series + '.' + variant for variant in model_variants]

In [10]:
# model id -> {measure name -> value}, taken from the 'mean' and 'std' rows of
# each model's test-set evaluation file.
mean_eval_measures = {}
std_eval_measures = {}

for cur_model in models_to_import:
    # Alternative input (disabled): 'bootstrap_eval_measures_test.csv'.
    results_path = opj(model_folder, str(cur_model), 'eval_measures_test.csv')

    # The first CSV column is unnamed ('Unnamed: 0') and carries the row
    # labels, so it becomes the index.
    cur_results = pd.read_csv(results_path).set_index('Unnamed: 0', drop=True)

    mean_series = cur_results.loc['mean']
    std_series = cur_results.loc['std']
    mean_eval_measures[cur_model] = mean_series[required_measures].to_dict()
    std_eval_measures[cur_model] = std_series[required_measures].to_dict()

In [11]:
# Print one LaTeX table row per model. Each cell is "<mean>$\pm$<std>" with
# both numbers rounded to two decimals; cells are joined with "& ".
for model, measures in mean_eval_measures.items():
    printing_strs_list = list()
    for cur_measure, value in measures.items():
        corres_std = str(round(float(std_eval_measures[model][cur_measure]), 2))
        # Raw string: '\p' is not a valid Python escape sequence, so a plain
        # literal only works by accident and triggers a SyntaxWarning.
        str_to_add = str(round(float(value), 2)) + r'$\pm$' + corres_std
        printing_strs_list.append(str_to_add)
    # end of the inner loop, we have to print the required string
    main_string_to_print = '& '.join(printing_strs_list)
    prefix = '& '
    # Models ending in '.01' or '.21' close the row with ' \\[2pt]'; every
    # other model additionally emits a trailing ' &' after the row break.
    # Raw strings keep the emitted text identical while avoiding the invalid
    # '\[' escape sequence.
    if model.endswith(('.01', '.21')):
        sufix = r' \\[2pt]'
    else:
        sufix = r'\\[2pt] &'
    print(prefix + main_string_to_print + sufix)

& 0.42$\pm$0.02& 0.25$\pm$0.02& 0.39$\pm$0.03& 0.12$\pm$0.01& 0.18$\pm$0.01& 0.57$\pm$0.01& 0.7$\pm$0.01 \\[2pt]
& 0.53$\pm$0.05& 0.38$\pm$0.02& 0.33$\pm$0.02& 0.3$\pm$0.02& 0.31$\pm$0.02& 0.63$\pm$0.01& 0.78$\pm$0.0\\[2pt] &
& 0.45$\pm$0.02& 0.32$\pm$0.02& 0.26$\pm$0.01& 0.28$\pm$0.01& 0.27$\pm$0.01& 0.61$\pm$0.01& 0.71$\pm$0.01\\[2pt] &
& 0.23$\pm$0.04& 0.21$\pm$0.02& 0.19$\pm$0.01& 0.24$\pm$0.03& 0.21$\pm$0.02& 0.55$\pm$0.01& 0.68$\pm$0.01\\[2pt] &
& 0.32$\pm$0.05& 0.27$\pm$0.02& 0.21$\pm$0.02& 0.41$\pm$0.13& 0.27$\pm$0.04& 0.58$\pm$0.01& 0.75$\pm$0.01\\[2pt] &
& 0.47$\pm$0.02& 0.33$\pm$0.01& 0.34$\pm$0.01& 0.2$\pm$0.01& 0.25$\pm$0.01& 0.6$\pm$0.01& 0.71$\pm$0.01\\[2pt] &
& 0.65$\pm$0.03& 0.53$\pm$0.02& 0.46$\pm$0.02& 0.45$\pm$0.03& 0.45$\pm$0.02& 0.7$\pm$0.01& 0.83$\pm$0.01\\[2pt] &
& 0.75$\pm$0.04& 0.57$\pm$0.02& 0.48$\pm$0.03& 0.47$\pm$0.02& 0.48$\pm$0.02& 0.72$\pm$0.01& 0.87$\pm$0.01 \\[2pt]
