# Notebook for results concatenation and preprocessing

In [None]:
from pathlib import Path
import pandas as pd

## Random seed selector processing

In [None]:
# Load the raw results of the random seed selector; the first CSV column is
# used as the row index of the resulting frame.
random_path = Path("experiments/random/results.csv")
random_df = pd.read_csv(random_path, index_col=0)
# Preview the first rows (rendered as the cell output in a notebook).
random_df.head()

In [None]:
# Columns that are averaged further down in the notebook; every other column
# of random_df is treated as an experiment parameter and becomes an index key.
metric_columns = (
    "repetition_run",
    "gain",
    "diffusion_len",
    "active_actors_prct",
    "seed_actors_prct",
)

# Fail early (as set.remove() did before) when an expected column is absent.
missing_columns = [col for col in metric_columns if col not in random_df.columns]
if missing_columns:
    raise KeyError(f"Columns missing from random results: {missing_columns}")

# Keep the DataFrame's own column order instead of going through sets, whose
# iteration order for strings changes between interpreter runs; this makes the
# index-level order and the output column order reproducible.
experiment_params = [col for col in random_df.columns if col not in metric_columns]
experiment_metrics = [col for col in random_df.columns if col in metric_columns]

print(f"Columns that are multi-indices: {experiment_params}")
print(f"Columns that have been left: {experiment_metrics}")

In [None]:
# Move the experiment-parameter columns into the index, so that only the
# metric columns remain as regular data columns.
reindexed_df = random_df.set_index(experiment_params)
reindexed_df.head()

In [None]:
# Average every metric over all rows that share the same combination of
# experiment parameters. Grouping by the named index levels computes all
# metric means in a single pass and keeps the level names, so there is no need
# to concatenate per-metric results or to restore the index names afterwards.
averaged_random_df = (
    reindexed_df.groupby(level=experiment_params)[experiment_metrics]
    .mean()
    .reset_index()  # turn the parameter levels back into ordinary columns
)

# Tag the rows so they can be told apart after merging with other selectors.
averaged_random_df["selection_metric"] = "random"

averaged_random_df.head()

## Greedy seed selector processing

In [None]:
# The averaged random-selector results act as the reference when the greedy
# results are filtered in the following cell.
reference_df = averaged_random_df

# Read the greedy results and discard the unnamed column that holds the index
# written out when the CSV was saved.
greedy_path = Path("experiments/greedy/results.csv")
greedy_df = pd.read_csv(greedy_path).drop(columns="Unnamed: 0")

greedy_df.head()

In [None]:
print(f"Length of raw dataframe: {len(greedy_df)}")

# For every network keep only those greedy rows whose seed_actors_prct
# (rounded to two decimals) also occurs in the reference results.
for net in greedy_df["network"].unique():

    # seed_actors_prct values present in the reference frame for this network
    reference_rows = reference_df[reference_df["network"] == net]
    allowed_values = reference_rows["seed_actors_prct"].unique()

    # greedy rows of this network whose seed_actors_prct is not allowed
    is_current_net = greedy_df["network"] == net
    has_allowed_prct = (
        greedy_df["seed_actors_prct"].round(2).isin(allowed_values.round(2))
    )
    greedy_rows_to_be_dropped = greedy_df[is_current_net & ~has_allowed_prct]

    print(f"Removing {len(greedy_rows_to_be_dropped)} rows for net: {net}")
    greedy_df = greedy_df.drop(index=greedy_rows_to_be_dropped.index)

print(f"Length of processed dataframe: {len(greedy_df)}")

# Tag the remaining rows with the selector they come from.
greedy_df["selection_metric"] = "greedy"
greedy_df.head()


## Processing of the other metrics

In [None]:
root_path = Path("experiments")
final_path = root_path.joinpath("all_results.csv")
# Remove the output of a previous run before the results are regenerated.
if final_path.exists():
    final_path.unlink()
# Each experiment lives in its own sub-directory holding a results.csv, so
# plain files lying next to them (stray CSVs, OS metadata files, ...) are
# skipped instead of being passed on as if they were experiment directories.
# Sorting makes the processing order, and hence the row order of the final
# file, independent of the order in which the file system lists the entries.
experiments = sorted(entry for entry in root_path.glob("*") if entry.is_dir())

In [None]:
def prepare_csv(metric_path):
    """Load ``results.csv`` from an experiment directory and label its rows.

    Args:
        metric_path: ``pathlib.Path`` of the directory containing
            ``results.csv``.

    Returns:
        The CSV contents as a DataFrame (first CSV column used as index) with
        an added ``selection_metric`` column. The label is the stem of
        ``metric_path``, except that the misspelled directory names
        ``k_sheel`` and ``k_sheel_mln`` are mapped to ``k_shell`` and
        ``k_shell_mln``.
    """
    # Directory names carrying a typo and the label they should produce.
    corrected_labels = {"k_sheel": "k_shell", "k_sheel_mln": "k_shell_mln"}

    results = pd.read_csv(metric_path / "results.csv", index_col=0)
    label = metric_path.stem
    results["selection_metric"] = corrected_labels.get(label, label)
    return results

In [None]:
# Load every experiment except the random and greedy ones, which are prepared
# separately in the cells above. An entry is skipped when either word occurs
# anywhere in its path string.
exp_dfs = [
    prepare_csv(exp_dir)
    for exp_dir in experiments
    if not any(word in str(exp_dir) for word in ("random", "greedy"))
]

## Saving final dataframe

In [None]:
# Combine the per-metric frames with the averaged random and the filtered
# greedy results. A fresh list is built instead of appending to exp_dfs, so
# re-running this cell cannot add the random/greedy frames a second time and
# duplicate their rows in the output.
all_frames = [*exp_dfs, averaged_random_df, greedy_df]

# ignore_index=True gives the combined frame a clean 0..n-1 index directly,
# without the reset_index()/drop("index") round trip, which depends on the
# temporary column being named exactly "index".
final_df = pd.concat(all_frames, ignore_index=True)

# The repetition counter is not wanted in the merged results.
final_df = final_df.drop(columns="repetition_run")

In [None]:
# Write the merged results next to the experiment directories.
final_df.to_csv(root_path / "all_results.csv")