# 3) Analyze and evaluate optimization output - TRAINING PROTOCOLS

This final notebook loads the `runs.pkl` file created in notebook 2 and analyzes:

- the distance between different feature sets in the parameter space
- the distance between different feature sets in the feature space - training protocols
- the distance between different feature sets in the extracellular signals

In [None]:
import pickle
import pandas as pd
import seaborn as sns
import sys
import shutil

import bluepyopt as bpopt
import bluepyopt.ephys as ephys

import matplotlib.pyplot as plt
from scipy.spatial import distance
import MEAutility as mu
import json
import time
import numpy as np
from tqdm import tqdm
from pathlib import Path

from scipy.stats import kruskal, mannwhitneyu, wilcoxon

import multimodalfitting as mf

%matplotlib notebook

In [None]:
# Switch for exporting figures, and the folder they are written to.
save_fig = True
figure_folder = Path("..").joinpath("figures")

# Create the output folder up front; exist_ok=True makes this a no-op when it
# already exists.
if save_fig:
    figure_folder.mkdir(exist_ok=True)

In [None]:
# Root folder, two levels above the working directory; the model and results
# folders are resolved against it.
base_dir = Path("..", "..")

In [None]:
# Color assigned to each optimization strategy ("C<n>" color-cycle names).
colors_dict = dict(soma="C0", all="C1", sections="C2", single="C3")

# Feature-set label per strategy: "soma" maps to itself, every other strategy
# maps to "extra".
feature_sets = {
    "soma": "soma",
    **dict.fromkeys(("all", "sections", "single"), "extra"),
}

# Default (width, height) for figures.
figsize = 10, 7

## Load GT params and optimization output

In [None]:
# general
model_name = "hay_ais"
probe_type = "planar"  # the original comment lists "linear" here, presumably as the alternative

# Folder layout: <base_dir>/cell_models/<model_name>/fitting/efeatures/probe_BPO.json
cell_models_folder = base_dir.joinpath("cell_models")
model_folder = cell_models_folder.joinpath(model_name)
probe_file = model_folder.joinpath("fitting", "efeatures", "probe_BPO.json")

# Name of the pickle holding the optimization runs.
pkl_file_name = "runs.pkl"

In [None]:
# Initial value only: the next cell reassigns result_folder to the latest
# folder found under <base_dir>/results.
result_folder = Path("..", "..")

In [None]:
# Names of all entries under <base_dir>/results (presumably date-stamped run
# folders — confirm against how notebook 2 writes its output).
results_root = base_dir / "results"
result_dates = [entry.name for entry in results_root.iterdir()]
# use latest results: names are compared as strings, so the greatest one wins
results_date = max(result_dates)
result_folder = results_root.joinpath(results_date)

In [None]:
# Build the same model twice; the two instances differ only in the `release` flag.
model_kwargs = dict(model_name=model_name)
cell = mf.create_ground_truth_model(release=False, **model_kwargs)
cell_release = mf.create_ground_truth_model(release=True, **model_kwargs)

# Electrode definition read from the probe file of the model.
probe = mf.define_electrode(probe_file=probe_file)

# Only the parameters that are not frozen in `cell` are considered.
param_names = [p.name for p in cell.params.values() if not p.frozen]

# Values that the release model assigns to those same parameters.
params_release = {
    p.name: p.value for p in cell_release.params_by_names(param_names)
}

In [None]:
# Protocol used for the extracellular action potential (EAP): it is passed as
# `protocol_name` to mf.utils.calculate_eap and mf.plot_eap.
protocol_for_eap = "IDrest_300"

In [None]:
# Load the optimization runs from the results folder. The context manager
# closes the file handle as soon as the data has been read.
# NOTE: unpickling can execute arbitrary code — only load files you produced.
with open(result_folder / pkl_file_name, 'rb') as f:
    data = pickle.load(f)

df_optimization = pd.DataFrame(data)
# Keep only the rows that belong to the selected model.
df_model = df_optimization.query(f"model == '{model_name}'")

# Placeholder for the per-strategy results, and the file name they are saved under.
opt_results_training = None
results_file = f"all_responses_{model_name}.pkl"

In [None]:
# Plot the minimum fitness of every run against the number of evaluations.
# Runs whose evaluation count exceeds `min_evals` are drawn with a solid line and
# their row index is recorded in `keep_idxs`; the others are drawn dashed and thinner.
min_evals = 3000
fig, ax = plt.subplots()

keep_idxs = []
for idx, row in df_model.iterrows():
    enough_evals = max(row["nevals"]) > min_evals
    if enough_evals:
        keep_idxs.append(idx)
    line_style, line_width = ('-', 0.8) if enough_evals else ('--', 0.5)
    ax.plot(row["nevals"],
            row["logbook"].select("min"),
            color=colors_dict[row["strategy"]],
            ls=line_style,
            lw=line_width,
            alpha=0.75)

ax.set_title("Min fitness")
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.set_xlabel("Neval")
ax.set_ylabel("Min fitness")
ax.set_yscale('log')

# Load protocols and original features

In [None]:
# Extra keyword arguments provided by multimodalfitting; they are forwarded to
# mf.create_evaluator and mf.utils.calculate_eap in the cells below.
extra_kwargs = mf.utils.get_extra_kwargs()
# Bare expression so that the notebook shows the value as the cell output.
extra_kwargs

In [None]:
# Evaluator built with the "all" strategy; it is the one used to simulate the
# protocols and to compute features in the rest of the notebook.
# The protocol recorded with LFP is taken from `protocol_for_eap` rather than
# repeated as a literal, so the protocol used for the EAP is defined in one place.
eva_extra = mf.create_evaluator(
    model_name=model_name,
    strategy="all",
    protocols_with_lfp=protocol_for_eap,
    **extra_kwargs
)

In [None]:
# check num features
# Build one evaluator per strategy present in the optimization table and report
# how many objectives its fitness calculator holds. The LFP protocol comes from
# `protocol_for_eap` instead of a repeated string literal.
for strategy in np.unique(df_model.strategy):
    eva = mf.create_evaluator(
        model_name=model_name,
        strategy=strategy,
        protocols_with_lfp=protocol_for_eap,
        **extra_kwargs
    )
    print(f"Strategy {strategy} --> num features {len(eva.fitness_calculator.objectives)}")

# Compute release responses

In [None]:
# Run every fitness protocol of the evaluator with the release parameter values.
release_protocols = eva_extra.fitness_protocols.values()
responses_release = eva_extra.run_protocols(release_protocols, param_values=params_release)

In [None]:
# EAP of the release model, computed from the responses to `protocol_for_eap`.
eap_release = mf.utils.calculate_eap(
    responses=responses_release,
    protocols=eva_extra.fitness_protocols,
    protocol_name=protocol_for_eap,
    **extra_kwargs
)

In [None]:
# Feature values of the release responses, keyed by feature name.
# Every objective gets an entry; "value" is filled in only when the objective
# has exactly one feature and that feature could be computed.
features_release = {}
release_objectives = eva_extra.fitness_calculator.objectives
for obj in tqdm(release_objectives, desc="computing features"):
    features_release[obj.features[0].name] = {}
    if len(obj.features) != 1:
        print(f"More than one feature for objective: {obj.name}")
        continue
    feat = obj.features[0]
    feat_value = feat.calculate_feature(responses_release)
    # The score is computed as in the original cell, but it is not stored.
    feat_score = feat.calculate_score(responses_release)
    if feat_value is None:
        print(f"{feat.name} cannot be computed: skipping")
        continue
    features_release[feat.name]["value"] = feat_value

In [None]:
# Plot the release responses in black ("k") and keep the returned figure so it
# can be saved later.
fig_gt_intra = mf.plot_responses(responses_release, color="k", return_fig=True)

In [None]:
# Plot the release EAP on the probe in black ("k"); the figure is retrieved
# from the returned axis so it can be saved later.
ax = mf.plot_eap(
    responses_release,
    eva_extra.fitness_protocols,
    probe,
    protocol_name=protocol_for_eap,
    color="k",
)
fig_gt_extra = ax.get_figure()

In [None]:
# Export both release figures as PDFs with a transparent background.
if save_fig:
    gt_figures = ((fig_gt_intra, "gt_intra.pdf"), (fig_gt_extra, "gt_extra.pdf"))
    for gt_fig, file_name in gt_figures:
        gt_fig.savefig(figure_folder / file_name, transparent=True)

# Compute and plot best responses

In [None]:
# For every strategy and seed: take the individual of the final population with
# the lowest summed fitness, simulate it with `eva_extra`, and store its
# parameters, responses and EAP under opt_results_training[strategy].
opt_results_training = {}

for strategy in np.unique(df_model.strategy):
    print(f"Simulating best '{strategy}'")
    # simulate all responses
    opt_df = df_model.query(f"strategy == '{strategy}'")
    opt_results_training[strategy] = strategy_results = {}

    all_responses, all_eaps, all_params = {}, {}, {}
    for idx, row in opt_df.iterrows():
        seed = row.seed
        print("\tSeed", row.seed)
        population = row.population
        # one score per individual: the sum of its fitness values
        scores = [sum(individual.fitness.values) for individual in population]
        best_individual_idx = np.argmin(scores)
        params = population[best_individual_idx]
        params_dict = dict(zip(param_names, params))
        all_params[seed] = params_dict

        responses_seed = eva_extra.run_protocols(
            eva_extra.fitness_protocols.values(),
            param_values=params_dict,
        )
        all_responses[seed] = responses_seed

        eap_seed = mf.utils.calculate_eap(
            responses=responses_seed,
            protocols=eva_extra.fitness_protocols,
            protocol_name=protocol_for_eap,
            **extra_kwargs
        )
        all_eaps[seed] = eap_seed

    strategy_results.update(eaps=all_eaps, responses=all_responses, params=all_params)

In [None]:
# Score the simulated responses of every strategy/seed with the objectives of
# `eva_extra`. Scores of features whose name contains "MEA" are accumulated as
# "extra" fitness, all the others as "intra"; objectives with more than one
# feature are not scored.
training_objectives = eva_extra.fitness_calculator.objectives

for strategy in opt_results_training:
    features_best = {}
    fitness_by_seed = opt_results_training[strategy]["fitness"] = {}
    for seed, responses in opt_results_training[strategy]["responses"].items():
        extra_fitness = 0
        intra_fitness = 0
        for obj in tqdm(training_objectives, desc=f"computing features {strategy}"):
            feat = obj.features[0]
            features_best[feat.name] = {}
            if len(obj.features) != 1:
                continue
            feat_value = feat.calculate_feature(responses)
            feat_score = feat.calculate_score(responses)
            if "MEA" in feat.name:
                extra_fitness += feat_score
            else:
                intra_fitness += feat_score

        total_fitness = intra_fitness + extra_fitness
        fitness_by_seed[seed] = {"intra": intra_fitness,
                                 "extra": extra_fitness,
                                 "total": total_fitness}
        print("seed", seed)
        print("\tINTRA", intra_fitness)
        print("\tEXTRA", extra_fitness)
        print("\tTOTAL", total_fitness)

In [None]:
# Gather the per-seed fitness values into a table with one row per
# (strategy, seed). `df_fitness` is not defined in any other cell of this
# notebook, so it is assembled here from opt_results_training[...]["fitness"].
fitness_columns = ["strategy", "seed", "intra_score", "extra_score", "total_score"]
df_fitness = pd.DataFrame(
    [
        {
            "strategy": strategy,
            "seed": seed,
            "intra_score": fitness["intra"],
            "extra_score": fitness["extra"],
            "total_score": fitness["total"],
        }
        for strategy, strategy_results in opt_results_training.items()
        for seed, fitness in strategy_results["fitness"].items()
    ],
    columns=fitness_columns,
)

# best responses are the solutions that minimize intra_score
best_extras = df_fitness.loc[df_fitness.groupby("strategy")["intra_score"].idxmin()]
print(best_extras)

# The normalized release EAP does not depend on the strategy: compute it once.
# Each row is divided by the peak-to-peak range of its absolute values (axis 1).
eap_release_norm = eap_release / np.ptp(np.abs(eap_release), 1, keepdims=True)

for idx, row in best_extras.iterrows():
    strategy = row["strategy"]
    seed = row["seed"]
    print("Strategy", strategy, "best seed", seed)
    responses = opt_results_training[strategy]["responses"][seed]
    eap = opt_results_training[strategy]["eaps"][seed]
    params = opt_results_training[strategy]["params"][seed]
    # Store the best seed and its data next to the per-seed results.
    opt_results_training[strategy]["best_seed"] = seed
    opt_results_training[strategy]["best_responses"] = responses
    opt_results_training[strategy]["best_eap"] = eap
    opt_results_training[strategy]["best_params"] = params
    # Distance to the release EAP: sum of absolute differences of the normalized signals.
    eap_norm = eap / np.ptp(np.abs(eap), 1, keepdims=True)
    eap_dist = np.sum(np.abs(eap_release_norm.ravel() - eap_norm.ravel()))
    opt_results_training[strategy]["best_eap_dist"] = eap_dist
    print(f"EAP distance: {eap_dist}")

In [None]:
# Persist the collected results next to the optimization output.
output_path = result_folder / results_file
with output_path.open('wb') as out_file:
    pickle.dump(opt_results_training, out_file, protocol=pickle.HIGHEST_PROTOCOL)