# 3) Analyze and evaluate optimization output - TRAINING PROTOCOLS

This final notebook loads the `runs.pkl` file created in notebook 2 and analyzes:

- the distance between different feature sets in the parameter space
- the distance between different feature sets in the feature space - training protocols
- the distance between different feature sets in the extracellular signals

In [None]:
import pickle
import pandas as pd
import seaborn as sns
import sys
import shutil

import bluepyopt as bpopt
import bluepyopt.ephys as ephys

import matplotlib.pyplot as plt
from scipy.spatial import distance
import MEAutility as mu
import json
import time
import numpy as np
from tqdm import tqdm
from pathlib import Path

from scipy.stats import kruskal, mannwhitneyu, wilcoxon

import multimodalfitting as mf

%matplotlib notebook

In [None]:
# Figure export settings: set save_fig to False to skip writing any PDF.
save_fig = True
# Portable default: a folder relative to the current working directory.
figure_folder = Path(".") / "figures_hay_ais"

# Author-specific export location. It is only selected when its parent
# directory already exists, so the notebook keeps working on machines that do
# not have this directory tree (where the relative default above is used).
submission_folder = Path("/Users/abuccino/Documents/Submissions/papers/multimodal/hay_ais") / "opt"
if submission_folder.parent.is_dir():
    figure_folder = submission_folder

if save_fig:
    # parents=True also creates missing intermediate folders
    figure_folder.mkdir(parents=True, exist_ok=True)

In [None]:
# Project root as a relative path (resolved against the current working
# directory); the model and result folders used below are built from it.
base_dir = Path("../..")

In [None]:
# Matplotlib color alias used for each optimization strategy in the plots.
colors_dict = dict(soma="C0", all="C1", sections="C2", single="C3")
# Value passed as `feature_set` to the evaluator for each strategy:
# "soma" for the soma strategy, "extra" for every other strategy.
feature_sets = {
    strategy: ("soma" if strategy == "soma" else "extra")
    for strategy in colors_dict
}
# Default figure size shared by the comparison plots.
figsize = (10, 7)

## Load GT params and optimization output

In [None]:
# Model under analysis and probe layout ("linear" is the alternative layout
# noted by the author).
model_name, probe_type = "hay_ais", "planar"

# Folder layout: <base_dir>/cell_models/<model_name>/fitting/efeatures/probe_BPO.json
cell_models_folder = base_dir.joinpath("cell_models")
model_folder = cell_models_folder.joinpath(model_name)
probe_file = model_folder.joinpath("fitting", "efeatures", "probe_BPO.json")

In [None]:
# Sub-folder of <base_dir>/results that contains your pkl file: change
# `results_date` to point to the run you want to analyze.
results_date = '220214'  # '211124' '220111' # 
# Folder from which the runs are read and where derived results are cached.
result_folder = base_dir / "results" / results_date

In [None]:
# Build the model twice: once as used for fitting (release=False) and once
# with the release parameter values (release=True).
cell = mf.create_ground_truth_model(model_name=model_name, release=False)
cell_release = mf.create_ground_truth_model(model_name=model_name, release=True)

probe = mf.define_electrode(probe_file=probe_file)

# Names of the non-frozen parameters of the fitting model.
param_names = [p.name for p in cell.params.values() if not p.frozen]

# Release value of each of those parameters, keyed by parameter name.
params_release = {
    p.name: p.value for p in cell_release.params_by_names(param_names)
}

In [None]:
# Name of the protocol whose responses are used to compute and plot the EAP.
protocol_for_eap = "IDrest_300"

In [None]:
# File name of the pickled optimization runs, looked up inside `result_folder`.
pkl_file_name = "runs.pkl"

In [None]:
!pip install scikit-posthocs

In [None]:
# Load all optimization runs.
# NOTE: unpickling can execute arbitrary code; only load result files that you
# produced yourself.
with open(result_folder / pkl_file_name, 'rb') as f:
    data = pickle.load(f)
df_optimization = pd.DataFrame(data)

# Keep only the runs of the selected model. The explicit copy makes df_model an
# independent frame, so the column assignments below neither raise
# SettingWithCopyWarning nor depend on whether `query` returned a view.
df_model = df_optimization.query(f"model == '{model_name}'").copy()
# set strategy column: the extra strategy, except for soma-only runs
df_model.loc[:, "strategy"] = df_model["extra_strategy"].values.copy()
df_model.loc[df_model["feature_set"] == "soma", "strategy"] = "soma"

# Reuse previously cached per-strategy results when they exist on disk.
if (result_folder / "opt_results.pkl").is_file():
    with open(result_folder / "opt_results.pkl", 'rb') as f:
        opt_results_all = pickle.load(f)

In [None]:
# Plot the minimum fitness of every run against the number of evaluations.
fig, ax = plt.subplots()
min_evals = 3000

# (line style, line width) for runs that did / did not exceed `min_evals`
long_run_style = ('-', 0.8)
short_run_style = ('--', 0.5)

keep_idxs = []  # index labels of the runs that exceeded `min_evals`
for idx, row in df_model.iterrows():
    reached_min_evals = max(row["nevals"]) > min_evals
    if reached_min_evals:
        keep_idxs.append(idx)
    line_style, line_width = long_run_style if reached_min_evals else short_run_style
    ax.plot(row["nevals"],
            row["logbook"].select("min"),
            color=colors_dict[row["strategy"]],
            ls=line_style,
            lw=line_width,
            alpha=0.75)

ax.set_title("Min fitness")
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.set_xlabel("Neval")
ax.set_ylabel("Min fitness")
ax.set_yscale('log')

# Load protocols and original features

In [None]:
# Keyword arguments forwarded to the evaluator factory and to the EAP helper
# in the cells below; displayed here as the cell output for inspection.
extra_kwargs = mf.utils.get_extra_kwargs()
extra_kwargs

In [None]:
# Evaluator used for validation throughout the notebook: "extra" feature set
# with the "all" strategy. The same evaluator is used later to simulate and
# score the fitted models of every strategy.
eva_extra = mf.create_evaluator(
    model_name=model_name,
    feature_set="extra",
    extra_strategy="all",
    protocols_with_lfp="IDrest_300",
    **extra_kwargs
)

In [None]:
# Report how many objectives each strategy optimizes, next to the number of
# objectives of the validation evaluator.
extra_strategy_names = ("all", "single", "sections")
for strategy in np.unique(df_model.strategy):
    # the soma strategy has no extracellular strategy
    if strategy in extra_strategy_names:
        extra_strategy = strategy
    else:
        extra_strategy = None
    eva = mf.create_evaluator(model_name=model_name,
                              feature_set=feature_sets[strategy],
                              extra_strategy=extra_strategy,
                              protocols_with_lfp="IDrest_300",
                              **extra_kwargs)
    print(f"Strategy {strategy} --> num features {len(eva.fitness_calculator.objectives)}")

print(f"Validation: --> num features {len(eva_extra.fitness_calculator.objectives)}")

# Compute release responses

In [None]:
# Run every fitness protocol of the validation evaluator with the release
# parameter values.
responses_release = eva_extra.run_protocols(eva_extra.fitness_protocols.values(), param_values=params_release)

In [None]:
# EAP of the release model, computed from the responses to `protocol_for_eap`.
eap_release = mf.utils.calculate_eap(responses=responses_release, protocols=eva_extra.fitness_protocols, 
                                     protocol_name=protocol_for_eap, **extra_kwargs)

In [None]:
# Print the name of the first feature of every validation objective.
for obj in eva_extra.fitness_calculator.objectives:
    print(obj.features[0].name)

In [None]:
# Feature values of the release model, keyed by feature name.
# Each entry is a dict that receives a "value" key; the dict stays empty when
# the feature cannot be computed or the objective holds several features.
features_release = {}
for obj in tqdm(eva_extra.fitness_calculator.objectives, desc="computing features"):
    features_release[obj.features[0].name] = {}
    if len(obj.features) != 1:
        print(f"More than one feature for objective: {obj.name}")
        continue
    feat = obj.features[0]
    # Only the value is stored, so the score is not computed here.
    feat_value = feat.calculate_feature(responses_release)
    if feat_value is None:
        print(f"{feat.name} cannot be computed: skipping")
        continue
    features_release[feat.name]["value"] = feat_value

In [None]:
# Plot the release responses in black; the figure handle is kept for saving.
fig_gt_intra = mf.plot_responses(responses_release, color="k", return_fig=True)

In [None]:
# Plot the EAP of the release model on the probe for `protocol_for_eap`;
# plot_eap returns an axis, from which the figure handle is retrieved.
ax = mf.plot_eap(responses_release, eva_extra.fitness_protocols, probe,
                 protocol_name=protocol_for_eap, color="k")
fig_gt_extra = ax.get_figure()

In [None]:
# Export the two release-model figures as PDFs with a transparent background.
if save_fig:
    fig_gt_intra.savefig(figure_folder / "gt_intra.pdf", transparent=True)
    fig_gt_extra.savefig(figure_folder / "gt_extra.pdf", transparent=True)

# Compute and plot best responses

In [None]:
# Value substituted for a feature of a fitted model when it cannot be computed
# (i.e. when calculate_feature returns None).
max_feature_value = 50
# Per-strategy container filled by the following cells.
# NOTE(review): this rebinding discards any `opt_results_all` that was loaded
# from "opt_results.pkl" in the data-loading cell — confirm that recomputing
# everything from scratch is intended.
opt_results_all = {}

In [None]:
# for strategy in np.unique(df_model.strategy):
#     opt_results[strategy] = {}
#     opt_df = df_model.query(f"strategy == '{strategy}'")
#     # get best index
#     best_idx = np.argmin([row["logbook"].select("min")[-1] for idx, row in opt_df.iterrows()])
#     best_idx_old = np.argmin(opt_df.best_fitness)
#     params_sample = opt_df.iloc[best_idx]
#     params_dict = {k: v for k, v in zip(param_names, params_sample.best_params)}
#     opt_results[strategy]["best_fitness"] = params_sample.best_fitness
#     opt_results[strategy]["best_params"] = params_dict
#     print(f"{strategy} --  best fitness: {params_sample.best_fitness}")
#     print("best", best_idx, "best absolute", best_idx_old)

In [None]:
# For every strategy and seed: pick the individual of the stored population
# with the lowest summed fitness, simulate it with the validation evaluator
# and compute its EAP.
for strategy in np.unique(df_model.strategy):
    print(f"Simulating best '{strategy}'")
    opt_df = df_model.query(f"strategy == '{strategy}'")
    opt_results_all[strategy] = {}

    # results of this strategy, keyed by seed
    all_responses, all_eaps, all_params = {}, {}, {}
    for idx, row in opt_df.iterrows():
        seed = row.seed
        print("\tSeed", row.seed)
        population = row.population
        scores = [sum(individual.fitness.values) for individual in population]
        params = population[np.argmin(scores)]
        params_dict = dict(zip(param_names, params))
        all_params[seed] = params_dict

        responses_seed = eva_extra.run_protocols(eva_extra.fitness_protocols.values(),
                                                 param_values=params_dict)
        all_responses[seed] = responses_seed

        eap_seed = mf.utils.calculate_eap(responses=responses_seed,
                                          protocols=eva_extra.fitness_protocols,
                                          protocol_name=protocol_for_eap,
                                          **extra_kwargs)
        all_eaps[seed] = eap_seed

    opt_results_all[strategy].update(eaps=all_eaps,
                                     responses=all_responses,
                                     params=all_params)

In [None]:
# Score the simulated responses of every strategy/seed with the validation
# evaluator. Scores of features whose name contains "MEA" are accumulated as
# extracellular fitness, all other scores as intracellular fitness.
# Objectives holding more than one feature are not scored.
for strategy in opt_results_all:
    print(strategy)
    opt_results_all[strategy]["fitness"] = {}
    for seed, responses in opt_results_all[strategy]["responses"].items():
        extra_fitness = 0
        intra_fitness = 0
        for obj in tqdm(eva_extra.fitness_calculator.objectives,
                        desc=f"computing features {strategy}"):
            if len(obj.features) != 1:
                continue
            feat = obj.features[0]
            # Only the score is needed here; the feature value is not computed.
            feat_score = feat.calculate_score(responses)
            if "MEA" in feat.name:
                extra_fitness += feat_score
            else:
                intra_fitness += feat_score
        opt_results_all[strategy]["fitness"][seed] = {"intra": intra_fitness, "extra": extra_fitness,
                                                      "total": intra_fitness + extra_fitness}
        print("seed", seed)
        print("\tINTRA", intra_fitness)
        print("\tEXTRA", extra_fitness)
        print("\tTOTAL", intra_fitness + extra_fitness)

In [None]:
# Flatten the nested strategy -> seed -> fitness dictionaries into one table
# with one row per (strategy, seed) pair.
fitness_columns = ["seed", "strategy", "intra_score", "extra_score", "total_score"]
fitness_rows = [
    {
        "seed": seed,
        "strategy": strategy,
        "intra_score": fitness["intra"],
        "extra_score": fitness["extra"],
        "total_score": fitness["total"],
    }
    for strategy, strategy_results in opt_results_all.items()
    for seed, fitness in strategy_results["fitness"].items()
]
df_fitness = pd.DataFrame(fitness_rows, columns=fitness_columns)

In [None]:
# Row with the lowest extracellular score for each strategy.
# idxmin returns index *labels*, so the rows are selected with .loc; .iloc
# would only give the same rows while the index happens to be 0..n-1.
best_extras = df_fitness.loc[df_fitness.groupby("strategy")["extra_score"].idxmin()]
best_extras

In [None]:
# First three rows of each strategy after sorting by intracellular score and
# then by extracellular score (both ascending).
df_fitness.sort_values(['intra_score', 'extra_score'],ascending=True).groupby('strategy').head(3)

In [None]:
def _boxplot_seed_scores(scores_df, score_column, title, order):
    """Draw one box per strategy for `score_column` and return the figure.

    Parameters
    ----------
    scores_df : pandas.DataFrame
        Table with a "strategy" column and the column named `score_column`.
    score_column : str
        Name of the column plotted on the y axis.
    title : str
        Title of the axes.
    order : list of str
        Order of the strategies along the x axis.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure.
    """
    fig, ax = plt.subplots(figsize=(7, 10))
    # ax is passed explicitly so that the boxes are always drawn on this figure
    sns.boxplot(data=scores_df, x="strategy", y=score_column, order=order, ax=ax)
    ax.set_xlabel("Strategy", fontsize=15)
    ax.set_ylabel("Score", fontsize=15)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.set_xticklabels(ax.get_xticklabels(), fontsize=12)
    ax.set_title(title, fontsize=20)
    return fig


# Plotting order of the strategies, restricted to those present in df_fitness.
# It is computed here so that this cell does not depend on a variable that is
# only defined further down in the notebook.
available_strategies = set(df_fitness["strategy"])
order = [name for name in ("soma", "all", "sections", "single") if name in available_strategies]

fig_intra_seeds = _boxplot_seed_scores(df_fitness, "intra_score", "Intracellular\n(10 seeds)", order)
fig_extra_seeds = _boxplot_seed_scores(df_fitness, "extra_score", "Extracellular\n(10 seeds)", order)

In [None]:
# Export the per-seed score box plots as PDFs.
if save_fig:
    fig_intra_seeds.savefig(figure_folder / "intra_seed.pdf")
    fig_extra_seeds.savefig(figure_folder / "extra_seed.pdf")    

In [None]:
# Normalized release EAP: every row is divided by the peak-to-peak amplitude of
# its absolute values along axis 1. It does not depend on the strategy, so it
# is computed once.
eap_release_norm = eap_release / np.ptp(np.abs(eap_release), 1, keepdims=True)

# For each strategy, store the results of the seed with the lowest
# extracellular score and its distance to the release EAP.
for _, row in best_extras.iterrows():
    strategy = row["strategy"]
    seed = row["seed"]
    print("Strategy", strategy, "best seed", seed)
    responses = opt_results_all[strategy]["responses"][seed]
    eap = opt_results_all[strategy]["eaps"][seed]
    params = opt_results_all[strategy]["params"][seed]
    opt_results_all[strategy]["best_seed"] = seed
    opt_results_all[strategy]["best_responses"] = responses
    opt_results_all[strategy]["best_eap"] = eap
    opt_results_all[strategy]["best_params"] = params

    eap_norm = eap / np.ptp(np.abs(eap), 1, keepdims=True)
    # sum of absolute differences between the two normalized EAPs
    eap_dist = np.sum(np.abs(eap_release_norm.ravel() - eap_norm.ravel()))
    # Stored in `opt_results_all` (the only results container that exists at
    # this point) under "eap_dist", the key read by the comparison cells.
    opt_results_all[strategy]["eap_dist"] = eap_dist
    print(eap_dist)

In [None]:
# Compute, for the best seed of every strategy, the value of each validation
# feature and its score relative to the release model.
for strategy in np.unique(df_model.strategy):
    responses = opt_results_all[strategy]["best_responses"]
    features_best = {}
    for obj in eva_extra.fitness_calculator.objectives:
        feat = obj.features[0]
        feature_entry = features_best[feat.name] = {}
        if len(obj.features) != 1:
            print(f"More than one feature for objective: {obj.name}")
            continue

        feat_value = feat.calculate_feature(responses)
        if feat_value is None:
            # feature could not be computed: fall back to the penalty value
            feat_value = max_feature_value
        feature_entry["value"] = feat_value

        release_value = features_release[feat.name]["value"]
        if "MEA" in feat.name:
            # features named "MEA": cosine distance to the release value
            feat_score = np.abs(distance.cosine(release_value, feat_value))
        else:
            # other features: absolute difference in units of exp_std
            feat_score = np.abs(release_value - feat_value) / feat.exp_std
        feature_entry["score"] = feat_score
    opt_results_all[strategy]["features"] = features_best

In [None]:
# Overlay the release (GT) model and the best model of each strategy:
# one figure of intracellular responses and one of normalized EAPs per strategy.
figs_intra, figs_extra = {}, {}
protocols_to_plot = ["APWaveform_290", "IDrest_250", "IV_-100"]
titles = protocols_to_plot
for strategy in np.unique(df_model.strategy):
    best_responses = opt_results_all[strategy]["best_responses"]
    responses_to_plot = [responses_release, best_responses]
    colors = ["k", colors_dict[strategy]]
    labels = ["GT", strategy.upper()]

    figs_intra[strategy] = mf.plot_multiple_responses(responses_to_plot,
                                                      protocol_names=protocols_to_plot,
                                                      colors=colors,
                                                      titles=titles,
                                                      return_fig=True,
                                                      labels=labels)

    ax_extra = mf.plot_multiple_eaps(responses_to_plot,
                                     eva_extra.fitness_protocols, probe,
                                     protocol_name=protocol_for_eap,
                                     colors=colors, labels=labels, norm=True)
    figs_extra[strategy] = ax_extra.get_figure()

In [None]:
# Cache the per-strategy results in `result_folder` ("opt_results.pkl"), the
# file that the data-loading cell reads back when it exists.
with open(result_folder / "opt_results.pkl", 'wb') as f:
    pickle.dump(opt_results_all, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Print the EAP distance stored for each strategy. The results live in
# `opt_results_all`; no variable named `opt_results` is defined in this notebook.
for strategy in opt_results_all:
    print(f"Distance {strategy}: {opt_results_all[strategy]['eap_dist']}")

In [None]:
# Export the GT-vs-best comparison figures of every strategy as PDFs with a
# transparent background.
if save_fig:
    for strategy, fig in figs_intra.items():
        fig.savefig(figure_folder / f"{strategy}_intra.pdf", transparent=True)
    for strategy, fig in figs_extra.items():
        fig.savefig(figure_folder / f"{strategy}_extra.pdf", transparent=True)

## Compare best-fitted models

In [None]:
# One row per strategy, one column per entry stored for that strategy.
# Built from `opt_results_all`, the container filled by the cells above
# (no variable named `opt_results` is defined in this notebook).
df_test = pd.DataFrame.from_dict(opt_results_all, orient="index")
df_test["strategy"] = df_test.index

## Compare parameters 

Here we normalize each parameter by its optimization bounds and compute the relative difference between every fitted value and the corresponding ground-truth (GT) value.

In [None]:
# Read the parameter definitions of the model.
param_json = model_folder / "parameters.json"

with param_json.open() as f:
    params = json.load(f)

# Bounds of every bounded parameter, keyed by "<param_name>_<section>".
# `sectionlist` can be a single name or a list of names.
param_boundaries = {}
for param in params:
    if "bounds" not in param:
        continue
    sectionlist = param['sectionlist']
    sections = sectionlist if isinstance(sectionlist, list) else [sectionlist]
    for sec in sections:
        param_boundaries[f"{param['param_name']}_{sec}"] = param["bounds"]

# scale params_release by boundaries: 0 is the lower bound, 1 the upper bound
params_release_norm = {}
for param_name, param_val in params_release.items():
    bounds = param_boundaries[param_name]
    lower, upper = bounds[0], bounds[1]
    params_release_norm[param_name] = (param_val - lower) / (upper - lower)

In [None]:
# Display the release parameters rescaled by their bounds.
params_release_norm

In [None]:
# Long-format table with one row per (strategy, parameter): raw and
# bound-normalized values of the fitted and release parameters, plus the
# relative error of the fitted value with respect to the release value.
param_columns = ["strategy", "param_name", "param_value", "param_norm",
                 "release_value", "release_norm", "diff_release", "section"]
param_rows = []

for _, opt in df_test.iterrows():
    for param_name, param_value in opt.best_params.items():
        # the section is the last "_"-separated token of the parameter name;
        # "segment" is relabelled "ais"
        # (presumably the tail of "axon_initial_segment" — TODO confirm)
        section = param_name.split("_")[-1]
        if section == "segment":
            section = "ais"

        # compute norm value
        bounds = param_boundaries[param_name]
        param_norm = (param_value - bounds[0]) / (bounds[1] - bounds[0])

        release_value = params_release[param_name]
        # Relative error |fit - release| / |release|. The absolute value in the
        # denominator keeps the error non-negative for negative parameters.
        diff_with_release = np.abs(param_value - release_value) / np.abs(release_value)

        param_rows.append({"strategy": opt.strategy,
                           "param_name": param_name,
                           "param_value": param_value,
                           "param_norm": param_norm,
                           "release_value": release_value,
                           "release_norm": params_release_norm[param_name],
                           "diff_release": diff_with_release,
                           "section": section})

df_params = pd.DataFrame(param_rows, columns=param_columns)

In [None]:
# Plotting order of the strategies, restricted to the strategies that have
# results in `opt_results_all` (no variable named `opt_results` is defined in
# this notebook).
order_full = ["soma", "all", "sections", "single"]
order = [strategy for strategy in order_full if strategy in opt_results_all]

In [None]:
# overall parameter diff: distribution of the relative error over all
# parameters, one box per strategy
fig, ax = plt.subplots(figsize=figsize)
sns.boxenplot(data=df_params, y="strategy", x="diff_release", ax=ax, order=order)
# NOTE(review): `diff_release` is computed as a fraction, not multiplied by
# 100 — confirm whether the "(%)" in the label (and the 0-10 limit) is intended.
ax.set_xlabel("Relative error (%)", fontsize=12)
ax.set_ylabel("Strategy", fontsize=12)
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_title("All params", fontsize=15)
ax.set_xlim(0, 10)


In [None]:
def _format_param_axes(axes, ylabel, title):
    """Apply the labels, title and hidden top/right spines shared by the section plots."""
    axes.set_xlabel("Norm. param difference", fontsize=12)
    axes.set_ylabel(ylabel, fontsize=12)
    axes.spines["top"].set_visible(False)
    axes.spines["right"].set_visible(False)
    axes.set_title(title, fontsize=15)


# Two figures per section: a bar per parameter and strategy, and one box per
# strategy summarizing all parameters of the section.
for section in np.unique(df_params.section):
    df_section = df_params.query(f"section == '{section}'")

    fig, ax1 = plt.subplots(figsize=figsize)
    sns.barplot(data=df_section, y="param_name", x="diff_release", hue="strategy",
                orientation="horizontal", ax=ax1, hue_order=order)
    _format_param_axes(ax1, "Param", f"{section} params")

    fig, ax2 = plt.subplots(figsize=figsize)
    sns.boxplot(data=df_section, y="strategy", x="diff_release", ax=ax2, order=order)
    _format_param_axes(ax2, "Strategy", f"{section} params - ALL")

## Compare features

In [None]:
# Long-format table with one row per (strategy, feature) and its score.
# Features whose name contains "MEA" are labelled "extra", all others "intra".
# The features are read from `opt_results_all` (no variable named
# `opt_results` is defined in this notebook).
feature_columns = ["feature_set", "feat_name", "feat_score", "feature_type"]
feature_rows = []

for strategy, res in opt_results_all.items():
    for feat_name, feat_dict in res["features"].items():
        feature_rows.append({"feature_set": strategy,
                             "feat_name": feat_name,
                             "feat_score": feat_dict["score"],
                             "feature_type": "extra" if "MEA" in feat_name else "intra"})

df_feats = pd.DataFrame(feature_rows, columns=feature_columns)

In [None]:
# One box plot of feature scores per feature type, with one box per strategy.
# Each panel: (row filter, y label, title).
feature_panels = (
    ("feature_type == 'intra'", "Feature scores (intracellular)", "Intracellular features"),
    ("feature_type == 'extra'", "Feature scores (extracellular)", "Extracellular features"),
)

feature_figures = []
for type_query, ylabel, title in feature_panels:
    fig_feat, ax = plt.subplots(figsize=figsize)
    sns.boxplot(data=df_feats.query(type_query), y="feature_set", x="feat_score",
                order=order, ax=ax)
    ax.set_ylabel(ylabel, fontsize=12)
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.set_title(title, fontsize=15)
    feature_figures.append(fig_feat)

fig_feat_intra, fig_feat_extra = feature_figures


In [None]:
# Total intracellular score per strategy. Only `feat_score` is summed: the
# other columns hold strings, which `sum()` concatenates or rejects depending
# on the pandas version.
df_feats.query("feature_type == 'intra'").groupby("feature_set")[["feat_score"]].sum()

In [None]:
# Total extracellular score per strategy. Only `feat_score` is summed: the
# other columns hold strings, which `sum()` concatenates or rejects depending
# on the pandas version.
df_feats.query("feature_type == 'extra'").groupby("feature_set")[["feat_score"]].sum()

In [None]:
from scipy.stats import mannwhitneyu, wilcoxon

In [None]:
# Split the feature table by feature type for the statistical tests below.
df_intra = df_feats.query("feature_type == 'intra'")
df_extra = df_feats.query("feature_type == 'extra'")

In [None]:
def _scores_of(scores_df, strategy):
    """Return the `feat_score` values of the rows whose `feature_set` equals `strategy`."""
    return scores_df.loc[scores_df["feature_set"] == strategy, "feat_score"]


# Intracellular feature scores of each strategy.
intra_soma = _scores_of(df_intra, "soma")
intra_sections = _scores_of(df_intra, "sections")
intra_all = _scores_of(df_intra, "all")
intra_single = _scores_of(df_intra, "single")

# Extracellular feature scores of each strategy.
extra_soma = _scores_of(df_extra, "soma")
extra_sections = _scores_of(df_extra, "sections")
extra_all = _scores_of(df_extra, "all")
extra_single = _scores_of(df_extra, "single")

In [None]:
# Wilcoxon signed-rank test of each extracellular strategy against the soma
# strategy on the intracellular feature scores.
intra_comparisons = (
    ("Intra - Sections VS SOMA:", intra_sections),
    ("Intra - All VS SOMA:", intra_all),
    ("Intra - Single VS SOMA:", intra_single),
)
for label, strategy_scores in intra_comparisons:
    print(label, wilcoxon(strategy_scores, intra_soma))
# Not run: "Intra - All VS Sections:", wilcoxon(intra_sections, intra_all)

In [None]:
# Wilcoxon signed-rank test of each extracellular strategy against the soma
# strategy on the extracellular feature scores.
extra_comparisons = (
    ("Extra - Sections VS SOMA:", extra_sections),
    ("Extra - All VS SOMA:", extra_all),
    ("Extra - Single VS SOMA:", extra_single),
)
for label, strategy_scores in extra_comparisons:
    print(label, wilcoxon(strategy_scores, extra_soma))
# Not run: "Extra - All VS Sections:", wilcoxon(extra_sections, extra_all)

In [None]:
# Export the two feature-score box plots as PDFs.
if save_fig:
    fig_feat_intra.savefig(figure_folder / "feat_intra.pdf")
    fig_feat_extra.savefig(figure_folder / "feat_extra.pdf")

## Compare EAP distance

In [None]:
# Display the EAP distance of the best model of each strategy.
df_test["eap_dist"]

In [None]:
# Bar plot of the EAP distance of the best model of each strategy.
fig_cos, ax = plt.subplots()
sns.barplot(data=df_test, x="strategy", y="eap_dist", order=order, ax=ax)
# `eap_dist` is the sum of absolute differences between the normalized EAPs
# (an L1 distance), not a cosine distance, so the axis is labelled accordingly.
ax.set_ylabel("L1 distance (normalized EAPs)", fontsize=12)
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_title("Extracellular difference", fontsize=15)

In [None]:
# Export the EAP distance bar plot as a PDF.
if save_fig:
    fig_cos.savefig(figure_folder / "eap_dist.pdf")