# 4) Analyze and evaluate optimization output

This final notebook loads the `runs.pkl` file created in notebook 2 and analyzes the optimization performance.

In [None]:
import pickle
import pandas as pd
import seaborn as sns
import sys
import shutil

import bluepyopt as bpopt
import bluepyopt.ephys as ephys

import matplotlib.pyplot as plt
from scipy.spatial import distance
import MEAutility as mu
import json
import time
import numpy as np
from pathlib import Path
from pprint import pprint
from tqdm import tqdm

from scipy.stats import kruskal, mannwhitneyu, wilcoxon

import multimodalfitting as mf

%matplotlib notebook

In [None]:
# Figure export switch and destination folder
save_fig = True
figure_folder = Path("..", "figures")

if save_fig:
    # make sure the destination exists; an existing folder is not an error
    figure_folder.mkdir(exist_ok=True)

In [None]:
# base folder (two levels up); "cell_models" and "results" are resolved relative to it
base_dir = Path("../..")

In [None]:
# colour assigned to each optimization strategy in the plots
colors_dict = dict(soma="C0", all="C1", sections="C2", single="C3")
# feature set associated with each strategy
feature_sets = dict(soma="soma", all="extra", sections="extra", single="extra")
# default figure size
figsize = 10, 7

## Load GT params and optimization output

In [None]:
# general
model_name = "hay_ais"
probe_type = "planar"  # linear

# model-specific folders and files, all resolved from base_dir
cell_models_folder = base_dir.joinpath("cell_models")
model_folder = cell_models_folder.joinpath(model_name)
probe_file = model_folder.joinpath("fitting", "efeatures", "probe_BPO.json")

In [None]:
# Pick the most recent results folder (folder names are compared as strings;
# presumably they encode the run date -- verify the naming scheme).
results_root = base_dir / "results"
# keep sub-folders only, so stray files in "results" are never selected
result_dates = [r.name for r in results_root.iterdir() if r.is_dir()]
if not result_dates:
    raise FileNotFoundError(f"No result folders found in {results_root}")
# use latest results
results_date = max(result_dates)
result_folder = results_root / results_date

In [None]:
# Build the ground-truth model twice: once with release=False, once with release=True
cell = mf.create_ground_truth_model(model_name=model_name, release=False)
cell_release = mf.create_ground_truth_model(model_name=model_name, release=True)

# electrode description loaded from the probe file
probe = mf.define_electrode(probe_file=probe_file)

# names of the parameters that are not frozen
param_names = [p.name for p in cell.params.values() if not p.frozen]

# value of each of those parameters in the release model
params_release = {
    p.name: p.value for p in cell_release.params_by_names(param_names)
}

In [None]:
# protocol passed as `protocols_with_lfp` to the training evaluator
protocol_for_eap = "IDrest_300"
# protocol passed as `protocols_with_lfp` to the validation evaluator and used to compute the EAPs
protocol_for_eap_val = "firepattern_120"

In [None]:
# file (inside the result folder) holding the pickled optimization runs
pkl_file_name = "runs.pkl"

In [None]:
# NOTE: pickle.load can execute arbitrary code -- only open result files that
# were produced by the previous notebooks of this project.

# optimization runs, restricted to the selected model
with open(result_folder / pkl_file_name, 'rb') as f:
    data = pickle.load(f)
df_optimization = pd.DataFrame(data)
df_model = df_optimization.query(f"model == '{model_name}'")

# responses of the fitted models to the training protocols (required)
results_file = f"all_responses_{model_name}.pkl"
if not (result_folder / results_file).is_file():
    raise FileNotFoundError(f"Couldn't find result file: {results_file}. Run notebook 3a first!")
with open(result_folder / results_file, 'rb') as f:
    opt_results_training = pickle.load(f)

# responses to the validation protocols (optional cache written by the last
# cell of this notebook); when missing they are flagged for computation
opt_results_val = None
results_val_file = f"validation_responses_{model_name}.pkl"
compute_val_responses = not (result_folder / results_val_file).is_file()
if not compute_val_responses:
    with open(result_folder / results_val_file, 'rb') as f:
        opt_results_val = pickle.load(f)

# Load protocols and original features

In [None]:
# keyword arguments reused below by the evaluator, EAP and extracellular-feature calls
extra_kwargs = mf.utils.get_extra_kwargs()
# display them as the cell output
extra_kwargs

In [None]:
# protocols used for the optimization
# NOTE(review): this list is not referenced anywhere else in this notebook
protocols_used_for_opt = ["IV_-20", "IV_-100", "IDrest_150", "IDrest_250", "IDrest_300",
                          "APWaveform_260"]

In [None]:
# passed as `exclude_protocols` when building the validation evaluator
protocols_to_exclude = ["IV", "APWaveform", "IDrest"]

In [None]:
# Evaluator for the training protocols ("all" strategy, all_protocols=False);
# it is used below to simulate the release model and to compute training features.
eva_extra_train = mf.create_evaluator(
    model_name=model_name,
    strategy="all",
    protocols_with_lfp=protocol_for_eap,
    all_protocols=False,
    **extra_kwargs
)

In [None]:
# Evaluator for the validation protocols: all_protocols=True, with the
# protocols listed in `protocols_to_exclude` passed as `exclude_protocols`.
eva_extra_val = mf.create_evaluator(
    model_name=model_name,
    strategy="all",
    protocols_with_lfp=protocol_for_eap_val,
    all_protocols=True,
    exclude_protocols=protocols_to_exclude,
    **extra_kwargs
)

In [None]:
# report how many objectives each evaluator defines
for label, evaluator in (("training", eva_extra_train), ("validation", eva_extra_val)):
    n_objectives = len(evaluator.fitness_calculator.objectives)
    print(f"All {label} features --> num features {n_objectives}")

# Compute release responses

In [None]:
# Simulate the release parameters on the training protocols and report the wall-clock time
t_start = time.time()
responses_release_train = eva_extra_train.run_protocols(eva_extra_train.fitness_protocols.values(), 
                                                        param_values=params_release)
t_stop = time.time()
print(f"Simulated responses in {np.round(t_stop - t_start, 2)} s")

In [None]:
# Simulate the release parameters on the validation protocols and report the wall-clock time
t_start = time.time()
responses_release_val = eva_extra_val.run_protocols(eva_extra_val.fitness_protocols.values(), 
                                                    param_values=params_release)
t_stop = time.time()
print(f"Simulated responses in {np.round(t_stop - t_start, 2)} s")

In [None]:
# EAP of the release model, computed from its validation responses
eap_release = mf.utils.calculate_eap(responses=responses_release_val, 
                                     protocols=eva_extra_val.fitness_protocols, 
                                     protocol_name=protocol_for_eap_val, **extra_kwargs)

# compute extracellular features
# relative tolerance: validation scores below are normalized by std_from_mean * |release value|
std_from_mean = 0.05
extra_features = mf.efeatures_extraction.compute_extra_features(
    eap_release, fs=extra_kwargs["fs"],
    upsample=extra_kwargs["upsample"])

In [None]:
# Feature values of the release model on the validation protocols.
# Maps feature name -> {"value": ...}.
features_release = {}
for obj in tqdm(eva_extra_val.fitness_calculator.objectives, desc="computing features"):
    if len(obj.features) != 1:
        # no entry is created here: an entry without "value" would make the
        # later lookups of features_release[name]["value"] fail
        print(f"More than one feature for objective: {obj.name}")
        continue
    feat = obj.features[0]
    features_release[feat.name] = {"value": feat.calculate_feature(responses_release_val)}

num_intra_features = len(features_release)
print(f"Intra features: {num_intra_features}")
# add extra features: one entry per extracellular feature and channel
# NOTE(review): the names are built with `protocol_for_eap`, although
# `eap_release` was computed with `protocol_for_eap_val` -- confirm this is intended.
for efeat_name, feat in extra_features.items():
    for chan, feat_val in enumerate(feat):
        feature_name = f"{protocol_for_eap}.MEA.{efeat_name}_{chan}"
        features_release[feature_name] = {"value": feat_val}
num_extra_features = len(features_release) - num_intra_features
print(f"Extra features: {num_extra_features}")

In [None]:
# plot the release (ground-truth) responses to the training protocols in black
fig_gt_intra = mf.plot_responses(responses_release_train, color="k", return_fig=True, max_rows=3)

### Find best responses

In [None]:
strategies = ["soma", "all", "sections", "single"]

# one record per (strategy, seed) with its intra, extra and total fitness
fitness_records = [
    {
        "seed": seed,
        "strategy": strategy,
        "intra_score": fitness["intra"],
        "extra_score": fitness["extra"],
        "total_score": fitness["total"],
    }
    for strategy in strategies
    for seed, fitness in opt_results_training[strategy]["fitness"].items()
]
df_fitness = pd.DataFrame(
    fitness_records,
    columns=["seed", "strategy", "intra_score", "extra_score", "total_score"],
)

In [None]:
# x-axis order of the strategies in the box plots below
order = ["soma", "all", "sections", "single"]

In [None]:
def _plot_seed_scores(score_column, title):
    """Box plot of one score column of `df_fitness`, grouped by strategy.

    Parameters
    ----------
    score_column : str
        Column of `df_fitness` shown on the y axis.
    title : str
        Title of the axes.

    Returns
    -------
    fig, ax
        The newly created matplotlib figure and axes.
    """
    fig, ax = plt.subplots(figsize=(7, 10))
    # always draw on the axes created above rather than on the implicit current axes
    sns.boxplot(data=df_fitness, x="strategy", y=score_column, order=order, ax=ax,
                palette=colors_dict)
    ax.set_xlabel("Strategy", fontsize=15)
    ax.set_ylabel("Score", fontsize=15)
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    # change the tick-label size without re-assigning the labels
    ax.tick_params(axis="x", labelsize=12)
    ax.set_title(title, fontsize=20)
    return fig, ax


# NOTE(review): the "(10 seeds)" in the titles is hard-coded, not derived from df_fitness
fig_intra_seeds, ax = _plot_seed_scores("intra_score", "Intracellular\n(10 seeds)")
fig_extra_seeds, ax = _plot_seed_scores("extra_score", "Extracellular\n(10 seeds)")

In [None]:
# export the two seed-score panels
if save_fig:
    for seeds_fig, file_name in ((fig_intra_seeds, "fig4A-left.pdf"),
                                 (fig_extra_seeds, "fig4A-right.pdf")):
        seeds_fig.savefig(figure_folder / file_name)

In [None]:
# Feature values and scores of the best model of each strategy on the training protocols.
# value substituted when a feature cannot be computed (calculate_feature returns None)
max_feature_value = 50
# Iterate over the training objectives themselves: taking the loop length from
# the validation evaluator would skip objectives or index out of range whenever
# the two evaluators define a different number of objectives.
train_objectives = eva_extra_train.fitness_calculator.objectives
for strategy in np.unique(df_model.strategy):
    responses = opt_results_training[strategy]["best_responses"]
    features_best = {}
    for obj in tqdm(train_objectives, desc=f"computing features {strategy}"):
        feat = obj.features[0]
        features_best[feat.name] = {}
        if len(obj.features) != 1:
            print(f"More than one feature for objective: {obj.name}")
            continue

        feat_value = feat.calculate_feature(responses)
        if feat_value is None:
            feat_value = max_feature_value
        features_best[feat.name]["value"] = feat_value

        # Reference value of the release model. `features_release` is built
        # from the validation evaluator, so a training feature may be missing
        # there; in that case compute it from the training release responses.
        if feat.name in features_release:
            release_value = features_release[feat.name]["value"]
        else:
            release_value = feat.calculate_feature(responses_release_train)

        if release_value is None:
            # no reference available: the score is undefined
            feat_score = np.nan
        elif "MEA" not in feat.name:
            # intracellular: absolute difference in units of the feature's exp_std
            feat_score = np.abs(release_value - feat_value) / feat.exp_std
        else:
            # extracellular: cosine distance between the two feature vectors
            feat_score = np.abs(distance.cosine(release_value, feat_value))
        features_best[feat.name]["score"] = feat_score
    opt_results_training[strategy]["features"] = features_best

### Plot responses to training protocols


In [None]:
# Overlay the ground-truth (black) and best-fitted responses of each strategy
# NOTE(review): "APWaveform_290" is not in `protocols_used_for_opt` (which lists
# "APWaveform_260") -- confirm the protocol exists in the training responses.
protocols_to_plot = ["APWaveform_290", "IDrest_250", "IV_-100"]
titles = protocols_to_plot
figs_intra, figs_extra = {}, {}
for strategy in np.unique(df_model.strategy):
    responses_to_plot = [
        responses_release_train,
        opt_results_training[strategy]["best_responses"],
    ]
    colors = ["k", colors_dict[strategy]]
    labels = ["GT", strategy.upper()]
    figs_intra[strategy] = mf.plot_multiple_responses(
        responses_to_plot,
        protocol_names=protocols_to_plot,
        colors=colors,
        titles=titles,
        return_fig=True,
        labels=labels,
    )

# Compute and plot validation responses

In [None]:
# value substituted when a feature cannot be computed
max_feature_value = 50
# Start from an empty container only when the validation responses have to be
# recomputed. Resetting it unconditionally would discard the cached results
# loaded from the validation pickle and break the plotting cells below.
if compute_val_responses:
    opt_results_val = {}

In [None]:
# strategies whose best models are simulated, scored and plotted in the cells below
strategies = ["soma", "all", "sections", "single"]

In [None]:
# Simulate the best model of each strategy on the validation protocols and
# measure how far its EAP is from the release EAP.
if compute_val_responses:
    # Each row is divided by the peak-to-peak of its absolute values (axis 1).
    # The release normalization does not depend on the strategy: compute it once.
    eap_release_norm = eap_release / np.ptp(np.abs(eap_release), 1, keepdims=True)

    for strategy in strategies:
        print(f"Simulating best '{strategy}' -- seed: {opt_results_training[strategy]['best_seed']}")
        best_params = opt_results_training[strategy]["best_params"]
        t_start = time.time()
        responses = eva_extra_val.run_protocols(eva_extra_val.fitness_protocols.values(),
                                                param_values=best_params)
        eap = mf.utils.calculate_eap(responses=responses, protocols=eva_extra_val.fitness_protocols,
                                     protocol_name=protocol_for_eap_val, **extra_kwargs)
        t_stop = time.time()
        print(f"Simulated responses in {np.round(t_stop - t_start, 2)} s")

        eap_norm = eap / np.ptp(np.abs(eap), 1, keepdims=True)
        # sum of absolute differences between the normalized EAPs
        eap_dist = np.sum(np.abs(eap_release_norm.ravel() - eap_norm.ravel()))

        # store the entry only once everything has been computed, so a failed
        # simulation never leaves a half-filled result behind
        opt_results_val[strategy] = {
            "eap_dist": eap_dist,
            "responses": responses,
            "eap": eap,
        }

In [None]:
# name of the first feature of every validation objective, in objective order
feat_objectives = [obj.features[0].name for obj in eva_extra_val.fitness_calculator.objectives]

In [None]:
# number of validation objectives (displayed as the cell output)
len(feat_objectives)

In [None]:
def _relative_score(value, reference, rel_std):
    """Score `value` against `reference` as a deviation relative to the reference.

    The score is ``|value - reference| / |rel_std * reference|``. Where the
    reference is zero the plain absolute difference is returned instead, so the
    score never divides by zero. Arrays are handled element-wise.

    Parameters
    ----------
    value : float or array-like or None
        Feature value of the fitted model.
    reference : float or array-like or None
        Feature value of the release model.
    rel_std : float
        Fraction of the reference used as normalization.

    Returns
    -------
    float or np.ndarray
        The score; NaN when either input is None.
    """
    if value is None or reference is None:
        return np.nan
    abs_diff = np.abs(np.subtract(value, reference))
    scale = np.abs(np.multiply(rel_std, reference))
    if np.ndim(scale) == 0:
        return abs_diff / scale if scale != 0 else abs_diff
    # element-wise: dividing by 1 keeps the absolute difference where scale == 0
    return abs_diff / np.where(scale != 0, scale, 1.0)


# Feature values and scores of the best model of each strategy on the validation protocols
if compute_val_responses:
    val_objectives = eva_extra_val.fitness_calculator.objectives
    for strategy in strategies:
        responses = opt_results_val[strategy]["responses"]
        eap = opt_results_val[strategy]["eap"]
        extra_features_strategy = mf.efeatures_extraction.compute_extra_features(
                                        eap, fs=extra_kwargs["fs"],
                                        upsample=extra_kwargs["upsample"])
        opt_results_val[strategy]["extra_features"] = extra_features_strategy

        features_best = {}
        for feat_name in tqdm(list(features_release), desc=f"computing features {strategy}"):
            release_value = features_release[feat_name]["value"]

            if feat_name in feat_objectives:
                # feature defined by a validation objective: compute it from the responses
                feat = val_objectives[feat_objectives.index(feat_name)].features[0]
                feat_value = feat.calculate_feature(responses)
                if feat_value is None:
                    feat_value = max_feature_value
            else:
                # extra: the name has the form "<protocol>.MEA.<feature>_<channel>"
                _, _, efeat_full = feat_name.split(".")
                efeat, _, chan = efeat_full.rpartition("_")
                feat_value = extra_features_strategy[efeat][int(chan)]

            # same zero-safe normalization for intra- and extracellular features
            feat_score = _relative_score(feat_value, release_value, std_from_mean)
            features_best[feat_name] = {"value": feat_value, "score": feat_score}

        opt_results_val[strategy]["features"] = features_best

In [None]:
# Overlay the ground-truth (black) and best-fitted responses to a selection of
# validation protocols, one figure per strategy.
# NOTE: this re-uses the name `figs_intra`, replacing the training figures.
protocols_to_plot = [
    "firepattern_200",
    "HyperDepol_-160",
    "HyperDepol_-40",
    "sAHP_250",
    "PosCheops_300",
]
titles = protocols_to_plot
figs_intra = {}
for strategy in strategies:
    responses_to_plot = [responses_release_val, opt_results_val[strategy]["responses"]]
    colors = ["k", colors_dict[strategy]]
    labels = ["GT", strategy.upper()]  # not passed to the plotting call
    figs_intra[strategy] = mf.plot_multiple_responses(
        responses_to_plot,
        colors=colors,
        return_fig=True,
        protocol_names=protocols_to_plot,
        titles=titles,
        figsize=(7, 12),
    )

In [None]:
# Overlay the normalized ground-truth (black) and best-fitted EAPs, one figure per strategy
figs_extra = {}
for strategy in strategies:
    responses = opt_results_val[strategy]["responses"]
    responses_to_plot = [responses_release_val, responses]
    colors = ["k", colors_dict[strategy]]
    labels = ["GT", strategy.upper()]  # currently not passed to the plotting call
    ax_extra = mf.plot_multiple_eaps(
        responses_to_plot,
        eva_extra_val.fitness_protocols,
        probe,
        protocol_name=protocol_for_eap_val,
        colors=colors,
        norm=True,
    )
    # keep the figure that owns the returned axes
    figs_extra[strategy] = ax_extra.get_figure()

In [None]:
# export the validation figures of every strategy as PNG (300 dpi) and PDF
if save_fig:
    for strategy in figs_intra:
        for panel, panel_figs in (("fig4C", figs_intra), ("fig4D", figs_extra)):
            panel_fig = panel_figs[strategy]
            panel_fig.savefig(figure_folder / f"{panel}_{strategy}.png", dpi=300)
            panel_fig.savefig(figure_folder / f"{panel}_{strategy}.pdf")

## Compare best-fitted models

In [None]:
# plotting order, restricted to the strategies that have validation results
order_full = ["soma", "all", "sections", "single"]
order = [name for name in order_full if name in opt_results_val]

### Compare features

In [None]:
# Long-format table with one row per (strategy, validation feature)
feature_records = []
for strategy in strategies:
    for feat_name, feat_dict in opt_results_val[strategy]["features"].items():
        feature_records.append({
            "feature_set": strategy,
            "feat_name": feat_name,
            # names containing "MEA" are extracellular features
            "feature_type": "extra" if "MEA" in feat_name else "intra",
            "feat_score": feat_dict["score"],
            # protocol type = part of the protocol name before its first "_"
            "protocol_type": feat_name.split(".")[0].split("_")[0],
        })

df_feats = pd.DataFrame(
    feature_records,
    columns=["feature_set", "feat_name", "feature_type", "feat_score", "protocol_type"],
)

In [None]:
def _plot_feature_scores(df_scores, kind):
    """Box plot (outliers hidden) of the feature scores in `df_scores` per strategy.

    `kind` is the word that opens the title; the title also reports the number
    of rows belonging to the 'soma' strategy. Returns the figure, the axes and
    that number.
    """
    fig, ax = plt.subplots(figsize=(7, 10))
    sns.boxplot(data=df_scores, x="feature_set", y="feat_score", order=order,
                ax=ax, showfliers=False)
    n_soma = len(df_scores.query("feature_set == 'soma'"))
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.set_title(f"{kind} features\n(n={n_soma})", fontsize=20)
    ax.set_xlabel("Strategy", fontsize=15)
    ax.set_ylabel("Score", fontsize=15)
    return fig, ax, n_soma


# split by feature type and drop rows with missing values
df_feats_intra = df_feats.query("feature_type == 'intra'").dropna()
df_feats_extra = df_feats.query("feature_type == 'extra'").dropna()

fig_feat_intra, ax, n = _plot_feature_scores(df_feats_intra, "Intracellular")
fig_feat_extra, ax, n = _plot_feature_scores(df_feats_extra, "Extracellular")



In [None]:
# export the two feature-score panels
if save_fig:
    for feat_fig, file_name in ((fig_feat_intra, "fig4Bleft-intra.pdf"),
                                (fig_feat_extra, "fig4Bright-extra.pdf")):
        feat_fig.savefig(figure_folder / file_name)

In [None]:
import scipy.stats as ss
import statsmodels.api as sa
import scikit_posthocs as sp

In [None]:
# pairwise Conover post-hoc comparison of intracellular feature scores between strategies (Holm-adjusted p-values)
sp.posthoc_conover(df_feats_intra, val_col='feat_score', 
                   group_col='feature_set', p_adjust = 'holm')

In [None]:
# pairwise Conover post-hoc comparison of extracellular feature scores between strategies (Holm-adjusted p-values)
sp.posthoc_conover(df_feats_extra, val_col='feat_score', 
                   group_col='feature_set', p_adjust = 'holm')

In [None]:
# feature scores of each strategy, as separate Series (intracellular, then extracellular)
intra_soma, intra_sections, intra_all, intra_single = (
    df_feats_intra.loc[df_feats_intra["feature_set"] == name, "feat_score"]
    for name in ("soma", "sections", "all", "single")
)

extra_soma, extra_sections, extra_all, extra_single = (
    df_feats_extra.loc[df_feats_extra["feature_set"] == name, "feat_score"]
    for name in ("soma", "sections", "all", "single")
)

In [None]:
# Disabled: Wilcoxon tests of the intracellular scores of each strategy against "soma"
# print("Intra - Sections VS SOMA:", wilcoxon(intra_sections, intra_soma))
# print("Intra - All VS SOMA:", wilcoxon(intra_all, intra_soma))
# print("Intra - Single VS SOMA:", wilcoxon(intra_single, intra_soma))

In [None]:
# Disabled: Wilcoxon tests of the extracellular scores of each strategy against "soma"
# print("Extra - Sections VS SOMA:", wilcoxon(extra_sections, extra_soma))
# print("Extra - All VS SOMA:", wilcoxon(extra_all, extra_soma))
# print("Extra - Single VS SOMA:", wilcoxon(extra_single, extra_soma))

In [None]:
# one row per entry of opt_results_val, with the entry key repeated in a "strategy" column
df_test = (
    pd.DataFrame.from_dict(opt_results_val, orient="index")
    .assign(strategy=lambda frame: frame.index)
)

In [None]:
# bar plot of the EAP distance to the release model for each strategy
fig_cos, ax = plt.subplots()
sns.barplot(data=df_test, x="strategy", y="eap_dist", order=order, ax=ax)
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.set_title("Extracellular difference", fontsize=15)
ax.set_ylabel("Distance", fontsize=12)

In [None]:
# Add the ground-truth (release) model next to the fitted ones.
# Every other entry of opt_results_val holds responses to the validation
# protocols, and eap_release is computed from responses_release_val, so the
# ground-truth responses stored here are the validation ones as well.
opt_results_val["gt"] = {
    "responses": responses_release_val,
    "eap": eap_release,
}

In [None]:
# cache the validation results; the loading cell reads this file back on the next run
with open(result_folder / results_val_file, 'wb') as f:
    pickle.dump(opt_results_val, f, protocol=pickle.HIGHEST_PROTOCOL)