# 3) Analyze and evaluate optimization output

This final notebook loads the `runs.pkl` file created in notebook 2 and analyzes:

- the distance between different feature sets in the parameter space
- the distance between different feature sets in the feature space
- the distance between different feature sets in the extracellular signals

In [None]:
import pickle
import pandas as pd
import seaborn as sns
import sys
import shutil

import bluepyopt as bpopt
import bluepyopt.ephys as ephys

import matplotlib.pyplot as plt
from scipy.spatial import distance
import MEAutility as mu
import json
import time
import numpy as np
from pathlib import Path

from scipy.stats import kruskal, mannwhitneyu, wilcoxon

import multimodalfitting as mf

%matplotlib notebook

## Load GT params and optimization output

In [None]:
# General configuration for this analysis.
model_name = "hay_ais"
probe_type = "planar"  # alternative noted by the original author: "linear"
# NOTE(review): machine-specific absolute path -- point it at your local copy of the fitting data.
# (The string has no placeholders, so it is a plain literal rather than an f-string.)
data_base_folder = Path("/Users/abuccino/Documents/Codes/modeling/multimodal-fitting/multimodalfitting/fitting_data/data_210927")

# `model_folder` is the `model_name` sub-folder of ../cell_models.
cell_models_folder = Path("..") / "cell_models"
model_folder = cell_models_folder / model_name

In [None]:
# Data folder for the selected model / probe combination.
data_folder = data_base_folder.joinpath(f"{model_name}_ecode_probe_{probe_type}")

# Change this to the folder containing your pkl file.
result_folder = Path("..", "results", "211124")

In [None]:
# Build the same model twice: once with `release=False` and once with `release=True`.
cell = mf.create_ground_truth_model(release=False, model_name=model_name)
cell_release = mf.create_ground_truth_model(release=True, model_name=model_name)

probe = mf.define_electrode(probe_type=probe_type)

# Names of the parameters of `cell` that are not frozen.
param_names = [p.name for p in cell.params.values() if not p.frozen]
# sim = ephys.simulators.LFPySimulator(cell, cvode_active=True, electrode=probe, mechs_folders=model_folder)

# Values of those same parameters in the release model, keyed by parameter name.
params_release = {p.name: p.value for p in cell_release.params_by_names(param_names)}

In [None]:
# Protocol name passed as `protocol_name` to `mf.utils.calculate_eap` and `mf.plot_multiple_eaps` below.
protocol_for_eap = "IDrest_300"

In [None]:
# File name of the pickled optimization runs, looked up inside `result_folder`.
pkl_file_name = "runs.pkl"

In [None]:
# Load the pickled optimization runs. The context manager closes the file handle,
# which the bare `pickle.load(open(...))` form never did.
# NOTE(review): unpickling can execute arbitrary code -- only load result files you trust.
with open(result_folder / pkl_file_name, "rb") as pkl_file:
    data = pickle.load(pkl_file)

# One row per optimization run; keep only the runs of the selected model.
df_optimization = pd.DataFrame(data)
df_model = df_optimization.query(f"model == '{model_name}'")

In [None]:
# Split the runs of this model by the feature set they were optimized on.
opt_soma, opt_extra = (
    df_model.query(selector)
    for selector in ("feature_set == 'soma'", "feature_set == 'extra'")
)
print(f"Somatic optimizations: {len(opt_soma)}",
      f"Extra optimizations: {len(opt_extra)}",
      sep="\n")

In [None]:
fig, ax = plt.subplots()
# Runs whose largest `nevals` entry does not exceed this threshold are drawn dashed and not kept.
min_evals = 3000
color_strategy = {"all": "C1", "sections": "C2", "single": "C3"}


def _plot_min_fitness(ax, row, kept, **line_kwargs):
    """Plot the logbook "min" trace of one run against its `nevals`.

    Kept runs are drawn solid (lw=0.8), discarded ones dashed and thinner (lw=0.5).
    Extra keyword arguments (color, label, ...) are forwarded to `ax.plot`.
    """
    ax.plot(row["nevals"],
            row["logbook"].select("min"),
            ls="-" if kept else "--",
            lw=0.8 if kept else 0.5,
            alpha=0.75,
            **line_kwargs)


# Somatic runs: all in "C0"; collect the index labels of the runs to keep.
keep_idxs_soma = []
for idx, row in opt_soma.iterrows():
    kept = max(row["nevals"]) > min_evals
    if kept:
        keep_idxs_soma.append(idx)
    _plot_min_fitness(ax, row, kept, color="C0")

# Extra runs: colored by extra strategy and labelled with their index label.
keep_idx_extra = []
for idx, row in opt_extra.iterrows():
    kept = max(row["nevals"]) > min_evals
    if kept:
        keep_idx_extra.append(idx)
    _plot_min_fitness(ax, row, kept,
                      color=color_strategy[row["extra_strategy"]],
                      label=idx)

ax.set_title("Min fitness")
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_xlabel("Neval")
ax.set_ylabel("Min fitness")
ax.set_yscale('log')

## Load protocols and original features

In [None]:
# Protocol and feature definition files, both stored in the "efeatures" sub-folder of the data folder.
protocols_file = data_folder.joinpath("efeatures", "protocols_BPO_all.json")
feature_file = data_folder.joinpath("efeatures", "features_BPO_all.json")

In [None]:
# Keyword arguments forwarded unchanged to `mf.create_evaluator` and `mf.utils.calculate_eap`.
# NOTE(review): presumably sampling/filter/cut-out settings for the extracellular signal -- confirm units.
extra_kwargs = {
    "fs": 20,
    "fcut": [300, 6000],
    "filt_type": "filtfilt",
    "ms_cut": [3, 5],
}

In [None]:
# Evaluator with the "extra" feature set and the "all" strategy; it is used below to
# simulate and score every candidate parameter set.
# The LFP protocol is tied to `protocol_for_eap` (same value as the former hard-coded
# "IDrest_300") so the protocol used for the EAP is always the one set up with LFP.
eva_extra = mf.create_evaluator(
    model_name=model_name,
    feature_set="extra",
    extra_strategy="all",
    protocols_with_lfp=protocol_for_eap,
    **extra_kwargs
)

In [None]:
# Report how many objectives each extra strategy found in the runs defines.
for extra_strategy in np.unique(opt_extra.extra_strategy):

    eva_ex = mf.create_evaluator(
        model_name=model_name,
        feature_set="extra",
        extra_strategy=extra_strategy,
        # same value as the former hard-coded "IDrest_300"; kept in sync with the EAP protocol
        protocols_with_lfp=protocol_for_eap,
        **extra_kwargs
    )
    print(f"Strategy {extra_strategy} --> num features {len(eva_ex.fitness_calculator.objectives)}")

In [None]:
# Keep only the somatic runs whose index labels were collected in `keep_idxs_soma`
# (runs with max(nevals) > min_evals).
opt_soma = opt_soma.loc[keep_idxs_soma]

In [None]:
# Display the best fitness of each retained somatic run.
opt_soma.best_fitness

In [None]:
# Keep only the extra runs whose index labels were collected in `keep_idx_extra`
# (runs with max(nevals) > min_evals).
opt_extra = opt_extra.loc[keep_idx_extra]

## Compute release responses

In [None]:
# Run every fitness protocol of the evaluator with the release parameter values.
responses_release = eva_extra.run_protocols(
    eva_extra.fitness_protocols.values(),
    param_values=params_release,
)

In [None]:
# EAP of the release model, computed from the responses of `protocol_for_eap`.
eap_release = mf.utils.calculate_eap(
    responses=responses_release,
    protocols=eva_extra.fitness_protocols,
    protocol_name=protocol_for_eap,
    **extra_kwargs,
)

In [None]:
# Feature value and score of the release responses, keyed by feature name.
# Objectives that do not hold exactly one feature are reported and skipped. They used to
# leave an empty dict behind, which breaks any later `["score"]` / `["value"]` lookup.
features_release = {}
for obj in eva_extra.fitness_calculator.objectives:
    if len(obj.features) != 1:
        print(f"More than one feature for objective: {obj.name}")
        continue
    feature = obj.features[0]
    features_release[feature.name] = {
        "value": feature.calculate_feature(responses_release),
        "score": feature.calculate_score(responses_release),
    }

## "soma" example

In [None]:
# Index of the somatic run with the lowest best_fitness; it is consumed positionally via `.iloc`.
# NOTE(review): assumes np.argmin on a pandas Series yields a position, not a label -- confirm for your pandas version.
best_soma = np.argmin(opt_soma.best_fitness)

In [None]:
# Row of the best somatic run and its parameters as a {name: value} mapping.
params_sample_soma = opt_soma.iloc[best_soma]
params_sample_soma_dict = dict(zip(param_names, params_sample_soma.best_params))
display(params_sample_soma.best_fitness)

In [None]:
# Run every fitness protocol with the parameters of the best somatic run.
response_soma = eva_extra.run_protocols(
    eva_extra.fitness_protocols.values(),
    param_values=params_sample_soma_dict,
)

In [None]:
# Feature value and score of the best somatic model, keyed by feature name.
# Objectives that do not hold exactly one feature are reported and skipped. They used to
# leave an empty dict behind, which makes the later `feat_dict["score"]` lookup raise KeyError.
features_best_soma = {}
for obj in eva_extra.fitness_calculator.objectives:
    if len(obj.features) != 1:
        print(f"More than one feature for objective: {obj.name}")
        continue
    feature = obj.features[0]
    features_best_soma[feature.name] = {
        "value": feature.calculate_feature(response_soma),
        "score": feature.calculate_score(response_soma),
    }

In [None]:
# EAP of the best somatic model, computed from the responses of `protocol_for_eap`.
eap_soma = mf.utils.calculate_eap(
    responses=response_soma,
    protocols=eva_extra.fitness_protocols,
    protocol_name=protocol_for_eap,
    **extra_kwargs,
)

In [None]:
# Overlay release (black) and best-somatic (C0) responses.
mf.plot_multiple_responses(
    [responses_release, response_soma],
    colors=["k", "C0"],
)

In [None]:
# Overlay release (black) and best-somatic (C0) EAPs on the probe.
mf.plot_multiple_eaps(
    [responses_release, response_soma],
    eva_extra.fitness_protocols,
    probe,
    protocol_name=protocol_for_eap,
    colors=["k", "C0"],
)

In [None]:
# Cosine distance between the flattened release EAP and the flattened best-somatic EAP.
eap_dist_soma = distance.cosine(np.ravel(eap_release), np.ravel(eap_soma))

## "extra" example

In [None]:
# Split the retained extra runs by the strategy they were optimized with.
opt_all, opt_sections, opt_single = (
    opt_extra.query(selector)
    for selector in (
        "extra_strategy == 'all'",
        "extra_strategy == 'sections'",
        "extra_strategy == 'single'",
    )
)

In [None]:
# Index of the lowest best_fitness within each strategy; consumed positionally via `.iloc`.
# NOTE(review): assumes np.argmin on a pandas Series yields a position, not a label -- confirm for your pandas version.
best_all = np.argmin(opt_all.best_fitness)
best_sections = np.argmin(opt_sections.best_fitness)
best_single = np.argmin(opt_single.best_fitness)

In [None]:
# For each strategy: the row of its best run, its parameters as a {name: value}
# mapping, and a display of its best fitness.
params_sample_all = opt_all.iloc[best_all]
params_sample_dict_all = dict(zip(param_names, params_sample_all.best_params))
display("ALL", params_sample_all.best_fitness)

params_sample_sections = opt_sections.iloc[best_sections]
params_sample_dict_sections = dict(zip(param_names, params_sample_sections.best_params))
display("SECTIONS", params_sample_sections.best_fitness)

params_sample_single = opt_single.iloc[best_single]
params_sample_dict_single = dict(zip(param_names, params_sample_single.best_params))
display("SINGLE", params_sample_single.best_fitness)

In [None]:
# Run every fitness protocol once per strategy (all -> sections -> single),
# each time with the parameters of that strategy's best run.
response_all, response_sections, response_single = (
    eva_extra.run_protocols(eva_extra.fitness_protocols.values(), param_values=best_params)
    for best_params in (
        params_sample_dict_all,
        params_sample_dict_sections,
        params_sample_dict_single,
    )
)

In [None]:
# EAP of the best model of each strategy (all -> sections -> single).
eap_all, eap_sections, eap_single = (
    mf.utils.calculate_eap(
        responses=strategy_responses,
        protocols=eva_extra.fitness_protocols,
        protocol_name=protocol_for_eap,
        **extra_kwargs,
    )
    for strategy_responses in (response_all, response_sections, response_single)
)

In [None]:
def _single_feature_values_and_scores(responses):
    """Return {feature name: {"value": ..., "score": ...}} computed from `responses`.

    Iterates over the objectives of `eva_extra`. Objectives that do not hold exactly
    one feature are reported and skipped. They used to leave an empty dict behind,
    which makes the later `feat_dict["score"]` lookup raise KeyError.
    """
    features = {}
    for obj in eva_extra.fitness_calculator.objectives:
        if len(obj.features) != 1:
            print(f"More than one feature for objective: {obj.name}")
            continue
        feature = obj.features[0]
        features[feature.name] = {
            "value": feature.calculate_feature(responses),
            "score": feature.calculate_score(responses),
        }
    return features


# Same computation for the best model of each extra strategy.
features_best_all = _single_feature_values_and_scores(response_all)
features_best_sections = _single_feature_values_and_scores(response_sections)
features_best_single = _single_feature_values_and_scores(response_single)

In [None]:
# Overlay release (black) and the best model of each extra strategy (C1/C2/C3).
mf.plot_multiple_responses(
    [responses_release, response_all, response_sections, response_single],
    colors=["k", "C1", "C2", "C3"],
)

In [None]:
def _plot_eaps_against_references(strategy_responses, strategy_color, title):
    """Plot release (black), best-somatic (C0) and one strategy's EAPs; return the titled axes."""
    eap_ax = mf.plot_multiple_eaps([responses_release, response_soma, strategy_responses],
                                   eva_extra.fitness_protocols, probe,
                                   protocol_name=protocol_for_eap,
                                   colors=["k", "C0", strategy_color])
    eap_ax.set_title(title)
    return eap_ax


# One figure per extra strategy, using the same colors as the convergence plot.
ax_all = _plot_eaps_against_references(response_all, "C1", "ALL")
ax_sec = _plot_eaps_against_references(response_sections, "C2", "SECTIONS")
ax_sin = _plot_eaps_against_references(response_single, "C3", "SINGLE")

In [None]:
# Cosine distance between the flattened release EAP and each strategy's flattened EAP.
eap_dist_all, eap_dist_sections, eap_dist_single = (
    distance.cosine(eap_release.ravel(), strategy_eap.ravel())
    for strategy_eap in (eap_all, eap_sections, eap_single)
)

In [None]:
# Print the four EAP cosine distances, one per line.
for dist_label, dist_value in (
    ("Cosine dist SOMA", eap_dist_soma),
    ("Cosine dist EXTRA ALL", eap_dist_all),
    ("Cosine dist EXTRA SECTIONS", eap_dist_sections),
    ("Cosine dist EXTRA SINGLE", eap_dist_single),
):
    print(dist_label, dist_value)

## Compare best-fitted models

In [None]:
# Feature value and score of the release (GT) responses, keyed by feature name.
# Objectives that do not hold exactly one feature are reported and skipped. They used to
# leave an empty dict behind, which breaks any later `["score"]` / `["value"]` lookup.
feat_gt_dict = {}
for obj in eva_extra.fitness_calculator.objectives:
    if len(obj.features) != 1:
        print(f"More than one feature for objective: {obj.name}")
        continue
    feature = obj.features[0]
    feat_gt_dict[feature.name] = {
        "value": feature.calculate_feature(responses_release),
        "score": feature.calculate_score(responses_release),
    }

In [None]:
# One single-row frame per best model, extended with its responses, EAP and EAP distance.
# The somatic row gets "soma" as its `extra_strategy` so all four rows can be grouped on it.
df_best_soma = (
    opt_soma.iloc[best_soma].to_frame().transpose()
    .assign(extra_strategy="soma",
            responses=[response_soma],
            eap=[eap_soma],
            eap_dist=eap_dist_soma)
)

df_best_all = (
    opt_all.iloc[best_all].to_frame().transpose()
    .assign(responses=[response_all],
            eap=[eap_all],
            eap_dist=eap_dist_all)
)

df_best_sections = (
    opt_sections.iloc[best_sections].to_frame().transpose()
    .assign(responses=[response_sections],
            eap=[eap_sections],
            eap_dist=eap_dist_sections)
)

df_best_single = (
    opt_single.iloc[best_single].to_frame().transpose()
    .assign(responses=[response_single],
            eap=[eap_single],
            eap_dist=eap_dist_single)
)

# Stack the four best models into the table used by the comparisons below.
df_test = pd.concat([df_best_soma, df_best_all, df_best_sections, df_best_single])
df_test

In [None]:
# Display the EAP cosine distance of the best "single"-strategy model.
eap_dist_single

## Compare parameters 

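Here we normalize each parameter to its optimization bounds (0 = lower bound, 1 = upper bound) and compute the absolute difference between the normalized fitted value and the normalized ground-truth (GT, release) value.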

In [None]:
param_json = model_folder / "parameters.json"
params = json.loads(param_json.read_text())

# Map "<param_name>_<section>" to its bounds; entries without "bounds" are ignored.
# `sectionlist` may be a single name or a list of names, so normalize it to a list first.
param_boundaries = {}
for param in params:
    if "bounds" not in param:
        continue
    sections = param['sectionlist']
    if not isinstance(sections, list):
        sections = [sections]
    for sec in sections:
        param_boundaries[f"{param['param_name']}_{sec}"] = param["bounds"]

# Rescale each release value by its bounds: (value - lower) / (upper - lower).
params_release_norm = {
    name: (value - param_boundaries[name][0]) / (param_boundaries[name][1] - param_boundaries[name][0])
    for name, value in params_release.items()
}

In [None]:
# Display the release parameters rescaled by their bounds.
params_release_norm

In [None]:
# Parallel column lists for the long-format parameter table (one row per model and parameter).
feature_set_array = []
seed_array = []
param_name_array = []
param_value_array = []
param_norm_array = []
release_value_array = []
release_norm_array = []
diff_with_release_array = []
section_array = []


def _append_param_row(feature_set, seed, name, value, diff=None):
    """Append one row to the column lists.

    The value is rescaled by its bounds; when `diff` is None the stored difference is
    the absolute distance between that rescaled value and the rescaled release value.
    """
    bounds = param_boundaries[name]
    norm = (value - bounds[0]) / (bounds[1] - bounds[0])
    feature_set_array.append(feature_set)
    seed_array.append(seed)
    param_name_array.append(name)
    param_value_array.append(value)
    # the section is the last "_"-separated token of the parameter name
    section_array.append(name.split("_")[-1])
    release_value_array.append(params_release[name])
    release_norm_array.append(params_release_norm[name])
    param_norm_array.append(norm)
    diff_with_release_array.append(abs(norm - params_release_norm[name]) if diff is None else diff)


# Rows for the best fitted models.
for _, opt in df_test.iterrows():
    for name, value in dict(zip(param_names, opt.best_params)).items():
        _append_param_row(opt.extra_strategy, opt.seed, name, value)

# Reference rows for the release parameters: labelled "GT", seed 0, zero difference.
for name, value in params_release.items():
    _append_param_row("GT", 0, name, value, diff=0)

df_params = pd.DataFrame({"seed": seed_array, "feature_set": feature_set_array, "param_name": param_name_array,
                          "param_value": param_value_array, "param_norm": param_norm_array,
                          "release_value": release_value_array, "release_norm": release_norm_array,
                          "diff_release": diff_with_release_array, "section": section_array})

In [None]:
# Drop the "GT" reference rows (their difference to the release is 0 by construction).
df_no_gt = df_params.loc[df_params["feature_set"] != "GT"]

In [None]:
# overall parameter diff
plt.figure()
sns.boxenplot(data=df_no_gt, y="feature_set", x="diff_release")

In [None]:
# Somatic parameters only (`section` is the last "_"-separated token of the parameter name).
somatic_params = df_no_gt.query("section == 'somatic'")

# Per-parameter `diff_release`, one bar per feature set.
# `orient="h"` is seaborn's documented switch for horizontal bars. The former
# `orientation="horizontal"` is not a barplot parameter and only worked by leaking into matplotlib.
plt.figure()
sns.barplot(data=somatic_params, y="param_name", x="diff_release", hue="feature_set",
            orient="h")
# Distribution of `diff_release` for each feature set.
plt.figure()
sns.boxenplot(data=somatic_params, y="feature_set", x="diff_release")

In [None]:
# Apical parameters only (`section` is the last "_"-separated token of the parameter name).
apical_params = df_no_gt.query("section == 'apical'")

# Per-parameter `diff_release`, one bar per feature set.
# `orient="h"` is seaborn's documented switch for horizontal bars. The former
# `orientation="horizontal"` is not a barplot parameter and only worked by leaking into matplotlib.
plt.figure()
sns.barplot(data=apical_params, y="param_name", x="diff_release", hue="feature_set",
            orient="h")
# Distribution of `diff_release` for each feature set.
plt.figure()
sns.boxenplot(data=apical_params, y="feature_set", x="diff_release")

In [None]:
# Basal parameters only (`section` is the last "_"-separated token of the parameter name).
basal_params = df_no_gt.query("section == 'basal'")

# Per-parameter `diff_release`, one bar per feature set.
# `orient="h"` is seaborn's documented switch for horizontal bars. The former
# `orientation="horizontal"` is not a barplot parameter and only worked by leaking into matplotlib.
plt.figure()
sns.barplot(data=basal_params, y="param_name", x="diff_release", hue="feature_set",
            orient="h")
# Distribution of `diff_release` for each feature set.
plt.figure()
sns.boxenplot(data=basal_params, y="feature_set", x="diff_release")

In [None]:
# Parameters whose name ends in "_segment" (`section` is the last "_"-separated token of the name).
segment_params = df_no_gt.query("section == 'segment'")

# Per-parameter `diff_release`, one bar per feature set.
# `orient="h"` is seaborn's documented switch for horizontal bars. The former
# `orientation="horizontal"` is not a barplot parameter and only worked by leaking into matplotlib.
plt.figure()
sns.barplot(data=segment_params, y="param_name", x="diff_release", hue="feature_set",
            orient="h")
# Distribution of `diff_release` for each feature set.
plt.figure()
sns.boxenplot(data=segment_params, y="feature_set", x="diff_release")

## Compare features

In [None]:
# Parallel column lists for the long-format feature-score table.
feature_name_array = []
feature_set_array = []
feature_score_array = []
feature_type_array = []

# Feature dictionaries of the four best models, keyed by the label used in the plots.
feature_dicts = dict(soma=features_best_soma,
                     all=features_best_all,
                     sections=features_best_sections,
                     single=features_best_single)

for feature_set, feats in feature_dicts.items():
    for feat_name, feat_dict in feats.items():
        if "score" not in feat_dict:
            # Entries can be empty for objectives holding more than one feature.
            # Skip them instead of failing with KeyError.
            continue
        feature_set_array.append(feature_set)
        feature_name_array.append(feat_name)
        # Features with "MEA" in their name are tagged "extra", all others "intra".
        feature_type_array.append("extra" if "MEA" in feat_name else "intra")
        feature_score_array.append(feat_dict["score"])

df_feats = pd.DataFrame({"feature_set": feature_set_array, "feat_name": feature_name_array,
                         "feat_score": feature_score_array, "feature_type": feature_type_array})

In [None]:
# feature_set_array = []
# seed_array = []
# feature_name_array = []
# feature_value_array = []
# feature_score_array = []
# feature_type_array = []
# extra_strategy_array = []

# for i, feats in enumerate(feats_soma):
#     for feat_name, feat_dict in feats.items():
#         feature_set_array.append("soma")
#         seed_array.append(i)
#         feature_name_array.append(feat_name)
#         if "MEA" not in feat_name:
#             feature_type_array.append("soma")
#             feature_value_array.append(feat_dict["value"])
#         else:
#             feature_type_array.append("MEA")
#             feature_value_array.append(np.nan)
#         feature_score_array.append(feat_dict["score"])
#         extra_strategy_array.append("soma")
    
# for i, feats in enumerate(feats_extra):
#     for feat_name, feat_dict in feats.items():
#         feature_set_array.append("extra")
#         seed_array.append(i)
#         feature_name_array.append(feat_name)
#         if "MEA" not in feat_name:
#             feature_type_array.append("soma")
#             feature_value_array.append(feat_dict["value"])
#         else:
#             feature_type_array.append("MEA")
#             feature_value_array.append(np.nan)
#         feature_score_array.append(feat_dict["score"])
#         extra_strategy_array.append(strategy_extra[i])
    
# df_feats = pd.DataFrame({"seed": seed_array, "feature_set": feature_set_array, "feat_name": feature_name_array,
#                          "feat_value": feature_value_array, "feat_score": feature_score_array,
#                          "feat_type": feature_type_array, "extra_strategy": extra_strategy_array})

In [None]:
# Number of rows in df_feats tagged as extracellular features.
len(df_feats.query("feature_type == 'extra'"))

In [None]:
# One box plot per feature type: score distribution (x axis) for each feature set (y axis).
# The score label belongs on the x axis. It was previously set on the y axis, which
# mislabelled the categorical feature-set axis.
score_panels = (
    ("intra", "Feature scores (intracellular)", "Intracellular features"),
    ("extra", "Feature scores (extracellular)", "Extracellular features"),
)

for feat_type, score_label, panel_title in score_panels:
    fig, ax = plt.subplots()

    sns.boxplot(data=df_feats.query(f"feature_type == '{feat_type}'"),
                y="feature_set", x="feat_score", ax=ax)
    ax.set_xlabel(score_label, fontsize=12)
    ax.set_ylabel("Feature set", fontsize=12)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.set_title(panel_title, fontsize=15)


## Compare EAP distance

In [None]:
# Bar plot of the EAP cosine distance to the release model for each best model.
fig, ax = plt.subplots()
sns.barplot(x="extra_strategy", y="eap_dist", data=df_test, ax=ax)
ax.set_ylabel("Cosine distance", fontsize=12)
for spine_side in ("top", "right"):
    ax.spines[spine_side].set_visible(False)
ax.set_title("Extracellular difference", fontsize=15)