# 3) Analyze and evaluate optimization output

This final notebook loads the `runs.pkl` file created in notebook 2 and analyzes:

- the distance between different feature sets in the parameter space
- the distance between different feature sets in the feature space
- the distance between different feature sets in the extracellular signals

In [None]:
import pickle
import pandas as pd
import seaborn as sns
import sys
import shutil

import bluepyopt as bpopt
import bluepyopt.ephys as ephys

import matplotlib.pyplot as plt
from scipy.spatial import distance
import MEAutility as mu
import json
import time
import numpy as np
from pathlib import Path
from pprint import pprint

from scipy.stats import kruskal, mannwhitneyu, wilcoxon

import multimodalfitting as mf

%matplotlib notebook

In [None]:
# Figure export switch: when True, the `savefig` cells below write PDFs into `figure_folder`.
save_fig = False
figure_folder = Path(".") / "figures_hay_ais"

# Create the output folder only when figures are actually going to be saved.
if save_fig:
    figure_folder.mkdir(exist_ok=True)

In [None]:
# Base directory (two levels up from the working directory); the cell-model and
# results paths below are built relative to it.
base_dir = Path("../..")

## Load GT params and optimization output

In [None]:
# General settings: which cell model to analyze and where its files live.
model_name = "hay_ais"
probe_type = "planar"  # other value noted by the author: "linear"

# Folder layout: <base_dir>/cell_models/<model_name>/fitting/efeatures/probe_BPO.json
cell_models_folder = base_dir / "cell_models"
model_folder = cell_models_folder / model_name
probe_file = model_folder / "fitting" / "efeatures" / "probe_BPO.json"

In [None]:
# Name of the results sub-folder to analyze (other value noted by the author: '220111').
results_date = '211124'

In [None]:
# Change this to the folder containing your pkl file.
result_folder = base_dir / "results" / results_date

In [None]:
# Ground-truth cell model in two variants: `release=False` and `release=True`.
cell = mf.create_ground_truth_model(model_name=model_name, release=False)
cell_release = mf.create_ground_truth_model(model_name=model_name, release=True)

# Build the probe object from the probe JSON file.
probe = mf.define_electrode(probe_file=probe_file)

# Names of the parameters that are not frozen in the model.
param_names = [p.name for p in cell.params.values() if not p.frozen]

# Value of each of those parameters in the release model, keyed by parameter name.
params_release = {
    p.name: p.value for p in cell_release.params_by_names(param_names)
}

In [None]:
# Protocol name kept for later use; presumably the protocol from which extracellular
# action potentials are computed (not referenced in the cells shown here) -- TODO confirm.
protocol_for_eap = "firepattern_200"

In [None]:
# File name of the pickled optimization runs, looked up inside `result_folder`.
pkl_file_name = "runs.pkl"

In [None]:
# Load the optimization runs and keep only those belonging to `model_name`.
# NOTE(security): unpickling can execute arbitrary code -- only load result files you trust.
# The context manager makes sure the file handle is closed after loading.
with open(result_folder / pkl_file_name, 'rb') as runs_file:
    data = pickle.load(runs_file)
df_optimization = pd.DataFrame(data)
df_model = df_optimization.query(f"model == '{model_name}'")

In [None]:
# Split the runs of this model by the feature set they were optimized on.
opt_soma = df_model.query("feature_set == 'soma'")
opt_extra = df_model.query("feature_set == 'extra'")

# Report how many runs are available for each feature set.
for run_kind, runs in (("Somatic", opt_soma), ("Extra", opt_extra)):
    print(f"{run_kind} optimizations: {len(runs)}")

In [None]:
# Plot the minimum fitness of every run against the number of evaluations.
# Runs whose evaluation count exceeds `min_evals` are drawn solid and their index is kept;
# the remaining runs are drawn dashed and thinner.
fig, ax = plt.subplots()
min_evals = 3000
color_strategy = {"all": "C1", "sections": "C2", "single": "C3"}

line_style_kept = dict(ls='-', lw=0.8, alpha=0.75)
line_style_short = dict(ls='--', lw=0.5, alpha=0.75)

# Somatic runs: all drawn in the same color.
keep_idxs_soma = []
for idx, row in opt_soma.iterrows():
    is_kept = max(row["nevals"]) > min_evals
    if is_kept:
        keep_idxs_soma.append(idx)
    ax.plot(row["nevals"],
            row["logbook"].select("min"),
            color="C0",
            **(line_style_kept if is_kept else line_style_short))

# Extracellular runs: colored by strategy and labelled with their index.
keep_idx_extra = []
for idx, row in opt_extra.iterrows():
    is_kept = max(row["nevals"]) > min_evals
    if is_kept:
        keep_idx_extra.append(idx)
    ax.plot(row["nevals"],
            row["logbook"].select("min"),
            color=color_strategy[row["extra_strategy"]],
            label=idx,
            **(line_style_kept if is_kept else line_style_short))

# Axes cosmetics: log-scaled fitness, no top/right spines.
ax.set_title("Min fitness")
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.set_xlabel("Neval")
ax.set_ylabel("Min fitness")
ax.set_yscale('log')

# Load protocols and original features

In [None]:
# Keyword arguments returned by `mf.utils.get_extra_kwargs()`; they are forwarded to
# `mf.create_evaluator` below. The bare name on the last line displays them.
extra_kwargs = mf.utils.get_extra_kwargs()
extra_kwargs

In [None]:
# Protocols that were used during optimization; they are passed as `exclude_protocols`
# when the evaluator is built below.
protocols_used_for_opt = ["IV_-20", "IV_-100", "IDrest_150", "IDrest_250", "IDrest_300",
                          "APWaveform_260"]

In [None]:
# Evaluator used for the analysis below: "extra" feature set with the "all" strategy,
# requesting all protocols while excluding the ones used for optimization.
# NOTE(review): presumably this leaves only held-out (test) protocols -- confirm against
# `mf.create_evaluator`.
eva_extra = mf.create_evaluator(
    model_name=model_name,
    feature_set="extra",
    extra_strategy="all",
    protocols_with_lfp="IDrest_300",
    all_protocols=True,
    exclude_protocols=protocols_used_for_opt,
    **extra_kwargs
)

In [None]:
# Report how many objectives the evaluator's fitness calculator contains.
print(f"All test features --> num features {len(eva_extra.fitness_calculator.objectives)}")

# Compute release responses

In [None]:
# Run every protocol of the evaluator with the release parameter values.
responses_release = eva_extra.run_protocols(eva_extra.fitness_protocols.values(), param_values=params_release)

In [None]:
# Feature value and score of the release responses, keyed by feature name.
# Objectives that do not hold exactly one feature are reported and left with an empty entry.
features_release = {}
for objective in eva_extra.fitness_calculator.objectives:
    feature = objective.features[0]
    features_release[feature.name] = {}
    if len(objective.features) != 1:
        print(f"More than one feature for objective: {objective.name}")
        continue
    value = feature.calculate_feature(responses_release)
    score = feature.calculate_score(responses_release)
    features_release[feature.name]["value"] = value
    features_release[feature.name]["score"] = score

In [None]:
# List the names of all responses produced by the release simulation.
pprint(list(responses_release.keys()))

In [None]:
# Plot the release (ground-truth) responses in black and keep the figure handle for export.
fig_gt_intra = mf.plot_responses(responses_release, color="k", return_fig=True)

In [None]:
# Export the ground-truth response figure when figure saving is enabled.
# Only `fig_gt_intra` is created by the cells above; the previous `fig_gt_extra` export
# referred to a figure that no earlier cell defines and raised a NameError.
if save_fig:
    fig_gt_intra.savefig(figure_folder / "gt_intra.pdf")

## "soma" example

In [None]:
# Per-strategy results, filled in by the cells below
# (keys written there: "best_fitness", "best_params", "responses", "features").
opt_results = {}

In [None]:
# Key under which the somatic results are stored in `opt_results`.
strategy = "soma"

In [None]:
# Select the somatic run with the lowest best fitness and store its fitness and parameters.
opt_results[strategy] = {}
best_idx = np.argmin(opt_soma.best_fitness)
# Index into `opt_soma` (the frame the position was computed from). The previous code used
# `opt_df`, which is only created later in the "extra" section, so it either failed on a
# fresh kernel or silently picked a row from the extracellular runs.
params_sample = opt_soma.iloc[best_idx]
# Pair each free parameter name with the corresponding optimized value.
params_dict = dict(zip(param_names, params_sample.best_params))
opt_results[strategy]["best_fitness"] = params_sample.best_fitness
opt_results[strategy]["best_params"] = params_dict

In [None]:
# Simulate all evaluator protocols with the best somatic parameter set.
# The parameters are read from `opt_results["soma"]`; the previously used name
# `params_sample_soma_dict` is not defined anywhere above and raised a NameError.
response_soma = eva_extra.run_protocols(eva_extra.fitness_protocols.values(),
                                        param_values=opt_results["soma"]["best_params"])

In [None]:
# Keep the simulated responses alongside the other results of this strategy.
opt_results[strategy]["responses"] = response_soma

In [None]:
# Feature value and score of the best somatic responses, keyed by feature name.
# Objectives that do not hold exactly one feature are reported and left with an empty entry.
features_best_soma = {}
for objective in eva_extra.fitness_calculator.objectives:
    feature = objective.features[0]
    features_best_soma[feature.name] = {}
    if len(objective.features) != 1:
        print(f"More than one feature for objective: {objective.name}")
        continue
    value = feature.calculate_feature(response_soma)
    score = feature.calculate_score(response_soma)
    features_best_soma[feature.name]["value"] = value
    features_best_soma[feature.name]["score"] = score

In [None]:
# Keep the computed features alongside the other results of this strategy.
opt_results[strategy]["features"] = features_best_soma

In [None]:
# Overlay the ground-truth responses (black) and the best somatic responses (C0).
fig_soma_intra = mf.plot_multiple_responses([responses_release, response_soma], colors=["k", "C0"], 
                                            labels=["GT", "SOMA"], return_fig=True)

## "extra" example

In [None]:
# For every extracellular strategy, select the run with the lowest best fitness and
# store its fitness and parameters under that strategy's key in `opt_results`.
for extra_strategy in np.unique(opt_extra.extra_strategy):
    opt_df = opt_extra.query(f"extra_strategy == '{extra_strategy}'")
    best_idx = np.argmin(opt_df.best_fitness)
    params_sample = opt_df.iloc[best_idx]
    # Pair each free parameter name with the corresponding optimized value.
    params_dict = dict(zip(param_names, params_sample.best_params))
    opt_results[extra_strategy] = {
        "best_fitness": params_sample.best_fitness,
        "best_params": params_dict,
    }

In [None]:
# Sanity check: the "soma" entry must still be present after adding the extra strategies.
opt_results["soma"].keys()

In [None]:
# Simulate all evaluator protocols with the best parameter set of each extracellular
# strategy and store the responses next to the parameters.
for extra_strategy in np.unique(opt_extra.extra_strategy):
    print(f"Simulating best '{extra_strategy}'")
    responses = eva_extra.run_protocols(eva_extra.fitness_protocols.values(), 
                                        param_values=opt_results[extra_strategy]["best_params"])
    opt_results[extra_strategy]["responses"] = responses

In [None]:
# Compute feature values and scores from the stored responses of each extracellular strategy.
# Objectives that do not hold exactly one feature are reported and left with an empty entry.
for extra_strategy in np.unique(opt_extra.extra_strategy):
    responses = opt_results[extra_strategy]["responses"]
    features_best = {}
    for objective in eva_extra.fitness_calculator.objectives:
        feature = objective.features[0]
        features_best[feature.name] = {}
        if len(objective.features) != 1:
            print(f"More than one feature for objective: {objective.name}")
            continue
        value = feature.calculate_feature(responses)
        score = feature.calculate_score(responses)
        features_best[feature.name]["value"] = value
        features_best[feature.name]["score"] = score
    opt_results[extra_strategy]["features"] = features_best

In [None]:
# Plot color assigned to each strategy (matplotlib color-cycle names).
colors_dict = dict(soma="C0", all="C1", sections="C2", single="C3")

In [None]:
# Parallel lists for the overlay plot: ground truth first (black, "GT"), followed by one
# entry per strategy in `opt_results`, in insertion order.
responses_list = [responses_release] + [
    strategy_results["responses"] for strategy_results in opt_results.values()
]
colors = ["k"] + [colors_dict[strategy_name] for strategy_name in opt_results]
labels = ["GT"] + [strategy_name.upper() for strategy_name in opt_results]

In [None]:
# Overlay the ground-truth responses and the best responses of every strategy.
fig_extra_intra = mf.plot_multiple_responses(responses_list, 
                                             colors=colors, return_fig=True, 
                                             labels=labels)

In [None]:
# Export the overlay figure when figure saving is enabled.
# Only `fig_extra_intra` is created by the cells above; the previous exports of
# `fig_extra_extra`, `fig_extra_single_intra` and `fig_extra_single` referred to figures
# that no earlier cell defines and raised a NameError.
if save_fig:
    fig_extra_intra.savefig(figure_folder / "extra_intra.pdf")

## Compare best-fitted models

## Compare features

In [None]:
# Flatten the per-strategy feature scores into one long-format table with one row per
# (feature set, feature) pair.
feature_name_array = []
feature_set_array = []
feature_score_array = []
feature_type_array = []
protocol_type_array = []

for strategy, strategy_dict in opt_results.items():
    feats = strategy_dict["features"]
    for feat_name, feat_dict in feats.items():
        feature_set_array.append(strategy)
        feature_name_array.append(feat_name)
        # Features with "MEA" in their name are classified as extracellular.
        feature_type_array.append("extra" if "MEA" in feat_name else "intra")
        # Objectives with more than one feature are stored as an empty dict upstream;
        # record NaN for them instead of failing with a KeyError.
        feature_score_array.append(feat_dict.get("score", np.nan))
        # Protocol type is the part of the feature name before the first "." and "_".
        protocol_type = feat_name.split(".")[0].split("_")[0]
        protocol_type_array.append(protocol_type)

# `feat_type` was previously computed but never added to the table; it is included now so
# intracellular and extracellular features can be told apart downstream.
df_feats = pd.DataFrame({"feature_set": feature_set_array, "feat_name": feature_name_array,
                         "feat_score": feature_score_array, "protocol_type": protocol_type_array,
                         "feat_type": feature_type_array})

In [None]:
# Display the feature-score table.
df_feats

In [None]:
# Box plot of the intracellular feature scores, one box per feature set.
# `df_feats` may also contain extracellular features, so keep only the rows whose feature
# name does not contain "MEA" (the same rule used to classify features when building the
# table); otherwise the "Intracellular" title would not match the plotted data.
df_feats_intra = df_feats[~df_feats["feat_name"].str.contains("MEA", regex=False)]

fig_feat_intra, ax = plt.subplots(figsize=(10, 7))

sns.boxplot(data=df_feats_intra, y="feature_set", x="feat_score", ax=ax)
# Scores are on the x axis and feature sets on the y axis; the score label used to be
# attached to the y axis by mistake.
ax.set_xlabel("Feature scores (intracellular)", fontsize=12)
ax.set_ylabel("Feature set", fontsize=12)
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_title("Intracellular features", fontsize=15)
# Restrict the score axis to [0, 20]; larger scores fall outside the visible range.
ax.set_xlim([0, 20])

In [None]:
# One bar plot per protocol type: feature score for each feature set.
for protocol_type in np.unique(df_feats.protocol_type):
    fig, ax = plt.subplots()
    df_protocol = df_feats.query(f"protocol_type == '{protocol_type}'")
    sns.barplot(data=df_protocol, y="feature_set", x="feat_score", ax=ax)
    ax.set_title(protocol_type)

In [None]:
# Export the feature-score figure when figure saving is enabled.
# Only `fig_feat_intra` is created by the cells above; the previous `fig_feat_extra` export
# referred to a figure that no earlier cell defines and raised a NameError.
if save_fig:
    fig_feat_intra.savefig(figure_folder / "feat_intra.pdf")