# 1) Smart sampling of the parameter space and feature computation

From the set of parameters estimated using multiple patch recordings (1 somatic, 2 apical dendrites), we simulate random perturbations using Latin Hypercube Sampling (LHS). 

First, we generate a large number of samples, then we select the samples that:

- are the most distant from each other
- reproduce bAP and BAC firing
- have feature sets that are also the most distant from each other

The random parameters are saved in `config/params/smart_random.csv` and will be used to assess the fitting performance of different feature sets.

From the set of all features, separate feature sets are computed:

- multiple
- soma
- extra

The output features are saved to the `config/features/random_{id}/` folder (in pkl format, one file per feature set).

In [None]:
%load_ext autoreload
%autoreload

from pyDOE import lhs
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import numpy as np
import os
import json
from pathlib import Path
from scipy.spatial import distance
import MEAutility as mu
import LFPy
from copy import copy
import neuroplotlib as nplt
from pprint import pprint

import bluepyopt as bpopt
import bluepyopt.ephys as ephys

%matplotlib notebook

In [None]:
np.random.seed(2308)

In [None]:
import model
import evaluator
import plotting
import utils

In [None]:
save_params = True

In [None]:
# load release params and bounds
# NOTE(review): release_params_file is only assigned here; the two evaluator calls below
# take no path argument, so which file they actually read cannot be seen from this notebook.
release_params_file = 'config/parameters_release.json'

release_params = evaluator.get_release_params()
params_bounds = evaluator.get_unfrozen_params_bounds()

In [None]:
n_params = len(release_params)
n_samples = 500  # number of random samples 
lim_dev = 0.3  # limits for each param are: [release_val-lim_dev*release_val, release_val+lim_dev*release_val] 

In [None]:
pprint(release_params)

### Latin hypercube sampling (normalized to [0, 1])

In [None]:
random_samples = lhs(n_params, samples=n_samples)

In [None]:
# 3D scatter of three of the sampled dimensions (columns 5, 19 and 3 of random_samples);
# the hard-coded column 19 means at least 20 parameters are needed.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot(random_samples[:, 5], random_samples[:, 19], random_samples[:, 3], '*')

### Compute parameters limits

In [None]:
# If True, the limits from the params config are used. Otherwise, the release params +- lim_dev are used.
use_params_bounds = False

In [None]:
params_bounds

In [None]:
# Build the sampling interval [lower, upper] for every release parameter.
#
# - use_params_bounds is True: the interval is taken straight from the params config.
# - otherwise: release value +- lim_dev * |release value|, clipped to the config bounds.
param_lims = {}

for par, val in release_params.items():
    lower_bound, upper_bound = params_bounds[par][0], params_bounds[par][1]

    if use_params_bounds:
        lims = [lower_bound, upper_bound]
    else:
        # abs() keeps lower <= upper when the release value is negative
        dev = lim_dev * abs(val)
        lims = [val - dev, val + dev]
        # check values based on param configs
        if lims[0] < lower_bound:
            lims[0] = lower_bound
            print(f'Param {par} changed lower bound')
        if lims[1] > upper_bound:
            lims[1] = upper_bound
            print(f'Param {par} changed upper bound')
    param_lims[par] = lims

In [None]:
param_lims

### Scale parameters

In [None]:
# Map every unit-hypercube sample onto the per-parameter intervals of param_lims.
# Column order follows the iteration order of param_lims.
scaled_samples = np.zeros_like(random_samples)

for col, (lower, upper) in enumerate(param_lims.values()):
    scaled_samples[:, col] = (upper - lower) * random_samples[:, col] + lower

In [None]:
# Sanity check: every scaled value lies strictly inside its parameter interval.
for row in scaled_samples:
    for col, (lower, upper) in enumerate(param_lims.values()):
        assert lower < row[col] < upper

## Select *distant* parameters

First, we iteratively look for parameters that are distant from each other in the original normalized space.

In [None]:
# number of mutually distant samples to collect
num_target_samples = 45 # 50
# a candidate is accepted only if its cosine distance to every selected sample exceeds this value
min_dist = 0.2
# indices (rows of random_samples) of the samples selected so far
target_samples_idxs = []
# iteration counter and iteration cap for the selection loop below
it = 0
max_iter = 300

In [None]:
# Greedy selection of mutually distant samples in the normalized (unit hypercube) space.
# The first target is drawn at random. Afterwards, among the samples whose cosine distance
# to every selected target exceeds min_dist, the one with the largest summed distance is added.
all_samples = np.arange(len(random_samples))
# target_dists[k][j]: cosine distance between the k-th selected target and sample j.
# Rows are cached so that the distances of each target are computed only once.
target_dists = []

while it < max_iter:
    if np.mod(it, 100) == 0:
        print(f"Iteration {it+1}/{max_iter}")

    if len(target_samples_idxs) == 0:
        random_sample_id = np.random.permutation(len(random_samples))[0]
        print(f'Added {random_sample_id}')
        target_samples_idxs.append(random_sample_id)
    else:
        # compute distances only for the targets added since the previous iteration
        for target_id in target_samples_idxs[len(target_dists):]:
            target = random_samples[target_id]
            target_dists.append([distance.cosine(random_samples[j], target) for j in all_samples])
        dists = np.array(target_dists)

        cum_dist = []
        possible_idxs = []
        for col in all_samples:
            if np.all(dists[:, col] > min_dist):
                possible_idxs.append(col)
                cum_dist.append(np.sum(dists[:, col]))

        if len(possible_idxs) == 0:
            # The candidate set depends only on the selected targets, so every further
            # iteration would reproduce this empty result: stop instead of spinning to max_iter.
            print(f"No sample is farther than {min_dist} from all targets: "
                  f"stopping with {len(target_samples_idxs)} targets")
            break

        max_id = np.argmax(cum_dist)
        random_sample_id = possible_idxs[max_id]
        print(f'Added {random_sample_id}: Targets {len(target_samples_idxs)}')
        target_samples_idxs.append(random_sample_id)

    if len(target_samples_idxs) >= num_target_samples:
        print("Found samples!")
        break
    it += 1
target_samples_idxs = np.array(target_samples_idxs)

In [None]:
len(target_samples_idxs)

In [None]:
# Pairwise cosine distances between the selected samples in the normalized space.
# Only the upper triangle of the matrix is filled.
n_targets = len(target_samples_idxs)
dist_params_mat = np.zeros((n_targets, n_targets))
for row, col in zip(*np.triu_indices(n_targets, k=1)):
    first = random_samples[target_samples_idxs[row]]
    second = random_samples[target_samples_idxs[col]]
    dist_params_mat[row, col] = distance.cosine(first, second)
dist_params_array = dist_params_mat[dist_params_mat > 0].ravel()

plt.matshow(dist_params_mat)
plt.colorbar()

print(np.min(dist_params_array))
print(np.max(dist_params_array))

In [None]:
# Pairwise cosine distances between the selected samples in the scaled parameter space.
# Only the upper triangle of the matrix is filled.
n_targets = len(target_samples_idxs)
dist_sc_params_mat = np.zeros((n_targets, n_targets))
for row, col in zip(*np.triu_indices(n_targets, k=1)):
    first = scaled_samples[target_samples_idxs[row]]
    second = scaled_samples[target_samples_idxs[col]]
    dist_sc_params_mat[row, col] = distance.cosine(first, second)
dist_sc_params_array = dist_sc_params_mat[dist_sc_params_mat > 0].ravel()

plt.matshow(dist_sc_params_mat)
plt.colorbar()

print(np.min(dist_sc_params_array))
print(np.max(dist_sc_params_array))

## Compute full set of features

In [None]:
probe_type = 'planar' # 'linear'
electrode = model.define_probe(probe_type=probe_type)

In [None]:
ax = mu.plot_probe(electrode.probe)
#nplt.plot_neuron(morphology="morphology/cell1.asc", plane='xy', ax=ax)

In [None]:
l5pc_cell = model.create()

# names of the non-frozen parameters of the cell model
param_names = [param.name for param in l5pc_cell.params.values() if not param.frozen]

feature_set = 'all'

print(f'Feature set {feature_set}')
gt_responses = []

# Feature sets that include extracellular features need the probe in both
# the protocols and the simulator.
if feature_set in ["extra", "all"]:
    fitness_protocols = evaluator.define_protocols(electrode=electrode, protocols_with_lfp=["Step1"])
    sim = ephys.simulators.LFPySimulator(LFPyCellModel=l5pc_cell, cvode_active=True, electrode=electrode)
else:
    # The protocols come from the `evaluator` module; there is no `l5pc_evaluator`
    # name in this notebook, so calling it would raise a NameError.
    fitness_protocols = evaluator.define_protocols()
    sim = ephys.simulators.LFPySimulator(LFPyCellModel=l5pc_cell, cvode_active=True)

In [None]:
target_samples = random_samples[target_samples_idxs]
target_scaled_samples = scaled_samples[target_samples_idxs]

In [None]:
random_params = pd.DataFrame(data=target_scaled_samples, columns=param_lims.keys(), 
                             index=[f'random_{i}' for i in range(len(target_samples_idxs))])

In [None]:
random_params

In [None]:
plt.figure()
ax = sns.boxplot(data=random_params, orient='horizontal')
fig = ax.get_figure()
fig.subplots_adjust(left=0.4)

In [None]:
fitness_protocols

In [None]:
# Simulate every candidate parameter set and collect its responses and feature dict.
random_features = []
random_responses = []

channels='map'

n_candidates = len(random_params)
for count, (index, params) in enumerate(random_params.iterrows(), start=1):
    print(f'{count} / {n_candidates}, {index}')

    # nothing is written to disk here (save_to_file=False)
    _, response, feature_dict = evaluator.compute_feature_values(
        params, l5pc_cell, fitness_protocols, sim,
        feature_set=feature_set,
        probe=electrode.probe,
        channels=channels,
        feature_folder=f'config/features/{index}',
        save_to_file=False,
    )
    random_features.append(feature_dict)
    random_responses.append(response)

In [None]:
plotting.plot_multiple_responses(random_responses)

### Exclude params that do not express bAP firing

In [None]:
random_features_v = utils.vectorize_features(random_features)

In [None]:
# Keep only the samples whose vectorized feature dict contains 5 bAP and 5 BAC features
# and whose somatic bAP response has exactly one spike.
complete_bap = []
# thresholds that are not read by the criterion below
bap_ap_min = 50
min_fr_step1 = 10
for i, gt in enumerate(random_features_v):
    n_bap_feat = sum('bAP' in k for k in gt.keys())
    n_bac_feat = sum('BAC' in k for k in gt.keys())
    if n_bap_feat == 5 and n_bac_feat == 5:
        bap_spike_count = gt['bAP.soma.Spikecount']
        if bap_spike_count == 1:
            complete_bap.append(i)
# Explicit integer dtype: an empty list would otherwise become a float array,
# which cannot be used as an index below.
bap_sample_idxs = np.array(complete_bap, dtype=int)
print(f"Samples with complete bAP response: {len(complete_bap)} / {len(random_features_v)}")

bap_features_v = np.array(random_features_v)[bap_sample_idxs]
bap_features = np.array(random_features)[bap_sample_idxs]
bap_responses = np.array(random_responses)[bap_sample_idxs]
bap_scaled_samples = target_scaled_samples[bap_sample_idxs]
bap_random_samples = target_samples[bap_sample_idxs]

In [None]:
plotting.plot_multiple_responses(bap_responses)

In [None]:
# We can select a subset of extracellular channels to compute features on (if None, all channels are used)
channels=None

In [None]:
# Pairwise cosine distance between the feature vectors of the retained samples.
# Keys containing 'MEA' are left out (the `channels` variable is not read here); a feature
# enters the comparison only if it is present and finite in both samples.
# Only the upper triangle of the matrix is filled.
n_bap = len(bap_sample_idxs)
dist_feat_mat = np.zeros((n_bap, n_bap))
for i in np.arange(n_bap):
    f1 = bap_features_v[i]
    for j in np.arange(i+1, n_bap):
        f2 = bap_features_v[j]

        shared_keys = [
            k for k in f1.keys()
            if 'MEA' not in k and k in f2 and np.isfinite(f1[k]) and np.isfinite(f2[k])
        ]
        f1_val = [f1[k] for k in shared_keys]
        f2_val = [f2[k] for k in shared_keys]

        cost = distance.cosine(f1_val, f2_val)
        if np.isnan(cost):
            print(f1_val, f2_val)
        dist_feat_mat[i, j] = cost

dist_feat_array = dist_feat_mat[dist_feat_mat > 0].ravel()

plt.matshow(dist_feat_mat)
plt.colorbar()

print(np.min(dist_feat_array))
print(np.max(dist_feat_array))

## Select samples with the most distant features

In [None]:
num_final_samples = 5

In [None]:
# make dist_matrix symmetric
dist_feat_sym = copy(dist_feat_mat)

for i in np.arange(len(bap_sample_idxs)):
    for j in np.arange(i+1, len(bap_sample_idxs)):
        dist_feat_sym[j, i] = dist_feat_sym[i, j]
plt.matshow(dist_feat_sym)

In [None]:
# select samples based on cumulative distance
cum_dist = np.sum(dist_feat_sym, 0)
max_dist_idxs = np.argsort(cum_dist)[::-1][:num_final_samples]

In [None]:
print(cum_dist[max_dist_idxs])

In [None]:
selected_sample_idxs = max_dist_idxs

In [None]:
selected_scaled_params = bap_scaled_samples[selected_sample_idxs]
selected_random_params = bap_random_samples[selected_sample_idxs]
selected_responses = bap_responses[selected_sample_idxs]
selected_features_v = bap_features_v[selected_sample_idxs]
selected_features = bap_features[selected_sample_idxs]

In [None]:
# Pairwise cosine distances among the final selection, in three spaces:
# normalized parameters, scaled parameters, and features (keys containing 'MEA' excluded,
# only features present and finite in both samples). Upper triangles only.
n_selected = len(selected_random_params)
dist_sel_random_params_mat = np.zeros((n_selected, n_selected))
dist_sel_scaled_params_mat = np.zeros((n_selected, n_selected))
dist_sel_features_mat = np.zeros((n_selected, n_selected))

for i, j in zip(*np.triu_indices(n_selected, k=1)):
    dist_sel_random_params_mat[i, j] = distance.cosine(selected_random_params[i],
                                                       selected_random_params[j])
    dist_sel_scaled_params_mat[i, j] = distance.cosine(selected_scaled_params[i],
                                                       selected_scaled_params[j])

    f1 = selected_features_v[i]
    f2 = selected_features_v[j]
    shared_keys = [
        k for k in f1.keys()
        if 'MEA' not in k and k in f2 and np.isfinite(f1[k]) and np.isfinite(f2[k])
    ]
    f1_val = [f1[k] for k in shared_keys]
    f2_val = [f2[k] for k in shared_keys]
    dist_sel_features_mat[i, j] = distance.cosine(f1_val, f2_val)

for matrix, title in ((dist_sel_random_params_mat, "Norm params"),
                      (dist_sel_scaled_params_mat, "Scaled params"),
                      (dist_sel_features_mat, "Features")):
    plt.matshow(matrix)
    plt.title(title)
    plt.colorbar()

In [None]:
plotting.plot_multiple_responses(responses_list=selected_responses)

In [None]:
# Overlay the 'Step1' extracellular action potentials of the selected samples on the probe,
# one color per sample. Each row of the EAP array is divided by its maximum absolute value.
fig3 = plt.figure()
ax3 = fig3.add_subplot(111)
cmap=plt.get_cmap('viridis')
for sample_no, sample_responses in enumerate(selected_responses):
    eap = evaluator.calculate_eap(responses=sample_responses, protocols=fitness_protocols,
                                  protocol_name='Step1')
    row_peak = np.max(np.abs(eap), 1, keepdims=True)
    eap_norm = eap / row_peak
    mu.plot_mea_recording(eap_norm, electrode.probe, colors=f"C{sample_no}", ax=ax3)

In [None]:
# create dataframes: a single 'release' row, and one 'random_{i}' row per selected sample
df_release = pd.DataFrame(data=release_params, index=['release'])
df_random = pd.DataFrame(data=selected_scaled_params, columns=param_lims.keys(), 
                         index=[f'random_{i}' for i in range(len(selected_scaled_params))])

In [None]:
df_random

In [None]:
plt.figure()
ax = sns.boxplot(data=df_random, orient='horizontal')
fig = ax.get_figure()
fig.subplots_adjust(left=0.4)

## Double check that responses are correct!

In [None]:
# NOTE: this CSV is written by the saving cell further down (df_random.to_csv, executed only
# when save_params is True), so that cell must have been run at least once before this one.
loaded_params_file = 'config/params/smart_random.csv'
loaded_params = pd.read_csv(loaded_params_file, index_col='index')
loaded_params

In [None]:
probe_type = 'planar' # 'linear'
electrode = model.define_probe(probe_type=probe_type)

In [None]:
l5pc_cell = model.create()
channels='map'

# names of the non-frozen parameters of the cell model
param_names = [param.name for param in l5pc_cell.params.values() if not param.frozen]

feature_set = 'all'

print(f'Feature set {feature_set}')
gt_responses = []

# Feature sets that include extracellular features need the probe in both
# the protocols and the simulator.
if feature_set in ["extra", "all"]:
    fitness_protocols = evaluator.define_protocols(electrode=electrode, protocols_with_lfp=["Step1"])
    sim = ephys.simulators.LFPySimulator(LFPyCellModel=l5pc_cell, cvode_active=True, electrode=electrode)
else:
    # The protocols come from the `evaluator` module; there is no `l5pc_evaluator`
    # name in this notebook, so calling it would raise a NameError.
    fitness_protocols = evaluator.define_protocols()
    sim = ephys.simulators.LFPySimulator(LFPyCellModel=l5pc_cell, cvode_active=True)

In [None]:
# Re-simulate the parameter sets read back from the CSV and keep their responses.
loaded_responses = []

n_loaded = len(loaded_params)
for count, (index, params) in enumerate(loaded_params.iterrows(), start=1):
    print(f'{count} / {n_loaded}, {index}')

    # nothing is written to disk here (save_to_file=False)
    _, response, feature_dict = evaluator.compute_feature_values(
        params, l5pc_cell, fitness_protocols, sim,
        feature_set=feature_set,
        probe=electrode.probe,
        channels=channels,
        feature_folder=f'config/features/{index}',
        save_to_file=False,
    )
    loaded_responses.append(response)

In [None]:
plotting.plot_multiple_responses(responses_list=loaded_responses)

### Save random and release parameters to csv

In [None]:
# Write the release and the selected random parameter sets to csv (only when save_params is set)
if save_params:
    params_folder = Path('config/params')
    params_folder.mkdir(parents=True, exist_ok=True)

    release_params_file = params_folder / 'release.csv'
    random_params_file = params_folder / 'smart_random.csv'

    for frame, target_file in ((df_release, release_params_file), (df_random, random_params_file)):
        frame.to_csv(target_file, index_label='index')

### Compute and save feature sets and responses

In [None]:
import pickle

In [None]:
def save_feature_set_from_all_feats(all_feat_dict, feature_output_folder, feature_list_file, 
                                    feature_set, probe):
    """Extract one feature set from the full feature dict and pickle it.

    The JSON file `feature_list_file` maps feature-set name -> protocol -> location ->
    list of feature names. The entries listed under `feature_set` are copied as-is from
    `all_feat_dict` and written to `<feature_output_folder>/<feature_set>.pkl`.
    `probe.info` is dumped to `<feature_output_folder>/probe.json` unless that file
    already exists.

    Parameters
    ----------
    all_feat_dict : dict
        Nested dict indexed as all_feat_dict[protocol][location][feature].
    feature_output_folder : str or Path
        Output folder; created (with parents) if missing.
    feature_list_file : str or Path
        JSON file with the feature lists of every feature set.
    feature_set : str
        Key of the feature set to extract from the JSON file.
    probe : object
        Object with a JSON-serializable `info` attribute.

    Returns
    -------
    dict
        The extracted features, indexed as [protocol][location][feature].

    Raises
    ------
    KeyError
        If `feature_set` is not in the JSON file, or a listed feature is missing
        from `all_feat_dict`.
    """
    feature_output_folder = Path(feature_output_folder)
    feature_output_folder.mkdir(parents=True, exist_ok=True)

    # context manager so that the JSON file handle is closed right away
    with open(feature_list_file) as f:
        feature_list = json.load(f)[feature_set]

    feature_meanstd = {}
    for protocol_name, locations in feature_list.items():
        protocol_feats = all_feat_dict[protocol_name] if locations else {}
        feature_meanstd[protocol_name] = {
            location: {feat: protocol_feats[location][feat] for feat in feats}
            for location, feats in locations.items()
        }

    feature_file = feature_output_folder / f'{feature_set}.pkl'
    with feature_file.open('wb') as f:
        pickle.dump(feature_meanstd, f)

    # save probe (an existing probe.json is left untouched)
    probe_file = feature_output_folder / 'probe.json'
    if not probe_file.is_file():
        with probe_file.open('w') as f:
            json.dump(probe.info, f, indent=4)

    return feature_meanstd

In [None]:
feature_sets = ["multiple", "soma", "extra"]

In [None]:
# For every selected sample, extract each feature set from its full feature dict,
# save it under config/features/random_{i}, and keep the result in `features`.
features = {}

for i, all_feats in enumerate(selected_features):
    sample_folder = f'config/features/random_{i}'
    features[i] = {
        feat_set: save_feature_set_from_all_feats(all_feats, sample_folder,
                                                  'config/features_list.json', feat_set,
                                                  electrode.probe)
        for feat_set in feature_sets
    }

In [None]:
# Pickle the responses of every selected sample next to its feature files
for i, responses in enumerate(selected_responses):
    responses_file = Path('config/features') / f'random_{i}' / 'responses.pkl'
    with responses_file.open('wb') as f:
        pickle.dump(responses, f)

In [None]:
features[0]['multiple']