# 1b) Smart sampling the parameter space

From the set of parameters estimated using multiple patch recordings (1 somatic, 2 apical dendrites), we simulate random perturbations using Latin Hypercube Sampling (LHS). 

First, we generate a large number of samples, then we select the samples which:

- are the most distant from each other
- reproduce bAP firing
- have feature sets that are also the most distant from each other

The random parameters are saved to `config/params/smart_random.csv` and will be used to assess the fitting performance of different feature sets.

In [None]:
%load_ext autoreload
%autoreload

from pyDOE import lhs
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import numpy as np
import os
import json
from pathlib import Path
from scipy.spatial import distance
import MEAutility as mu
import LFPy
from copy import copy

import bluepyopt as bpopt
import bluepyopt.ephys as ephys

%matplotlib notebook

In [None]:
np.random.seed(2308)

In [None]:
import l5pc_model
import l5pc_evaluator
import l5pc_plot

In [None]:
save_params = True

In [None]:
release_params = {
    'gNaTs2_tbar_NaTs2_t.apical': 0.026145,
    'gSKv3_1bar_SKv3_1.apical': 0.004226,
    'gImbar_Im.apical': 0.000143,
    'gNaTa_tbar_NaTa_t.axonal': 3.137968,
    'gK_Tstbar_K_Tst.axonal': 0.089259,
    'gamma_CaDynamics_E2.axonal': 0.002910,
    'gNap_Et2bar_Nap_Et2.axonal': 0.006827,
    'gSK_E2bar_SK_E2.axonal': 0.007104,
    'gCa_HVAbar_Ca_HVA.axonal': 0.000990,
    'gK_Pstbar_K_Pst.axonal': 0.973538,
    'gSKv3_1bar_SKv3_1.axonal': 1.021945,
    'decay_CaDynamics_E2.axonal': 287.198731,
    'gCa_LVAstbar_Ca_LVAst.axonal': 0.008752,
    'gamma_CaDynamics_E2.somatic': 0.000609,
    'gSKv3_1bar_SKv3_1.somatic': 0.303472,
    'gSK_E2bar_SK_E2.somatic': 0.008407,
    'gCa_HVAbar_Ca_HVA.somatic': 0.000994,
    'gNaTs2_tbar_NaTs2_t.somatic': 0.983955,
    'decay_CaDynamics_E2.somatic': 210.485284,
    'gCa_LVAstbar_Ca_LVAst.somatic': 0.000333
}

# Load the param configs: their 'bounds' entries are used below to clip the
# sampling limits. Without this load, `param_configs` is undefined when the
# notebook is run from top to bottom.
with open('config/parameters.json') as param_file:
    param_configs = json.load(param_file)

In [None]:
param_configs

In [None]:
n_params = len(release_params)
n_samples = 500  # number of random samples 
lim_dev = 0.3  # limits for each param are: [release_val-lim_dev*release_val, release_val+lim_dev*release_val] 

### Latin hypercube sampling (normalized to [0, 1])

In [None]:
random_samples = lhs(n_params, samples=n_samples)

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot(random_samples[:, 5], random_samples[:, 19], random_samples[:, 3], '*')

### Compute parameters limits

In [None]:
# if True, the limits of the params config are used. Otherwise, the release params +- lim_dev are used
use_params_bounds = False

In [None]:
# Build the [lower, upper] sampling interval of every release parameter.
param_lims = {}

for par, val in release_params.items():
    # Default interval: release value -/+ lim_dev * release value.
    half_width = lim_dev * val
    lims = [val - half_width, val + half_width]
    name, loc = par.split('.')

    # Apply the bounds of every param config entry matching this name/location.
    for param in param_configs:
        if param['param_name'] != name or param['sectionlist'] != loc:
            continue
        if 'bounds' not in param:
            continue

        bounds = param['bounds']
        if use_params_bounds:
            # Use the configured bounds as they are.
            lims = bounds
        else:
            # Clip the default interval so that it stays inside the configured bounds.
            if lims[0] < bounds[0]:
                lims[0] = bounds[0]
                print(f'Param {par} changed lower bound')
            if lims[1] > bounds[1]:
                lims[1] = bounds[1]
                print(f'Param {par} changed upper bound')
        print(param['param_name'], lims, bounds)

    param_lims[par] = lims

In [None]:
param_lims

### Scale parameters

In [None]:
# Rescale every column of the normalized samples linearly onto the
# [lower, upper] interval of the corresponding parameter.
scaled_samples = np.zeros_like(random_samples)

for col, lim in enumerate(param_lims.values()):
    scaled_samples[:, col] = (lim[1] - lim[0]) * random_samples[:, col] + lim[0]

In [None]:
# Sanity check: every scaled value lies strictly inside its parameter interval.
for col, lim in enumerate(param_lims.values()):
    column = scaled_samples[:, col]
    assert np.all((lim[0] < column) & (column < lim[1]))

## Select *distant* parameters

First we iteratively look for parameters distant from each other in the original normalized space.

In [None]:
num_target_samples = 3 # 50
min_dist = 0.2
target_samples_idxs = []
it = 0
max_iter = 300

In [None]:
# Greedy selection of mutually distant samples in the normalized space.
#
# The first target is drawn at random. Every following target is the sample that
# is farther than `min_dist` (cosine distance) from all current targets and has
# the largest summed distance to them. The loop ends when `num_target_samples`
# targets are found, when no candidate is left, or after `max_iter` iterations.

# Accept an ndarray too, so the cell can be re-run after the conversion at the end.
target_samples_idxs = list(target_samples_idxs)


def _cosine_dists_to(target_id):
    """Return the cosine distance of every normalized sample to sample `target_id`."""
    target = random_samples[target_id]
    return np.array([distance.cosine(sample, target) for sample in random_samples])


# One row of distances per selected target. Rows are computed once and re-used,
# instead of recomputing all distances at every iteration.
target_dists = [_cosine_dists_to(target_id) for target_id in target_samples_idxs]

while it < max_iter and len(target_samples_idxs) < num_target_samples:
    if it % 100 == 0:
        print(f"Iteration {it+1}/{max_iter}")

    if not target_samples_idxs:
        # Seed the selection with a random sample.
        random_sample_id = np.random.permutation(len(random_samples))[0]
        print(f'Added {random_sample_id}')
    else:
        dists = np.vstack(target_dists)
        # Candidates must be farther than min_dist from every selected target.
        possible_idxs = np.flatnonzero(np.all(dists > min_dist, axis=0))
        if len(possible_idxs) == 0:
            # Nothing changes between iterations, so further iterations cannot succeed.
            print(f"No sample farther than {min_dist} from all targets: "
                  f"stopping with {len(target_samples_idxs)} targets")
            break
        cum_dist = dists[:, possible_idxs].sum(axis=0)
        random_sample_id = possible_idxs[np.argmax(cum_dist)]
        print(f'Added {random_sample_id}: Targets {len(target_samples_idxs)}')

    target_samples_idxs.append(random_sample_id)
    target_dists.append(_cosine_dists_to(random_sample_id))
    it += 1

if len(target_samples_idxs) >= num_target_samples:
    print("Found samples!")

# dtype=int keeps the array usable as an index even when no target was selected.
target_samples_idxs = np.array(target_samples_idxs, dtype=int)

In [None]:
len(target_samples_idxs)

In [None]:
# Pairwise cosine distances between the selected samples in the normalized space.
# Only the upper triangle of the matrix is filled.
n_targets = len(target_samples_idxs)
dist_params_mat = np.zeros((n_targets, n_targets))
for row, row_sample_id in enumerate(target_samples_idxs):
    for col in range(row + 1, n_targets):
        col_sample_id = target_samples_idxs[col]
        dist_params_mat[row, col] = distance.cosine(random_samples[row_sample_id],
                                                    random_samples[col_sample_id])

# Keep the strictly positive entries only (the unfilled cells are zero).
dist_params_array = dist_params_mat[dist_params_mat > 0].ravel()

plt.matshow(dist_params_mat)
plt.colorbar()

print(np.min(dist_params_array))
print(np.max(dist_params_array))

In [None]:
# Same pairwise cosine distances as above, computed on the scaled samples.
# Only the upper triangle of the matrix is filled.
n_targets = len(target_samples_idxs)
dist_sc_params_mat = np.zeros((n_targets, n_targets))
for row, col in zip(*np.triu_indices(n_targets, k=1)):
    first = scaled_samples[target_samples_idxs[row]]
    second = scaled_samples[target_samples_idxs[col]]
    dist_sc_params_mat[row, col] = distance.cosine(first, second)

# Keep the strictly positive entries only (the unfilled cells are zero).
dist_sc_params_array = dist_sc_params_mat[dist_sc_params_mat > 0].ravel()

plt.matshow(dist_sc_params_mat)
plt.colorbar()

print(np.min(dist_sc_params_array))
print(np.max(dist_sc_params_array))

## Compute full set of features

In [None]:
def vectorize_features(feature_list):
    """Flatten nested feature dictionaries into flat ``{name: value}`` dictionaries.

    Parameters
    ----------
    feature_list : iterable of dict
        Each element is a three-level mapping
        ``{protocol: {location: {feature: values}}}`` where ``values`` is
        indexable.

    Returns
    -------
    list of dict
        One dictionary per element of ``feature_list``, keyed by
        ``'protocol.location.feature'`` and holding the first element of the
        corresponding ``values``.
    """
    def _flatten(feature):
        # Walk protocol -> location -> feature and keep the first value only.
        return {
            f'{prot}.{loc}.{feat}': feat_val[0]
            for prot, prot_dict in feature.items()
            for loc, loc_feat in prot_dict.items()
            for feat, feat_val in loc_feat.items()
        }

    return [_flatten(feature) for feature in feature_list]

In [None]:
# 20 electrode positions: column 1 runs linearly from -500 to 1000, column 2 is
# fixed at 20 and column 0 stays at 0.
n_electrodes = 20
mea_positions = np.zeros((n_electrodes, 3))
mea_positions[:, 1] = np.linspace(-500, 1000, n_electrodes)
mea_positions[:, 2] = 20

# Build the MEA probe from the explicit positions and wrap it in an LFPy electrode.
probe_info = {
    'pos': [list(position) for position in mea_positions],
    'center': False,
    'plane': 'xy',
}
probe = mu.return_mea(info=probe_info)
electrode = LFPy.RecExtElectrode(probe=probe)

In [None]:
mu.plot_probe(probe)

In [None]:
# Build the cell model, the fitness protocols and the simulator used to compute
# responses and features.
channels = None

morphology = ephys.morphologies.NrnFileMorphology('morphology/C060114A7.asc', do_replace_axon=True)
# Context manager so that the config file handle is closed after loading.
with open('config/parameters.json') as param_file:
    param_configs = json.load(param_file)
parameters = l5pc_model.define_parameters()
mechanisms = l5pc_model.define_mechanisms()

l5pc_cell = ephys.models.LFPyCellModel('l5pc',
                                       v_init=-65.,
                                       morph=morphology,
                                       mechs=mechanisms,
                                       params=parameters)

# Names of the parameters that are not frozen in the model.
param_names = [param.name for param in l5pc_cell.params.values() if not param.frozen]
feature_set = 'all'

print(f'Feature set {feature_set}')
gt_responses = []

# For the "extra" and "all" feature sets the electrode is passed to both the
# protocols and the simulator; otherwise neither receives it.
if feature_set in ["extra", "all"]:
    fitness_protocols = l5pc_evaluator.define_protocols(electrode)
    sim = ephys.simulators.LFPySimulator(LFPyCellModel=l5pc_cell, cvode_active=True, electrode=electrode)
else:
    fitness_protocols = l5pc_evaluator.define_protocols()
    sim = ephys.simulators.LFPySimulator(LFPyCellModel=l5pc_cell, cvode_active=True)

In [None]:
target_samples = random_samples[target_samples_idxs]
target_scaled_samples = scaled_samples[target_samples_idxs]

In [None]:
random_params = pd.DataFrame(data=target_scaled_samples, columns=param_lims.keys(), 
                             index=[f'random_{i}' for i in range(len(target_samples_idxs))])

In [None]:
random_params

In [None]:
plt.figure()
ax = sns.boxplot(data=random_params, orient='horizontal')
fig = ax.get_figure()
fig.subplots_adjust(left=0.4)

In [None]:
# Simulate every candidate parameter set and collect its responses and features.
random_features, random_responses = [], []

for i, (index, params) in enumerate(random_params.iterrows()):
    print(f'{i+1} / {len(random_params)}, {index}')

    # save_to_file=False: nothing is requested to be written for these runs.
    feature_folder = f'config/features/{index}'
    evaluation = l5pc_evaluator.compute_feature_values(
        params, l5pc_cell, fitness_protocols, sim,
        feature_set=feature_set, probe=probe, channels=channels,
        feature_folder=feature_folder, save_to_file=False,
    )
    _, response, feature_dict = evaluation

    random_features.append(feature_dict)
    random_responses.append(response)

In [None]:
# Simulate the parameter sets in `loaded_params` and collect their responses and
# features, displaying each parameter row before it is evaluated.
# NOTE(review): `loaded_params` is only assigned in the "Double check that
# responses are correct!" section further down (read from
# config/params/smart_random.csv), so that cell has to be run before this one.
loaded_features = []
loaded_responses = []

for i, (index, params) in enumerate(loaded_params.iterrows()):
    print(f'{i+1} / {len(loaded_params)}, {index}')
    display(params)
    
    feature_folder = f'config/features/{index}'
    _, response, feature_dict = l5pc_evaluator.compute_feature_values(params, l5pc_cell, fitness_protocols, sim, 
                                                                      feature_set=feature_set, probe=probe, 
                                                                      channels=channels,
                                                                      feature_folder=feature_folder,
                                                                      save_to_file=False)
    loaded_features.append(feature_dict)
    loaded_responses.append(response)

In [None]:
params

In [None]:
loaded_params.iloc[1]

In [None]:
l5pc_plot.plot_multiple_responses(loaded_responses)

In [None]:
l5pc_plot.plot_multiple_responses(random_responses)

### Exclude params that do not express bAP firing

In [None]:
random_features_v = vectorize_features(random_features)

In [None]:
# Keep only the samples whose flattened feature dict has exactly 5 keys containing
# 'bAP'.
# NOTE(review): 5 is presumably the number of bAP features of a complete response;
# confirm against the protocol/feature definitions.
complete_bap = [
    i for i, gt in enumerate(random_features_v)
    if sum('bAP' in k for k in gt) == 5
]
# dtype=int keeps the array usable for indexing even when no sample qualifies
# (an empty array would otherwise default to float and break the indexing below).
bap_sample_idxs = np.array(complete_bap, dtype=int)
print(f"Samples with complete bAP response: {len(complete_bap)} / {len(random_features_v)}")

# Restrict features, responses and both parameter representations to those samples.
bap_features_v = np.array(random_features_v)[bap_sample_idxs]
bap_responses = np.array(random_responses)[bap_sample_idxs]
bap_scaled_samples = target_scaled_samples[bap_sample_idxs]
bap_random_samples = target_samples[bap_sample_idxs]

In [None]:
# We can select a subset of extracellular channels to compute features on (if None, all channels are used)
channels=None

In [None]:
# Pairwise cosine distances between the feature vectors of the retained samples.
# Only the upper triangle of the matrix is filled.
n_bap = len(bap_sample_idxs)
dist_feat_mat = np.zeros((n_bap, n_bap))
for i in range(n_bap):
    for j in range(i + 1, n_bap):
        f1, f2 = bap_features_v[i], bap_features_v[j]

        # Non-MEA features are always used. MEA features are used when no channel
        # subset is given, or when the last character of the key, read as an
        # integer, is one of the selected channels.
        selected_keys = [k for k in f1
                         if 'MEA' not in k or channels is None or int(k[-1]) in channels]

        # Compare only features present in both samples and finite in both.
        shared_keys = [k for k in selected_keys
                       if k in f2 and np.isfinite(f1[k]) and np.isfinite(f2[k])]
        f1_val = [f1[k] for k in shared_keys]
        f2_val = [f2[k] for k in shared_keys]

        cost = distance.cosine(f1_val, f2_val)
        if np.isnan(cost):
            # Show the offending vectors to help debugging.
            print(f1_val, f2_val)
        dist_feat_mat[i, j] = cost

# Keep the strictly positive entries only (the unfilled cells are zero).
dist_feat_array = dist_feat_mat[dist_feat_mat > 0].ravel()

plt.matshow(dist_feat_mat)
plt.colorbar()

print(np.min(dist_feat_array))
print(np.max(dist_feat_array))

## Select samples with the most distant features

In [None]:
num_final_samples = 5

In [None]:
# Make the distance matrix symmetric: keep the diagonal and upper triangle and
# mirror the strictly upper triangle onto the lower one.
dist_feat_sym = np.triu(dist_feat_mat) + np.triu(dist_feat_mat, k=1).T
plt.matshow(dist_feat_sym)

In [None]:
# select samples based on cumulative distance
cum_dist = np.sum(dist_feat_sym, 0)
max_dist_idxs = np.argsort(cum_dist)[::-1][:num_final_samples]

In [None]:
print(cum_dist[max_dist_idxs])

In [None]:
selected_sample_idxs = max_dist_idxs

In [None]:
selected_scaled_params = bap_scaled_samples[selected_sample_idxs]
selected_random_params = bap_random_samples[selected_sample_idxs]
selected_responses = bap_responses[selected_sample_idxs]
selected_features_v = bap_features_v[selected_sample_idxs]

In [None]:
# Pairwise cosine distances between the finally selected samples, computed on the
# normalized parameters, the scaled parameters and the feature vectors.
# Only the upper triangle of each matrix is filled.
n_selected = len(selected_random_params)
dist_sel_random_params_mat = np.zeros((n_selected, n_selected))
dist_sel_scaled_params_mat = np.zeros((n_selected, n_selected))
dist_sel_features_mat = np.zeros((n_selected, n_selected))
for i in range(n_selected):
    for j in range(i + 1, n_selected):
        dist_sel_random_params_mat[i, j] = distance.cosine(selected_random_params[i],
                                                           selected_random_params[j])
        dist_sel_scaled_params_mat[i, j] = distance.cosine(selected_scaled_params[i],
                                                           selected_scaled_params[j])

        f1, f2 = selected_features_v[i], selected_features_v[j]

        # Non-MEA features are always used. MEA features are used when no channel
        # subset is given, or when the last character of the key, read as an
        # integer, is one of the selected channels.
        selected_keys = [k for k in f1
                         if 'MEA' not in k or channels is None or int(k[-1]) in channels]

        # Compare only features present in both samples and finite in both.
        shared_keys = [k for k in selected_keys
                       if k in f2 and np.isfinite(f1[k]) and np.isfinite(f2[k])]
        f1_val = [f1[k] for k in shared_keys]
        f2_val = [f2[k] for k in shared_keys]
        dist_sel_features_mat[i, j] = distance.cosine(f1_val, f2_val)

# One titled matrix plot per distance type.
for title, matrix in (("Norm params", dist_sel_random_params_mat),
                      ("Scaled params", dist_sel_scaled_params_mat),
                      ("Features", dist_sel_features_mat)):
    plt.matshow(matrix)
    plt.title(title)
    plt.colorbar()

# print(np.min(dist_params_array))
# print(np.max(dist_params_array))

In [None]:
l5pc_plot.plot_multiple_responses(responses_list=selected_responses)

In [None]:
fig3 = plt.figure()
ax3 = fig3.add_subplot(111)
cmap=plt.get_cmap('viridis')
for i, gt in enumerate(selected_responses):
    eap = l5pc_evaluator.calculate_eap(responses=gt, protocols=fitness_protocols,
                                       protocol_name='Step3')
    eap_norm = eap / np.max(np.abs(eap), 1, keepdims=True)
    mu.plot_mea_recording(eap_norm, 
                          probe, colors=f"C{i}", ax=ax3)

In [None]:
# create dataframes
df_release = pd.DataFrame(data=release_params, index=['release'])
df_random = pd.DataFrame(data=selected_scaled_params, columns=param_lims.keys(), 
                         index=[f'random_{i}' for i in range(len(selected_scaled_params))])

In [None]:
df_random

In [None]:
plt.figure()
ax = sns.boxplot(data=df_random, orient='horizontal')
fig = ax.get_figure()
fig.subplots_adjust(left=0.4)

## Double check that responses are correct!

In [None]:
loaded_params_file = 'config/params/smart_random.csv'
loaded_params = pd.read_csv(loaded_params_file, index_col='index')
loaded_params

In [None]:
# Rebuild the cell model, the fitness protocols and the simulator for the
# double check of the responses.
channels = None

morphology = ephys.morphologies.NrnFileMorphology('morphology/C060114A7.asc', do_replace_axon=True)
# Context manager so that the config file handle is closed after loading.
with open('config/parameters.json') as param_file:
    param_configs = json.load(param_file)
parameters = l5pc_model.define_parameters()
mechanisms = l5pc_model.define_mechanisms()

l5pc_cell = ephys.models.LFPyCellModel('l5pc',
                                       v_init=-65.,
                                       morph=morphology,
                                       mechs=mechanisms,
                                       params=parameters)

# Names of the parameters that are not frozen in the model.
param_names = [param.name for param in l5pc_cell.params.values() if not param.frozen]
feature_set = 'all'

print(f'Feature set {feature_set}')
gt_responses = []

# For the "extra" and "all" feature sets the electrode is passed to both the
# protocols and the simulator; otherwise neither receives it.
if feature_set in ["extra", "all"]:
    fitness_protocols = l5pc_evaluator.define_protocols(electrode)
    sim = ephys.simulators.LFPySimulator(LFPyCellModel=l5pc_cell, cvode_active=True, electrode=electrode)
else:
    fitness_protocols = l5pc_evaluator.define_protocols()
    sim = ephys.simulators.LFPySimulator(LFPyCellModel=l5pc_cell, cvode_active=True)

In [None]:
# Re-simulate the parameter sets with the freshly built model and keep the responses.
# NOTE(review): this section loads `loaded_params` from the saved CSV, but the loop
# below iterates `random_params`; confirm which of the two is meant to be checked.
responses = []

for i, (index, params) in enumerate(random_params.iterrows()):
    print(f'{i+1} / {len(random_params)}, {index}')

    feature_folder = f'config/features/{index}'
    evaluation = l5pc_evaluator.compute_feature_values(
        params, l5pc_cell, fitness_protocols, sim,
        feature_set=feature_set, probe=probe, channels=channels,
        feature_folder=feature_folder, save_to_file=False,
    )
    # Only the response is kept here; the features are not collected.
    _, response, feature_dict = evaluation
    responses.append(response)

In [None]:
l5pc_plot.plot_multiple_responses(responses_list=responses)

In [None]:
l5pc_cell.freeze(params)
l5pc_cell.instantiate(sim=sim)

In [None]:
for sec in l5pc_cell.LFPyCell.allseclist:
    print(sec, sec.Ra, sec.cm)

In [None]:
for p in parameters:
    print(p.name, p.value)

### Save random and release parameters to csv

In [None]:
# Write the release parameters and the selected random parameters to CSV files.
if save_params:
    params_folder = Path('config/params')
    # Create the output folder (and any missing parents) if it is not there yet.
    params_folder.mkdir(parents=True, exist_ok=True)

    release_params_file = params_folder / 'release.csv'
    random_params_file = params_folder / 'smart_random.csv'

    # Both files name their index column 'index'.
    for frame, csv_file in ((df_release, release_params_file),
                            (df_random, random_params_file)):
        frame.to_csv(csv_file, index_label='index')