In [2]:
import numpy as np
import torch
import pandas as pd
import torch

# import saver utilities
import pickle

## Load observed data

In [3]:
# All 25 degree Celsius mouse motor cortex (M1) electrophysiological data, preprocessed.
# NOTE(security): pickle.load can execute arbitrary code; only load this file from a trusted source.
# The context manager makes sure the file handle is closed after loading.
with open('pickles/M1_features.pickle', 'rb') as f:
    M1_25degree = pickle.load(f)
# Column labels of the observed feature table (used later to configure the simulator).
ephys_features = M1_25degree['X_o'].columns
# Work on a copy so the loaded dictionary entry is not modified by later filtering.
Xo = M1_25degree['X_o'].copy()

We exclude observed ephys cells with low-quality RNA right from the start.

In [4]:
# Read the tab-separated per-cell metadata, keep the four columns used here,
# and align its rows with the cells present in Xo (cells missing from the
# metadata become all-NaN rows after the reindex).
prop = (
    pd.read_csv('../data/m1_patchseq_meta_data.csv', sep='\t')
    .rename(columns={'Targeted layer': 'Layer'})
    .loc[:, ['Cell', 'Layer', 'Cre', 'RNA type']]
    .set_index('Cell')
    .reindex(Xo.index)
)

# A cell is kept when its 'RNA type' entry does not stringify to 'nan'.
no_low_qual = np.array([str(rna_type) for rna_type in prop['RNA type'].values]) != 'nan'
prop = prop.loc[no_low_qual, :]

# Apply the same row filter to the observations, then drop the last four feature columns.
Xo = Xo.loc[no_low_qual, :].iloc[:, :-4]
celltypes = prop['RNA type']

In [5]:
# Column-wise (per-feature) mean and population standard deviation of the
# observed data; used below to Z-score both observations and simulations.
Xo_mean = np.mean(Xo.values, axis=0)
Xo_std = np.std(Xo.values, axis=0)

In [6]:
# Labels of the model parameters, one entry per parameter
# (LaTeX-formatted where a symbol is used).
model_param_names = np.array([
    'C',
    r'$R_{input}$',
    r'$\tau$',
    r'$g_{Nat}$',
    r'$g_{Na}$',
    r'$g_{Kd}$',
    r'$g_{M}$',
    r'$g_{Kv31}$',
    r'$g_{L}$',
    r'$E_{leak}$',
    r'$\tau_{max}$',
    'VT',
    'rate_to_SS_factor',
])

## Report performance for each training schedule

#### Pick your training schedule (0, 1, 2a, 2b, 2c, 2d, 2e, 3 or 4). Schedule 2d corresponds to NPE+, and schedule 0 to NPE.

In [31]:
# Identifier of the training schedule to report on; it is substituted into the
# file names of the saved samples and simulation results loaded below.
tr_schedule='2d'

In [32]:
# Load the saved posterior samples for the selected training schedule.
# NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
with open('save_model_parameters/training_schedule_{}.pickle'.format(tr_schedule), 'rb') as f:
    THETA = pickle.load(f)

# Derive the parameter count from the label array instead of hard-coding it,
# so the NaN placeholders always agree with the reshape below.
n_params = len(model_param_names)

# One row per cell in Xo; cells without a saved sample get a row of NaNs.
highest_posterior_samples = np.concatenate([
    THETA['highest posterior samples'][cell]
    if cell in THETA['highest posterior samples']
    else np.full((n_params,), np.nan)
    for cell in Xo.index
]).reshape((Xo.shape[0], n_params))

# Ten rows per cell in Xo (in cell order); cells without saved samples get ten NaN rows.
posterior_samples_10_random = np.concatenate([
    THETA['10 random samples'][cell].numpy()
    if cell in THETA['10 random samples']
    else np.full((10, n_params), np.nan)
    for cell in Xo.index
]).reshape((Xo.shape[0]*10, n_params))

#### Let's get their summary statistics

In [24]:
from simulator import EphysModel

# Gather the simulator settings in one mapping, then build the model from it.
m1_model_settings = dict(
    name='M1',
    T=25.0,
    E_Na=69.0,
    E_K=-98.4,
    E_Ca=127.2,
    start=100,
    end=700,
    dt=0.04,
    label_params=model_param_names,
    # Drop the last four feature names, matching the column trim applied to Xo.
    ephys_features=ephys_features[:-4],
    n_processes=40,
    noise_factor=10,
    use_pathos=True,
    chunk_size=10000,
    save_chunks=True,
    verbose=False,
)
M1_model = EphysModel(**m1_model_settings)

You don't have to run the following two blocks again; the summary statistics of all highest posterior samples come with the GitHub repository.

In [25]:
# Simulate the highest posterior sample of every cell and store the resulting
# summary statistics on disk, tagged with the current training schedule.
theta_highest = torch.as_tensor(highest_posterior_samples, dtype=torch.float32)
M1_model.sim(theta_highest)
np.savez(
    './save_sims/highest_posterior_samples_summ_stats_{}.npz'.format(tr_schedule),
    stats=M1_model.stats.numpy(),
)
print('Highest posterior sample simulations done.')

Highest posterior sample simulations done.


In [26]:
# Simulate the ten random posterior samples of every cell and store the
# resulting summary statistics on disk, tagged with the current training schedule.
theta_10_random = torch.as_tensor(posterior_samples_10_random, dtype=torch.float32)
M1_model.sim(theta_10_random)
np.savez(
    './save_sims/posterior_samples_10_random_summ_stats_{}.npz'.format(tr_schedule),
    stats=M1_model.stats.numpy(),
)
print('10 random posterior sample simulations done.')

10 random posterior sample simulations done.


In [33]:
# Load the saved summary statistics of the highest posterior sample simulations.
# The context manager closes the .npz archive once the array has been read.
with np.load('./save_sims/highest_posterior_samples_summ_stats_{}.npz'.format(tr_schedule)) as sims:
    highest_post_stats = sims['stats']
# A row counts as a failed simulation when any of its statistics is NaN
# (the row mean is NaN in that case).
highest_post_stats_nans = np.isnan(highest_post_stats.mean(axis=1))

In [34]:
print('Perfomance for amortized posterior in training schedule {} regarding drawing highest posterior samples: '.format(tr_schedule))
print('---------------------------------------------------------------------------------------------------------------')

# Rows whose simulation succeeded (no NaN summary statistic).
ok = ~highest_post_stats_nans

# Z-score simulated and observed features with the observed mean / s.d.
# Plain ndarrays are used on both sides (Xo.values rather than the DataFrame)
# so the reductions below follow NumPy semantics, like the other score cells.
z_sim = (highest_post_stats[ok] - Xo_mean) / Xo_std
z_obs = (Xo.values[ok] - Xo_mean) / Xo_std

# Per-cell Euclidean distance in Z-scored feature space, computed once and
# reused for both the mean and the standard deviation.
distances = np.sqrt(np.sum((z_sim - z_obs)**2, axis=1))
score = np.mean(distances)
score_sd = np.std(distances)

n_failed = int(highest_post_stats_nans.sum())
print(n_failed, 'out of', Xo.shape[0],
      'failed or {:.2f} %.'.format(n_failed/(Xo.shape[0])*100),
      '\nOn simulations that were succesful we are {:.2f} +/- {:.2f} (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.'.format(score, score_sd))

Perfomance for amortized posterior in training schedule 2d regarding drawing highest posterior samples: 
---------------------------------------------------------------------------------------------------------------
24 out of 955 failed or 2.51 %. 
On simulations that were succesful we are 4.35 +/- 2.86 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.


In [None]:
# Load the saved prior simulations and keep every 1000th row.
# NOTE(review): presumably the file stores 1000 rows per cell, best first, so
# this picks the best prior simulation for each cell -- confirm against the
# code that wrote the file.
with np.load('./save_sims/best_1000_Euclidean_sims.npz') as sims:
    best_prior_stats = sims['stats'][::1000, :]
# A row counts as failed when any of its statistics is NaN.
best_prior_stats_nans = np.isnan(best_prior_stats.sum(axis=1))
Xo_values = Xo.values
print('W.r.t. the prior: ')
print('----------------- ')

# Rows whose simulation succeeded.
ok = ~best_prior_stats_nans

# Z-score simulated and observed features with the observed mean / s.d.
z_sim = (best_prior_stats[ok, :] - Xo_mean) / Xo_std
z_obs = (Xo_values[ok, :] - Xo_mean) / Xo_std

# Per-cell Euclidean distance in Z-scored feature space, computed once and
# reused for both the mean and the standard deviation.
distances = np.sqrt(np.sum((z_sim - z_obs)**2, axis=1))
score = np.mean(distances)
score_sd = np.std(distances)

n_failed = int(best_prior_stats_nans.sum())
print(n_failed, 'out of', Xo.shape[0],
      'failed or {:.2f} %.'.format(n_failed/(Xo.shape[0])*100),
      '\nOn simulations that were succesful we are {:.2f} +/- {:.2f} (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.'.format(score, score_sd))

W.r.t. the prior: 
----------------- 
0 out of 955 failed or 0.00 %. 
On simulations that were succesful we are 2.63 +/- 0.81 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.


In [35]:
# Load the saved summary statistics of the ten-random-posterior-sample simulations.
# The context manager closes the .npz archive once the array has been read.
with np.load('./save_sims/posterior_samples_10_random_summ_stats_{}.npz'.format(tr_schedule)) as sims:
    post_samples_10_random_stats = sims['stats']
# A row counts as a failed simulation when any of its statistics is NaN.
post_samples_10_random_stats_nans = np.isnan(post_samples_10_random_stats.mean(axis=1))
# Repeat every observation ten times in a row so that row k of the repeated
# observations lines up with row k of the ten-samples-per-cell simulations.
Xo_repeated = np.repeat(Xo.values, 10, axis=0)

In [36]:
print('Perfomance for amortized posterior in training schedule {} regarding drawing 10 random posterior samples: '.format(tr_schedule))
print('-----------------------------------------------------------------------------------------------------------------')

# Rows whose simulation succeeded (no NaN summary statistic).
ok = ~post_samples_10_random_stats_nans

# Z-score simulated and observed features with the observed mean / s.d.
z_sim = (post_samples_10_random_stats[ok] - Xo_mean) / Xo_std
z_obs = (Xo_repeated[ok] - Xo_mean) / Xo_std

# Per-sample Euclidean distance in Z-scored feature space, computed once and
# reused for both the mean and the standard deviation.
distances = np.sqrt(np.sum((z_sim - z_obs)**2, axis=1))
score = np.mean(distances)
score_sd = np.std(distances)

n_failed = int(post_samples_10_random_stats_nans.sum())
print(n_failed, 'out of', Xo.shape[0]*10,
      'failed or {:.2f} %.'.format(n_failed/(Xo.shape[0]*10)*100),
      '\nOn simulations that were succesful we are {:.2f} +/- {:.2f} (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.'.format(score, score_sd))

Perfomance for amortized posterior in training schedule 2d regarding drawing 10 random posterior samples: 
-----------------------------------------------------------------------------------------------------------------
594 out of 9550 failed or 6.22 %. 
On simulations that were succesful we are 5.11 +/- 3.03 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.


In [9]:
# Load the summary statistics of the full batch of prior simulations.
# The context manager closes the .npz archive once the array has been read.
with np.load('./save_sims/M1_chunks/full_batch.npz') as sims:
    prior_stats = sims['stats']

# Draw ten random prior simulations per observed cell (with replacement).
# A seeded generator makes the draw, and hence the reported score, reproducible.
rng = np.random.default_rng(0)
prior_10_random_stats = prior_stats[rng.integers(0, prior_stats.shape[0], Xo.shape[0]*10), :]
# A row counts as a failed simulation when any of its statistics is NaN.
prior_10_random_stats_nans = np.isnan(prior_10_random_stats.sum(axis=1))

# Recomputed here so this cell does not depend on the posterior cells having been run.
Xo_repeated = np.repeat(Xo.values, 10, axis=0)
Xo_mean = Xo.values.mean(axis=0)
Xo_std = Xo.values.std(axis=0)

tcmalloc: large alloc 1380007936 bytes == 0xe5ea000 @ 


In [10]:
print('W.r.t. the prior: ')
print('----------------- ')

# Rows whose simulation succeeded (no NaN summary statistic).
ok = ~prior_10_random_stats_nans

# Z-score simulated and observed features with the observed mean / s.d.
z_sim = (prior_10_random_stats[ok, :] - Xo_mean) / Xo_std
z_obs = (Xo_repeated[ok, :] - Xo_mean) / Xo_std

# Per-sample Euclidean distance in Z-scored feature space, computed once and
# reused for both the mean and the standard deviation.
distances = np.sqrt(np.sum((z_sim - z_obs)**2, axis=1))
score = np.mean(distances)
score_sd = np.std(distances)

n_failed = int(prior_10_random_stats_nans.sum())
print(n_failed, 'out of', Xo.shape[0]*10,
      'failed or {:.2f} %.'.format(n_failed/(Xo.shape[0]*10)*100),
      '\nOn simulations that were succesful we are {:.2f} +/- {:.2f} (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.'.format(score, score_sd))

W.r.t. the prior: 
----------------- 
4994 out of 9550 failed or 52.29 %. 
On simulations that were succesful we are 11.79 +/- 3.31 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.


We can pick our favourite training schedule and generate more posterior samples together with their log-probability evaluations. These can be used, for instance, to evaluate the entropy in `build_figures.ipynb`.

In [11]:
# Load the pickled posterior object of the favourite training schedule.
# NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
fav_tr_schedule = '2d'
posterior_path = './save_posteriors/training_schedule_{}.pickle'.format(fav_tr_schedule)
with open(posterior_path, 'rb') as f:
    posterior = pickle.load(f)



In [12]:
# Row of Xo at which the loop starts.
# NOTE(review): presumably raised by hand to resume an interrupted run -- confirm.
index = 0
# Positions of the features of each observation that are passed to the posterior.
feature_list = range(23)
# Per-cell results, keyed by cell name.
fav_training_schedule = {'samples': {}, 'evaluations': {}}

for i in range(index, Xo.shape[0]):
    cell_name = Xo.index[i]
    xo = Xo.iloc[i, :].values
    print('.', end='')  # one dot of progress per cell

    # Observation the posterior is conditioned on, for sampling and for evaluation.
    x_obs = torch.as_tensor(xo[feature_list], dtype=float)

    # Draw 1000 samples from the posterior and evaluate their log-probability.
    samples = posterior.sample((1000,), x=x_obs, show_progress_bars=False)
    evaluations = posterior.log_prob(samples, x=x_obs)

    fav_training_schedule['samples'][cell_name] = samples.numpy()
    fav_training_schedule['evaluations'][cell_name] = evaluations.numpy()

# Persist all samples and evaluations in a single pickle file.
with open('save_model_parameters/favourite_training_schedule.pickle', 'wb') as f:
    pickle.dump(fav_training_schedule, f)

...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................