In [1]:
import numpy as np
import torch
import pandas as pd
import torch

# import saver utilities
import pickle

## Load observed data

In [2]:
# All 25 degree Celsius mouse motor cortex (M1) electrophysiological data, preprocessed.
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
# A context manager is used so the file handle is closed as soon as the data is read.
with open('pickles/M1_features.pickle', 'rb') as f:
    M1_25degree = pickle.load(f)

# Column labels of the observed feature table; passed to the simulator further below.
ephys_features = M1_25degree['X_o'].columns
# Work on a copy so that later row filtering does not touch the loaded object.
Xo = M1_25degree['X_o'].copy()

We exclude observed ephys cells with low-quality RNA right from the start.

In [4]:
# Per-cell metadata (tab-separated despite the .csv extension), reduced to the columns
# used here, indexed by cell name and aligned row-for-row with the observed cells in Xo.
prop = (
    pd.read_csv('../data/m1_patchseq_meta_data.csv', sep='\t')
    .rename(columns={'Targeted layer': 'Layer'})[['Cell', 'Layer', 'Cre', 'RNA type']]
    .set_index('Cell')
    .reindex(Xo.index)
)

# A cell is kept only if its 'RNA type' entry does not stringify to 'nan'; cells that are
# absent from the metadata get NaN from the reindex above and are therefore dropped too.
rna_type_as_text = np.array([str(value) for value in prop['RNA type'].values])
no_low_qual = rna_type_as_text != 'nan'

# Apply the same boolean mask to metadata and ephys features so they stay aligned.
prop = prop.loc[no_low_qual, :]
Xo = Xo.loc[no_low_qual, :]
celltypes = prop['RNA type']

## Load 10 random samples and highest posterior samples for each observed cell and every amortized posterior

In [6]:
# Identifiers of the training schedules compared in this notebook.
training_schedules = [
    '0', '1',
    '2a', '2b', '2c', '2d', '2e',
    '3', '4',
]

# Display labels of the 13 model parameters (LaTeX markup where a symbol is used).
model_param_names = np.array([
    'C',
    r'$R_{input}$',
    r'$\tau$',
    r'$g_{Nat}$',
    r'$g_{Na}$',
    r'$g_{Kd}$',
    r'$g_{M}$',
    r'$g_{Kv31}$',
    r'$g_{L}$',
    r'$E_{leak}$',
    r'$\tau_{max}$',
    'VT',
    'rate_to_SS_factor',
])

In [25]:
# Load the posterior samples that were saved per training schedule and per cell.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open('save_model_parameters/across_training_schedules.pickle', 'rb') as f:
    THETAS = pickle.load(f)

observed_cells = Xo.index

# Highest-posterior sample of every observed cell, stacked schedule after schedule.
# NOTE(review): the schedule order here is the key order of THETAS, while later cells slice
# the result assuming the order of `training_schedules` -- confirm that both orders agree.
best_per_schedule = [
    [THETAS[tr_schedule]['highest posterior samples'][cell] for cell in observed_cells]
    for tr_schedule in THETAS
]
highest_posterior_samples = np.concatenate(best_per_schedule)

# The '10 random samples' entries are converted with .numpy(), stacked the same way and
# then flattened to one parameter vector per row.
random_per_schedule = [
    [THETAS[tr_schedule]['10 random samples'][cell].numpy() for cell in observed_cells]
    for tr_schedule in THETAS
]
n_random_rows = Xo.shape[0]*len(training_schedules)*10
posterior_samples_10_random = np.concatenate(random_per_schedule).reshape(
    (n_random_rows, len(model_param_names))
)

#### Let's get their summary statistics

In [29]:
from simulator import EphysModel

# Keyword settings for the M1 simulator, gathered in one mapping and unpacked below.
# NOTE(review): the meaning of each setting is defined by simulator.EphysModel, which is not
# visible here; T=25.0 presumably matches the 25 degree Celsius recordings -- confirm.
M1_settings = dict(
    T=25.0,
    E_Na=69.0,
    E_K=-98.4,
    E_Ca=127.2,
    start=100,
    end=700,
    dt=0.04,
    label_params=model_param_names,
    ephys_features=ephys_features,
    n_processes=40,
    noise_factor=10,
    use_pathos=True,
    chunk_size=10000,
    save_chunks=True,
    verbose=False,
)

M1_model = EphysModel(name='M1', **M1_settings)

In [31]:
# Simulate the model for every highest-posterior parameter vector and store the
# resulting summary statistics on disk.
theta_highest_posterior = torch.as_tensor(highest_posterior_samples, dtype=torch.float32)
M1_model.sim(theta_highest_posterior)

np.savez(
    './save_sims/highest_posterior_samples_summ_stats.npz',
    stats=M1_model.stats.numpy(),
)
print('Highest posterior sample simulations done.')

Chunks:
.
Training set of parameters and summary statistics has been made:
parameters shape:  torch.Size([8595, 13])
summary statistics shape:  torch.Size([8595, 23])
Highest posterior sample simulations done.


In [32]:
# Simulate the model for the 10 random posterior samples of every cell and schedule,
# then store the resulting summary statistics on disk.
theta_10_random = torch.as_tensor(posterior_samples_10_random, dtype=torch.float32)
M1_model.sim(theta_10_random)

np.savez(
    './save_sims/posterior_samples_10_random_summ_stats.npz',
    stats=M1_model.stats.numpy(),
)
print('10 random posterior sample simulations done.')

Chunks:
.........
Training set of parameters and summary statistics has been made:
parameters shape:  torch.Size([85950, 13])
summary statistics shape:  torch.Size([85950, 23])
10 random posterior sample simulations done.


## Report performance of each training schedule

In [119]:
# Load the simulated summary statistics of the highest-posterior samples.
# The archive is opened as a context manager so its file handle is closed after reading.
with np.load('./save_sims/highest_posterior_samples_summ_stats.npz') as archive:
    highest_post_stats = archive['stats']

# A row counts as a failed simulation when its mean is NaN, i.e. when any statistic is NaN.
highest_post_stats_nans = np.isnan(highest_post_stats.mean(axis=1))
# Same flags arranged as one row per training schedule and one column per cell.
highest_post_stats_nans_reshaped = highest_post_stats_nans.reshape(
    (len(training_schedules), Xo.shape[0])
)

In [120]:
# Observed features compared against the simulations; the last four columns of Xo are left out.
# NOTE(review): presumably those four columns have no simulated counterpart -- confirm.
n_cells = Xo.shape[0]
Xo_ephys = Xo.values[:, :-4]
Xo_repeated = np.concatenate([Xo_ephys]*len(training_schedules), axis=0)
Xo_mean = Xo_ephys.mean(axis=0)
Xo_std = Xo_ephys.std(axis=0)

print('Perfomance for each amortized posterior or each training schedule regarding drawing highest posterior samples: ')
print('---------------------------------------------------------------------------------------------------------------')

for i, tr_schedule in enumerate(training_schedules):
    # Rows that belong to this schedule, and which of them simulated without NaNs.
    rows = slice(n_cells*i, n_cells*(i+1))
    succeeded = ~highest_post_stats_nans[rows]

    # Euclidean distance between simulation and observation after Z-scoring both with the
    # mean and s.d. of the observed data. It is computed once and reused for mean and s.d.
    distances = np.sqrt(
        np.sum(
            (
                (highest_post_stats[rows, :][succeeded, :]-Xo_mean)/Xo_std
                - (Xo_repeated[rows, :][succeeded, :]-Xo_mean)/Xo_std
            )**2, axis=1
        )
    )
    score = np.mean(distances)
    score_sd = np.std(distances)

    n_failed = sum(highest_post_stats_nans_reshaped[i, :])
    print('\nTraining schedule {}: '.format(tr_schedule), n_failed, 'out of', n_cells,
          'failed or {:.2f} %.'.format(n_failed/(n_cells)*100),
          '\nOn simulations that were succesful we are {:.2f} +/- {:.2f} (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.'.format(score, score_sd))

Perfomance for each amortized posterior or each training schedule regarding drawing highest posterior samples: 
---------------------------------------------------------------------------------------------------------------

Training schedule 0:  142 out of 955 failed or 14.87 %. 
On simulations that were succesful we are 5.89 +/- 3.26 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.

Training schedule 1:  116 out of 955 failed or 12.15 %. 
On simulations that were succesful we are 5.41 +/- 3.15 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.

Training schedule 2a:  95 out of 955 failed or 9.95 %. 
On simulations that were succesful we are 5.30 +/- 3.15 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.

Training schedule 2b:  42 out of 955 failed or 4.40 %. 
On simulations that were succesful we are 4.77 +/- 2.88 (mean +/- s.d.) Z-scored Euclidean ephys distance far f

In [121]:
# Load the summary statistics stored under 'stats'; the context manager closes the archive
# once the array has been read.
# NOTE(review): the following cell reloads this file and keeps only every 1000th row, so this
# full load looks redundant -- confirm before removing.
with np.load('./save_sims/best_1000_Euclidean_sims.npz') as archive:
    best_prior_stats = archive['stats']

In [122]:
# Keep every 1000th row of the stored statistics; the archive is closed after reading.
# NOTE(review): presumably the file holds 1000 rows per cell with the closest one first, so
# this selects one row per cell -- confirm against the code that wrote the file.
with np.load('./save_sims/best_1000_Euclidean_sims.npz') as archive:
    best_prior_stats = archive['stats'][::1000, :]

# A row counts as failed when its sum is NaN, i.e. when any statistic is NaN.
best_prior_stats_nans = np.isnan(best_prior_stats.sum(axis=1))
succeeded = ~best_prior_stats_nans

# Observed features (last four columns of Xo left out) and their mean / s.d. for Z-scoring.
Xo_values = Xo.values[:, :-4]
Xo_mean = Xo_values.mean(axis=0)
Xo_std = Xo_values.std(axis=0)

print('W.r.t. the prior: ')
print('----------------- ')

# Z-scored Euclidean distance per successful simulation, computed once and reused below.
distances = np.sqrt(
    np.sum(
        (
            (best_prior_stats[succeeded, :]-Xo_mean)/Xo_std
            - (Xo_values[succeeded, :]-Xo_mean)/Xo_std
        )**2, axis=1
    )
)
score = np.mean(distances)
score_sd = np.std(distances)

n_failed = sum(best_prior_stats_nans)
print(n_failed, 'out of', Xo.shape[0],
      'failed or {:.2f} %.'.format(n_failed/(Xo.shape[0])*100),
      '\nOn simulations that were succesful we are {:.2f} +/- {:.2f} (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.'.format(score, score_sd))

W.r.t. the prior: 
----------------- 
0 out of 955 failed or 0.00 %. 
On simulations that were succesful we are 2.63 +/- 0.81 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.


In [123]:
# Load the simulated summary statistics of the 10 random posterior samples per cell.
# The archive is opened as a context manager so its file handle is closed after reading.
with np.load('./save_sims/posterior_samples_10_random_summ_stats.npz') as archive:
    post_samples_10_random_stats = archive['stats']

# A row counts as a failed simulation when its mean is NaN, i.e. when any statistic is NaN.
post_samples_10_random_stats_nans = np.isnan(post_samples_10_random_stats.mean(axis=1))
# Same flags arranged as one row per training schedule, with 10 entries per cell.
post_samples_10_random_stats_nans_reshaped = post_samples_10_random_stats_nans.reshape(
    (len(training_schedules), Xo.shape[0]*10)
)

In [124]:
# Observed features (last four columns of Xo left out), each row repeated 10 times so it
# lines up with the 10 samples per cell, then tiled once per training schedule.
n_per_schedule = Xo.shape[0]*10
Xo_ephys = Xo.values[:, :-4]
Xo_repeated = np.repeat(Xo_ephys, 10, axis=0)
Xo_repeated = np.concatenate([Xo_repeated]*len(training_schedules), axis=0)
Xo_mean = Xo_ephys.mean(axis=0)
Xo_std = Xo_ephys.std(axis=0)

print('Perfomance for each amortized posterior or each training schedule regarding drawing 10 random posterior samples: ')
print('-----------------------------------------------------------------------------------------------------------------')

for i, tr_schedule in enumerate(training_schedules):
    # Rows that belong to this schedule, and which of them simulated without NaNs.
    rows = slice(n_per_schedule*i, n_per_schedule*(i+1))
    succeeded = ~post_samples_10_random_stats_nans[rows]

    # Euclidean distance between simulation and observation after Z-scoring both with the
    # mean and s.d. of the observed data. It is computed once and reused for mean and s.d.
    distances = np.sqrt(
        np.sum(
            (
                (post_samples_10_random_stats[rows, :][succeeded, :]-Xo_mean)/Xo_std
                - (Xo_repeated[rows, :][succeeded, :]-Xo_mean)/Xo_std
            )**2, axis=1
        )
    )
    score = np.mean(distances)
    score_sd = np.std(distances)

    n_failed = sum(post_samples_10_random_stats_nans_reshaped[i, :])
    print('\nTraining schedule {}: '.format(tr_schedule), n_failed, 'out of', n_per_schedule,
          'failed or {:.2f} %.'.format(n_failed/(n_per_schedule)*100),
          '\nOn simulations that were succesful we are {:.2f} +/- {:.2f} (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.'.format(score, score_sd))

Perfomance for each amortized posterior or each training schedule regarding drawing 10 random posterior samples: 
-----------------------------------------------------------------------------------------------------------------

Training schedule 0:  1944 out of 9550 failed or 20.36 %. 
On simulations that were succesful we are 6.65 +/- 3.38 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.

Training schedule 1:  1584 out of 9550 failed or 16.59 %. 
On simulations that were succesful we are 5.63 +/- 3.49 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.

Training schedule 2a:  1450 out of 9550 failed or 15.18 %. 
On simulations that were succesful we are 5.70 +/- 3.29 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.

Training schedule 2b:  680 out of 9550 failed or 7.12 %. 
On simulations that were succesful we are 5.28 +/- 3.09 (mean +/- s.d.) Z-scored Euclidean ephys 

In [125]:
# Load the stored summary statistics; the archive is closed once the array is read.
with np.load('./save_sims/M1_chunks/full_batch.npz') as archive:
    prior_stats = archive['stats']

# Draw 10 rows per observed cell uniformly at random (with replacement). A dedicated, seeded
# generator makes this baseline reproducible without touching NumPy's global random state.
PRIOR_SUBSAMPLE_SEED = 0
prior_subsample_rng = np.random.default_rng(PRIOR_SUBSAMPLE_SEED)
prior_10_random_stats = prior_stats[
    prior_subsample_rng.integers(0, prior_stats.shape[0], size=Xo.shape[0]*10), :
]

# A row counts as failed when its sum is NaN, i.e. when any statistic is NaN.
prior_10_random_stats_nans = np.isnan(prior_10_random_stats.sum(axis=1))
succeeded = ~prior_10_random_stats_nans

# Observed features (last four columns of Xo left out), each row repeated 10 times to line
# up with the 10 draws per cell, plus their mean / s.d. for Z-scoring.
Xo_ephys = Xo.values[:, :-4]
Xo_repeated = np.repeat(Xo_ephys, 10, axis=0)
Xo_mean = Xo_ephys.mean(axis=0)
Xo_std = Xo_ephys.std(axis=0)

print('W.r.t. the prior: ')
print('----------------- ')

# Z-scored Euclidean distance per successful simulation, computed once and reused below.
distances = np.sqrt(
    np.sum(
        (
            (prior_10_random_stats[succeeded, :]-Xo_mean)/Xo_std
            - (Xo_repeated[succeeded, :]-Xo_mean)/Xo_std
        )**2, axis=1
    )
)
score = np.mean(distances)
score_sd = np.std(distances)

n_failed = sum(prior_10_random_stats_nans)
print(n_failed, 'out of', Xo.shape[0]*10,
      'failed or {:.2f} %.'.format(n_failed/(Xo.shape[0]*10)*100),
      '\nOn simulations that were succesful we are {:.2f} +/- {:.2f} (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.'.format(score, score_sd))

W.r.t. the prior: 
----------------- 
5024 out of 9550 failed or 52.61 %. 
On simulations that were succesful we are 11.86 +/- 3.32 (mean +/- s.d.) Z-scored Euclidean ephys distance far from the experimental observation.


We can now pick our favourite training schedule and draw more posterior samples from it, together with their log-probability evaluations. We will use these samples and evaluations when we train the next neural network, which predicts model parameter distributions from gene expression levels.

In [126]:
# Training schedule whose saved posterior is used for the sampling below.
fav_tr_schedule = '2c'

# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
posterior_path = './save_posteriors/training_schedule_{}.pickle'.format(fav_tr_schedule)
with open(posterior_path, 'rb') as f:
    posterior = pickle.load(f)

In [131]:
index=0                  # first row of Xo to process
feature_list=range(23)   # positions of the Xo columns handed to the posterior
fav_training_schedule={'samples':{}, 'evaluations':{}}

# Seed torch so that the drawn samples, and hence the saved file, are reproducible.
# NOTE(review): assumes posterior.sample draws from torch's global random generator -- confirm.
torch.manual_seed(0)

for i in range(index, Xo.shape[0]):
    xo=Xo.iloc[i,:].values
    cell_name=Xo.index[i]
    print('.', end='')

    # Build the conditioning observation once and reuse it for sampling and evaluation.
    x_obs=torch.as_tensor(xo[feature_list], dtype=float)

    # sampling 1000 from the posterior
    samples=posterior.sample(
        (1000,),
        x=x_obs,
        show_progress_bars=False
    )
    # log-probability of every drawn sample under the same conditional posterior
    evaluations=posterior.log_prob(
        samples,
        x=x_obs
    )
    fav_training_schedule['samples'][cell_name]=samples.numpy()
    fav_training_schedule['evaluations'][cell_name]=evaluations.numpy()

# Persist samples and evaluations of all processed cells in a single pickle file.
with open('save_model_parameters/favourite_training_schedule.pickle', 'wb') as f:
    pickle.dump(fav_training_schedule, f)

...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................