In [None]:
# Mount Google Drive inside the Colab runtime; every input and output path
# used below lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


After inference, we use SCRaPL's generative model to sample the latent expression and accessibility spaces. These samples are then used in Seurat's integration pipeline.

In [None]:
from IPython import display
import pandas as pd
import numpy as np
import scipy
import scipy.stats

from matplotlib import pyplot as plt
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
from tensorflow import keras

from tensorflow.keras import layers
import tensorflow_probability as tfp
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()

import pickle
from timeit import default_timer as timer

In [None]:
# Short aliases for the TensorFlow Probability sub-modules used throughout.
tfd, tfb = tfp.distributions, tfp.bijectors

# Root folder (on the mounted Drive) that all data/result paths are relative to.
Folder = '/content/drive/MyDrive/'

In [None]:
#Load samples for latent parameters.
def _load_nuts_samples(param_name):
    """Load the pickled NUTS samples of one latent parameter.

    Args:
        param_name: Parameter tag embedded in the file name, e.g. 'cor' or
            'm_acc' (file: nuts_<param_name>_atac_tmp_1.pickle).

    Returns:
        Whatever object was pickled into that file.
    """
    path = Folder+'SCRaPL/Real/Results_atac_human_col/nuts_'+param_name+'_atac_tmp_1.pickle'
    # NOTE: pickle.load can execute arbitrary code; only point this at result
    # files produced by your own inference runs.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


cor_nuts = _load_nuts_samples('cor')
m_acc_nuts = _load_nuts_samples('m_acc')
m_exp_nuts = _load_nuts_samples('m_exp')
s_acc_nuts = _load_nuts_samples('s_acc')
s_exp_nuts = _load_nuts_samples('s_exp')
inf_exp_nuts = _load_nuts_samples('inf_exp')
inf_acc_nuts = _load_nuts_samples('inf_acc')

In [None]:
#Load raw expression and accessibility data and normalization constants.
# `sep` is passed by keyword: pandas deprecated positional arguments after the
# file path in 1.3 and raises TypeError for them from 2.0 onwards.
yy_acc_pd = pd.read_csv(Folder+'SCRaPL/Real/Data/human_acc_tmp_1.csv',sep=',',header=[0])
yy_exp_pd = pd.read_csv(Folder+'SCRaPL/Real/Data/human_rna_tmp_1.csv',sep=',',header=[0])

# The normalization tables additionally use their first column as the row index.
Norm_acc_pd = pd.read_csv(Folder+'SCRaPL/Real/Data/nrm_human_acc_tmp.csv',sep=',',header=[0],index_col=[0])
Norm_exp_pd = pd.read_csv(Folder+'SCRaPL/Real/Data/nrm_human_rna_tmp.csv',sep=',',header=[0],index_col=[0])

In [None]:
def _as_float32_tensor(frame):
    """Convert a pandas object to a float32 TensorFlow tensor."""
    return tf.convert_to_tensor(frame, dtype=tf.float32)


# Raw count matrices, converted as-is.
yy_acc = _as_float32_tensor(yy_acc_pd)
yy_exp = _as_float32_tensor(yy_exp_pd)

# Normalization constants are transposed after conversion.
# NOTE(review): presumably this turns a per-cell column into a [1, N_cells] row
# so it broadcasts across genes inside SCRaPL - confirm against the CSV layout.
Norm_acc = tf.transpose(_as_float32_tensor(Norm_acc_pd))
Norm_exp = tf.transpose(_as_float32_tensor(Norm_exp_pd))

In [None]:
# NOTE: tfb.Chain applies its bijectors right-to-left (last in the list first).

# Affine map x -> 2*x - 1 and its inverse y -> (y + 1)/2.
aff = tfb.Chain(bijectors=[
    tfb.Shift(shift=-1.),
    tfb.Scale(scale=2.),
])
aff_inv = tfb.Invert(aff)

# Elementary bijectors paired with their inverses.
exp = tfb.Exp()
log = tfb.Invert(exp)

tanh = tfb.Tanh()
tanh_inv = tfb.Invert(tanh)

sigm = tfb.Sigmoid()
sigm_inv = tfb.Invert(sigm)

# x -> (tanh(x/2) + 1)/2, i.e. real line -> (0, 1); the inverse maps (0, 1) back
# to the real line and is used for the correlation prior in SCRaPL.
cor_trsf = tfb.Chain(bijectors=[
    aff_inv,
    tanh,
    tfb.Scale(scale=0.5),
])
cor_trsf_inv = tfb.Invert(cor_trsf)

# x -> eps/2 + (1 - eps)*NormalCDF(x): real line -> (eps/2, 1 - eps/2).
eps=0.001
bin_bij = tfb.Chain(bijectors=[
    tfb.Shift(shift=eps/2.0),
    tfb.Scale(scale=1.0-eps),
    tfb.NormalCDF(),
])

# Maps from the unconstrained latent parametrisation to natural scales:
#   cor_bij: x -> tanh(x/2)   (value in (-1, 1))
#   std_bij: x -> exp(-x)     (positive value)
#   sqr_bij: x -> x**2
cor_bij = tfb.Chain(bijectors=[
    tanh,
    tfb.Scale(scale=0.5),
])
std_bij = tfb.Chain(bijectors=[
    exp,
    tfb.Scale(scale=-1.0),
])
sqr_bij = tfb.Square()

In [None]:
#SCRaPL's generative model
def SCRaPL(N_genes,N_cells,Nrm_acc,Nrm_rna):
    """Build SCRaPL's generative model as an auto-batched joint distribution.

    The joint distribution yields, in this order:
      cor_lt, m_acc_lt, m_exp_lt, s_acc_lt, s_exp_lt, infl_acc_lt, infl_rna_lt
        -- gene-level latent parameters, each of shape [N_genes, 1];
      x_acc, x_exp
        -- per-gene/per-cell latent Normal variables, shape [N_genes, N_cells];
      y_acc, y_exp
        -- observed counts drawn from a two-component Poisson mixture.
    Callers that pin or unpack values positionally rely on this order.

    Args:
        N_genes: Number of rows (genes/peaks) of the model.
        N_cells: Number of columns (cells) of the model.
        Nrm_acc: Accessibility normalization constants; their log is added to
            the latent log-rate of y_acc. Assumed positive and broadcastable
            against [N_genes, N_cells] (e.g. shape [1, N_cells]) - TODO confirm.
        Nrm_rna: Same as Nrm_acc, for the expression counts y_exp.

    Returns:
        A tfd.JointDistributionCoroutineAutoBatched wrapping the model.
    """
    def prior():
        # Gene-level priors, all defined on the unconstrained ("_lt") scale.
        # Correlation: Beta variable pushed through cor_trsf_inv.
        cor_lt = yield tfd.TransformedDistribution( distribution = tfd.Beta( concentration0 = 15.0*tf.ones([N_genes,1]), concentration1=15.0*tf.ones([N_genes,1])), bijector= cor_trsf_inv, name = "cor_lt" )
        # Means of the latent accessibility / expression variables.
        m_acc_lt = yield tfd.Normal(loc=3*tf.ones([N_genes,1]),scale=tf.ones([N_genes,1]), name = "m_acc_lt")
        m_exp_lt = yield tfd.Normal(loc=4*tf.ones([N_genes,1]),scale=tf.ones([N_genes,1]), name = "m_exp_lt")
        # Scale parameters: log of an InverseGamma variable.
        s_acc_lt = yield tfd.TransformedDistribution( distribution = tfd.InverseGamma(concentration=2.5*tf.ones([N_genes,1]),scale=4.5*tf.ones([N_genes,1])),bijector= log, name = "s_acc_lt" )
        s_exp_lt = yield tfd.TransformedDistribution( distribution = tfd.InverseGamma(concentration=2.5*tf.ones([N_genes,1]),scale=4.5*tf.ones([N_genes,1])),bijector= log , name = "s_exp_lt")
        # Zero-inflation probabilities: inverse-sigmoid (logit) of a Beta variable.
        infl_acc_lt = yield tfd.TransformedDistribution( distribution = tfd.Beta( concentration0 =8.0*tf.ones([N_genes,1]), concentration1=2.0*tf.ones([N_genes,1])), bijector= sigm_inv, name = "infl_acc_lt" )
        infl_rna_lt = yield tfd.TransformedDistribution( distribution = tfd.Beta( concentration0 =8.0*tf.ones([N_genes,1]), concentration1=2.0*tf.ones([N_genes,1])), bijector= sigm_inv, name = "infl_rna_lt" )

        # Map the unconstrained latents to their natural scales:
        # cor = tanh(cor_lt/2), s = exp(-s_lt), infl = sigmoid(infl_lt).
        cor = cor_bij.forward(cor_lt)
        s_acc = std_bij.forward(s_acc_lt)
        s_exp = std_bij.forward(s_exp_lt)
        infl_acc = sigm.forward(infl_acc_lt)
        infl_rna = sigm.forward(infl_rna_lt)

        # Broadcast every [N_genes, 1] parameter to [N_genes, N_cells] by
        # multiplying with a matrix of ones.
        mm_acc = tf.math.multiply( m_acc_lt,tf.ones([N_genes,N_cells]))
        mm_exp = tf.math.multiply( m_exp_lt,tf.ones([N_genes,N_cells]))
        ss_acc = tf.math.multiply( s_acc   ,tf.ones([N_genes,N_cells]))
        ss_exp = tf.math.multiply( s_exp   ,tf.ones([N_genes,N_cells]))
        ccor =   tf.math.multiply( cor     ,tf.ones([N_genes,N_cells]))
        p_acc =  tf.math.multiply( infl_acc,tf.ones([N_genes,N_cells]))
        p_rna =  tf.math.multiply( infl_rna,tf.ones([N_genes,N_cells]))

        # Log of the normalization constants (`log` is the inverse of tfb.Exp),
        # repeated along the gene axis.
        nrm_acc = tf.math.multiply( log.forward(Nrm_acc),tf.ones([N_genes,1]))
        nrm_rna = tf.math.multiply( log.forward(Nrm_rna),tf.ones([N_genes,1]))

        # Latent accessibility, then latent expression conditional on it. The
        # conditional mean/std below are those of a bivariate Normal with
        # correlation ccor:
        #   mean = m_exp + ccor * s_exp * (x_acc - m_acc) / s_acc
        #   std  = sqrt((1 - ccor^2) * s_exp^2)
        x_acc = yield tfd.Normal(loc = mm_acc, scale = ss_acc,name="x_acc")
        m_cnd_exp = mm_exp+tf.math.multiply(tf.math.divide(tf.math.multiply(ss_exp,x_acc-mm_acc),ss_acc),ccor)
        s_cnd_exp = tf.math.sqrt(tf.math.multiply(1-tf.math.square(ccor),tf.math.square(ss_exp)))

        x_exp = yield tfd.Normal(loc = m_cnd_exp, scale = s_cnd_exp,name="x_exp")

        # Two-component mixture per entry: component 0 (probability p) is a
        # Poisson with log-rate -20, i.e. an almost-zero rate acting as the
        # zero-inflation component; component 1 (probability 1-p) is a Poisson
        # with log-rate x + log(normalization constant).
        pp_acc = tf.stack([p_acc,1-p_acc],axis=-1)
        x_acc_lt = tf.stack([-20*tf.ones_like(x_acc),x_acc+nrm_acc],axis=-1)

        pp_rna = tf.stack([p_rna,1-p_rna],axis=-1)
        x_exp_lt = tf.stack([-20*tf.ones_like(x_exp),x_exp+nrm_rna],axis=-1)

        # Observed accessibility and expression counts.
        y_acc = yield tfd.MixtureSameFamily(
                                                          mixture_distribution = tfd.Categorical(probs=pp_acc),
                                                          components_distribution = tfd.Poisson(log_rate=x_acc_lt),
                                                          name="y_acc")
        y_exp = yield tfd.MixtureSameFamily(
                                                          mixture_distribution = tfd.Categorical(probs=pp_rna),
                                                          components_distribution = tfd.Poisson(log_rate=x_exp_lt),
                                                          name="y_exp")

    # Wrap the generator so sampling/log-prob handle batching automatically.
    comp_var_coroutine = tfd.JointDistributionCoroutineAutoBatched(prior)
    return comp_var_coroutine

In [None]:
# Dynamic shape of the accessibility matrix: entry 0 is stored as the gene/peak
# count and entry 1 as the cell count.
acc_shape = tf.shape(yy_acc)
x_genes = acc_shape[0]
x_cells = acc_shape[1]

In [None]:
def _mean_over_samples(samples):
    """Average `samples` over axis 0 and add a new axis in position 1.

    NOTE(review): axis 0 is presumably the MCMC draw axis, which would make the
    result a [N_genes, 1] column - confirm against the inference notebook.
    """
    return tf.reduce_mean(samples, axis=0)[:, tf.newaxis]


# Point estimates of the gene-level latent parameters.
cor_mn = _mean_over_samples(cor_nuts)
m_acc_mn = _mean_over_samples(m_acc_nuts)
m_exp_mn = _mean_over_samples(m_exp_nuts)
s_acc_mn = _mean_over_samples(s_acc_nuts)
s_exp_mn = _mean_over_samples(s_exp_nuts)
inf_acc_mn = _mean_over_samples(inf_acc_nuts)
inf_exp_mn = _mean_over_samples(inf_exp_nuts)

In [None]:
#As the dataset consists of 60000 peaks and 3000 cells there is not enough memory to generate latent state parameters. For that reason we break the data into 3 chunks.
prt = 3  # 1-based index of the chunk processed in this run (1, 2 or 3)
chunk_size = 20000
n_rows = int(cor_mn.shape[0])

# Python slices are half-open, [aaa1, aaa). The end used to be chunk_size*prt + 1,
# which made every chunk 20001 rows long and duplicated the boundary rows
# (20000, 40000) in two consecutive chunks. The end is also clamped to the
# number of available rows so it never exceeds the data. Rows beyond
# chunk_size*3, if any, need a further value of prt.
aaa1 = chunk_size*(prt-1)
aaa = min(chunk_size*prt, n_rows)

cor_mn_1 = cor_mn[aaa1:aaa]
m_acc_mn_1 = m_acc_mn[aaa1:aaa]
m_exp_mn_1 = m_exp_mn[aaa1:aaa]
s_acc_mn_1 = s_acc_mn[aaa1:aaa]
s_exp_mn_1 = s_exp_mn[aaa1:aaa]
inf_acc_mn_1 = inf_acc_mn[aaa1:aaa]
inf_exp_mn_1 = inf_exp_mn[aaa1:aaa]

In [None]:
# Size the model from the rows actually present in the current chunk rather
# than from the nominal bounds aaa-aaa1: slicing silently clamps at the end of
# the array, and a mismatch between N_genes and the pinned parameters' row
# count breaks broadcasting inside the model.
n_chunk_genes = tf.shape(cor_mn_1)[0]
mdl = SCRaPL(tf.cast(n_chunk_genes,dtype=tf.int64),x_cells,Norm_acc,Norm_exp)

In [None]:
#Sample latent space parameters
# Pin the seven gene-level parameters to their posterior means; the four
# trailing None entries (x_acc, x_exp, y_acc, y_exp) are left for the model to sample.
pinned_values = (
    cor_mn_1,
    m_acc_mn_1,
    m_exp_mn_1,
    s_acc_mn_1,
    s_exp_mn_1,
    inf_acc_mn_1,
    inf_exp_mn_1,
    None,
    None,
    None,
    None,
)
joint_draw = mdl.sample(value=pinned_values)

# Entries 0-6 echo the pinned parameters; keep only the sampled variables.
xx_acc, xx_exp, yyy_acc, yyy_exp = joint_draw[7:]



In [None]:
#Save latent space parameters
def _latent_to_frame(latent, template, row_start, row_stop):
    """Exponentiate `latent` and wrap it in a DataFrame labelled like `template`.

    Column labels are taken from `template`; row labels are the slice
    [row_start:row_stop] of `template`'s index.
    """
    values = tf.squeeze(tf.exp(latent)).numpy()
    return pd.DataFrame(values, columns= template.columns, index = template.index[row_start:row_stop])


s1 = _latent_to_frame(xx_acc, yy_acc_pd, aaa1, aaa)
s2 = _latent_to_frame(xx_exp, yy_exp_pd, aaa1, aaa)

# One CSV per modality and chunk; the chunk number is appended to the file name.
out_prefix = Folder+'SCRaPL/Real/Results_atac_human_col/'
s1.to_csv(out_prefix+'Seurat_int_acc_tmp_1_'+str(prt)+".csv", sep =",")
s2.to_csv(out_prefix+'Seurat_int_exp_tmp_1_'+str(prt)+".csv", sep =",")