In [1]:
#Import packages
#---------------------------------------
import sys
import os
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import matplotlib
import warnings
import random
import arviz as az
import pymc as pm
# Silence every warning of category RuntimeWarning for the rest of the session.
# NOTE(review): this is a blanket filter, so any genuine numerical warning raised
# under this category is hidden as well - confirm that is intended.
warnings.filterwarnings("ignore", category=RuntimeWarning) 

#Import your modules
#---------------------------------------
import admin_functions as adfn
import cell_decomp_func as cdfn


# Define paths
#----------------------------------------------------------------------
l_code = '/Users/dominicburrows/Dropbox/PhD/Analysis/my_scripts/GitHub/'
l_data = '/Users/dominicburrows/Dropbox/PhD/analysis/Project/'
l_fig = '/Users/dominicburrows/Dropbox/PhD/figures/'

s_code = '/cndd3/dburrows/CODE/'
s_data = '/cndd3/dburrows/DATA/'
%load_ext autoreload
sys.version

'3.11.0 | packaged by conda-forge | (main, Jan 14 2023, 12:27:40) [GCC 11.3.0]'

In [15]:
#==============================================================
def run_pyRCTD(idata, prop_vec):
#==============================================================
    """
    Summarise a fitted model: posterior-mean beta and how well it tracks prop_vec.

    Inputs:
        idata: sampling result exposing idata.posterior['beta']. Only the first
            entry along the leading axis is used (the first chain, for an ArviZ
            InferenceData), and it is averaged over its own first axis.
        prop_vec: values to compare the estimate against. It is flattened before
            the fit, so it must hold as many elements as the averaged beta.

    Returns:
        (mean_post, r2): the averaged beta, and the squared correlation
        coefficient of a straight-line fit of the flattened mean_post (y)
        against the flattened prop_vec (x).
    """
    from scipy.stats import linregress

    first_chain_beta = idata.posterior['beta'][0]
    mean_post = np.mean(first_chain_beta, axis=0)

    reference_flat = np.ravel(prop_vec)
    estimate_flat = np.ravel(mean_post)
    r_value = linregress(reference_flat, estimate_flat).rvalue

    return mean_post, r_value**2

In [3]:
# Noise levels for the sweep: 20 geometrically spaced values from 5 to 100,
# truncated to integers, with the two endpoints then pinned to 0 and 99.
# NOTE(review): 99 (rather than 100) presumably keeps every level at two digits
# for the zero-padded output file names - confirm.
eps_list = np.geomspace(start=5, stop=100, num=20).astype(int)
eps_list[[0, -1]] = (0, 99)

In [18]:
# Run epsilon noise sweep
#----------------------------------------------------------------------
# For every level in eps_list: simulate spots, add noise through e_std only,
# fit the basic and the noise-aware Poisson model, and save both estimates.

#Define parameters of simulated data
n_clusts = 5
n_genes = 800
n_cells = 100
rate_range = 0,40 #(min, max) of uniform distribution for generating rates
mode = 'epsilon' #label of the noise source being varied; goes into the file names

#Create the output folder before the loop, so a bad path fails here instead of
#at the first np.save, after two sampling runs have already been paid for
out_dir = os.path.join(s_data, 'spatial_transcriptomics')
os.makedirs(out_dir, exist_ok=True)


#==============================================================
def _build_rctd_model(ref_exp, spots, n_clusts, n_spots, n_genes, spot_gene_noise=False):
#==============================================================
    """
    Build the Poisson model that explains spot counts as a mix of cell types.

    Model:
        beta ~ HalfNormal(sigma=1)               (spots x celltypes)
        eps  ~ Normal(0, 1)                      (spots x genes, only if spot_gene_noise)
        lmd  = dot(beta, ref_exp)                (spots x genes), multiplied by
               exp(eps) when spot_gene_noise is True
        y    ~ Poisson(lmd * N_g), observed = spots, where N_g is the total
               count of each spot (row sums of spots, as a column)

    Inputs:
        ref_exp: reference expression per cell type, celltypes x genes
        spots: observed counts, spots x genes
        n_clusts, n_spots, n_genes (int): lengths of the celltypes / spots / genes dims
        spot_gene_noise (bool): add the multiplicative exp(eps) term to the rates

    Returns:
        the pm.Model, not yet sampled
    """
    coords = {"celltypes": np.arange(n_clusts),
              "spots": np.arange(n_spots),
              "genes": np.arange(n_genes)}

    with pm.Model(coords=coords) as model:
        #Declare data
        mean_exp = pm.Data('mean_exp', ref_exp, mutable=False, dims=['celltypes','genes'])

        # Priors for unknown model parameters
        beta = pm.HalfNormal("beta", sigma=1, dims=['spots','celltypes']) # celltype proportions
        if spot_gene_noise:
            eps = pm.Normal("eps", mu=0, sigma=1, dims=['spots', 'genes']) # random noise at each spot and gene
            rates = pm.math.exp(eps)*pm.math.dot(beta, mean_exp)
        else:
            rates = pm.math.dot(beta, mean_exp)
        lmd = pm.Deterministic('lmd', rates, dims=['spots','genes'])

        #Convert from proportions to counts
        N_g = pm.Data('N_g', np.sum(spots, 1).reshape(n_spots,1), mutable=False)

        #Likelihood of observed data given Poisson rates
        pm.Poisson("y", mu=lmd*N_g, observed=spots)

    return model


for e in eps_list:
    #Simulate spot data from simulated gene expression
    spot_sim = cdfn.simulate_cell_mix(n_clusts, n_cells, n_genes).simulate_gene_exp(rate_range)
    sim_vars = vars(spot_sim)
    n_spots = sim_vars['n_spots']
    spots = sim_vars['spots']
    ref_exp = sim_vars['mean_exps']
    prop_vec = sim_vars['prop_vec']

    #Add in noise: only e_std is set, the other noise arguments are passed as None
    spots = cdfn.add_noise(spots, per=None, a_std=None, g_std=None, e_std=e)

    #Simple Linear regression (no noise term)
    basic_model = _build_rctd_model(ref_exp, spots, n_clusts, n_spots, n_genes)
    with basic_model:
        basic_data = pm.sample(random_seed=1, draws=200, chains=1, discard_tuned_samples=False)

    #Poisson noise (extra exp(eps) factor per spot and gene)
    Poisson_noise_model = _build_rctd_model(ref_exp, spots, n_clusts, n_spots, n_genes,
                                            spot_gene_noise=True)
    with Poisson_noise_model:
        noise_data = pm.sample(random_seed=1, draws=200, chains=1, discard_tuned_samples=False)

    #Posterior-mean proportions and their R^2 against the simulated proportions
    basic_post, basic_r2 = run_pyRCTD(basic_data, prop_vec)
    noise_post, noise_r2 = run_pyRCTD(noise_data, prop_vec)

    #Zero-pad the noise level to at least two digits for the file name
    pref = str(e).zfill(2)
    np.save(os.path.join(out_dir, 'RCTD-test-basic-gauss-model_' + mode + '-' + pref + '.npy'),
            np.array([prop_vec, basic_post, basic_r2], dtype=object))
    np.save(os.path.join(out_dir, 'RCTD-test-noise-gauss-model_' + mode + '-' + pref + '.npy'),
            np.array([prop_vec, noise_post, noise_r2], dtype=object))
    print(e)
