In [1]:
#Import packages
#---------------------------------------
import sys
import os
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import matplotlib
import warnings
import random
import arviz as az
import pymc as pm
warnings.filterwarnings("ignore", category=RuntimeWarning) 

#Import your modules
#---------------------------------------

import admin_functions as adfn
import cell_decomp_func as cdfn

# Define paths
#----------------------------------------------------------------------
# Absolute, machine-specific directories. Every path ends with a trailing
# slash so that file names can be appended by plain string concatenation.
# NOTE(review): these are hard-coded to one user's machine and external
# volumes; the notebook will not run elsewhere without editing them.
Fcode = '/Users/dominicburrows/Dropbox/PhD/Analysis/my_scripts/GitHub/'
Fdata = '/Users/dominicburrows/Dropbox/PhD/analysis/Project/'
Fdoc = '/Users/dominicburrows/Documents/'
F10t = '/Volumes/Dominic 10tb/'
F10t2 = '/Volumes/Dominic 10tb2/'
Ftm = '/Volumes/Dominic Time Machine/'
Ffig = '/Users/dominicburrows/Dropbox/PhD/figures/'

# Load the autoreload extension (modules are reloaded on demand with %autoreload)
%load_ext autoreload
# Display the interpreter version as the cell output, for provenance
sys.version

'3.7.16 (default, Jan 17 2023, 09:28:58) \n[Clang 14.0.6 ]'

In [2]:
def run_pyRCTD(n_clusts, n_cells, n_genes, rate_range, draws=1000, random_seed=None):
    """Simulate spot mixtures and recover cell-type proportions with a Poisson model.

    Simulated spots are generated with ``cdfn.simulate_cell_mix``; a PyMC model
    with a HalfNormal prior on per-spot, per-cell-type weights (``beta``) and a
    Poisson likelihood on the spot counts is then sampled with a single chain.

    Parameters
    ----------
    n_clusts : int
        Number of cell types; also the second dimension of ``beta``.
    n_cells : int
        Number of cells passed to the simulator.
    n_genes : int
        Number of genes passed to the simulator.
    rate_range : tuple
        Passed unchanged to ``simulate_gene_exp``.
    draws : int, optional
        Number of posterior draws for ``pm.sample`` (default 1000, as before).
    random_seed : int or None, optional
        Seed forwarded to ``pm.sample`` for a reproducible sampler run. The
        default ``None`` keeps the previous unseeded behaviour. The simulation
        step itself is not seeded here.

    Returns
    -------
    mean_post
        Posterior mean of ``beta`` (first chain), shape ``(n_spots, n_clusts)``,
        normalised so each spot's weights sum to 1.
    prop_vec
        The simulator's ``prop_vec`` attribute, returned unchanged
        (presumably the ground-truth proportions; confirm in ``cdfn``).
    rvalue : float
        Pearson correlation coefficient (r, NOT r squared) of a linear fit of
        the flattened ``mean_post`` against the flattened ``prop_vec``.
    """
    # scipy is not imported at notebook level, so keep this import local.
    from scipy.stats import linregress

    #Simulate spot data from simulated gene expression
    spot_sim = cdfn.simulate_cell_mix(n_clusts, n_cells, n_genes).simulate_gene_exp(rate_range)
    n_spots = spot_sim.n_spots
    ref_exp = spot_sim.mean_exps  # presumably (n_clusts, n_genes), given the dot product below
    prop_vec = spot_sim.prop_vec

    # Add a pseudo-count of 1 to remove zeros. This builds a new array rather
    # than modifying the simulation object's own array in place.
    spots = spot_sim.spots + 1

    # Total counts per spot, kept as a column so it broadcasts across genes.
    total_counts = np.sum(spots, axis=1, keepdims=True)

    #Run without logs
    basic_model = pm.Model()
    with basic_model:
        beta = pm.HalfNormal("beta", sigma=1, shape=(n_spots, n_clusts))
        lmd = pm.math.dot(beta, ref_exp)
        #Likelihood of observed data given Poisson rates (scaled by total counts)
        y = pm.Poisson("y", mu=lmd * total_counts, observed=spots)
        idata = pm.sample(draws=draws, chains=1, random_seed=random_seed)

    # Posterior mean over draws of the single chain, computed once.
    beta_mean = np.mean(idata.posterior['beta'][0], axis=0)
    # Normalise each spot's weights to proportions that sum to 1.
    row_totals = np.asarray(np.sum(beta_mean, axis=1))
    mean_post = np.divide(beta_mean, row_totals[:, None])

    line_fit = linregress(np.ravel(prop_vec), np.ravel(mean_post))
    return mean_post, prop_vec, line_fit.rvalue

In [3]:
#Parameter grids for the simulation sweeps: ten evenly spaced integers each
clust_range = np.arange(start=3, stop=100, step=10)
genes_range = np.arange(start=500, stop=10000, step=1000)
cells_range = np.arange(start=500, stop=10000, step=1000)
rate_rangel = np.arange(start=5, stop=100, step=10)
#Show all four grids as the cell output
(clust_range, genes_range, cells_range, rate_rangel)

(array([ 3, 13, 23, 33, 43, 53, 63, 73, 83, 93]),
 array([ 500, 1500, 2500, 3500, 4500, 5500, 6500, 7500, 8500, 9500]),
 array([ 500, 1500, 2500, 3500, 4500, 5500, 6500, 7500, 8500, 9500]),
 array([ 5, 15, 25, 35, 45, 55, 65, 75, 85, 95]))

In [4]:
#Re-display the rate grid that the sweep below iterates over
rate_rangel

array([ 5, 15, 25, 35, 45, 55, 65, 75, 85, 95])

In [5]:
#Reload imported modules now (requires the autoreload extension loaded in the first cell)
%autoreload

In [6]:
curr_list = rate_rangel
savepref = 'rate'

#Fixed parameters of the simulated data (only the maximum rate is swept)
n_clusts = 10
n_genes = 1000
n_cells = 500

for l in curr_list:
    rate_range = 0,l #(min, max) of uniform distribution for generating rates

    #NOTE: the third return value is the Pearson r of the fit, not r squared
    mean_post, prop_vec, r2 = run_pyRCTD(n_clusts, n_cells, n_genes, rate_range)
    savename = 'RCTD-test_' + savepref + '-' + str(l) + '.npy'

    #Pack the three results into an explicit object array. Passing the ragged
    #list [array, array, scalar] straight to np.save relies on the deprecated
    #implicit ragged-sequence conversion, which raises ValueError on NumPy >= 1.24.
    #The file is still a pickled 3-element object array: load it with
    #np.load(path, allow_pickle=True).
    results = np.empty(3, dtype=object)
    results[0] = mean_post
    results[1] = prop_vec
    results[2] = r2
    np.save(os.path.join(Fdata, 'SPATIAL-TRANSCRIPTOMICS', savename), results)
    print(l)


Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 174 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 766 seconds.
  arr = np.asanyarray(arr)


5
Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 186 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 808 seconds.
  arr = np.asanyarray(arr)


15
Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 180 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 778 seconds.
  arr = np.asanyarray(arr)


25
Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 172 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 734 seconds.
  arr = np.asanyarray(arr)


35
Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 173 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 760 seconds.
  arr = np.asanyarray(arr)


45
Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 176 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 695 seconds.
  arr = np.asanyarray(arr)


55
Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 178 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 794 seconds.
  arr = np.asanyarray(arr)


65
Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 159 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 691 seconds.
  arr = np.asanyarray(arr)


75
Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 173 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 737 seconds.
  arr = np.asanyarray(arr)


85
Loaded parameters: 10 cell types , 500 cells, & 1000 genes.
Created spot mixtures from simulated data: 182 spots.


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta]


Sampling 1 chain for 1_000 tune and 1_000 draw iterations (1_000 + 1_000 draws total) took 767 seconds.


95


  arr = np.asanyarray(arr)
