In [None]:
#Import packages
#---------------------------------------
import sys
import os
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import matplotlib
import warnings
import random
import arviz as az
import pymc as pm
warnings.filterwarnings("ignore", category=RuntimeWarning) 

#Import your modules
#---------------------------------------
import admin_functions as adfn
import cell_decomp_func as cdfn

# Define paths
#----------------------------------------------------------------------
l_code = '/Users/dominicburrows/Dropbox/PhD/Analysis/my_scripts/GitHub/'
l_data = '/Users/dominicburrows/Dropbox/PhD/analysis/Project/'
l_fig = '/Users/dominicburrows/Dropbox/PhD/figures/'

s_code = '/sphere/dburrows12/'
s_data = '/sphere/dburrows12/'

%load_ext autoreload
sys.version

In [None]:
# Noise model - Linear regression with noise covariates
#
# Reference ("standard") parameter set, kept here for comparison:
#   n_clusts   = 10
#   n_genes    = 600
#   n_cells    = 500
#   rate_range = 0,20
#   dropout    = 85%
#   E_std      = 9
#   a_std      = 6
#   g_std      = 8
#
# NOTE(review): g_std is set to 7 below, whereas the reference set lists 8 -
# confirm which value is intended for this run.

# Parameters of the simulated data
n_clusts, n_genes, n_cells = 10, 600, 500
rate_range = (0, 20)  # the two bounds of the uniform distribution for generating rates
per = 85  # percentage of dropped genes

# Standard deviations of the three noise terms
e_std = 9  # spot + gene noise
g_std = 7  # gene specific noise
a_std = 6  # spot specific noise

In [None]:
#Simulate spot data from simulated gene expression
spot_sim = cdfn.simulate_cell_mix(n_clusts, n_cells, n_genes).simulate_gene_exp(rate_range)
# Read the results through ordinary attribute access instead of reaching into
# the instance __dict__.
n_spots = spot_sim.n_spots
spots = spot_sim.spots
ref_exp = spot_sim.mean_exps
prop_vec = spot_sim.prop_vec

#Dropout a certain percentage of genes
# The number of dropped genes is computed with floor division on the product so
# float rounding in per/100 cannot make it one too small
# (e.g. int(0.29 * 100) == 28). Both the count and the candidate indices are
# loop-invariant, so they are built once.
n_drop = int(per * spots.shape[1] // 100)
gene_idx = np.arange(spots.shape[1])
for i in range(spots.shape[0]):
    rand_ind = np.random.choice(gene_idx, size=n_drop, replace=False) #random genes to zero in this spot
    spots[i, rand_ind] = 0 #in-place: this writes into the array obtained from spot_sim

#Add random noise and make int and remove negatives
#eps - independent draw for every (spot, gene) entry
spots = spots + np.random.normal(0, e_std, spots.shape)

#gamma - over each gene
gamma = np.random.normal(0, g_std, spots.shape[1])
gamma_mat = np.tile(gamma, (spots.shape[0], 1)) #same gene offsets repeated for every spot (row)
spots = spots + gamma_mat

#alpha - over each spot
alpha = np.random.normal(0, a_std, spots.shape[0])
alpha_mat = np.tile(alpha[:, np.newaxis], (1, spots.shape[1])) #same spot offset repeated for every gene (column)
spots = spots + alpha_mat

spots = spots.astype(int) #truncates towards zero
spots[spots < 0] = 0 #clip negative values
spots += 1 #shift every entry by one so that no zeros remain


#Poisson GLM with noise
Poisson_noise_GLM = pm.Model()

with Poisson_noise_GLM:

    beta = pm.HalfNormal("beta", sigma=1, shape=(n_spots, n_clusts)) # proportions of each cell at each spot
    eps = pm.Normal("eps", mu=0, sigma=1, shape=spots.shape) # random noise at each spot and gene
    # NOTE: from here on `gamma` and `alpha` are model variables; they rebind the
    # names used for the simulated NumPy noise vectors earlier in this cell.
    gamma = pm.Normal("gamma", mu=0, sigma=1, shape=spots.shape[1]) # random noise at each gene
    alpha = pm.Normal("alpha", mu=0, sigma=1, shape=spots.shape[0]) # random noise at each spot

    # Each spot is a linear combination of cell proportions and their reference gene expression
    lmd = pm.math.dot(beta, ref_exp)
    # Add the noise terms symbolically. Calling .eval() on gamma/alpha would bake
    # a single numeric draw into the rate as a constant, leaving both variables
    # disconnected from the likelihood (their posterior would just be the prior).
    # Broadcasting keeps them in the graph: gamma varies along genes (columns),
    # alpha along spots (rows).
    lmd = lmd + eps + gamma[None, :] + alpha[:, None]
    # NOTE(review): nothing constrains lmd to be positive once the Normal noise
    # terms are added - confirm this is acceptable for a Poisson rate.

    #Likelihood of observed data given Poisson rates
    y = pm.Poisson("y", mu=lmd, observed=spots)

with Poisson_noise_GLM:
    idata = pm.sample(draws=1000, chains=1)

# Kept inside this section so the block is self-contained; hoisted to the top
# of the section so the dependency is visible before first use.
from scipy.stats import linregress

# Posterior mean of beta over the draws of the first chain (computed once)
beta_mean = np.mean(idata.posterior['beta'][0], axis=0)

# Per-spot totals, repeated across the n_clusts columns, so that each row of
# mean_post is normalised to sum to one.
Nd = np.tile(np.asarray(np.sum(beta_mean, axis=1))[:, np.newaxis], (1, n_clusts))
mean_post = np.divide(beta_mean, Nd)

# Compare inferred against simulated proportions. linregress reports the
# correlation coefficient r in `rvalue`; it must be squared to obtain R^2.
line_fit = linregress(np.ravel(prop_vec), np.ravel(mean_post))
r2 = line_fit.rvalue ** 2

savename = 'RCTD-test_PARSTAND_ALL.npy'
# The saved items have different types and shapes. Recent NumPy versions refuse
# to build an array from such a ragged list implicitly, so fill an explicit
# object array element by element. Reload with np.load(..., allow_pickle=True).
results = np.empty(4, dtype=object)
for slot, item in enumerate((idata, mean_post, prop_vec, r2)):
    results[slot] = item
np.save(os.path.join(s_data, 'SPATIAL-TRANSCRIPTOMICS/', savename), results)