# PyMC3 minimal example

In [1]:
import os
import numpy as np, pandas as pd
# Root directory of the simulated deletion data set (despite the name, this is
# not the process working directory). Defaults to the original author's local
# checkout; set the CNV_DATA_DIR environment variable to run the notebook on
# another machine. A leading `~` in the override is expanded.
cwd = os.path.expanduser(
    os.environ.get(
        "CNV_DATA_DIR",
        "/home/min/GIT/cnv-gene-mapping/data/deletion_simu_30_shape0.777_scale0.843",
    )
)

## Load X and y

In [3]:
# Predictor matrix for block 1815-1826: gzip-compressed, tab-separated, no
# header row, every column parsed as float.
# NOTE(review): presumably one row per sample and one column per gene -- confirm.
X = pd.read_csv(
    f'{cwd}/block_1815_1826/deletion.genes.block30.for_simu.sample.combined.genes.block_1815_1826.gz',
    sep = "\t",
    header = None,
    dtype = float,
    compression = "gzip",
)

# Integer response vector; it is passed to get_model as the observed outcome.
y = np.loadtxt(
    f'{cwd}/deletion.genes.block30.for_simu.sample.combined.y.gz',
    dtype = int,
)

In [7]:
# Hyperparameters used as the default pi0 / mu / sigma of get_model.
# NOTE(review): the `_varbvs` suffix suggests these were estimated with the
# varbvs package -- confirm the provenance.
pi_varbvs, mu_varbvs, si_varbvs = (
    0.051366009925488,  # default prior inclusion probability (pi0)
    0.783230896500752,  # default mean of the Normal prior on beta (mu)
    0.816999481742865,  # default sd of the Normal prior on beta (sigma)
)

## Spike and Slab model

In [4]:
import pymc3 as pm
import theano.tensor as tt

In [5]:
# Configuration shared by the model-building and sampling cells below.

# Probability whose logit is the lower bound of the intercept prior in get_model.
prevalence = 0.05
# Mean of y (the fraction of cases when y is 0/1 coded); its logit is the upper
# bound of the intercept prior. np.mean is vectorized, whereas the builtin
# sum() walks the NumPy array element by element in Python.
case_prop = np.mean(y)
# Number of posterior draws per chain; the sampling cell tunes for a further 25%.
iteration = 2000
# Random seed passed to pm.sample.
seed = 1
# Number of chains passed to pm.sample.
n_chain = 1

In [8]:
def get_model(y, X, pi0 = pi_varbvs, mu = mu_varbvs, sigma = si_varbvs,
              prevalence = prevalence, case_prop = None):
    """Build a spike-and-slab logistic regression model in PyMC3.

    Every column j of ``X`` gets a binary inclusion indicator ``xi[j]`` and a
    Normal coefficient ``beta[j]``; the effective coefficient is
    ``xi[j] * beta[j]``. The outcome is Bernoulli with success probability
    ``invlogit(alpha + dot(X, xi * beta))``.

    Parameters
    ----------
    y : array-like
        Observed outcomes for the Bernoulli likelihood (0/1), one per row of ``X``.
    X : 2-D array or DataFrame
        Predictor matrix; ``X.shape[1]`` sets the number of coefficients.
    pi0 : float
        Prior inclusion probability of each predictor.
    mu, sigma : float
        Mean and standard deviation of the Normal prior on ``beta``.
    prevalence : float
        Probability whose logit is the lower bound of the Uniform prior on the
        intercept ``alpha``. As with ``pi0``/``mu``/``sigma``, the default is the
        value of the module-level variable at the time this function is defined.
    case_prop : float, optional
        Probability whose logit is the upper bound of the Uniform prior on
        ``alpha``. Defaults to the mean of ``y``, so the bound always matches
        the data actually passed in.

    Returns
    -------
    pymc3.Model
        The model, not yet sampled.

    Raises
    ------
    ValueError
        If ``0 < prevalence < case_prop < 1`` does not hold; the Uniform prior
        on ``alpha`` would otherwise have an empty or infinite support.
    """
    if case_prop is None:
        case_prop = np.mean(y)
    if not 0 < prevalence < case_prop < 1:
        raise ValueError(
            "intercept prior needs 0 < prevalence < case_prop < 1, got "
            f"prevalence={prevalence}, case_prop={case_prop}"
        )

    def invlogit(x):
        return 1/(1 + tt.exp(-x))

    n_var = X.shape[1]
    model = pm.Model()
    with model:
        # Inclusion indicator (1 = predictor is in the model); pi0 is its prior probability.
        xi = pm.Bernoulli('xi', pi0, shape = n_var)
        # Intercept on the logit scale, bounded by logit(prevalence) and logit(case_prop).
        alpha = pm.distributions.continuous.Uniform(
            "alpha",
            lower = np.log(prevalence / (1-prevalence)),
            upper = np.log(case_prop / (1-case_prop)),
        )
        # Prior for the non-zero coefficients.
        beta = pm.Normal('beta', mu = mu, sd = sigma, shape = n_var)
        # Linear predictor without the intercept; excluded predictors (xi = 0) contribute nothing.
        linear_pred = pm.math.dot(X, xi * beta)
        # Data likelihood.
        pm.Bernoulli('y_obs', invlogit(linear_pred + alpha), observed = y)
    return model

In [9]:
# Build the spike-and-slab model on the loaded data, keeping the default hyperparameters.
model = get_model(y = y, X = X)

In [10]:
# Echo the model object; the bare repr only confirms that construction succeeded.
model

<pymc3.model.Model at 0x7fa3d71a2950>

In [11]:
# Draw `iteration` posterior samples per chain, after a tuning phase a quarter
# of that length.
with model:
    trace1 = pm.sample(
        draws = iteration,
        tune = int(0.25*iteration),
        chains = n_chain,
        cores = 8,
        random_seed = seed,
        progressbar = True,
    )

Sampling chain 0, 20 divergences: 100%|##########| 2500/2500 [01:54<00:00, 21.77it/s]


In [13]:
# Per-predictor posterior summaries, reducing over the draw axis (axis 0) of the
# trace arrays. Axis reductions replace np.apply_along_axis, which loops over
# the columns in Python.
xi_draws = trace1['xi']
effect_draws = xi_draws * trace1['beta']  # beta counts only in draws where the predictor is included
n_included = xi_draws.sum(axis = 0)       # number of draws that include each predictor
pd.DataFrame({'inclusion_probability': xi_draws.mean(axis = 0),
              'beta': effect_draws.mean(axis = 0),
              # Mean of beta over the draws that include the predictor. A predictor
              # that was never included gets NaN via the masked divide, rather
              # than a 0/0 RuntimeWarning.
              'beta_given_inclusion': np.divide(effect_draws.sum(axis = 0), n_included,
                                                out = np.full(n_included.shape, np.nan),
                                                where = n_included > 0)
              })

Unnamed: 0,inclusion_probability,beta,beta_given_inclusion
0,0.11,0.089534,0.813947
1,0.116,0.102113,0.880282
2,0.1045,0.082519,0.789652
3,0.092,0.06514,0.708042
4,0.0875,0.058763,0.671578
5,0.058,0.033424,0.576279
6,0.1865,0.353598,1.895968
7,0.86,1.79625,2.088663
8,0.021,0.00521,0.248075
9,0.028,0.012246,0.437342
