In [1]:
%load_ext rpy2.ipython

## The internal model

### $P(x_i|\theta_1, \theta_0) = (1 - \pi)\prod_{j} P(x_{ij}|\theta_{0j}) + \pi \prod_j P(x_{ij}|\theta_{1j}) $

### Or for all data values

$P(x|\theta_1, \theta_0) = \prod_{i=1}^{n} \left[ (1 - \pi)\prod_{j} P(x_{ij}|\theta_{0j}) + \pi \prod_jP(x_{ij}|\theta_{1j}) \right]$

with $j = \{LoFdn, LoFcc, Mis3dn, Mis3cc \}$

$\theta_1$: parameters of $H_1$

$\theta_0$: parameters of $H_0$


## Data

$Xdn$: de novo data; $Xcase$: case data; $Xcontrol$: control data

## To test if a gene is a causal gene for a disease, two hypothesis are compared:

$H_1: \gamma \neq 1$ and $H_0: \gamma = 1$

|$H_1$ | $H_0$|
|------|------|
|$Xdn \sim Poisson(2 \mu \gamma Nd)$  |  $Xdn \sim Poisson(2 \mu  Nd)$|
|$Xcontrol \sim Poisson(q Ncontrol)$  |$Xcontrol \sim Poisson(q Ncontrol)$|
|$Xcase \sim Poisson(q \gamma Ncase)$  |$Xcase \sim Poisson(q  Ncase)$ |




## Set conjugate priors for relative risk ($\gamma$) 

$\gamma \sim Gamma(\alpha, \beta) = Gamma(\bar{\gamma}*\beta, \beta)$

## Conditional distribution for cases when we know cases + controls

### $X_1 = Xcase$ with $P(X_1 = x_1) = \frac{e^{-\lambda_1} \lambda_1^{x_1}}{x_1!}$ and $\lambda_1 = q \gamma Ncase$

### $X_0 = Xcontrol$ with $P(X_0 = x_0) = \frac{e^{-\lambda_0} \lambda_0^{x_0}}{x_0!}$ and $\lambda_0 =  q Ncontrol$


## $X = X_1 + X_0 $ with $P(X = x) = \frac{e^{-\lambda} \lambda^{x}}{x!}$ and $\lambda =  \lambda_0 + \lambda_1$

$P(X_1 = x_1| X = x) = \frac{P(X_1 = x1; X = x)}{P(X = x)}$



In [None]:
casecontrol <- "
data {
int<lower=1> NN;
int<lower=1> K;
int<lower=1> Ncase;
int<lower=1> Ncontrol;
int<lower=1> Ntotal;
int<lower=1> Ndn;

int<lower=0> yCaseLoF[NN];
int<lower=0> yTotalLoF[NN];
int<lower=0> yLoFdn[NN];
real<lower=0> mutLoF[NN];

    int<lower=0> yCaseMis3[NN];
    int<lower=0> yTotalMis3[NN];
int<lower=0> yMis3dn[NN];
real<lower=0> mutMis3[NN];

real<lower=0.05> thetaH0;

}
parameters {
real<lower=0.001,upper=0.4> pi0;
real<lower=1> hyperGammaMeanLoFcc;
real<lower=0.5> hyperBetaLoFcc;
real<lower=0.5> gammaLoFcc;
real<lower=1> hyperGammaMeanLoFdn;
real<lower=0.5> hyperBetaLoFdn;
real<lower=0.5> gammaLoFdn;

real<lower=1> hyperGammaMeanMis3cc;
real<lower=0.5> hyperBetaMis3cc;
real<lower=0.5> gammaMis3cc;
real<lower=1> hyperGammaMeanMis3dn;
real<lower=0.5> hyperBetaMis3dn;
real<lower=0.5> gammaMis3dn;


}

model {

real ps[K];

//lof
hyperGammaMeanLoFcc ~ normal(4, 1); //normal(14, 4);
hyperBetaLoFcc ~ normal(1, 0.01);
gammaLoFcc ~ gamma(hyperGammaMeanLoFcc*hyperBetaLoFcc, hyperBetaLoFcc);
//denovo lof
hyperGammaMeanLoFdn ~ normal(14, 4);
hyperBetaLoFdn ~ normal(1, 0.01);
gammaLoFdn ~ gamma(hyperGammaMeanLoFdn*hyperBetaLoFdn, hyperBetaLoFdn);

///mis3
//hyperGammaMeanMis3cc ~ normal(10, 3);
//hyperBetaMis3cc ~ normal(1, 0.01);
//gammaMis3cc ~ gamma(hyperGammaMeanLoFcc*hyperBetaMis3cc, hyperBetaMis3cc);
//denovo mis3
hyperGammaMeanMis3dn ~ normal(4, 1); //normal(14, 4);
hyperBetaMis3dn ~ normal(1, 0.01);
gammaMis3dn ~ gamma(hyperGammaMeanMis3dn*hyperBetaMis3dn, hyperBetaMis3dn);


for (ii in 1:NN){
ps[1] <- log1m(pi0) + binomial_log(yCaseLoF[ii], yTotalLoF[ii], thetaH0) 
 + poisson_log(yLoFdn[ii], 2*Ndn*mutLoF[ii]) 
 + poisson_log(yMis3dn[ii], 2*Ndn*mutMis3[ii]);
// + binomial_log(yCaseMis3[ii], yTotalMis3[ii], thetaH0) 
ps[2] <- log(pi0) + binomial_log(yCaseLoF[ii], yTotalLoF[ii], gammaLoFcc*Ncase/(gammaLoFcc*Ncase + Ncontrol)) + poisson_log(yLoFdn[ii], 2*Ndn*mutLoF[ii]*gammaLoFdn) 
  + poisson_log(yMis3dn[ii], 2*Ndn*mutMis3[ii]*gammaMis3dn); 
//+ binomial_log(yCaseMis3[ii], yTotalMis3[ii], gammaMis3cc*Ncase/(gammaMis3cc*Ncase + Ncontrol))
increment_log_prob(log_sum_exp(ps));
}

}
"
yLoF <- counts[, 1]
y.case.lof <- counts[, 2]
y.control.lof <- counts[, 3]
yMis3 <- counts[, 4]
y.case.mis3 <- counts[, 5]
y.control.mis3 <- counts[, 6]
mutLoF <- data$mut.rate*mu.frac[1]
mutMis3 <- data$mut.rate*mu.frac[2]

mixdata3 <- list(K = 2, 
                 yCaseLoF = y.case.lof, yTotalLoF = y.case.lof + y.control.lof,
                 yLoFdn = yLoF, mutLoF = mutLoF,
                 
                 NN = length(y.case.lof),
                 Ncase = N$ca, Ncontrol = N$cn, Ntotal = N$ca + N$cn,
                 Ndn = N$dn,
                 
                 yCaseMis3 = y.case.mis3, yTotalMis3 = y.case.mis3 + y.control.mis3,
                 yMis3dn = yMis3, mutMis3 = mutMis3,
                 thetaH0 = N$ca/(N$ca + N$cn))


testFit <- stan(model_code = casecontrol, data = mixdata3, iter = 1000, chains = 1, control = list(adapt_delta = 0.9))
#,                init = list(list(hyperGammaMeanLoF = 20, hyperBetaLoF = 1)))
#,                 control = list(adapt_delta = 0.89), algorithm = "HMC")
#, pars = c('hyperbetaLoF', 'hyperalphaMeanLoF'),
#                 algorithm = "HMC"
#, control = list(adapt_delta = 0.999))#,                 init = list(hyperbetaLoF = 1, hyperalphaMeanLoF = 24))
vSeed <- 1234
fit <- stan(fit = testFit, data = mixdata3, iter = 20000, chains = 3, thin  = 10, cores = 3, 
               seed = vSeed,
            pars = c('pi0',
              'hyperGammaMeanLoFdn', 'hyperGammaMeanMis3dn',
                     'hyperBetaLoFdn', 'hyperBetaMis3dn',
                     'hyperGammaMeanLoFcc'), 
            control = list(adapt_delta = 0.95),
            init = list(list(hyperGammaMeanLoFdn = 20, hyperBetaLoFdn = 1,
                                  hyperGammaMeanLoFcc = 2, hyperBetaLoFcc = 1,
                                  hyperGammaMeanMis3dn= 4, hyperBetaMis3dn = 1),
                        list(hyperGammaMeanLoFdn = 20, hyperBetaLoFdn = 1,
                                 hyperGammaMeanLoFcc = 2, hyperBetaLoFcc = 1,
                                 hyperGammaMeanMis3dn= 4, hyperBetaMis3dn = 1),
            list(hyperGammaMeanLoFdn = 20, hyperBetaLoFdn = 1,
                 hyperGammaMeanLoFcc = 2, hyperBetaLoFcc = 1,
                 hyperGammaMeanMis3dn= 4, hyperBetaMis3dn = 1)))


In [None]:
Inference for Stan model: 28f04299199b0080c223c29a06ffb33b.
3 chains, each with iter=10000; warmup=5000; thin=10; 
post-warmup draws per chain=500, total post-warmup draws=1500.

                          mean se_mean   sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
pi0                       0.05    0.00 0.01      0.03      0.04      0.04      0.05      0.07     4 1.54
hyperGammaMeanLoFdn      17.87    1.36 2.93     11.70     15.83     18.53     20.02     22.78     5 1.19
hyperGammaMeanMis3dn      9.28    0.18 1.91      5.34      8.33      9.27      9.95     13.70   112 1.04
hyperBetaLoFdn            1.00    0.00 0.01      0.98      0.99      1.00      1.00      1.02   139 1.01
hyperBetaMis3dn           1.00    0.00 0.01      0.98      0.99      1.00      1.01      1.02    67 1.04
hyperGammaMeanLoFcc       3.95    0.67 1.32      1.71      2.80      4.16      4.79      6.18     4 1.34
lp__                 -18058.33    1.08 2.86 -18065.25 -18059.84 -18057.85 -18056.22 -18054.15     7 1.11

Samples were drawn using NUTS(diag_e) at Tue Jan 19 20:13:25 2016.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).