In [1]:
%load_ext rpy2.ipython

In [2]:
%%R
mainDir="./"
source(paste(mainDir, "/TADA/TADA.R", sep = ""))
#source(paste(mainDir, "/newLikelihood.R", sep = ""))
library("parallel")


mu.frac <- c(0.074, 0.32)
gamma.mean.dn <- c(20, 4.7)
beta.dn <- c(1,1)
gamma.mean.CC <- c(2.3, 1.00)
beta.CC <- c(4.0, 1000)
rho1 <- c(0.1, 0.5)
nu1 <- c(200, 100)
rho0 <- c(0.1, 0.5)
nu0 <- c(200, 100)
hyperpar <- as.array(rbind(gamma.mean.dn, beta.dn, gamma.mean.CC, beta.CC, rho1, nu1, rho0, nu0))
l <- 100
pi0 <- 0.94 # the fraction of non-risk genes

# ASC (Autism Sequencing Consortium) data
# The file name contains the sample size information
# The only relevant counts are dn.LoF and dn.mis3
data <- read.csv("TADA/data/ASC_2231trios_1333trans_1601cases_5397controls.csv", header=TRUE, as.is=TRUE)
ntrio <- 2231  # number of trios
ncase <- 1601  # number of cases
nctrl <- 5397  # number of controls
ntrans <- 1333 # number of subjects with transmission data
N <- list(dn=ntrio, ca=ntrans+ncase, cn=ntrans+nctrl)


In [3]:
%%R
head(data)

    Gene  mut.rate dn.LoF case.LoF ctrl.LoF trans.LoF ntrans.LoF dn.mis3
1   A1BG 2.355e-05      0        0        1         0          0       0
2   A1CF 2.174e-05      0        1        2         1          1       0
3  A2LD1 6.090e-06      0        0        0         0          0       0
4    A2M 5.640e-05      0        1        5         1          0       0
5  A2ML1 5.700e-05      0        3       16         2          6       0
6 A4GALT 2.415e-05      0        0        2         1          0       0
  case.mis3 ctrl.mis3 trans.mis3 ntrans.mis3
1         0         4          7           3
2         0         7          3           7
3         0         0          0           0
4         4        22          6           6
5         5        30          6          10
6         1         5          2           1


In [4]:
%%R
# Running TADA
counts <- as.array(cbind(data$dn.LoF, data$case.LoF+data$trans.LoF, data$ctrl.LoF+data$ntrans.LoF, data$dn.mis3, data$case.mis3+data$trans.mis3,
 data$ctrl.mis3+data$ntrans.mis3))


count.mu.Data <- cbind(counts[, 1:3], data$mut.rate*mu.frac[1])
#Count.mu.Data <- cbind(counts[, 4:6], data$mut.rate*mu.frac[2])

########Calculate data.frame and likelihood function
##countData should be a data.frame including all annotation types.
#Each type includes 3 columns: de novo, case and control
                                        #mutData: a data.frame including mutation rates of each type


In [5]:
%%R
logLikelihood <- function(countData, hyperpar, pi0 = 0.06, mutationData,
                          testType = c("dn", "CaseControl", "both"), nCore = 1, combinationType = 1){
    ##Identify how many annotation types
    if (is.numeric(mutationData))
        mutationData <- cbind(mutationData)
    kk <- dim(mutationData)[2]

        ###Choose only de novo (dn), only cc (cc), or both ("both")
        testType <- match.arg(testType)

#    message("testType: ", testType)

    dataOut <- NULL

    logLLKallType <- 0

    for (jj in 1:kk){

        ##Extract each annotation
        sIndex <- 3*(jj - 1) + 1

        countData.type <- apply(cbind(countData[, c(sIndex, sIndex + 1, sIndex + 2)], mutationData[, jj]), 1, as.list)

#        print(dim(countData.type))

        typeData <- mclapply(countData.type, function(xRow){
                                 xRow <- as.numeric(xRow)
            x <- list(dn = xRow[1],
                    ca = xRow[2], cn = xRow[3], mu = xRow[4])

 #           print(xRow)
                #####Alternative hypothesis
                x.alt.CC <- evidence.alt.CC(x = x, N = N, gamma.mean = hyperpar[3, jj],
                                            beta = hyperpar[4, jj],
                                            rho1 = hyperpar[5, jj], nu1 = hyperpar[6, jj])$total
                            x.alt.dn <- dnbinom(x = x$dn, hyperpar[1, jj]*hyperpar[2, jj],
                                   hyperpar[2, jj]/(hyperpar[2, jj] + 2*N$dn*x$mu))
                    #evidence.alt.dn(x = xRow[1], N = N$dn, mu = xRow[4],
                     #                       gamma.mean = hyperpar[1, 1], beta = hyperpar[2, 1])

#####Null hypothesis

                x.null.CC <- evidence.null.CC(x = x, N = N, rho0 = hyperpar[7, jj], nu0 = hyperpar[8, jj])$total
                x.null.dn <- dpois(x$dn, 2*N$dn*x$mu) #evidence.null.dn(x = xRow[1], N = N$dn, mu = xRow[4])


            return(c(x.alt.CC, x.alt.dn, x.null.CC, x.null.dn))

        }, mc.cores = nCore)

        #########Done

        ##############################################
                    ##########Calculate log likelihood
        typeData <- do.call(rbind, typeData)

            pH1.dn <- typeData[, 2]
            pH0.dn <- typeData[, 4]

        pH1.cc <- typeData[, 1]
        pH0.cc <- typeData[, 3]

        if (testType == "dn"){
            pH1 <- pi0*pH1.dn
            pH0 <- (1 - pi0)*pH0.dn
        }
        if (testType == "CaseControl"){
            pH1 <- pi0*pH1.cc
            pH0 <- (1 - pi0)*pH0.cc
        }

        if (testType == "both") {
            pH1 <- pi0*pH1.cc*pH1.dn
            pH0 <- (1- pi0)*pH0.cc*pH0.dn
            if (jj > 1){
                pH1 <- pi0*pH1.dn
                pH0 <- (1 - pi0)*pH0.dn
            }
        }
            pAll <- pH1 + pH0

            #sum(log(pAll))

            log.pAll <- sum(log(pAll[!is.na(pAll)]))#log(prod(pAll))


        dataOut <- cbind(dataOut, typeData)

            logLLKallType <- logLLKallType + log.pAll


    }


    return(list(dataOut = dataOut, logLik = logLLKallType))
}





In [13]:
%%R
countsAll <- cbind(counts, data$mut.rate*mu.frac[1], data$mut.rate*mu.frac[2])

tempCount <- countsAll[sample(1:dim(countsAll)[1], 2000), ]

countData <- tempCount[, 1:6]
mutationData <- tempCount[, 7:8]

In [14]:

%%R

piFunc.bothLoF.dnMis3 <- function(xIn){
  
  piX <- xIn[1]
  
  hyperparNew <- matrix(1, ncol = 2, nrow = 8)
  
  #######LoF
  ###Denovo
  hyperparNew[1, 1] <- xIn[2] ##gamma.dn.mean
  hyperparNew[2, 1] <- xIn[3] ##beta.dn
  ###Case-control
  hyperparNew[3, 1] <- xIn[4] ##gamma.cc.mean
  hyperparNew[4, 1] <- xIn[5] ##beta.cc
  hyperparNew[5, 1] <- xIn[6] ##rho1
  hyperparNew[6, 1] <- xIn[7] ##nu1
  hyperparNew[7, 1] <- xIn[8] ##rho0
  hyperparNew[8, 1] <- xIn[9] ##nu0
  
  ####Mis3
  hyperparNew[1, 2] <- xIn[10] ##gamma.dn.mean.mis3
  hyperparNew[2, 2] <- xIn[11] ##beta.dn.mis3
  
  
  log.All <- -logLikelihood(countData = countData, hyperpar = hyperparNew,testType = "both",
                            pi0 = piX, mutationData = mutationData)$logLik
  return(log.All)
}

library("optimx")
x1 <- c(0.07, 22, 0.95, 2.8, 5, 1, 10000, 0.45, 800, 4, 1.2)
piFunc.bothLoF.dnMis3(xIn = x1)



[1] 7026.05


In [15]:
%%R
library("optimx")

es.p <- optimx(par = c(0.07, 22, 0.95, 2.8, 5, 1, 10000, 0.45, 800, 4, 1.2),
               fn = piFunc.bothLoF.dnMis3,
               lower = c(0.02, 10, 0.9, 2, 2, 0.6, 9000, 0.3, 660, 3, 0.8),
               upper = c(0.09, 30, 1.1, 4, 6, 1.2, 11000, 0.6, 900, 6, 2))
es.p

           p1 p2  p3       p4 p5  p6       p7        p8  p9      p10 p11
L-BFGS-B 0.09 10 0.9 2.322967  6 1.2 9999.597 0.4585633 900 4.057077 0.8
            value fevals gevals niter convcode  kkt1  kkt2 xtimes
L-BFGS-B 6990.383    104    104    NA        0 FALSE FALSE 2566.3


In [16]:
%%R
library("optimx")

es.p1 <- optimx(par = c(0.07, 22, 0.95, 2.8, 5, 1, 10000, 0.45, 800, 4, 1.2),
               fn = piFunc.bothLoF.dnMis3,
               lower = c(0.02, 10, 0.6, 2, 2, 0.6, 5000, 0.3, 660, 3, 0.8),
               upper = c(0.09, 30, 1.5, 4, 6, 1.2, 15000, 0.9, 3000, 6, 2))
es.p1

           p1 p2  p3       p4 p5  p6       p7        p8       p9      p10 p11
L-BFGS-B 0.09 10 0.6 2.417714  6 1.2 9999.926 0.4367284 821.3144 3.471578 0.8
            value fevals gevals niter convcode  kkt1  kkt2 xtimes
L-BFGS-B 7008.855    115    115    NA        1 FALSE FALSE 2824.8


In [17]:
%%R
countsAll <- cbind(counts, data$mut.rate*mu.frac[1], data$mut.rate*mu.frac[2])

tempCount <- countsAll[sample(1:dim(countsAll)[1], 100), ]

countData <- tempCount[, 1:6]
mutationData <- tempCount[, 7:8]

In [18]:
%%R
library("optimx")

es.p2 <- optimx(par = c(0.07, 22, 0.95, 2.8, 5, 1, 10000, 0.45, 800, 4, 1.2),
               fn = piFunc.bothLoF.dnMis3,
               lower = c(0.02, 10, 0.6, 2, 2, 0.6, 5000, 0.3, 660, 3, 0.8),
               upper = c(0.09, 30, 1.5, 4, 6, 1.2, 15000, 0.9, 3000, 6, 2))
es.p2

           p1 p2  p3 p4       p5        p6       p7        p8       p9 p10 p11
L-BFGS-B 0.02 10 0.6  2 5.991746 0.7792232 9995.012 0.8453653 2662.505   3   2
            value fevals gevals niter convcode  kkt1  kkt2 xtimes
L-BFGS-B 312.7109    108    108    NA        0 FALSE FALSE 130.81
