# Install packages

In [1]:
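# Uncomment the lines below to install pinned versions of mixsqp, ashr and cause.
# Note: the analysis below also uses readr and MRAPSS (MRAPSS::clump), which are
# not installed by these commands.
# devtools::install_version("mixsqp", version = "0.1-97", repos = "http://cran.us.r-project.org")
# devtools::install_version("ashr", version = "2.2-32", repos = "http://cran.us.r-project.org")
# devtools::install_github("jean997/cause@v1.0.0")
# For the latest version of cause, use devtools::install_github("jean997/cause@v1.2.0") instead.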

# Run CAUSE

In [11]:
library(readr)
library(cause)

# Traits to analyse. Each name is also the file name of that trait's GWAS
# summary statistics under ./GWAS_26and5_formatted/.
ts1 <- c("AD", "ASD", "Daytime_Sleepiness", "Height_UKB", "Intelligence", "RA",
         "T2D", "Alcohol", "BMI", "Depression", "IBD", "MDD", "SCZ", "Angina",
         "CAD", "HBP", "Income", "NEB", "Smoking", "Urate", "Anorexia",
         "CD", "Height_GIANT", "Insomnia", "Neuroticism", "SWB")

# Every trait is used both as exposure and as outcome (all ordered pairs).
ts2 <- ts1

# Exposure p-value cut-off. It drives BOTH the clumping step and the selection
# of variants passed to cause(), and is recorded in every output row.
Threshold <- 1e-03

# Read one trait's tab-delimited summary statistics and add the two columns
# that gwas_merge() is pointed at below: b = Z / sqrt(N) and se = 1 / sqrt(N).
# The file is expected to provide at least the columns SNP, A1, A2, Z, N
# (and P when the trait is used as exposure).
read_gwas_sumstats <- function(trait) {
  gwas <- suppressMessages(readr::read_delim(
    paste0("./GWAS_26and5_formatted/", trait),
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, progress = FALSE
  ))
  gwas$b <- gwas$Z / sqrt(gwas$N)
  gwas$se <- 1 / sqrt(gwas$N)
  gwas
}

start <- proc.time()
for (exposure in ts1) {

  # The exposure data do not depend on the outcome, so load and prepare them
  # once per exposure rather than once per (exposure, outcome) pair.
  X1 <- read_gwas_sumstats(exposure)

  # Exposure p-values, renamed so they can be joined onto the merged data
  # by its "snp" column.
  d0 <- X1[, c("SNP", "P")]
  colnames(d0) <- c("snp", "pval.exp")

  for (outcome in ts2) {

    if (exposure == outcome) next

    # read GWAS summary statistics
    cat(exposure, "~", outcome, "\n")

    X2 <- read_gwas_sumstats(outcome)

    # Merge exposure and outcome statistics; a failing pair is skipped
    # (try() still prints the error message).
    X <- try(gwas_merge(X1, X2,
                        snp_name_cols = c("SNP", "SNP"),
                        beta_hat_cols = c("b", "b"),
                        se_cols = c("se", "se"),
                        A1_cols = c("A1", "A1"),
                        A2_cols = c("A2", "A2")))

    if (inherits(X, "try-error")) next

    X0 <- merge(X, d0, by = "snp")

    # clump
    # Uses Threshold (not a second hard-coded literal) so the clumping cut-off
    # always matches the variant filter applied further down.
    clumped <- MRAPSS::clump(X0,
                             IV.Threshold = Threshold,
                             SNP_col = "snp",
                             pval_col = "pval.exp",
                             clump_kb = 1000,
                             clump_r2 = 0.1,
                             bfile = "/import/home/share/xhu/database/1KG/all_1000G_EUR_Phase3",
                             plink_bin = "/import/home/maxhu/plink/plink")

    # Nothing is written for a pair without a clumping result, so skip it
    # before estimating the nuisance parameters.
    if (is.null(clumped)) next

    # Random subset (at most 1e6 variants) used for parameter estimation.
    # NOTE(review): no seed is set, so this sample differs between runs.
    varlist <- with(X, sample(snp, size = min(nrow(X), 1000000), replace = FALSE))
    params <- try(est_cause_params(X, varlist))
    if (inherits(params, "try-error")) next

    # cause
    # Variants present in the merged data that survived clumping and pass the
    # exposure p-value threshold.
    top_ldl_pruned_vars <- intersect(
      as.character(X$snp),
      as.character(subset(clumped, pval.exp <= Threshold)$snp)
    )

    cause_res <- try(cause(X = X, variants = top_ldl_pruned_vars,
                           param_ests = params, force = TRUE))

    if (inherits(cause_res, "try-error")) next

    # Model-comparison table, prefixed with the pair identifiers (the scalar
    # columns are recycled across the rows of cause_res$elpd).
    res_elpd <- data.frame(exposure,
                           outcome,
                           Threshold,
                           length(top_ldl_pruned_vars),
                           cause_res$elpd)

    res.cause.est <- summary(cause_res, ci_size = 0.95)

    # Flatten columns 1-3 of the second quantile table into a single row.
    # NOTE(review): quants[[2]] is presumably the causal-model summary - confirm
    # against the cause package documentation.
    res <- data.frame(exposure, outcome,
                      Threshold, length(top_ldl_pruned_vars),
                      matrix(c(res.cause.est$quants[[2]][, 1],
                               res.cause.est$quants[[2]][, 2],
                               res.cause.est$quants[[2]][, 3]), nrow = 1))

    # Results are appended without a header so all pairs accumulate in the
    # same two files.
    write.table(res, file = "Traits_CAUSE_est", append = TRUE,
                col.names = FALSE, row.names = FALSE,
                quote = FALSE)

    write.table(res_elpd, file = "Traits_CAUSE_elpd", append = TRUE,
                col.names = FALSE, row.names = FALSE,
                quote = FALSE)

    # Drop the per-pair objects before the next iteration.
    rm(top_ldl_pruned_vars)
    rm(res)
    rm(res_elpd)
    rm(res.cause.est)
    rm(cause_res)
  }

}
print(proc.time() - start)

In [12]:
# Load the accumulated per-pair outputs (written without headers), collapse
# duplicated lines, and attach column names.
cause_elpd <- setNames(
  unique(read.table("Traits_CAUSE_elpd", header = FALSE)),
  c("exposure", "outcome", "Threshold", "nsnp",
    "model1", "model2", "delta_elpd", "se_delta_elpd", "Z")
)
cause_est <- setNames(
  unique(read.table("Traits_CAUSE_est", header = FALSE)),
  c("exposure", "outcome", "Threshold", "nsnp",
    "beta.hat", "b_l", "b_u", "eta", "eta_l", "eta_u", "q", "q_l", "q_u")
)

# Keep only the sharing-vs-causal comparison rows; which() drops rows where
# the comparison is NA, as subset() would.
cause_elpd <- unique(
  cause_elpd[which(cause_elpd$model1 == "sharing" &
                     cause_elpd$model2 == "causal"), ]
)

# Lower-tail normal probability of the comparison Z statistic.
cause_elpd$pval <- pnorm(cause_elpd$Z)

# Keep the effect estimate with its interval bounds and derive a standard
# error from the interval half-width divided by 1.96.
cause_est <- unique(
  cause_est[, c("exposure", "outcome", "Threshold", "nsnp",
                "beta.hat", "b_l", "b_u")]
)
cause_est$se <- (cause_est$b_u - cause_est$b_l) / 2 / 1.96

# Join p-values and estimates per (exposure, outcome) pair.
cause_res <- unique(merge(
  x = unique(cause_elpd[, c("exposure", "outcome", "pval")]),
  y = cause_est[, c("exposure", "outcome", "Threshold", "nsnp",
                    "beta.hat", "se")],
  by = c("exposure", "outcome")
))
cause_res$Method <- "CAUSE"

# Overwrite (not append) the combined result table, this time with a header.
write.table(cause_res, file = "Traits_CAUSE.MRres", append = FALSE,
            col.names = TRUE, row.names = FALSE, quote = FALSE)

In [13]:
# Preview the first rows of the combined CAUSE result table.
head(cause_res)

Unnamed: 0_level_0,exposure,outcome,pval,Threshold,nsnp,beta.hat,se,Method
Unnamed: 0_level_1,<fct>,<fct>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<chr>
1,AD,Alcohol,0.9999999,0.001,690,-0.001000189,0.01321479,CAUSE
2,AD,Angina,0.7051944,0.001,656,-0.013624823,0.01417754,CAUSE
3,AD,Anorexia,0.9998904,0.001,588,0.006302211,0.02600592,CAUSE
4,AD,ASD,0.999998,0.001,683,0.007747679,0.04821366,CAUSE
5,AD,BMI,0.3185072,0.001,656,-0.031817131,0.01925596,CAUSE
6,AD,CAD,0.912853,0.001,703,-0.008380268,0.01638776,CAUSE
