In [None]:
#install.packages("coloc")
library(coloc)

In [None]:
# Display names of the traits of interest. The main loop uses these as the
# keys of result_matrix. Only the first name currently has a matching
# summary-statistics file configured below.
disease_list <- c(
  "Essential hypertension",
  "Abdominal hernia",
  "Hyperlipidemia",
  "Osteoarthrosis",
  "Cardiac dysrhythmias",
  "Asthma",
  "Cataract",
  "Coronary atherosclerosis",
  "Type 2 diabetes",
  "Parkinson's disease",
  "Alzheimer's disease",
  "Schizophrenia"
)

# GWAS summary statistics, from
# https://www.ebi.ac.uk/gwas/publications/32589924
# The next three vectors are parallel: position i in each one describes the
# same study (file path, value passed to coloc as `N`, value passed as `s`).
disease_files <- c(
  "/Users/johndriscoll/Downloads/180B/essential_hypertension.tsv"
)
sample_sizes <- c(282871)
case_control_props <- c(0.271)

# eQTL summary statistics derived from 1000G gene expression data: every
# file in the directory whose name matches the ENSG pattern, as full paths.
eQTL_files <- list.files(
  path = "/Users/johndriscoll/Downloads/180B/DSC180BFinalProject/eQTL_subsets/",
  pattern = "ENSG.*",
  full.names = TRUE
)

# Accumulator for the analysis output; filled in per disease by the main loop.
result_matrix <- list()

In [None]:
# Interactive sanity check: show the sample size configured for the first
# GWAS study (R vectors are 1-indexed, so [1] is the first element).
sample_sizes[1]

In [None]:
# Colocalise every configured GWAS trait against every eQTL file and record,
# per disease, the posterior probability that both traits share one causal
# variant (coloc hypothesis H4).
#
# Reads:  disease_files, sample_sizes, case_control_props, disease_list and
#         eQTL_files, all defined earlier in the script.
# Writes: result_matrix[[<disease name>]], a list holding one PP.H4 value per
#         eQTL file, in the same order as eQTL_files.
for (disease_idx in seq_along(disease_files)) {
  disease_file <- disease_files[[disease_idx]]

  # na.omit() drops every row with a missing field, so all column vectors
  # copied into gwas_list below are NA-free and of equal length. (A later
  # complete.cases() pass on the list itself is therefore unnecessary, and
  # matrix-style indexing of a plain list is an error in R.)
  curr_gwas <- na.omit(read.table(disease_file, sep = "\t", header = TRUE))

  # Build the GWAS dataset from p-values + MAF as a case-control ("cc") study.
  gwas_list <- list()
  gwas_list$MAF <- curr_gwas$MAF_calculated_from_dosage_data
  gwas_list$snp <- curr_gwas$variant_id
  gwas_list$position <- curr_gwas$base_pair_location
  gwas_list$N <- sample_sizes[[disease_idx]]
  gwas_list$pvalues <- curr_gwas$p_value
  gwas_list$type <- "cc"
  gwas_list$s <- case_control_props[[disease_idx]]

  # Fail fast if the dataset is malformed.
  check_dataset(gwas_list)
  # R has no `+` for strings; paste0() is the concatenation primitive.
  print(paste0("Processed disease file ", disease_file))

  # Preallocate one result slot per eQTL file instead of growing the list.
  curr_results <- vector("list", length(eQTL_files))

  # NOTE(review): the loop previously ended with an unconditional `break`,
  # so only the first eQTL file was ever analysed. That looked like a
  # debugging leftover and has been removed; restrict eQTL_files upstream if
  # a short test run is wanted.
  for (eqtl_idx in seq_along(eQTL_files)) {
    eQTL_file <- eQTL_files[[eqtl_idx]]
    eQTL_data <- read.csv(eQTL_file)

    # Build the eQTL dataset from effect sizes and their variances.
    eQTL_list <- list()
    eQTL_list$beta <- eQTL_data$beta
    eQTL_list$varbeta <- eQTL_data$varbeta
    eQTL_list$snp <- eQTL_data$snp
    eQTL_list$position <- eQTL_data$pos
    # type and N are per-dataset scalars repeated on every row of the file;
    # take the first row. R is 1-indexed: [0] would give an empty vector.
    eQTL_list$type <- eQTL_data[, "type"][1]
    eQTL_list$N <- eQTL_data$N[1]
    eQTL_list$MAF <- eQTL_data$MAF

    check_dataset(eQTL_list)

    coloc_results <- coloc.abf(gwas_list, eQTL_list)

    # Keep only PP.H4.abf (the sixth entry of the summary vector), selected
    # by name rather than by position.
    curr_results[[eqtl_idx]] <- coloc_results$summary[["PP.H4.abf"]]

    print(paste0("Processed eQTL_file ", eQTL_file))
  }

  result_matrix[[disease_list[[disease_idx]]]] <- curr_results
}

In [None]:
# Save the console rendering of result_matrix to disk. capture.output()
# records what printing the object would show, so the file holds R's printed
# list layout.
# NOTE(review): despite the .csv extension the contents are not
# comma-separated; confirm whether downstream readers expect real CSV.
capture.output(
  print(result_matrix),
  file = "hypertension_eQTL_matrix.csv"
)