# BMIQ normalization of methylation beta values

In [None]:
# Example invocation: run the `BMIQ` workflow defined in this notebook on one
# methylation beta-value file and its CpG annotation table, overriding the
# default memory request and supplying a container image.
# The trailing `-J 1 -c csg.yml -q csg2` are options of `sos run` itself
# (presumably job concurrency, host configuration file and target queue —
# confirm against the SoS documentation for the installed version).
sos run xqtl-pipeline/pipeline/BMIQ.ipynb BMIQ \
    --cwd /mnt/vast/hpc/csg/ROSMAP_methy_QTL_beta/data_preprocessing/methyl_QTL/phenotype_data \
    --phenoFile /mnt/vast/hpc/csg/ROSMAP_methy_QTL_beta/raw_data/ROSMAP_arrayMethylation_covariates.sesame.methyl.beta.sample_matched.bed.gz \
    --pheno_annotation /mnt/vast/hpc/csg/ROSMAP_methy_QTL_beta/raw_data/cpgAnno_ill450k_finalList_420132.txt \
    --mem 50G \
    --container /mnt/vast/hpc/csg/containers/rna_quantification.sif \
    -J 1 -c csg.yml -q csg2


In [None]:
[global]
# Working directory; the BMIQ step writes its output and log files here
parameter: cwd = path('./')
# Path to the tab-delimited methylation beta-value file (e.g. a bgzipped BED);
# it must have an `ID` column, and columns 5 onward are treated as samples
parameter: phenoFile = path
# Path to the tab-delimited CpG annotation table; it must have `TargetID` and `Type` columns
parameter: pheno_annotation = path
# Value forwarded to the `nfit` argument of wateRmelon::BMIQ
parameter: nfit = 500
# Whether to perform INT on the methylation data.
# NOTE(review): this flag is not referenced by the BMIQ step in this notebook — confirm
# whether the transformation is expected to happen here or in a downstream step.
parameter: isINT = 1
# Number of tasks grouped into one submitted job (used as `trunk_size` of the task).
# This parameter used to be declared twice (200, then 1); only the later, effective
# value is kept.
parameter: job_size = 1
# Wall clock time expected
parameter: walltime = "5h"
# Memory expected
parameter: mem = "16G"
# Number of threads
parameter: numThreads = 8
# Container image used to run the pipeline; empty means no container.
# HS: a docker image can be downloaded via 'docker pull gaow/xqtl'.
# [FIXME] Add a default sif file, e.g. "/mnt/mfs/statgen/containers/xqtl_latest.sif"
parameter: container = ""

In [None]:
[BMIQ]
# Normalize every sample column of `phenoFile` with BMIQ (wateRmelon) and write the
# combined table, uncompressed, next to the declared output. Each normalized sample is
# also cached in its own file so that an interrupted run can resume.
# NOTE(review): `isINT` and the RNOmni library are not used by this script, so no
# inverse normal transformation is applied here — confirm whether that is intended.
# NOTE(review): unlike the bash action of this step, the R action is not given
# `container = container` — confirm whether it should run inside the container.
output: f'{cwd}/{phenoFile:bn}_BMIQ.bed.gz'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}'
R: expand = "${ }", stderr = f'{_output:nn}.stderr', stdout = f'{_output:nn}.stdout'
    library(readr)
    library(dplyr)
    library(wateRmelon)
    library(RNOmni)

    methy = read_delim("${phenoFile}", delim = "\t")
    methy_ann = read_delim("${pheno_annotation}", delim = "\t")

    # Columns 1-4 are carried over unchanged; every later column is one sample.
    # Without this guard `5:ncol(methy)` would count downwards (5, 4, ...).
    if (ncol(methy) < 5) {
      stop("phenoFile has no sample columns: expected four leading annotation columns followed by at least one sample")
    }

    # Reorder the annotation so that row k describes the CpG in row k of `methy`.
    cpg_order = match(methy$ID, methy_ann$TargetID)
    if (anyNA(cpg_order)) {
      # An unmatched CpG would get an NA probe type and could not be normalized correctly.
      stop(paste(sum(is.na(cpg_order)), "CpG ID(s) in phenoFile are missing from pheno_annotation"))
    }
    methy_ann = methy_ann[cpg_order,]

    # perform BMIQ
    print("start BMIQ")
    # BMIQ design vector: 1 for probes annotated as Type "I", 2 for everything else
    probes_type = ifelse(methy_ann$Type == "I", 1, 2)
    methy_norm = methy[,1:4]
    for (i in 5:ncol(methy)) {
      print(paste("start sample", as.character(i)))
      # Per-sample cache file; removed by the bash action once the full table is written
      file_i = paste("${_output:n}", "_sample_", i, ".bed", sep = "")
      cur_column = NULL
      if (file.exists(file_i)) {
        cur_column = read_delim(file_i, delim = "\t")
        if (nrow(cur_column) != nrow(methy)) {
          # Cache left truncated by an interrupted run: discard it and recompute
          cur_column = NULL
        }
      }
      if (is.null(cur_column)) {
        tmp = BMIQ(methy[[i]], design.v = probes_type, nfit = ${nfit})
        cur_column = as.data.frame(tmp$nbeta)
        colnames(cur_column) = colnames(methy)[i]
        # Written with the same delimiter that is used to read the cache back
        write_delim(cur_column, file = file_i, delim = "\t")
      }
      methy_norm = cbind(methy_norm, cur_column)
    }
    # Restore the original header in case cbind or the cache altered any column name
    colnames(methy_norm) = colnames(methy)
    write_delim(methy_norm, file = "${_output:n}", delim = "\t")

bash: expand = "${ }", stderr = f'{_output:nn}.stderr', stdout = f'{_output:nn}.stdout', container = container
    # Compress the table written by the R action into the declared .gz output
    bgzip -f ${_output:n}
    # Index it as BED: `-p` takes the preset name, and the file to index is the
    # positional argument (the original passed the file path as the preset)
    tabix -p bed -f ${_output}
    # Drop the per-sample cache files; -f keeps the step from failing if none are left
    rm -f ${_output:n}_sample_*
