Skip to content

Commit

Permalink
Spruced up reports, fixed minor bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
shaze committed Jun 13, 2018
1 parent b5f353b commit 0ff48a0
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 33 deletions.
55 changes: 37 additions & 18 deletions bin/plinkDraw.py
Expand Up @@ -8,6 +8,7 @@
mpl.use('Agg')
import matplotlib.pyplot as plt
import os
import re
g = glob.glob


Expand All @@ -18,7 +19,7 @@
gotcovar = sys.argv[5]
gtype = sys.argv[6]

if len(pheno)==0: pheno = "in fam"
if len(pheno)==0: pheno = "infam"

EOL = chr(10)

Expand Down Expand Up @@ -72,34 +73,38 @@ def drawQQ(pheno,result,outf):

qq_template = """
*-clearpage
*-section{PLINK Results for phenotype *-protect*-url{%(pheno)s}, test %(test)s}
The raw results can be found in the *-textbf{%(test)s} directory.
The QQ plot can be found in Figure *-ref{fig:qq}, and the Manhatten
plot in Figure *-ref{fig:man}.
The QQ plot can be found in Figure *-ref{fig:qq:%(pheno)s:%(test)s}, and the Manhatten
plot in Figure *-ref{fig:man:%(pheno)s:%(test)s}.
*-begin{figure}[ht]
"""


qq_figs = """
*-begin{figure}[hb]
*-begin{center}
*-includegraphics[width=17cm]{%(qqfile)s}
*-end{center}
*-caption{QQ plot for PLINK testing -- *-protect*-url{%(pheno)s}, test %(test)s -- *-protect*-url{%(testing)s} }
*-label{fig:qq}
*-label{fig:qq:%(pheno)s:%(test)s}
*-end{figure}
*-begin{figure}[ht]
*-begin{center}
*-includegraphics[width=17cm]{%(manfile)s}
*-end{center}
*-caption{PLINK testing: Manhatten plot for -- *-protect*-url{%(pheno)s}, test %(test)s -- *-protect*-url{%(testing)s}}
*-label{fig:man}
*-label{fig:man:%(pheno)s:%(test)s}
*-end{figure}
"""

interesting=r"""
The top ranking SNPs for %s according to %s are found in Table~\ref{tab:plink:top:%s}
*-item The top ranking SNPs for %s according to %s are found in Table~\ref{tab:plink:top:%s}
*-begin{table}
*-begin{center}
Expand All @@ -110,16 +115,29 @@ def drawQQ(pheno,result,outf):
endtab = r"""
*-end{tabular}
*-end{center}
*-caption{Top SNPs found by plink using %s for %s}
*-caption{Top SNPs found by plink using %s for %s test *-protect*-url{%s}}
*-label{tab:plink:top:%s}
*-end{table}
"""

def lambdaGC(log):
if not os.path.exists(log): return ""
res = ""
f = open(log)
for line in f:
m = re.search("Genomic inflation est. lambda(.*)", line)
if m:
res = "*-paragraph*{Genomic inflation: }: The estimate of genomic inflation ##*-lambda## "+m.group(1)+EOL+EOL
break
return res


def showInteresting(frame, p_col, test_name):
slist = interesting % (str(p_col),test_name,str(p_col)+test_name)
cpheno = pheno.replace("_","-")
slist = interesting % (str(p_col),test_name,str(p_col)+cpheno)
for row in frame.iterrows():
slist = slist+"%s & %6.4f "%(row[1]['SNP'].replace("_","-"),row[1][p_col])+r"*-*-"+chr(10)
slist = slist + endtab%(test_name,p_col,str(p_col)+test_name)
slist = slist + endtab%(test_name,p_col,pheno,str(p_col)+cpheno)
return slist

def processProbs(data,p_col,p_name):
Expand All @@ -137,7 +155,6 @@ def clean(name):
return name.replace("/","-").replace("np.","")

def showResults():
outpics = ""
# first get the result for the straight forward test
if test=="assoc":
data = clean(pheno) + ".assoc"
Expand All @@ -146,21 +163,23 @@ def showResults():
asf = pd.read_csv(data,delim_whitespace=True)
if gotcovar == "1" and test != "assoc":
asf = asf[asf['TEST']=="ADD"]
hashd = {'manfile':clean("%s-man-%s.%s"%(base,pheno,gtype)), 'testing':data,'test':test, 'pheno':pheno,\
'qqfile':clean("%s-qq-%s.%s"%(base,pheno,gtype))}
drawManhatten(pheno,asf,hashd['manfile'])
drawQQ(pheno,asf,hashd['qqfile'])
outpics = outpics + qq_template%hashd
cpheno = pheno.replace("_","-")
hashd = {'manfile':clean("%s-man-%s.%s"%(base,cpheno,gtype)), 'testing':data,'test':test, 'pheno':cpheno,
'qqfile':clean("%s-qq-%s.%s"%(base,cpheno,gtype))}
drawManhatten(pheno.replace("_","-"),asf,hashd['manfile'])
drawQQ(pheno.replace("_","-"),asf,hashd['qqfile'])
outpics = qq_template%hashd + lambdaGC(clean(pheno)+".log")+EOL+EOL+"*-begin{itemize}"+EOL
for (correct,col,label) in [(".mperm",'EMP2',"permutation testing"),(".adjusted",'BONF',"Bonferroni correction")]:
nd = data + correct
if os.path.exists(nd):
outpics = outpics+processProbs(nd,col,label)
outpics =outpics+"*-end{itemize}"+EOL+EOL+qq_figs%hashd+EOL+"*-clearpage"+EOL
return outpics

out_pics = showResults()



textf = open("%s%s%s.tex"%(label,pheno,test),"w")
textf = open("%s%s%s.tex"%(label,pheno.replace("_","-"),test),"w")
textf.write(out_pics.replace("*-",chr(92)).replace("##",chr(36)).replace("@.@",chr(10)))
textf.close()
45 changes: 42 additions & 3 deletions plink-assoc.nf
Expand Up @@ -25,7 +25,7 @@ import java.nio.file.Paths

def helps = [ 'help' : 'help' ]

allowed_params = ["input_dir","input_pat","output","output_dir","data","plink_mem_req","covariates","gemma_num_cores","gemma_mem_req","gemma","linear","logistic","chi2","fisher", "work_dir", "scripts", "max_forks", "high_ld_regions_fname", "sexinfo_available", "cut_het_high", "cut_het_low", "cut_diff_miss", "cut_maf", "cut_mind", "cut_geno", "cut_hwe", "pi_hat", "super_pi_hat", "f_lo_male", "f_hi_female", "case_control", "case_control_col", "phenotype", "pheno_col", "batch", "batch_col", "samplesize", "strandreport", "manifest", "idpat", "accessKey", "access-key", "secretKey", "secret-key", "region", "AMI", "instanceType", "instance-type", "bootStorageSize", "boot-storage-size", "maxInstances", "max-instances", "other_mem_req", "sharedStorageMount", "shared-storage-mount", "max_plink_cores", "pheno","big_time","thin"]
allowed_params = ["input_dir","input_pat","output","output_dir","data","plink_mem_req","covariates","gemma_num_cores","gemma_mem_req","gemma","linear","logistic","chi2","fisher", "work_dir", "scripts", "max_forks", "high_ld_regions_fname", "sexinfo_available", "cut_het_high", "cut_het_low", "cut_diff_miss", "cut_maf", "cut_mind", "cut_geno", "cut_hwe", "pi_hat", "super_pi_hat", "f_lo_male", "f_hi_female", "case_control", "case_control_col", "phenotype", "pheno_col", "batch", "batch_col", "samplesize", "strandreport", "manifest", "idpat", "accessKey", "access-key", "secretKey", "secret-key", "region", "AMI", "instanceType", "instance-type", "bootStorageSize", "boot-storage-size", "maxInstances", "max-instances", "other_mem_req", "sharedStorageMount", "shared-storage-mount", "max_plink_cores", "pheno","big_time","thin","adjust","mperm"]



Expand Down Expand Up @@ -110,6 +110,38 @@ if (params.help) {
System.exit(-1)
}


def fileColExists = { fname, pname, cname ->
f = new File(fname)
if (! f.exists()) {
error("\n\nThe file <${fname}> given for <${pname}> does not exist")
} else {
def line
f.withReader { line = it.readLine() }
fields = line.split()
cols = cname.split(",")
cols.each { col ->
det = col.split("/")
if (! fields.contains(det[0]))
error("\n\nThe file <${fname}> given for <$pname> does not have a column <${det}>\n")
}
}
}

fileColExists(params.data,"${params.data} - covariates", params.covariates)
fileColExists(params.data,"${params.data} - phenotypes", params.pheno)

covs = params.covariates.split(",")
params.pheno.split(",").each { p ->
if (covs.contains(p)) {
println("\n\nThe phenotype <$p> is also given as a covariate -- this seems like a very bad idea")
sleep(10)
}
}




//---- Modification of variables for pipeline -------------------------------//


Expand Down Expand Up @@ -498,7 +530,7 @@ process doReport {
file(reports) from report_ch.toList()
publishDir params.output_dir
output:
file("${out}.pdf")
file("${out}.pdf") into final_report_ch
script:
out = params.output+"-report"
these_phenos = params.pheno
Expand All @@ -510,6 +542,13 @@ process doReport {
}



final_report_ch.subscribe {
b=it.baseName; println "The output report is called ${params.output_dir}/${b}.pdf"
params.pheno.split(",").each { p ->
if (covs.contains(p)) {
println("\n\nThe phenotype <$p> is also given as a covariate -- this seems like a very bad idea")
}
}
}


45 changes: 34 additions & 11 deletions plink-qc.nf
Expand Up @@ -58,6 +58,18 @@ report = new LinkedHashMap()
repnames = ["dups","initmaf","inithwe","cleaned","misshet","mafpdf","snpmiss","indmisspdf","failedsex","misshetremf","diffmissP","diffmiss","pca","hwepdf","related","inpmd5","outmd5","batch_report","batch_aux","qc1"]




// Checks if the file exists
checker = { fn ->
if (fn.exists())
return fn;
else
error("\n\n-----------------\nFile $fn does not exist\n\n---\n")
}



repnames.each { report[it] = Channel.create() }

repmd5 = report["inpmd5"]
Expand Down Expand Up @@ -101,12 +113,21 @@ if (params.help) {
System.exit(-1)
}

def getSubChannel = { parm, parm_name ->
def getSubChannel = { parm, parm_name, col_name ->
if ((parm==0) || (parm=="0") || (parm==false) || (parm=="false")) {
filename = "emptyZ0${parm_name}.txt";
new File(filename).createNewFile()
new_ch = Channel.fromPath(filename);
} else {
if (! file(parm).exists()) {
error("\n\nThe file <$parm> given for <params.${parm_name}> does not exist")
} else {
def line
new File(parm).withReader { line = it.readLine() }
fields = line.split()
if (! fields.contains(col_name))
error("\n\nThe file <$parm> given for <params.${parm_name}> does not have a column <${col_name}>\n")
}
new_ch = Channel.fromPath(parm);
}
return new_ch;
Expand All @@ -117,15 +138,25 @@ if (params.case_control) {
Channel.fromPath(ccfile).into { cc_ch; cc2_ch }
col = params.case_control_col
diffpheno = "--pheno cc.phe --pheno-name $col"
if (! file(params.case_control).exists()) {
error("\n\nThe file <${params.case_control}> given for <params.case_control> does not exist")
} else {
def line
new File(params.case_control).withReader { line = it.readLine() }
fields = line.split()
if (! fields.contains(params.case_control_col))
error("\n\nThe file <${params.case_control}> given for <params.case_control> does not have a column <${params.case_control_col}>\n")
}

} else {
diffpheno = ""
col = ""
cc_ch = Channel.value.into("none").into { cc_ch; cc2_ch }
}


phenotype_ch = getSubChannel(params.phenotype,"pheno")
batch_ch = getSubChannel(params.batch,"batch")
phenotype_ch = getSubChannel(params.phenotype,"pheno",params.pheno_col)
batch_ch = getSubChannel(params.batch,"batch",params.batch_col)
raw_ch = Channel.create()
bim_ch = Channel.create()
inpmd5ch = Channel.create()
Expand Down Expand Up @@ -158,14 +189,6 @@ if ( nosexentries.contains(params.sexinfo_available) ) {
}


// Checks if the file exists
checker = { fn ->
if (fn.exists())
return fn;
else
error("\n\n-----------------\nFile $fn does not exist\n\n---\n")
}




Expand Down
6 changes: 5 additions & 1 deletion templates/batchReport.py
Expand Up @@ -569,15 +569,19 @@ def getPhenoAnalysis():
if "${params.batch}" not in no_response:
args.pheno_col = 'all'
pfrm = DataFrame(["1"]*len(ifrm),index=ifrm.index,columns=['all'])
got_frame = True
res_text = "Table *-ref{table:batchrep:all} on page *-pageref{table:batchrep:all} shows the error rate."
else:
pfrm = getCsvI(args.phenotype)
got_frame = True
res_text = "Table *-ref{table:batchrep:%(bname)s} on page *-pageref{table:batchrep:%(bname)s} shows the error"\
" rate as shown by %(bname)s as found in file *-url{%(fname)s}."\
%({'bname':args.pheno_col,'fname':args.phenotype})
if not got_frame:
if got_frame:
result = miss_vals(ifrm,pfrm,args.pheno_col,args.sexcheck_report)
res_text = res_text + showResult(args.pheno_col,*result)
else:
pfrm = DataFrame(["1"]*len(ifrm),index=ifrm.index,columns=['0'])
return pfrm, res_text


Expand Down

0 comments on commit 0ff48a0

Please sign in to comment.