Permalink
Browse files

update for hg38 and chr naming style #10

  • Loading branch information...
gavinha committed Aug 1, 2018
1 parent 108b0bb commit ccd357fae6036d080cbedd16d323f30f0543322a
@@ -72,8 +72,8 @@ setGenomeStyle <- function(x, genomeStyle = "NCBI", species = "Homo_sapiens"){
loadReadCountsFromWig <- function(counts, chrs = c(1:22, "X", "Y"), gc = NULL, map = NULL, centromere = NULL, flankLength = 100000, targetedSequences = NULL, genomeStyle = "NCBI", applyCorrection = TRUE, mapScoreThres = 0.9, chrNormalize = c(1:22, "X", "Y"), fracReadsInChrYForMale = 0.002, useChrY = TRUE){
require(HMMcopy)
require(GenomeInfoDb)
counts.raw <- counts
names(counts) <- setGenomeStyle(names(counts), genomeStyle)
counts.raw <- counts
counts <- keepChr(counts, chrs)
if (!is.null(gc)){
names(gc) <- setGenomeStyle(names(gc), genomeStyle)
@@ -87,10 +87,12 @@ loadReadCountsFromWig <- function(counts, chrs = c(1:22, "X", "Y"), gc = NULL, m

# remove centromeres
if (!is.null(centromere)){
centromere$Chr <- setGenomeStyle(centromere$Chr, genomeStyle)
counts <- excludeCentromere(counts, centromere, flankLength = flankLength, genomeStyle=genomeStyle)
}
# keep targeted sequences
if (!is.null(targetedSequences)){
targetedSequences[,1] <- setGenomeStyle(targetedSequences[,1], genomeStyle)
countsExons <- filterByTargetedSequences(counts, targetedSequences)
counts <- counts[countsExons$ix,]
}
@@ -171,6 +173,8 @@ getGender <- function(rawReads, normReads, gc, map, fracReadsInChrYForMale = 0.0

normalizeByPanelOrMatchedNormal <- function(tumour_copy, chrs = c(1:22, "X", "Y"),
normal_panel = NULL, normal_copy = NULL, gender = "female", normalizeMaleX = FALSE){
genomeStyle <- seqlevelsStyle(as.vector(tumour_copy$space))[1]
seqlevelsStyle(chrs) <- genomeStyle
### COMPUTE LOG RATIO FROM MATCHED NORMAL OR PANEL AND HANDLE CHRX ###
## NO PANEL
# matched normal but NO panel, then just normalize by matched normal (WES)
@@ -195,6 +199,7 @@ normalizeByPanelOrMatchedNormal <- function(tumour_copy, chrs = c(1:22, "X", "Y"
# PANEL, then normalize by panel instead of matched normal (ULP and WES)
if (!is.null(normal_panel)){
panel <- readRDS(normal_panel) ## load in IRanges object
names(panel) <- setGenomeStyle(names(panel), genomeStyle)
panel <- keepChr(panel, chr = chrs)
# intersect bins in sample and panel
hits <- findOverlaps(tumour_copy, panel, type="equal")
@@ -9,7 +9,7 @@

# ichorCNA: https://github.com/broadinstitute/ichorCNA
# HMMcopy website: http://compbio.bccrc.ca/software/hmmcopy/ and https://www.bioconductor.org/packages/release/bioc/html/HMMcopy.html
# date: March 29, 2017
# date: August 1, 2018
# description: Hidden Markov model (HMM) to analyze Ultra-low pass whole genome sequencing (ULP-WGS) data.
# This script is the main script to run the HMM.

@@ -43,6 +43,7 @@ option_list <- list(
make_option(c("--chrNormalize"), type="character", default="c(1:22)", help = "Specify chromosomes to normalize GC/mappability biases. Default: [%default]"),
make_option(c("--chrTrain"), type="character", default="c(1:22)", help = "Specify chromosomes to estimate params. Default: [%default]"),
make_option(c("--chrs"), type="character", default="c(1:22,\"X\")", help = "Specify chromosomes to analyze. Default: [%default]"),
make_option(c("--genomeStyle"), type = "character", default = "NCBI", help = "NCBI or UCSC chromosome naming convention; use UCSC if desired output is to have \"chr\" string. [Default: %default]"),
make_option(c("--normalizeMaleX"), type="logical", default=TRUE, help = "If male, then normalize chrX by median. Default: [%default]"),
make_option(c("--fracReadsInChrYForMale"), type="numeric", default=0.001, help = "Threshold for fraction of reads in chrY to assign as male. Default: [%default]"),
make_option(c("--includeHOMD"), type="logical", default=FALSE, help="If FALSE, then exclude HOMD state. Useful when using large bins (e.g. 1Mb). Default: [%default]"),
@@ -59,6 +60,7 @@ print(opt)
options(scipen=0, stringsAsFactors=F)

library(HMMcopy)
library(GenomeInfoDb)
options(stringsAsFactors=FALSE)
options(bitmapType='cairo')

@@ -96,17 +98,21 @@ plotFileType <- opt$plotFileType
plotYLim <- eval(parse(text=opt$plotYLim))
gender <- NULL
outImage <- paste0(outDir,"/", patientID,".RData")
chrs <- eval(parse(text = opt$chrs))
#chrs <- c(chrs, "Y")
chrTrain <- eval(parse(text=opt$chrTrain))
chrNormalize <- chrTrain #eval(parse(text=opt$chrNormalize))
genomeStyle <- opt$genomeStyle
chrs <- as.character(eval(parse(text = opt$chrs)))
chrTrain <- as.character(eval(parse(text=opt$chrTrain)));
chrNormalize <- as.character(eval(parse(text=opt$chrNormalize)));
seqlevelsStyle(chrs) <- genomeStyle
seqlevelsStyle(chrNormalize) <- genomeStyle
seqlevelsStyle(chrTrain) <- genomeStyle

if (!is.null(libdir)){
source(paste0(libdir,"/utils.R"))
source(paste0(libdir,"/segmentation.R"))
source(paste0(libdir,"/EM.R"))
source(paste0(libdir,"/output.R"))
source(paste0(libdir,"/plotting.R"))

if (!is.null(libdir) && libdir != "None"){
source(paste0(libdir,"/R/utils.R"))
source(paste0(libdir,"/R/segmentation.R"))
source(paste0(libdir,"/R/EM.R"))
source(paste0(libdir,"/R/output.R"))
source(paste0(libdir,"/R/plotting.R"))
} else {
library(ichorCNA)
}
@@ -134,7 +140,7 @@ if (is.null(centromere) || centromere == "None" || centromere == "NULL"){ # no c
package = "ichorCNA")
}
centromere <- read.delim(centromere,header=T,stringsAsFactors=F,sep="\t")

save.image(outImage)
## LOAD IN WIG FILES ##
numSamples <- nrow(wigFiles)
tumour_counts <- list()
@@ -166,6 +172,7 @@ for (i in 1:numSamples) {
counts <- loadReadCountsFromWig(tumour_reads, chrs = chrs, gc = gc, map = map,
centromere = centromere, flankLength = flankLength,
targetedSequences = targetedSequences,
genomeStyle = genomeStyle,
chrNormalize = chrNormalize, mapScoreThres = 0.9)
tumour_copy[[id]] <- counts$counts #as(counts$counts, "GRanges")
gender <- counts$gender
@@ -176,7 +183,7 @@ for (i in 1:numSamples) {
message("Correcting Normal")
counts <- loadReadCountsFromWig(normal_reads, chrs=chrs, gc=gc, map=map,
centromere=centromere, flankLength = flankLength, targetedSequences=targetedSequences,
chrNormalize = chrNormalize, mapScoreThres = 0.9)
genomeStyle = genomeStyle, chrNormalize = chrNormalize, mapScoreThres = 0.9)
normal_copy <- counts$counts #as(counts$counts, "GRanges")
gender.normal <- counts$gender
}else{
@@ -221,6 +228,7 @@ if (length(tumour_copy) >= 2) {
valid <- valid & tumour_copy[[i]]$valid
}
}
save.image(outImage)

### RUN HMM ###
## store the results for different normal and ploidy solutions ##
@@ -1,9 +1,9 @@
#!/bin/bash -l

#$ -q short
#$ -q broad
#$ -cwd
#$ -V
#$ -l h_vmem=12G
#$ -l h_vmem=4G,h_rt=100:00:00
#$ -pe smp 1
#$ -binding linear:1
#$ -o logs/cluster/
@@ -0,0 +1,46 @@
## read depth params ##
readCounterScript: /path/to/HMMcopy/bin/readCounter
#chrs:
# 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y
chrs: # use if want UCSC chromosome naming for hg38
chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX
binSize: 1000000 # set window size to compute coverage
ichorCNA_libdir: None # include if ichorCNA R source files changed but package has not been updated within R

## ichorCNA params ##
# included in GitHub repo
ichorCNA_rscript: ../runIchorCNA.R
# use panel matching same bin size (optional)
ichorCNA_normalPanel: ../../inst/extdata/HD_ULP_PoN_hg38_1Mb_median_normAutosome_median.rds
# must use gc wig file corresponding to same binSize (required)
ichorCNA_gcWig: ../../inst/extdata/gc_hg38_1000kb.wig
# must use map wig file corresponding to same binSize (required)
ichorCNA_mapWig: ../../inst/extdata/map_hg38_1000kb.wig
# use bed file if sample has targeted regions, eg. exome data (optional)
ichorCNA_exons: NULL
ichorCNA_centromere: ../../inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt
ichorCNA_genomeStyle: UCSC
ichorCNA_chrs: paste0('chr', c(1:22, \"X\"))
# chrs used for training ichorCNA parameters, e.g. tumor fraction.
ichorCNA_chrTrain: paste0('chr', c(1:22))
# non-tumor fraction parameter restart values; higher values should be included for cfDNA
ichorCNA_normal: c(0.5,0.6,0.7,0.8,0.9,0.95)
# ploidy parameter restart values
ichorCNA_ploidy: c(2,3)
ichorCNA_estimateNormal: TRUE
ichorCNA_estimatePloidy: TRUE
ichorCNA_estimateClonality: TRUE
# states to use for subclonal CN
ichorCNA_scStates: c(1,3)
# set maximum copy number to use
ichorCNA_maxCN: 5
# TRUE/FALSE to include homozygous deletion state
ichorCNA_includeHOMD: FALSE
# control segmentation - higher (e.g. 0.9999999) leads to higher specificity and fewer segments
# lower (e.g. 0.99) leads to higher sensitivity and more segments
ichorCNA_txnE: 0.9999
# control segmentation - higher (e.g. 10000000) leads to higher specificity and fewer segments
# lower (e.g. 100) leads to higher sensitivity and more segments
ichorCNA_txnStrength: 10000
ichorCNA_plotFileType: pdf
ichorCNA_plotYlim: c(-2,4)
@@ -51,17 +51,19 @@ rule ichorCNA:
includeHOMD=config["ichorCNA_includeHOMD"],
chrs=config["ichorCNA_chrs"],
chrTrain=config["ichorCNA_chrTrain"],
genomeStyle=config["ichorCNA_genomeStyle"],
centromere=config["ichorCNA_centromere"],
exons=config["ichorCNA_exons"],
txnE=config["ichorCNA_txnE"],
txnStrength=config["ichorCNA_txnStrength"],
fracReadsChrYMale="0.001",
plotFileType=config["ichorCNA_plotFileType"],
plotYlim=config["ichorCNA_plotYlim"]
plotYlim=config["ichorCNA_plotYlim"],
libdir=config["ichorCNA_libdir"]
resources:
mem=4
log:
"logs/ichorCNA/{tumor}.log"
shell:
"Rscript {params.rscript} --id {params.id} --WIG {input.tum} --gcWig {params.gcwig} --mapWig {params.mapwig} --normalPanel {params.normalpanel} --ploidy \"{params.ploidy}\" --normal \"{params.normal}\" --maxCN {params.maxCN} --includeHOMD {params.includeHOMD} --chrs \"{params.chrs}\" --chrTrain \"{params.chrTrain}\" --estimateNormal {params.estimateNormal} --estimatePloidy {params.estimatePloidy} --estimateScPrevalence {params.estimateClonality} --scStates \"{params.scStates}\" --centromere {params.centromere} --exons.bed {params.exons} --txnE {params.txnE} --txnStrength {params.txnStrength} --fracReadsInChrYForMale {params.fracReadsChrYMale} --plotFileType {params.plotFileType} --plotYLim \"{params.plotYlim}\" --outDir {output.outDir} > {log} 2> {log}"
"Rscript {params.rscript} --id {params.id} --libdir {params.libdir} --WIG {input.tum} --gcWig {params.gcwig} --mapWig {params.mapwig} --normalPanel {params.normalpanel} --ploidy \"{params.ploidy}\" --normal \"{params.normal}\" --maxCN {params.maxCN} --includeHOMD {params.includeHOMD} --chrs \"{params.chrs}\" --chrTrain \"{params.chrTrain}\" --genomeStyle {params.genomeStyle} --estimateNormal {params.estimateNormal} --estimatePloidy {params.estimatePloidy} --estimateScPrevalence {params.estimateClonality} --scStates \"{params.scStates}\" --centromere {params.centromere} --exons.bed {params.exons} --txnE {params.txnE} --txnStrength {params.txnStrength} --fracReadsInChrYForMale {params.fracReadsChrYMale} --plotFileType {params.plotFileType} --plotYLim \"{params.plotYlim}\" --outDir {output.outDir} > {log} 2> {log}"

0 comments on commit ccd357f

Please sign in to comment.