Permalink
Browse files

combine Long Ranger SV calls

- added into the snakemake pipeline to also extract Long Ranger SV calls
- updated bugs with combining CN/SV and classifying SV events
- fixed bug in plotting
- updated cluster resources required
  • Loading branch information...
gavinha committed Oct 17, 2018
1 parent d062b1d commit d210bd713e3daa011852c32f5417ad4d9e7f4703

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1,94 @@
#' combineSVABAandTITAN.R
#' author: Gavin Ha
#' Fred Hutchinson Cancer Research Center
#' contact: <gha@fredhutch.org>
#' date: October 2, 2018
#' description: Compare tumor and normal Long Ranger SVs to identify somatic events. Combines
library(optparse)
option_list <- list(
make_option(c("--id"), type = "character", help = "Sample ID"),
make_option(c("--tenX_funcs"), type = "character", help = "Path to file containing 10X R functions to source."),
make_option(c("--tumLargeSVFile"), type="character", help = "Long Ranger large SV calls for tumor sample (large_sv_calls.bedpe)"),
make_option(c("--normLargeSVFile"), type="character", help = "Long Ranger large SV calls for normal sample (large_sv_calls.bedpe)"),
make_option(c("--tumDeletionFile"), type="character", help = "Long Ranger deletion calls for tumor sample (dels.vcf.gz)"),
make_option(c("--normDeletionFile"), type="character", help = "Long Ranger deletion calls for normal sample (dels.vcf.gz)"),
make_option(c("--genomeBuild"), type="character", default="hg19", help = "Genome build: hg19 or hg38. Default [%default]"),
make_option(c("--genomeStyle"), type = "character", default = "NCBI", help = "NCBI or UCSC chromosome naming convention; use UCSC if desired output is to have \"chr\" string. [Default: %default]"),
make_option(c("--chrs"), type = "character", default = "c(1:22, 'X')", help = "Chromosomes to analyze; string [Default: %default"),
make_option(c("--outDir"), type="character", help="Path to output directory."),
make_option(c("--outputSVFile"), type="character", help="Path to output SV file with new annotations.")
)
parseobj <- OptionParser(option_list=option_list, usage = "usage: Rscript %prog [options]")
opt <- parse_args(parseobj)
print(opt)
library(VariantAnnotation)
library(GenomicRanges)
library(data.table)
library(stringr)
args <- commandArgs(TRUE)
options(stringsAsFactors=FALSE, width=160, scipen=999)
source(opt$tenX_funcs)
id <- opt$id
tumLargeSVFile <- opt$tumLargeSVFile
normLargeSVFile <- opt$normLargeSVFile
tumDeletionFile <- opt$tumDeletionFile
normDeletionFile <- opt$normDeletionFile
outputSVFile <- opt$outputSVFile
outDir <- opt$outDir
dir.create(outDir, recursive = TRUE)
outImage <- paste0(outDir, "/", id, ".RData")
genomeBuild <- opt$genomeBuild
genomeStyle <- opt$genomeStyle
chrs <- as.character(eval(parse(text = opt$chrs)))
seqlevelsStyle(chrs) <- genomeStyle
#seqinfo <- Seqinfo(genome=genomeBuild)
buffer <- 1000
minDelLength <- 1000
minQual <- 20
save.image(outImage)
tum.sv <- getSVfromBEDPE(tumLargeSVFile, skip=1, genomeStyle = genomeStyle)
norm.sv <- getSVfromBEDPE(normLargeSVFile, skip=1, genomeStyle = genomeStyle)
del.tum <- readVcf(tumDeletionFile, genome=genomeBuild)
del.tum <- getSVfromCollapsedVCF.LR(del.tum, chrs=chrs, genomeStyle = genomeStyle)
del.norm <- readVcf(normDeletionFile, genome=genomeBuild)
del.norm <- getSVfromCollapsedVCF.LR(del.norm, chrs=chrs, genomeStyle = genomeStyle)
save.image(outImage)
tum.sv.del <- rbind(tum.sv, del.tum[FILTER=="PASS" & SPAN >= minDelLength & QUAL >= minQual], fill=TRUE)
tum.sv.del <- tum.sv.del[, .(chromosome_1, start_1, chromosome_2, start_2, mateID, FILTER,
HAP_ALLELIC_FRAC, ALLELIC_FRAC, DR, SR, PS, SPAN, orient_1, orient_2)]
norm.sv.del <- rbind(norm.sv, del.norm[FILTER=="PASS" & SPAN >= minDelLength & QUAL >= minQual], fill=TRUE)
norm.sv.del <- norm.sv.del[, .(chromosome_1, start_1, chromosome_2, start_2, mateID, FILTER,
HAP_ALLELIC_FRAC, ALLELIC_FRAC, DR, SR, PS, SPAN, orient_1, orient_2)]
overlap <- getOverlapSV(tum.sv.del, norm.sv.del, buffer.x = buffer, buffer.y = buffer)
tum.sv.del <- cbind(Sample=id, SV.id = 1:nrow(tum.sv.del), tum.sv.del)
tum.sv.del[, SOMATIC := FALSE]
tum.sv.del[!SV.id %in% overlap$overlap.ind, SOMATIC := TRUE]
## sort by chromosome_1 and start_1
tum.sv.del <- tum.sv.del[!is.na(chromosome_1) & !is.na(chromosome_2), ]
tum.sv.del <- tum.sv.del[chromosome_1 %in% chrs & chromosome_2 %in% chrs, ]
tum.sv.del <- tum.sv.del[order(chromosome_1, start_1)]
tum.sv.del[, SV.id := 1:nrow(tum.sv.del)] # reassign SV.id
## exclude events without orientation ##
tum.sv.del <- tum.sv.del[!is.na(orient_1) & !is.na(orient_2)]
write.table(tum.sv.del, file = outputSVFile, col.names=T, row.names=F, quote=F, sep="\t")
save.image(outImage)
@@ -38,7 +38,7 @@ opt <- parse_args(parseobj)
print(opt)
options(bitmapType='cairo', scipen=0)
options(stringsAsFactors=F, scipen=999, bitmapType = "cairo", width=175)
options(stringsAsFactors=F, bitmapType = "cairo", width=175)
library(data.table)
library(GenomicRanges)
@@ -75,7 +75,6 @@ plotType <- opt$plotCNAtype
plotSize <- eval(parse(text=opt$plotSize))
plotFormat <- opt$plotFormat
outDir <- opt$outDir
save.image("tmp.RData")
width <- plotSize[1] #6 8
height <- plotSize[2] #3 3.5 #4
spacing <- 3
@@ -166,8 +165,8 @@ ploidyT <- as.numeric(params[2, 2])
normCN <- 2
ploidyS <- purity * ploidyT + (1-purity) * normCN
if (yaxis == "integer"){
ulp[Chr!="X", LogRatio := log2(logRbasedCN(LogRatio, purity, ploidyT, cn=2))]
ulp[Chr=="X", LogRatio := log2(logRbasedCN(LogRatio, purity, ploidyT, cn=1))]
ulp[!grepl("X",Chr), LogRatio := log2(logRbasedCN(LogRatio, purity, ploidyT, cn=2))]
ulp[grepl("X",Chr), LogRatio := log2(logRbasedCN(LogRatio, purity, ploidyT, cn=1))]
colName <- "logR_Copy_Number"
}else{
ulp[, LogRatio := LogRatio + log2(ploidyS / 2)]
@@ -179,15 +178,18 @@ if (yaxis == "integer"){
if (plotType == "titan"){
ulp <- ulp[!is.na(Corrected_Call)]
}
ulp$Chr <- factor(ulp$Chr, levels = chrStr)
ulp <- ulp[order(Chr)]
############# load Combined SV (SVABA, GROC, LongRanger) ##############
sv <- fread(svFile)
#save.image(file=outImage)
save.image(file=outImage)
#####################################
########## PLOT CHR RESULTS #########
#####################################
for (j in 1:length(chrStr)){
message("Plotting ", chrStr[j])
###################################
### SNOWMAN + BARCODE RESCUE ######
outPlot <- paste0(outPlotDir, "/", id, "_CNA-SV-BX_",plotType,"_",chrStr[j],".",plotFormat)
@@ -264,7 +264,7 @@ logRbasedCN <- function(x, purity, ploidyT, cn = 2){
}
## modified to work for combine_TITAN_ICHOR/*titan_ichor_cn.txt
## modified to work for combine_TITAN_ICHOR/titan_ichor_cn.txt
findNearestLogR <- function(x, y, buffer = 1e6){
y1 <- 0; y2 <- 0
#y <- na.omit(y)
@@ -281,10 +281,12 @@ findNearestLogR <- function(x, y, buffer = 1e6){
#indClose <- ind[which.min(c(abs(y[ind[1], "end"]-x[["start_1"]]), abs(y[ind[2], "start"]-x[["start_1"]])))]
#y1 <- y[indClose, "copy"]
#y1 <- max(y[tail(which(bin1Ind), 1):(tail(which(bin1Ind), 1)+1), "copy"], na.rm = TRUE)
if (x[["orient_1"]] == "rev"){ # region to left of breakpoint
if (x[["orient_1"]] == "rev"){ # region to left of breakpoint
y1 <- y[max(ind - 1, 1), "LogRatio"]
#y1 <- tail(na.omit(y[max((ind-10):ind), "LogRatio"]), 1)
}else if (x[["orient_1"]] == "fwd"){ # region to right of breakpoint
y1 <- y[min(ind + 1, length(bin1Ind)), "LogRatio"]
#y1 <- head(na.omit(y[max(ind:(ind+10)), "LogRatio"]), 1)
}
}
if (sum(bin2Ind, na.rm=T) > 0){
@@ -299,6 +301,8 @@ findNearestLogR <- function(x, y, buffer = 1e6){
y2 <- y[min(ind + 1, length(bin1Ind)), "LogRatio"]
}
}
if (is.na(y1)) { y1 <- 0 }
if (is.na(y2)) { y2 <- 0 }
return(c(y1, y2))
}
@@ -215,6 +215,9 @@ removeIdenticalSV <- function(sv){
}
sortBkptPairOrder <- function(sv.unsort, chrs = c(1:22, "X", "Y")){
genomeStyle <- seqlevelsStyle(sv.unsort$chromosome_1)[1]
chrs <- as.character(chrs)
seqlevelsStyle(chrs) <- genomeStyle
## interchr
sv <- copy(sv.unsort)
sv[, SV.id := 1:nrow(sv)]
@@ -1455,7 +1458,7 @@ plotCNlogRByChr <- function(dataIn, colName = "copy", segs=NULL, chr=NULL, ploid
}
}
## modified to work for combine_TITAN_ICHOR/*titan_ichor_cn.txt
## modified to work for combine_TITAN_ICHOR/titan_ichor_cn.txt
findNearestLogR <- function(x, y, buffer = 1e6){
y1 <- 0; y2 <- 0
#y <- na.omit(y)
Oops, something went wrong.

0 comments on commit d210bd7

Please sign in to comment.