Skip to content

Commit

Permalink
Merge pull request #408 from gagneurlab/rvc_yieldSize
Browse files Browse the repository at this point in the history
Rvc yield size
  • Loading branch information
nickhsmith committed Dec 6, 2022
2 parents 7fc6c43 + 671fe4e commit 2a91f19
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions drop/modules/rvc-pipeline/countVariants/Results.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,20 @@ if (!all(is.na(res$MAX_AF))) {
filter = 'top')
}

# melt filters by GT. Exclude reference calls
# Single-pass version: reshapes the whole table to long format in one call
# (FILTER stays as the id column, every other column's value becomes GT),
# drops reference genotypes ("0/0"), and counts the remaining rows per
# FILTER / variable / GT combination.
res_plot <- melt(res_plot,id.vars = "FILTER",value.name = "GT")[GT != "0/0",.N,by = c("FILTER","variable","GT")]
# melt filters by GT. Exclude reference calls. Read in batches to avoid vector length errors for large datasets

# generate batch boundaries from the config value: yieldSize
# (number of rows of res_plot that are melted per batch)
yield_size <- snakemake@config$rnaVariantCalling$yieldSize
if (!is.numeric(yield_size) || length(yield_size) != 1L ||
    is.na(yield_size) || yield_size < 1) {
  stop("config value rnaVariantCalling$yieldSize must be a single number >= 1",
       call. = FALSE)
}
# row indices must be whole numbers; a fractional step would skip rows
yield_size <- floor(yield_size)

n_rows <- nrow(res_plot)
# first and last row of every batch; the last batch is clipped to n_rows.
# max(n_rows, 1) keeps one (empty) batch for a 0-row table so the result
# below still has the FILTER / variable / GT / N columns.
batch_starts <- seq(1, max(n_rows, 1), by = yield_size)
batch_ends <- pmin(batch_starts + yield_size - 1, n_rows)

# build per-batch count tables
out <- lapply(seq_along(batch_starts), function(i) {
  # seq_len() instead of start:end so an empty batch selects no rows
  # (start:end would count backwards when end < start)
  rows <- batch_starts[i] - 1 + seq_len(batch_ends[i] - batch_starts[i] + 1)
  # melt this slice only, drop reference calls, count per FILTER/variable/GT
  melt(res_plot[rows], id.vars = "FILTER", value.name = "GT")[
    GT != "0/0", .N, by = c("FILTER", "variable", "GT")]
})

# combine batches and sum the per-batch counts into overall counts
res_plot <- rbindlist(out)[, .(N = sum(N)), by = c("FILTER", "variable", "GT")]

#' ## Table of variant calls by GT (first 1,000)
summary_dt <- dcast(res_plot, FILTER + GT ~ variable, value.var = "N")
Expand Down

0 comments on commit 2a91f19

Please sign in to comment.