Skip to content

Commit

Permalink
Merge pull request #361 from gagneurlab/simplify_qc
Browse files Browse the repository at this point in the history
Simplify qc
  • Loading branch information
nickhsmith committed Aug 25, 2022
2 parents 94dfa60 + 2d35da5 commit 1801912
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
6 changes: 5 additions & 1 deletion drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,17 @@ identityCutoff <- .85

ggplot(melt_mat, aes(value)) + geom_histogram(fill = 'cadetblue4', binwidth = 0.05, center = .025) +
theme_bw(base_size = 14) +
labs(x = 'Proportion of matching DNA-RNA variants', y = 'DNA-RNA combinations') +
scale_y_log10() + annotation_logticks(sides = "l") +
expand_limits(x=c(0,1)) +
geom_vline(xintercept=identityCutoff, linetype='dashed', color = 'firebrick')


#' ## Identify matching samples

#' Number of samples: `r nrow(qc_mat)`
#' Number of RNA samples: `r ncol(qc_mat)`
#'
#' Number of DNA samples: `r nrow(qc_mat)`
#'
#' Number of samples that match RNA and DNA: `r length(qc_mat[qc_mat > identityCutoff])`
#'
Expand Down
22 changes: 13 additions & 9 deletions drop/modules/mae-pipeline/QC/create_matrix_dna_rna_cor.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,21 @@ mcols(gr_test)$GT <- "0/0"
rna_samples <- as.character(snakemake@params$rnaIds)
mae_res <- snakemake@input$mae_res

vcf_cols <- sa[RNA_ID %in% rna_samples, .(DNA_ID, DNA_VCF_FILE)] %>% unique %>% na.omit()
wes_samples <- vcf_cols$DNA_ID
rows_in_group <- sapply(strsplit(sa$DROP_GROUP, ',|, '), function(d) snakemake@wildcards$dataset %in% d)
vcf_cols <- sa[rows_in_group, .(DNA_ID, DNA_VCF_FILE)] %>% unique
dna_samples <- vcf_cols$DNA_ID
vcf_files <- vcf_cols$DNA_VCF_FILE

# Read all RNA genotypes into a list
rna_gt_list <- bplapply(mae_res, readRDS)

# For every DNA, find the overlapping variants with each RNA
N <- length(vcf_files)

lp <- bplapply(1:N, function(i){

# Read sample vcf file
sample <- wes_samples[i] %>% as.character()
sample <- dna_samples[i] %>% as.character()
param <- ScanVcfParam(fixed=NA, info='NT', geno='GT', samples=sample, trimEmpty=TRUE)
vcf_sample <- readVcf(vcf_files[i], param = param, row.names = FALSE)
# Get GRanges and add Genotype
Expand All @@ -70,14 +75,13 @@ lp <- bplapply(1:N, function(i){

# Find overlaps between test and sample
gr_res <- copy(gr_test)
seqlevelsStyle(gr_res) <- seqlevelsStyle(seqlevelsInUse(gr_sample)) # Make chr style the same
seqlevelsStyle(gr_res) <- seqlevelsStyle(seqlevelsInUse(gr_sample))[1]
ov <- findOverlaps(gr_res, gr_sample, type = 'equal')
mcols(gr_res)[from(ov),]$GT <- mcols(gr_sample)[to(ov),]$GT

# Find simmilarity between DNA sample and RNA sample
x <- vapply(mae_res, function(m){
gr_rna <- readRDS(m)
seqlevelsStyle(gr_rna) <- seqlevelsStyle(gr_res)
# Find similarity between DNA sample and RNA sample
x <- vapply(rna_gt_list, function(gr_rna){
seqlevelsStyle(gr_rna) <- seqlevelsStyle(gr_res)[1]
ov <- findOverlaps(gr_res, gr_rna, type = 'equal')
gt_dna <- gr_res[from(ov)]$GT
gt_rna <- gr_rna[to(ov)]$RNA_GT
Expand All @@ -88,7 +92,7 @@ lp <- bplapply(1:N, function(i){

# Create a matrix
mat <- do.call(rbind, lp)
row.names(mat) <- wes_samples
row.names(mat) <- dna_samples
colnames(mat) <- rna_samples


Expand Down

0 comments on commit 1801912

Please sign in to comment.