Skip to content

Commit

Permalink
Merge pull request #30 from vyepez88/code_cleaning
Browse files Browse the repository at this point in the history
Code cleaning
  • Loading branch information
vyepez88 committed Apr 15, 2020
2 parents 89977ea + 8aa3137 commit ad1f791
Show file tree
Hide file tree
Showing 11 changed files with 146 additions and 57 deletions.
1 change: 1 addition & 0 deletions drop/configHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def setDefaults(self, config, method=None):
setKey(config, None, "projectTitle", "DROP: Detection of RNA Outlier Pipeline")

setKey(config, None, "genomeAssembly", "hg19")
setKey(config, None, "scanBamParam", "null")

if self.method is None:
tmp_dir = submodules.getTmpDir()
Expand Down
2 changes: 1 addition & 1 deletion drop/modules/mae-pipeline
34 changes: 18 additions & 16 deletions drop/template/Scripts/AberrantExpressionAnalysis/Overview.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#' - AE: '`sm drop.getTmpDir() + "/AE.done"`'
#' output:
#' html_document:
#' code_folding: hide
#' code_folding: show
#' code_download: TRUE
#'---

Expand All @@ -48,7 +48,7 @@ display_text <- function(caption, links) {
}

# get parameters
datasets <- snakemake@params$datasets
datasets <- sort(snakemake@params$datasets)
annotations <- snakemake@params$annotations
htmlDir <- snakemake@params$htmlDir

Expand All @@ -59,55 +59,57 @@ htmlDir <- snakemake@params$htmlDir
#'
#' **Gene annotations:** `r paste(annotations, collapse = ', ')`
#'
#' # Count Summaries
#' ## Summaries
#' ### Counts summary
#+ echo=FALSE
htmlDir <- './AberrantExpression'
count_links <- sapply(annotations, get_html_path,
datasets = datasets,
htmlDir = file.path(htmlDir, "Counting"),
fileName = paste0('Summary_', datasets, '.html'))
#'
#' `r display_text(caption = 'Gene annotation version ', count_links)`
#'
#' # OUTRIDER Results
#' ### OUTRIDER summary
#+ echo=FALSE
outrider_links <- sapply(annotations, get_html_path,
datasets = datasets,
htmlDir = file.path(htmlDir, "Outrider"),
fileName = paste0('Summary_', datasets, '.html'))
#'
#'
#' `r display_text(caption = 'Gene annotation version ', outrider_links)`
#'
#'
#' OUTRIDER dataset (ods) files
#' ## Files
#' ### OUTRIDER datasets (ods)
#'
#' `r paste('* ', snakemake@params$odsFiles, collapse = '\n')`
#'
#'
#' Results tables
#' ### Results tables
#'
#' `r paste('* ', snakemake@params$resultTables, collapse = '\n')`
#'
#'
#' # Analyze Individual Results

#' ## Analyze Individual Results
ods <- readRDS(snakemake@params$odsFiles[[1]])
res <- fread(snakemake@params$resultTables[[1]])

DT::datatable(res)
#' Display the results table of the first dataset
#+ echo=FALSE
DT::datatable(res, filter = 'top')

#' Choose a random gene and sample
#' Choose a random gene and sample to plot. Outliers are in red.
#+ echo=TRUE
gene <- res[1, geneID]
sample <- res[1, sampleID]

#' ## Volcano plot
#' ### Volcano plot
#' setting basePlot = FALSE creates an interactive plot
#' that allows finding the gene(s) of interest
OUTRIDER::plotVolcano(ods, sample, basePlot = TRUE)

#' ## Gene expression plot
#' ### Gene expression plot (normalized counts)
OUTRIDER::plotExpressionRank(ods, gene, basePlot = TRUE)

#' ## Expected vs observed counts
#' ### Expected vs observed counts
OUTRIDER::plotExpectedVsObservedCounts(ods, gene, basePlot = TRUE)

65 changes: 54 additions & 11 deletions drop/template/Scripts/AberrantSplicingAnalysis/Overview.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#' - AS: '`sm drop.getTmpDir() + "/AS.done"`'
#' output:
#' html_document:
#' code_folding: hide
#' code_folding: show
#' code_download: TRUE
#'---

Expand All @@ -27,38 +27,81 @@ saveRDS(snakemake, file.path(snakemake@params$tmpdir,
# snakemake <- readRDS(".drop/tmp/AberrantSplicing_FRASER.snakemake")

suppressPackageStartupMessages({
library(FRASER)
library(magrittr)
library(FRASER)
library(magrittr)
})

#' FRASER objects: `r paste(snakemake@params$fds_files)`
# define functions
get_html_path <- function(datasets, htmlDir, fileName) {
file_paths <- file.path(htmlDir, fileName)
file_link <- paste0('\n* [', datasets ,'](', file_paths,
'){target="_blank"}\n', collapse = ' ')
file_link
}

display_text <- function(links) {
paste0(links, collapse = '\n')
}

# get parameters
datasets <- sort(snakemake@config$aberrantSplicing$groups)

## start html

#'
#' **Datasets:** `r paste(datasets, collapse = ', ')`
#'
#' ## Summaries
#' ### Counts summary
#+ echo=FALSE
htmlDir <- './AberrantSplicing'
count_links <- get_html_path(datasets = datasets,
htmlDir = htmlDir,
fileName = paste0(datasets, '_countSummary.html'))
#'
#' `r display_text(count_links)`
#'
#' FRASER results: `r paste(snakemake@params$result_tables)`
#' ### FRASER summary
#+ echo=FALSE
fraser_links <- get_html_path(datasets = datasets,
htmlDir = htmlDir,
fileName = paste0(datasets, '_summary.html'))
#'
#' `r display_text(fraser_links)`

#' ## Files
#' ### FRASER datasets (fds)
#' `r paste('* ', snakemake@params$fds_files, collapse = '\n')`
#'
#' ### Results tables
#' `r paste('* ', snakemake@params$result_tables, collapse = '\n')`

#'
#' ## Analyze individual results
#'

# Load the files
fds <- loadFraseRDataSet(file = snakemake@params$fds_files[[1]])
fds <- readRDS(snakemake@params$fds_files[[1]])
res <- fread(snakemake@params$result_tables[[1]])

sample <- samples(fds)[1]
#' Display the results table of the first dataset
#+ echo=FALSE
DT::datatable(res, filter = 'top')

#' Get a splice site and sample of interest
#' Get a splice site and sample of interest. Outliers are in red.
#+ echo=TRUE
sample <- res[1, sampleID]
siteIndex <- 4

#' ## Volcano plot
#' ### Volcano plot
#' setting basePlot = FALSE creates an interactive plot
#' that allows finding the junction(s) of interest
FRASER::plotVolcano(fds, sample, type = 'psi3', basePlot = TRUE)

#' ## Expression plot
#' ### Expression plot
FRASER::plotExpression(fds, type = 'psi3', site = siteIndex, basePlot = TRUE)

#' ## Expected vs observed PSI
#' ### Expected vs observed PSI
FRASER::plotExpectedVsObservedPsi(fds, type = 'psi3',
idx = siteIndex, basePlot = TRUE)

52 changes: 33 additions & 19 deletions drop/template/Scripts/MAEAnalysis/Overview.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
#' params:
#' - tmpdir: '`sm drop.getTmpDir()`'
#' - mae_ids: '`sm parser.getMaeAll()`'
#' - count_matrices: '`sm expand(parser.getProcDataDir() +
#' "/mae/allelic_counts/{mae_id}.csv.gz",
#' mae_id=parser.getMaeAll())`'
#' - allelic_counts: '`sm expand(parser.getProcDataDir() +
#' "/mae/allelic_counts/{mae_id}.csv.gz",
#' mae_id=parser.getMaeAll())`'
#' - results_obj: '`sm expand(parser.getProcResultsDir() +
#' "/mae/samples/{mae_id}_res.Rds",
#' mae_id=parser.getMaeAll())`'
Expand All @@ -18,7 +18,6 @@
#' - html: '`sm config["htmlOutputPath"] + "/Scripts_MAE_Results_Overview.html"`'
#' - qc_matrix: '`sm expand(parser.getProcResultsDir() + "/mae/{qc_group}/" +
#' "dna_rna_qc_matrix.Rds", qc_group=config["mae"]["qcGroups"])`'
#' - qc_html: '`sm config["htmlOutputPath"] + "/Scripts_QC_Overview.html"`'
#' input:
#' - MAE: '`sm drop.getTmpDir() + "/MAE.done"`'
#' output:
Expand All @@ -32,35 +31,50 @@ saveRDS(snakemake, file.path(snakemake@params$tmpdir, "MAE_analysis.snakemake"))
# snakemake <- readRDS(".drop/tmp/MAE_analysis.snakemake")

suppressPackageStartupMessages({
library(tMAE)
library(tMAE)
})

all_files <- paste0("* ", snakemake@params$mae_ids, ": ",
snakemake@params$count_matrices, collapse = "\n")
#'
#' # All Samples

#' ## Files
#' ### Allelic counts
#' Located in `r file.path(snakemake@config$root, 'processed_data/mae/allelic_counts/')`
#'
#' `r all_files`
#' ### Results tables of each sample
#' Located in `r file.path(snakemake@config$root, 'processed_results/mae/samples/')`
#'
#' [MAE Pipeline Output](`r snakemake@params$html`)
#' ### Aggregated results tables of each group
#' `r paste('* ', snakemake@params$results_tables, collapse = '\n')`
#'
#' ### MAE Pipeline Output
#' [MAE Pipeline Output](`r "./Scripts_MAE_Datasets.html"`)
#'

#' # Analyze Individual Results
#' ## Analyze Individual Results
#'
file <- snakemake@params$results_tables[[1]]
res <- fread(file)

file_location <- strsplit(file, "/")[[1]]
res_sample <- readRDS(snakemake@params$results_obj[[1]])

plotMA4MAE(res_sample)
plotAllelicCounts(res_sample)
if(is.null(res_sample$rare)){
g1 <- plotMA4MAE(res_sample)
g2 <- plotAllelicCounts(res_sample)
} else {
g1 <- plotMA4MAE(res_sample, rare_column = 'rare')
g2 <- plotAllelicCounts(res_sample, rare_column = 'rare')
}

#' # Quality Control: VCF-BAM Matching
#'
#' DNA-RNA matrix:
#' ### MA plot: fold change vs RNA coverage
g1

#' ### Alternative vs Reference plot
g2

#' ## Quality Control: VCF-BAM Matching
#'
#' `r paste(' *', snakemake@params$qc_matrix, collapse='\n')`
#' [QC Overview](`r "./Scripts_QC_Datasets.html"`)
#'
#' [QC Overview](`r snakemake@params$qc_html`)
#' DNA-RNA matrix:
#' `r paste('* ', snakemake@params$qc_matrix, collapse='\n')`

19 changes: 12 additions & 7 deletions drop/template/Scripts/Pipeline/SampleAnnotation.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,22 @@ suppressPackageStartupMessages({

sa <- fread(snakemake@input$sampleAnnotation)

#' Number of rows and columns in tha sample annotation
dim(sa)
DT::datatable(sa)
#'
#' Number of rows and columns in the sample annotation: `r dim(sa)`

#'
#' ## Sample annotation
DT::datatable(sa, filter = 'top')

#' ## Quality control checks

# check for duplicated rows
if(sum(duplicated(sa)) > 0){
print("The sample annotation has the following duplicated rows. Remove them.")
sa[duplicated(sa)]
}

#' Check for NAs
#' Check for RNA_IDs without a value
if(nrow(sa[is.na(RNA_ID)]) > 0){
print("The sample annotation has some non-existent RNA_IDs. Fill them.")
sa[is.na(RNA_ID)]
Expand All @@ -58,13 +63,13 @@ if('DNA_VCF_FILE' %in% colnames(sa)){
}
}

#' Check for RNA_IDs with more than 1 RNA_BAM_FILE
#' Check for RNA_IDs with more than one RNA_BAM_FILE
if(sum(duplicated(unique(sa[,.(RNA_ID, RNA_BAM_FILE)])$RNA_ID)) > 0){
print('The following RNA_IDs and RNA_BAM_FILEs do not have a 1:1 match. Correct them.')
DT::datatable(duplicated(unique(sa[,.(RNA_ID, RNA_BAM_FILE)])$RNA_ID))
}

#' Barplot with DROP groups
#' ## Barplot with DROP groups
unique(sa[,.(RNA_ID, DROP_GROUP)])$DROP_GROUP %>% strsplit(',') %>% unlist %>%
table %>% barplot(xlab = 'DROP groups', ylab = 'Number of samples')

Expand All @@ -86,5 +91,5 @@ if(!is.null(sa$HPO_TERMS)){
na = NA, sep = "\t", row.names = F, quote = F)
}

file.create(snakemake@output$done)
file.create(snakemake@output$done) %>% invisible

4 changes: 3 additions & 1 deletion drop/template/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ rule all:
AS(config["finalFiles"]["AS"]),
MAE(config["finalFiles"]["MAE"]),
rules.Index.output,
config["htmlOutputPath"] + "/readme.html",
config["tmpdir"] + "/rulegraphs.done"
output:
touch(config["tmpdir"] + "/drop_all.done")
Expand All @@ -56,7 +57,8 @@ rule aberrantExpression:
output: touch(drop.getTmpDir() + "/AE.done")

rule aberrantSplicing:
input: AS(config["finalFiles"]["AS"])
input:
AS(config["finalFiles"]["AS"])
output: touch(drop.getTmpDir() + "/AS.done")

rule mae:
Expand Down
1 change: 1 addition & 0 deletions drop/template/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ geneAnnotation:
v29: /path/to/gencode29.gtf.gz # example

genomeAssembly: hg19 # either hg19 or hg38
scanBamParam: null # or a path to an Rds file containing a scanBamParam object

aberrantExpression:
groups:
Expand Down
21 changes: 21 additions & 0 deletions drop/template/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# DROP Analysis

Analysis of the demo dataset using the Detection of RNA Outliers Pipeline. For
speed purposes, this is only a small subset of the test dataset from GEUVADIS.

Browse either through the tabs to see the different analysis results or through
the following links to see the pipelines results.

* [**Aberrant expression pipeline**](./aberrant-expression-pipeline_index.html)

* [**Aberrant splicing pipeline**](./aberrant-splicing-pipeline_index.html)

* [**Mono-allelic expression pipeline**](./mae-pipeline_index.html)

## Resources

**Code**: available to download for free in our [github](https://www.github.com/gagneurlab/drop)

**Manuscript**: preprint in [protocol exchange](https://protocolexchange.researchsquare.com/article/pex-787/v1)

**Data**: subset of 100 samples from the GEUVADIS [dataset](https://www.ebi.ac.uk/Tools/geuvadis-das/)

0 comments on commit ad1f791

Please sign in to comment.