This repository contains the following resources:
- Simulation data for SpliceWiz
- Input `pro` and `par` files for flux-simulator. These can be used to generate raw sequencing data for a simulation of differential alternative splicing
- SpliceWiz processBAM() output files run on alignment BAM files of the abovementioned simulation
- Input
- Mappability Exclusion resources for SpliceWiz
To use the following resources for SpliceWiz:
git clone https://github.com/alexchwong/SpliceWizResources.git
cd SpliceWizResources
NB: this git repository is approximately 640 MB
Please refer to the installation instructions viewable at https://github.com/alexchwong/SpliceWiz
Briefly, in R:
devtools::install_github("alexchwong/SpliceWiz")
This repository provides the flux-simulator `pro` and `par` files for the SpliceWiz simulation dataset. It also contains the SpliceWiz processBAM() output files of the simulation.
The included `pro` and `par` files were used by flux-simulator to simulate raw sequencing FASTA files, which were then converted to paired-end sequencing FASTQ files. The resulting FASTQ files were aligned to the hg38 (Ensembl release 94) genome. The alignment BAM files were then processed by SpliceWiz's processBAM() function to generate the output files.
Please refer to the documentation for flux-simulator for use of these files. The Ensembl GRCh38 (release 94) genome / gene annotations were used as the reference for flux-simulator.
The repository also contains the output files of SpliceWiz's processBAM() function, run on the BAM files that were produced by aligning the sequencing data generated by flux-simulator.
# Load SpliceWiz and move into the simulation folder; every relative path
# below ("./Reference", "./pb_output", "../Mappability/...") is resolved
# from there.
library(SpliceWiz)
setwd("./simulation")

# Generate reference file (hg38 v94)
# On Bioconductor 3.14 or higher, buildRef() is called without a
# MappabilityRef. On earlier versions, the Mappability Exclusion file shipped
# in this repository is supplied explicitly.
if (tools:::.BioC_version_associated_with_R_version() >= "3.14") {
  buildRef(
    reference_path = "./Reference",
    fasta = "AH65745",
    gtf = "AH64631",
    genome_type = "hg38"
  )
} else {
  buildRef(
    reference_path = "./Reference",
    fasta = "AH65745",
    gtf = "AH64631",
    genome_type = "hg38",
    MappabilityRef = "../Mappability/hg38.MappabilityExclusion.bed.Rds"
  )
}

# Collate the SpliceWiz output files into an experiment
expr <- findSpliceWizOutput("./pb_output")
collateData(expr, "Reference", "NxtSE")

# Import experiment as NxtSE
se <- makeSE("NxtSE", realize = TRUE)

# Ensures NxtSE knows the proper path names of COV files
covfile(se) <- expr$cov_file

# Annotate the samples: the first three are labelled "A", the next three "B"
colData(se)$Biology <- rep(c("A", "B"), each = 3)

# Use SpliceWiz's optimized filters
se.filtered <- se[applyFilters(se), ]

# Limma-based differential analysis
# library() stops with an error if limma is not installed, whereas require()
# would only return FALSE and let the failure surface later in ASE_limma().
library(limma)
res_limma <- ASE_limma(se.filtered, "Biology", "A", "B")

# Example heatmap of the first 20 events listed in the limma results
library(pheatmap)
mat <- makeMatrix(se.filtered, res_limma$EventName[1:20])
pheatmap(mat, annotation_col = as.data.frame(colData(se.filtered)))

# Example coverage plot of the first event listed in the limma results
p <- plotCoverage(
  se.filtered, res_limma$EventName[1],
  condition = "Biology", tracks = c("A", "B"), stack_tracks = TRUE
)
as_ggplot_cov(p) # displays ggplot (static plot)
p$final_plot # displays plotly object (interactive plot)
Assuming the above code has already been run:
# Required packages that are not already SpliceWiz dependencies.
# Only the missing ones are installed, so re-running this script does not
# re-download packages that are already available.
extra_pkgs <- c("dplyr", "ROCit")
is_installed <- vapply(
  extra_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(extra_pkgs[!is_installed])
}

# NOTE(review): groundTruth.R presumably defines the helper functions used
# below (getGroundTruth(), generatePSIerror(), ...) - confirm against that file.
source("groundTruth.R")

# Get ground truth values:
gt <- getGroundTruth("flux_files/", "Reference/")

# Get default filters
filters <- getDefaultFilters()

# baseline filters (from the SpliceWiz paper)
baseFilters <- filters[1]

# optimized filters (from the SpliceWiz paper)
optFilters <- filters[c(1, 3, 4, 5)]

# Apply optimized filters
se.opt <- se[applyFilters(se, optFilters), ]

# DoubleExpSeq-based differential analysis
res <- ASE_DoubleExpSeq(se.opt, "Biology", "A", "B")

# PSI error AUC
# The splice types are pooled before summarising: A3SS/A5SS are relabelled
# "AltSS" and AFE/ALE are relabelled "AltTE".
PSIerror <- generatePSIerror(se, res, gt, colnames(se)[1:3], "Reference")
PSIerror$splice_type[PSIerror$splice_type %in% c("A3SS", "A5SS")] <- "AltSS"
PSIerror$splice_type[PSIerror$splice_type %in% c("AFE", "ALE")] <- "AltTE"
getPSIerrorAUC(PSIerror)

# Plot PSI error curve
plotPSIerror(PSIerror)

# ROC generation (same splice type pooling as for the PSI error above):
sc <- generateScoresAndClass(res, gt, "Reference")
sc$splice_type[sc$splice_type %in% c("A3SS", "A5SS")] <- "AltSS"
sc$splice_type[sc$splice_type %in% c("AFE", "ALE")] <- "AltTE"
ROCdata <- generateROCdata(sc)
getAUROC(ROCdata)

# Plot ROC curve
plotROCdata(ROCdata)
This repository also contains links to Mappability Exclusion resources for SpliceWiz. These files are intended for those who are running SpliceWiz on Bioconductor 3.13 or earlier.
library(SpliceWiz)

# Set path for SpliceWiz reference
reference_path <- "./Reference"

# Base URL of the Ensembl release 94 FTP site; the genome FASTA and the gene
# annotation GTF locations are built from it below.
FTP <- "ftp://ftp.ensembl.org/pub/release-94/"

# Build the hg38 reference, supplying the Mappability Exclusion file from this
# repository explicitly (the path is relative to the current working
# directory).
buildRef(
  reference_path = reference_path,
  fasta = paste0(
    FTP, "fasta/homo_sapiens/dna/",
    "Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
  ),
  gtf = paste0(
    FTP, "gtf/homo_sapiens/",
    "Homo_sapiens.GRCh38.94.chr.gtf.gz"
  ),
  genome_type = "hg38",
  MappabilityRef = "Mappability/hg38.MappabilityExclusion.bed.Rds"
)