In [1]:
library(Rsamtools)
library(Biostrings)
library(DNAcopy)

source('../rna2cn/rCNV.R', local=TRUE)

Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:stats’:

    IQR, mad, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, as.vector, cbind, colnames,
    do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl,
    intersect, is.unsorted, lapply, lengths, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    union, unique, unlist, unsplit

Loading required package: IRanges
Loading required package: GenomeInfoDb
L

In [2]:
# Collect the sorted bismark BAM files, one per sample directory under RRBS/.
bams <- Sys.glob("/srv/shared/vanloo/rna2cn/SRP052901/RRBS/*/bismark/*_sorted.bam")
# Fail fast: Sys.glob() returns character(0) when nothing matches, and every
# later cell indexes into `bams`.
if (length(bams) == 0) {
    stop("No BAM files matched the RRBS bismark glob pattern", call. = FALSE)
}
# Disabled indexing step. NOTE(review): MC.CORES is only assigned in a later
# cell, so that cell must run first if this call is re-enabled.
#indexBams(bams, MC.CORES)

In [3]:
# Run configuration.
# NOTE(review): BAMDIR, CHRSTRING and FASTA are not referenced in the visible
# cells; presumably they are read as globals by helpers sourced from rCNV.R.
BAMDIR <- "/srv/shared/vanloo/rna2cn/SRP052901/RRBS/SRR1777083/bismark/"
MC.CORES <- 6
# Window size kept as a string: it is pasted into output file names and passed
# to getTrackForAll() as-is.
window <- '1000000'
ALLCHR <- 1:22
CHRSTRING<-''

FASTA <- "/srv/shared/vanloo/rna2cn/references/Homo_sapiens.GRCh38/Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa"
bed <- "/srv/shared/vanloo/rna2cn/references/Homo_sapiens.GRCh38/Homo_sapiens.GRCh38.1mb.bed"

# Sample name = text before the first underscore of each BAM file name.
samples <- vapply(bams,
                  function(x) strsplit(basename(x), '_')[[1]][[1]],
                  character(1),
                  USE.NAMES = FALSE)
normal_samples <- c('SRR1777078', 'SRR1777079')
# Positions of the normal samples within `bams` / `samples`.
normal_idx <- match(normal_samples, samples)
# A normal sample without a BAM would otherwise surface much later as an
# obscure error inside normalise_CT().
if (anyNA(normal_idx)) {
    stop("Normal sample(s) not found among the BAM files: ",
         paste(normal_samples[is.na(normal_idx)], collapse = ", "),
         call. = FALSE)
}

In [4]:
# Reference genome object returned by the rCNV.R helper.
# NOTE(review): presumably loaded from the FASTA path set above - confirm.
dna <- getRefGenome()

# Per-chromosome window boundaries read from the BED file; each element is
# accessed below through its $starts and $ends fields.
lSe <- lapply(ALLCHR, function(chr) {
    getStartsEndsBED(bed, chr)
})

# Per-chromosome GC track computed over the same windows.
# lSe is indexed by chromosome value, which matches its positions only while
# ALLCHR is 1:n.
lGCT <- lapply(ALLCHR, function(chr) {
    gcTrack(chr, lSe[[chr]]$starts, lSe[[chr]]$ends)
})

In [5]:
# Count only reads that are not flagged as duplicates; the parameter object is
# the same for every BAM, so build it once.
depthParam <- ScanBamParam(flag=scanBamFlag(isDuplicate=FALSE))

# Raw coverage tracks: one list per BAM, holding one track per chromosome.
all_lCT <- mclapply(bams, function(bamfile) {
                        sampleDepth <- countBam(bamfile, param=depthParam)$records
                        lapply(ALLCHR, function(chr) getCoverageTrack(bamPath=bamfile,
                                                                      chr=chr,
                                                                      lSe[[chr]]$starts,
                                                                      lSe[[chr]]$ends,
                                                                      sampleDepth))
                    },
                    mc.cores=MC.CORES)

# mclapply() does not stop on a failed job: it returns a "try-error" object in
# that slot. Abort here instead of passing error objects downstream.
failed <- vapply(all_lCT, inherits, logical(1), what="try-error")
if (any(failed)) {
    stop("Coverage computation failed for: ",
         paste(basename(bams[failed]), collapse=", "),
         call.=FALSE)
}

# Smoothed coverage tracks, computed per sample from the raw tracks, the
# window boundaries and the GC tracks.
all_lCT_smooth <- mclapply(seq_along(bams),
                           function(i) smoothCoverageTrackAll(all_lCT[[i]], lSe, lGCT),
                           mc.cores=MC.CORES)

failed <- vapply(all_lCT_smooth, inherits, logical(1), what="try-error")
if (any(failed)) {
    stop("Coverage smoothing failed for: ",
         paste(basename(bams[failed]), collapse=", "),
         call.=FALSE)
}

In [6]:
# Normalise every sample's smoothed coverage against the normal samples.
#
# For each chromosome the reference is log10 of the mean, across the normal
# samples, of 10^smoothed; that reference is subtracted from each sample's
# $smoothed values and stored as $normalised.
#
# Args:
#   normal_idx:     indices (vector or list) into all_lCT_smooth identifying
#                   the normal samples.
#   all_lCT_smooth: list with one element per sample; each is a list indexed
#                   by chromosome whose entries carry a $smoothed numeric vector.
#
# Returns:
#   all_lCT_smooth with a $normalised vector added to every chromosome entry.
#
# Reads the global ALLCHR. Entries are indexed by chromosome value, so ALLCHR
# is expected to be 1:n.
normalise_CT <- function(normal_idx, all_lCT_smooth)
{
    normal_idx <- unlist(normal_idx)
    if (length(normal_idx) == 0 || anyNA(normal_idx)) {
        stop("normal_idx must contain at least one non-missing sample index",
             call. = FALSE)
    }

    refMean <- lapply(ALLCHR, function(chr) {
        # cbind() always yields a windows x normals matrix, even when a
        # chromosome has a single window (sapply() would collapse that case
        # to a plain vector and rowMeans() would fail).
        normals <- do.call(cbind, lapply(normal_idx, function(i) all_lCT_smooth[[i]][[chr]]$smoothed))
        log10(rowMeans(10^normals))
    })

    for (i in seq_along(all_lCT_smooth)) {
        for (chr in ALLCHR) {
            all_lCT_smooth[[i]][[chr]]$normalised <- all_lCT_smooth[[i]][[chr]]$smoothed - refMean[[chr]]
        }
    }
    all_lCT_smooth
}
# Normalise all samples' smoothed tracks against the samples at normal_idx.
all_lCT_normalised <- normalise_CT(normal_idx, all_lCT_smooth)

In [7]:
# Build the per-sample track from the normalised coverage (lCTS); no raw
# coverage is passed (lCT=NULL). seq_along() keeps the loop empty when there
# are no BAMs, where 1:length(bams) would iterate over c(1, 0).
all_track <- lapply(seq_along(bams), function(i) getTrackForAll(window, bams[[i]], lSe, lGCT, lCT=NULL, lCTS=all_lCT_normalised[[i]]) )

[1] "segment Tracks"
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
[1] "segment Tracks"
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
Analyzing: Sample.1 
[1] "segment Tracks"
Analyzing: Sa

In [9]:
# For every sample: write the GC-correction plot and the genome-wide plot as
# PDFs next to the BAM file, then export the segments via SegsToBed().
for (i in seq_along(bams))
{
    # Sample name = text before the first underscore of the BAM file name, the
    # same token used to build `samples`. The second token is only a
    # file-name suffix.
    sample <- strsplit(basename(bams[[i]]), '_')[[1]][[1]]
    out_prefix <- file.path(dirname(bams[[i]]), paste0(sample, ".", window))

    # `finally` closes the PDF device even if plotting fails, so a failed
    # sample cannot leave a device open that captures later plots.
    pdf(paste0(out_prefix, ".gc.pdf"))
    tryCatch(plotGCCorrection(all_lCT[[i]], lGCT), finally = dev.off())

    pdf(paste0(out_prefix, ".pdf"))
    tryCatch(plotAllGenome(all_track[[i]]), finally = dev.off())

    SegsToBed(all_track[[i]], bams[[i]])
}