In [1]:
#Might need to change filepaths

rnaRDS = "../../dorcs_data_example/out.gene.bc.matrices.h5"
atacFragFile = "../../dorcs_data_example/shareseq-project.atac.GRCh38.cleaned.filtered.bedpe"
peakFile = "../../dorcs_data_example/GM_nonoverlap.bed"
cellKNNMatrix = "../../dorcs_data_example/cellKNN_GM_nonoverlap.rds"
pwmFile = "../R/cisBP_human_pfms_2021.rds" #need better name, but don't know original reasoning

genome = "hg38"
nCores = 4
savePlotsToDir = TRUE

minFeature_RNA = 200 #Seurat QC for number of min features
maxFeature_RNA = 2500 #Seurat QC for number of max features
percentMT_RNA = 5 #Seurat QC for max % of mt 
minCells_RNA = 3 #Seurat QC for min number of cells

dorcGeneCutOff = 10 #No. sig peaks needed to be called a DORC
fripCutOff = 0.3 #QC threshold for fRIP score
do = 0.05 #pval cutoff for correlation statistical test
nDORCknn = 10 #No. of dorc kNNs used to pool peaks
topNGene = 20 #Label top N genes in j-Plot

In [2]:
packages = c("dplyr","Seurat","patchwork","GenomicRanges","ggplot2","ggrepel","reshape2","ggrastr","BuenColors","ComplexHeatmap", "circlize","networkD3","GGally","network","motifmatchr","foreach","iterators","parallel","Biostrings","rtracklayer")

if(genome == "hg38"){
    BiocManager::install("BSgenome.Hsapiens.UCSC.hg38", update=T, ask=F)
    packages = c(packages, "BSgenome.Hsapiens.UCSC.hg38")
} else if(genome == "mm10"){
    BiocManager::install("BSgenome.Mmusculus.UCSC.mm10", update=T, ask=F)
    packages = c(packages, "BSgenome.Mmusculus.UCSC.mm10")
}

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) BiocManager::install(new.packages, update=T, ask=F)

suppressMessages(lapply(packages, library, character.only = TRUE))

'getOption("repos")' replaces Bioconductor standard repositories, see
'?repositories' for details

replacement repositories:
    CRAN: https://cran.r-project.org


Bioconductor version 3.14 (BiocManager 1.30.16), R 4.1.2 (2021-11-01)

“package(s) not installed when version(s) same as current; use `force = TRUE` to
  re-install: 'BSgenome.Hsapiens.UCSC.hg38'”
Old packages: 'Cairo', 'clipr', 'commonmark', 'desc', 'DT', 'evaluate',
  'fitdistrplus', 'float', 'future', 'GenomicFeatures', 'gmp', 'limma', 'mgcv',
  'misty', 'mockery', 'pracma', 'Rcpp', 'renv', 'rhdf5', 'rmarkdown',
  'survival', 'tinytex', 'uuid', 'waldo', 'xfun', 'yaml'

also installing the dependencies ‘backports’, ‘bayestestR’, ‘datawizard’, ‘insight’, ‘RUnit’, ‘broom’, ‘broom.mixed’, ‘parameters’, ‘performance’, ‘carData’, ‘kutils’, ‘broomExtra’, ‘rockchalk’, ‘r2mlm’, ‘writexl’





  There are binary versions available but the source versions are later:
       binary  source needs_compilation
Cairo  1.5-14  1.5-15              TRUE
gmp     0.6-4   0.6-5              TRUE
Rcpp  1.0.8.2 1.0.8.3              TRUE
uuid    1.0-3   1.0-4              TRUE
waldo   0.3.1   0.4.0             FALSE


The downloaded binary packages are in
	/var/folders/c3/cr8rqqls3gn0r7h2cq16rxg80000gp/T//Rtmp1ye5ht/downloaded_packages


installing the source packages ‘Cairo’, ‘gmp’, ‘Rcpp’, ‘uuid’, ‘waldo’




In [None]:
#Might need to change filepaths
#download from gh and source?
#cellkNN = readRDS(cellKNNMatrix)
source("/R/DORCS_helper_functions.R")
source("/R/FigR_functions.R")
load("/R/TSSRanges.RData")

In [None]:
#Create and preprocess RNA count matrix; using Seurat functions

#rnaCounts = Read10X_h5(rnaCountMatrix)
#rnaCounts = CreateSeuratObject(counts = rnaCounts, project = "shareseq", min.cells = minCells_RNA, min.features = minFeature_RNA)
#rnaCounts[["percent.mt"]] = PercentageFeatureSet(rnaCounts, pattern = "^MT-")
#RNAVlnPlot = VlnPlot(rnaCounts, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
#print(RNAVlnPlot)

rnaCounts = readRDS(rnaRDS)
#QC filtering and normalization

rnaCounts = subset(rnaCounts, subset = nFeature_RNA > minFeature_RNA & nFeature_RNA < maxFeature_RNA & percent.mt < percentMT_RNA)
rnaCounts = NormalizeData(rnaCounts)

In [None]:
#Create and preprocess scATACseq

myPeaks = read.table(peakFile, sep="\t", header=F)
myFrags = read.table(atacFragFile, sep="\t", header=F)u

#myFrags = fread("data/merged.fragments.100k.tsv", sep="\t", header=F)

peakRanges = makeGRangesFromDataFrame(myPeaks,seqnames.field = "V1",start.field = "V2",end.field = "V3",starts.in.df.are.0based = TRUE)
fragRanges = makeGRangesFromDataFrame(myFrags, seqnames.field = "V1", start.field = "V2", end.field = "V3", keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)

In [None]:
# Get counts from fragfile
peaksSE = getCountsFromFrags(fragRanges=fragRanges, peaks=peakRanges )
SE.filt = peaksSE[,peaksSE$FRIP > fripCutOff]

#extract RNA count matrix
rnaMat <- rnaCounts[["RNA"]]@data

#Cleaning - change barcode names
SE.filt$sample = sub(",P1\\.[0-9]+", "", SE.filt$sample)
colnames(SE.filt) = sub(",P1\\.[0-9]+", "", colnames(SE.filt))
colnames(SE.filt) = gsub(",", "\\.", colnames(SE.filt))
colnames(rnaMat) = sub("\\,P1\\.[0-9]+", "", colnames(rnaMat))
colnames(rnaMat) = gsub(",", "\\.", colnames(rnaMat))

#Get intersect of cells in RNA amd ATAC
cells = intersect(colnames(SE.filt), colnames(rnaMat))


In [None]:
set.seed(123)

#Run fast gene peak correlation
cisCor <- fastGenePeakcorr(
  SE.filt[,cells],
  rnaMat[,cells],
  genome = genome, # This will be one of "hg19","hg38" or "mm10"
  windowPadSize = 50000,
  normalizeATACmat = TRUE,
  nCores = nCores,
  p.cut = NULL
)

cisCor.filt <- cisCor %>% filter(pvalZ <= corrPVal)

In [None]:
#dorcGenes and j-Plot
dorcGenes = dorcJPlot(dorcTab = cisCor.filt,
                       cutoff = dorcGeneCutOff, # No. sig peaks needed to be called a DORC
                       labelTop = topNGene,
                       returnGeneList = TRUE, # Set this to FALSE for just the plot
                       force=2)

In [None]:
if(savePlotsToDir){
    dir.create("/plots")
    savePlots = function(filename, plotObject){
        pdf(paste0("/plots/",filename,".pdf"))
        print(plotObject)
        dev.off()
        
        png(paste0("/plots/",filename,".png"))
        print(plotObject)
        dev.off()
    }
    #savePlots("RNAViolinPlot", RNAVlnPlot)
    savePlots("JPlot", dorcJPlot(dorcTab = cisCor.filt,cutoff = dorcGeneCutOff, labelTop = topNGene,returnGeneList = FALSE,force=2))
    #savePlots("figRPlot", figRPlot)
    #savePlots("figRHeatmap", figRHeatmap)

}

#saveRDS(figR.d, file = "figR_object.rds")

files2zip <- dir('/plots', full.names = TRUE)
zip(zipfile = '/plots', files = files2zip)

BiocManager::install(c("BSgenome.Hsapiens.UCSC.hg38","GenomicDistributions"), update=T, ask=F)
myFrags = fread("data/merged.fragments.tsv", header =F, sep = "\t", showProgress=T)