# Launch docker:

docker run -it --rm -p 8889:8888 -v ~/notebooks:/home/jovyan/work jupyter/r-notebook

In [None]:
!pip install rpy2

# Simulate scRNA-seq data

In [1]:
# Install the packages this notebook needs, skipping any that are already
# available so that re-running the cell does not reinstall everything.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Bioconductor packages: splatter runs the simulation, rhdf5 (loaded with
# library(rhdf5) in the simulation cell) writes the HDF5 output files.
bioc_pkgs <- c("splatter", "rhdf5")
bioc_missing <- bioc_pkgs[
  !vapply(bioc_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(bioc_missing) > 0) {
  BiocManager::install(bioc_missing)
}

# CRAN package.
if (!requireNamespace("Rtsne", quietly = TRUE)) {
  install.packages("Rtsne")
}

Bioconductor version 3.11 (BiocManager 1.30.10), R 4.0.0 (2020-04-24)

Installing package(s) 'splatter'

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

Old packages: 'bit', 'bit64', 'data.table', 'devtools', 'dplyr', 'DT', 'fs',
  'htmltools', 'httr', 'IRkernel', 'MASS', 'parsnip', 'ps', 'Rcpp', 'remotes',
  'rmarkdown', 'RODBC', 'rvest', 'shiny', 'sys', 'tidyr', 'tinytex', 'TTR',
  'workflows', 'xfun'

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done



In [8]:
rm(list = ls())
library(splatter)
library(rhdf5)

i <- 2 ## simulation index; the default random seed of simulate() is 100 + i

# Simulate a scRNA-seq data set with splatter and unpack the pieces that the
# rest of the notebook uses.
#
# Arguments:
#   nGroups    number of cell groups. 1 selects the 'single' method, anything
#              larger selects 'groups' with equal group probabilities.
#   nGenes     forwarded to splatSimulate() as nGenes.
#   batchCells forwarded to splatSimulate() as batchCells.
#   dropout    forwarded to splatSimulate() as dropout.mid (dropout.shape is
#              fixed at -1, dropout.type at "experiment"); change it to
#              simulate various dropout rates.
#   seed       forwarded to splatSimulate() as seed. Defaults to 100 + i,
#              where `i` is the global defined above, read at call time.
#
# Returns a list with elements:
#   sim        the object returned by splatSimulate()
#   counts     data frame, transpose of counts(sim)
#   cellinfo   data frame built from colData(sim)
#   geneinfo   data frame built from rowData(sim)
#   truecounts data frame, transpose of the "TrueCounts" assay
simulate <- function(nGroups = 3, nGenes = 300, batchCells = 400, dropout = 0,
                     seed = 100 + i) {
  stopifnot(is.numeric(nGroups), length(nGroups) == 1, nGroups >= 1)

  if (nGroups > 1) {
    method <- "groups"
  } else {
    method <- "single"
  }

  # Every group is equally likely.
  group.prob <- rep(1, nGroups) / nGroups

  sim <- splatSimulate(
    group.prob = group.prob,
    nGenes = nGenes,
    batchCells = batchCells,
    dropout.type = "experiment",
    method = method,
    seed = seed,
    dropout.shape = -1,
    dropout.mid = dropout
    # , de.facScale = 0.25
  )

  # Transposed so that the rows of the data frames are the columns of the
  # simulated matrices.
  counts     <- as.data.frame(t(counts(sim)))
  truecounts <- as.data.frame(t(assays(sim)$TrueCounts))

  cellinfo <- as.data.frame(colData(sim))
  geneinfo <- as.data.frame(rowData(sim))

  list(
    sim = sim,
    counts = counts,
    cellinfo = cellinfo,
    geneinfo = geneinfo,
    truecounts = truecounts
  )
}
# ---- Run one simulation and export it to .RData and HDF5 ----
dropout <- 0 # passed to simulate(), which uses it as dropout.mid
nGroups <- 3 # number of simulated cell groups
sim <- simulate(nGroups = nGroups, dropout = dropout)

simulation <- sim$sim
counts     <- sim$counts
geneinfo   <- sim$geneinfo
cellinfo   <- sim$cellinfo
truecounts <- sim$truecounts

# Extra zeros in the observed counts relative to the true counts, as a
# fraction of the non-zero true counts.
dropout.rate <- (sum(counts == 0) - sum(truecounts == 0)) / sum(truecounts > 0)
print("Dropout rate")
print(dropout.rate)

# Output locations. `exp` is the experiment tag used in every file name.
# NOTE(review): the name shadows base::exp as a variable; it is kept because
# later cells may refer to it.
exp <- paste("dropout", dropout, sep = "_")
folder <- "data/scRNAseq_simulated/2/"
# save() and h5createFile() do not create missing directories.
dir.create(folder, recursive = TRUE, showWarnings = FALSE)

rdata.path   <- paste0(folder, "d", nGroups, exp, ".RData")
h5.path      <- paste0(folder, "d", nGroups, exp, ".h5")
true.h5.path <- paste0(folder, "dt", nGroups, exp, ".h5")

save(counts, geneinfo, cellinfo, truecounts, file = rdata.path)

X <- t(counts) ## counts with dropout

# Zero-based group labels parsed from the text after the first five
# characters of cellinfo$Group (e.g. "Group1" -> 0).
if ("Group" %in% names(cellinfo)) {
  Y <- as.integer(substring(cellinfo$Group, 6))
  Y <- Y - 1
} else {
  # No Group column (presumably the nGroups = 1 / 'single' case -- confirm):
  # put every cell in group 0 instead of producing an empty label vector.
  Y <- rep(0, nrow(cellinfo))
}

# h5createFile() will not overwrite an existing file, so remove leftovers
# from a previous run before writing.
if (file.exists(h5.path)) {
  file.remove(h5.path)
}
h5createFile(h5.path)
h5write(X, h5.path, "X")
h5write(Y, h5.path, "Y")
h5write(geneinfo["Gene"], h5.path, "geneinfo")
h5write(cellinfo["Cell"], h5.path, "cellinfo")

true.X <- t(truecounts) ## counts without dropout
if (file.exists(true.h5.path)) {
  file.remove(true.h5.path)
}
h5createFile(true.h5.path)
h5write(true.X, true.h5.path, "X")
h5write(Y, true.h5.path, "Y")

Getting parameters...

Creating simulation object...

Simulating library sizes...

Simulating gene means...

Simulating group DE...

Simulating cell means...

Simulating BCV...

Simulating counts...

Simulating dropout (if needed)...

Done!



[1] "Dropout rate"
[1] 0.06404929
