In [1]:
# if(!require(devtools)) install.packages("devtools")
# library(devtools)

# devtools::install_github("JSB-UCLA/scDesign2")

In [2]:
library(scDesign2)
# library(tidyverse)

In [3]:
# Select the L'Ecuyer-CMRG generator first, then seed it: changing the RNG kind
# after seeding would discard the seed.
# NOTE(review): presumably chosen because the model fit further down runs on
# several cores (ncores > 1) — confirm.
RNGkind("L'Ecuyer-CMRG")
# Fixed seed so the random train/test split below is reproducible.
set.seed(1)

In [4]:
# load data -----------------------------------------------------------------------------
# Read the example count matrix shipped with scDesign2. `mustWork = TRUE` makes a
# missing file fail right here with a clear message; without it system.file()
# returns "" and readRDS("") fails with an unrelated-looking error.
data_mat <- readRDS(
  system.file(
    "extdata", "mouse_sie_10x.rds",
    package = "scDesign2", mustWork = TRUE
  )
)

# remove spike-in -----------------------------------------------------------------------
# Rows whose name contains "ercc" (case-insensitive) are treated as spike-ins;
# `nonspikes` holds the indices of all remaining rows.
nonspikes <- which(!grepl("ercc", rownames(data_mat), ignore.case = TRUE))
print(paste("number of spike-ins:", nrow(data_mat) - length(nonspikes)))
#> [1] "number of spike-ins: 9"
# drop = FALSE keeps data_mat a matrix even if a single row survives.
data_mat <- data_mat[nonspikes, , drop = FALSE]

# explore basic structure of data -------------------------------------------------------
dim(data_mat)
#> [1] 15962  7216
# Column names repeat across columns, so tabulating them counts columns per label.
table(colnames(data_mat))

[1] "number of spike-ins: 9"



                   Endocrine   Enterocyte.Immature.Distal 
                         310                          512 
Enterocyte.Immature.Proximal     Enterocyte.Mature.Distal 
                         297                          241 
  Enterocyte.Mature.Proximal        Enterocyte.Progenitor 
                         581                          356 
 Enterocyte.Progenitor.Early   Enterocyte.Progenitor.Late 
                         829                          404 
                      Goblet                       Paneth 
                         510                          260 
                        Stem                     TA.Early 
                        1267                          665 
                       TA.G1                        TA.G2 
                         408                          410 
                        Tuft 
                         166 

In [5]:
# Stratified 50/50 split: within every cell type (column label) half of the
# columns, rounded down, are drawn at random for training; the rest are test data.
unique_cell_type <- names(table(colnames(data_mat)))
train_idx <- unlist(lapply(unique_cell_type, function(x) {
  cell_type_idx <- which(colnames(data_mat) == x)
  n_train <- floor(length(cell_type_idx) / 2)
  # Index through sample.int() rather than sample(cell_type_idx, ...): sample()
  # reinterprets a length-one numeric vector as the range 1:x. For vectors of
  # any other length this is exactly what sample() does internally, so the
  # random draws are unchanged.
  cell_type_idx[sample.int(length(cell_type_idx), n_train)]
}), use.names = FALSE)
# drop = FALSE keeps both results matrices even when a single column is selected.
traincount <- data_mat[, train_idx, drop = FALSE]
# Take the complement explicitly: data_mat[, -train_idx] selects *no* columns
# (instead of all of them) when train_idx is empty.
testcount <- data_mat[, setdiff(seq_len(ncol(data_mat)), train_idx), drop = FALSE]

In [6]:
# Zero out the first column of the fitted marginal parameters of selected cell types.
#
# For every cell type in `cell_types`, column 1 of both `marginal_param1` and
# `marginal_param2` is set to 0; all other entries of `copula_result` are left
# untouched.
# NOTE(review): column 1 is presumably the zero-inflation (dropout) probability
# of the fitted marginals — confirm against the scDesign2 documentation.
#
# Args:
#   copula_result: named list with one element per cell type, each holding the
#     matrices `marginal_param1` and `marginal_param2`.
#   cell_types: character vector naming the elements of `copula_result` to
#     modify. Defaults to the six cell types this function originally
#     hard-coded, so existing one-argument calls behave as before.
#
# Returns:
#   The modified copy of `copula_result`.
#
# Raises:
#   An error naming every entry of `cell_types` that is absent from
#   `copula_result`.
remove_dropout <- function(
    copula_result,
    cell_types = c(
        "Stem", "Goblet", "Tuft", "TA.Early",
        "Enterocyte.Progenitor", "Enterocyte.Progenitor.Early"
    )) {
    # Fail early and by name: a missing cell type would otherwise surface as an
    # obscure subscript error from the replacement below.
    missing_types <- setdiff(cell_types, names(copula_result))
    if (length(missing_types) > 0) {
        stop(
            "cell type(s) not found in copula_result: ",
            paste(missing_types, collapse = ", "),
            call. = FALSE
        )
    }

    for (cell_type in cell_types) {
        for (param in c("marginal_param1", "marginal_param2")) {
            # `[[` matches names exactly. `$` partially matches list names on
            # read, so e.g. `$Enterocyte.Progenitor` could silently pick up
            # `Enterocyte.Progenitor.Early` when the former is absent.
            copula_result[[cell_type]][[param]][, 1] <- 0
        }
    }

    copula_result
}

In [7]:
# Cell types to fit and simulate.
cell_type_sel <- c(
  "Stem", "Goblet", "Tuft", "TA.Early",
  "Enterocyte.Progenitor", "Enterocyte.Progenitor.Early"
)
# Number of held-out cells belonging to the selected types. Counting the
# matching labels directly avoids copying the submatrix and stays correct when
# exactly one column matches (testcount[, mask] would drop to a vector, for
# which ncol() returns NULL).
n_cell_new <- sum(colnames(testcount) %in% cell_type_sel)
print(n_cell_new)
# set function parameter values ---------------------------------------------------------
print('get prop')
# Per-type cell counts in the test set, ordered like cell_type_sel; a selected
# type with no test cells yields NA here.
# NOTE(review): these are raw counts, not fractions — presumably
# simulate_count_scDesign2() normalises cell_type_prop; confirm.
cell_type_prop <- table(colnames(testcount))[cell_type_sel]

[1] 1898
[1] "get prop"


In [8]:
# fit model -----------------------------------------------------------------------------
print("fit")
# Re-seed immediately before fitting so the fit starts from a known RNG state.
set.seed(1)
# Fit the copula model with 'zinb' marginals on the training counts, restricted
# to the selected cell types and using one core per selected cell type.
copula_result <- fit_model_scDesign2(
  traincount, cell_type_sel,
  marginal = "zinb",
  sim_method = "copula",
  ncores = length(cell_type_sel)
)

[1] "fit"


In [9]:
# Post-process the fitted model with remove_dropout() before simulating from it.
print("remove dropout")
copula_result <- remove_dropout(copula_result)

[1] "remove dropout"


In [10]:
# Simulate n_cell_new cells from the (post-processed) copula model, with the
# cell-type composition given by cell_type_prop.
print("simulation")
sim_count_copula <- simulate_count_scDesign2(
  copula_result, n_cell_new,
  cell_type_prop = cell_type_prop,
  sim_method = "copula"
)

[1] "simulation"


In [11]:
# Display the simulated count matrix; the notebook renders a truncated preview
# whose columns are labelled by cell type.
sim_count_copula

Stem,Stem.1,Stem.2,Stem.3,Stem.4,Stem.5,Stem.6,Stem.7,Stem.8,Stem.9,⋯,Enterocyte.Progenitor.Early,Enterocyte.Progenitor.Early.1,Enterocyte.Progenitor.Early.2,Enterocyte.Progenitor.Early.3,Enterocyte.Progenitor.Early.4,Enterocyte.Progenitor.Early.5,Enterocyte.Progenitor.Early.6,Enterocyte.Progenitor.Early.7,Enterocyte.Progenitor.Early.8,Enterocyte.Progenitor.Early.9
0,0,0,1,2,0,0,2,2,1,⋯,4,1,1,9,1,8,3,1,0,3
0,0,1,0,0,0,0,1,0,0,⋯,1,0,0,1,3,4,2,0,0,2
0,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,1,0,1,1,⋯,0,0,0,0,0,0,0,0,2,0
0,0,0,0,0,1,0,0,0,0,⋯,0,0,0,0,0,0,1,0,0,0
0,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,18,0,0
0,0,0,0,0,0,0,0,0,0,⋯,0,1,1,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,⋯,0,1,0,0,0,0,0,0,0,0


In [12]:
# Export the simulated counts to "simulation.csv" in the working directory.
# Row names are not written; only the column header row and the counts are.
print("write")
write.csv(sim_count_copula, file = "simulation.csv", row.names = FALSE)

[1] "write"
