In [1]:
library(Matrix)
library(data.table)

library(SingleCellExperiment)
library(glmGamPoi)
library(edgeR)

library(Seurat)
library(nebula)

library(fixest)

library(dplyr)
library(tidyr)

library(limma)
library(Rcpp)

data(sample_data)
df = model.matrix(~X1+X2+cc, data=sample_data$pred)
re = nebula(sample_data$count,sample_data$sid,pred=df,ncore=1)

offset.mult <- function(formula, count, df){
    sce.obj <- SingleCellExperiment::SingleCellExperiment(
        list(counts=count),
        colData=df
        )
    sce.pb <- glmGamPoi::pseudobulk(
        sce.obj,
        group_by=vars(id, tx_cell),
        n=n(),
        verbose=FALSE
        )

    fit <- glmGamPoi::glm_gp(sce.pb, design=~1+tx_cell, size_factors=FALSE, offset=log(colData(sce.pb)$n))
    test <- glmGamPoi::test_de(fit, reduced_design=~1)
    
    beta <- fit$Beta[,'tx_cell']
    pval <- test$pval
    tval <- qnorm(1-pval/2) * sign(beta)
    se <- beta/tval
    result <- cbind(beta, se, tval, pval)
    colnames(result) <- c('Estimate', 'Std. Error', 't value', 'Pr(>|t|)')
    return(result)
    }

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges

Remove  0  genes having low expression.
Analyzing  10  genes with  30  subjects and  6176  cells.


Loading required package: foreach

Loading required package: future

Loading required package: rngtools



In [2]:
selectCol <- function(mat, j.col){    
    x.col.dense <- rep(0,nrow(mat))
    p.begin <- mat@p[j.col]+1
    p.end <- mat@p[j.col+1]
    i.col <- mat@i[p.begin:p.end]+1 # i counts from 0
    x.col <- mat@x[p.begin:p.end]
    x.col.dense[i.col] <- x.col
    return(x.col.dense)
    }

selectCols <- function(mat, j.cols){    
    return(sapply(j.cols, selectCol, mat=mat))
    }

fixest.mult <- function(formula, count, df){
    df.result <- data.frame(matrix(nrow=0, ncol=4))
    colnames(df.result) <- c('Estimate', 'Std. Error', 't value', 'Pr(>|t|)')
    for (j in 1:ncol(count)){
        df$y <- count[,j]
        fit <- fixest::fepois(formula, vcov='hetero', data=df, fixef.rm='none')
        df.result[j,] <- coeftable(fit)['tx_cell',] # fixed effect o/x 에 따라 다르게 들어가야함
        } 
    return(df.result)
    }

nebula.mult <- function(formula, count, df){
    pred <- model.matrix(formula, data=df)
    sid <- df$id
    fit.nebula <- nebula::nebula(
        count,
        sid,
        pred=pred,
        cpc=0,
        mincp=0
        )
    fit.result <- fit.nebula$summary
    rownames(fit.result) <- fit.result$gene
    return(
            fit.result %>%
            mutate(
                Estimate=logFC_tx_cell,
                'Std. Error'=se_tx_cell,
                't value'=logFC_tx_cell/se_tx_cell,
                'Pr(>|t|)'=p_tx_cell
                ) %>%
            select(Estimate, 'Std. Error', 't value', 'Pr(>|t|)')
        )
    }

glmgp.mult <- function(formula, count, df){
    sce.obj <- SingleCellExperiment::SingleCellExperiment(list(counts=count), colData=df)
    sce.pb <- glmGamPoi::pseudobulk(
        sce.obj,
        group_by=vars(id, tx_cell),
        verbose=FALSE
        )

    fit <- glmGamPoi::glm_gp(sce.pb, design=~1+tx_cell)
    test <- glmGamPoi::test_de(fit, reduced_design=~1)
    
    beta <- fit$Beta[,'tx_cell']
    pval <- test$pval
    tval <- qnorm(1-pval/2) * sign(beta)
    se <- beta/tval
    result <- cbind(beta, se, tval, pval)
    colnames(result) <- c('Estimate', 'Std. Error', 't value', 'Pr(>|t|)')
    return(result)
    }

glmgp.cell.mult <- function(formula, count, df){
    sce.obj <- SingleCellExperiment::SingleCellExperiment(list(counts=count), colData=df)
    fit <- glmGamPoi::glm_gp(sce.obj, design=~1+tx_cell, on_disk=FALSE, size_factors=FALSE)
    test <- glmGamPoi::test_de(fit, reduced_design=~1)
    
    beta <- fit$Beta[,'tx_cell']
    pval <- test$pval
    tval <- qnorm(1-pval/2) * sign(beta)
    se <- beta/tval
    result <- cbind(beta, se, tval, pval)
    colnames(result) <- c('Estimate', 'Std. Error', 't value', 'Pr(>|t|)')
    return(result)
    }

edger.mult <- function(formula, count, df){
    sce.obj <- SingleCellExperiment::SingleCellExperiment(list(counts=count), colData=df)
    sce.pb <- glmGamPoi::pseudobulk(
        sce.obj,
        group_by=vars(id, tx_cell),
        verbose=FALSE
        )

    design <- model.matrix(~1+tx_cell, data=colData(sce.pb))
    edger.obj <- edgeR::DGEList(counts(sce.pb))
    edger.obj <- edgeR::estimateDisp(edger.obj, design)
    fit <- edgeR::glmQLFit(y=edger.obj, design=design)
    test <- edgeR::glmTreat(fit, coef=2)

    beta <- test$coefficients[,'tx_cell']
    pval <- test$table[,'PValue']
    tval <- qnorm(1-pval/2) * sign(beta)
    se <- beta/tval

    result <- cbind(beta, se, tval, pval)
    colnames(result) <- c('Estimate', 'Std. Error', 't value', 'Pr(>|t|)')
    
    return(result)
    }

limma.mult <- function(formula, count, df){
    sce.obj <- SingleCellExperiment::SingleCellExperiment(list(counts=count), colData=df)
    sce.pb <- glmGamPoi::pseudobulk(
        sce.obj,
        group_by=vars(id, tx_cell),
        verbose=FALSE
        )

    design <- model.matrix(~1+tx_cell, data=colData(sce.pb))
    edger.obj <- edgeR::DGEList(counts(sce.pb))
    v <- limma::voom(edger.obj, design)
    vfit <- limma::lmFit(v, design)
    efit <- limma::eBayes(vfit)
    
    beta <- efit$coefficients[,'tx_cell'] * log(2)
    pval <- efit$p.value[,'tx_cell']
    tval <- qnorm(1-pval/2) * sign(beta)
    se <- beta/tval

    result <- cbind(beta, se, tval, pval)
    colnames(result) <- c('Estimate', 'Std. Error', 't value', 'Pr(>|t|)')
    
    return(result)
    }

src <-
"
#include <Rcpp.h>

// [[Rcpp::export]]
void vec_down_sample(
    Rcpp::NumericVector data,
    const Rcpp::LogicalVector which,
    int begin,
    int end,
    double prob
    ){
    for(int i=begin; i<end; i++){
        if(which[i]){
            data[i] = R::rbinom(data[i], prob);
            }
        }
    }
"
sourceCpp(code = src)

downCells <- function(spmat, i.rows, j.cols, p){
    data <- spmat@x
    i.bool <- spmat@i %in% (i.rows-1) # spmat@i begins from 0, i.rows begins from 1
    for (j.col in j.cols){
        begin <- spmat@p[j.col]
        end <- spmat@p[j.col+1]
        vec_down_sample(data, i.bool, begin, end, p)
        }
    }    

list.func <- list(
    glmgp.mult,
    edger.mult,
    limma.mult,
    glmgp.cell.mult,
    fixest.mult,
    nebula.mult
    )

In [3]:
seurat.obj <- readRDS('datasets/reichart.seurat.rds')

In [4]:
# https://satijalab.org/seurat/articles/essential_commands.html
# prepare data
cols <- c('donor_id', 'cell_type') 
col.data <- seurat.obj[[cols]]
col.data$cell_id <- rownames(col.data)
cnt <- GetAssayData(object = seurat.obj, slot = "counts")
head(col.data)

Unnamed: 0_level_0,donor_id,cell_type,cell_id
Unnamed: 0_level_1,<fct>,<fct>,<chr>
0,DP2,mural cell,0
1,DP2,endothelial cell,1
2,DP2,mural cell,2
3,DP2,endothelial cell,3
4,DP2,fibroblast of cardiac tissue,4
5,DP2,cardiac muscle cell,5


In [5]:
# sort cell type by numbers, select top 6
ct.used <- col.data %>% 
    group_by(cell_type) %>%
    summarise(n=n()) %>%
    arrange(desc(n)) %>%
    top_n(6) %>%
    pull(cell_type)

# select donors with more than 100 cells per selected cell types
donor.used <- col.data %>%
    group_by(donor_id, cell_type, .drop=FALSE) %>%
    summarise(n=n()) %>%
    pivot_wider(names_from=cell_type, values_from=n) %>%
    select(ct.used) %>%
    filter(if_all(ct.used,~.>50)) %>% # ~>.10 is purrr style lambda function
    pull(donor_id) 
col.data %>% 
    group_by(cell_type) %>%
    summarise(n=n()) %>%
    arrange(desc(n)) %>%
    top_n(6)

[1m[22mSelecting by n
[1m[22m`summarise()` has grouped output by 'donor_id'. You can override using the `.groups` argument.
“[1m[22mUsing an external vector in selections was deprecated in tidyselect 1.1.0.
[36mℹ[39m Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(ct.used)

  # Now:
  data %>% select(all_of(ct.used))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.”
[1m[22mAdding missing grouping variables: `donor_id`
[1m[22mSelecting by n


cell_type,n
<fct>,<int>
cardiac muscle cell,311418
mural cell,170281
fibroblast of cardiac tissue,142816
endothelial cell,115548
myeloid cell,57036
native cell,52981


In [7]:
cell.used <- col.data$donor_id %in% donor.used
cnt.used <- cnt[rowMeans(cnt) > 0.01, cell.used]
col.data.used <- col.data[cell.used,]

In [8]:
author <- 'reichart'
n.sample <- 10
exp.cut <- 0.5
n.gene <- 100
n.sim <- 10
samp.prob <- 0.5
is.pb <- FALSE
t1 <- Sys.time()
for (int.ct in 1:length(ct.used)){ #
    # select celltype
    cell.type <- ct.used[[int.ct]]
    bool.ct <- col.data.used$cell_type == cell.type
    col.data.ct <- col.data.used[bool.ct,]

    # select genes with sufficient mean (nblmm doesn't work for small mean)
    cnt.ct <- cnt.used[,bool.ct]
    cnt.ct.bm <- cnt.ct[rowMeans2(cnt.ct) > 0.1,]
    cnt.ct.bm <- cnt.ct.bm[sample(1:nrow(cnt.ct.bm), n.gene),]
    
    # boolean index of cells
    bool.cell.donor <- lapply(
        unique(col.data.ct$donor_id),
        function(id.donor){
            return(col.data.ct$donor_id == id.donor)
            }
        )
    col.data.donor <- lapply(
        bool.cell.donor,
        function(bool.donor){
            return(col.data.ct[bool.donor,])
            }
        )
    cnt.donor <- lapply(
        bool.cell.donor,
        function(bool.donor){
            return(cnt.ct.bm[,bool.donor])
            }
        )

    list.result.null <- list()
    list.result.pow <- list()
    for (i.iter in 1:n.sim){
        
        # sample individuals
        ind.select <- sample.int(
            n=length(unique(col.data.ct$donor_id)), 
            size=n.sample,
            replace=TRUE
            )
    
        # construct col.data & cnt matrix
        col.data.select <- data.table::rbindlist(col.data.donor[ind.select], idcol="id")
        cnt.select <- do.call(cbind, cnt.donor[ind.select])
        colnames(cnt.select) <- rownames(col.data.select)
                   
        # select cells randomly
        n.per.donor <- col.data.select %>% group_by(id, donor_id) %>% summarise(n=n()) %>% pull(n)
        cell.select <- as.logical(unlist(sapply(n.per.donor, rbinom, size=1, p=samp.prob)))
        col.data.select <- col.data.select[cell.select,]
        cnt.select <- cnt.select[,cell.select]

        # assign treatment
        col.data.select$tx_cell <- rbinom(n=nrow(col.data.select), size=1, p=0.5)
        if (is.pb){ 
            # assign treatment label
            n.tx <- as.integer(n.sample/2)
            urn <- c(rep(1,n.tx), rep(0,n.sample-n.tx))
            tx.ind <- sample(x=urn, size=n.sample, replace=FALSE)
        
            # assign tx to cells
            cell.per.ind <- col.data.select %>%
                group_by(id) %>%
                summarise(n=n())
            col.data.select$tx_cell <- rep(tx.ind, times=cell.per.ind$n)
        }
    
        # for power simulation, cut expression to half (+ force int)
        idx.de <- 1:5
        idx.tx <- which(col.data.select$tx_cell == 1)
        downCells(cnt.select, idx.de, idx.tx, exp.cut)
        cnt.select.t <- t(cnt.select)
        
        # run tests
        message('start regression')
        list.formula <- list(
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula(ifelse(is.pb, 'y~tx_cell', 'y~tx_cell | id')),
            as.formula('~tx_cell')
            )
        list.data <- list(
            cnt.select,
            cnt.select,
            cnt.select,
            cnt.select,
            cnt.select.t,
            cnt.select
            )
        list.result.method <- list()
        for (int.test in 1:length(list.func)){
            list.result.method[[int.test]] <- list.func[[int.test]](
                list.formula[[int.test]],
                list.data[[int.test]],
                col.data.select
                )[,4] 
            }
        df.result <- do.call(rbind, list.result.method)
        rownames(df.result) <- c('glmGamPoi (Pb)', 'edgeR (Pb)', 'limma (Pb)', 'glmGamPoi (cell)', 'robust GLM (cell)', 'NB GLMM')
        list.result.null[[i.iter]] <- t(df.result)[6:n.gene,]
        list.result.pow[[i.iter]] <- t(df.result)[1:5,]
        }

    
    
    # results
    df.null <- do.call(rbind, list.result.null)
    df.pow <- do.call(rbind, list.result.pow)
    
    # save name
    path.tail <- paste(author,'n',n.sample,'ct',int.ct,'fc',exp.cut,'csv',sep='.')
    path.pow.head <- ifelse(is.pb, 'pow_subject/', 'pow_cell/')
    path.null.head <- ifelse(is.pb, 'null_subject/', 'null_cell/')
    
    write.csv(df.null, paste0(path.null.head, path.tail))
    write.csv(df.pow, paste0(path.pow.head, path.tail))
    
    }
t2 <- Sys.time()
t2-t1

[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  19311  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  20363  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  20168  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  15732  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  23139  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  35666  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  15126  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  16182  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  14445  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  18645  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  13063  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9072  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  14592  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  14350  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  11792  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9414  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  11596  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  11810  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  12253  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9405  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7891  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  11405  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9344  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7652  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  11960  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  10120  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9406  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9429  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9619  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9829  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6734  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6690  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7612  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  5850  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6551  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6261  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6052  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  8692  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  8614  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7360  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3732  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3666  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4805  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4000  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3447  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4247  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3966  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4250  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3276  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4230  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4412  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  2161  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3914  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3759  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3427  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3367  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3020  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3297  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  2221  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3963  cells.


Time difference of 45.34034 mins

In [9]:
author <- 'reichart'
n.sample <- 10
exp.cut <- 0.5
n.gene <- 100
n.sim <- 10
samp.prob <- 0.5
is.pb <- TRUE
t1 <- Sys.time()
for (int.ct in 1:length(ct.used)){ #
    # select celltype
    cell.type <- ct.used[[int.ct]]
    bool.ct <- col.data.used$cell_type == cell.type
    col.data.ct <- col.data.used[bool.ct,]

    # select genes with sufficient mean (nblmm doesn't work for small mean)
    cnt.ct <- cnt.used[,bool.ct]
    cnt.ct.bm <- cnt.ct[rowMeans2(cnt.ct) > 0.1,]
    cnt.ct.bm <- cnt.ct.bm[sample(1:nrow(cnt.ct.bm), n.gene),]
    
    # boolean index of cells
    bool.cell.donor <- lapply(
        unique(col.data.ct$donor_id),
        function(id.donor){
            return(col.data.ct$donor_id == id.donor)
            }
        )
    col.data.donor <- lapply(
        bool.cell.donor,
        function(bool.donor){
            return(col.data.ct[bool.donor,])
            }
        )
    cnt.donor <- lapply(
        bool.cell.donor,
        function(bool.donor){
            return(cnt.ct.bm[,bool.donor])
            }
        )

    list.result.null <- list()
    list.result.pow <- list()
    for (i.iter in 1:n.sim){
        
        # sample individuals
        ind.select <- sample.int(
            n=length(unique(col.data.ct$donor_id)), 
            size=n.sample,
            replace=TRUE
            )
    
        # construct col.data & cnt matrix
        col.data.select <- data.table::rbindlist(col.data.donor[ind.select], idcol="id")
        cnt.select <- do.call(cbind, cnt.donor[ind.select])
        colnames(cnt.select) <- rownames(col.data.select)
                   
        # select cells randomly
        n.per.donor <- col.data.select %>% group_by(id, donor_id) %>% summarise(n=n()) %>% pull(n)
        cell.select <- as.logical(unlist(sapply(n.per.donor, rbinom, size=1, p=samp.prob)))
        col.data.select <- col.data.select[cell.select,]
        cnt.select <- cnt.select[,cell.select]

        # assign treatment
        col.data.select$tx_cell <- rbinom(n=nrow(col.data.select), size=1, p=0.5)
        if (is.pb){ 
            # assign treatment label
            n.tx <- as.integer(n.sample/2)
            urn <- c(rep(1,n.tx), rep(0,n.sample-n.tx))
            tx.ind <- sample(x=urn, size=n.sample, replace=FALSE)
        
            # assign tx to cells
            cell.per.ind <- col.data.select %>%
                group_by(id) %>%
                summarise(n=n())
            col.data.select$tx_cell <- rep(tx.ind, times=cell.per.ind$n)
        }
    
        # for power simulation, cut expression to half (+ force int)
        idx.de <- 1:5
        idx.tx <- which(col.data.select$tx_cell == 1)
        downCells(cnt.select, idx.de, idx.tx, exp.cut)
        cnt.select.t <- t(cnt.select)
        
        # run tests
        message('start regression')
        list.formula <- list(
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula(ifelse(is.pb, 'y~tx_cell', 'y~tx_cell | id')),
            as.formula('~tx_cell')
            )
        list.data <- list(
            cnt.select,
            cnt.select,
            cnt.select,
            cnt.select,
            cnt.select.t,
            cnt.select
            )
        list.result.method <- list()
        for (int.test in 1:length(list.func)){
            list.result.method[[int.test]] <- list.func[[int.test]](
                list.formula[[int.test]],
                list.data[[int.test]],
                col.data.select
                )[,4] 
            }
        df.result <- do.call(rbind, list.result.method)
        rownames(df.result) <- c('glmGamPoi (Pb)', 'edgeR (Pb)', 'limma (Pb)', 'glmGamPoi (cell)', 'robust GLM (cell)', 'NB GLMM')
        list.result.null[[i.iter]] <- t(df.result)[6:n.gene,]
        list.result.pow[[i.iter]] <- t(df.result)[1:5,]
        }

    
    
    # results
    df.null <- do.call(rbind, list.result.null)
    df.pow <- do.call(rbind, list.result.pow)
    
    # save name
    path.tail <- paste(author,'n',n.sample,'ct',int.ct,'fc',exp.cut,'csv',sep='.')
    path.pow.head <- ifelse(is.pb, 'pow_subject/', 'pow_cell/')
    path.null.head <- ifelse(is.pb, 'null_subject/', 'null_cell/')
    
    write.csv(df.null, paste0(path.null.head, path.tail))
    write.csv(df.pow, paste0(path.pow.head, path.tail))
    
    }
t2 <- Sys.time()
t2-t1

[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  20404  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  21689  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  26645  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  16762  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  25129  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  20048  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  18302  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  14876  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  20491  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  16300  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  12393  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9274  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  17764  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  16394  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7876  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  8340  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  10759  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9853  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  15878  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  12720  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7647  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  11578  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  8971  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  10083  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  10457  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  8310  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  8273  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6616  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  10306  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  5992  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  8474  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7609  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6977  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6918  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7221  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  9795  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6718  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7370  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  6931  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  7708  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3839  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4741  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3835  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4141  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3050  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3483  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3411  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4010  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3559  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3923  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3718  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3027  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  4687  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3357  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3919  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3586  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  2800  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  2937  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  2658  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  10  subjects and  3419  cells.


Time difference of 40.39318 mins

In [10]:
author <- 'reichart'
n.sample <- 50
exp.cut <- 0.5
n.gene <- 100
n.sim <- 10
samp.prob <- 0.5
is.pb <- FALSE
t1 <- Sys.time()
for (int.ct in 1:length(ct.used)){ #
    # select celltype
    cell.type <- ct.used[[int.ct]]
    bool.ct <- col.data.used$cell_type == cell.type
    col.data.ct <- col.data.used[bool.ct,]

    # select genes with sufficient mean (nblmm doesn't work for small mean)
    cnt.ct <- cnt.used[,bool.ct]
    cnt.ct.bm <- cnt.ct[rowMeans2(cnt.ct) > 0.1,]
    cnt.ct.bm <- cnt.ct.bm[sample(1:nrow(cnt.ct.bm), n.gene),]
    
    # boolean index of cells
    bool.cell.donor <- lapply(
        unique(col.data.ct$donor_id),
        function(id.donor){
            return(col.data.ct$donor_id == id.donor)
            }
        )
    col.data.donor <- lapply(
        bool.cell.donor,
        function(bool.donor){
            return(col.data.ct[bool.donor,])
            }
        )
    cnt.donor <- lapply(
        bool.cell.donor,
        function(bool.donor){
            return(cnt.ct.bm[,bool.donor])
            }
        )

    list.result.null <- list()
    list.result.pow <- list()
    for (i.iter in 1:n.sim){
        
        # sample individuals
        ind.select <- sample.int(
            n=length(unique(col.data.ct$donor_id)), 
            size=n.sample,
            replace=TRUE
            )
    
        # construct col.data & cnt matrix
        col.data.select <- data.table::rbindlist(col.data.donor[ind.select], idcol="id")
        cnt.select <- do.call(cbind, cnt.donor[ind.select])
        colnames(cnt.select) <- rownames(col.data.select)
                   
        # select cells randomly
        n.per.donor <- col.data.select %>% group_by(id, donor_id) %>% summarise(n=n()) %>% pull(n)
        cell.select <- as.logical(unlist(sapply(n.per.donor, rbinom, size=1, p=samp.prob)))
        col.data.select <- col.data.select[cell.select,]
        cnt.select <- cnt.select[,cell.select]

        # assign treatment
        col.data.select$tx_cell <- rbinom(n=nrow(col.data.select), size=1, p=0.5)
        if (is.pb){ 
            # assign treatment label
            n.tx <- as.integer(n.sample/2)
            urn <- c(rep(1,n.tx), rep(0,n.sample-n.tx))
            tx.ind <- sample(x=urn, size=n.sample, replace=FALSE)
        
            # assign tx to cells
            cell.per.ind <- col.data.select %>%
                group_by(id) %>%
                summarise(n=n())
            col.data.select$tx_cell <- rep(tx.ind, times=cell.per.ind$n)
        }
    
        # for power simulation, cut expression to half (+ force int)
        idx.de <- 1:5
        idx.tx <- which(col.data.select$tx_cell == 1)
        downCells(cnt.select, idx.de, idx.tx, exp.cut)
        cnt.select.t <- t(cnt.select)
        
        # run tests
        message('start regression')
        list.formula <- list(
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula(ifelse(is.pb, 'y~tx_cell', 'y~tx_cell | id')),
            as.formula('~tx_cell')
            )
        list.data <- list(
            cnt.select,
            cnt.select,
            cnt.select,
            cnt.select,
            cnt.select.t,
            cnt.select
            )
        list.result.method <- list()
        for (int.test in 1:length(list.func)){
            list.result.method[[int.test]] <- list.func[[int.test]](
                list.formula[[int.test]],
                list.data[[int.test]],
                col.data.select
                )[,4] 
            }
        df.result <- do.call(rbind, list.result.method)
        rownames(df.result) <- c('glmGamPoi (Pb)', 'edgeR (Pb)', 'limma (Pb)', 'glmGamPoi (cell)', 'robust GLM (cell)', 'NB GLMM')
        list.result.null[[i.iter]] <- t(df.result)[6:n.gene,]
        list.result.pow[[i.iter]] <- t(df.result)[1:5,]
        }

    
    
    # results
    df.null <- do.call(rbind, list.result.null)
    df.pow <- do.call(rbind, list.result.pow)
    
    # save name
    path.tail <- paste(author,'n',n.sample,'ct',int.ct,'fc',exp.cut,'csv',sep='.')
    path.pow.head <- ifelse(is.pb, 'pow_subject/', 'pow_cell/')
    path.null.head <- ifelse(is.pb, 'null_subject/', 'null_cell/')
    
    write.csv(df.null, paste0(path.null.head, path.tail))
    write.csv(df.pow, paste0(path.pow.head, path.tail))
    
    }
t2 <- Sys.time()
t2-t1

[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  82807  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  118649  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  100611  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  120839  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  125208  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  100640  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  96216  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  78451  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  102791  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  97589  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  57136  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  53627  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  50330  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  50946  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  55403  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  61479  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  51873  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  47274  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  55520  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  50227  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  49497  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  42070  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  39879  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  42054  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  45513  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  51445  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  47474  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  51722  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  47083  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  45993  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  40196  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  36453  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  37084  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  38595  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  44904  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  28960  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  35126  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  40865  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  36368  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  39514  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  17330  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  18119  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  15830  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  20412  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  18886  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  18695  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  18061  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  18719  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  16428  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  19555  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  16097  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  19367  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  16316  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  15703  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  18866  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  18561  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  16011  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  19976  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  17010  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  17150  cells.


Time difference of 1.032396 hours

In [None]:
author <- 'reichart'
n.sample <- 50
exp.cut <- 0.5
n.gene <- 100
n.sim <- 10
samp.prob <- 0.5
is.pb <- TRUE
t1 <- Sys.time()
for (int.ct in 1:length(ct.used)){ #
    # select celltype
    cell.type <- ct.used[[int.ct]]
    bool.ct <- col.data.used$cell_type == cell.type
    col.data.ct <- col.data.used[bool.ct,]

    # select genes with sufficient mean (nblmm doesn't work for small mean)
    cnt.ct <- cnt.used[,bool.ct]
    cnt.ct.bm <- cnt.ct[rowMeans2(cnt.ct) > 0.1,]
    cnt.ct.bm <- cnt.ct.bm[sample(1:nrow(cnt.ct.bm), n.gene),]
    
    # boolean index of cells
    bool.cell.donor <- lapply(
        unique(col.data.ct$donor_id),
        function(id.donor){
            return(col.data.ct$donor_id == id.donor)
            }
        )
    col.data.donor <- lapply(
        bool.cell.donor,
        function(bool.donor){
            return(col.data.ct[bool.donor,])
            }
        )
    cnt.donor <- lapply(
        bool.cell.donor,
        function(bool.donor){
            return(cnt.ct.bm[,bool.donor])
            }
        )

    list.result.null <- list()
    list.result.pow <- list()
    for (i.iter in 1:n.sim){
        
        # sample individuals
        ind.select <- sample.int(
            n=length(unique(col.data.ct$donor_id)), 
            size=n.sample,
            replace=TRUE
            )
    
        # construct col.data & cnt matrix
        col.data.select <- data.table::rbindlist(col.data.donor[ind.select], idcol="id")
        cnt.select <- do.call(cbind, cnt.donor[ind.select])
        colnames(cnt.select) <- rownames(col.data.select)
                   
        # select cells randomly
        n.per.donor <- col.data.select %>% group_by(id, donor_id) %>% summarise(n=n()) %>% pull(n)
        cell.select <- as.logical(unlist(sapply(n.per.donor, rbinom, size=1, p=samp.prob)))
        col.data.select <- col.data.select[cell.select,]
        cnt.select <- cnt.select[,cell.select]

        # assign treatment
        col.data.select$tx_cell <- rbinom(n=nrow(col.data.select), size=1, p=0.5)
        if (is.pb){ 
            # assign treatment label
            n.tx <- as.integer(n.sample/2)
            urn <- c(rep(1,n.tx), rep(0,n.sample-n.tx))
            tx.ind <- sample(x=urn, size=n.sample, replace=FALSE)
        
            # assign tx to cells
            cell.per.ind <- col.data.select %>%
                group_by(id) %>%
                summarise(n=n())
            col.data.select$tx_cell <- rep(tx.ind, times=cell.per.ind$n)
        }
    
        # for power simulation, cut expression to half (+ force int)
        idx.de <- 1:5
        idx.tx <- which(col.data.select$tx_cell == 1)
        downCells(cnt.select, idx.de, idx.tx, exp.cut)
        cnt.select.t <- t(cnt.select)
        
        # run tests
        message('start regression')
        list.formula <- list(
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula('~tx_cell'),
            as.formula(ifelse(is.pb, 'y~tx_cell', 'y~tx_cell | id')),
            as.formula('~tx_cell')
            )
        list.data <- list(
            cnt.select,
            cnt.select,
            cnt.select,
            cnt.select,
            cnt.select.t,
            cnt.select
            )
        list.result.method <- list()
        for (int.test in 1:length(list.func)){
            list.result.method[[int.test]] <- list.func[[int.test]](
                list.formula[[int.test]],
                list.data[[int.test]],
                col.data.select
                )[,4] 
            }
        df.result <- do.call(rbind, list.result.method)
        rownames(df.result) <- c('glmGamPoi (Pb)', 'edgeR (Pb)', 'limma (Pb)', 'glmGamPoi (cell)', 'robust GLM (cell)', 'NB GLMM')
        list.result.null[[i.iter]] <- t(df.result)[6:n.gene,]
        list.result.pow[[i.iter]] <- t(df.result)[1:5,]
        }

    
    
    # results
    df.null <- do.call(rbind, list.result.null)
    df.pow <- do.call(rbind, list.result.pow)
    
    # save name
    path.tail <- paste(author,'n',n.sample,'ct',int.ct,'fc',exp.cut,'csv',sep='.')
    path.pow.head <- ifelse(is.pb, 'pow_subject/', 'pow_cell/')
    path.null.head <- ifelse(is.pb, 'null_subject/', 'null_cell/')
    
    write.csv(df.null, paste0(path.null.head, path.tail))
    write.csv(df.pow, paste0(path.pow.head, path.tail))
    
    }
t2 <- Sys.time()
t2-t1

[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  101631  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression



Remove  0  genes having low expression.
Analyzing  100  genes with  50  subjects and  91073  cells.


[1m[22m`summarise()` has grouped output by 'id'. You can override using the `.groups` argument.
start regression

