In [1]:
source("https://bioconductor.org/biocLite.R")

Bioconductor version 3.6 (BiocInstaller 1.28.0), ?biocLite for help


In [19]:
install.packages('ROCR')

also installing the dependencies ‘gtools’, ‘gdata’, ‘caTools’, ‘KernSmooth’, ‘gplots’

Updating HTML index of packages in '.Library'
Making 'packages.html' ... done


In [3]:
library(rhdf5)
library(edgeR)
library(MAST)
library(splatter)

Loading required package: limma
Loading required package: SummarizedExperiment
Loading required package: GenomicRanges
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following object is masked from ‘package:limma’:

    plotMA

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, cbind, colMeans, colnames,
    colSums, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match,
    mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, 

In [4]:
# Location of the simulated dataset, relative to the notebook's directory.
input_hdf5 <- '../simulated/15_groups_21.03.2018/melanomaS2_sim_de0.05_loc0.5_zheng17.h5'
# Keep the file handle around; the cells below read datasets through it.
h5f <- H5Fopen(input_hdf5)

In [5]:
# Copy the datasets used below out of the HDF5 file into plain R objects.
matrix <- h5f$matrix
# Read the gene attribute group once and take the individual fields from it.
gene_attrs <- h5f$gene_attrs
gene_names <- gene_attrs$gene_names
# NOTE(review): gene_ids prints as NULL a few cells further down, so the file
# presumably has no `gene_ids` entry under `gene_attrs` -- confirm.
gene_ids <- gene_attrs$gene_ids
cell_groups <- h5f$cell_attrs$cell_groups

In [6]:
h5f$cell_attrs

In [7]:
h5f$gene_attrs

In [8]:
gene_ids

NULL

In [9]:
names(h5f$gene_attrs)

In [10]:
names(h5f$cell_attrs)

### Cell groups

In [11]:
# Distinct group labels. They are stored under their own name so that
# `cell_groups` (assigned earlier from h5f$cell_attrs$cell_groups) is not
# overwritten: the tests below build their masks with
# `cell_groups == cell_group`, and a shorter vector would be silently
# recycled by that comparison's use as a logical index.
cell_group_levels <- unique(h5f$cell_attrs$cell_groups)

In [12]:
cell_group_levels

In [13]:
is.vector(cell_group_levels)

In [14]:
vcell_groups <- as.vector(cell_group_levels)

### opt: vapply over each cell group
# NOTE(review): each element of vcell_groups is a single label, so length()
# yields 1 for every group; this looks like a placeholder for a real
# per-group computation.
vapply(vcell_groups, FUN=length, FUN.VALUE=0)

In [15]:
# Release the HDF5 file handle; the datasets used below were copied into R
# objects in the earlier cells.
# NOTE(review): cells further down still evaluate h5f$cell_attrs$cell_groups.
# Presumably that requires an open handle -- confirm, or switch those cells to
# an in-memory copy of the per-cell labels.
H5Fclose(h5f)

In [16]:
cell_group = 'Group2'

In [20]:
#' Per-gene Wilcoxon rank-sum (Mann-Whitney U) test of one cell group against
#' all remaining cells.
#'
#' @param cell_group Label of the group to test (a single value).
#' @param matrix Two-dimensional expression data with one row per gene and one
#'   column per cell.
#' @param cell_groups Group label of every cell, in the column order of
#'   `matrix` (one entry per column, not the set of distinct labels).
#' @return FDR-adjusted p-values, one per row of `matrix`.
apply_wilcox <- function(cell_group, matrix, cell_groups) {
    # A label vector of the wrong length would be silently recycled by the
    # logical subsetting below and produce meaningless p-values.
    if (is.null(dim(matrix)) || length(cell_groups) != ncol(matrix)) {
        stop(
            "cell_groups must have one label per column of matrix (got ",
            length(cell_groups), " labels for ", NCOL(matrix), " columns)",
            call. = FALSE
        )
    }

    # The group mask is the same for every gene, so build it once.
    in_group <- cell_groups == cell_group
    if (!any(in_group, na.rm = TRUE) || all(in_group, na.rm = TRUE)) {
        stop(
            "cell_group '", cell_group,
            "' must match some, but not all, entries of cell_groups",
            call. = FALSE
        )
    }

    # One two-sample test per gene: cells in the group vs. all other cells.
    pVals <- apply(matrix, 1, function(x) {
        wilcox.test(x[in_group], x[!in_group])$p.value
    })

    p.adjust(pVals, method = "fdr")
}

#' Score a vector of per-gene p-values against the ground truth stored in
#' `gene_attrs` and return the area under the ROC curve.
#'
#' Genes whose `DEFac<cell_group>` entry differs from 1 are treated as
#' differentially expressed (label 0); genes whose entry equals 1 are treated
#' as not differentially expressed (label 1). The p-values are used as the
#' prediction scores.
#'
#' @param pVals Per-gene p-values, in the same gene order as the entries of
#'   `gene_attrs`.
#' @param cell_group Group label; `paste0("DEFac", cell_group)` must name an
#'   entry of `gene_attrs` (for example "DEFacGroup5").
#' @param gene_attrs List of per-gene attributes.
#' @return The AUC as a single number.
evaluate <- function(pVals, cell_group, gene_attrs) {
    defac_name <- paste0("DEFac", cell_group)  # e.g. DEFacGroup5
    de_fac <- gene_attrs[[defac_name]]
    if (is.null(de_fac)) {
        stop("gene_attrs has no '", defac_name, "' entry", call. = FALSE)
    }
    # Drop any dim attribute so plain vectors are handed to ROCR.
    de_fac <- as.vector(de_fac)

    # Truth and predictions are matched by position, so they must line up.
    if (length(pVals) != length(de_fac)) {
        stop(
            "pVals has ", length(pVals), " entries but '", defac_name,
            "' has ", length(de_fac), "; they must describe the same genes",
            call. = FALSE
        )
    }

    # Only genes with a known DE factor take part in the evaluation.
    known <- !is.na(de_fac)
    truth <- rep(1, times = sum(known))
    truth[de_fac[known] != 1] <- 0

    # roc calculations
    pred <- ROCR::prediction(pVals[known], truth)
    aucObj <- ROCR::performance(pred, "auc")
    aucObj@y.values[[1]]
}





In [21]:
wilcox_res = apply_wilcox(cell_group, matrix, cell_groups)
evaluate(wilcox_res, cell_group, gene_attrs)

In [22]:
length(wilcox_res)

## edgeR
Binarize the cell labels into two classes: cells in the given group versus all other cells.

In [None]:
cgs = sort(unique(cell_groups))

In [None]:
cgs

In [None]:
# Run edgeR once for every cell group and keep each result under the group's
# label, so that deg_res[["<group>"]] gives the result for that group.
deg_res <- lapply(cgs, function(cg) apply_edger(cg, matrix, cell_groups))
names(deg_res) <- as.character(cgs)
    

In [None]:

#' edgeR likelihood-ratio test of one cell group against all remaining cells.
#'
#' @param cell_group Label of the group to test (a single value).
#' @param matrix Count matrix with one row per gene and one column per cell.
#' @param cell_groups Group label of every cell, in the column order of
#'   `matrix` (one entry per column, not the set of distinct labels).
#' @return The object returned by `glmLRT()`; per-gene raw p-values are in
#'   its `$table$PValue` column and are not multiple-testing adjusted here.
apply_edger <- function(cell_group, matrix, cell_groups) {
    # The design gets one row per label, so labels and columns must line up.
    if (is.null(dim(matrix)) || length(cell_groups) != ncol(matrix)) {
        stop(
            "cell_groups must have one label per column of matrix (got ",
            length(cell_groups), " labels for ", NCOL(matrix), " columns)",
            call. = FALSE
        )
    }

    # Binarize using the labels passed in, not a global file handle.
    binary_groups <- as.vector(cell_groups == cell_group)

    dge <- DGEList(counts = matrix)
    group_edgeR <- factor(binary_groups)
    design <- model.matrix(~ group_edgeR)

    # estimateDisp() is the estimator that takes `design` and `trend.method`
    # directly, so the dispersions are estimated under the two-group design.
    dge <- estimateDisp(dge, design = design, trend.method = "none")

    fit <- glmFit(dge, design)
    glmLRT(fit)
}

In [None]:
edger_res = apply_edger(cell_group, matrix, cell_groups)

In [None]:
edger_padj = p.adjust(edger_res$table$PValue, method = "fdr")

In [None]:
edger_res$table$Padj = edger_padj

In [None]:
# r remove from named list
edger_res$table$padj = NULL

In [None]:
typeof(edger_res$table)

In [None]:
edger_res$table

In [None]:
# apply_edger() returns the glmLRT result object, while evaluate() indexes a
# plain vector of per-gene p-values, so extract and FDR-adjust them first.
edger_pVals <- p.adjust(
    apply_edger(cell_group, matrix, cell_groups)$table$PValue,
    method = "fdr"
)

In [None]:
evaluate(edger_pVals, cell_group, gene_attrs)

In [None]:
#EdgeR
binary_groups = (h5f$cell_attrs$cell_groups==cell_group)
counts = matrix 

In [None]:
dge <- DGEList(counts = counts)

In [None]:
# Two-level factor: TRUE for cells in the selected group, FALSE otherwise.
group_edgeR <- factor(binary_groups)
design <- model.matrix(~ group_edgeR)
# estimateDisp() is the estimator that takes `design` and `trend.method`
# directly, so the dispersions are estimated under the two-group design.
dge <- estimateDisp(dge, design = design, trend.method = "none")


In [None]:
# Fit the negative binomial GLM and run the likelihood-ratio test.
fit <- glmFit(dge, design)
res <- glmLRT(fit)

# Take the fourth column of the result table as the p-values, label them with
# the table's row names, then apply the FDR correction.
pVals <- p.adjust(
    setNames(res$table[, 4], rownames(res$table)),
    method = "fdr"
)

In [None]:
?DGEList

## MAST

counts = matrix
binary_groups = (h5f$cell_attrs$cell_groups==cell_group)

## Limma

In [None]:
# *** Running Limma:

#' limma-voom differential expression test between two groups of samples.
#'
#' @param rawData Count matrix with one row per gene. Its columns must be
#'   ordered as `cbind(gr1, gr2)`: the first `ncol(gr1)` columns are labelled
#'   GR1 and the remaining `ncol(gr2)` columns GR2.
#' @param gr1 Counts of the first group; only its number of columns is used.
#' @param gr2 Counts of the second group; only its number of columns is used.
#' @return FDR-adjusted p-values, one per row of `rawData` and in the same row
#'   order. Rows that are zero in every sample are not tested and get `NA`.
Run_Limma <- function(rawData, gr1, gr2) {
    # NCOL() also handles a single-sample group that was dropped to a vector.
    n_gr1 <- NCOL(gr1)
    n_gr2 <- NCOL(gr2)
    if (is.null(dim(rawData)) || ncol(rawData) != n_gr1 + n_gr2) {
        stop(
            "rawData must have ncol(gr1) + ncol(gr2) columns, ",
            "ordered as cbind(gr1, gr2)",
            call. = FALSE
        )
    }

    # Genes without a single non-zero count carry no information; skip them.
    keep <- rowSums(rawData != 0, na.rm = TRUE) > 0
    FilteredData <- rawData[keep, , drop = FALSE]

    # Samples' conditions:
    mType <- factor(c(rep("GR1", n_gr1), rep("GR2", n_gr2)))

    # Normalization factors from edgeR TMM method:
    nf <- edgeR::calcNormFactors(FilteredData)
    design <- model.matrix(~mType)

    # Voom transformation for RNA-seq data:
    y <- limma::voom(FilteredData, design, lib.size = colSums(FilteredData) * nf)

    # Linear modeling:
    fit <- limma::eBayes(limma::lmFit(y, design))

    # sort.by = "none" keeps the rows in input order; the default ordering
    # would detach the p-values from their genes.
    Limma_results <- limma::topTable(fit, coef = 2, number = Inf, sort.by = "none")

    # Put the tested genes back at their original row positions. p.adjust()
    # ignores the NA entries, so the correction covers the tested genes only.
    pVals <- rep(NA_real_, nrow(rawData))
    pVals[keep] <- Limma_results$P.Value
    p.adjust(pVals, method = "fdr")
}

In [None]:
binary_groups = (h5f$cell_attrs$cell_groups==cell_group)

In [None]:
gr1 = matrix[,binary_groups]

In [None]:
gr2 = matrix[,!binary_groups]

In [None]:
dim(gr2)

In [None]:
# Run_Limma labels the first ncol(gr1) columns of its input as GR1 and the
# rest as GR2, so hand it the counts in exactly that column order.
limma_res <- Run_Limma(cbind(gr1, gr2), gr1, gr2)

In [None]:
evaluate(pVals = limma_res, cell_group, gene_attrs)