In [1]:
library(rhdf5)
library(edgeR)
library(MAST)
library(limma)

Loading required package: limma
Loading required package: SummarizedExperiment
Loading required package: GenomicRanges
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following object is masked from ‘package:limma’:

    plotMA

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, cbind, colMeans, colnames,
    colSums, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match,
    mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, 

In [2]:
# Path to the raw HDF5 input, and an open handle on it.
# The handle is released later with H5Fclose(h5f).
input_hdf5 <- "../hdf5_data/raw_melanomaS2.h5"
h5f <- H5Fopen(input_hdf5)

In [3]:
# Read the `matrix` dataset and the gene/cell attribute groups from the open
# HDF5 handle into memory.
matrix <- h5f$matrix
gene_attrs <- h5f$gene_attrs
gene_names <- h5f$gene_attrs$gene_names
gene_ids <- h5f$gene_attrs$gene_ids
# NOTE: `cell_groups` is reassigned further down from the phenograph CSV.
cell_groups <- h5f$cell_attrs$cell_groups

In [4]:
h5f

HDF5 FILE
        name /
    filename 

        name       otype dclass     dim
0 cell_attrs H5I_GROUP                 
1 gene_attrs H5I_GROUP                 
2 matrix     H5I_DATASET  FLOAT  x 2216

In [5]:
dim(matrix)

In [6]:
length(gene_names)

In [7]:
cell_groups = read.csv( '../analysis_output/melanomaS2/phenograph/clusters.csv', header=TRUE, stringsAsFactors=FALSE)

In [8]:
cell_barcodes = cell_groups$cell_barcode

In [9]:
cell_clusters = cell_groups$cluster

In [10]:
table(cell_clusters)

cell_clusters
  0   1   2   3   4   5   6   7 
881 295 280 232 167 166 103  92 

In [11]:
dim(matrix)

In [12]:
H5Fclose(h5f)

In [13]:
cell_types = read.csv('../intermediate_files/cluster_celltype_confusionmatrix.txt', sep='\t',header=TRUE, stringsAsFactors=TRUE)

Remove the last row

In [14]:
# Drop the final row of the confusion matrix.
# `1:nrow(x) - 1` parses as `(1:nrow(x)) - 1` (i.e. 0:(n - 1)) and only gave
# the intended rows because a 0 index is silently ignored; head(x, -1) states
# the intent directly and also copes with an empty data frame.
cell_types <- head(cell_types, -1)

In [15]:
cell_types

cluster,B.cells,CAFs,Endothelial.cells,Macrophages,melanoma,NK.cells,T.cells,T.Cells.CD4.,T.Cells.CD8.,Tregs,uncertain,unknown,Sum,celltype,purity
0,452,4,2,4,3,1,1,1,34,0,264,112,878,B.cells,0.51
1,1,4,3,4,47,1,1,1,1,0,4,0,67,melanoma,0.7
2,0,1,0,0,272,1,0,0,0,0,0,2,276,melanoma,0.99
3,95,0,0,0,1,15,6,30,26,31,20,5,229,B.cells,0.41
4,0,134,11,0,0,0,0,0,0,0,20,1,166,CAFs,0.81
5,0,0,0,158,0,0,0,0,0,0,6,0,164,Macrophages,0.96
6,0,102,0,0,0,0,0,0,0,0,0,0,102,CAFs,1.0
7,0,0,92,0,0,0,0,0,0,0,3,0,95,Endothelial.cells,0.97


In [16]:
malignant = 'melanoma'

In [17]:
cell_types$cluster

In [18]:
all_clusters = as.numeric(as.vector(cell_types$cluster))

In [26]:
# Logical mask over the rows of `cell_types`: TRUE where the assigned cell
# type equals the malignant label.
malignancy_mask <- cell_types$celltype == malignant
# Split the cluster ids by that mask.
malignant_clusters <- cell_types$cluster[malignancy_mask]
non_malignant_clusters <- cell_types$cluster[!malignancy_mask]

In [27]:
malignant_clusters = as.numeric(as.vector(malignant_clusters))

In [21]:
#' edgeR quasi-likelihood F-test with the detection rate as a covariate
#'
#' Builds a `DGEList` from `L$count`, normalises it, and fits a
#' quasi-likelihood GLM whose design contains the scaled per-column detection
#' rate (fraction of rows with a non-zero count) plus `L$condt`. The last
#' design coefficient, i.e. the `L$condt` term, is tested.
#'
#' @param L A list with elements `count` (count matrix handed to `DGEList()`;
#'   presumably genes in rows and cells in columns -- confirm against the
#'   caller) and `condt` (one condition value per column of `count`).
#' @return On success, a data frame with columns `pval` and `padj` whose row
#'   names are the row names of the `topTags()` table. If any step fails, a
#'   warning carrying the underlying error message is raised and
#'   `list(session_info = <sessionInfo()>)` is returned, so callers should
#'   check `is.data.frame()` on the result before using it.
run_edgeRQLFDetRate <- function(L) {
  message("edgeRQLFDetRate")
  session_info <- sessionInfo()
  tryCatch({
    dge <- DGEList(L$count, group = L$condt)
    dge <- calcNormFactors(dge)
    # Cellular detection rate: fraction of rows detected in each column,
    # centred and scaled before entering the design.
    cdr <- scale(colMeans(L$count > 0))
    design <- model.matrix(~ cdr + L$condt)
    dge <- estimateDisp(dge, design = design)
    fit <- glmQLFit(dge, design = design)
    # Test the last design column (the condition term) explicitly.
    qlf <- glmQLFTest(fit, coef = ncol(design))
    tt <- topTags(qlf, n = Inf)

    # Optional diagnostics, kept for interactive use:
    # plotBCV(dge)
    # plotQLDisp(fit)
    # hist(tt$table$PValue, 50)
    # hist(tt$table$FDR, 50)
    # limma::plotMDS(dge, col = as.numeric(as.factor(L$condt)), pch = 19)
    # plotSmear(qlf)

    data.frame(
      pval = tt$table$PValue,
      padj = tt$table$FDR,
      row.names = rownames(tt$table)
    )
  }, error = function(e) {
    # Surface the real cause instead of discarding it; the fallback return
    # value is unchanged for existing callers.
    warning(
      "edgeRQLFDetRate results could not be calculated: ",
      conditionMessage(e),
      call. = FALSE
    )
    list(session_info = session_info)
  })
}

In [28]:
malignant_clusters

In [23]:
# Accumulator for per-cluster results, a running index, and the cluster id
# currently under test.
deg_res <- list()
i <- 0
cg <- 1
# Disabled scaffold for iterating over every malignant cluster:
# for (cg in malignant_clusters) {
#
#   i <- i + 1
# }

edgeRQLFDetRate
edgeRQLFDetRate
Timing stopped at: 0.157 0.001 0.159


ERROR: Error in as.data.frame.default(x[[i]], optional = TRUE, stringsAsFactors = stringsAsFactors): cannot coerce class ""sessionInfo"" to a data.frame


In [29]:
cell_group <- cg
# Malignant clusters other than the one under test. Stored separately so that
# `malignant_clusters` is not shrunk each time this cell is re-run.
other_malignant_clusters <- malignant_clusters[malignant_clusters != cell_group]
# Keep the tested cluster plus every non-malignant cluster. `%in%` tests set
# membership; `!=` against a vector compares element-wise with recycling and
# is only correct when exactly one other malignant cluster is left.
a_malignant_and_normals <-
  all_clusters[!(all_clusters %in% other_malignant_clusters)]
# Column mask over cells; drop = FALSE keeps a matrix even for one column.
keep_cells <- cell_groups$cluster %in% a_malignant_and_normals
filtered_matrix <- matrix[, keep_cells, drop = FALSE]
filtered_cell_groups <- cell_groups$cluster[keep_cells]
# TRUE for cells of the tested cluster, FALSE for the retained others.
conditions <- filtered_cell_groups == cell_group

L <- list(count = filtered_matrix, condt = conditions)


In [31]:
edger_res = run_edgeRQLFDetRate(L)

edgeRQLFDetRate


In [32]:
edger_res

Unnamed: 0,pval,padj
9705,0.000000e+00,0.000000e+00
12747,0.000000e+00,0.000000e+00
18510,0.000000e+00,0.000000e+00
7086,0.000000e+00,0.000000e+00
11962,0.000000e+00,0.000000e+00
1311,0.000000e+00,0.000000e+00
19904,0.000000e+00,0.000000e+00
7812,0.000000e+00,0.000000e+00
18370,0.000000e+00,0.000000e+00
18679,0.000000e+00,0.000000e+00


In [None]:

# Output file name follows the pattern "DE_malignant_<i>".
f_name <- paste("DE_malignant", i, sep = "_")
# The gene identifiers live only in the row names of `edger_res` (they are set
# from rownames(tt$table) in run_edgeRQLFDetRate), so they must be written
# out; otherwise the p-values cannot be matched back to genes.
write.csv(edger_res, file = f_name, row.names = TRUE, quote = FALSE)

In [None]:
deg_res

In [24]:
5

## EdgeR
Binarize the cells into two groups: those in the given cell group and all others.

In [None]:
groups = cell_groups$cluster

In [None]:
# `cell_groups` is the data frame read from clusters.csv, so it cannot serve
# as a per-cell condition; use the binarised cluster membership instead
# (TRUE = cell belongs to `cell_group`).
L <- list(count = matrix, condt = cell_groups$cluster == cell_group)

In [None]:
edger_res = apply_edger_quasi(2, matrix, cell_clusters)

In [None]:
edger_res

In [None]:
lrtFiltered = edger_res

In [None]:
summary(decideTestsDGE(lrtFiltered))

In [None]:
?decideTestsDGE

In [None]:
names(edger_res)

In [None]:
temp = edger_res$unshrunk.coefficients

In [None]:
temp2 = cbind(temp, edger_res$table) 

In [None]:
edger_padj = p.adjust(edger_res$table$PValue, method = "BH")

In [None]:
edger_res$table$Padj = edger_padj

In [None]:
edger_res$table$cluster_id = 4

In [None]:
edger_res$table$gene_names = gene_names
edger_res$table$gene_ids = gene_ids


In [None]:
edger_res = cbind(temp, edger_res$table) 

In [None]:
decide_res = decideTestsDGE(lrtFiltered)

In [None]:
ss = show(decide_res)

In [None]:
data.frame()

In [None]:
decided_df = data.frame(decideTestsDGE(lrtFiltered))

In [None]:
colnames(decided_df) <- "decideTestDGE"

In [None]:
edger_res = cbind(decided_df, edger_res)

In [None]:
edger_res$

In [None]:
edger_res

In [None]:
edger_res[c(10, 9, 7, 8,5,4,3,2,1,6)]

In [None]:
edger_res$table



In [None]:
edger_pVals = apply_edger(cell_group, matrix, cell_groups)

In [None]:
edger_pVals

In [None]:
evaluate(edger_pVals, cell_group, gene_attrs)

In [None]:
#EdgeR
binary_groups = (h5f$cell_attrs$cell_groups==cell_group)
counts = matrix 

In [None]:
dge <- DGEList(counts = counts)

In [None]:
# Two-level factor (in group / not in group) and the matching design matrix.
group_edgeR <- factor(binary_groups)
design <- model.matrix(~ group_edgeR)
# Estimate dispersions against the design. `design` and `trend.method` are
# arguments of estimateDisp(); the classic estimateCommonDisp() /
# estimateTrendedDisp() / estimateTagwiseDisp() functions used here before do
# not take them, and `dge` was built without a group, so the group effect was
# not accounted for when estimating dispersion ahead of glmFit().
dge <- estimateDisp(dge, design = design, trend.method = "none")


In [None]:
# Fit the negative-binomial GLM and run a likelihood-ratio test; glmLRT() is
# called with its default coefficient.
fit <- glmFit(dge, design)
res <- glmLRT(fit)
# Select the p-value column by name rather than by position so the code does
# not depend on the column order of the result table.
pVals <- res$table$PValue
names(pVals) <- rownames(res$table)

# Replace the raw p-values with FDR-adjusted ones.
pVals <- p.adjust(pVals, method = "fdr")

In [None]:
?DGEList

## MAST