# Define gene signatures and visualize tree on tSNE
- define robust gene signatures defining each cluster beyond the decision tree rules
- measure signature similarity across experiments
- find the appropriate tSNE parameters
- overlay the tree hierarchy on the tSNE, annotated by (1) cluster membership, (2) decision tree
genes, and (3) activation of the robust cluster gene signatures
- generate additional figures for the manuscript (heatmaps, illustrations, etc.)

In [2]:
suppressMessages({
    library(tidyverse)
    library(DESeq2)
})

“package ‘dplyr’ was built under R version 3.5.1”

In [54]:
# Synthetic count matrix for trying out the pipeline: one row per cell, one
# column per gene, filled with whole numbers drawn uniformly from [1, 10].
n_cells <- 100
n_genes <- 300
raw_counts <- runif(n_cells * n_genes, min = 1, max = 10)
test_data <- matrix(round(raw_counts), nrow = n_cells, ncol = n_genes)

In [74]:
# Assign every cell to one of n_communities groups uniformly at random
# (with replacement, so group sizes vary).
n_communities <- 10
communities <- sample(seq_len(n_communities), size = n_cells, replace = TRUE)

In [83]:
# Find the genes that are differentially expressed with respect to `label`
# using DESeq2, report how many pass the significance cutoff, and optionally
# save them to a CSV file.
#
# Args:
#   data:     count matrix with genes in rows and samples in columns.
#   label:    one value per column of `data`; becomes the single design
#             variable `is_community`.
#   alpha:    adjusted p-value cutoff; passed to results() and used to select
#             the significant genes.
#   out.path: optional path of a CSV file for the significant genes. Nothing
#             is written when NULL.
#
# Returns:
#   Invisibly, the rows of the DESeq2 results table with padj < alpha.
GeneSignature <- function(data, label, alpha=0.05, out.path=NULL) {
    dds <- DESeqDataSetFromMatrix(countData = data, # genes x samples
                                  colData = data.frame(is_community = label),
                                  design = ~ is_community)
    dds <- DESeq(dds)
    res <- results(dds, alpha = alpha)

    # padj can be NA (DESeq2 leaves it unset for genes it filters out), so
    # select by position with which(): a logical mask containing NA would make
    # the count NA and break the row subsetting below.
    sig.idx <- which(res$padj < alpha)
    print(paste0(length(sig.idx), ' genes found differentially expressed'))

    sig.res <- res[sig.idx, ]
    if (!is.null(out.path)) {
        write.csv(as.data.frame(sig.res), file = out.path)
    }
    invisible(sig.res)
}

In [87]:
# Run the signature analysis once per community (cells in the community
# versus all other cells) and write each result to
# <base.path>community_<id>.csv.
base.path <- '~/Desktop/Inbox/'
for (i in unique(communities)) {
    suppressMessages(
        GeneSignature(
            t(test_data),  # cells x genes -> genes x samples
            label = as.numeric(communities == i),
            out.path = paste0(base.path, 'community_', i, '.csv')
        )
    )
}

[1] 0
[1] 0
[1] 0
[1] 0
[1] 0
[1] 0
[1] 0
[1] 0
[1] 0
[1] 0
