In [1]:
# libraries
library(community)
library(ggplot2)
library(gridExtra)
library(grid)
library(ComplexHeatmap)
library(dendsort)
library(igraph)
# library() stops with an error when the package is missing; require() would
# only warn and return FALSE, letting the notebook fail later and less clearly.
library(circlize)
library(R.utils)
library(data.table) # fread() is used to read the gzipped counts file
library(Seurat)
library(nichenetr)

ComplexHeatmap version 2.10.0
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://jokergoo.github.io/ComplexHeatmap-reference

If you use it in published research, please cite:
Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
  genomic data. Bioinformatics 2016.

The new InteractiveComplexHeatmap package can directly export static 
complex heatmaps into an interactive Shiny app with zero effort. Have a try!

This message can be suppressed by:
  suppressPackageStartupMessages(library(ComplexHeatmap))



Attaching package: ‘igraph’


The following objects are masked from ‘package:stats’:

    decompose, spectrum


The following object is masked from ‘package:base’:

    union


Loading required package: circlize

circlize version 0.4.15
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://joke

In [2]:
# Directory holding the preprocessed Lasry inputs. The trailing slash matters:
# file names are appended to this string with paste0() when the counts and the
# cell annotation are read.
input_dir <- "../../../../results/data_preprocessing/Lasry/preprocessed/"
# Directory for the results of this comparison (not referenced in the cells
# visible in this part of the notebook).
output_dir <- "../../../../results/method_comparison/compare_algorithms/community/"

In [3]:
# Load the expression table: one row per gene, one column per cell.
print("load counts")
# data.table = FALSE makes fread() return a plain data.frame directly, which
# avoids a second copy of this large table through as.data.frame().
counts <- fread(paste0(input_dir, "counts_corr.csv.gz"),
                header = TRUE,
                data.table = FALSE)
# Fail early if the gene identifier column is not present under its expected
# name instead of silently producing wrong row names.
stopifnot("gene_symbol" %in% colnames(counts))
# Move the gene symbols into the row names and drop the column by name, so the
# result does not depend on the column's position and stays a data.frame even
# when only one other column is left.
rownames(counts) <- counts[["gene_symbol"]]
counts[["gene_symbol"]] <- NULL
# str() prints the structure itself and returns NULL invisibly; wrapping it in
# print() would add a stray "NULL" line to the output.
str(counts)

[1] "load counts"
'data.frame':	15770 obs. of  46702 variables:
 $ 2020.09.15.AML0024.CATCAAGGTTAGCGGA           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CATCAAGTCCGAGAAG           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CATCCACAGGGACCAT           : num  0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CCTCAACAGAGCAAGA           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CCTCAACAGTTCCATG           : num  0 0.69 0 0 0 ...
 $ 2020.09.15.AML0024.CCTCAACGTAGAATAC           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CCTCAACGTTCTCCCA           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CCTCAACTCCGAACGC           : num  0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CCTCAACTCTAGTCAG           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CCTCACAAGACAGTCG           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CCTCACACAATTGCCA           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ 2020.09.15.AML0024.CCTCACACAGAACTAA           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ 

In [4]:
# Load the per-cell annotation table (tab-separated, with a header line).
# row.names = 1 turns the first column of the file into the row names.
print("load cell annotation")
anno_cells <- read.table(
  paste0(input_dir, "anno_cells_corr.txt"),
  sep = "\t",
  row.names = 1,
  header = TRUE
)
# str() prints the structure itself and returns NULL invisibly; wrapping it in
# print() would add a stray "NULL" line to the output.
str(anno_cells)

[1] "load cell annotation"
'data.frame':	46702 obs. of  93 variables:
 $ sample_ID                    : chr  "AML-0024" "AML-0024" "AML-0024" "AML-0024" ...
 $ cell                         : chr  "2020-09-15-AML0024:CATCAAGGTTAGCGGA" "2020-09-15-AML0024:CATCAAGTCCGAGAAG" "2020-09-15-AML0024:CATCCACAGGGACCAT" "2020-09-15-AML0024:CCTCAACAGAGCAAGA" ...
 $ UMAP_1                       : num  -0.731 -2.2 -2.867 -1.666 -0.972 ...
 $ UMAP_2                       : num  -15.8 -16.7 -16.1 -16.1 -17.5 ...
 $ orig.ident                   : chr  "2020-09-15-AML0024" "2020-09-15-AML0024" "2020-09-15-AML0024" "2020-09-15-AML0024" ...
 $ samples                      : chr  "AML0024" "AML0024" "AML0024" "AML0024" ...
 $ Broad_cell_identity          : chr  "CD14+ monocyte" "CD14+ monocyte" "CD16+ monocyte" "CD14+ monocyte" ...
 $ Cell_type_identity           : chr  "CD14+ IFN+" "CD14+" "CD16+" "CD14+ IFN+" ...
 $ clusters_res.2               : int  7 7 7 7 80 7 7 7 7 7 ...
 $ CNV_pos                   

In [5]:
# Relabel the count columns with the annotation's cell IDs and use the same IDs
# as row names of the annotation table.
# The relabelling is positional, so it is only meaningful when there is exactly
# one annotation row per count column. Check that up front: a missing cell_ID
# column or a shorter ID vector would otherwise silently produce absent or NA
# column names.
# NOTE(review): matching *order* of cells between the two tables is assumed,
# not verified here - confirm against the preprocessing step.
stopifnot(
  !is.null(anno_cells$cell_ID),
  nrow(anno_cells) == ncol(counts)
)
colnames(counts) <- anno_cells$cell_ID
rownames(anno_cells) <- anno_cells$cell_ID

In [6]:
# Build the Seurat object from the count table, attaching the cell annotation
# as per-cell metadata.
seurat_obj <- CreateSeuratObject(
  counts = counts,
  meta.data = anno_cells
)

In [7]:
# Switch the object's active identities to the "cell_type" annotation.
# NOTE(review): relies on Seurat resolving the string as a metadata column
# name; "cell_type" is expected to come from anno_cells - confirm it exists.
Idents(seurat_obj) <- "cell_type"

In [8]:
# Distinct cell type labels and sample identifiers found in the object's
# metadata, in order of first appearance.
cell_type_list <- unique(seurat_obj@meta.data[["cell_type"]])
sample_list <- unique(seurat_obj@meta.data[["sample_ID"]])

In [11]:
# Distinct cell type labels in the annotation table, in order of first
# appearance (drop = TRUE makes the vector result of the column lookup explicit).
cell_types <- unique(anno_cells[, "cell_type", drop = TRUE])

In [16]:
# Preallocate the genes x cell types result matrix, filled with NA; the rows
# are named after the genes in counts and the columns after the cell types.
df_mean_above_threshold <- matrix(
  NA,
  nrow = nrow(counts),
  ncol = length(cell_types),
  dimnames = list(rownames(counts), cell_types)
)

In [19]:
# Expression cutoff: only values strictly above it count as expressed when the
# per-cell-type means are computed.
threshold_expr <- 0.1

In [20]:
# For every cell type, store per gene the mean of the expression values that
# exceed threshold_expr, taken over the cells of that type. Genes with no value
# above the threshold in a cell type get 0.
#
# Reads:  counts, anno_cells, cell_types, threshold_expr
# Writes: the columns of df_mean_above_threshold (one per cell type)
for (cell_type in cell_types) {
  # identify cells that belong to the cell type
  idx_cell_type <- anno_cells[, "cell_type"] == cell_type

  # subset counts to the cell type of interest; drop = FALSE keeps a
  # two-dimensional object even for a single cell, so one code path serves
  # every cell type, and the matrix form avoids slow data.frame arithmetic
  counts_sub <- as.matrix(counts[, idx_cell_type, drop = FALSE])

  # identify values that pass the threshold
  above_threshold <- counts_sub > threshold_expr

  # set all values which did not pass the threshold to zero ...
  counts_sub_thr <- counts_sub * above_threshold

  # ... and turn the zeros into NA so that they are left out of the mean
  counts_sub_thr[counts_sub_thr == 0] <- NA

  # a gene without any value above the threshold yields NaN here
  df_mean_above_threshold[, cell_type] <- rowMeans(counts_sub_thr,
                                                   na.rm = TRUE)
}

# substitute the remaining NA/NaN entries with zeroes, once for the whole
# matrix instead of once per cell type
df_mean_above_threshold[is.na(df_mean_above_threshold)] <- 0

In [None]:
# Compute, for every gene and cell type, the mean of the expression values
# that exceed threshold_expr (0 where no cell of the type passes the threshold).
#
# Reads:  counts, anno_cells, threshold_expr
# Writes: cell_types, df_mean_above_threshold

# define all cell types in the data
cell_types <- unique(anno_cells[, "cell_type"])

# create an empty (NA-filled) matrix nr_genes x nr_cell_types;
# it is populated below with the mean expression values
df_mean_above_threshold <- matrix(NA,
                                  nrow = nrow(counts),
                                  ncol = length(cell_types))
rownames(df_mean_above_threshold) <- rownames(counts)
colnames(df_mean_above_threshold) <- cell_types

# populate df_mean_above_threshold, one column per cell type
for (cell_type in cell_types) {
  # identify cells that belong to the cell type
  idx_cell_type <- anno_cells[, "cell_type"] == cell_type

  # subset counts to the cell type of interest; drop = FALSE keeps a
  # two-dimensional object even for a single cell, so one code path serves
  # every cell type, and the matrix form avoids slow data.frame arithmetic
  counts_sub <- as.matrix(counts[, idx_cell_type, drop = FALSE])

  # identify values that pass the threshold
  above_threshold <- counts_sub > threshold_expr

  # set all values which did not pass the threshold to zero ...
  counts_sub_thr <- counts_sub * above_threshold

  # ... and turn the zeros into NA so that they are left out of the mean
  counts_sub_thr[counts_sub_thr == 0] <- NA

  # a gene without any value above the threshold yields NaN here
  df_mean_above_threshold[, cell_type] <- rowMeans(counts_sub_thr,
                                                   na.rm = TRUE)
}

# substitute the remaining NA/NaN entries with zeroes, once for the whole
# matrix instead of once per cell type
df_mean_above_threshold[is.na(df_mean_above_threshold)] <- 0