# Description

In [1]:
# libraries
library(Seurat)
library(tidyverse)
library(igraph)
require(circlize)
library(R.utils)
library(data.table) #to read gz file

Attaching SeuratObject

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.2      [32m✔[39m [34mpurrr  [39m 0.3.5 
[32m✔[39m [34mtibble [39m 3.2.1      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.1      [32m✔[39m [34mstringr[39m 1.5.0 
[32m✔[39m [34mreadr  [39m 2.1.3      [32m✔[39m [34mforcats[39m 0.5.2 
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Attaching package: ‘igraph’


The following objects are masked from ‘package:dplyr’:

    as_data_frame, groups, union


The following objects are masked from ‘package:purrr’:

    compose, simplify


The following object is masked from ‘package:tidyr’:

    crossing


The following object is masked from ‘package:tibble’:

 

### Read in the expression data of interacting cells:
The dataset used here is publicly available single-cell data from XXX. The data were processed and filtered by applying XXX.

In [2]:
# Input location of the preprocessed data and output locations for the
# CellPhoneDB run; both output variables refer to the same directory.
input_dir <- "../../../../../results/data_preprocessing/Lasry/preprocessed/"
output_dir <- "../../../../../results/method_comparison/compare_algorithms/CPDB/"
final_out <- output_dir

In [3]:
"../../../../../results/method_comparison/compare_results/CPDB/"

In [4]:
# Load the corrected count table from the gzipped CSV and convert it to a
# plain data frame with genes as row names.
counts <- fread(
  paste0(input_dir, "counts_corr.csv.gz"),
  header = TRUE,
  check.names = FALSE
)
counts <- as.data.frame(counts)

# The gene identifiers are expected in a `gene_symbol` column; fail early with
# a clear message instead of silently dropping whatever column comes first.
if (!"gene_symbol" %in% names(counts)) {
  stop("counts_corr.csv.gz has no 'gene_symbol' column", call. = FALSE)
}

# Move the gene symbols into the row names and remove that column by name
# (a logical mask is used so duplicated cell column names are left untouched).
rownames(counts) <- counts$gene_symbol
counts <- counts[, names(counts) != "gene_symbol", drop = FALSE]

In [5]:
# Read the tab-separated cell annotation table; the first column becomes the
# row names and column names are kept exactly as written in the file.
print("load cell annotation")
anno_cells <- read.table(
  file = paste0(input_dir, "anno_cells_corr.txt"),
  header = TRUE,
  sep = "\t",
  row.names = 1,
  check.names = FALSE
)

[1] "load cell annotation"


In [6]:
# Use the cell IDs stored in the `cell` column as row names of the annotation
row.names(anno_cells) <- anno_cells$cell

In [7]:
# Label the count columns with the cell IDs taken from the annotation.
# The relabelling is purely positional, so both tables must describe the same
# number of cells; stop with a clear message when they do not.
# NOTE(review): this also assumes both tables list the cells in the same
# order - confirm against the preprocessing step.
if (ncol(counts) != nrow(anno_cells)) {
  stop(
    "counts has ", ncol(counts), " cell columns but anno_cells has ",
    nrow(anno_cells), " rows",
    call. = FALSE
  )
}
colnames(counts) <- rownames(anno_cells)

In [8]:
# Build the Seurat object from the count table, attaching the cell annotation
# as metadata
srt <- CreateSeuratObject(
  counts = counts,
  meta.data = anno_cells
)

In [9]:
# Number of cells per health status (case / control)
srt@meta.data[["health_status"]] %>%
  table()

.
    AML healthy 
  21311   25391 

In [10]:
# Number of cells per cell type
srt@meta.data[["cell_type"]] %>%
  table()

.
    B    DC   Ery  Gran  HSPC  Mono    NK     T 
 4765  1634  1674  2332  3169 18004  3078 12046 

In [11]:
# Use the `cell_type` metadata column as the active identity class
Idents(object = srt) <- "cell_type"

In [12]:
# Differential expression between AML and healthy cells, computed separately
# for every cell type. The significant genes of all cell types end up in one
# data frame `DEGs` with a leading `cluster` column holding the cell type.

# the two conditions to compare
condition_oi <- "AML"
condition_reference <- "healthy"

# one slot per cell type; filled in the loop and combined once afterwards
# instead of growing a data frame with rbind() on every iteration
cell_types <- unique(srt@meta.data$cell_type)
deg_tables <- vector("list", length(cell_types))
names(deg_tables) <- cell_types

for (cell in cell_types) {

  # restrict the object to the current cell type (identities are cell types)
  seurat_obj_receiver <- subset(srt, idents = cell)

  # switch the identities to the health status so FindMarkers can contrast them
  seurat_obj_receiver <- SetIdent(
    seurat_obj_receiver,
    value = seurat_obj_receiver[["health_status"]]
  )

  # markers of the condition of interest versus the reference condition;
  # the gene names are moved from the row names into a `gene` column
  DE_table_receiver <- FindMarkers(
    object = seurat_obj_receiver,
    ident.1 = condition_oi,
    ident.2 = condition_reference,
    min.pct = 0.10
  ) %>%
    rownames_to_column("gene")

  # prepend the cell type; mutate() also works when no marker was returned,
  # whereas data.frame(cluster = cell, <0 rows>) would raise an error
  DE_table_receiver <- DE_table_receiver %>%
    mutate(cluster = cell, .before = 1) %>%
    # keep significant genes with a sufficiently large fold change
    filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25)

  # report cell type and number of DEGs found
  print(cell)
  print(nrow(DE_table_receiver))

  deg_tables[[cell]] <- DE_table_receiver
}

# stack the per-cell-type tables
DEGs <- bind_rows(deg_tables)


[1] "Mono"
[1] 178
[1] "Gran"
[1] 103
[1] "T"
[1] 48
[1] "NK"
[1] 164
[1] "B"
[1] 74
[1] "HSPC"
[1] 90
[1] "Ery"
[1] 645
[1] "DC"
[1] 56


In [13]:
# write.table(DEGs, file =paste0(output_dir,"samples_DEGs/DEGs.tsv"), sep = '\t', quote = F, row.names = F)

In [14]:
# Two-column table: cell ID (from the row names) and its cell type
meta <- rownames_to_column(anno_cells["cell_type"], var = "Cell")

The code below takes the expression count matrix (`counts`), the annotation data frame (`anno_cells`) and the DEG table (`DEGs`), and writes a separate counts file, metadata file and DEG file for each sample ID in the `sample_ID` column of `anno_cells`. Each metadata file contains the cell ID and the `cell_type` of every cell in the sample (required by CellPhoneDB), the counts file contains the expression counts for each gene in each cell of the sample, and the DEG file contains the DEGs of the cell types present in the sample.

In [15]:
# Write the CellPhoneDB input files (metadata, counts, DEGs) for every sample
# into "<output_dir>samples_DEGs/".

# Create the target directory; recursive = TRUE also creates missing parents
# and showWarnings = FALSE keeps reruns quiet when the directory already exists.
dir.create(
  file.path(output_dir, "samples_DEGs"),
  showWarnings = FALSE,
  recursive = TRUE
)

# The loop variable is not called `sample` so that base::sample() is not
# shadowed and the comparison below cannot be mistaken for a column lookup.
for (current_sample in unique(anno_cells$sample_ID)) {

  # annotation rows of the current sample; which() drops NA comparisons and
  # base subsetting keeps the cell IDs as row names
  in_sample <- which(anno_cells$sample_ID == current_sample)
  anno_filtered <- anno_cells[in_sample, , drop = FALSE]

  # count columns of the current sample; drop = FALSE keeps a data frame even
  # when the sample consists of a single cell
  subset_counts <- counts[, rownames(anno_filtered), drop = FALSE]

  # cell ID and cell type of every cell in the sample (required by CellPhoneDB)
  subset_meta <- anno_filtered["cell_type"] %>% rownames_to_column("Cell")

  # DEGs of the cell types that occur in this sample
  subset_DEGs <- DEGs %>% filter(cluster %in% unique(subset_meta$cell_type))

  # metadata: no row names, the cell IDs are in the `Cell` column
  write.table(
    subset_meta,
    paste0(output_dir, "samples_DEGs/", current_sample, "_meta.tsv"),
    sep = "\t", quote = FALSE, row.names = FALSE
  )

  # counts: the row names (gene symbols) are written as the first column
  write.table(
    subset_counts,
    paste0(output_dir, "samples_DEGs/", current_sample, "_counts.tsv"),
    sep = "\t", quote = FALSE
  )

  # DEGs: the row names are meaningless running numbers, so they are left out
  # and `cluster` / `gene` become the first two columns of the file
  write.table(
    subset_DEGs,
    paste0(output_dir, "samples_DEGs/", current_sample, "_DEGs.tsv"),
    sep = "\t", quote = FALSE, row.names = FALSE
  )
}


“'../../../../../results/method_comparison/compare_algorithms/CPDB//samples_DEGs' already exists”


Below is the content of the shell script (`./runCPDB.sh`), which runs CellPhoneDB's DEG analysis method for each sample in the `samples_DEGs/` directory.

For each sample, the script creates a new directory (`${sample}_results`) to store the results of the CellPhoneDB analysis. The `cellphonedb method degs_analysis` command runs the DEG analysis method on the metadata and counts files of the current sample, using the sample's `${sample}_DEGs.tsv` file as the list of differentially expressed genes. The `--database` option specifies the path to the CellPhoneDB database to use for the analysis, while the `--counts-data` option specifies the type of gene identifier used in the counts file (in this case, `hgnc_symbol`). The `--output-path` option specifies the directory where the analysis results will be saved.

`'./runCPDB.sh'`

```bash
# Directory containing the per-sample input files (note the trailing slash)
samples_dir=../../../../../results/method_comparison/compare_algorithms/CPDB/samples_DEGs/

# Path to the custom database file
custom_db=../../../../../results/method_comparison/build_customDB/CPDB/custom_cellphone.db

# Derive the sample names from the *_meta.tsv files instead of parsing `ls`
# output, so result directories from earlier runs and names containing
# underscores or spaces are handled correctly.
for meta_file in "${samples_dir}"*_meta.tsv;
do
  # When nothing matches, the unexpanded pattern is passed through; skip it
  [ -e "$meta_file" ] || continue

  sample=$(basename "$meta_file" _meta.tsv)
  results_dir="${samples_dir}${sample}_results"

  # Create the results subdirectory; -p makes reruns succeed when it exists
  mkdir -p "$results_dir"

  # Run CellPhoneDB's DEG analysis method on the sample using the custom
  # database, with input files in the sample directory and output files in
  # the sample results subdirectory
  cellphonedb method degs_analysis \
    "${samples_dir}${sample}_meta.tsv" \
    "${samples_dir}${sample}_counts.tsv" \
    "${samples_dir}${sample}_DEGs.tsv" \
    --database "$custom_db" \
    --counts-data hgnc_symbol \
    --output-path "${results_dir}/"
done
```

In [16]:
run_CPDB <- './runCPDB.sh'

In [8]:
# Run the CellPhoneDB driver script. system() returns the exit status of the
# command; a non-zero status is reported as a warning so that a failed run
# does not go unnoticed (results of samples that did finish stay usable).
cpdb_status <- system(run_CPDB)
if (cpdb_status != 0) {
  warning(
    "'", run_CPDB, "' exited with status ", cpdb_status,
    call. = FALSE
  )
}

### Restructure CellPhoneDB's outputs

**TODO:** explain why the `means` file is used here rather than the significant-means file.

In [17]:
# Recursively list every directory below the per-sample folder (full paths)
results_dir <- list.dirs(
  path = paste0(output_dir, "samples_DEGs/"),
  full.names = TRUE,
  recursive = TRUE
)

In [18]:
# Keep only the paths that contain the literal text "_results"
results_dir <- grep("_results", results_dir, fixed = TRUE, value = TRUE)

In [19]:
# Reshape a wide CellPhoneDB table into a long two-column table.
#
# cpdb_means: data frame with an `interacting_pair` column (two protein names
#   joined by "_") and one column per cell-type pair whose name has the form
#   "<sending celltype>|<receiving celltype>".
#
# Returns a table with the columns
#   interacting_pairs - "<sending celltype>:<sending protein>_<receiving celltype>:<receiving protein>"
#   value             - the entry of the corresponding cell of `cpdb_means`
restructure_result <- function(cpdb_means) {

  # keep the columns whose name contains 'interacting_pair' or '|'
  wanted <- grepl('interacting_pair|\\|', colnames(cpdb_means))
  pair_table <- cpdb_means[, wanted]

  # one row per (interacting pair, cell-type pair)
  long_table <- pivot_longer(
    pair_table,
    cols = -interacting_pair,
    names_to = "cell_types",
    values_to = "value"
  )

  # break both composite labels into their sending / receiving halves
  long_table <- separate(
    long_table, interacting_pair,
    c("sending_protein", "receiving_protein"),
    sep = "_"
  )
  long_table <- separate(
    long_table, cell_types,
    c("sending_celltype", "receiving_celltype"),
    sep = "\\|"
  )

  # reassemble as "<celltype>:<protein>" per side, then join both sides
  long_table <- unite(
    long_table, sender,
    c("sending_celltype", "sending_protein"),
    sep = ":"
  )
  long_table <- unite(
    long_table, receiver,
    c("receiving_celltype", "receiving_protein"),
    sep = ":"
  )
  long_table <- unite(
    long_table, interacting_pairs,
    c("sender", "receiver"),
    sep = "_"
  )

  select(long_table, interacting_pairs, value)
}


In [20]:
# Collect the restructured "relevant_interactions.txt" table of every sample
# in a list keyed by sample ID; result directories without that file are
# skipped.
results <- list()
for (sample in results_dir) {

  file <- paste0(sample, "/relevant_interactions.txt")

  # sample ID = directory name up to the first "_"
  sample_id <- unlist(strsplit(basename(sample), "_"))[1]

  if (!file.exists(file)) {
    next
  }

  cpdb_means <- read.delim(file, check.names = FALSE)

  # name the value column after the sample so the tables can be joined later
  sample_result <- restructure_result(cpdb_means)
  names(sample_result) <- c("interaction_ID", sample_id)
  results[[sample_id]] <- sample_result
}

In [21]:
# Collect the restructured "means.txt" table of every sample in a list keyed
# by sample ID; result directories without that file are skipped.
means <- list()
for (sample in results_dir) {

  file <- paste0(sample, "/means.txt")

  # sample ID = directory name up to the first "_"
  sample_id <- unlist(strsplit(basename(sample), "_"))[1]

  if (!file.exists(file)) {
    next
  }

  cpdb_means <- read.delim(file, check.names = FALSE)

  # name the value column after the sample so the tables can be joined later
  sample_result <- restructure_result(cpdb_means)
  names(sample_result) <- c("interaction_ID", sample_id)
  means[[sample_id]] <- sample_result
}

In [22]:
# Collapse the list of per-sample `means` tables into one wide table.
# Reduce() applies the join pairwise from left to right; the full join on
# "interaction_ID" keeps every interaction that occurs in at least one sample.
# The list `means` is overwritten by the joined table.
means <- Reduce(
  f = function(left, right) full_join(left, right, by = "interaction_ID"),
  x = means
)

In [23]:
# Collapse the list of per-sample `results` tables into one wide table called
# `matrix_result`. Reduce() applies the join pairwise from left to right; the
# full join on "interaction_ID" keeps every interaction that occurs in at
# least one sample.
matrix_result <- Reduce(
  f = function(left, right) full_join(left, right, by = "interaction_ID"),
  x = results
)

In [24]:
# Entries that are missing after the full join are set to 0
matrix_result <- replace(matrix_result, is.na(matrix_result), 0)

In [25]:
head(matrix_result)

interaction_ID,AML-0024,AML-0160,AML-0693,AML-1371,AML-2123,AML-3133,AML-4340,healthy-1,healthy-2,healthy-3,healthy-4,healthy-4003,healthy-5
<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
B:GNAS_B:ADRB2,0,0,0,0,0,0,0,0,0,0,0,0,0
B:GNAS_DC:ADRB2,0,0,0,0,0,0,0,0,0,0,0,0,0
B:GNAS_Ery:ADRB2,0,0,0,0,0,0,0,0,0,0,0,0,0
B:GNAS_Gran:ADRB2,0,0,0,0,0,0,0,0,0,0,0,0,0
B:GNAS_HSPC:ADRB2,0,0,0,0,0,0,0,0,0,0,0,0,0
B:GNAS_Mono:ADRB2,0,0,0,0,0,0,0,0,0,0,0,0,0


In [26]:
str(matrix_result)

tibble [18,293 × 14] (S3: tbl_df/tbl/data.frame)
 $ interaction_ID: chr [1:18293] "B:GNAS_B:ADRB2" "B:GNAS_DC:ADRB2" "B:GNAS_Ery:ADRB2" "B:GNAS_Gran:ADRB2" ...
 $ AML-0024      : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ AML-0160      : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ AML-0693      : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ AML-1371      : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ AML-2123      : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ AML-3133      : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ AML-4340      : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ healthy-1     : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ healthy-2     : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ healthy-3     : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ healthy-4     : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ healthy-4003  : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...
 $ healthy-5     : int [1:18293] 0 0 0 0 0 0 0 0 0 0 ...


In [27]:
# str(matrix_result[rowSums(matrix_result[, -1] != 0, na.rm = TRUE) > 0, ])

In [28]:
str(matrix_result %>%
  filter(rowSums(. == 1) > 0))

tibble [1,989 × 14] (S3: tbl_df/tbl/data.frame)
 $ interaction_ID: chr [1:1989] "Ery:GNAS_DC:ADRB2" "Ery:GNAS_Gran:ADRB2" "Ery:GNAS_HSPC:ADRB2" "Ery:GNAS_Mono:ADRB2" ...
 $ AML-0024      : int [1:1989] 0 0 0 0 0 1 0 0 0 0 ...
 $ AML-0160      : int [1:1989] 0 0 0 0 0 0 0 0 0 0 ...
 $ AML-0693      : int [1:1989] 0 0 0 0 0 0 0 0 0 0 ...
 $ AML-1371      : int [1:1989] 1 1 1 1 1 1 1 1 1 1 ...
 $ AML-2123      : int [1:1989] 0 0 0 0 1 0 0 0 0 0 ...
 $ AML-3133      : int [1:1989] 0 0 0 0 0 0 0 0 0 0 ...
 $ AML-4340      : int [1:1989] 0 0 0 0 0 0 0 0 0 0 ...
 $ healthy-1     : int [1:1989] 0 0 0 0 0 0 0 0 0 0 ...
 $ healthy-2     : int [1:1989] 0 1 0 0 0 0 0 1 0 0 ...
 $ healthy-3     : int [1:1989] 0 0 0 0 0 0 0 0 0 0 ...
 $ healthy-4     : int [1:1989] 1 0 0 0 1 0 1 0 0 0 ...
 $ healthy-4003  : int [1:1989] 0 1 0 0 0 0 0 1 0 0 ...
 $ healthy-5     : int [1:1989] 0 0 0 0 0 0 0 0 0 0 ...


In [29]:
# Keep the interactions with a non-zero entry in at least one sample column
# (every column except the first)
matrix_result <- matrix_result[
  which(rowSums(matrix_result[, -1] != 0, na.rm = TRUE) > 0),
]

In [30]:
# Split every interaction ID of the form
#   "<sender celltype>:<sender gene>_<receiver celltype>:<receiver gene>"
# into its four components and store them as columns of `df`.
# The split is vectorised: nothing is grown element by element, and an empty
# `matrix_result` yields a data frame with four columns and zero rows.
interaction_ids <- matrix_result$interaction_ID

# n-th piece of every split result; NA when a string has fewer pieces
nth_piece <- function(pieces, n) {
  vapply(pieces, function(p) p[n], character(1))
}

# "<sender>_<receiver>" -> sender part / receiver part
pair_halves <- strsplit(interaction_ids, "_", fixed = TRUE)
sender_parts <- strsplit(nth_piece(pair_halves, 1L), ":", fixed = TRUE)
receiver_parts <- strsplit(nth_piece(pair_halves, 2L), ":", fixed = TRUE)

# "<celltype>:<gene>" -> cell type / gene
sender_celltype <- nth_piece(sender_parts, 1L)
sender_gene <- nth_piece(sender_parts, 2L)
receiver_celltype <- nth_piece(receiver_parts, 1L)
receiver_gene <- nth_piece(receiver_parts, 2L)

# Create a dataframe with the split values
df <- data.frame(
  sender_celltype = sender_celltype,
  sender_gene = sender_gene,
  receiver_celltype = receiver_celltype,
  receiver_gene = receiver_gene
)



In [31]:
str(df)

'data.frame':	1989 obs. of  4 variables:
 $ sender_celltype  : chr  "Ery" "Ery" "Ery" "Ery" ...
 $ sender_gene      : chr  "GNAS" "GNAS" "GNAS" "GNAS" ...
 $ receiver_celltype: chr  "DC" "Gran" "HSPC" "Mono" ...
 $ receiver_gene    : chr  "ADRB2" "ADRB2" "ADRB2" "ADRB2" ...


In [32]:
# Annotate every interaction with the avg_log2FC of its sender gene in the
# sender cell type and of its receiver gene in the receiver cell type.
# A (cell type, gene) pair that is not in `DEGs` gets NA.
#
# The lookup is a single vectorised match() on a combined "cluster<TAB>gene"
# key instead of one subset() call per row. match() returns the first hit, so
# a duplicated DEG row no longer aborts the assignment, and a `df` without
# rows is handled (1:nrow(df) would iterate over c(1, 0)).
deg_key <- paste(DEGs$cluster, DEGs$gene, sep = "\t")
deg_log2FC <- as.numeric(DEGs$avg_log2FC)

sender_key <- paste(df$sender_celltype, df$sender_gene, sep = "\t")
receiver_key <- paste(df$receiver_celltype, df$receiver_gene, sep = "\t")

df$sender_log2FC <- deg_log2FC[match(sender_key, deg_key)]
df$receiver_log2FC <- deg_log2FC[match(receiver_key, deg_key)]


In [33]:
#see where both components (sender/receiever) have log2FC value
df[complete.cases(df$sender_log2FC, df$receiver_log2FC), ]

Unnamed: 0_level_0,sender_celltype,sender_gene,receiver_celltype,receiver_gene,sender_log2FC,receiver_log2FC
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>
78,Ery,MIF,Mono,ACKR3,-0.4807752,0.2595831
161,Ery,MIF,HSPC,CD44,-0.4807752,0.309931
164,Ery,MIF,T,CD44,-0.4807752,0.2663509
225,NK,VIM,HSPC,CD44,0.2885934,0.309931
228,NK,VIM,T,CD44,0.2885934,0.2663509
249,T,SRGN,HSPC,CD44,0.301997,0.309931
252,T,SRGN,T,CD44,0.301997,0.2663509
378,Mono,ACKR3,Mono,ADM,0.2595831,0.4564719
392,Ery,CALM1,B,SELL,-0.4362474,-0.2740404
478,Gran,ITGA4,B,CD81,0.2693447,-0.2748321


In [34]:
head(df)

Unnamed: 0_level_0,sender_celltype,sender_gene,receiver_celltype,receiver_gene,sender_log2FC,receiver_log2FC
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>
1,Ery,GNAS,DC,ADRB2,-0.2729051,
2,Ery,GNAS,Gran,ADRB2,-0.2729051,
3,Ery,GNAS,HSPC,ADRB2,-0.2729051,
4,Ery,GNAS,Mono,ADRB2,-0.2729051,
5,Ery,GNAS,NK,ADRB2,-0.2729051,
6,Ery,GNAS,T,ADRB2,-0.2729051,


In [35]:
# Rebuild the interaction ID from its four components
df[["interaction_ID"]] <- with(
  df,
  paste0(sender_celltype, ":", sender_gene, "_", receiver_celltype, ":", receiver_gene)
)

In [36]:
# Restrict the means table to the interactions annotated in `df`
means <- means %>%
  filter(interaction_ID %in% df$interaction_ID)

In [37]:
# Means of the control samples: the ID column plus every column whose name
# contains "healthy"
healthy_samples <- grep("healthy", names(means), value = TRUE)
control_means <- means[, c("interaction_ID", healthy_samples), drop = FALSE]

In [38]:
# Means of the case samples: the ID column plus every column whose name
# contains "AML"
aml_samples <- grep("AML", names(means), value = TRUE)
case_means <- means[, c("interaction_ID", aml_samples), drop = FALSE]

In [39]:
# Average over the control samples per interaction, ignoring missing values
control_means$row_means <- rowMeans(
  control_means[, setdiff(names(control_means), "interaction_ID")],
  na.rm = TRUE
)

In [40]:
# Average over the case samples per interaction, ignoring missing values
case_means$row_means <- rowMeans(
  case_means[, setdiff(names(case_means), "interaction_ID")],
  na.rm = TRUE
)

In [41]:
# Placeholder columns for the per-condition means, filled in below
df[c("mean_case", "mean_control")] <- NA

In [42]:
# Attach the per-condition mean of every interaction to `df`.
# match() looks all IDs up at once; an ID that is missing from a means table
# yields NA instead of a "replacement has length zero" error, and a `df`
# without rows is handled (1:nrow(df) would iterate over c(1, 0)).
df$mean_case <- case_means$row_means[
  match(df$interaction_ID, case_means$interaction_ID)
]
df$mean_control <- control_means$row_means[
  match(df$interaction_ID, control_means$interaction_ID)
]

In [43]:
# log2 ratio of the case mean to the control mean
df$log2 <- log2(df$mean_case / df$mean_control)

In [44]:
names(df)

In [45]:
# Keep the annotation columns in their final order
df <- df[, c(
  "interaction_ID",
  "sender_celltype", "sender_gene",
  "receiver_celltype", "receiver_gene",
  "mean_case", "mean_control", "log2",
  "sender_log2FC", "receiver_log2FC"
)]

In [46]:
# Save the interaction-by-sample table
write.csv(x = matrix_result, file = paste0(final_out, "CPDB_results.csv"))

In [47]:
# Save the interaction annotation table
write.csv(x = df, file = paste0(final_out, "CPDB_anno_interaction.csv"))

In [48]:
# Read the interaction annotation back from disk
anno <- read.csv(file = paste0(final_out, "CPDB_anno_interaction.csv"))

In [49]:
# Read the interaction-by-sample table back from disk.
# check.names = FALSE keeps the sample IDs as written (e.g. "AML-0024"
# instead of the mangled "AML.0024"), and row.names = 1 consumes the unnamed
# row-name column that write.csv() added, so it does not come back as a
# spurious data column.
results <- read.csv(
  paste0(final_out, "CPDB_results.csv"),
  row.names = 1,
  check.names = FALSE
)

In [50]:
threshold_log2FC <- 1

In [51]:
# Interactions whose log2 ratio exceeds the threshold defined above.
# which() drops rows with a missing log2 value; a bare logical index would
# turn each of them into a row consisting only of NA.
upregulated_anno <- anno[which(anno$log2 > threshold_log2FC), ]

In [52]:
# Interactions whose log2 ratio lies below the negative threshold.
# The cut-off has to be -threshold_log2FC: `log2 < 1` would also select
# unchanged and mildly upregulated interactions.
# which() drops rows with a missing log2 value; a bare logical index would
# turn each of them into a row consisting only of NA.
downregulated_anno <- anno[which(anno$log2 < -threshold_log2FC), ]

In [53]:
# Per-sample results of the upregulated interactions
upregulated <- results %>%
  filter(interaction_ID %in% upregulated_anno$interaction_ID)

In [54]:
# Per-sample results of the downregulated interactions
downregulated <- results %>%
  filter(interaction_ID %in% downregulated_anno$interaction_ID)

In [61]:
anno

X,interaction_ID,sender_celltype,sender_gene,receiver_celltype,receiver_gene,mean_case,mean_control,log2,sender_log2FC,receiver_log2FC
<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Ery:GNAS_DC:ADRB2,Ery,GNAS,DC,ADRB2,0.25820000,0.5556667,-1.10573070,-0.2729051,
2,Ery:GNAS_Gran:ADRB2,Ery,GNAS,Gran,ADRB2,0.28316667,0.5613333,-0.98720629,-0.2729051,
3,Ery:GNAS_HSPC:ADRB2,Ery,GNAS,HSPC,ADRB2,0.26400000,0.5418333,-1.03731122,-0.2729051,
4,Ery:GNAS_Mono:ADRB2,Ery,GNAS,Mono,ADRB2,0.34883333,0.5491667,-0.65470615,-0.2729051,
5,Ery:GNAS_NK:ADRB2,Ery,GNAS,NK,ADRB2,0.41320000,0.5730000,-0.47169488,-0.2729051,
6,Ery:GNAS_T:ADRB2,Ery,GNAS,T,ADRB2,0.42483333,0.5565000,-0.38948472,-0.2729051,
7,B:ARPC5_DC:ADRB2,B,ARPC5,DC,ADRB2,0.08283333,0.1653333,-0.99709427,-0.2848189,
8,B:ARPC5_Gran:ADRB2,B,ARPC5,Gran,ADRB2,0.08414286,0.1711667,-1.02448906,-0.2848189,
9,B:ARPC5_HSPC:ADRB2,B,ARPC5,HSPC,ADRB2,0.07942857,0.1516667,-0.93317408,-0.2848189,
10,B:ARPC5_Mono:ADRB2,B,ARPC5,Mono,ADRB2,0.09357143,0.1586667,-0.76185909,-0.2848189,


In [60]:
anno[anno$log2==1.5,]

X,interaction_ID,sender_celltype,sender_gene,receiver_celltype,receiver_gene,mean_case,mean_control,log2,sender_log2FC,receiver_log2FC
<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>


In [55]:
# Save the annotation and the per-sample results of both interaction sets
write.csv(x = upregulated_anno, file = paste0(final_out, "upregulated_anno.csv"))
write.csv(x = downregulated_anno, file = paste0(final_out, "downregulated_anno.csv"))
write.csv(x = upregulated, file = paste0(final_out, "upregulated.csv"))
write.csv(x = downregulated, file = paste0(final_out, "downregulated.csv"))

In [221]:
getwd()