# Figure 6 part 2 - Visualize the integration of INT and KO150 using GSEA
- goal: Visualize the enrichment results in a more meaningful way
- input: enrichment results between KO150 and bulk integrative temporal clusters
- output: meaningful visualizations

In [1]:
# Set the working directory to the project folder so that the relative paths
# used below ("./src/...", "results/...") resolve.
# The guard makes this cell safe to re-run: an unconditional setwd("..") would
# climb one directory further on every execution. "src/figure_theme.R" is used
# as the marker for the project folder because it is sourced right after.
getwd()
if (!file.exists(file.path("src", "figure_theme.R"))) {
  setwd("..")
}
getwd()

In [2]:
# source the figure theme, libraries and utility functions
source("./src/figure_theme.R")

In [3]:
library(tidyr)
library(circlize)
library(jsonlite)


Attaching package: ‘tidyr’


The following object is masked from ‘package:reshape2’:

    smiths


circlize version 0.4.15
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/

If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
  in R. Bioinformatics 2014.

This message can be suppressed by:
  suppressPackageStartupMessages(library(circlize))



Attaching package: ‘circlize’


The following object is masked from ‘package:sna’:

    degree




In [4]:
# Replace the last two characters of each string with "22".
# Used on "#RRGGBBAA" colour strings to set the alpha channel to hex 22
# (i.e. to make a link almost transparent).
#
# x: character vector (or matrix; the result is a plain vector in the same
#    element order).
# Returns a character vector. NA inputs stay NA instead of turning into the
# bogus string "NA22". Written without the pipe so the helper does not depend
# on magrittr being attached.
replace_last_two <- function(x) {
  swapped <- paste0(substr(x, 1, nchar(x) - 2), "22")
  swapped[is.na(x)] <- NA_character_
  swapped
}

In [5]:
# Draw a directed chord diagram and save it as a 4x4 inch PDF.
#
# Arguments:
#   df            table coerced with as.matrix(); its row and column names
#                 become the sectors of the diagram.
#   col_mat       matrix of link fill colours (passed as `col`).
#   lwd_mat       matrix of link border widths (passed as `link.lwd`).
#   border_mat    matrix of link border colours (passed as `link.border`).
#   title         plot title; also used as file name (spaces -> "_", ".pdf").
#   results_path  directory the PDF is written to (must already exist).
#
# Sector colours are taken from the objects `cluster_colors_listeria` and
# `KO_col`, which are NOT arguments: they must exist in the calling
# environment when the function runs.
#
# Returns the path of the written PDF, invisibly.
makeChordDiagram <- function(df, col_mat, lwd_mat, border_mat, title, results_path) {
    df_mat <- as.matrix(df)
    out_file <- file.path(results_path, paste0(gsub(" ", "_", title), ".pdf"))

    # (a PNG device with the same dimensions at res = 300 was used previously)
    pdf(out_file, width = 4, height = 4)

    # Always reset circlize and close the device, even if plotting fails;
    # otherwise an error would leave the PDF device open and the circos
    # layout state dirty for the next call.
    on.exit({
        circos.clear()
        dev.off()
    }, add = TRUE)

    chordDiagram(df_mat,
                 grid.col = c(cluster_colors_listeria, KO_col),
                 direction = 1,
                 direction.type = c("diffHeight", "arrows"),
                 diffHeight = mm_h(3),
                 link.arr.type = "big.arrow",
                 annotationTrack = c("grid"), # c("grid","name"),
                 # empty outer track, sized to the longest sector name, that
                 # later receives the rotated labels
                 preAllocateTracks = list(track.height = max(strwidth(unlist(dimnames(df_mat))))),
                 col = col_mat, # link color
                 link.lwd = lwd_mat, # link border width
                 link.border = border_mat, # link border color
                 link.target.prop = TRUE,
                 target.prop.height = mm_h(2)
                )

    # add title
    # NOTE(review): `cex` may not change the size of a main title in base
    # graphics (`cex.main` is the documented parameter) - confirm before changing
    title(title, cex = 0.6)

    # add rotated sector names into the pre-allocated track
    circos.track(track.index = 1, panel.fun = function(x, y) {
        circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index,
            facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5), cex = 0.6)
    }, bg.border = NA) # here set bg.border to NA is important

    invisible(out_file)
}

# configs

In [6]:
# default size of plots rendered inline in the notebook (repr.plot.* options;
# presumably inches, matching the 4x4 PDF size used by makeChordDiagram)
options(repr.plot.width=4, repr.plot.height=4)

In [7]:
# paths: enrichment tables are read from data_path, plots are written to a
# "plots" subfolder of it
data_path <- file.path("results", "KO150", "special_analysis", "INT_comparison")
results_path <- file.path(data_path, "plots")

In [8]:
# make sure the plot output directory exists: recursive = TRUE also creates
# missing parent folders, showWarnings = FALSE silences the warning that is
# issued when the directory is already there
dir.create(results_path, showWarnings = FALSE, recursive = TRUE)

# load data

In [9]:
# KO colors, loaded from an RDS file (not JSON) in the figure 6 folder
# NOTE(review): presumably a named collection of "#RRGGBB" colours keyed by KO -
# it is later indexed as KO_col[[KO]] and gets an alpha suffix appended; confirm
KO_col <- readRDS(file.path("results/figures/figure_6","KO_colors.rds"))
head(KO_col)

In [10]:
# load the enrichment results (one row per Gene_set x Term pair) and show the
# table dimensions and first rows; Gene_set is split into condition / KO /
# direction further below
enr <- read.csv(file.path(data_path, 'GSEA_split_results.csv'))
dim(enr)
head(enr)

Unnamed: 0_level_0,X,Gene_set,Term,Overlap,P.value,Adjusted.P.value,Odds.Ratio,Genes,Odds.Ratio1
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>
1,0,untreated_Spi1_up,C_albicans_cluster_1,1/77,0.2807547,0.467924542,4.5529255,PDE4B,3.0761183
2,1,untreated_Spi1_up,C_albicans_cluster_4,1/103,0.3567149,0.494133457,3.403998,RPL23,2.2961165
3,2,untreated_Spi1_up,IFN_beta_cluster_1,10/411,9.138281e-06,0.000228457,6.7188508,SLFN4;LY6A;H2-Q6;H2-T22;H2-Q4;CCL5;FBXO32;TAPBPL;XDH;HPSE,6.4573437
4,3,untreated_Spi1_up,IFN_beta_cluster_2,2/535,0.6698179,0.747354214,1.0836549,RPL21;TMEM176B,0.8738186
5,4,untreated_Spi1_up,IFN_beta_cluster_3,4/672,0.321504,0.494133457,1.5847182,LCN2;TNFRSF9;KANK3;PDE4B,1.4198585
6,5,untreated_Spi1_up,IFN_beta_cluster_4,1/541,0.9043975,0.942080755,0.6338905,GYG,0.4259088


In [11]:
# colours for the Listeria clusters: same values as cluster_colors, but named
# with the formatted cluster labels ("Listeria " prefix removed) so the names
# match the column labels used in the chord diagrams
cluster_colors_listeria <- setNames(
  cluster_colors,
  gsub("Listeria ", "", cluster_format2(paste0("LO28_cluster_", names(cluster_colors))))
)

# transform data

In [12]:
# split Gene_set (e.g. "untreated_Spi1_up") at "_" into condition, KO and
# direction; extra = "merge" keeps any further "_" inside the last piece
enr_expanded <- separate(
  enr,
  col = Gene_set,
  into = c("condition", "KO", "direction"),
  sep = "_",
  extra = "merge"
)

dim(enr_expanded)
head(enr_expanded)

Unnamed: 0_level_0,X,condition,KO,direction,Term,Overlap,P.value,Adjusted.P.value,Odds.Ratio,Genes,Odds.Ratio1
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>
1,0,untreated,Spi1,up,C_albicans_cluster_1,1/77,0.2807547,0.467924542,4.5529255,PDE4B,3.0761183
2,1,untreated,Spi1,up,C_albicans_cluster_4,1/103,0.3567149,0.494133457,3.403998,RPL23,2.2961165
3,2,untreated,Spi1,up,IFN_beta_cluster_1,10/411,9.138281e-06,0.000228457,6.7188508,SLFN4;LY6A;H2-Q6;H2-T22;H2-Q4;CCL5;FBXO32;TAPBPL;XDH;HPSE,6.4573437
4,3,untreated,Spi1,up,IFN_beta_cluster_2,2/535,0.6698179,0.747354214,1.0836549,RPL21;TMEM176B,0.8738186
5,4,untreated,Spi1,up,IFN_beta_cluster_3,4/672,0.321504,0.494133457,1.5847182,LCN2;TNFRSF9;KANK3;PDE4B,1.4198585
6,5,untreated,Spi1,up,IFN_beta_cluster_4,1/541,0.9043975,0.942080755,0.6338905,GYG,0.4259088


# Make plots

In [17]:
# Chord diagrams of KOs vs. Listeria (LO28) temporal clusters.
#
# For every condition in enr_expanded this cell
#   1. keeps the significant (adjusted p < 0.05) enrichments against LO28 terms,
#   2. builds a KO x cluster matrix of log2 odds ratios plus matching matrices
#      of link fill / border colours,
#   3. plots one overview diagram, one diagram per KO and one per cluster with
#      all other links faded (alpha "22"), and
#   4. additionally writes selected diagrams into the figure folders.
#
# Link encoding as implemented below: direction "up" -> white fill with a
# KO-coloured border; direction "down" -> KO-coloured fill with a white border.
#
# Relies on objects defined earlier in the notebook: enr_expanded, KO_col,
# results_path, replace_last_two(), makeChordDiagram(), and on
# cluster_format2() / treatment_format().
treat <- "LO28"
figure_dir <- file.path("results/figures/figure_6")
supp_dir <- file.path("results/figures/supp_large_screen")

# pdf() cannot create missing folders, so make sure the export targets exist
invisible(lapply(
  c(figure_dir, supp_dir),
  dir.create,
  showWarnings = FALSE, recursive = TRUE
))

# Pivot one column of the long enrichment table into a KO x cluster data frame
# (row names = KO, column names = formatted cluster labels).
ko_by_cluster <- function(long_df, value_col) {
  wide <- long_df %>%
    pivot_wider(
      id_cols = KO,
      names_from = Term,
      values_from = all_of(value_col)
    ) %>%
    as.data.frame()

  rownames(wide) <- wide[[1]]
  # drop = FALSE keeps a data frame even if only one cluster is significant
  wide <- wide[, -1, drop = FALSE]
  colnames(wide) <- gsub("Listeria ", "", cluster_format2(colnames(wide)))
  wide
}

# Fade (alpha "22") every link that is outside the highlighted rows / columns.
fade_others <- function(color_mat,
                        keep_rows = rownames(color_mat),
                        keep_cols = colnames(color_mat)) {
  fade <- outer(
    !(rownames(color_mat) %in% keep_rows),
    !(colnames(color_mat) %in% keep_cols),
    "|"
  )
  if (any(fade)) {
    color_mat[fade] <- replace_last_two(color_mat[fade])
  }
  color_mat
}

for (cond in unique(enr_expanded$condition)) {

  # filter for condition, Listeria clusters and stat. significance
  enr_tmp <- enr_expanded %>%
    filter(condition == cond, grepl(treat, Term), Adjusted.P.value < 0.05)

  # nothing significant -> nothing to draw (plotting an empty matrix fails)
  if (nrow(enr_tmp) == 0) {
    message("No significant ", treat, " enrichments for '", cond, "'; skipping.")
    next
  }

  # adjacency matrix of log2(odds ratio); an odds ratio of 0 becomes NA
  enr_wide <- ko_by_cluster(enr_tmp, "Odds.Ratio")
  enr_wide[] <- lapply(enr_wide, function(v) log2(na_if(v, 0)))

  # matching matrix of directions ("up" / "down", NA where not significant)
  enr_dir <- ko_by_cluster(enr_tmp, "direction")
  dir_mat <- as.matrix(enr_dir)

  # map KO colours (fully opaque, alpha "FF") onto every link of a KO
  KO_col_mat <- dir_mat
  for (ko_name in intersect(rownames(dir_mat), names(KO_col))) {
    KO_col_mat[ko_name, ] <- paste0(KO_col[[ko_name]], "FF") # "80")
  }
  no_color <- setdiff(rownames(dir_mat), names(KO_col))
  if (length(no_color) > 0) {
    warning(
      "No entry in KO_col for: ", paste(no_color, collapse = ", "),
      " (their links keep non-colour values)",
      call. = FALSE
    )
  }

  # uniform link border width of 1
  lwd_mat <- matrix(1, nrow = nrow(dir_mat), ncol = ncol(dir_mat))

  # "down" links: KO-coloured fill, white border
  border_mat <- KO_col_mat
  border_mat[which(dir_mat == "down")] <- "#FFFFFFFF" # 80 for 50% transparency

  # "up" links: white fill, KO-coloured border
  col_mat <- KO_col_mat
  col_mat[which(dir_mat == "up")] <- "#FFFFFFFF"

  cond_title <- treatment_format(cond)

  ### plot ALL for condition
  makeChordDiagram(
    enr_wide, col_mat, lwd_mat, border_mat,
    title = cond_title, results_path = results_path
  )

  # put into figures folder: untreated is a main figure, the rest supplementary
  overview_dir <- if (cond == "untreated") figure_dir else supp_dir
  makeChordDiagram(
    enr_wide, col_mat, lwd_mat, border_mat,
    title = cond_title, results_path = overview_dir
  )

  ### loop through all KOs in condition: highlight one KO, fade the others
  for (KO_tmp in rownames(enr_wide)) {
    col_mat_tmp <- fade_others(col_mat, keep_rows = KO_tmp)
    border_mat_tmp <- fade_others(border_mat, keep_rows = KO_tmp)

    makeChordDiagram(
      enr_wide, col_mat_tmp, lwd_mat, border_mat_tmp,
      title = paste0(cond_title, " ", KO_tmp), results_path = results_path
    )
  }

  # make plot for untreated SFPQ and SF3B1 (both highlighted together)
  if (cond == "untreated") {
    col_mat_tmp <- fade_others(col_mat, keep_rows = c("Sfpq", "Sf3b1"))
    border_mat_tmp <- fade_others(border_mat, keep_rows = c("Sfpq", "Sf3b1"))

    makeChordDiagram(
      enr_wide, col_mat_tmp, lwd_mat, border_mat_tmp,
      title = paste0(cond_title, " ", "SFPQ SF3B1"), results_path = figure_dir
    )
  }

  ### loop through all clusters in condition: highlight one cluster
  for (cluster_tmp in colnames(enr_wide)) {
    col_mat_tmp <- fade_others(col_mat, keep_cols = cluster_tmp)
    border_mat_tmp <- fade_others(border_mat, keep_cols = cluster_tmp)

    makeChordDiagram(
      enr_wide, col_mat_tmp, lwd_mat, border_mat_tmp,
      title = paste0(cond_title, " ", cluster_tmp), results_path = results_path
    )

    # put into figures folder
    if (cond == "untreated" && cluster_tmp %in% c("B", "C")) {
      makeChordDiagram(
        enr_wide, col_mat_tmp, lwd_mat, border_mat_tmp,
        title = paste0(cond_title, " ", cluster_tmp), results_path = figure_dir
      )
    }
  }
}