# Co-occurrence network using SparCC

In [1]:
setwd("/mnt/c/Users/Cedric/Desktop/git_repos/blood_microbiome")
require(tidyverse)
require(ggplot2)
require(data.table)
require(igraph)
require(Matrix)
require(SpiecEasi)
require(foreach)

Loading required package: tidyverse

“running command 'timedatectl' had status 1”
── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.6     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.4     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: data.table


Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last


The following object is masked from ‘p

### Load data

In [2]:
# Read the raw and the globally decontaminated read matrices; both carry an
# `npm_research_id` column that identifies the sample in each row.
raw_df <- fread("results/decontamination/read_matrix.raw.zeroed.csv")
species_df <- fread("results/decontamination/read_matrix_n124.global_decontaminated.zeroed.csv")

# Metadata, restricted to the samples present in the raw matrix
meta <- fread("data/SG10K_Health_metadata.n10714.16March2021.parsed.csv")
meta_filt <- filter(meta, npm_research_id %in% raw_df$npm_research_id)

# Cohorts to analyse: every supplying site except GUSTO and SSMP
cohorts <- unique(meta_filt$site_supplying_sample)
cohorts <- cohorts[!(cohorts %in% c("GUSTO", "SSMP"))]

# Drop samples whose counts sum to zero across all taxa
raw_non_zero <- rowSums(select(raw_df, -npm_research_id)) != 0
raw_df <- filter(raw_df, raw_non_zero)

decon_non_zero <- rowSums(select(species_df, -npm_research_id)) != 0
species_df <- filter(species_df, decon_non_zero)

# Split the raw matrix into a named list with one element per cohort
morsels <- foreach(cohort = cohorts, .combine = "c") %do% {
    id_filt <- meta_filt %>%
        filter(site_supplying_sample == cohort) %>%
        pull(npm_research_id)
    X_filt <- filter(raw_df, npm_research_id %in% id_filt)
    setNames(list(X_filt), cohort)
}

cohort_data <- morsels

# Report the dimensions of both matrices and the number of cohorts kept
dim(species_df)
dim(raw_df)
length(cohorts)

In [3]:
#' Build a thresholded SparCC co-occurrence graph.
#'
#' Runs SparCC on the count columns of `X`, zeroes every correlation whose
#' absolute value is below `corr_t` (and the diagonal), converts the result
#' to an igraph graph and removes vertices that are left without edges.
#'
#' @param X Data frame with an `npm_research_id` column plus one count column
#'   per taxon; the id column is dropped before SparCC is run.
#' @param corr_t Absolute-correlation cut-off below which an edge is removed.
#' @return An igraph graph whose vertices are named after the taxon columns
#'   of `X`, without isolated vertices.
get_graph <- function(X, corr_t) {
    set.seed(666)

    X_filt <- X %>%
        select(-npm_research_id)
    taxa <- colnames(X_filt)

    # Run SparCC and keep only the correlation matrix
    sparcc_cor <- sparcc(X_filt)$Cor

    # Apply the (arbitrary) correlation threshold and remove self-loops
    sparcc_cor[abs(sparcc_cor) < corr_t] <- 0
    diag(sparcc_cor) <- 0
    sparcc_adj <- Matrix(sparcc_cor, sparse = TRUE)

    # `vertex.attr` takes a named list of vertex attributes. Passing a named
    # vector instead would add one attribute per taxon, so set the vertex
    # name directly here.
    ig.sparcc <- adj2igraph(sparcc_adj, vertex.attr = list(name = taxa))

    # Remove edgeless vertices
    isolated <- V(ig.sparcc)[degree(ig.sparcc) == 0]
    delete_vertices(ig.sparcc, isolated)
}


### Intersection of cohort-level graphs made from raw taxonomic profiles

In [4]:
# Sanity check: for each cohort, print how many of its samples have a
# non-zero total count.
for (cohort in cohorts) {
    cohort_counts <- cohort_data[[cohort]] %>%
        select(-npm_research_id)
    n_samples <- nrow(cohort_counts)
    n_non_zero <- sum(rowSums(cohort_counts) != 0)
    print(str_glue("{cohort} has {n_non_zero} / {n_samples} samples"))
}

HELIOS has 2284 / 2284 samples
MEC has 2713 / 2713 samples
PRISM has 1248 / 1248 samples
TTSH has 653 / 653 samples
SERI has 835 / 835 samples


In [5]:
#' Intersect the per-cohort SparCC graphs built at one correlation cut-off.
#'
#' Builds one graph per entry of the global `cohorts` vector (using the
#' matching element of the global `cohort_data` list) and intersects all of
#' them, however many cohorts there are.
#'
#' @param corr_t Absolute-correlation cut-off forwarded to `get_graph()`.
#' @return An unnamed list of length two: the intersection graph, then the
#'   list of per-cohort graphs named by cohort.
get_ix_graph <- function(corr_t) {
    stopifnot(length(cohorts) > 0)

    # To debug on a subsample, pipe the cohort table through sample_n(30)
    # before passing it to get_graph().
    raw_graphs <- lapply(cohorts, function(cohort) {
        get_graph(cohort_data[[cohort]], corr_t)
    })
    names(raw_graphs) <- cohorts

    # Pass every graph to intersection() instead of hard-coding five
    # positional arguments; names are dropped so they cannot be mistaken for
    # named arguments of intersection().
    ix_graph <- do.call(
        intersection,
        c(unname(raw_graphs), list(keep.all.vertices = FALSE))
    )

    list(ix_graph, raw_graphs)
}

# Build the cross-cohort intersection at three correlation cut-offs
ix_results <- lapply(c(0.05, 0.1, 0.2), get_ix_graph)
gx1_list <- ix_results[[1]]
gx2_list <- ix_results[[2]]
gx3_list <- ix_results[[3]]

# First element of each result is the intersection graph
gx1 <- gx1_list[[1]]
gx2 <- gx2_list[[1]]
gx3 <- gx3_list[[1]]

# Second element holds the per-cohort graphs; keep those of the 0.2 cut-off
gx3_raw_graphs <- gx3_list[[2]]

### Graphs made from decontaminated taxonomic profiles

In [6]:
# SparCC graphs of the decontaminated profiles at three correlation cut-offs
decon_graphs <- lapply(
    c(0.05, 0.1, 0.2),
    function(corr_t) get_graph(species_df, corr_t)
)
g1 <- decon_graphs[[1]]
g2 <- decon_graphs[[2]]
g3 <- decon_graphs[[3]]

### Save plots

In [83]:
#' Draw a graph in a circular layout and save it as a PNG file.
#'
#' Edges with a negative `weight` are coloured "steelblue4" and edges with a
#' positive `weight` "darkolivegreen3"; other edges keep whatever colour
#' they already had (or none).
#'
#' @param g igraph graph to draw; edge colours are derived from its `weight`
#'   edge attribute.
#' @param save_path Path of the PNG file to write (6.67 x 6.67 in, 300 dpi).
#' @return The value of `dev.off()` for the PNG device.
plot_and_save <- function(g, save_path) {
    # Build a full-length colour vector before assigning it. Sub-assigning
    # into a missing `color` attribute can produce a vector shorter than the
    # number of edges (or an empty one when no edge matches).
    edge_weights <- E(g)$weight
    if (length(edge_weights) > 0) {
        edge_colors <- E(g)$color
        if (is.null(edge_colors)) {
            edge_colors <- rep(NA_character_, length(edge_weights))
        }
        edge_colors[which(edge_weights < 0)] <- "steelblue4"
        edge_colors[which(edge_weights > 0)] <- "darkolivegreen3"
        E(g)$color <- edge_colors
    }

    set.seed(666)
    png(file = save_path,
        width = 6.67,
        height = 6.67,
        units = 'in',
        res = 300)

    # Make sure the PNG device is closed even if plot() fails; on the normal
    # path it is already closed by the explicit dev.off() below.
    png_dev <- dev.cur()
    on.exit(if (png_dev %in% dev.list()) dev.off(png_dev), add = TRUE)

    plot(g,
         margin = 0,
         # Alternative layout: layout.fruchterman.reingold(g)
         layout = layout_in_circle(g, order = V(g)),
         vertex.size = 8,
         vertex.label.cex = 1,
         vertex.label.color = "black",
         vertex.frame.color = NA,
         vertex.label.dist = 1)

    dev.off(png_dev)
}

# Each entry pairs a graph with the PNG path it is written to: first the
# decontaminated-profile graphs, then the cross-cohort intersections.
plot_targets <- list(
    list(graph = g1, path = "results/coocurrence/SparCC_0.05.png"),
    list(graph = g2, path = "results/coocurrence/SparCC_0.1.png"),
    list(graph = g3, path = "results/coocurrence/SparCC_0.2.png"),
    list(graph = gx1, path = "results/coocurrence/SparCC_intersection_0.05.2.png"),
    list(graph = gx2, path = "results/coocurrence/SparCC_intersection_0.1.png"),
    list(graph = gx3, path = "results/coocurrence/SparCC_intersection_0.2.png")
)

for (target in plot_targets) {
    plot_and_save(target$graph, target$path)
}

#### Plot graphs for each cohort

In [89]:
cohort_names <- names(gx3_raw_graphs)

# Save one plot per cohort-level graph built at the 0.2 cut-off. Iterating
# over the names directly avoids seq(length(x)), which yields c(1, 0) and
# runs the body with invalid indices when the list is empty.
for (cohort_name in cohort_names) {
    g_temp <- gx3_raw_graphs[[cohort_name]]
    plot_and_save(g_temp, str_glue("results/coocurrence/SparCC_{cohort_name}_0.2.png"))
}