In [1]:
suppressMessages(library(igraph))
suppressMessages(library(ggnetwork))
library(tidygraph)
library(tidyverse)
library(cowplot)
library(martini)
library(blur)

# Make the cowplot theme the default for the ggplot figures drawn below.
theme_set(theme_cowplot())

# Root of the results tree; paths are built with paste0(), so the trailing
# slash is part of the value.
results <- "../results/"

# Table read from the preprocessing output; both columns are read as character.
gene2snp <- read_tsv(
    paste0(results, "preprocessing/snp2hgnc.tsv"),
    col_types = "cc"
)

# Summarise a network as a one-row tibble.
#
# graph: a graph whose table (as returned by as_tibble()) has `betweenness` and
#        `c` columns; the callers in this notebook pass tbl_graph objects.
#        `c` is fed to pchisq(df = 1), so it is presumably a 1-df chi-square
#        statistic -- TODO confirm.
#
# Returns a tibble with the columns `# nodes`, `# edges`, `Median betweenness`
# and `Median gene P-value`. The last two go through format(digits = 2) and are
# therefore character, not numeric.
method_summary <- function(graph) {

    # Extract the table once; as_tibble() replaces the deprecated as.tibble().
    nodes <- as_tibble(graph)

    tibble(`# nodes` = length(V(graph)),
           `# edges` = length(E(graph)),
           # The column is labelled "Median", so use median(); the previous
           # implementation reported the mean under this label.
           `Median betweenness` = median(nodes$betweenness, na.rm = TRUE) %>%
               format(digits = 2),
           # Median statistic converted to an upper-tail chi-square P-value.
           `Median gene P-value` = median(nodes$c, na.rm = TRUE) %>%
               pchisq(df = 1, lower.tail = FALSE) %>%
               format(digits = 2))

}


Attaching package: ‘tidygraph’

The following object is masked from ‘package:igraph’:

    groups

The following object is masked from ‘package:stats’:

    filter

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mpurrr  [39m 0.3.2
[32m✔[39m [34mtidyr  [39m 0.8.3     [32m✔[39m [34mdplyr  [39m 0.8.3
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mforcats[39m 0.4.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mas_data_frame()[39m masks [34mtibble[39m::as_data_frame(), [34migraph[39m::as_data_frame()
[31m✖[39m [34mpurrr[39m::[32mcompose()[39m       masks [34migraph[39m::compose()
[31m✖[39m [34mtidyr[39m::[32mcrossing()[39m      masks [34migraph[39m::crossing()
[31m✖[39m [34mdplyr[39m::[32mfilter(

In [None]:
# read results
# For each network (gs, gm, gi): load the .RData file that provides `net`,
# read the matching SConES table, compute node betweenness on the full network,
# and join the table of THAT network onto the nodes by SNP name. `net` is
# removed after each step so the next load() starts clean.
## gs
load(paste0(results,'scones/gs_network.RData'))

gs <- read_tsv(paste0(results,'scones/cones_gs.tsv'), col_types = "ciiiccdli")
gs_net <- as_tbl_graph(net) %>%
    mutate(betweenness = centrality_betweenness()) %>%
    inner_join(gs, by = c('name' = 'snp'))
rm(net)

## gm
load(paste0(results,'scones/gm_network.RData'))

gm <- read_tsv(paste0(results,'scones/cones_gm.tsv'), col_types = "ciiiccdli")
gm_net <- as_tbl_graph(net) %>%
    mutate(betweenness = centrality_betweenness()) %>%
    # Join the GM results (previously joined `gs` by copy-paste mistake).
    inner_join(gm, by = c('name' = 'snp'))
rm(net)

## gi
load(paste0(results,'scones/gi_network.RData'))

gi <- read_tsv(paste0(results,'scones/cones_gi.tsv'), col_types = "ciiiccdli")
gi_net <- as_tbl_graph(net) %>%
    mutate(betweenness = centrality_betweenness()) %>%
    # Join the GI results (previously joined `gs` by copy-paste mistake).
    inner_join(gi, by = c('name' = 'snp'))
rm(net)

# GS network

In [None]:
# Total of the `selected` column of the GS results.
sum(pull(gs, selected))

In [None]:
# Use a 4 x 4 plot area for the next figure.
options(repr.plot.width=4, repr.plot.height=4)
# module_size() is not defined in this file (presumably provided by blur --
# verify); the notes below interpret its output as the sizes of the GS modules.
module_size(gs)

Most of the clusters are small (1–2 SNPs). However, small clusters seem to have association scores similar to those of bigger clusters, so I am inclined to think that bigger clusters are picked not because their SNPs have stronger association scores, but because they happen to connect high-score SNPs.

In [None]:
# compute_mod_size() is reached with `:::`, i.e. it is not exported by blur.
# Later cells filter its result on `chr` and read its `module` column.
blur:::compute_mod_size(gs)

It seems that the biggest clusters come from only 3 chromosomal regions, on chromosomes 3, 5 and 8.

In [None]:
# Wide, short plot area for the module plot.
options(repr.plot.width = 10, repr.plot.height = 4)
# Module ids from the chromosome 8 rows of compute_mod_size(), capped at 20.
k <- head(filter(blur:::compute_mod_size(gs), chr == 8)$module, n = 20)
plot_snp_module(gs, k)

In chromosome 8 we find the gene CASC8, which had appeared in previous analyses.

In [None]:
# Wide, short plot area for the module plot.
options(repr.plot.width = 10, repr.plot.height = 4)
# Module ids from the chromosome 5 rows of compute_mod_size(), capped at 20.
k <- head(filter(blur:::compute_mod_size(gs), chr == 5)$module, n = 20)
plot_snp_module(gs, k)

In this region we find two compelling genes: MRPS30, the S30 subunit of the mitochondrial ribosome and potentially involved in energetic metabolism; and, more interestingly, RP11-53O19.1, also known as Breast Cancer-Associated Transcript 54, a ncRNA.

In [None]:
# Wide plot area, slightly taller than for the other chromosomes.
options(repr.plot.width = 10, repr.plot.height = 5)
# Module ids from the chromosome 3 rows of compute_mod_size(), capped at 20.
k <- head(filter(blur:::compute_mod_size(gs), chr == 3)$module, n = 20)
plot_snp_module(gs, k)

Here we retrieve our old friend NEK10, a protein kinase involved in cell-cycle control, which was already linked to BRCA susceptibility.

## Genes affected

In [None]:
# table_regions() is not defined in this file. It receives the GS SConES table
# and the GI graph -- presumably the GI graph supplies the gene annotation;
# verify against the helper's definition.
table_regions(gs, gi_net)

When we don't focus on clusters, but instead look at the signal provided by any selected SNP, we give voice to SNPs whose association is strong enough for them to be selected without belonging to a cluster. This way, we recover our old friend FGFR2, a tumor suppressor gene that is amplified and overexpressed in breast cancer.

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)
# FGFR2 SNPs: selected SNPs on chromosome 10 strictly inside the
# 123296158-123357561 window; keep the module each of them belongs to.
k <- filter(gs, selected, chr == 10, pos > 123296158, pos < 123357561)$module
plot_snp_module(gs, k)

TOX3, related to ER+ BRCA, and CASC16, which lies very close to it in the genome, also pop up in the analysis.

In [None]:
options(repr.plot.width = 10, repr.plot.height = 4)
# TOX3 SNPs: selected SNPs on chromosome 16 strictly inside the
# 52525285-52639236 window; keep the module each of them belongs to.
k <- filter(gs, selected, chr == 16, pos > 52525285, pos < 52639236)$module
plot_snp_module(gs, k)

In [None]:
# Sub-network restricted to the nodes flagged as selected.
gs_selected_net <- filter(gs_net, selected)

# One summary row per network; the list names become the leading `Method`
# column through bind_rows(.id = ).
list(`GS network` = gs_net, `SConES GS` = gs_selected_net) %>%
    lapply(method_summary) %>%
    bind_rows(.id = "Method")

# GM network

In [None]:
# Total of the `selected` column of the GM results.
sum(pull(gm, selected))

In [None]:
# Use a 4 x 4 plot area for the next figure.
options(repr.plot.width=4, repr.plot.height=4)
# module_size() is not defined in this file (presumably provided by blur --
# verify); called here on the GM SConES results.
module_size(gm)

In [None]:
# Non-exported blur helper (hence `:::`), applied to the GM results; the cells
# below filter its result on `chr` and read its `module` column.
blur:::compute_mod_size(gm)

Only 2 chromosomal regions here, on chromosomes 3 and 5, around the regions we already described.

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)
# Module ids from the chromosome 3 rows of compute_mod_size(), capped at 20.
k <- head(filter(blur:::compute_mod_size(gm), chr == 3)$module, n = 20)
plot_snp_module(gm, k)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 4)
# Module ids from the chromosome 5 rows of compute_mod_size(), capped at 20.
k <- head(filter(blur:::compute_mod_size(gm), chr == 5)$module, n = 20)
plot_snp_module(gm, k)

## Genes affected

In [None]:
# table_regions() is not defined in this file. It receives the GM SConES table
# and the GI graph -- presumably the GI graph supplies the gene annotation;
# verify against the helper's definition.
table_regions(gm, gi_net)

# GI network

In [None]:
# Total of the `selected` column of the GI results.
sum(pull(gi, selected))

In [None]:
# Use a 4 x 4 plot area for the next figure.
options(repr.plot.width=4, repr.plot.height=4)
# module_size() is not defined in this file (presumably provided by blur --
# verify); called here on the GI SConES results.
module_size(gi)

In [None]:
# Show only the first rows of the module-size table for the GI results.
blur:::compute_mod_size(gi) %>%
    head()

In [None]:
# Use a 5 x 5 plot area for the next figure.
options(repr.plot.width=5, repr.plot.height=5)
# plot_ideogram() is not defined in this file (presumably provided by blur --
# verify); called here on the GI SConES results.
plot_ideogram(gi)

We observe how modules span through different chromosomes.

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10)

# Draw the selected part of the GI network, one facet per module, with nodes
# coloured by their `gene` value.
gi_net %>%
    filter(selected) %>%
    igraph::as.igraph() %>%
    ggnetwork() %>%
    ggplot(mapping = aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_edges(color = "grey50") +
    geom_nodes(mapping = aes(color = gene)) +
    facet_wrap(~module) +
    theme_facet()

## Genes affected

In [None]:
# `net` is removed with rm(net) right after gi_net is built, so referencing it
# here fails; pass the annotated GI graph, as the GS and GM sections do.
# table_regions() is not defined in this file.
table_regions(gi, gi_net)

One interesting gene that didn't pop up in previous experiments is DIRC3, associated with renal and thyroid carcinomas; another is CCAT2, related to prostate cancer susceptibility.

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)
# Selected GI SNPs on chromosome 2 strictly inside the 218150948-218606227
# window (labelled DIRC3 by the variable name).
DIRC3 <- gi %>%
    filter(selected & chr == 2 & pos > 218150948 & pos < 218606227)
# Only the filtered rows are handed to the plot, together with their modules.
plot_snp_module(DIRC3, DIRC3$module)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)
# Selected GI SNPs on chromosome 7 strictly inside the 12251488-12275818
# window (labelled TMEM106B by the variable name).
TMEM106B <- gi %>%
    filter(selected & chr == 7 & pos > 12251488 & pos < 12275818)
# Only the filtered rows are handed to the plot, together with their modules.
plot_snp_module(TMEM106B, TMEM106B$module)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 3)
# Selected GI SNPs on chromosome 8 strictly inside the 128413087-128414371
# window (labelled CCAT2 by the variable name).
CCAT2 <- gi %>%
    filter(selected & chr == 8 & pos > 128413087 & pos < 128414371)
# Only the filtered rows are handed to the plot, together with their modules.
plot_snp_module(CCAT2, CCAT2$module)

# Consistency

How consistent are the selected SNPs in the different networks?

In [None]:
# Use a 4 x 5 plot area for the next figure.
options(repr.plot.width=4, repr.plot.height=5)
# consistency() is not defined in this file (presumably provided by blur --
# verify). The three SConES tables are passed as named arguments GS, GM, GI.
consistency(GS = gs, GM = gm, GI = gi)

The GI network selects twice as many SNPs as the GM and the GS networks. As we see, most of the SNPs selected in GS are also selected in GM, and most of the ones selected in GM (+ many more) are selected in GI. Interestingly, as we can see from the circos plot above, the GI network provides a way of connecting the previously isolated chromosomal regions in chromosomes 3, 5 and 8 (and some potentially interesting regions in chromosomes 10 and 16).

In [None]:
# SNP identifiers flagged as selected under each network.
set_gs <- with(gs, snp[selected])
set_gm <- with(gm, snp[selected])
set_gi <- with(gi, snp[selected])

# Jaccard index of two sets: |intersection| / |union|.
jaccard <- function(a, b) {
    length(intersect(a, b)) / length(union(a, b))
}

# Pairwise overlap: GS vs GM, GI vs GS, GI vs GM.
jaccard(set_gs, set_gm)
jaccard(set_gi, set_gs)
jaccard(set_gi, set_gm)