# Partitioned Heritability

**Created**: 3 July 2022

## Environment

In [1]:
library(tidyverse)
library(ComplexHeatmap)
library(circlize)

setwd("~/eQTL_pQTL_Characterization/")

source("03_Functional_Interpretation/scripts/utils/ggplot_theme.R")

── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.8
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.1     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: grid

ComplexHeatmap version 2.6.2
Bioconductor page: http://b

## Load Data

In [2]:
var.comps <- read.csv("~/gains_team282/epigenetics/enrichment/partitioned_heritability/variance_components.csv")

In [3]:
head(var.comps)

Unnamed: 0_level_0,Eigengene,Annotation,Component,Variance,Proportion
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>
1,ME_1_1,Bulk_B-Control,Individual,5.44322e-08,0.01143616
2,ME_1_1,Bulk_B-Control,Annotation,4.120183e-16,8.656468e-11
3,ME_1_1,Bulk_B-Control,Other,1.1981e-06,0.2517198
4,ME_1_1,Bulk_B-Control,Residual,3.507126e-06,0.736844
5,ME_2_1,Bulk_B-Control,Individual,0.0,0.0
6,ME_2_1,Bulk_B-Control,Annotation,0.0,0.0


In [4]:
annot.dir = "/nfs/users/nfs_n/nm18/gains_team282/epigenetics/enrichment/heritability/"
annot.set = unique(var.comps$Annotation)
n.snps <- lapply(annot.set, function(annot) {
    c(
        read.table(paste0(annot.dir, "/", annot, "/n_annotation_snps.txt"))[1, 1],
        read.table(paste0(annot.dir, "/", annot, "/n_other_snps.txt"))[1, 1]
    )
}) %>%
    do.call(rbind, .) %>%
    as.data.frame() %>%
    dplyr::select(SNPs.Annotation = 1, SNPs.Other = 2) %>%
    dplyr::mutate(SNPs.Total = SNPs.Annotation + SNPs.Other)

rownames(n.snps) <- annot.set

In [5]:
head(n.snps)

Unnamed: 0_level_0,SNPs.Annotation,SNPs.Other,SNPs.Total
Unnamed: 0_level_1,<int>,<int>,<int>
Bulk_B-Control,59881,4097145,4157026
Bulk_B-Stimulated,74329,4082697,4157026
CD8pos_T-Control,63797,4093229,4157026
CD8pos_T-Stimulated,73627,4083399,4157026
Central_memory_CD8pos_T-Control,57804,4099222,4157026
Central_memory_CD8pos_T-Stimulated,73442,4083584,4157026


In [6]:
var.comps <- merge(var.comps, n.snps, by.x="Annotation", by.y=0)

In [7]:
lineages <- read.csv("03_Functional_Interpretation/data/Calderon_et_al_lineages.csv") %>%
    dplyr::bind_rows(c("Lineage"="MYELOID", "Cell_Type"="Neutrophils"))

In [8]:
head(lineages)

Unnamed: 0_level_0,Lineage,Cell_Type
Unnamed: 0_level_1,<chr>,<chr>
1,B,Bulk_B
2,B,Mem_B
3,B,Naive_B
4,B,Plasmablasts
5,CD8,CD8pos_T
6,CD8,Central_memory_CD8pos_T


## Heatmap of All Eigengenes

In [9]:
var.comps <- var.comps %>%
    dplyr::select(Annot=Annotation, everything(), -Proportion) %>%
    dplyr::filter(!is.na(Variance)) %>%
    tidyr::spread(Component, Variance) %>%
    dplyr::filter(Annotation > 0, Annotation + Other > 0) %>%
    dplyr::mutate(Per.SNP.Enrichment = (SNPs.Total / SNPs.Annotation) * (Annotation / (Annotation + Other))) %>%
    dplyr::mutate(Log.Enrichment = log2(Per.SNP.Enrichment))

In [10]:
head(var.comps)

Unnamed: 0_level_0,Annot,Eigengene,SNPs.Annotation,SNPs.Other,SNPs.Total,Annotation,Individual,Other,Residual,Per.SNP.Enrichment,Log.Enrichment
Unnamed: 0_level_1,<chr>,<chr>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Bulk_B-Control,ME_1_1,59881,4097145,4157026,4.120183e-16,5.44322e-08,1.1981e-06,3.507126e-06,2.387355e-08,-25.320012
2,Bulk_B-Control,ME_10_1,59881,4097145,4157026,9.208798e-05,0.0,2.281507e-05,2.835113e-05,55.63718,5.797977
3,Bulk_B-Control,ME_101_1,59881,4097145,4157026,7.681705e-05,0.000256517,4.501491e-13,7.48139e-05,69.42145,6.11731
4,Bulk_B-Control,ME_102_1,59881,4097145,4157026,0.0008457279,3.535808e-13,0.0001628466,6.285881e-05,58.21252,5.863258
5,Bulk_B-Control,ME_103_1,59881,4097145,4157026,1.510706e-12,1.657035e-12,5.30264e-05,9.489297e-05,1.977796e-06,-18.947675
6,Bulk_B-Control,ME_104_1,59881,4097145,4157026,0.0001657799,5.393482e-11,0.0003646383,0.0001471571,21.69738,4.439449


In [11]:
write.csv(var.comps, "~/gains_team282/epigenetics/enrichment/partitioned_heritability/per_snp_heritability.csv", quote=F, row.names=F)

In [12]:
control.prop.mtx <- var.comps %>%
    dplyr::filter(!grepl("segments", Annot)) %>%
    dplyr::filter(!grepl("cCREs", Annot)) %>%
    dplyr::filter(!grepl("Neutrophils", Annot)) %>%
    dplyr::filter(grepl("-Control", Annot)) %>%
    dplyr::mutate(Cell_Type = gsub("-.*$", "", Annot)) %>%
    dplyr::select(Eigengene, Cell_Type, Log.Enrichment) %>%
    tidyr::spread(Cell_Type, Log.Enrichment, fill=0)

rownames(control.prop.mtx) <- gsub("_", " ", gsub("_1$", "", control.prop.mtx$Eigengene))
control.prop.mtx$Eigengene <- NULL
control.prop.mtx <- as.matrix(control.prop.mtx)

In [13]:
treated.prop.mtx <- var.comps %>%
    dplyr::filter(!grepl("segments", Annot)) %>%
    dplyr::filter(!grepl("cCREs", Annot)) %>%
    dplyr::filter(!grepl("Neutrophils", Annot)) %>%
    dplyr::filter(grepl("-Stimulated", Annot)) %>%
    dplyr::mutate(Cell_Type = gsub("-.*$", "", Annot)) %>%
    dplyr::select(Eigengene, Cell_Type, Log.Enrichment) %>%
    tidyr::spread(Cell_Type, Log.Enrichment, fill=0)

rownames(treated.prop.mtx) <- gsub("_", " ", gsub("_1$", "", treated.prop.mtx$Eigengene))
treated.prop.mtx$Eigengene <- NULL
treated.prop.mtx <- as.matrix(treated.prop.mtx)

In [14]:
prop.mtx <- var.comps %>%
    dplyr::filter(grepl("Neutrophils", Annot)) %>%
    dplyr::mutate(Treatment = gsub("Neutrophils-", "", Annot)) %>%
    dplyr::select(Eigengene, Treatment, Log.Enrichment) %>%
    tidyr::spread(Treatment, Log.Enrichment, fill=0)

rownames(prop.mtx) <- gsub("_", " ", gsub("_1$", "", prop.mtx$Eigengene))
prop.mtx$Eigengene <- NULL
prop.mtx <- as.matrix(prop.mtx)

In [15]:
h <- hclust(dist(cbind(control.prop.mtx, treated.prop.mtx, prop.mtx)))

In [16]:
control.prop.mtx <- control.prop.mtx[h$order, ]
treated.prop.mtx <- treated.prop.mtx[h$order, ]
prop.mtx <- prop.mtx[h$order, ]

In [17]:
neutrophil.map <- c(
    "BGP" = "Stimulation", "Control" = "Stimulation", "FLAG" = "Stimulation", "HMGB1" = "Stimulation", "LPS" = "Stimulation", "LTA" = "Stimulation", "R848" = "Stimulation",
    "SA-1" = "S. aureus", "SA-5" = "S. aureus", "WB" = "S. aureus",
    "EC1h" = "E. coli", "EC4h" = "E. coli", "noEC1h" = "E. coli", "noEC4h" = "E. coli"
)

In [19]:
options(repr.plot.width=15, repr.plot.height=24)

min.val = min(c(min(control.prop.mtx), min(treated.prop.mtx), min(prop.mtx)))
max.val = max(c(max(control.prop.mtx), max(treated.prop.mtx), max(prop.mtx)))
col_fun = colorRamp2(c(min.val, 0, max.val), c("firebrick4", "white", "royalblue4"))

h1 = Heatmap(
    control.prop.mtx, name="Enrichment", 
    col=col_fun, show_column_dend=F, cluster_rows=F, na_col="white",
    column_split=lineages$Lineage[match(colnames(control.prop.mtx), lineages$Cell_Type)]
)

h2 = Heatmap(
    treated.prop.mtx, name="Enrichment",
    col=col_fun, show_column_dend=F, cluster_rows=F, na_col="white",
    column_split=lineages$Lineage[match(colnames(treated.prop.mtx), lineages$Cell_Type)]
)

h3 = Heatmap(
    prop.mtx, name="Enrichment", 
    col=col_fun, show_column_dend=F, cluster_rows=F, na_col="white",
    column_split=neutrophil.map[colnames(prop.mtx)]
)

pdf("03_Functional_Interpretation/results/partitioned_heritability.pdf", width=15, height=24)
h1 + h2 + h3
dev.off()

“Heatmap/annotation names are duplicated: Enrichment”
“Heatmap/annotation names are duplicated: Enrichment, Enrichment”


In [29]:
options(repr.plot.width=15, repr.plot.height=24)

min.val = min(c(min(control.prop.mtx), min(treated.prop.mtx), min(prop.mtx)))
max.val = max(c(max(control.prop.mtx), max(treated.prop.mtx), max(prop.mtx)))
col_fun = colorRamp2(c(min.val, min.val / 2, 0, max.val / 2, max.val), c("#44D62C", "#db3eb1", "#D22730", "#E0E722", "#4D4DFF"))

h1 = Heatmap(
    control.prop.mtx, name="Enrichment", 
    col=col_fun, show_column_dend=F, cluster_rows=F, na_col="white", column_title=NULL,
    column_split=lineages$Lineage[match(colnames(control.prop.mtx), lineages$Cell_Type)]
)

h2 = Heatmap(
    treated.prop.mtx, name="Enrichment",
    col=col_fun, show_column_dend=F, cluster_rows=F, na_col="white", column_title=NULL,
    column_split=lineages$Lineage[match(colnames(treated.prop.mtx), lineages$Cell_Type)]
)

h3 = Heatmap(
    prop.mtx, name="Enrichment", 
    col=col_fun, show_column_dend=F, cluster_rows=F, na_col="white", column_title=NULL,
    column_split=neutrophil.map[colnames(prop.mtx)]
)

png("~/test.png", width=15, height=24, units="in", res=600)
h1 + h2 + h3
dev.off()

“Heatmap/annotation names are duplicated: Enrichment”
“Heatmap/annotation names are duplicated: Enrichment, Enrichment”
