---
title: "00_overview_of_histone_acetylation_across_clusters Figure5A"
output:
  github_document:
    df_print: paged
---

In [None]:
%%R
library(tidyverse)
library(ComplexHeatmap)
library(circlize)

# prepare the data

In [None]:
%%R
# Curated histone gene table, restricted to rows whose gene_type matches the
# H1 / H2A / H2B / H3 / H4 pattern.
Histone_genes_curated <- read.delim("./data/Histone_genes_curated_final.tsv") %>%
  filter(grepl("H1|H2A|H2B|H3|H4", gene_type))

# Sample metadata, restricted to rows whose cohort matches the pattern below.
acetylome_meta_data <- read.delim("./data/acetylome_meta_data.tsv") %>%
  filter(grepl("BRCA|LSCC|MEDUL|GBM|LUAD|UCEC|MB", cohort))

# Cluster map, joined to the metadata on case_id below.
mappings_pancan <- read.delim("./data/cluster_map.tsv")

# Attach the selected metadata columns to the cluster map and keep only cases
# with a non-missing cluster_id. The join direction (and therefore the column
# order) is kept as-is because later code drops the leading columns of the
# joined table by position.
case_mapping <- mappings_pancan %>%
  right_join(
    acetylome_meta_data %>%
      dplyr::select(case_id, cohort, age, sex, race, smoking),
    by = "case_id"
  ) %>%
  filter(!is.na(cluster_id))

# Preview the three tables.
head(Histone_genes_curated)
head(acetylome_meta_data)
head(case_mapping)

# acetylome data matrix

In [None]:
%%R
# Imputed acetylome table (tab-separated).
acetylome_imputed <- read.delim(file = "./data/acetylome_imputed.tsv", sep = "\t")

# Gene symbols of the curated histone genes.
acetylation_gene <- Histone_genes_curated$geneSymbol

# Keep only the acetylome rows whose geneSymbol is a curated histone gene and
# attach gene_type to them. Because the curated table is the left-hand side of
# the right_join, geneSymbol and gene_type end up as the first two columns,
# followed by the remaining acetylome columns.
histone.acetylome_imputed <- Histone_genes_curated %>%
  dplyr::select(geneSymbol, gene_type) %>%
  right_join(
    acetylome_imputed %>% filter(geneSymbol %in% acetylation_gene),
    by = "geneSymbol"
  )

head(histone.acetylome_imputed)

# Prepare annotation and data matrix for the heatmap

In [None]:
%%R
# Row annotation source: one row per acetylation site with its gene_type.
hist_site_anno <- histone.acetylome_imputed %>%
  dplyr::select(acetyl_sites, gene_type)

# Turn the site-by-case table into a case-by-site table and attach it to the
# case metadata. Columns 1, 2, 4, 5 and 6 are dropped by position.
# NOTE(review): this assumes every column that survives the drop, other than
# acetyl_sites, is a per-case value column named by case_id -- confirm against
# the layout of acetylome_imputed.tsv.
hist_map_acetyl <- histone.acetylome_imputed[, -c(1, 2, 4, 5, 6)] %>%
  column_to_rownames(var = "acetyl_sites") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "case_id") %>%
  left_join(case_mapping, ., by = "case_id")

# Z-score every site column across cases. The metadata columns are removed by
# name (everything that came from case_mapping) rather than by a hard-coded
# "first 7 columns", so the matrix stays correct if the cluster map or the
# selected metadata columns change. drop = FALSE keeps a data frame (and the
# site name) even when only one site column remains.
hist_mat_acetyl <- as.data.frame(scale(
  hist_map_acetyl[
    ,
    setdiff(names(hist_map_acetyl), names(case_mapping)),
    drop = FALSE
  ]
))

# Fig5A

In [None]:
%%R
# Directory prefix for the exported figure (trailing slash is required because
# the file name is appended with paste0()).
out_dir <- "./Fig5/figures/"

# Diverging colour scale for the heatmap body: blue (-2) -> white (0) -> red (2).
col_fun <- colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))

# Sequential colour scale for the smoking annotation: white (0) -> grey (2) -> black (4).
col_smoke <- colorRamp2(c(0, 2, 4), c("white", "#999999", "#000000"))
# Fixed colour for each cluster label C1..C24, as a named character vector.
# Colours are listed in cluster order; the names are generated to match.
col_cluster <- setNames(
  c(
    "#686868", "#c6c6c6",                                  # C1-C2
    "#5b3495", "#796eb2", "#9e9ac8", "#c6c7e1", "#e8e7f2", # C3-C7
    "#084a91", "#1764ab", "#2e7ebc", "#4a98c9", "#6aaed6", # C8-C12
    "#94c4df", "#b7d4ea", "#d0e1f2", "#e3eef9",            # C13-C16
    "#9b3203", "#bd3e02", "#dd4d04", "#ef6612",            # C17-C20
    "#f9802d", "#fd994d", "#fdb170", "#fdca99"             # C21-C24
  ),
  paste0("C", 1:24)
)

# Fig5A heatmap object.
# Rows are the columns of hist_mat_acetyl (transposed here), split and
# annotated by hist_site_anno$gene_type; columns are the rows of
# hist_map_acetyl, shown in their existing order (no column clustering) and
# annotated with cluster, cohort, sex and smoking values.
h <- Heatmap(
  t(hist_mat_acetyl),
  col = col_fun,
  cluster_columns = FALSE,
  name = "Abundance",
  top_annotation = columnAnnotation(
    # Fix the legend order to C1..C24 instead of alphabetical order.
    Cluster = factor(hist_map_acetyl$cluster_id, levels = paste0("C", 1:24)),
    Cohort = hist_map_acetyl$cohort,
    Sex = as.character(hist_map_acetyl$sex),
    Smoking_signature = hist_map_acetyl$smoking,
    # One colour mapping per annotation (the Cluster entry used to be listed
    # twice; a single entry is sufficient).
    col = list(
      Cluster = col_cluster,
      Cohort = c(
        "GBM" = "#52b788", "MB" = "#193e2e", "LSCC" = "#91bdff",
        "LUAD" = "#1a759f", "UCEC" = "#5a189a", "BRCA" = "#cd6090"
      ),
      Sex = c("Male" = "lightskyblue", "Female" = "plum"),
      Smoking_signature = col_smoke
    ),
    annotation_legend_param = list(
      Cohort = list(nrow = 2, title = "Cohort", title_position = "leftcenter"),
      Cluster = list(nrow = 2, title = "Clusters", title_position = "leftcenter"),
      Sex = list(nrow = 2, title = "Sex", title_position = "leftcenter"),
      Smoking_signature = list(
        title = "Tobacco smoking \n mutational signature \n (log10)",
        title_position = "leftcenter",
        direction = "horizontal"
      )
    )
  ),
  right_annotation = rowAnnotation(
    Histone_type = hist_site_anno$gene_type,
    annotation_legend_param = list(
      Histone_type = list(nrow = 1, title = "Histone type", title_position = "lefttop")
    ),
    col = list(
      Histone_type = c(
        "H1" = "darkgreen", "H2A" = "orangered", "H2B" = "mediumvioletred",
        "H3" = "mediumblue", "H4" = "dodgerblue"
      )
    )
  ),
  row_split = hist_site_anno$gene_type,
  heatmap_legend_param = list(direction = "horizontal", title_position = "leftcenter"),
  cluster_column_slices = FALSE,
  show_row_names = TRUE
)

# plot heatmap on the current (interactive / notebook) device
draw(h, annotation_legend_side = "bottom", heatmap_legend_side = "bottom")

# save heatmap to Fig5A.pdf
# pdf() cannot create missing directories, so make sure out_dir exists first
# (no-op, and no warning, when it is already there).
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
pdf(paste0(out_dir, "Fig5A.pdf"), width = 25, height = 11)
# Always close the PDF device, even if drawing fails, so a failed run does not
# leave an open device (and a locked, truncated file) behind. invisible()
# prevents the returned heatmap object from being auto-printed, which would
# draw it a second time.
invisible(tryCatch(
  draw(h, annotation_legend_side = "bottom", heatmap_legend_side = "bottom"),
  finally = dev.off()
))