# Visualization of phylogenetic distribution

In [27]:
library(ape)
library(castor)
library(dplyr)
library(ggplot2)
library(ggtree)
library(ggtreeExtra)
library(reshape2)
library(ggnewscale)

## Color blind Palette
### 8 colors
cbp8 <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#000000")
### 12 colors
cbp12 <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99", "#999933", "#882255", "#661100", "#6699CC", "#888888")

tree_path <- "../../data/ref_bac2feature/phylogeny/phylogeny.tre"
trait_path <- "../../data/intermediate_dir/trait_for_visualization.tsv"

# tree
tree <- read.tree(tree_path)

# trait data
df_trait <- read.table(trait_path, sep="\t", comment.char="", quote = "", header=T)
df_trait$species_tax_id <- as.character(df_trait$species_tax_id)
rownames(df_trait) <- df_trait$species_tax_id

p_tree <- ggtree(tree, color = "grey")

n_tax <- 5
tax_level <- "phylum"
df_tax <- df_trait[which(df_trait$species_tax_id %in% tree$tip.label), c("species_tax_id", tax_level)] %>%
  rename(label = "species_tax_id")

df_tax_top <- df_tax %>%
  group_by(.data[[tax_level]]) %>% summarize(count = dplyr::n()) %>%
  arrange(desc(count)) %>% head(n_tax) %>% select(-count) %>%
  mutate(tax_color = .data[[tax_level]])

df_tax <- df_tax %>% left_join(df_tax_top, by = tax_level)
df_tax$tax_color[is.na(df_tax$tax_color)] <- "Others"
df_tax$tax_color <- factor(df_tax$tax_color, levels = c(df_tax_top$tax_color, "Others"))

df_tax_data <- left_join(p_tree$data, df_tax, by = "label") %>% filter(isTip == TRUE)

# Visualization
options(repr.plot.width=6, repr.plot.height=9)

trait_col <- "respiration"

category <- df_trait[[trait_col]] %>% unique(rm.na = TRUE) %>% sort() %>% c("NA")
colors <- setNames(c(cbp8[1:length(category)-1], cbp12[12]), category)
titles = c("gram_stain" = "Gram stain", "sporulation" = "Sporulation", "motility" = "Motility", "range_salinity" = "Salinity range", "respiration" = "Respiration", "range_tmp" = "Temperature range", "cell_shape" = "Cell shape")

g <- p_tree +
  geom_treescale(
    x = 0,
    y= p_tree$data,
    width=1, color='grey', fontsize=5, linesize=1, offset=1
  ) +
  geom_fruit(
    data = rbind(
      df_tax_data %>% rename(ID = "label") %>% select(ID, tax_color) %>% mutate(x_pos = 1),
      df_tax_data %>% rename(ID = "label") %>% select(ID, tax_color) %>% mutate(x_pos = 2)
    ),
    geom = geom_tile,
    mapping = aes(x = x_pos, y = ID, fill = tax_color),
    pwidth = 0.05,
    offset = 0.1
  ) +
  labs(fill = "Phylum") +
  scale_fill_manual(values = cbp12, na.value = cbp12[12]) +
  new_scale_fill() +
  geom_fruit(
    data = df_trait %>% rename(ID = "species_tax_id") %>%
      mutate(tr = replace(df_trait[[trait_col]], is.na(df_trait[[trait_col]]), "NA")) %>% select(ID, tr),
    geom = geom_tile,
    mapping = aes(x = tr, y = ID, fill = tr),
    pwidth = trait_pwidth,
    offset = 0.1
  ) +
  scale_fill_manual(name = titles[[trait_col]], values = colors) +
  theme(
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15),
    legend.position = "right",
  )

save_path <- "../../results/02_data_preprocessing/figS4.pdf"
ggsave(file=save_path, dpi=300, width=6, height=9, device="pdf")

"[1m[22mComputation failed in `stat_tree_scale_line()`.
Caused by error in `FUN()`:
[33m![39m non-numeric argument to binary operator"
"[1m[22mComputation failed in `stat_tree_scale_text()`.
Caused by error in `FUN()`:
[33m![39m non-numeric argument to binary operator"
