# Convert taxa list to a pretty table

In [1]:
# All data/ and results/ paths below are relative to this directory.
# NOTE(review): the absolute path is machine-specific; adjust it when running
# the notebook elsewhere.
setwd("/mnt/c/Users/Cedric/Desktop/git_repos/blood_microbiome")

# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the failure surface later as a cryptic error.
library(tidyverse)
library(data.table)
library(foreach)
library(reactable)
library(htmltools)
library(htmlwidgets)
library(IRdisplay)
library(repr)
library(webshot2)

Loading required package: tidyverse

“running command 'timedatectl' had status 1”
── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.6     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.4     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: data.table


Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last


The following object is masked from ‘p

### Load data

In [2]:
# Species-level taxonomy metadata. Square brackets are stripped from taxon
# names (presumably so they match the names in the decontamination table).
patho_meta <- fread("data/kraken2_taxonomy/plusPF_20210517_species_meta.csv") %>%
    select(-n_map_taxon) %>%
    mutate(taxa = gsub("\\[|\\]", "", taxa))

# Per-taxon decontamination statistics, annotated with the taxonomy metadata
# and ordered by the highest read count first.
prev_max_filt <- fread("results/decontamination/global_decontamination_stats_n124.csv")
path_prev_max_filt <- prev_max_filt %>%
    left_join(patho_meta, by = "taxa") %>%
    arrange(desc(max_count))

# One table per organism group.
bact <- path_prev_max_filt %>% filter(org_group == "Bacteria")
virus <- path_prev_max_filt %>% filter(org_group == "Viruses")
fungi <- path_prev_max_filt %>% filter(org_group == "Fungi")
others <- path_prev_max_filt %>% filter(org_group == "Other Eukaryotes")

# Irep and coverage breadth
cov_df <- fread("results/irep_analysis/coverage_irep_results.parsed.csv")

# The file name is interpolated with str_glue(); a plain string would write a
# file literally called "curated_n{n_final}_...".
# NOTE(review): `n_final` is not defined in this notebook, so the number of
# rows written is used instead -- confirm this is the intended count.
fwrite(path_prev_max_filt,
       str_glue("results/decontamination/curated_n{nrow(path_prev_max_filt)}_global_decontamination_stats.csv"))

Joining, by = "taxa"


In [3]:
# Preview the first 20 bacterial taxa.
head(bact, n = 20)

taxa,max_count,overall_prevalence,n_samples,max_bin,org_group
<chr>,<int>,<dbl>,<int>,<chr>,<chr>
Fusobacterium nucleatum,194199,0.0011327594,10,>=500,Bacteria
Staphylococcus haemolyticus,117929,0.0105346624,93,>=500,Bacteria
Cutibacterium acnes,22596,0.0474626189,419,>=500,Bacteria
Staphylococcus cohnii,18768,0.0080425918,71,>=500,Bacteria
Neisseria subflava,15385,0.0015858632,14,>=500,Bacteria
Corynebacterium segmentosum,14476,0.0010194835,9,>=500,Bacteria
Haemophilus parainfluenzae,12183,0.0020389669,18,>=500,Bacteria
Fannyhessea vaginae,10395,0.0023787947,21,>=500,Bacteria
Staphylococcus epidermidis,9140,0.0086089715,76,>=500,Bacteria
Lactobacillus crispatus,7799,0.0106479384,94,>=500,Bacteria


In [4]:
# Taxa grouped by origin label. A taxon listed in more than one vector (e.g.
# "Neisseria subflava") is resolved by whichever membership test runs first in
# the code that consumes these vectors.
oral <- c(
    "Fusobacterium nucleatum",
    "Neisseria subflava",
    "Haemophilus parainfluenzae",
    "Fusobacterium pseudoperiodonticum",
    "Prevotella melaninogenica",
    "Prevotella sp. oral taxon 299"
)

lungs <- c(
    "Neisseria subflava",
    "Neisseria mucosa",
    "Neisseria flavescens",
    "Human mastadenovirus C"
)

genitals <- c(
    "Fannyhessea vaginae",
    "Lactobacillus crispatus",
    "Gardnerella vaginalis"
)

skin <- c(
    "Staphylococcus haemolyticus",
    "Cutibacterium acnes",
    "Staphylococcus cohnii",
    "Staphylococcus epidermidis",
    "Malassezia restricta",
    "Corynebacterium segmentosum",
    "Moraxella osloensis"
)

environment <- c(
    "Acinetobacter baumannii",
    "Cupriavidus metallidurans",
    "Rickettsia sp. Tillamook 23",
    "Aspergillus oryzae"
)

blood <- c(
    "Human betaherpesvirus 6A",
    "Human betaherpesvirus 6B",
    "Hepatitis B virus",
    "Torque teno virus 6"
)

In [5]:
# Taxa for which `emia` is set to FALSE; every other taxon gets TRUE.
no_blood_report <- c("Corynebacterium segmentosum", "Lactobacillus crispatus",
                     "Fusobacterium pseudoperiodonticum", "Rickettsia sp. Tillamook 23",
                     "Prevotella sp. oral taxon 299",
                     "Aspergillus oryzae")

# Top 20 bacteria plus all viruses and fungi, annotated with coverage / bPTR
# results, the `emia` flag and an origin label.
final <- bind_rows(head(bact, 20), virus, fungi) %>%
    left_join(cov_df, by = "taxa") %>%
    mutate(
        # bPTR is kept for bacteria only; NA_real_ keeps the column numeric
        # even when no row is bacterial.
        max_bPTR = ifelse(org_group == "Bacteria", max_bPTR, NA_real_),
        emia = !(taxa %in% no_blood_report),
        # The first matching branch wins, so a taxon listed under several
        # origins takes the earliest label. Unlisted taxa get NA.
        origin = case_when(taxa %in% oral ~ "Mouth",
                           taxa %in% lungs ~ "Respiratory tract",
                           taxa %in% genitals ~ "Genitals",
                           taxa %in% skin ~ "Skin",
                           taxa %in% environment ~ "Environment",
                           taxa %in% blood ~ "Blood")
    )

final

fwrite(final, str_glue("results/parsed_taxa_results.n{nrow(final)}.csv"))

Joining, by = "taxa"


taxa,max_count,overall_prevalence,n_samples,max_bin,org_group,mean_perc_covered1,mean_perc_covered5,min_perc_covered1,max_perc_covered1,max_bPTR,ORI,TER,emia,origin
<chr>,<int>,<dbl>,<int>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<chr>
Fusobacterium nucleatum,194199,0.0011327594,10,>=500,Bacteria,14.22900825,11.50248,0.3213506,65.7792926,1.675886,201937.0,1345205.0,True,Mouth
Staphylococcus haemolyticus,117929,0.0105346624,93,>=500,Bacteria,0.26897963,0.08480633,0.2356298,0.2961161,,,,True,Skin
Cutibacterium acnes,22596,0.0474626189,419,>=500,Bacteria,36.15403798,3.473737,15.33223211,77.4795425,,,,True,Skin
Staphylococcus cohnii,18768,0.0080425918,71,>=500,Bacteria,0.24924845,0.0609414,0.21837648,0.2688547,,,,True,Skin
Neisseria subflava,15385,0.0015858632,14,>=500,Bacteria,17.89799782,5.411104,1.20911307,81.7316806,1.506405,1883368.0,797590.0,True,Mouth
Corynebacterium segmentosum,14476,0.0010194835,9,>=500,Bacteria,13.99622489,1.490941,0.24934273,66.3968333,,,,False,Skin
Haemophilus parainfluenzae,12183,0.0020389669,18,>=500,Bacteria,14.73871027,0.7062885,0.65722936,60.4802455,1.17258,492915.0,1443748.0,True,Mouth
Fannyhessea vaginae,10395,0.0023787947,21,>=500,Bacteria,17.26185948,1.865483,1.22422672,75.9249475,1.876599,613931.0,1264620.0,True,Genitals
Staphylococcus epidermidis,9140,0.0086089715,76,>=500,Bacteria,15.36405809,0.2958846,2.03810903,52.9612382,1.566844,237449.0,1347375.0,True,Skin
Lactobacillus crispatus,7799,0.0106479384,94,>=500,Bacteria,13.0052727,0.389143,3.96804644,41.0523459,1.571468,328627.0,1239005.0,False,Genitals


In [6]:
# render functions
# orange_pal <- function(x) rgb(colorRamp(c("khaki4", "khaki1"))(x), maxColorValue = 255)
green_pal <- function(x) {
    # Interpolate between white and darkolivegreen1; NA input maps to white.
    ramp <- scales::colour_ramp(c("white", "darkolivegreen1"), na.color = "white")
    ramp(x)
}
# red_green_pal <- function(x) scales::colour_ramp(c("darkolivegreen", "darkolivegreen1"), na.color = "white")(x)
    
# Build a small round coloured marker as an inline <span>.
#   color:  background colour of the marker
#   width:  CSS width
#   height: CSS height; defaults to `width`
status_badge <- function(color = "#aaa", width = "12px", height = width) {
    badge_css <- list(
        display = "inline-block",
        marginRight = "8px",
        width = width,
        height = height,
        backgroundColor = color,
        borderRadius = "50%"
    )
    span(style = badge_css)
}

In [7]:
# Display-ready table: rounded values, a replication status and only the
# columns that are rendered.
parsed <- final %>%
    mutate(overall_prevalence = round(overall_prevalence * 100, 2),
           max_perc_covered1 = round(max_perc_covered1, 1),
           max_bPTR = round(max_bPTR, 2),
           # Evaluated on the rounded max_bPTR. Bacteria with a non-missing
           # max_bPTR that is not above 1 match no branch and get NA.
           status = case_when(is.na(max_bPTR) & org_group == "Bacteria" ~ "Unknown",
                              max_bPTR > 1 ~ "Replicating",
                              org_group != "Bacteria" ~ "NA")) %>%
    # A single positive select both drops the unused columns and fixes the
    # column order, so no separate negative select is needed.
    select(origin, taxa, emia, overall_prevalence, max_perc_covered1, max_bPTR, status)

# Tag colour for each origin label present in parsed$origin.
origin_colours <- c("Mouth" = "#0099CC",
                    "Respiratory tract" = "#FF99CC",
                    "Genitals" = "#C77CFF",
                    "Skin" = "wheat",
                    "Environment" = "lightgrey",
                    "Blood" = "indianred")

# Build a reactable `style` function that shades a cell by where its value
# lies between the column minimum and maximum (NAs ignored).
#   values: the full column, used once to compute the range
#   extra:  further CSS properties appended to every cell's style
shade_by_range <- function(values, extra = list()) {
    lo <- min(values, na.rm = TRUE)
    hi <- max(values, na.rm = TRUE)
    function(value) {
        c(list(background = green_pal((value - lo) / (hi - lo))), extra)
    }
}

right_border <- list(borderRight = "1px solid rgba(0, 0, 0, 0.5)")

rtable <- parsed %>%
  reactable(compact = TRUE,
            borderless = FALSE,
            pagination = FALSE,
            width = 1100,
            columns = list(
              taxa = colDef(name = "Species",
                            align = "left"),
              origin = colDef(name = "Origin",
                              align = "center",
                              cell = function(value) {
                                # Missing or unrecognised origins get a
                                # transparent tag instead of failing on an
                                # unassigned colour.
                                known <- value %in% names(origin_colours)
                                colour <- if (known) origin_colours[[value]] else "transparent"
                                div(class = "tag",
                                    style = list(background = colour),
                                    value)
                              }),
              overall_prevalence = colDef(name = "Overall prevalence (%)",
                                          align = "center",
                                          style = shade_by_range(parsed$overall_prevalence)),
              max_bPTR = colDef(name = "Max. PTR",
                                align = "center",
                                style = shade_by_range(parsed$max_bPTR, right_border)),
              max_perc_covered1 = colDef(name = "Max. coverage (%)",
                                         align = "left",
                                         style = shade_by_range(parsed$max_perc_covered1)),
              status = colDef(name = "Status",
                              align = "left",
                              cell = function(value) {
                                # A missing status is treated like the "NA"
                                # label; the unnamed last entry is the grey
                                # fallback for "NA" and anything unexpected.
                                key <- if (is.na(value)) "NA" else value
                                colour <- switch(
                                  key,
                                  Replicating = "hsl(120, 45%, 50%)",
                                  Unknown = "hsl(3, 69%, 50%)",
                                  "hsl(0, 0%, 50%)"
                                )
                                tagList(status_badge(color = colour), value)
                              }),
              emia = colDef(name = "Reported in blood",
                            align = "center",
                            style = right_border,
                            cell = function(value) {
                              # Render as an X mark or check mark
                              if (value) "\u2714\ufe0f" else "\u274c"
                            })
            )
  )

# Write the rendered table to an HTML file.
html <- "results/irep_analysis/parsed_irep_results.html"
saveWidget(widget = rtable, file = html)