# Sample Summaries  

## Tagged Colony Summaries 

In [1]:
library(dplyr)
library(tidyverse)
library(ggplot2)
library(tidyr)
library(purrr)
library(stringr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mggplot2  [39m 3.5.2     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.4     [32m✔[39m [34mtidyr    [39m 1.3.1
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


In [2]:
colony <- read_csv("/Users/brookesienkiewicz/Documents/sctld/SCTLD_samples/Sample_Data/CBC_ColonyData.csv", show_col_types = FALSE)

[1m[22mNew names:
[36m•[39m `` -> `...1`


In [3]:
# change orbi rapid colony to ofav for now 
unique(colony$Species)
colony[colony$Species=='ORBI','Species']<- "OFAV"
unique(colony$Species)

In [4]:
# summary of tagged colonies 
tagged_summarytable<-colony %>%
  group_by(Transect, Species) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = Species, values_from = n, values_fill = 0)

    # Add row totals
    tagged_summarytable <- tagged_summarytable %>%
      mutate(Total = rowSums(select(., -Transect)))
    
    # Add column totals
    totals_row <- tagged_summarytable %>%
      select(-Transect) %>%
      summarise(across(everything(), sum)) %>%
      mutate(Transect = "Total") %>%
      select(Transect, everything())
    
    # Combine
    tagged_summarytable <- bind_rows(tagged_summarytable, totals_row)
    tagged_summarytable

Transect,MCAV,OANN,OFAV,PAST,PSTR,SSID,DLAB,MMEA,CNAT,Total
<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>
BB,6,7,6,6,6,6,0,0,0,37
CBC30N,9,2,1,5,7,8,2,2,0,36
CURLEW,5,1,4,0,5,0,2,0,0,17
HANGMAN,5,5,5,5,6,5,0,0,0,31
LAGOON,10,2,3,9,10,10,2,5,0,51
SR30N,7,2,4,6,9,11,2,6,1,48
Total,42,19,23,31,43,40,8,13,1,220


In [5]:
# summary of immune tagged colonies 
immune_summarytable<-colony %>%
  filter(`immune_y/n`=='y') %>%
  group_by(Transect, Species) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = Species, values_from = n, values_fill = 0)

    # Add row totals
    immune_summarytable <- immune_summarytable %>%
      mutate(Total = rowSums(select(., -Transect)))
    
    # Add column totals
    totals_row <- immune_summarytable %>%
      select(-Transect) %>%
      summarise(across(everything(), sum)) %>%
      mutate(Transect = "Total") %>%
      select(Transect, everything())
    
    # Combine
    immune_summarytable <- bind_rows(immune_summarytable, totals_row)
    immune_summarytable

Transect,MCAV,OFAV,PAST,PSTR,SSID,Total
<chr>,<int>,<int>,<int>,<int>,<int>,<dbl>
CBC30N,1,1,3,1,1,7
CURLEW,3,1,0,2,0,6
LAGOON,3,3,3,2,2,13
SR30N,3,2,3,0,3,11
Total,10,7,9,5,6,37


### Bleached

In [6]:
# remove colonies that died before bleaching event 
unique(colony$Date_DocumentedMortality)
class(colony$Date_DocumentedMortality)

In [7]:
# viewing colonies that died in/around 9/2023
colony %>% 
    filter(Date_DocumentedMortality=='5/24/23') %>%
    select(contains("Condition"))
colony %>% 
    filter(str_detect(Date_DocumentedMortality, "^9/.*/23$")) %>%
    select(contains("Condition"))

# do i include dead colonies in 9/23? no? 

062019_Condition,052022_Condition,122022_Condition,092023_Condition,112023_Condition,122023_Condition,012024_Condition,022024_Condition,042024_Condition,062024_Condition,082024_Condition,122024_Condition,062025_Condition
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
Healthy,Diseased,Diseased,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_visited,Not_visited


062019_Condition,052022_Condition,122022_Condition,092023_Condition,112023_Condition,122023_Condition,012024_Condition,022024_Condition,042024_Condition,062024_Condition,082024_Condition,122024_Condition,062025_Condition
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
Healthy,Healthy,Diseased,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Healthy,
Healthy,Diseased,Diseased,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Dead,Not_Visited,Not_Visited,Not_visited,
Healthy,Diseased,Diseased,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_visited,
,,Diseased,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_visited,
Healthy,Healthy,Healthy,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_visited,
,,Diseased,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_visited,Not_visited
,,Healthy,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_visited,Not_visited
,,Healthy,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_visited,
,,Diseased,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_visited,Not_Visited,Not_visited,
,,Diseased,Dead,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_Visited,Not_visited,Not_Visited,Not_visited,


In [8]:
clb_colony<-colony %>%
    # remove colonies that died in 2022 and before 9/23
  filter(
    !str_detect(Date_DocumentedMortality, "/22") &
    !str_detect(Date_DocumentedMortality, "9/.*/23") &
    !str_detect(Date_DocumentedMortality, "5/.*/23")
  )
     
unique(clb_colony$Date_DocumentedMortality)

In [9]:
# Add bleach status column
clb_summary <- clb_colony %>%
  mutate(
    clb_y_n = if_any(everything(), ~ str_detect(., "CLB|CLP")),
    Status = case_when(
    clb_y_n ~ "Bleached",
    is.na(clb_y_n) ~ "Not Bleached", 
    TRUE ~ "Not Bleached"
    ))
      
# Count by Transect, Species, and Status
summary_tbl <- clb_summary %>%
  group_by(Transect, Species, Status) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = Species, values_from = n, values_fill = 0) %>%
  mutate(Total = rowSums(across(-c(Transect, Status))))

# Add row totals per Status
status_totals <- summary_tbl %>%
  select(-Transect) %>%
  group_by(Status) %>%
  summarise(across(everything(), sum)) %>%
  mutate(Transect = "Total") %>%
  select(Transect, everything())

# Add grand total row
grand_total <- summary_tbl %>%
  select(-c(Transect, Status)) %>%
  summarise(across(everything(), sum)) %>%
  mutate(Transect = "Total", Status = "Total") %>%
  select(Transect, Status, everything())

# Combine
clb_summarytable <- bind_rows(summary_tbl, status_totals, grand_total) %>%
  arrange(Transect, Status)

clb_summarytable

Transect,Status,MCAV,OANN,OFAV,PAST,PSTR,SSID,DLAB,CNAT,Total
<chr>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>
BB,Bleached,4,7,6,6,4,0,0,0,27
BB,Not Bleached,2,0,0,0,2,6,0,0,10
CBC30N,Bleached,2,2,1,5,1,2,1,0,14
CBC30N,Not Bleached,0,0,0,0,0,5,1,0,6
CURLEW,Bleached,2,0,2,0,2,0,1,0,7
CURLEW,Not Bleached,1,1,2,0,0,0,1,0,5
HANGMAN,Bleached,3,4,3,1,6,1,0,0,18
HANGMAN,Not Bleached,2,1,2,4,0,4,0,0,13
LAGOON,Bleached,3,2,0,7,4,3,2,0,21
LAGOON,Not Bleached,1,0,2,0,0,5,0,0,8


In [10]:
# repeat for immune 
immune <-clb_summary %>%
  filter(`immune_y/n`=='y')
      
# Count by Transect, Species, and Status
summary_tbl <- immune %>%
  group_by(Transect, Species, Status) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = Species, values_from = n, values_fill = 0) %>%
  mutate(Total = rowSums(across(-c(Transect, Status))))

# Add row totals per Status
status_totals <- summary_tbl %>%
  select(-Transect) %>%
  group_by(Status) %>%
  summarise(across(everything(), sum)) %>%
  mutate(Transect = "Total") %>%
  select(Transect, everything())

# Add grand total row
grand_total <- summary_tbl %>%
  select(-c(Transect, Status)) %>%
  summarise(across(everything(), sum)) %>%
  mutate(Transect = "Total", Status = "Total") %>%
  select(Transect, Status, everything())

# Combine
clb_immunesummary <- bind_rows(summary_tbl, status_totals, grand_total) %>%
  arrange(Transect, Status)

clb_immunesummary

Transect,Status,MCAV,OFAV,PAST,PSTR,SSID,Total
<chr>,<chr>,<int>,<int>,<int>,<int>,<int>,<dbl>
CBC30N,Bleached,1,1,3,1,1,7
CURLEW,Bleached,2,1,0,2,0,5
CURLEW,Not Bleached,1,0,0,0,0,1
LAGOON,Bleached,3,0,3,2,2,10
LAGOON,Not Bleached,0,2,0,0,0,2
SR30N,Bleached,3,1,2,0,3,9
SR30N,Not Bleached,0,1,1,0,0,2
Total,Bleached,9,3,8,5,6,31
Total,Not Bleached,1,3,1,0,0,5
Total,Total,10,6,9,5,6,36


In [11]:
# export 
write.csv(immune_summarytable, 'tables/immune_summarytable.csv', row.names=FALSE)
write.csv(tagged_summarytable, 'tables/tagged_summarytable.csv',row.names=FALSE)
write.csv(clb_summarytable, 'tables/clb_taggedsummary.csv',row.names=FALSE)
write.csv(clb_immunesummary, 'tables/clb_immunesummary.csv',row.names=FALSE)

In [12]:
## Bleaching Sample Summaries 

### SCTLD 

In [13]:
#### tagged colony summary that matches brooke's 03/2025 sequences

In [14]:
getwd()

In [15]:
sctld_samples <- read.csv("/Users/brookesienkiewicz/Documents/sctld/SCTLD_samples/Sample_Data/CBC_samples.csv")
samplelist<- read.table('seq_lists/genohublist_sctld2024.txt')
colnames(sctld_samples)

In [16]:
nrow(samplelist)
# extra 2 are negative controls 

In [17]:
#filter for seq'ed in 2024 
sctld_samples<-sctld_samples %>%
    filter(Sample_type == 'Core_EtOH') %>%
    filter(Tubelabel_species %in% samplelist$V1)
nrow(sctld_samples)

In [18]:
# make colony tag ID for sample data 
transect_id <- paste0('T',sctld_samples$TransectNum)
sctld_samples$colony_id <- paste(transect_id, sctld_samples$NewTagNum, sctld_samples$Species,
                                 sep = "_")

In [19]:
# make colony tag ID for colony data 
transect_id <- paste0('T',colony$TransectNum)
colony$colony_id <- paste(transect_id, colony$NewTagNum, colony$Species,
                                 sep = "_")

In [None]:
fruit <- c("apple", "banana", "pear", "pineapple")
str_detect(fruit, "a")

In [57]:
# match samples to colony data 
sctld_colony <- colony %>%
    filter(colony_id %in% sctld_samples$colony_id)

# add resistant, susceptible, sctld_mortality, or sctld_recovery health statuses for each colony
# specify columns to check 
cols<- c('062019_Condition','052022_Condition','122022_Condition','092023_Condition')
disease_cols<- c('052022_Condition','122022_Condition')
recovery_cols<-c('122022_Condition','092023_Condition')

sctld_colony <- sctld_colony %>%
# make smaller df
    select('Date_InitialTag','Transect','Species','colony_id',cols) %>%
    mutate(
        health_status = case_when(
            # if all cols healthy ~ resis, if diseased then dead ~ SCTLD_Mortality, if diseased then healthy ~ sctld_recovery, if just disease and/or bl ~ sus
            # include statuses in 9/2023 to incl any diseased in 5 and 122022 that died later on 
            if_any(any_of(cols), ~ str_detect(., "Dead")) ~ "SCTLD_Mortality",
            if_any(any_of(disease_cols), ~ str_detect(., "Diseased")) &  if_any(any_of(recovery_cols), ~ str_detect(., "Healthy")) ~ "SCTLD_Recovery",
            if_any(any_of(cols), ~ str_detect(., "Diseased")) ~ "SCTLD",
            if_any(any_of(cols), ~ str_detect(., "Healthy")) ~ "Resistant",
            TRUE ~ "Resistant")
        )

In [58]:
sctld_colony
unique(sctld_colony$health_status)

Date_InitialTag,Transect,Species,colony_id,062019_Condition,052022_Condition,122022_Condition,092023_Condition,health_status
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
6/21/19,CBC30N,PAST,T1_2_PAST,Healthy,Healthy,Healthy,"CLP,CLB",Resistant
10/15/19,CBC30N,PSTR,T1_4_PSTR,Healthy,Diseased,Dead,Not_Visited,SCTLD_Mortality
10/15/19,CBC30N,PSTR,T1_6_PSTR,Healthy,Healthy,Diseased,Dead,SCTLD_Mortality
6/24/19,CBC30N,MCAV,T1_7_MCAV,Healthy,Healthy,Healthy,CLP,Resistant
6/24/19,CBC30N,MCAV,T1_8_MCAV,Healthy,Diseased,Diseased,Dead,SCTLD_Mortality
10/15/19,CBC30N,PSTR,T1_12_PSTR,Healthy,Healthy,Healthy,Healthy,Resistant
6/24/19,CBC30N,PAST,T1_13_PAST,Healthy,Healthy,Healthy,Healthy,Resistant
6/24/19,CBC30N,MCAV,T1_14_MCAV,Healthy,Diseased,Dead,Not_Visited,SCTLD_Mortality
6/26/19,CBC30N,MCAV,T1_15_MCAV,Healthy,Diseased,Dead,Not_Visited,SCTLD_Mortality
6/21/19,CBC30N,PAST,T1_19_PAST,Healthy,Diseased,Healthy,CLB,SCTLD_Recovery


In [62]:
# summary of healthy, diseased, dead 
summary_sctld <- sctld_colony %>%
  group_by(Species, health_status) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = Species, values_from = n, values_fill = 0)%>%
# add totals per health status 
  mutate(Total = rowSums(across(-c(health_status))))

# Add row totals per Status
status_totals <- summary_sctld %>%
  select(-health_status) %>%
  summarise(across(everything(), sum)) %>%
  mutate(health_status = "Total") %>%
  select(everything())

In [64]:
# Combine
sctld_summarytable <- bind_rows(summary_sctld, status_totals) %>%
    arrange(factor(health_status, levels = c('Resistant','SCTLD','SCTLD_Recovery','SCTLD_Mortality','Total')))
sctld_summarytable

health_status,MCAV,MMEA,OANN,OFAV,PAST,PSTR,Total
<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>
Resistant,13,0,4,8,16,8,49
Susceptible,0,0,2,3,0,1,6
SCTLD_Recovery,1,0,0,0,4,0,5
SCTLD_Mortality,16,10,0,0,0,22,48
Total,30,10,6,11,20,31,108


In [25]:
# combine disease and dead by disease 
disease_totals <- summary_sctld %>%
    filter(health_status %in% c('Diseased','SCTLD_Mortality')) %>%
    select(-health_status) %>%
    summarise(across(everything(), sum)) %>%
    mutate(health_status = "Diseased") %>%
    select(everything())
# drop cols
healthy_sctld<-sctld_summarytable %>%
    filter(!health_status %in% c('Diseased','SCTLD_Mortality'))
#combine 
sctld_summarytable2 <- bind_rows(healthy_sctld,disease_totals) %>%
    arrange(factor(health_status, levels = c('Healthy','Diseased','Total')))
sctld_summarytable2

health_status,MCAV,MMEA,OANN,OFAV,PAST,PSTR,Total
<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>
Healthy,16,0,5,10,20,13,64
Diseased,14,10,1,1,0,18,44
Total,30,10,6,11,20,31,108


In [26]:
#### all tagged summary 

In [27]:
# filter for colonies tagged pre 2024  
allsctld_colony<-colony %>%
    filter(!str_detect(Date_InitialTag, "/24"))

In [28]:
unique(allsctld_colony$Date_InitialTag)

In [29]:
# define health statuses
allsctld_colony <- allsctld_colony %>%
  mutate(
    health_status = case_when(
      if_any(any_of(cols), ~ str_detect(., "Dead")) ~ "SCTLD_Mortality",
      if_any(any_of(cols), ~ str_detect(., "Healthy")) ~ "Healthy",
      if_any(any_of(cols), ~ str_detect(., "Diseased")) ~ "Diseased",
      TRUE ~ "Healthy"
    )
  )

# summarize 
summary_allsctld <- allsctld_colony %>%
  group_by(Species, health_status) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = Species, values_from = n, values_fill = 0)%>%
# add totals per health status 
  mutate(Total = rowSums(across(-c(health_status))))

# Add row totals per Status
allstatus_totals <- summary_allsctld %>%
  select(-health_status) %>%
  summarise(across(everything(), sum)) %>%
  mutate(health_status = "Total") %>%
  select(everything())

# Combine
allsctld_summarytable <- bind_rows(summary_allsctld, allstatus_totals) %>%
    arrange(factor(health_status, levels = c('Healthy','Diseased','SCTLD_Mortality','Total')))
allsctld_summarytable

health_status,CNAT,DLAB,MCAV,MMEA,OANN,OFAV,PAST,PSTR,SSID,Total
<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>
Healthy,1,7,13,0,4,8,17,13,21,84
Diseased,0,0,3,0,2,1,0,3,0,9
SCTLD_Mortality,0,0,11,12,0,0,0,15,2,40
Total,1,7,27,12,6,9,17,31,23,133


In [30]:
# why is there 17 past here but my subset has 20 lol

In [31]:
# compare past colonies 
all_past<-allsctld_colony %>%
    filter((Species == 'PAST') & (health_status == 'Healthy'))

past_subset<-sctld_colony %>%
    filter((Species == 'PAST') & (health_status == 'Healthy'))
print(unique(all_past$colony_id))
print(length(unique(all_past$colony_id)))

 [1] "T1_2_PAST"      "T1_19_PAST"     "T2_47_PAST"     "T2_56_PAST"    
 [5] "T2_57_PAST"     "T2_63_PAST"     "T2_68_PAST"     "T2_347_PAST"   
 [9] "T3_6_PAST"      "T3_7_PAST"      "T3_8_PAST"      "T3_10_PAST"    
[13] "T3_13_PAST"     "T3_18_PAST"     "T3_24_PAST"     "T3_34_PAST"    
[17] "T3_12flag_PAST"
[1] 17


In [32]:
print(unique(past_subset$colony_id))
print(length(unique(past_subset$colony_id)))
# missing t1 13, t1 20, t1 21

 [1] "T1_2_PAST"      "T1_13_PAST"     "T1_19_PAST"     "T1_20_PAST"    
 [5] "T1_21_PAST"     "T2_47_PAST"     "T2_56_PAST"     "T2_57_PAST"    
 [9] "T2_63_PAST"     "T2_68_PAST"     "T2_347_PAST"    "T3_6_PAST"     
[13] "T3_7_PAST"      "T3_8_PAST"      "T3_10_PAST"     "T3_13_PAST"    
[17] "T3_18_PAST"     "T3_24_PAST"     "T3_34_PAST"     "T3_12flag_PAST"
[1] 20


In [33]:
allsctld_colony %>%
  filter(colony_id == 'T1_13_PAST')
# where did this get filtered out??

“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”


...1,Date_InitialTag,Transect,TransectNum,OldTagNum,NewTagNum,Species,Meter,Meters_90,Direction,⋯,122024_Condition,122024_Percentage,Notes_122024,062025_Condition,062025_Percentage,Notes_062025,immune_y/n,checked_colonies,colony_id,health_status
<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>


In [34]:
sctld_colony %>%
  filter(colony_id == 'T1_13_PAST')

colony_id,062019_Condition,052022_Condition,122022_Condition,Date_InitialTag,Transect,Species,health_status
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
T1_13_PAST,Healthy,Healthy,Healthy,6/24/19,CBC30N,PAST,Healthy
