In [None]:
# install.packages("tidyverse")

In [None]:
library(tidyverse)
library(readxl)

### 1. Load data

In [None]:
# Plot size options picked up by the notebook's plot renderer (repr.plot.*).
options(repr.plot.width = 8, repr.plot.height = 6)

# Locate the summary workbook.
# - full.names = TRUE returns the directory-qualified path, so read_excel()
#   works whatever the current working directory is (bare file names only
#   resolve when the working directory is the project directory).
# - The pattern is a regular expression; escaping the dot and anchoring with $
#   restricts the match to names that really end in ".xlsx".
path <- list.files("/PATH_TO_PROJECT/", pattern = "\\.xlsx$", full.names = TRUE)

# read_excel() needs exactly one path; fail early with a readable message.
if (length(path) != 1L) {
  stop(
    "Expected exactly one .xlsx file in /PATH_TO_PROJECT/, found ",
    length(path),
    call. = FALSE
  )
}

all <- read_excel(path, sheet = 1) # load summary data (first sheet)

all[1:3, ] # preview first few lines of data

### 2. Pivot data from wide format to long

In [None]:
# Excel spreadsheet data is in wide format, as requested by CMAP
# Pivot wide format data to long format for easier plotting
#
# The wide table is read from `all`, the object created by read_excel() in the
# load step. (`summary` was never assigned in this notebook and would resolve
# to the base R function of that name, so piping it into pivot_longer() fails.)
# NOTE: the last statement of this cell overwrites `all` with the long table,
# so reload the data before re-running this cell.

# Pivot one family of wide columns (all columns whose name starts with
# `measure_prefix`) into long format.
#   wide           : wide-format data frame
#   measure_prefix : column-name prefix, e.g. "red_"; it is stripped from the
#                    column names and the remainder becomes `population`
#   value_col      : name of the output column that receives the values
# Returns the identifying columns plus `population` and `value_col`;
# rows whose value is NA are dropped.
pivot_measure <- function(wide, measure_prefix, value_col) {
  wide %>%
    pivot_longer(
      cols = starts_with(measure_prefix),
      names_to = "population",
      names_prefix = measure_prefix,
      values_to = value_col,
      values_drop_na = TRUE
    ) %>%
    select(
      time, file, lat, lon, depth, dilution, population,
      all_of(value_col), replicate, flag
    )
}

red <- pivot_measure(all, "red_", "red")
orange <- pivot_measure(all, "orange_", "orange")
abundance <- pivot_measure(all, "abundance_", "abundance")
scatter <- pivot_measure(all, "scatter_", "scatter")
diam <- pivot_measure(all, "cell_diameter_", "cell_diameter")
Qc <- pivot_measure(all, "carbon_content_", "carbon_content")
biomass <- pivot_measure(all, "biomass_", "biomass")

# merge() joins on every column the two tables share; all = TRUE keeps rows
# that are present in only one of them (full outer join).
fluor <- merge(red, orange, all = TRUE) # merge red and orange fluorescence data
cell <- merge(abundance, scatter, all = TRUE) # merge abundance and scatter
# Named `fluor_cell` rather than `merge` so the base function is not shadowed.
fluor_cell <- merge(fluor, cell, all = TRUE) # merge fluor data with cell data

mie <- merge(diam, Qc, all = TRUE) # merge Qc and cell size data which you get from mie theory
mie2 <- merge(mie, biomass, all = TRUE) # merge mie theory data with biomass

all <- merge(fluor_cell, mie2, all = TRUE) # merge all long data into a single dataframe

In [None]:
# Show the first three rows of the merged long-format table
all[seq_len(3), ]

### 3. Plotting
#### a. Abundance plots
##### i. Surface abundance

In [None]:
# Surface abundance by latitude (rows with depth < 20, beads/unknown removed).
# Points are the mean per latitude and population; error bars are +/- one
# standard deviation. mean()/sd() are called without na.rm, so a group that
# contains an NA yields NA.
all %>%
  dplyr::filter(population != "beads" & population != "unknown" & depth < 20) %>%
  dplyr::group_by(lat, population) %>%
  dplyr::summarize(
    sd = sd(abundance),
    avg = mean(abundance)
  ) %>%
  ggplot(aes(lat, avg, col = population)) +
  geom_errorbar(aes(ymin = avg - sd, ymax = avg + sd), color = "black", size = .3, width = .1) +
  geom_point(size = 3) +
  # `scales` is the real argument name; `scale=` only worked through
  # partial argument matching.
  facet_grid(population ~ ., scales = "free_y") +
  theme_bw() +
  ylab("Abundance (cells uL-1)")

##### ii. Abundance depth profile

In [None]:
# Abundance depth profile: mean abundance for each latitude / depth /
# population combination, shown as coloured points with one facet row per
# population (beads and unknown particles excluded).
all %>%
  dplyr::filter(population != "beads", population != "unknown") %>%
  dplyr::group_by(lat, depth, population) %>%
  dplyr::summarise(avg = mean(abundance)) %>%
  ggplot(mapping = aes(x = lat, y = -depth)) +
  geom_point(mapping = aes(colour = avg), size = 4) +
  viridis::scale_colour_viridis(option = "D", name = "Abundance (cells uL-1)") +
  facet_grid(population ~ .) +
  labs(x = "Latitude", y = "Depth (m)") +
  theme_bw()

#### b. Scatter plots
##### i. Surface scatter

In [None]:
# Surface scatter by latitude (rows with depth < 20, beads/unknown removed).
# Points are the mean per latitude and population; error bars are +/- one
# standard deviation. mean()/sd() are called without na.rm, so a group that
# contains an NA yields NA.
all %>%
  dplyr::filter(population != "beads" & population != "unknown" & depth < 20) %>%
  dplyr::group_by(lat, population) %>%
  dplyr::summarize(
    sd = sd(scatter),
    avg = mean(scatter)
  ) %>%
  ggplot(aes(lat, avg, col = population)) +
  geom_errorbar(aes(ymin = avg - sd, ymax = avg + sd), color = "black", size = .3, width = .1) +
  geom_point(size = 3) +
  # `scales` is the real argument name; `scale=` only worked through
  # partial argument matching.
  facet_grid(population ~ ., scales = "free_y") +
  theme_bw() +
  ylab("Scatter (normalized to beads)")

##### ii. Scatter depth profile

In [None]:
# Scatter depth profile: mean scatter for each latitude / depth / dilution /
# population combination, shown as coloured points with one facet row per
# population (beads and unknown particles excluded).
all %>%
  dplyr::filter(population != "beads", population != "unknown") %>%
  dplyr::group_by(lat, depth, dilution, population) %>%
  dplyr::summarise(avg = mean(scatter)) %>%
  ggplot(mapping = aes(x = lat, y = -depth)) +
  geom_point(mapping = aes(colour = avg), size = 4) +
  viridis::scale_colour_viridis(option = "D", name = "Scatter\n(normalized to beads)") +
  facet_grid(population ~ .) +
  labs(x = "Latitude", y = "Depth (m)") +
  theme_bw()

#### c. Red fluorescence plots
##### i. Red fluorescence depth profile

In [None]:
# Red fluorescence depth profile: natural log of the mean red fluorescence for
# each latitude / depth / population combination (beads, unknown particles and
# bacteria excluded), one facet row per population.
all %>%
  dplyr::filter(
    population != "beads",
    population != "unknown",
    population != "bacteria"
  ) %>%
  dplyr::group_by(lat, depth, population) %>%
  dplyr::summarise(avg = log(mean(red))) %>%
  ggplot(mapping = aes(x = lat, y = -depth)) +
  geom_point(mapping = aes(colour = avg), size = 4) +
  viridis::scale_colour_viridis(option = "D", name = "Log Red fluorescence\n(normalized to beads)") +
  facet_grid(population ~ .) +
  labs(x = "Latitude", y = "Depth (m)") +
  theme_bw()

##### ii. Red fluorescence depth profile (normalized to scatter)

In [None]:
# Red fluorescence normalized to scatter, as a depth profile with one facet
# row per population (beads and unknown particles excluded).
# NOTE(review): unlike the plain red-fluorescence profile, "bacteria" is not
# filtered out here - confirm that is intended.
all %>%
  dplyr::filter(population != "beads" & population != "unknown") %>%
  dplyr::group_by(lat, depth, population) %>%
  # Aggregate to ONE value per group. The bare ratio `red/scatter` returns one
  # row per observation, which summarize() does not support as an aggregate
  # (deprecated in current dplyr) and which overplots replicates at the same
  # latitude/depth. na.rm = TRUE keeps groups in which only some observations
  # lack a ratio, as those observations were simply not drawn before.
  dplyr::summarize(avg = mean(red / scatter, na.rm = TRUE)) %>%
  ggplot(aes(lat, -depth)) +
  geom_point(aes(colour = avg), size = 4) +
  viridis::scale_colour_viridis(name = "Red fluorescence\n(normalized to scatter)", option = "D") +
  facet_grid(population ~ .) +
  theme_bw() +
  xlab("Latitude") +
  ylab("Depth (m)")

#### d. Orange fluorescence plots
##### i. Orange fluorescence depth profile

In [None]:
# Orange fluorescence depth profile: natural log of the mean orange
# fluorescence for each latitude / depth / population combination (beads,
# unknown particles and bacteria excluded), one facet row per population.
all %>%
  dplyr::filter(
    population != "beads",
    population != "unknown",
    population != "bacteria"
  ) %>%
  dplyr::group_by(lat, depth, population) %>%
  dplyr::summarise(avg = log(mean(orange))) %>%
  ggplot(mapping = aes(x = lat, y = -depth)) +
  geom_point(mapping = aes(colour = avg), size = 4) +
  viridis::scale_colour_viridis(option = "D", name = "Log orange fluorescence\n(normalized to beads)") +
  facet_grid(population ~ .) +
  labs(x = "Latitude", y = "Depth (m)") +
  theme_bw()

##### ii. Orange fluorescence depth profile (normalized to scatter)

In [None]:
# Orange fluorescence normalized to scatter, as a depth profile with one facet
# row per population (beads and unknown particles excluded).
# NOTE(review): unlike the plain orange-fluorescence profile, "bacteria" is
# not filtered out here - confirm that is intended.
all %>%
  dplyr::filter(population != "beads" & population != "unknown") %>%
  dplyr::group_by(lat, depth, population) %>%
  # Aggregate to ONE value per group. The bare ratio `orange/scatter` returns
  # one row per observation, which summarize() does not support as an
  # aggregate (deprecated in current dplyr) and which overplots replicates at
  # the same latitude/depth. na.rm = TRUE keeps groups in which only some
  # observations lack a ratio, as those observations were simply not drawn
  # before.
  dplyr::summarize(avg = mean(orange / scatter, na.rm = TRUE)) %>%
  ggplot(aes(lat, -depth)) +
  geom_point(aes(colour = avg), size = 4) +
  viridis::scale_colour_viridis(name = "Orange fluorescence\n(normalized to scatter)", option = "D") +
  facet_grid(population ~ .) +
  theme_bw() +
  xlab("Latitude") +
  ylab("Depth (m)")

#### e. Cell size plots
##### i. Surface cell size

In [None]:
# Surface cell size by latitude (rows with depth < 20 and flag == 0,
# beads/unknown removed). Points are the mean cell diameter per latitude and
# population; error bars are +/- one standard deviation. mean()/sd() are
# called without na.rm, so a group that contains an NA yields NA.
all %>%
  dplyr::filter(population != "beads" & population != "unknown" & depth < 20 & flag == 0) %>%
  dplyr::group_by(lat, population) %>%
  dplyr::summarize(
    avg = mean(cell_diameter),
    sd = sd(cell_diameter)
  ) %>%
  ggplot(aes(lat, avg, col = population)) +
  geom_errorbar(aes(ymin = avg - sd, ymax = avg + sd), color = "black", size = .3, width = .1) +
  geom_point(size = 2) +
  # `scales` is the real argument name; `scale=` only worked through
  # partial argument matching.
  facet_grid(population ~ ., scales = "free_y") +
  theme_bw() +
  ylab("Cell size (um)")

##### ii. Cell size depth profile

In [None]:
# Cell size depth profile: mean cell diameter for each latitude / depth /
# population combination, shown as coloured points with one facet row per
# population (beads and unknown particles excluded).
all %>%
  dplyr::filter(population != "beads", population != "unknown") %>%
  dplyr::group_by(lat, depth, population) %>%
  dplyr::summarise(avg = mean(cell_diameter)) %>%
  ggplot(mapping = aes(x = lat, y = -depth)) +
  geom_point(mapping = aes(colour = avg), size = 4) +
  viridis::scale_colour_viridis(option = "D", name = "Cell size (um)") +
  facet_grid(population ~ .) +
  labs(x = "Latitude", y = "Depth (m)") +
  theme_bw()

#### f. Carbon content plots
##### i. Surface carbon content

In [None]:
# Surface carbon content by latitude (rows with depth < 20 and flag == 0,
# beads/unknown removed). Points are the mean per latitude and population;
# error bars are +/- one standard deviation. mean()/sd() are called without
# na.rm, so a group that contains an NA yields NA.
all %>%
  dplyr::filter(population != "beads" & population != "unknown" & depth < 20 & flag == 0) %>%
  dplyr::group_by(lat, population) %>%
  dplyr::summarize(
    avg = mean(carbon_content),
    sd = sd(carbon_content)
  ) %>%
  ggplot(aes(lat, avg, col = population)) +
  geom_errorbar(aes(ymin = avg - sd, ymax = avg + sd), color = "black", size = .3, width = .1) +
  geom_point(size = 3.5) +
  # `scales` is the real argument name; `scale=` only worked through
  # partial argument matching.
  facet_grid(population ~ ., scales = "free_y") +
  theme_bw() +
  ylab("Carbon content (microgram carbon per cell)")

##### ii. Carbon content depth profile

In [None]:
# Carbon content depth profile: mean carbon content for each latitude / depth /
# population combination, shown as coloured points with one facet row per
# population (beads and unknown particles excluded).
all %>%
  dplyr::filter(population != "beads", population != "unknown") %>%
  dplyr::group_by(lat, depth, population) %>%
  dplyr::summarise(avg = mean(carbon_content)) %>%
  ggplot(mapping = aes(x = lat, y = -depth)) +
  geom_point(mapping = aes(colour = avg), size = 4) +
  viridis::scale_colour_viridis(option = "D", name = "Carbon content\n (microgram carbon per cell)") +
  facet_grid(population ~ .) +
  labs(x = "Latitude", y = "Depth (m)") +
  theme_bw()

#### g. Biomass plots
##### i. Surface biomass

In [None]:
# Surface biomass by latitude (rows with depth < 20 and flag == 0,
# beads/unknown removed). Points are the mean per latitude and population;
# error bars are +/- one standard deviation. mean()/sd() are called without
# na.rm, so a group that contains an NA yields NA.
all %>%
  dplyr::filter(population != "beads" & population != "unknown" & depth < 20 & flag == 0) %>%
  dplyr::group_by(lat, population) %>%
  dplyr::summarize(
    avg = mean(biomass),
    sd = sd(biomass)
  ) %>%
  ggplot(aes(lat, avg, col = population)) +
  geom_errorbar(aes(ymin = avg - sd, ymax = avg + sd), color = "black", size = .3, width = .1) +
  geom_point(size = 3.5) +
  # `scales` is the real argument name; `scale=` only worked through
  # partial argument matching.
  facet_grid(population ~ ., scales = "free_y") +
  theme_bw() +
  ylab("Biomass (microgram carbon per liter)")

##### ii. Biomass depth profile

In [None]:
# Biomass depth profile: mean biomass for each latitude / depth / population
# combination, shown as coloured points with one facet row per population
# (beads and unknown particles excluded).
all %>%
  dplyr::filter(population != "beads", population != "unknown") %>%
  dplyr::group_by(lat, depth, population) %>%
  dplyr::summarise(avg = mean(biomass)) %>%
  ggplot(mapping = aes(x = lat, y = -depth)) +
  geom_point(mapping = aes(colour = avg), size = 4) +
  viridis::scale_colour_viridis(option = "D", name = "Biomass\n(microgram carbon per liter)") +
  facet_grid(population ~ .) +
  labs(x = "Latitude", y = "Depth (m)") +
  theme_bw()