In [1]:
# Load the tidyverse; this attaches dplyr, purrr, tidyr, readr and friends
library(tidyverse)#, warn.conflicts = FALSE)

# currently unused libraries, kept commented out for reference
#library(reshape2)
#library(lubridate)

# set strings as factors to false, so that character columns are not turned
# into factors on R versions where that is still the default
options(stringsAsFactors = FALSE)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.3     ✔ dplyr   1.0.7
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   2.0.0     ✔ forcats 0.5.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()



# read WIND data

In [5]:
# Load the combined monthly ERA5 table into a data frame
wind_ds <- read.csv(file = "DATA/ERA5_CombinedMonthlyData.csv")

In [7]:
# Show the available columns of the wind data
colnames(wind_ds)

In [73]:
# Look up the X value(s) of the wind rows whose time_month is "05-1995".
# This only inspects the data; nothing is filtered or reassigned here.
# NOTE(review): presumably used to find the row range for the truncation
# of the joined table further down - confirm.
wind_ds[wind_ds$time_month == "05-1995", ]$X

In [51]:
# Drop the index column and the two time columns; time_month stays as join key
wind_ds_cleaned <- select(wind_ds, -c(X, time.x, time.y))

# read NISKIN data

In [8]:
# Load the merged Niskin nutrient table into a data frame
niskin_ds <- read.csv(file = "DATA/Niskin_mergedNutrients.csv")

In [10]:
# Show the available columns of the Niskin data
colnames(niskin_ds)

In [53]:
# Drop the index column X from the Niskin data
niskin_ds_cleaned <- select(niskin_ds, -X)

# read SST/CTD data

TODO: MLD and isopycnal still need to be added here, reanalyzed, because MLD2EuZ only goes until 2013.

In [98]:
# Load the combined CTD table into a data frame
ctd_ds <- read.csv(file = "DataPipeline/CTD_combinedData.csv")

In [99]:
# Show the available columns of the CTD data
colnames(ctd_ds)

In [100]:
# Parse the date strings (YYYY-MM-DD) of the CTD data into Date objects.
# Values that do not match the format become NA.
ctd_ds$date <- as.Date(ctd_ds$date, format = "%Y-%m-%d")

# Add the time_month key ("MM-YYYY") derived from the date; the data sets
# are joined on this column later on
ctd_ds_month <- ctd_ds %>%
  mutate(time_month = format(date, format = "%m-%Y"))

In [101]:
# Drop the index, date and cruise id columns; time_month stays as join key
ctd_ds_month_cleaned <- select(ctd_ds_month, -c(X, date, Cruise_ID2))

# read Phytoplankton data

- func group counts: these are only raw counts, which are not that meaningful; they should be presence/absence instead

- species richness : more useful

### functional group counts integrated


In [17]:
# Integrated counts per phytoplankton functional group
fgcounts_phyto_ds <- read.csv(
  file = "DataPipeline/IntegratedPhytoplanktonFuncGroupCounts.csv"
)

# Show the available columns
colnames(fgcounts_phyto_ds)

In [37]:
# Parse the date strings (YYYY-MM-DD) of the functional group counts into
# Date objects. Values that do not match the format become NA.
fgcounts_phyto_ds$date <- as.Date(fgcounts_phyto_ds$date, format = "%Y-%m-%d")

# Prefix every column with "counts_int_" so the names stay distinguishable
# after merging, then add the unprefixed time_month key ("MM-YYYY")
fgcounts_phyto_ds_month <- fgcounts_phyto_ds %>%
  rename_with(~ paste0("counts_int_", .x)) %>%
  mutate(time_month = format(counts_int_date, format = "%m-%Y"))

### species richness total


In [23]:
# Total phytoplankton species richness estimate
sr_phyto_ds <- read.csv(
  file = "DataPipeline/IntegratedPhytoplanktonSpeciesRichnessEstimate.csv"
)

# Show the available columns
colnames(sr_phyto_ds)

In [24]:
# Parse the date strings (YYYY-MM-DD) of the species richness data into
# Date objects. Values that do not match the format become NA.
sr_phyto_ds$date <- as.Date(sr_phyto_ds$date, format = "%Y-%m-%d")

# Add the time_month key ("MM-YYYY") derived from the date
sr_phyto_ds_month <- sr_phyto_ds %>%
  mutate(time_month = format(date, format = "%m-%Y"))

### Func Group species richness

In [26]:
# Species richness per phytoplankton functional group
fgsr_phyto_ds <- read.csv(
  file = "DataPipeline/IntegratedPhytoplanktonFuncGroupSpeciesRichness.csv"
)

# Show the available columns
colnames(fgsr_phyto_ds)

In [34]:
# Parse the date strings (YYYY-MM-DD) of the per-group species richness data
# into Date objects. Values that do not match the format become NA.
fgsr_phyto_ds$date <- as.Date(fgsr_phyto_ds$date, format = "%Y-%m-%d")

# Prefix every column with "SR_" so the names stay distinguishable after
# merging, then add the unprefixed time_month key ("MM-YYYY")
fgsr_phyto_ds_month <- fgsr_phyto_ds %>%
  rename_with(~ paste0("SR_", .x)) %>%
  mutate(time_month = format(SR_date, format = "%m-%Y"))

In [38]:
#fgsr_phyto_ds_month

### MERGE PHYTOPLANKTON DATA 
#### clean up

In [39]:
# Attach the total species richness to the per-group richness, month by month
phyto_1 <- fgsr_phyto_ds_month %>%
  left_join(sr_phyto_ds_month, by = "time_month")

In [41]:
# Attach the functional group counts as well, month by month
phyto_2 <- phyto_1 %>%
  left_join(fgcounts_phyto_ds_month, by = "time_month")

In [43]:
# Show the columns of the merged phytoplankton table
colnames(phyto_2)

In [61]:
# Drop the index and date columns of all three phytoplankton sources;
# time_month stays as join key
phyto_cleaned <- select(
  phyto_2,
  -c(SR_X, SR_date, X, date, counts_int_X, counts_int_date)
)

# Read Zooplankton Data

In [57]:
# Load the cleaned and aggregated zooplankton biomass table
zoo_ds <- read.csv(
  file = "DataPipeline/ZooplanktonBiomass_CleanedAggregated.csv"
)

# Show the available columns
colnames(zoo_ds)

In [59]:
# Parse the strings (YYYY-MM-DD) of the capitalised Date column into a new
# lower-case date column of Date objects. Values that do not match the
# format become NA.
zoo_ds$date <- as.Date(zoo_ds$Date, format = "%Y-%m-%d")

# Add the time_month key ("MM-YYYY") derived from the parsed date
zoo_ds_month <- zoo_ds %>%
  mutate(time_month = format(date, format = "%m-%Y"))

In [62]:
# Drop the index column and both date columns; time_month stays as join key
zoo_ds_month_cleaned <- select(zoo_ds_month, -c(X, Date, date))

In [65]:
#zoo_ds_month_cleaned

# NOW MERGE!

In [102]:
# reduce() comes from purrr and left_join() from dplyr. library() stops with
# an error when a package is missing, whereas require() only warns and
# returns FALSE, which would let the script run on into a confusing failure.
library(purrr)
library(dplyr)

# Successively left-join all cleaned data sets onto the wind data, matching
# rows by time_month. Every row of wind_ds_cleaned is kept.
# NOTE(review): a left join repeats rows whenever the right-hand table has
# several rows for the same time_month - confirm whether that is intended.
CARIACO_dat_joined <- list(
  wind_ds_cleaned,
  niskin_ds_cleaned,
  ctd_ds_month_cleaned,
  phyto_cleaned,
  zoo_ds_month_cleaned
) %>%
  reduce(left_join, by = "time_month")

In [103]:
# Keep rows 203 to 458 of the joined table; the original row names are kept.
# NOTE(review): the row positions are hard-coded and depend on the row order
# of the joined inputs - confirm them whenever the input data changes.
CARIACO_dat_joined_truncated <- CARIACO_dat_joined[seq.int(203L, 458L), ]

In [110]:
# Save the truncated, combined table. Row names are written as well
# (write.csv default), so the file starts with an unnamed index column.
write.csv(
  x = CARIACO_dat_joined_truncated,
  file = "DATA/Combined_CARIACO_data.csv"
)