In [5]:
# Attach the core tidyverse packages (dplyr, tidyr, purrr, readr, ...)
library(tidyverse)

# Optional helper packages, currently disabled
#library(reshape2)
#library(lubridate)

# Keep text columns as character vectors instead of converting them to factors
options(stringsAsFactors = FALSE)

── [1mAttaching packages[22m ──────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.3     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.0.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



# read WIND data

In [6]:
# Load the combined monthly ERA5 table used for the wind variables.
wind_ds <- read.csv(file = "DATA/ERA5_CombinedMonthlyData.csv")

In [7]:
# List the columns available in the wind table.
colnames(wind_ds)

In [8]:
# Look up the row id (column X) of the May 1995 record in the wind table.
# NOTE(review): presumably used to decide where the merged table is cut
# later on -- confirm.
# Index the column directly (and only once) instead of building the full
# row subset twice.
wind_ds$X[wind_ds$time_month == "05-1995"]

In [9]:
# Drop the row-id column and the two leftover time columns; time_month
# stays as the join key.
wind_ds_cleaned <- select(wind_ds, -c(X, time.x, time.y))

# read NISKIN data

In [10]:
# Load the merged Niskin nutrient table.
niskin_ds <- read.csv(file = "DATA/Niskin_mergedNutrients.csv")

In [11]:
# List the columns available in the Niskin table.
colnames(niskin_ds)

In [12]:
# Drop the row-id column X; every other Niskin column is kept.
niskin_ds_cleaned <- select(niskin_ds, -X)

# read SST/CTD data

TODO: MLD and isopycnal data still need to be added here; they have to be reanalyzed because MLD2EuZ only extends to 2013.

In [13]:
# Load the combined CTD table produced by the data pipeline.
ctd_ds <- read.csv(file = "DataPipeline/CTD_combinedData.csv")

In [14]:
# List the columns available in the CTD table.
colnames(ctd_ds)

In [15]:
# Parse the "YYYY-MM-DD" date strings in place, then derive the "MM-YYYY"
# key (time_month) that all tables are joined on.
ctd_ds$date <- as.Date(ctd_ds$date, format = "%Y-%m-%d")

ctd_ds_month <- mutate(ctd_ds, time_month = format(date, format = "%m-%Y"))

In [16]:
# Drop the row id, the full date and the secondary cruise id; time_month
# remains as the join key.
ctd_ds_month_cleaned <- select(ctd_ds_month, -c(X, date, Cruise_ID2))

# read Phytoplankton data

- functional group counts: these are raw counts only and not very meaningful; they should instead be treated as presence/absence

- species richness : more useful

### functional group counts integrated


In [17]:
# Load the integrated phytoplankton functional-group counts and list
# their columns.
fgcounts_phyto_ds <- read.csv(
  file = "DataPipeline/IntegratedPhytoplanktonFuncGroupCounts.csv"
)

colnames(fgcounts_phyto_ds)

In [18]:
# Parse the date strings of the functional-group counts in place.
fgcounts_phyto_ds$date <- as.Date(fgcounts_phyto_ds$date, format = "%Y-%m-%d")

# Prefix every column with "counts_int_" so the names stay unique after the
# phytoplankton tables are merged, then add the "MM-YYYY" join key.
fgcounts_phyto_ds_month <- fgcounts_phyto_ds %>%
  rename_with(function(col) paste0("counts_int_", col)) %>%
  mutate(time_month = format(counts_int_date, format = "%m-%Y"))

### species richness total


In [19]:
# Load the total phytoplankton species-richness estimate and list its columns.
sr_phyto_ds <- read.csv(
  file = "DataPipeline/IntegratedPhytoplanktonSpeciesRichnessEstimate.csv"
)

colnames(sr_phyto_ds)

In [20]:
# Parse the date strings of the species-richness table in place, then add
# the "MM-YYYY" join key.
sr_phyto_ds$date <- as.Date(sr_phyto_ds$date, format = "%Y-%m-%d")

sr_phyto_ds_month <- mutate(
  sr_phyto_ds,
  time_month = format(date, format = "%m-%Y")
)

### Func Group species richness

In [21]:
# Load the species richness per functional group and list its columns.
fgsr_phyto_ds <- read.csv(
  file = "DataPipeline/IntegratedPhytoplanktonFuncGroupSpeciesRichness.csv"
)

colnames(fgsr_phyto_ds)

In [22]:
# Parse the date strings of the per-group species-richness table in place.
fgsr_phyto_ds$date <- as.Date(fgsr_phyto_ds$date, format = "%Y-%m-%d")

# Prefix every column with "SR_" so the names stay unique after the
# phytoplankton tables are merged, then add the "MM-YYYY" join key.
fgsr_phyto_ds_month <- fgsr_phyto_ds %>%
  rename_with(function(col) paste0("SR_", col)) %>%
  mutate(time_month = format(SR_date, format = "%m-%Y"))

In [23]:
#fgsr_phyto_ds_month

### MERGE PHYTOPLANKTON DATA 
#### clean up

In [24]:
# Attach the total species richness to the per-group richness by month.
# NOTE(review): if either table has several rows for one month, the join
# repeats rows -- confirm each table is unique per time_month.
phyto_1 <- fgsr_phyto_ds_month %>%
  left_join(sr_phyto_ds_month, by = "time_month")

In [25]:
# Attach the functional-group counts to the richness tables by month.
phyto_2 <- phyto_1 %>%
  left_join(fgcounts_phyto_ds_month, by = "time_month")

In [26]:
# List the columns of the merged phytoplankton table.
colnames(phyto_2)

In [27]:
# Drop the row-id and full-date columns contributed by each of the three
# phytoplankton tables; time_month remains as the join key.
phyto_cleaned <- select(
  phyto_2,
  -c(SR_X, SR_date, X, date, counts_int_X, counts_int_date)
)

# Read Zooplankton Data

In [28]:
# Load the cleaned, aggregated zooplankton biomass and list its columns.
zoo_ds <- read.csv(
  file = "DataPipeline/ZooplanktonBiomass_CleanedAggregated.csv"
)

colnames(zoo_ds)

In [29]:
# The zooplankton file stores its dates in a capitalised `Date` column;
# parse it into a new lower-case `date` column, then add the "MM-YYYY"
# join key.
zoo_ds$date <- as.Date(zoo_ds$Date, format = "%Y-%m-%d")

zoo_ds_month <- mutate(zoo_ds, time_month = format(date, format = "%m-%Y"))

In [30]:
# Drop the row id and both date columns; time_month remains as the join key.
zoo_ds_month_cleaned <- select(zoo_ds_month, -c(X, Date, date))

In [31]:
#zoo_ds_month_cleaned

# Read Satellite DATA

In [45]:
# Load the three monthly-mean satellite tables: SeaWiFS, MODIS and the
# concatenated satellite record.
SeaWiFS_raw <- read.csv(file = "SatelliteData/MERGE/SeaWiFS_monthly_mean.csv")

MODIS_raw <- read.csv(file = "SatelliteData/MERGE/MODIS_monthly_mean.csv")

SatCONCAT_raw <- read.csv(
  file = "SatelliteData/MERGE/SatelliteCONCAT_monthly_mean.csv"
)

In [33]:
# Preview the first six SeaWiFS records.
head(SeaWiFS_raw, n = 6L)

Unnamed: 0_level_0,date,year,month,day,lon,lat,value
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1997-10-31,1997,10,16,-64.6835,10.6875,0.231117
2,1997-11-30,1997,11,15,-64.6835,10.6875,0.6380158
3,1997-12-31,1997,12,16,-64.6835,10.6875,0.9182222
4,1998-01-31,1998,1,16,-64.6835,10.6875,1.2453725
5,1998-02-28,1998,2,14,-64.6835,10.6875,0.5960208
6,1998-03-31,1998,3,16,-64.6835,10.6875,1.9649349


In [61]:
# Parse the "YYYY-MM-DD" date strings of the three satellite tables in place.
SeaWiFS_raw$date <- as.Date(SeaWiFS_raw$date, format = "%Y-%m-%d")
MODIS_raw$date <- as.Date(MODIS_raw$date, format = "%Y-%m-%d")
SatCONCAT_raw$date <- as.Date(SatCONCAT_raw$date, format = "%Y-%m-%d")

# For each sensor keep only the "MM-YYYY" join key and the `value` column,
# renamed so the source stays identifiable after the merge.
SeaWiFS_month <- SeaWiFS_raw %>%
  mutate(time_month = format(date, format = "%m-%Y")) %>%
  select(time_month, SeaWiFS_chla = value)

MODIS_month <- MODIS_raw %>%
  mutate(time_month = format(date, format = "%m-%Y")) %>%
  select(time_month, MODIS_chla = value)

SatCONCAT_month <- SatCONCAT_raw %>%
  mutate(time_month = format(date, format = "%m-%Y")) %>%
  select(time_month, Satellite_chla = value)

# NOW MERGE!

In [63]:
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the cell fail later with a less obvious message.
library(purrr)
library(dplyr)

# Left-join every cleaned table onto the wind table, one after another, on
# the "MM-YYYY" key. Because each step is a left join, only months present
# in the wind table are kept.
# NOTE(review): a table with several rows for one month repeats rows in
# the result -- confirm this is intended for the Niskin/CTD data.
CARIACO_dat_joined <- list(
  wind_ds_cleaned,
  niskin_ds_cleaned,
  ctd_ds_month_cleaned,
  phyto_cleaned,
  zoo_ds_month_cleaned,
  SeaWiFS_month,
  MODIS_month,
  SatCONCAT_month
) %>%
  reduce(left_join, by = "time_month")

In [64]:
# Keep rows 203 to 500 of the merged table.
# NOTE(review): the positions are hard-coded; presumably they bracket the
# period of interest -- re-check them whenever an input file changes.
# Indexing a data frame past its last row yields all-NA rows, so the window
# is restricted to rows that actually exist. The result is unchanged when
# the table has at least 500 rows.
row_window <- intersect(203:500, seq_len(nrow(CARIACO_dat_joined)))
CARIACO_dat_joined_truncated <- CARIACO_dat_joined[row_window, ]

In [65]:
# Save the truncated merged table. With write.csv's defaults the row names
# are written as an unnamed first column.
write.csv(
  CARIACO_dat_joined_truncated,
  file = "DATA/Combined_CARIACO_data_v3.csv"
)

In [55]:
#length(CARIACO_dat_joined$time_month)