# clean up, integrate and aggregate Zooplankton data

In [1]:
# Core data-wrangling stack (dplyr, tidyr, readr, ggplot2, ...).
library(tidyverse, warn.conflicts = FALSE)

# reshape2: data reshaping helpers (not a MATLAB reader); attaching it after
# tidyverse masks tidyr::smiths.
# lubridate: date parsing/handling; it masks base date/intersect/setdiff/union.
# NOTE(review): neither package is called in the cells visible here - confirm
# they are still needed.
library(reshape2)
library(lubridate)

# Keep character columns as character instead of converting them to factors
# (already the default since R 4.0; kept for older R versions).
options(stringsAsFactors = FALSE)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.3     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.0.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths



Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




# read Zooplankton data (from csv)

In [2]:
# Load the zooplankton table from the CSV export into a data.frame.
zoo_ds <- read.csv(file = "../DATA/Zooplankton_newest_full.csv")

In [5]:
# List the column names of the raw table.
colnames(zoo_ds)

In [6]:
# Show the type and first few values of every column.
str(object = zoo_ds)

'data.frame':	306 obs. of  61 variables:
 $ unlimited        : int  0 1 2 3 4 5 6 7 8 9 ...
 $ Cruise           : num  71 71 72 72 73 73 74 74 75 75 ...
 $ Cruise_ID        : chr  "CAR-071" "CAR-071" "CAR-072" "CAR-072" ...
 $ Day              : num  9 9 6 6 11 11 10 10 14 14 ...
 $ Month            : num  10 10 11 11 12 12 1 1 2 2 ...
 $ Year             : num  2001 2001 2001 2001 2001 ...
 $ Date             : chr  "2001-10-09" "2001-10-09" "2001-11-06" "2001-11-06" ...
 $ Latitude         : num  10.5 10.5 10.5 10.5 10.5 ...
 $ Longitude        : num  -64.7 -64.7 -64.7 -64.7 -64.7 ...
 $ Analyst          : chr  "Javier Gutierrez" "Javier Gutierrez" "Javier Gutierrez" "Javier Gutierrez" ...
 $ Mesh_Size        : num  200 500 500 200 200 500 200 500 200 500 ...
 $ TOTAL_DENSITY    : num  1112.6 212.9 13.4 282.3 572.6 ...
 $ BIOMASS          : num  26.83 15.95 2.34 7.43 5.6 ...
 $ ASH              : num  6.8428 3.3093 0.0724 0.6393 0.1423 ...
 $ COPEPODS         : num  741.76 160.22 5.6

# remove outliers in biomass

In [52]:
# Drop biomass outliers (BIOMASS > 1000). Rows whose BIOMASS is missing are
# kept unchanged: a logical row index containing NA would otherwise replace
# each such row with an all-NA row, losing its Date and Mesh_Size.
biomass_ok <- is.na(zoo_ds$BIOMASS) | zoo_ds$BIOMASS <= 1000
zoo_ds <- zoo_ds[biomass_ok, ]

# Now build the biomass time series for each of the 2 mesh sizes and for their total

In [53]:
# Reshape biomass to one row per Date with one column per mesh size.
# Only Date, Mesh_Size and BIOMASS take part; all other columns are dropped.
test_zoo <- pivot_wider(
    zoo_ds,
    id_cols = Date,
    names_from = Mesh_Size,
    values_from = BIOMASS
)

# pivot_wider names the new columns after the Mesh_Size values ("200", "500");
# give them syntactic names.
test_zoo <- rename(test_zoo, Mesh200 = `200`, Mesh500 = `500`)

In [54]:
# Check the reshaped table: expect Date plus one numeric column per mesh size.
str(object = test_zoo)


tibble [152 × 3] (S3: tbl_df/tbl/data.frame)
 $ Date   : chr [1:152] "2001-10-09" "2001-11-06" "2001-12-11" "2002-01-10" ...
 $ Mesh200: num [1:152] 26.83 7.43 5.6 10.24 12.92 ...
 $ Mesh500: num [1:152] 15.947 2.337 0.714 4.93 NA ...


In [55]:
# Add the total biomass per sampling date (sum of both mesh sizes).
# test_zoo holds one row per Date (Date is the only id column of the pivot),
# so a plain vectorised addition gives the same values as the former
# group_by(Date) + sum(), without leaving the result as a grouped tibble.
# The total is NA whenever either mesh-size value is missing.
zoo_biomass_tot <- test_zoo %>%
    mutate(total_biomass = Mesh200 + Mesh500)

In [56]:
# Manual check: add the two mesh-size values of the first row by hand so the
# result can be compared with the first entry of total_biomass.
zoo_biomass_tot[["Mesh200"]][1] + zoo_biomass_tot[["Mesh500"]][1]

In [57]:
# Print the full vector of per-date totals.
zoo_biomass_tot[["total_biomass"]]

In [61]:
# Save the per-date biomass table to the working directory.
# row.names = FALSE stops write.csv from prepending an unnamed column of row
# numbers, which would come back as an extra index column on re-import.
write.csv(zoo_biomass_tot, "ZooplanktonBiomass_CleanedAggregated.csv",
          row.names = FALSE)