# Read Zooplankton Master (Nov 2017) file

This R notebook reads (and cleans up) the Zooplankton Master (Nov 2017) file that is provided at  
  
http://imars.marine.usf.edu/cariaco  

This .csv file is a collection of data sampled monthly at the CARIACO Ocean Time Series Station.

In [1]:
# Path to the raw Zooplankton Master file, relative to this notebook.
file <- "../../DataFiles_Raw/Zooplankton_Master_revised_2017Nov22.csv"

# The column names are on the first line of the file; read that line on its
# own. `as.is = TRUE` keeps the names as plain strings rather than factors.
header <- read.csv(file, header = FALSE, nrows = 1, as.is = TRUE)

# Read the measurements, skipping the first five lines of the file (the
# header line and, per the original note, empty rows).
data <- read.csv(file, skip = 5, header = FALSE)

# Label the data columns with the names taken from the header line. `header`
# is a one-row data frame, so it is flattened to a character vector
# explicitly instead of relying on implicit coercion during the assignment.
colnames(data) <- as.character(header)

# clean up dataframe

In [2]:
# drop the columns at positions 1-2 and 63-76, which hold no data
data <- data[, -c(1:2, 63:76)]

# keep only the rows that carry a cruise number; rows without one are
# entirely NA
data <- data[!is.na(data$CRUISE), , drop = FALSE]

# convert date/time

Read the Hydrography Master and extract the precise date for each cruise from it:

In [3]:
# load the processed (cleaned) hydrography master
phm <- read.csv(
  file = "../../DataFiles_Processed/Master_Hydrography_June2018_Cleaned.csv"
)

# keep only the cruise number and the date; the cruise column is renamed to
# "CRUISE" so that it matches the key column of the zooplankton table
phmdate <- setNames(data.frame(phm$Cruise, phm$Date), c("CRUISE", "Date"))

Merge the two data frames by cruise number:

In [4]:
# join the cruise dates onto the zooplankton records via the shared "CRUISE"
# key; with merge()'s defaults only cruises present in both tables are kept
data_m <- merge(x = phmdate, y = data, by = "CRUISE")

In [5]:
# the merge leaves identical rows behind; keep only the first copy of each
# NOTE(review): presumably phmdate holds several rows per cruise -- if so,
# de-duplicating it before the merge would avoid this step; confirm
data_m <- unique(data_m)

In [6]:
# Parse the cruise dates (text in "YYYY-MM-DD" form) into date-time objects.
# The time zone is pinned to UTC so the result does not depend on the time
# zone of the machine running the notebook. The POSIXlt class is kept on
# purpose: its `yday` field is read later in the notebook.
data_m$Date <- as.POSIXlt(data_m$Date, format = "%Y-%m-%d", tz = "UTC")

In [7]:
# store the day of the year of each record as its own column
# NOTE: the POSIXlt `yday` field is zero-based, i.e. 1 January is day 0
data_m[["yday"]] <- data_m[["Date"]]$yday

# the cleaned file is saved as a csv in the folder "DataFiles_Processed" for later use

In [8]:
# Export the cleaned table as CSV. Row names are written as well (the
# write.csv default, spelled out here), so the file starts with an unnamed
# index column.
write.csv(
  x = data_m,
  file = "../../DataFiles_Processed/Master_Zooplankton_Nov2017_Cleaned.csv",
  row.names = TRUE
)