# Read Niskin Bottle master csv file

This R notebook reads (and cleans up) the Niskin Master file that is provided at  
  
http://imars.marine.usf.edu/cariaco  

This .csv file is a collection of data sampled monthly at the CARIACO Ocean Time Series station.

In [9]:
# Path to the raw Niskin master file (relative to the working directory).
file <- "DATA_raw/Niskin_Master_edit.csv"

# Read only the first line, which holds the column names. as.is = TRUE keeps
# the entries as character strings. TRUE/FALSE are spelled out because the
# shorthands T and F are ordinary variables that can be reassigned.
header <- read.csv(file, header = FALSE, nrows = 1, as.is = TRUE)

# Read the data records; skip = 1 skips the header line read above.
data <- read.csv(file, skip = 1, header = FALSE)

# Use the entries of the first line as the column names of the data.
colnames(data) <- header

# clean up dataframe

In [10]:
# Replace the missing-value sentinel -9999 with NA in every column.
data <- replace(data, data == -9999, NA)

# Dropping the empty columns by position is currently disabled:
# data <- data[, c(-1, -49:-50)]

# Keep only the rows that have a cruise number; rows whose Cruise is NA
# are treated as empty and dropped.
data <- data[which(!is.na(data$Cruise)), ]

In [12]:
# Remove the columns named "NA" and "NA.1" if they exist.
# NOTE(review): presumably these stem from blank header cells - confirm.
data[["NA"]] <- NULL
data[["NA.1"]] <- NULL

# Preview the first rows of the cleaned data frame.
head(data)

Cruise,Leg,Day,Month,Year,Depth_target,Depth_real,O2_ml_L,O2_umol_kg,SiOH4_Cumana,⋯,C/N part.,POC,PN,DON_USF,DOP_USF,DOC,TOC,PriPro,Chl,Phaeo
1,2,8,11,1995,1,1.5,4.85,211.61,2.4,⋯,,,,,,135,,,0.09,0.09
1,2,8,11,1995,7,6.5,4.41,192.39,2.8,⋯,,,,,,88,,,0.07,0.06
1,2,8,11,1995,15,15.0,4.38,191.08,2.2,⋯,,,,,,81,,,0.1,0.08
1,2,8,11,1995,25,25.0,4.37,190.57,,⋯,,,,,,78,,,0.12,0.14
1,2,8,11,1995,35,35.0,4.27,186.13,1.9,⋯,,,,,,78,,,0.13,0.2
1,2,8,11,1995,55,54.5,3.95,172.14,1.7,⋯,,,,,,78,,,0.42,0.6


In [16]:
# Build a "Year-Month-Day" text date for every row. The components are pasted
# as they are, so month and day are not zero-padded (e.g. "1995-11-8").
data$Date <- paste0(data$Year, "-", data$Month, "-", data$Day)

In [18]:
# Show the first six entries of the Date column as a quick sanity check.
head(data$Date, n = 6L)

# convert date/time

In [19]:
# Keep the original text form of the date in its own column.
data$DateString <- data$Date

# Parse the text dates (year-month-day) into POSIXlt date-times.
# NOTE(review): no tz is given, so the session's time zone applies - confirm
# this is intended. POSIXlt columns in data frames are usually discouraged,
# but the yday extraction below relies on the POSIXlt fields.
data$Date <- as.POSIXlt(data$DateString, format = "%Y-%m-%d")

# Day of the year taken from the POSIXlt field yday, which counts from 0
# (1 January is 0).
data$yday <- data$Date$yday

In [20]:
# Print a compact overview of the columns: their types and leading values.
str(object = data)

'data.frame':	4395 obs. of  43 variables:
 $ Cruise           : int  1 1 1 1 1 1 1 1 1 1 ...
 $ Leg              : int  2 2 2 2 2 2 2 2 2 2 ...
 $ Day              : int  8 8 8 8 8 8 8 8 8 8 ...
 $ Month            : int  11 11 11 11 11 11 11 11 11 11 ...
 $ Year             : int  1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 ...
 $ Depth_target     : int  1 7 15 25 35 55 75 100 150 200 ...
 $ Depth_real       : num  1.5 6.5 15 25 35 ...
 $ O2_ml_L          : num  4.85 4.41 4.38 4.37 4.27 3.95 3.87 3.63 1.81 0.45 ...
 $ O2_umol_kg       : num  212 192 191 191 186 ...
 $ SiOH4_Cumana     : num  2.4 2.8 2.2 NA 1.9 1.7 2.2 2.4 10.7 22.1 ...
 $   PO4_Cumana     : num  0 0 0 0 0.01 0.06 0.08 0.2 0.96 1.5 ...
 $ NO3_Cumana       : num  0.18 0.17 0.16 0.17 0.85 ...
 $ NH4              : num  NA NA NA NA NA NA NA NA NA NA ...
 $ NO2              : num  NA NA NA NA NA NA NA NA NA NA ...
 $ NO3NO2           : num  NA NA NA NA NA NA NA NA NA NA ...
 $ PO4              : num  NA NA NA NA NA N

# the cleaned file is saved as a csv in the folder "DATA_processed" for later use

In [21]:
# Make sure the output folder exists; write.csv() does not create missing
# directories and would fail otherwise. Nothing happens if it already exists.
dir.create("DATA_processed", showWarnings = FALSE, recursive = TRUE)

# Save the cleaned data frame as CSV for later use. The default row.names
# setting is kept, so the file starts with an unnamed row-index column.
write.csv(data, file = "DATA_processed/Master_Niskin_cleaned_2.csv")