# clean up, integrate and aggregate NISKIN data

In [1]:
library(tidyverse, warn.conflicts = FALSE)

# reshape2 provides melt()/dcast() for reshaping data frames (it does not read MATLAB files)
library(reshape2)
library(lubridate)

# Keep character columns as character vectors instead of converting them to
# factors when data frames are created (already the default since R 4.0.0).
options(stringsAsFactors = FALSE)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.3     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.0.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths



Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




# read NISKIN data (from csv)

In [2]:
# Load the NISKIN table from CSV into a data frame.
niskin_ds <- read.csv(file = "../../VeryNEWESTCariacoData/niskin_dtypes.csv")

In [3]:
# Build a "Year-Month-Day" string for every row from the three date columns.
niskin_ds$DATE <- paste0(niskin_ds$Year, "-", niskin_ds$Month, "-", niskin_ds$Day)

# interpolate niskin_ds

In [4]:
# Extract one variable of the NISKIN table in long format.
#
# Arguments:
#   variable    Name (character) of the column of `niskin_ds` to extract.
#   datasource  Data source to read from; only "niskin" is supported.
#
# Returns a data frame with the columns `date` (Date parsed from `DATE` with
# the format "%Y-%m-%d"), `depth` (copy of `Depth_real`), `key` (the name of
# the extracted column) and `value` (its values).
#
# Stops with an error when `datasource` is not "niskin".

prepdataframe <- function(variable = "", datasource = "niskin") {
    # Fail with a clear message instead of the "object 'VarDF' not found"
    # error that an unsupported source would otherwise trigger.
    if (!isTRUE(datasource == "niskin")) {
        stop("Unsupported datasource '", datasource,
             "'; only 'niskin' is available.", call. = FALSE)
    }

    DF <- niskin_ds
    DF$date <- as.Date(DF$DATE, format = "%Y-%m-%d")
    DF$depth <- DF$Depth_real

    # all_of() makes explicit that `variable` holds a column name; a bare
    # `variable` is ambiguous and would select a data column of that name.
    DF %>%
        select(date, all_of(variable), depth) %>%
        gather(key = "key", value = "value", -date, -depth)
}

In [5]:
# Interpolate the profile of every date onto the depth grid 0, 1, ..., 200.
#
# Arguments:
#   DF    Data frame with the columns `date`, `depth` and `value`.
#   func  Interpolation method:
#           "linear"  linear interpolation with approx(); dates with fewer
#                     than two non-missing values are dropped beforehand.
#           "oce-rr"  oce::oceApprox() with method "rr" (no dates are
#                     dropped beforehand).
#
# Returns a tibble grouped by `date` with the columns `date`, `value_int`
# (interpolated value) and `depth` (grid depth).
#
# Stops with an error for an unknown `func`, or when "oce-rr" is requested
# and the oce package is not installed.
interpolateDF <- function(DF, func = "oce-rr") {
    zz <- seq(0, 200, 1)

    if (identical(func, "linear")) {
        IntDF <- DF %>%
            group_by(date) %>%
            filter(sum(!is.na(value)) > 1) %>%
            # approx() returns list(x, y); only the interpolated y is kept.
            do(data.frame(value_int = approx(.$depth, .$value, zz)$y, depth = zz))
    } else if (identical(func, "oce-rr")) {
        # oce is not attached by this notebook, so call it by namespace.
        if (!requireNamespace("oce", quietly = TRUE)) {
            stop("func = 'oce-rr' requires the 'oce' package to be installed.",
                 call. = FALSE)
        }
        IntDF <- DF %>%
            group_by(date) %>%
            do(data.frame(value_int = oce::oceApprox(.$depth, .$value, zz, "rr"),
                          depth = zz))
    } else {
        stop("Unknown interpolation method '", func,
             "'; use 'linear' or 'oce-rr'.", call. = FALSE)
    }

    IntDF
}

In [6]:
# Summarise one NISKIN variable per date over the grid depths below 101.
#
# The variable is extracted with prepdataframe() and linearly interpolated
# with interpolateDF(). Only grid depths < 101 are kept, and dates with 20 or
# more missing interpolated values in that range are discarded.
#
# Returns one row per date (still grouped by `date`) with the columns
#   value_sum   sum of the interpolated values (missing values ignored)
#   var         mean of the interpolated values (missing values ignored)
#   NAs         number of missing interpolated values
#   time_month  the date formatted as "%m-%Y"
interpolateNiskin <- function(var) {
    prepdataframe(var) %>%
        interpolateDF("linear") %>%
        group_by(date) %>%
        # The depth cut must come first: the NA count below is taken on the
        # remaining rows only.
        filter(depth < 101) %>%
        filter(sum(is.na(value_int)) < 20) %>%
        summarize(
            value_sum = sum(value_int, na.rm = TRUE),
            var = mean(value_int, na.rm = TRUE),
            NAs = sum(is.na(value_int)),
            .groups = "keep"
        ) %>%
        mutate(time_month = format(date, format = "%m-%Y"))
}

In [7]:
# Names of the NISKIN columns that are interpolated and aggregated by the
# loops in this notebook. The order is kept as-is because it determines the
# column order of the joined tables.
niskin_numeric <- c(
    "O2_ml_L", "O2_umol_kg",
    "NO3_UDO", "PO4_UDO", "SiO4_UDO",
    "NH4_USF", "NO2_USF", "NO3_NO2_USF", "PO4_USF", "SiO4_USF",
    "pH", "Alkalinity_mol_kg", "Alkalinity_umol_kg", "TCO2", "fCO2",
    "pH_corrected", "TCO2_corrected", "fCO2_corrected",
    "Salinity_bottles", "Salinity_CTD", "Temperature", "Sigma_t",
    "TPP", "PIP",
    "POC_ug_kg", "PON_ug_kg", "POC_ug_L", "PN_ug_L", "C_N_particulate",
    "DON", "DOP", "DOC", "TOC",
    "PrimaryProductivity", "Chlorophyll", "Phaeopigments",
    "Total_Prokaryotes", "Bact_Biomass_mgC_m3", "Bact_Biomass_uMC"
)

In [8]:
# For every variable, store a two-column table (the per-date mean under the
# variable's own name, plus `time_month`) in a list keyed by variable name.
niskin_temp_store <- list()

for (variable in niskin_numeric) {
    print(variable)
    dat <- interpolateNiskin(variable)
    niskin_temp_store[[variable]] <- setNames(
        data.frame(dat$var, dat$time_month),
        c(variable, "time_month")
    )
}

[1] "O2_ml_L"


Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(variable)` instead of `variable` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m



[1] "O2_umol_kg"
[1] "NO3_UDO"
[1] "PO4_UDO"
[1] "SiO4_UDO"
[1] "NH4_USF"
[1] "NO2_USF"
[1] "NO3_NO2_USF"
[1] "PO4_USF"
[1] "SiO4_USF"
[1] "pH"
[1] "Alkalinity_mol_kg"
[1] "Alkalinity_umol_kg"
[1] "TCO2"
[1] "fCO2"
[1] "pH_corrected"
[1] "TCO2_corrected"
[1] "fCO2_corrected"
[1] "Salinity_bottles"
[1] "Salinity_CTD"
[1] "Temperature"
[1] "Sigma_t"
[1] "TPP"
[1] "PIP"
[1] "POC_ug_kg"
[1] "PON_ug_kg"
[1] "POC_ug_L"
[1] "PN_ug_L"
[1] "C_N_particulate"
[1] "DON"
[1] "DOP"
[1] "DOC"
[1] "TOC"
[1] "PrimaryProductivity"
[1] "Chlorophyll"
[1] "Phaeopigments"
[1] "Total_Prokaryotes"
[1] "Bact_Biomass_mgC_m3"
[1] "Bact_Biomass_uMC"


In [9]:
# Combine the per-variable tables into one wide table keyed by `time_month`.
# NOTE(review): successive left joins keep only the months present in the
# first table (O2_ml_L) - confirm that dropping other months is intended.
niskin_intrp <- reduce(niskin_temp_store, left_join, by = "time_month")

# GET NISKIN WITH ACTUAL DATE

In [10]:
# For every variable, store a two-column table (the per-date mean under the
# variable's own name, plus `date`) in a list keyed by variable name.
# This replaces the previous contents of `niskin_temp_store`.
niskin_temp_store <- list()

for (variable in niskin_numeric) {
    print(variable)
    dat <- interpolateNiskin(variable)
    niskin_temp_store[[variable]] <- setNames(
        data.frame(dat$var, dat$date),
        c(variable, "date")
    )
}

[1] "O2_ml_L"
[1] "O2_umol_kg"
[1] "NO3_UDO"
[1] "PO4_UDO"
[1] "SiO4_UDO"
[1] "NH4_USF"
[1] "NO2_USF"
[1] "NO3_NO2_USF"
[1] "PO4_USF"
[1] "SiO4_USF"
[1] "pH"
[1] "Alkalinity_mol_kg"
[1] "Alkalinity_umol_kg"
[1] "TCO2"
[1] "fCO2"
[1] "pH_corrected"
[1] "TCO2_corrected"
[1] "fCO2_corrected"
[1] "Salinity_bottles"
[1] "Salinity_CTD"
[1] "Temperature"
[1] "Sigma_t"
[1] "TPP"
[1] "PIP"
[1] "POC_ug_kg"
[1] "PON_ug_kg"
[1] "POC_ug_L"
[1] "PN_ug_L"
[1] "C_N_particulate"
[1] "DON"
[1] "DOP"
[1] "DOC"
[1] "TOC"
[1] "PrimaryProductivity"
[1] "Chlorophyll"
[1] "Phaeopigments"
[1] "Total_Prokaryotes"
[1] "Bact_Biomass_mgC_m3"
[1] "Bact_Biomass_uMC"


In [11]:
# Combine the per-variable tables into one wide table keyed by `date`.
# NOTE(review): successive left joins keep only the dates present in the
# first table (O2_ml_L) - confirm that dropping other dates is intended.
niskin_intrp_wDATE <- reduce(niskin_temp_store, left_join, by = "date")

In [12]:
# Display the joined per-date table for visual inspection.
niskin_intrp_wDATE

O2_ml_L,date,O2_umol_kg,NO3_UDO,PO4_UDO,SiO4_UDO,NH4_USF,NO2_USF,NO3_NO2_USF,PO4_USF,⋯,DON,DOP,DOC,TOC,PrimaryProductivity,Chlorophyll,Phaeopigments,Total_Prokaryotes,Bact_Biomass_mgC_m3,Bact_Biomass_uMC
<dbl>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
4.066699,1995-11-08,177.2776,0.9217535,0.06162603,2.127115,,,,,⋯,,,78.70598,,,0.1793046,0.2700351,,,
3.438551,1995-12-13,149.8085,3.0948845,0.21142690,3.868919,,,,,⋯,,,,,0.4826918,0.1150949,0.1636278,0.7002847,2.131909,0.1776498
3.405264,1996-01-12,148.3119,7.5116266,0.30383795,4.868135,,,,,⋯,,,81.52567,,2.0569311,0.5825762,0.3451711,3.0429052,9.928947,0.8274119
3.304298,1996-02-13,143.9005,6.8777834,0.32541078,4.052312,,,,,⋯,,,70.76477,,1.2175528,0.3497634,0.2855399,2.5915307,8.455257,0.7046181
3.412656,1996-03-13,148.5819,6.5870977,0.33615601,5.161176,,,,,⋯,,,,,1.5397245,0.6365204,0.3778061,,,
3.569975,1996-04-16,155.4296,6.7700114,0.36042125,3.744388,,,,,⋯,,,89.78809,,0.9200121,2.2452814,1.3622393,1.6907867,5.531503,0.4609772
3.011871,1996-05-09,131.1264,8.8536038,0.41044416,4.817140,,,,,⋯,,,,,5.4573471,0.8740617,0.4288778,4.2037764,13.761397,1.1467797
3.631876,1996-06-12,158.1693,7.7127451,0.28324421,3.655185,,,,,⋯,,,81.29121,,1.0564614,0.2943889,0.2438052,5.0894137,16.604926,1.3837328
3.322255,1996-07-10,144.7358,5.8328983,0.33031846,2.519367,,,,,⋯,,,69.40608,,0.8158177,0.2510180,0.3100330,,,
3.355385,1996-08-06,146.1281,5.1449137,0.25216164,3.619522,,,,,⋯,,,,,0.7889660,0.2128294,0.3644782,3.2740335,10.681972,0.8901533


In [1]:
# Show the first six rows of the joined per-date table.
head(niskin_intrp_wDATE, n = 6L)

ERROR: Error in head(niskin_intrp): Objekt 'niskin_intrp' nicht gefunden


In [13]:
# NO3_USF is the combined NO3_NO2_USF value minus NO2_USF.
niskin_intrp_wDATE$NO3_USF <- niskin_intrp_wDATE$NO3_NO2_USF - niskin_intrp_wDATE$NO2_USF

# Merge the UDO and USF columns of each nutrient into one column by averaging
# the values available per date. With na.rm = TRUE a single available value is
# used as-is; when both are missing the mean is NaN.
# ungroup() returns a plain (ungrouped) tibble so later steps are not
# silently evaluated per date.
niskin_merged_nuts_wDATE <- niskin_intrp_wDATE %>%
    group_by(date) %>%
    mutate(NO3_merged = mean(c(NO3_UDO, NO3_USF), na.rm = TRUE),
           PO4_merged = mean(c(PO4_UDO, PO4_USF), na.rm = TRUE),
           SiO4_merged = mean(c(SiO4_UDO, SiO4_USF), na.rm = TRUE)) %>%
    ungroup()

In [14]:
# Export the per-date table, including the merged nutrient columns, to CSV.
write.csv(x = niskin_merged_nuts_wDATE, file = "../DATA/Niskin_mergedNutrients_wDATE.csv")

# NOW:
- join nutrient data into single column for NO2, NO3, NO3_NO2, PO4, SiO4, NH4
- export to csv to merge later

In [20]:
# List the column names of the month-keyed table.
colnames(niskin_intrp)

In [19]:
# NO3_USF is the combined NO3_NO2_USF value minus NO2_USF.
niskin_intrp[["NO3_USF"]] <- niskin_intrp[["NO3_NO2_USF"]] - niskin_intrp[["NO2_USF"]]

In [27]:
# Merge the UDO and USF columns of each nutrient into one column by averaging
# the values available per month (all rows sharing a `time_month` are pooled).
# With na.rm = TRUE a single available value is used as-is; when every value
# is missing the mean is NaN.
# ungroup() returns a plain (ungrouped) tibble so later steps are not
# silently evaluated per month.
niskin_merged_nuts <- niskin_intrp %>%
    group_by(time_month) %>%
    mutate(NO3_merged = mean(c(NO3_UDO, NO3_USF), na.rm = TRUE),
           PO4_merged = mean(c(PO4_UDO, PO4_USF), na.rm = TRUE),
           SiO4_merged = mean(c(SiO4_UDO, SiO4_USF), na.rm = TRUE)) %>%
    ungroup()

In [28]:
# Export the month-keyed table, including the merged nutrient columns, to CSV.
write.csv(x = niskin_merged_nuts, file = "../DATA/Niskin_mergedNutrients.csv")