In [2]:
library(tidyverse, warn.conflicts = FALSE)

# Helper libraries: reshaping, date handling, colour palettes, oceanographic tools (oceApprox)
library(reshape2)
library(lubridate)
library(viridis)
library(oce)

In [4]:
# Load the HPLC pigment table; the path is relative to the current working directory.
HPLC <- read.csv(file = "../../DATA/HPLC_SizePigments.csv")

In [8]:
# Treat negative measurements as missing values.
# Only numeric columns are checked: comparing the text columns (timestamp,
# dataset label) with 0 would be a string comparison whose outcome depends on
# the locale's collation order.
HPLC[] <- lapply(HPLC, function(column) {
    if (is.numeric(column)) {
        column[!is.na(column) & column < 0] <- NA
    }
    column
})

In [13]:
# Display the pigment table for inspection.
HPLC

X,Date_time_UTC,depth,Fuco,Perid,Allo,But_fuco,Hex_fuco,Zea,Tot_Chl_b,dataset
<int>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
0,1995-12-13 08:11:00,1,0.008,0.005,,0.002,0.008,0.004,0.018,BBRS
1,1995-12-13 08:11:00,7,0.009,0.006,0.001,0.002,0.010,0.008,0.025,BBRS
2,1995-12-13 08:11:00,15,0.008,0.010,0.001,0.003,0.011,0.008,0.032,BBRS
3,1995-12-13 08:11:00,25,0.065,0.008,0.006,0.021,0.086,0.024,0.141,BBRS
4,1995-12-13 08:11:00,35,0.034,0.009,0.002,0.010,0.042,0.012,0.107,BBRS
5,1995-12-13 08:11:00,55,0.009,,,0.003,0.011,0.005,0.053,BBRS
6,1995-12-13 08:11:00,75,0.007,0.002,,0.004,0.013,0.004,0.032,BBRS
7,1995-12-13 08:11:00,100,0.002,,,,0.003,0.001,0.005,BBRS
8,1996-01-12 08:35:00,1,0.694,0.080,0.004,0.002,0.034,0.008,0.087,BBRS
9,1996-01-12 08:35:00,7,0.599,0.164,0.005,0.016,0.032,0.008,0.084,BBRS


In [14]:
# Extract one or more variables from the dataset in long format.
#
# Arguments:
#   data     - data frame containing `Date_time_UTC` and `depth` columns plus
#              the column(s) named in `variable`.
#   variable - character vector with the name(s) of the column(s) to keep.
#
# Returns a long data frame with the columns `date` (Date parsed from
# `Date_time_UTC`), `depth`, `key` (name of the source column) and `value`.
prepdataframe <- function(data, variable) {
    # Add a calendar date derived from the timestamp column.
    with_date <- mutate(data, date = as.Date(Date_time_UTC))

    # Keep only the identifying columns and the requested variable(s).
    selected <- select(with_date, date, all_of(variable), depth)

    # Stack the variable column(s) into key/value pairs.
    gather(selected, key = "key", value = "value", -date, -depth)
}

In [69]:
# Interpolate each date's depth profile onto a regular depth grid.
#
# Arguments:
#   DF         - long data frame with the columns `date`, `depth` and `value`.
#   func       - interpolation method:
#                  "linear"  stats::approx(); only dates with more than three
#                            non-missing values are kept.
#                  "oce-rr"  oce::oceApprox() with method "rr"; values sharing
#                            the same depth and date are averaged first.
#   depth_grid - depths at which to interpolate; defaults to 0, 1, ..., 100.
#
# Returns a data frame grouped by `date` with the columns `date`,
# `value_int` (interpolated value) and `depth` (the grid depth).
#
# Stops with an error when `func` is not one of the supported methods.
interpolateDF <- function(DF, func, depth_grid = seq(0, 100, 1)) {
    known_methods <- c("linear", "oce-rr")
    if (!is.character(func) || length(func) != 1L || !(func %in% known_methods)) {
        stop("`func` must be one of: ", paste(known_methods, collapse = ", "),
             call. = FALSE)
    }

    if (func == "linear") {
        IntDF <- DF %>%
            group_by(date) %>%
            filter(sum(!is.na(value)) > 3) %>%
            # approx() returns list(x, y); only the interpolated y is needed.
            do(data.frame(value_int = with(., approx(depth, value, depth_grid)$y),
                          depth = depth_grid))
    } else {
        IntDF <- DF %>%
            group_by(depth, date) %>%
            # One value per depth/date; all grouping is dropped explicitly so
            # no "grouped output" message is printed.
            summarize(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
            group_by(date) %>%
            do(data.frame(value_int = with(., oceApprox(depth, value, depth_grid, "rr")),
                          depth = depth_grid))
    }

    IntDF
}

In [85]:
# Interpolate one variable's depth profiles and summarise them per date.
#
# Arguments:
#   data       - data frame accepted by prepdataframe().
#   var        - name of the column to interpolate.
#   depth_to   - upper bound (inclusive) of the depth range to summarise.
#   depth_from - lower bound (inclusive) of the depth range to summarise.
#   int_func   - interpolation method forwarded to interpolateDF().
#   noofNA     - a date is kept only if its full interpolated profile has
#                fewer than this many missing values.
#
# Returns a data frame grouped by `date` with the columns `date`,
# `value_sum` (sum of interpolated values in range), `var` (their mean) and
# `NAs` (number of missing interpolated values in range).
interpolateVAR <- function(data, var, depth_to = 100, depth_from = 0, int_func = 'oce-rr', noofNA = 90) {
    long_data <- prepdataframe(data, var)
    profiles <- interpolateDF(long_data, int_func)

    by_date <- group_by(profiles, date)

    # The NA threshold is applied to the whole interpolated profile of a date,
    # before the profile is restricted to the requested depth range.
    usable <- filter(by_date, sum(is.na(value_int)) < noofNA)
    in_range <- filter(usable, depth >= depth_from & depth <= depth_to)

    summarize(in_range,
              value_sum = sum(value_int, na.rm = TRUE),
              var = mean(value_int, na.rm = TRUE),
              NAs = sum(is.na(value_int)),
              .groups = "keep")
}

In [86]:
# head(interpolateVAR(HPLC, 'Fuco', int_func='oce-rr', noofNA=20), n=45)

In [87]:
# Pigment columns of the HPLC table that are interpolated and depth-averaged.
niskin_numeric <- c(
    "Fuco", "Perid", "Allo", "But_fuco",
    "Hex_fuco", "Zea", "Tot_Chl_b"
)

In [88]:
# Silence dplyr's "summarise() has grouped output by ..." messages.
options(dplyr.summarise.inform = FALSE)

# Build a named list with one data frame per variable. Each data frame holds
# the per-date mean of the interpolated profile between `depth_from` and
# `depth_to` (column named after the variable) and the `date` column.
depth_mean_store <- function(data, variables, depth_from = 0, depth_to = 100) {
    store <- lapply(variables, function(variable) {
        dat <- interpolateVAR(data, variable,
                              depth_to = depth_to, depth_from = depth_from)
        per_date <- data.frame(dat_var = dat$var, date = dat$date)
        names(per_date)[1] <- variable
        per_date
    })
    names(store) <- variables
    store
}

# Whole profile (interpolateVAR defaults: depths 0 to 100).
m100_temp_store <- depth_mean_store(HPLC, niskin_numeric)

HPLC_100m <- m100_temp_store %>%
    reduce(full_join, by = "date")

# Upper layer: depths 0 to 55.
m55_temp_store <- depth_mean_store(HPLC, niskin_numeric,
                                   depth_from = 0, depth_to = 55)

HPLC_55m <- m55_temp_store %>%
    reduce(full_join, by = "date")

# Lower layer: depths 55 to 100.
m55to100_temp_store <- depth_mean_store(HPLC, niskin_numeric,
                                        depth_from = 55, depth_to = 100)

HPLC_55to100m <- m55to100_temp_store %>%
    reduce(full_join, by = "date")

In [89]:
# Display the per-date means over depths 0 to 100 (one column per pigment).
HPLC_100m

Fuco,date,Perid,Allo,But_fuco,Hex_fuco,Zea,Tot_Chl_b
<dbl>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1.546140e-02,1995-12-13,4.756296e-03,0.002245815,6.478833e-03,2.002205e-02,7.020736e-03,5.005104e-02
2.517340e-01,1996-01-12,5.656378e-02,0.004328571,8.586503e-03,1.967274e-02,5.592911e-03,5.167403e-02
2.109865e-01,1996-02-13,1.251757e-02,0.004926706,3.813813e-03,2.178840e-02,5.666307e-03,3.738141e-02
2.650423e-01,1996-03-13,4.259729e-02,0.006622384,1.579506e-03,1.740885e-02,6.882034e-03,2.385225e-02
1.244170e+00,1996-04-16,3.351050e-02,,3.133199e-02,2.197452e-01,1.688123e-02,8.421497e-02
2.484146e-01,1996-05-09,,,4.112903e-03,4.374825e-02,1.095496e-02,3.301752e-02
7.735736e-02,1996-06-12,9.210526e-03,0.006195905,7.784578e-03,3.227479e-02,1.901639e-03,1.009808e-01
2.085173e-02,1996-07-10,4.740775e-03,,9.061954e-03,3.814565e-02,1.983287e-02,1.298491e-02
2.080487e-02,1996-08-06,2.987925e-03,0.001000000,1.212971e-02,4.532916e-02,1.544237e-02,1.424976e-01
3.243001e-02,1996-09-10,4.947449e-03,0.001500000,2.359551e-02,7.010561e-02,2.185246e-02,1.313699e-02


In [81]:
# `int_func` has to be passed by name: the third positional parameter of
# interpolateVAR() is `depth_to`, so a bare 'oce-rr' would be taken as the
# upper depth bound instead of selecting the interpolation method.
tail(interpolateVAR(HPLC, 'Perid', int_func = 'oce-rr', noofNA = 90), n = 45)

`summarise()` has grouped output by 'depth'. You can override using the `.groups` argument.



date,value_sum,var,NAs
<date>,<dbl>,<dbl>,<int>
2012-01-10,0.3422554,0.009778725,66
2012-02-07,0.3257514,0.009307182,66
2012-03-16,1.3948743,0.055794974,76
2012-04-10,1.2024438,0.012024438,1
2012-05-15,0.4635401,0.006180535,26
2012-07-26,0.3688013,0.01053718,66
2012-08-14,0.6844302,0.009125736,26
2012-09-10,0.5950421,0.007933895,26
2012-10-10,0.4355088,0.007918342,46
2012-11-08,0.4305064,0.006239224,32
