In [1]:
# Start from a clean workspace: remove every object that ls() reports in the
# current environment (ls() skips names that begin with a dot by default).
rm(list = ls())
# Make the project root the working directory, so the relative paths used
# below ("base/...", "data_input/...", "data_output/...") resolve from there.
# NOTE(review): relies on rprojroot locating an RStudio project file above the
# current directory - confirm the notebook is always run inside the project.
setwd(rprojroot::find_rstudio_root_file())

In [2]:
### Forecasts
# input data

#' Run one forecast experiment from preprocessing to export.
#'
#' Sources the project scripts under `base/` on every call, then chains
#' `preprocess_data()`, `forecast_vol_ensemble()` and `export_data()`.
#'
#' @param datetime_initialisation Passed as the first positional argument of
#'   `preprocess_data()`.
#' @param forecast_mode Passed as the second positional argument of
#'   `preprocess_data()` and as `forecast_mode` to `forecast_vol_ensemble()`.
#' @param export Passed to `export_data()` as `export`.
#' @param reg_method Passed to `forecast_vol_ensemble()` as `method`.
#'   Defaults to "lm".
#' @param ... Forwarded unchanged to `preprocess_data()`.
#' @return The value returned by `export_data()`.
run_model <- function(datetime_initialisation,
                      forecast_mode,
                      export,
                      reg_method = "lm",
                      ...) {
  # Project scripts that provide the helpers used below.
  # "base/Knn_model.R" is intentionally not loaded (it was commented out).
  project_scripts <- c(
    "base/Preprocess_data.R",
    "base/Regression_model.R",
    "base/Export_data.R",
    "base/Scores.R"
  )
  for (script_path in project_scripts) {
    source(script_path)
  }

  # Pre-processed data
  prepared <- preprocess_data(datetime_initialisation, forecast_mode, ...)

  # Ensemble volume forecast
  ensemble <- forecast_vol_ensemble(
    data_input = prepared,
    forecast_mode = forecast_mode,
    method = reg_method
  )

  # Export in the requested format
  exported <- export_data(
    data_input = prepared,
    data_fore = ensemble,
    export = export
  )

  exported
}

In [3]:
# Catchment codes used in the experiments. Rows 32, 40, 45 and 49 of the
# attributes table are dropped; the original note lists catchments 6008005,
# 7317005, 7355002 and 8106001 as having no data.
# NOTE(review): presumably those four codes sit on exactly these rows -
# confirm against the CSV, since the drop is positional.
catchments_attributes_filename <-
  "data_input/attributes/attributes_49catchments_ChileCentral.csv"
cod_cuencas <-
  read.csv(catchments_attributes_filename)$cod_cuenca[-c(32, 40, 45, 49)]
cod_cuencas


In [4]:
# Initialisation months to evaluate: 4 through 12 (used later as the month
# argument of lubridate::make_date()).
months_initialisation <- 4:12
months_initialisation

In [5]:
# Load the preprocessing helpers.
# NOTE(review): grid_pred() is not defined in this notebook; presumably it
# comes from the script sourced here.
source("base/Preprocess_data.R")

# Candidate predictors: each group of variables is combined with the values
# 1..6 and an aggregation label ("mean", "sum" or "last").
a <- grid_pred(c("tem", "SOI", "BIENSO"), 1:6, "mean")
b <- grid_pred("pr", 1:6, "sum")
c <- grid_pred("STORAGE", 1:6, "last")

# Full list of single predictors to test.
predictors <- c(a, b, c)
predictors


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘lubridate’


The following objects are masked from ‘package:data.table’:

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union


Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


Loading required package: lattice

Loading required package: latticeExtra

Loading required package: polynom

Loading required package: reshape


Attaching package: ‘reshape’


The following object is masked from ‘package:lubridate’:

    stamp


The following object is masked from ‘package:

In [6]:
# Regression methods to compare; the names follow the model list at
# http://topepo.github.io/caret/available-models.html
# "ridge" and "logreg" are left out for now.
regression_methods <- c("lm", "glm", "knn", "gamLoess")
regression_methods

In [7]:
# Single test run of run_model(): one randomly drawn catchment, one randomly
# drawn initialisation month (year 2016) and one randomly drawn predictor,
# fitted with the "gamLoess" method and exported as scores.
# The arguments are kept inline on purpose: they are evaluated lazily inside
# run_model(), after it has sourced the project scripts.
model <- run_model(
  catchment_code = sample(cod_cuencas, 1),
  datetime_initialisation = lubridate::make_date(
    2016,
    sample(months_initialisation, 1),
    1
  ),
  horizon = horizon_mode(
    window_method = "static",
    month_start = 9,
    month_end = 3
  ),
  predictor_list = sample(predictors, 1),
  remove_wys = c(1950, 1963),
  water_units = waterunits(q = "m^3/s", y = "GL"),
  forecast_mode = "cv",
  data_location_paths = get_default_datasets_path(
    meteo = "ens30avg",
    hydro = "ERA5Ens_SKGE"
  ),
  export = "scores",
  reg_method = "gamLoess"
)

# Show which elements came back, then the elements themselves.
names(model)
model


Attaching package: ‘icesTAF’


The following object is masked from ‘package:lubridate’:

    period


Loading required package: ggplot2


Attaching package: ‘ggplot2’


The following object is masked from ‘package:latticeExtra’:

    layer


Loading required package: gam

Loading required package: splines

Loading required package: foreach

Loaded gam 1.22-1


“eval  2.3027”
“upperlimit  1.5897”
“extrapolation not allowed with blending”


rmse_det,r2_det,mae_obs,crps_ens,crpss_climatology
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
74.58892,0.06998959,53.68555,42.86223,0.2016058


In [8]:
# Parallel backend for the grid run below.
# foreach provides the loop constructs (foreach, %:%, %dopar%).
library(foreach)
# Alternative doParallel backend, currently disabled:
#library(doParallel)
#registerDoParallel(cores = parallel::detectCores())
# doSNOW is the backend actually registered here.
library(doSNOW)
library(dplyr)
# Start a cluster of 4 workers and register it for %dopar%.
# The cluster stays alive until stopCluster(cl) is called after the grid run.
cl <- makeCluster(4)
registerDoSNOW(cl)

#' Number of combinations in a full grid of the given vectors.
#'
#' @param ... Any number of vectors or lists.
#' @return The product of the lengths of the arguments, as a double: 1 when
#'   called without arguments, 0 when any argument is empty.
list_lengths <- function(...) {
  sizes <- lengths(list(...))
  n_combinations <- prod(sizes)
  n_combinations
}

# Axes of the experiment grid. Replace any of them with a subset for a
# shorter run.
p1 <- months_initialisation # initialisation months
p2 <- predictors            # single predictors
p3 <- cod_cuencas           # catchment codes
p4 <- regression_methods    # regression methods


  

Loading required package: iterators

Loading required package: snow



In [9]:
# Total number of model runs in the full grid; also the progress-bar maximum.
max_iter <- list_lengths(p1, p2, p3, p4)
max_iter

# Progress bar, advanced through the `progress` callback that is handed to
# the snow backend via .options.snow.
pb <- pbarETA::txtProgressBar(max = max_iter)
opts <- list(progress = function(n) setTxtProgressBar(pb, n))

# Run every (month, predictor, method, catchment) combination on the cluster.
# The three outer loops combine with "c", so the result is one flat list with
# one run_model() output per combination.
#
# The loop is wrapped in tryCatch(finally = ...) so that the progress bar is
# closed and the workers are shut down even when a run fails; without it an
# error would leave the worker processes running.
# The run_model() arguments are kept inline: they are evaluated lazily on the
# worker, after run_model() has sourced the project scripts.
model <- tryCatch(
  foreach(month_initialisation = p1, .combine = "c") %:%
    foreach(climate_predictor = p2, .combine = "c") %:%
    foreach(reg_method = p4, .combine = "c") %:%
    foreach(catchment_code = p3, .options.snow = opts) %dopar% {
      run_model(
        catchment_code = catchment_code,
        datetime_initialisation = lubridate::make_date(
          2022,
          month_initialisation,
          1
        ),
        horizon = horizon_mode(
          window_method = "static",
          month_start = 9,
          month_end = 3
        ),
        predictor_list = climate_predictor,
        water_units = waterunits(q = "m^3/s", y = "GL"),
        forecast_mode = "cv",
        data_location_paths = get_default_datasets_path(
          meteo = "ens30avg",
          hydro = "ERA5Ens_SKGE"
        ),
        export = "scores",
        reg_method = reg_method
      )
    },
  finally = {
    close(pb)
    stopCluster(cl)
  }
)

# Regroup from "one list per run" to "one list per output field", so each
# field can be accessed as model$<field>.
model <- purrr::transpose(model)
#stopImplicitCluster()
#stopImplicitCluster()




In [10]:
# First two rows of the run settings, one row per model run.
head(rbindlist(model$info), 2)

“Column 8 ['remove_wys'] of item 1 is length 0. This (and 48599 others like it) has been filled with NA (NULL for list columns) to make each item uniform.”


catchment_code,datetime_initialisation,predictor_list,horizon,data_location_paths,water_units,forecast_mode,remove_wys,predictor_list_corrected
<int>,<date>,<chr>,<list>,<list>,<list>,<chr>,<lgl>,<chr>
3414001,2022-04-01,tem_mean_1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,tem_mean_1months
3421001,2022-04-01,tem_mean_1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,tem_mean_1months


In [11]:
# First two rows of the verification scores, one row per model run.
head(rbindlist(model$scores), 2)

rmse_det,r2_det,mae_obs,crps_ens,crpss_climatology
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
24.66858,0.4789781,17.443319,13.702348,0.21446443
13.68732,0.251602,6.979387,6.530644,0.06429543


In [12]:
# Last two rows of the fitted-model description, one row per model run.
tail(rbindlist(model$model_info), 2)

n_members,method,preProcess
<dbl>,<chr>,<list>
1000,gamLoess,"center, scale"
1000,gamLoess,"center, scale"


In [13]:
# stopImplicitCluster()


# Stack each output field into one table (one row per run), then bind the
# tables side by side into a single results table.
df <- do.call(cbind, unname(lapply(model, rbindlist)))
df <- dplyr::rename(df, "predictor_name" = "predictor_list_corrected")

# Typed columns for later grouping and plotting.
#df$month_initialisation <- factor(df$month_initialisation,levels = months_initialisation)
# label = TRUE is spelled out because the shorthand `T` can be reassigned.
df$month_initialisation <- factor(
  lubridate::month(df$datetime_initialisation, label = TRUE)
)
df$predictor_name <- factor(df$predictor_name)
df$catchment_code <- as.numeric(df$catchment_code)
tail(df, 2)

# Save the results under a date-stamped file name. The output directory is
# created first if it is missing, so a long grid run is not lost to a
# missing folder.
output_file <- paste0(
  "data_output/scores/RDS/model_results_singles_models_",
  lubridate::today(),
  ".RDS"
)
dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)
saveRDS(df, output_file)




“Column 8 ['remove_wys'] of item 1 is length 0. This (and 48599 others like it) has been filled with NA (NULL for list columns) to make each item uniform.”


catchment_code,datetime_initialisation,predictor_list,horizon,data_location_paths,water_units,forecast_mode,remove_wys,predictor_name,rmse_det,r2_det,mae_obs,crps_ens,crpss_climatology,n_members,method,preProcess,month_initialisation
<dbl>,<date>,<chr>,<list>,<list>,<list>,<chr>,<lgl>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<list>,<ord>
7381001,2022-12-01,STORAGE_last_6months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,STORAGE_last_6months,24.92851,0.11273338,21.70706,14.27464,0.3423967,1000,gamLoess,"center, scale",Dec
8104001,2022-12-01,STORAGE_last_6months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,STORAGE_last_6months,859.66291,0.01129778,156.78291,444.50225,-1.8351447,1000,gamLoess,"center, scale",Dec
