In [1]:
# Start from a clean session: remove every (non-hidden) object from the
# current environment.
rm(list = ls())
# Make the RStudio project root the working directory; the rest of the
# notebook uses paths relative to it (e.g. "base/...", "data_input/...").
setwd(rprojroot::find_rstudio_root_file())

In [2]:
### Forecasts
# input data

#' Run one forecast experiment: preprocess, forecast, export.
#'
#' The project modules under `base/` are sourced on every call (with the
#' default `local = FALSE`, i.e. into the global environment).
#' NOTE(review): presumably this is what makes the helpers available on
#' parallel workers -- confirm before hoisting the `source()` calls.
#'
#' @param datetime_initialisation Forwarded to `preprocess_data()` as its
#'   first positional argument.
#' @param forecast_mode Forwarded to `preprocess_data()` (second positional
#'   argument) and to `forecast_vol_ensemble()`.
#' @param export Forwarded to `export_data()`.
#' @param reg_method Forwarded to `forecast_vol_ensemble()` as `method`;
#'   defaults to 'lm'.
#' @param ... Additional arguments forwarded to `preprocess_data()`.
#' @return The value returned by `export_data()`.
run_model <- function(datetime_initialisation, forecast_mode, export,
                      reg_method = 'lm', ...) {
  # "base/Knn_model.R" is deliberately not loaded (disabled in the original).
  module_files <- c(
    "base/Preprocess_data.R",
    "base/Regression_model.R",
    "base/Export_data.R",
    "base/Scores.R"
  )
  for (module_file in module_files) {
    source(module_file)
  }

  # Pre-processed data.
  data_input <- preprocess_data(datetime_initialisation, forecast_mode, ...)

  # Ensemble volume forecast.
  data_fore <- forecast_vol_ensemble(
    data_input = data_input,
    forecast_mode = forecast_mode,
    method = reg_method
  )

  # Package inputs and forecast in the form requested through `export`.
  output <- export_data(
    data_input = data_input,
    data_fore = data_fore,
    export = export
  )

  return(output)
}

In [3]:
# All available catchments. Rows 32, 40, 45 and 49 of the attributes table are
# dropped by position; presumably these are the catchments without data
# (6008005, 7317005, 7355002, 8106001) -- verify against the CSV.
catchments_attributes_filename <-
  "data_input/attributes/attributes_49catchments_ChileCentral.csv"
cod_cuencas <- read.csv(catchments_attributes_filename)$cod_cuenca[
  -c(32, 40, 45, 49)
]
cod_cuencas


In [4]:
# Months of initialisation: calendar months 5 through 12.
months_initialisation <- seq(from = 5, to = 12)
months_initialisation

In [5]:
source("base/Preprocess_data.R")

# Candidate predictors built with grid_pred(); each call takes a set of
# variable names, a numeric offset and an aggregation label.
# NOTE: the object named `c` does not break the later call `c(a, b, c)`,
# because R skips non-function bindings when looking up a function.
a <- grid_pred(c("SOI", "BIENSO"), -1, "mean")
b <- grid_pred(c("pr"), -1, "sum")
c <- grid_pred(c("STORAGE", "SP"), 1, "last")

# Pool the three groups into a single collection of predictors.
predictors <- c(a, b, c)
predictors


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘lubridate’


The following objects are masked from ‘package:data.table’:

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union


Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


Loading required package: lattice

Loading required package: latticeExtra

Loading required package: polynom

Loading required package: reshape


Attaching package: ‘reshape’


The following object is masked from ‘package:lubridate’:

    stamp


The following object is masked from ‘package:

In [6]:
# Regression methods to evaluate. Reference list of method names:
# http://topepo.github.io/caret/available-models.html
regression_methods <- c('lm') # other candidates: 'ridge', 'logreg'
regression_methods

In [7]:
# Smoke test of run_model(): one run on a randomly drawn catchment,
# initialisation month (year 2016) and predictor.
model <- run_model(
  catchment_code = sample(cod_cuencas, 1),
  datetime_initialisation = lubridate::make_date(
    2016,
    sample(months_initialisation, 1),
    1
  ),
  horizon = horizon_mode(
    window_method = "static",
    month_start = 9,
    month_end = 3
  ),
  predictor_list = sample(predictors, 1),
  remove_wys = c(1950, 1963),
  water_units = waterunits(q = "m^3/s", y = "GL"),
  forecast_mode = "cv",
  data_location_paths = get_default_datasets_path(
    meteo = "ens30avg",
    hydro = "ERA5Ens_SKGE"
  ),
  export = 'scores',
  reg_method = 'glm'
)

# Show the names of the returned components, then the full result.
names(model)
model


Attaching package: ‘icesTAF’


The following object is masked from ‘package:lubridate’:

    period


Loading required package: ggplot2


Attaching package: ‘ggplot2’


The following object is masked from ‘package:latticeExtra’:

    layer




rmse_det,r2_det,mae_obs,crps_ens,crpss_climatology
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
117.0077,0.1230597,83.91234,64.08004,0.2363455


In [8]:
library(foreach)
# Alternative backend, currently disabled:
# library(doParallel)
# registerDoParallel(cores = parallel::detectCores())
library(doSNOW)
library(dplyr)

# Start a 4-worker cluster and register it as the foreach backend.
cl <- makeCluster(4)
registerDoSNOW(cl)

# Parameter sets spanned by the experiment grid.
p1 <- months_initialisation # months_initialisation
p2 <- predictors # predictors
p3 <- cod_cuencas # cod_cuencas
p4 <- regression_methods


  

Loading required package: iterators

Loading required package: snow



In [9]:
#' Number of combinations spanned by a set of objects.
#'
#' @param ... Any number of vectors or lists.
#' @return A single number: the product of the lengths of all arguments
#'   (1 when called with no arguments).
list_lengths <- function(...) {
  prod(lengths(list(...)))
}

# Total number of runs: product of the lengths of the four parameter sets.
# It is used as the maximum of the progress bar.
max_iter <- list_lengths(p1, p2, p3, p4)
max_iter

pb <- pbarETA::txtProgressBar(max = max_iter)
# Progress callback handed to the snow backend through `.options.snow`.
opts <- list(progress = function(n) setTxtProgressBar(pb, n))

# Run run_model() for every combination of initialisation month, predictor,
# regression method and catchment on the registered cluster. The three outer
# loops concatenate their results with "c", so `model` is one flat list with
# one element per run.
# The `finally` clause closes the progress bar and stops the cluster even when
# an iteration fails, so worker processes are not left running after an error.
model <- tryCatch(
  foreach(month_initialisation = p1, .combine = "c") %:%
    foreach(climate_predictor = p2, .combine = "c") %:%
    foreach(reg_method = p4, .combine = "c") %:%
    foreach(catchment_code = p3, .options.snow = opts) %dopar% {
      run_model(
        catchment_code = catchment_code,
        datetime_initialisation = lubridate::make_date(
          2022,
          month_initialisation,
          1
        ),
        horizon = horizon_mode(
          window_method = "static",
          month_start = 9,
          month_end = 3
        ),
        predictor_list = climate_predictor,
        water_units = waterunits(q = "m^3/s", y = "GL"),
        forecast_mode = "cv",
        data_location_paths = get_default_datasets_path(
          meteo = "ens30avg",
          hydro = "ERA5Ens_SKGE"
        ),
        export = 'scores',
        reg_method = reg_method
      )
    },
  finally = {
    close(pb)
    stopCluster(cl)
    # stopImplicitCluster()
  }
)

# Turn the list of runs into a list of components (one list per component
# name, each holding that component for every run).
model <- purrr::transpose(model)




In [10]:
# First two rows of the per-run `info` component, bound into one table.
head(rbindlist(model$info), 2)

“Column 8 ['remove_wys'] of item 1 is length 0. This (and 1799 others like it) has been filled with NA (NULL for list columns) to make each item uniform.”


catchment_code,datetime_initialisation,predictor_list,horizon,data_location_paths,water_units,forecast_mode,remove_wys,predictor_list_corrected
<int>,<date>,<chr>,<list>,<list>,<list>,<chr>,<lgl>,<chr>
3414001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months
3421001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months


In [11]:
# First two rows of the per-run `scores` component, bound into one table.
head(rbindlist(model$scores), 2)

rmse_det,r2_det,mae_obs,crps_ens,crpss_climatology
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
22.7869,0.05570557,17.443319,12.429719,0.28742239
13.39525,0.01733915,6.979387,6.304583,0.09668525


In [12]:
# Last two rows of the per-run `model_info` component, bound into one table.
tail(rbindlist(model$model_info), 2)

n_members,method,preProcess
<dbl>,<chr>,<list>
1000,lm,"center, scale"
1000,lm,"center, scale"


In [14]:
# stopImplicitCluster()

# Bind every component of `model` (e.g. info, scores, model_info) row-wise
# across runs, then put the resulting tables side by side: one row per run.
df <- do.call(cbind, unname(lapply(model, function(x) rbindlist(x))))
df <- dplyr::rename(df, 'predictor_name' = 'predictor_list_corrected') # modify names

# Derived and re-typed columns.
# df$month_initialisation <- factor(df$month_initialisation, levels = months_initialisation)
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df$month_initialisation <- factor(
  lubridate::month(df$datetime_initialisation, label = TRUE)
)
df$predictor_name <- factor(df$predictor_name)
df$catchment_code <- as.numeric(df$catchment_code)

# Save the table under a file name stamped with today's date. The output
# folder is created first (a no-op when it already exists) so that the save
# cannot fail on a missing directory after the whole run has completed.
scores_rds_path <- paste0(
  "data_output/scores/RDS/model_results_singles_models_",
  today(),
  ".RDS"
)
dir.create(dirname(scores_rds_path), recursive = TRUE, showWarnings = FALSE)
saveRDS(df, scores_rds_path)
df



“Column 8 ['remove_wys'] of item 1 is length 0. This (and 1799 others like it) has been filled with NA (NULL for list columns) to make each item uniform.”


catchment_code,datetime_initialisation,predictor_list,horizon,data_location_paths,water_units,forecast_mode,remove_wys,predictor_name,rmse_det,r2_det,mae_obs,crps_ens,crpss_climatology,n_members,method,preProcess,month_initialisation
<dbl>,<date>,<chr>,<list>,<list>,<list>,<chr>,<lgl>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<list>,<ord>
3414001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,22.786897,0.0557055744,17.443319,12.429719,0.28742239,1000,lm,"center, scale",May
3421001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,13.395248,0.0173391485,6.979387,6.304583,0.09668525,1000,lm,"center, scale",May
3430003,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,24.986069,0.0128267385,19.328404,13.887786,0.28148302,1000,lm,"center, scale",May
3820001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,170.104270,0.0045776825,98.546479,85.314087,0.13427565,1000,lm,"center, scale",May
4302001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,6.842183,0.0184270381,4.390249,3.592372,0.18173846,1000,lm,"center, scale",May
4311001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,36.456323,0.0039410761,21.593582,19.321614,0.10521499,1000,lm,"center, scale",May
4313001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,66.457041,0.0069352557,43.858067,35.236821,0.19657151,1000,lm,"center, scale",May
4320001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,248.380180,0.0010133125,153.682344,126.620655,0.17608847,1000,lm,"center, scale",May
4501001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,64.379012,0.0001673174,41.738396,34.123806,0.18243610,1000,lm,"center, scale",May
4503001,2022-05-01,SOI_mean_-1months,"static, 9 , 3","data_input/attributes/attributes_49catchments_ChileCentral.csv , data_input/flows/flows_mm_monthly_49catchments_ChileCentral.csv , data_input/meteo_variables/meteo_monthly_catchments_ChileCentral_ens30avg_1979_present.csv , data_input/storage_variables/hydro_variables_monthly_catchments_ChileCentral_ERA5Ens_SKGE.csv, data_input/climate_index_variables/indices_mensuales_1979_present.csv","m^3/s, GL",cv,,SOI_mean_1months,94.417766,0.0012335323,51.465252,50.352011,0.02163092,1000,lm,"center, scale",May
