In [5]:
# Start from a clean session: remove every non-hidden object from the
# current environment.
rm(list = ls())
# Move to the RStudio project root so the relative path passed to source()
# below resolves.
setwd(rprojroot::find_rstudio_root_file())
# Load the project's preprocessing code.
# NOTE(review): presumably this file defines (or loads) preprocess_data() and
# the other helpers called later in this notebook - confirm.
source("base/Preprocess_data.R")

In [6]:
# Minimal preprocessing call: only the catchment, the initialisation date and
# the predictor list are passed explicitly.
data_EDA1 <- preprocess_data(
  catchment_code = "5410002",
  datetime_initialisation = "2016-5-1",
  predictor_list = c("pr_sum_-1months", "tem_mean_2months")
)

# names of the elements returned by preprocess_data()
names(data_EDA1)

In [7]:
# Same preprocessing call as above, with the optional settings spelled out:
# forecast horizon, dataset locations, water units, forecast mode and the
# water years to remove.
data_EDA2 <- preprocess_data(
  catchment_code = "5410002",
  datetime_initialisation = "2016-5-1",
  predictor_list = c("pr_sum_-1months", "tem_mean_2months"),
  horizon = horizon_mode(
    window_method = "dynamic",
    month_start = 9,
    month_end = 3
  ),
  data_location_paths = get_default_datasets_path(
    meteo = "ens30avg",
    hydro = "ERA5Ens_SKGE"
  ),
  water_units = waterunits(q = "m^3/s", y = "GL"),
  forecast_mode = "both",
  # Bind by name with `=`. Writing `remove_wys <- c(...)` here would assign a
  # variable in the calling environment and pass the value positionally to
  # whichever formal is still unmatched, not to `remove_wys`.
  remove_wys = c(1990, 1950, 2013)
)

# names of the elements returned by preprocess_data()
names(data_EDA2)

In [8]:
# PARAMETERS
dataset_meteo <- "ens30avg"
dataset_hydro <- "ERA5Ens_SKGE"

# Use the default dataset locations, where the data is already stored.
# These paths can be modified as long as the same list structure and names
# are kept. The meteo, hydro and climate-index filenames may be NULL.
data_location_paths <- get_default_datasets_path(
  meteo = dataset_meteo,
  hydro = dataset_hydro
)
data_location_paths

In [9]:
# PARAMETERS
catchment_code <- "5410002"
remove_wys <- c(1990, 1950, 2013)
# GL is equivalent to one million m3
water_units <- waterunits(q = "m^3/s", y = "GL")

# catchment data (raw forcings, flows)
catchment_data <- read_catchment_data(
  catchment_code = catchment_code,
  remove_wys = remove_wys,
  water_units = water_units,
  data_location_paths = data_location_paths
)
# names of the elements of the catchment data
print(names(catchment_data))

[1] "monthly_flows"        "monthly_meteo"        "monthly_hydro"       
[4] "monthly_climateindex" "raw_data_df"          "attributes_catchment"


In [10]:
# last rows of the catchment attributes table
print("catchment attributes:")
tail(catchment_data$attributes_catchment)

[1] "catchment attributes:"


Unnamed: 0_level_0,cod_cuenca,gauge_name,gauge_lat,gauge_lon,area_km2
Unnamed: 0_level_1,<int>,<chr>,<dbl>,<dbl>,<dbl>
21,5410002,Rio Aconcagua En Chacabuquito,-32.8503,-70.5094,2113.423


In [11]:
# last rows of the monthly flows table
print("monthly averaged flows:")
tail(catchment_data$monthly_flows)

[1] "monthly averaged flows:"


wy_simple,wym,Q_mm,Q_converted
<int>,<dbl>,<dbl>,<dbl>
2020,10,70.89504,56.97436
2020,11,42.97035,34.53285
2020,12,28.8014,23.14607
2021,1,18.07564,14.52637
2021,2,16.04798,12.89686
2021,3,17.81437,14.31641


In [12]:
# last rows of the monthly meteorology table
print("monthly averaged meteorology:")
tail(catchment_data$monthly_meteo)

[1] "monthly averaged meteorology:"


wy_simple,wym,pr,tem
<int>,<dbl>,<dbl>,<dbl>
2022,4,95.74,-1.161
2022,5,41.339,0.12
2022,6,1.09,0.557
2022,7,2.667,3.991
2022,8,1.985,8.589
2022,9,2.621,10.64


In [13]:
# last rows of the monthly hydrological model variables table
print("monthly averaged hydrological model variables:")
tail(catchment_data$monthly_hydro)

[1] "monthly averaged hydrological model variables:"


wy_simple,wym,AE,SLZ,SM,SP,SUZ,STORAGE
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2019,7,0.442,41.749,165.863,0,0,208.054
2019,8,0.544,30.032,152.008,0,0,182.584
2019,9,0.565,21.43,136.439,0,0,158.434
2019,10,0.42,19.015,124.232,0,0,143.667
2019,11,0.349,13.823,112.935,0,0,127.107
2019,12,0.242,9.828,103.779,0,0,113.849


In [14]:
# last rows of the monthly climate indices table
print("monthly selected climate indices:")
tail(catchment_data$monthly_climateindex)

[1] "monthly selected climate indices:"


Unnamed: 0_level_0,wy_simple,wym,MEIv2,PDO,SOI,ONI,OLR,NINO1.2,NINO3,NINO4,NINO3.4,ESPI,AAO,BIENSO
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
506,2022,5,-1.79,-1.945,1.7,-0.91,1.2,-0.6,-0.67,-1.09,-0.98,-1.23034,0.7313,-1.52
507,2022,6,-1.78,-1.724,2.7,-1.01,1.2,-1.02,-0.96,-1.17,-1.07,-1.16183,1.4685,-1.87
508,2022,7,-1.75,-1.346,2.8,-0.99,1.2,-1.79,-1.1,-1.12,-0.98,-1.19816,0.3303,-1.78
509,2022,8,-1.53,-1.986,0.5,-0.92,1.3,-1.13,-0.94,-0.99,-0.9,-0.7904,1.7134,-0.62
510,2022,9,-1.26,-1.837,3.5,-0.82,1.7,-0.46,-0.81,-0.84,-0.85,-1.23068,1.7004,-2.01
511,2022,10,-1.12,-0.96,2.3,-0.71,1.8,-0.57,-0.55,-0.66,-0.71,-0.98883,2.3037,-1.29


In [15]:
# initialisation date of the forecast and the date fields derived from it
datetime_initialisation <- "2022-5-01"
date_init <- initialisation_dates(datetime_initialisation)
date_init

$init_water_year_month
[1] 2

$init_water_year
[1] 2022

$init_month
[1] 5

$init_year
[1] 2022

$ymd_datetime
[1] "2022-05-01 UTC"

$datetime_initialisation
[1] "2022-5-01"


In [16]:
predictor_list <- c(
  "pr_sum_-1months",
  "tem_mean_5months",
  "SOI_mean_1months"
)

# build the predictor variables for the initialisation month
predictors <- predictors_generator(
  predictor_list = predictor_list,
  month_initialisation_index = date_init$init_water_year_month,
  catchment_data = catchment_data,
  remove_wys = remove_wys
)

predictors




wy_simple,pr_sum_1months,tem_mean_5months,SOI_mean_1months
<dbl>,<dbl>,<dbl>,<dbl>
1980,167.752,8.3902,-1.0
1981,2.253,8.9372,-0.2
1982,0.225,9.387,0.1
1983,18.444,8.4892,-1.5
1984,4.312,9.5076,0.6
1985,4.425,8.6972,1.9
1986,23.608,9.4676,0.5
1987,14.411,9.6464,-2.3
1988,4.034,8.8672,0.2
1989,9.789,9.9268,2.7


In [17]:

# Forecast window configuration.
# window_method: two types of forecast window methods
#   "dynamic": moving window if date_initialisation is after 'month_start'
#              [month_start:month_end]
#   "static":  always fixed window in the horizon [month_start:month_end]
# month_start, month_end: forecast horizon in each year.
#   Typically sep:9 to mar:3 in Chile.
horizon <- horizon_mode(
  window_method = "dynamic",
  month_start = 9,
  month_end = 3
)

# target period of the forecast
forecast_horizon <- get_forecast_horizon(date_init, horizon)
forecast_horizon
 

In [18]:
# build the target variable (y: VOLUME) over the forecast horizon
predictant <- predictant_generator(
  forecast_horizon = forecast_horizon,
  catchment_data = catchment_data,
  water_units = water_units
)

predictant$y

volume,wy_simple
<dbl>,<int>
468.9446,1981
1835.3082,1982
1145.2406,1983
1341.3946,1984
665.0035,1985
1298.1427,1986
1805.4835,1987
446.5912,1988
725.5958,1989
1044.8438,1991


In [19]:
# Split X (predictors), y (target volume) and q (flow) into training sets
# around the target water year.
# NOTE(review): the argument is spelled `predictor` here, while testing_set()
# further down is called with `predictors`; confirm the formal name in
# training_set().
train_set <- training_set(
  predictor = predictors,
  predictant = predictant,
  water_year_target = date_init$init_water_year
)
names(train_set)

In [20]:
# display the full content of the training set
train_set

Unnamed: 0_level_0,pr_sum_1months,tem_mean_5months,SOI_mean_1months
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
1981,2.253,8.9372,-0.2
1982,0.225,9.387,0.1
1983,18.444,8.4892,-1.5
1984,4.312,9.5076,0.6
1985,4.425,8.6972,1.9
1986,23.608,9.4676,0.5
1987,14.411,9.6464,-2.3
1988,4.034,8.8672,0.2
1989,9.789,9.9268,2.7
1991,53.905,9.9662,-1.0

Unnamed: 0_level_0,volume
Unnamed: 0_level_1,<dbl>
1981,468.9446
1982,1835.3082
1983,1145.2406
1984,1341.3946
1985,665.0035
1986,1298.1427
1987,1805.4835
1988,446.5912
1989,725.5958
1991,1044.8438

Unnamed: 0_level_0,sep,oct,nov,dic,ene,feb,mar
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1981,12.636667,20.119355,37.16667,32.93871,34.06774,25.98929,16.2871
1982,44.91,50.854839,88.78333,180.24815,167.22581,112.8,55.63226
1983,20.163333,52.364516,94.19667,116.43871,75.09032,51.46786,26.53226
1984,24.906667,66.677419,90.74333,118.37742,100.91935,63.21071,45.83226
1985,14.516667,20.974194,58.08333,57.43548,45.10968,34.58571,23.26774
1986,25.63,41.958065,78.32,148.3129,100.6129,61.475,37.66452
1987,35.553333,55.051613,146.78333,176.06452,149.87097,81.26786,43.24516
1988,10.509333,18.393548,32.63667,31.71935,31.08387,29.20357,17.40968
1989,23.03,38.5,77.97,58.26129,36.33226,25.82143,16.74839
1991,33.91,38.248387,77.61333,85.11935,80.21935,50.73929,32.75806


In [21]:
# Predictor names as they appear in the generated table (the period in a
# name may change if -1 is set), i.e. every column except "wy_simple".
predictor_list_corrected <- colnames(select(predictors, -"wy_simple"))
predictor_list_corrected

In [22]:
# testing set for the target water year
test_set <- testing_set(
  predictors = predictors,
  predictant = predictant,
  water_year_target = date_init$init_water_year
)
test_set

Unnamed: 0_level_0,pr_sum_1months,tem_mean_5months,SOI_mean_1months
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
2022,20.373,9.99,2.8


Unnamed: 0_level_0,pr_sum_1months,tem_mean_5months,SOI_mean_1months
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
2022,20.373,9.99,2.8
