# Write ERA5 demand predictions

In [1]:
import xarray as xr
import pandas as pd
import numpy as np

from sklearn.ensemble import ExtraTreesRegressor

In [2]:
# Switch the working directory before importing the project-local `functions` module
# (presumably the module lives in this directory — TODO confirm).
%cd /g/data/w42/dr6273/work/demand_model/

import functions as fn

/g/data/w42/dr6273/work/demand_model


In [3]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported modules
# (e.g. `functions`) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Set global variables

In [4]:
# Root directory of stored model results: passed to fn.read_results and used as the
# prefix of the CSV output path when predictions are written.
RESULTS_PATH = "/g/data/w42/dr6273/work/projects/Aus_energy/model_results/"

In [5]:
# Market identifier forwarded to fn.read_results, fn.get_predictor_files and fn.get_filename.
MARKET = "NEM" # "NEM" or "EU"

In [6]:
# Flag forwarded to fn.remove_time (applied to the demand series) and to the
# result-lookup / file-name helpers.
REMOVE_WEEKEND = True

In [7]:
# Flag forwarded to fn.remove_time (applied to the demand series) and to the
# result-lookup / file-name helpers.
REMOVE_XMAS = True

In [8]:
# Month setting forwarded to fn.remove_time and to the result-lookup / file-name helpers.
# NOTE(review): the current value 0 lies outside the documented [1, 12] range —
# presumably it means "remove no month"; confirm in fn.remove_time.
REMOVE_MONTH = 0 # integer: [1, 12]

In [9]:
# Mask label used to locate stored results and predictor files (it appears in the
# predictor file names listed further down).
MASK_NAME = "pop_dens_mask"

In [10]:
# NOTE(review): not referenced by any other cell visible in this notebook —
# confirm whether it is still needed.
TIME_COLUMNS = []

In [11]:
# Training-period bounds: used to look up stored results and to name the output files;
# FIRST_TRAIN_YEAR is also the start of the window passed to fn.sel_train_test.
FIRST_TRAIN_YEAR = 2010
LAST_TRAIN_YEAR = 2019

In [12]:
# Test-period bounds: the number of demand time steps between these years sets the
# size of the held-out split; LAST_TEST_YEAR is also the end of the window passed to
# fn.sel_train_test.
FIRST_TEST_YEAR = 2020
LAST_TEST_YEAR = 2020

In [13]:
# Feature-set label forwarded to fn.read_results and fn.get_filename.
N_FEATURES = "parsimonious"

In [14]:
# Passed as `detrended=` to fn.get_predictor_files when locating the predictor files.
DETREND = False

In [15]:
# Regions processed below: one model is fitted and one CSV is written per entry.
regions = ["NEM", "QLD", "NSW", "VIC", "SA", "TAS"]

### Load features and hyperparameters

In [16]:
# Stored feature-selection results, identified by the market, mask, train/test years
# and removal settings configured above. Indexed per region (features[r]) further down.
features = fn.read_results(
    "feature_selection", MARKET, regions, MASK_NAME,
    FIRST_TRAIN_YEAR, LAST_TRAIN_YEAR, FIRST_TEST_YEAR, LAST_TEST_YEAR,
    REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, N_FEATURES, RESULTS_PATH
)

In [17]:
# Stored hyperparameters, identified by the same settings as the feature-selection
# results. The model-fitting cell reads n_estimators, min_samples_leaf, max_depth and
# max_leaf_nodes from hyps[r] for each region r.
hyps = fn.read_results(
    "hyperparameters", MARKET, regions, MASK_NAME,
    FIRST_TRAIN_YEAR, LAST_TRAIN_YEAR, FIRST_TEST_YEAR, LAST_TEST_YEAR,
    REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, N_FEATURES, RESULTS_PATH
)

### Fit model

In [18]:
# Read the `demand_stl` variable from the daily demand file and pass it through
# fn.remove_time with the weekend / Christmas / month settings, in one chained step.
dem_da = xr.open_dataset(
    "/g/data/w42/dr6273/work/projects/Aus_energy/data/energy_demand/"
    "daily_demand_2010-2020_stl.nc"
)["demand_stl"].pipe(fn.remove_time, REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH)

In [19]:
# Select each region's demand series, restore "region" as a length-1 dimension, and
# stack the pieces back together in the order given by `regions`.
da_list = [
    dem_da.sel(region=region_name).expand_dims({"region": [region_name]})
    for region_name in regions
]
demand = xr.concat(da_list, "region")

In [25]:
# Prepare predictors
# The helper returns one NetCDF path per predictor (the list is displayed in the next
# cell); all files are opened together as a single dataset. With compat="override",
# xarray takes conflicting variables from the first file instead of comparing them.
files = fn.get_predictor_files(MARKET, MASK_NAME, detrended=DETREND)
pred_ds = xr.open_mfdataset(files, combine="nested", compat="override")

In [26]:
# Display the list of predictor files that were opened.
files

['/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/mtpr_era5_daily_1959-2022_NEM_pop_dens_mask.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/10w_era5_daily_1959-2022_NEM_pop_dens_mask.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/msdwswrf_era5_daily_1959-2022_NEM_pop_dens_mask.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/rh_era5_daily_1959-2022_NEM_pop_dens_mask.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/cdd_24_rollmean3_era5_daily_1959-2022_NEM_pop_dens_mask.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/2t_rollmean4_era5_daily_1959-2022_NEM_pop_dens_mask.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/2tmin_era5_daily_1959-2022_NEM_pop_dens_mask.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/2tmax_era5_daily_1959-2022_NEM_pop_dens_mask.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/hdd_18_ro

In [28]:
# Open a single predictor file on its own for inspection.
# NOTE(review): `t4` is only displayed in the next cell and not used elsewhere in the
# visible notebook — looks like a debugging leftover; confirm before removing.
t4 = xr.open_mfdataset('/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/2t_rollmean4_era5_daily_1959-2022_NEM_pop_dens_mask.nc')

In [29]:
# Display the individually opened predictor dataset.
t4

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [22]:
# Prepare dataframe for machine learning
# One frame per region: the "demand" column first, followed by the selected
# predictors in reversed order.
region_dfs = {}
for r in regions:
    region_frame = fn.to_dataframe(dem_da, pred_ds, r)
    ordered_preds = fn.parse_features(fn.sel_model(features[r])["feature_names"])[::-1]
    print(ordered_preds)
    region_dfs[r] = region_frame[["demand"] + ordered_preds]

['t2m', 't2m3', 'cdd', 'hdd', 'cdd4', 'q', 'hdd4', 't2max', 't2min', 't2m4', 'msdwswrf', 'w10', 'mtpr']


KeyError: "['t2m3', 'cdd4', 'hdd4', 't2max', 't2min', 't2m4'] not in index"

In [24]:
# Display the combined predictor dataset to check which variables it contains.
pred_ds

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 10 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 10 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.07 MiB 1.07 MiB Shape (23376, 6) (23376, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  23376,

Unnamed: 0,Array,Chunk
Bytes,1.07 MiB,1.07 MiB
Shape,"(23376, 6)","(23376, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [36]:
# Fit one ExtraTreesRegressor per region using the stored hyperparameters.
models = {}

# Number of demand time steps in the test period. It does not depend on the region,
# so it is computed once instead of on every loop iteration.
test_len = dem_da.sel(
    time=slice(str(FIRST_TEST_YEAR), str(LAST_TEST_YEAR))
).time.values.shape[0]

# Hyperparameters read from `hyps` and handed to the regressor as integers.
hyp_names = ("n_estimators", "min_samples_leaf", "max_depth", "max_leaf_nodes")

for r in regions:
    # Unshuffled split with `test_len` rows held out (presumably the final rows,
    # i.e. the test years — confirm in fn.split). Only the training part is used here.
    train_X, test_X, train_y, test_y = fn.split(
        fn.sel_train_test(region_dfs[r], FIRST_TRAIN_YEAR, LAST_TEST_YEAR),
        "demand",
        test_size=test_len,
        random_state=0,
        shuffle=False
    )

    # Finalise model
    # `.item()` extracts the single stored value as a Python scalar; calling int()
    # directly on a one-element array is deprecated in recent NumPy releases.
    hyp_values = {
        name: int(hyps[r].loc[name].values.item()) for name in hyp_names
    }
    rf = ExtraTreesRegressor(random_state=0, **hyp_values)

    # The training features are also passed as the prediction set; the second return
    # value is discarded and only the first (the model used for prediction later) is kept.
    models[r], _ = fn.predict_forest(train_y, train_X, train_X, rf)

### Predict and write

In [61]:
# Predict demand from the full predictor record for each region and write it to CSV.
predictions = {}

# Year labels used in the output file names (the predictor files cover 1959-2022).
first_pred_year, last_pred_year = "1959", "2022"

# RESULTS_PATH already ends with a slash; strip it so the output path has no "//".
out_dir = RESULTS_PATH.rstrip("/") + "/extrapolated/random_forest/"

for r in regions:
    # Table of all predictors for this region: rows are time steps, columns are variables.
    pred_arr = pred_ds.sel(region=r).to_array("variable")
    df = pd.DataFrame(
        pred_arr.transpose(),
        columns=pred_arr["variable"],
        index=pred_arr["time"]
    )

    # Keep only the selected features, in the same reversed order that was used to
    # build the training frames, so the columns line up with what the model was fit on.
    feature_order = fn.parse_features(fn.sel_model(features[r])["feature_names"])[::-1]
    print(feature_order)

    # Drop time steps where any selected predictor is missing before predicting.
    df = df[feature_order].dropna()

    model = models[r]
    # Evaluate the right-hand side first: the feature matrix is taken before the
    # "prediction" column is added to the frame.
    df["prediction"] = model.predict(df.to_numpy())
    predictions[r] = df

    filename = fn.get_filename(
        "extrapolated", MARKET, r, MASK_NAME,
        FIRST_TRAIN_YEAR, LAST_TRAIN_YEAR, first_pred_year, last_pred_year,
        REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, N_FEATURES
    )
    df.to_csv(out_dir + filename + ".csv")

['t2m', 't2m3', 'cdd', 'hdd', 'cdd4', 'q', 'hdd4', 't2max', 't2min', 't2m4', 'msdwswrf', 'w10', 'mtpr']
['t2m', 't2m3', 'q', 'hdd4', 't2max', 'cdd3', 'rh', 'msdwswrf']
['t2m', 't2m3', 'q', 't2max', 't2m4', 'msdwswrf', 'w10']
['t2m3', 'cdd', 'hdd', 't2max', 't2m4', 'msdwswrf', 'w10']
['t2m3', 'cdd', 'q', 't2max', 't2m4', 'msdwswrf', 'w10']
['t2m', 't2m3', 'rh', 'msdwswrf', 'w10']
