# Write ERA5 demand predictions

In [2]:
import xarray as xr
import pandas as pd
import numpy as np

from sklearn.ensemble import ExtraTreesRegressor

In [3]:
# Change into the project directory before importing; presumably this is what
# makes the local `functions` module importable -- confirm if the path changes.
%cd /g/data/w42/dr6273/work/demand_model/

# Project helper module; every fn.* call in this notebook comes from here.
import functions as fn

/g/data/w42/dr6273/work/demand_model


In [4]:
# Load IPython's autoreload extension and select mode 2, which reloads imported
# modules before each cell runs.
%load_ext autoreload
%autoreload 2

### Set global variables

In [5]:
# Root directory of the model results: passed to fn.read_results when loading the
# selected features and hyperparameters, and used as the base of the CSV output path.
RESULTS_PATH = "/g/data/w42/dr6273/work/projects/Aus_energy/model_results/"

In [6]:
# Market identifier passed to the fn.* helpers (read_results, get_predictor_files,
# get_calendar, get_filename).
MARKET = "NEM" # "NEM" or "EU"

In [7]:
# Passed to fn.read_results and fn.get_filename, so it is part of what identifies
# the result files that are read and written.
REMOVE_WEEKEND = True

In [8]:
# Passed to fn.read_results and fn.get_filename, so it is part of what identifies
# the result files that are read and written.
REMOVE_XMAS = True

In [9]:
# Passed to fn.read_results and fn.get_filename.
# NOTE(review): valid month numbers are 1-12, but the value used here is 0;
# presumably 0 means "do not remove any month" -- confirm against the fn helpers.
REMOVE_MONTH = 0 # integer month in [1, 12]; 0 presumably disables month removal

In [10]:
# Spatial mask label passed to fn.read_results, fn.get_predictor_files and
# fn.get_filename; it also appears in the predictor file names listed further down.
MASK_NAME = "pop_dens_mask"

In [11]:
# NOTE(review): not referenced by any cell visible in this notebook -- confirm
# whether it is still needed.
TIME_COLUMNS = []

In [12]:
# Training period (years). Used to look up saved results, as the start of the
# train/test selection, and in the output file name.
FIRST_TRAIN_YEAR = 2010
LAST_TRAIN_YEAR = 2016

In [13]:
# Test period (years). Used to look up saved results, to count the number of
# held-out time steps, and as the end of the train/test selection.
FIRST_TEST_YEAR = 2017
LAST_TEST_YEAR = 2019

In [14]:
# Feature-set label passed to fn.read_results and fn.get_filename.
N_FEATURES = "parsimonious"

In [15]:
# Passed as `detrended=` to fn.get_predictor_files; when true, "_detrended" is
# also appended to the output file name.
DETREND = True

In [16]:
# Regions iterated over by every per-region loop below; also used as keys into
# `features`/`hyps` and as labels for the `region` coordinate of the predictors.
regions = ["NEM", "QLD", "NSW", "VIC", "SA", "TAS"]

### Load features and hyperparameters

In [17]:
# Feature-selection results for the configuration above; indexed by region later on.
_feature_selection_args = (
    MARKET, regions, MASK_NAME,
    FIRST_TRAIN_YEAR, LAST_TRAIN_YEAR, FIRST_TEST_YEAR, LAST_TEST_YEAR,
    REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, N_FEATURES, RESULTS_PATH,
)
features = fn.read_results("feature_selection", *_feature_selection_args)

In [18]:
# Tuned hyperparameters for the configuration above; indexed by region later on.
_hyperparameter_args = (
    MARKET, regions, MASK_NAME,
    FIRST_TRAIN_YEAR, LAST_TRAIN_YEAR, FIRST_TEST_YEAR, LAST_TEST_YEAR,
    REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, N_FEATURES, RESULTS_PATH,
)
hyps = fn.read_results("hyperparameters", *_hyperparameter_args)

### Fit model

In [19]:
# Demand target: the "demand_stl" variable of the daily demand file.
_demand_file = "/g/data/w42/dr6273/work/projects/Aus_energy/data/energy_demand/daily_demand_2010-2020_stl.nc"
_demand_ds = xr.open_dataset(_demand_file)
dem_da = _demand_ds["demand_stl"]

In [20]:
# All candidate predictor files for this market and mask; the `detrended` flag
# follows the DETREND setting.
_files = fn.get_predictor_files(
    MARKET,
    MASK_NAME,
    detrended=DETREND,
)

In [21]:
# Keep only the predictor files whose path contains the "1940-2023" tag.
files = [path for path in _files if "1940-2023" in path]

In [22]:
# Display the predictor files that will be opened in the next cell.
files

['/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/hdd_18_era5_daily_1940-2023_NEM_pop_dens_mask_detrended.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/10w_era5_daily_1940-2023_NEM_pop_dens_mask_detrended.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/cdd_24_rollmean4_era5_daily_1940-2023_NEM_pop_dens_mask_detrended.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/2tmax_era5_daily_1940-2023_NEM_pop_dens_mask_detrended.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/cdd_24_era5_daily_1940-2023_NEM_pop_dens_mask_detrended.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/2tmin_era5_daily_1940-2023_NEM_pop_dens_mask_detrended.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/cdd_24_rollmean3_era5_daily_1940-2023_NEM_pop_dens_mask_detrended.nc',
 '/g/data/w42/dr6273/work/projects/Aus_energy/demand_predictors/q_era5_daily_1940-2023_NEM_pop_dens_mask_detre

In [23]:
# Open every predictor file lazily as a single dataset. With combine="nested" and
# no concat_dim the files are merged rather than concatenated; compat="override"
# skips the equality check on shared variables and takes them from the first file.
pred_ds = xr.open_mfdataset(
    files,
    combine="nested",
    compat="override",
)

In [24]:
# Display the combined predictor dataset.
pred_ds

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (30681, 6) (30681, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  30681,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (30681, 6) (30681, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  30681,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 10 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (30681, 6) (30681, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  30681,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 10 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (30681, 6) (30681, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  30681,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (30681, 6) (30681, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  30681,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 10 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (6, 30681) (6, 30681) Dask graph 1 chunks in 10 graph layers Data type float64 numpy.ndarray",30681  6,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(6, 30681)","(6, 30681)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.40 MiB 1.40 MiB Shape (30681, 6) (30681, 6) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",6  30681,

Unnamed: 0,Array,Chunk
Bytes,1.40 MiB,1.40 MiB
Shape,"(30681, 6)","(30681, 6)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [25]:
# Build one modelling table per region: the demand target plus that region's
# selected predictors.
region_dfs = {}
for r in regions:
    # Region-specific calendar, consumed by fn.remove_time.
    cal = fn.get_calendar(MARKET, r)

    # Apply the same time filtering to the target and to the predictors.
    # The filter settings come from the config constants instead of the literals
    # True, True, 0, so they cannot drift from the settings used to load
    # `features` and `hyps`.
    # NOTE(review): assumes fn.remove_time takes (weekend, xmas, month) in this
    # positional order, which matches the literal values used before -- confirm.
    demand = fn.remove_time(dem_da, REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, cal)
    pred = fn.remove_time(pred_ds, REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, cal)
    df = fn.to_dataframe(demand, pred, r)

    # Feature names chosen for this region. The reversed order is computed once
    # and used both for the printed summary and for the column order.
    selected_preds = fn.parse_features(fn.sel_model(features[r])["feature_names"])
    ordered_preds = selected_preds[::-1]
    print(ordered_preds)

    # Target first, then the predictors.
    df = df[["demand"] + ordered_preds]
    region_dfs[r] = df

['t2max', 'msdwswrf', 't2m', 'w10', 'cdd', 't2m3']
['t2max', 'msdwswrf', 't2m', 'q', 'w10', 'cdd', 't2m4']
['t2max', 'msdwswrf', 't2m', 'mtpr', 'w10', 't2m4', 't2m3']
['t2max', 'msdwswrf', 't2m', 'w10', 't2m4', 't2m3']
['t2max', 'msdwswrf', 't2m', 'q', 'w10', 't2m4', 't2m3']
['t2m', 'w10', 'rh', 't2m3']


In [26]:
# Fit one ExtraTreesRegressor per region on the training years.
models = {}

# Hyperparameters read from `hyps`; each is cast to int before being passed on.
hyperparameter_names = ("n_estimators", "min_samples_leaf", "max_depth", "max_leaf_nodes")

for r in regions:
    cal = fn.get_calendar(MARKET, r)
    # Use the config constants rather than the literals True, True, 0 so the
    # filtering always matches the settings used to load the saved results.
    # NOTE(review): assumes fn.remove_time takes (weekend, xmas, month) in this
    # positional order, which matches the literal values used before -- confirm.
    dem = fn.remove_time(dem_da, REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, cal)

    # Number of time steps that fall in the test years after filtering.
    test_len = dem.sel(time=slice(str(FIRST_TEST_YEAR), str(LAST_TEST_YEAR))).time.values.shape[0]

    # Select train + test years, then split without shuffling.
    # Presumably this holds out the final `test_len` rows as the test set -- confirm in fn.split.
    train_X, test_X, train_y, test_y = fn.split(
        fn.sel_train_test(region_dfs[r], FIRST_TRAIN_YEAR, LAST_TEST_YEAR),
        "demand",
        test_size=test_len,
        random_state=0,
        shuffle=False
    )

    # Finalise model.
    # .item() extracts the single stored value as a Python scalar; calling int()
    # directly on a one-element array is deprecated since NumPy 1.25.
    tuned = {
        name: int(hyps[r].loc[name].values.item())
        for name in hyperparameter_names
    }
    rf = ExtraTreesRegressor(random_state=0, **tuned)

    # Only the first return value is kept, as the region's model; the second
    # (presumably predictions for the training inputs passed as the third
    # argument) is discarded.
    models[r], _ = fn.predict_forest(train_y, train_X, train_X, rf)

### Predict and write

In [27]:
# Predict demand for every region over the full predictor record and write one
# CSV per region.
predictions = {}

for r in regions:
    # Stack this region's predictor variables along a new "variable" dimension,
    # then lay them out as a (time x variable) table.
    pred_arr = pred_ds.sel(region=r).to_array("variable")
    df = pd.DataFrame(
        pred_arr.transpose("time", "variable").values,
        columns=pred_arr["variable"].values,
        index=pred_arr["time"].values
    )

    # Use the same reversed feature order as the training table, so the column
    # order seen by the model is unchanged.
    selected_preds = fn.parse_features(fn.sel_model(features[r])["feature_names"])
    ordered_preds = selected_preds[::-1]
    print(ordered_preds)

    # Keep complete rows only. The explicit copy makes `df` an independent frame,
    # so adding a column below cannot trigger SettingWithCopyWarning.
    df = df[ordered_preds].dropna().copy()

    model = models[r]
    df["prediction"] = model.predict(df.to_numpy())
    predictions[r] = df

    # "1940" and "2023" must stay in step with the "1940-2023" tag used to
    # select the predictor files.
    filename = fn.get_filename(
        "extrapolated", MARKET, r, MASK_NAME,
        FIRST_TRAIN_YEAR, LAST_TRAIN_YEAR, "1940", "2023",
        REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, N_FEATURES
    )
    if DETREND:
        filename = filename + "_detrended"

    # RESULTS_PATH may or may not end with "/"; strip it so the joined path never
    # contains a doubled separator.
    df.to_csv(
        RESULTS_PATH.rstrip("/") + "/extrapolated/random_forest/" + filename + ".csv",
    )

['t2max', 'msdwswrf', 't2m', 'w10', 'cdd', 't2m3']
['t2max', 'msdwswrf', 't2m', 'q', 'w10', 'cdd', 't2m4']
['t2max', 'msdwswrf', 't2m', 'mtpr', 'w10', 't2m4', 't2m3']
['t2max', 'msdwswrf', 't2m', 'w10', 't2m4', 't2m3']
['t2max', 'msdwswrf', 't2m', 'q', 'w10', 't2m4', 't2m3']
['t2m', 'w10', 'rh', 't2m3']
