# Write ERA5 demand predictions

In [1]:
import xarray as xr
import pandas as pd
import numpy as np

from sklearn.ensemble import ExtraTreesRegressor

In [2]:
%cd /g/data/w42/dr6273/work/demand_model/

import functions as fn

/g/data/w42/dr6273/work/demand_model


In [3]:
%load_ext autoreload
%autoreload 2

### Set global variables

In [4]:
# Base directory for model results: passed to fn.read_results when loading
# features/hyperparameters and used as the root of the output CSV path.
RESULTS_PATH = "/g/data/w42/dr6273/work/projects/Aus_energy/model_results/"

In [5]:
# Market identifier handed to the fn helpers (results lookup, predictor files,
# calendars and output filenames).
MARKET = "NEM" # "NEM" or "EU"

In [6]:
# Weekend-removal flag; part of the key used to look up saved results and to
# build output filenames.
REMOVE_WEEKEND = True

In [7]:
# Xmas-removal flag; part of the key used to look up saved results and to
# build output filenames.
REMOVE_XMAS = True

In [8]:
# Month-removal setting; part of the key used to look up saved results and to
# build output filenames.
REMOVE_MONTH = 0 # month number in [1, 12]; 0 presumably means "remove no month" -- TODO confirm in functions.py

In [9]:
# Mask identifier passed to fn.get_predictor_files and included in the
# results/filename keys.
MASK_NAME = "pop_dens_mask"

In [10]:
# NOTE(review): not referenced by any other cell shown in this notebook --
# confirm whether it is still needed.
TIME_COLUMNS = []

In [11]:
# First and last calendar year of the training period.
FIRST_TRAIN_YEAR = 2010
LAST_TRAIN_YEAR = 2016

In [12]:
# First and last calendar year of the test period.
FIRST_TEST_YEAR = 2017
LAST_TEST_YEAR = 2019

In [13]:
# Feature-set label; part of the results lookup key and the output filename.
N_FEATURES = "parsimonious"

In [14]:
# When True, detrended predictor files are requested and "_detrended" is
# appended to the output filenames.
DETREND = True

In [15]:
# Regions to process; one model is fitted and one prediction CSV is written
# for each entry.
regions = ["NEM", "QLD", "NSW", "VIC", "SA", "TAS"]

### Load features and hyperparameters

In [16]:
# Feature-selection results, indexable by region, for this run configuration.
features = fn.read_results(
    "feature_selection",
    MARKET,
    regions,
    MASK_NAME,
    FIRST_TRAIN_YEAR,
    LAST_TRAIN_YEAR,
    FIRST_TEST_YEAR,
    LAST_TEST_YEAR,
    REMOVE_WEEKEND,
    REMOVE_XMAS,
    REMOVE_MONTH,
    N_FEATURES,
    RESULTS_PATH,
)

In [17]:
# Tuned hyperparameters, indexable by region, for this run configuration.
hyps = fn.read_results(
    "hyperparameters",
    MARKET,
    regions,
    MASK_NAME,
    FIRST_TRAIN_YEAR,
    LAST_TRAIN_YEAR,
    FIRST_TEST_YEAR,
    LAST_TEST_YEAR,
    REMOVE_WEEKEND,
    REMOVE_XMAS,
    REMOVE_MONTH,
    N_FEATURES,
    RESULTS_PATH,
)

### Fit model

In [18]:
# Demand target: the "demand_stl" variable of the daily demand file.
dem_da = xr.open_dataset(
    "/g/data/w42/dr6273/work/projects/Aus_energy/data/energy_demand/daily_demand_2010-2020_stl.nc"
)["demand_stl"]

In [19]:
# Open all predictor files for this market/mask as one dataset
files = fn.get_predictor_files(MARKET, MASK_NAME, detrended=DETREND)
pred_ds = xr.open_mfdataset(
    files,
    combine="nested",
    compat="override",
)

In [20]:
# Build one training frame per region: the demand target followed by that
# region's selected predictors, after calendar-based filtering.
region_dfs = {}
for r in regions:
    cal = fn.get_calendar(MARKET, r)

    # Filter with the notebook-level flags instead of literals, so this step
    # cannot drift from the configuration used to load `features` and `hyps`.
    # NOTE(review): the positional order (weekend, xmas, month) is assumed to
    # mirror fn.read_results -- confirm against functions.py.
    demand = fn.remove_time(dem_da, REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, cal)
    pred = fn.remove_time(pred_ds, REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, cal)
    df = fn.to_dataframe(demand, pred, r)

    # The selected feature names are used in reversed order; reverse once and
    # reuse the result for both the log line and the column selection.
    selected_preds = fn.parse_features(fn.sel_model(features[r])["feature_names"])
    ordered_preds = selected_preds[::-1]
    print(ordered_preds)

    # "demand" stays in the first column; the predictors follow.
    df = df[["demand"] + ordered_preds]
    region_dfs[r] = df

['t2max', 'msdwswrf', 't2m', 'w10', 'cdd', 't2m3']
['t2max', 'msdwswrf', 't2m', 'q', 'w10', 'cdd', 't2m4']
['t2max', 'msdwswrf', 't2m', 'mtpr', 'w10', 't2m4', 't2m3']
['t2max', 'msdwswrf', 't2m', 'w10', 't2m4', 't2m3']
['t2max', 'msdwswrf', 't2m', 'q', 'w10', 't2m4', 't2m3']
['t2m', 'w10', 'rh', 't2m3']


In [23]:
# Export the QLD frame (demand plus its selected predictors) to CSV.
region_dfs["QLD"].to_csv(
    "/g/data/w42/dr6273/work/projects/Aus_energy/for_SH/demand_QLD_2010-19.csv"
)

In [25]:
# Column labels of the QLD frame with the first column dropped, i.e. the
# predictor names that follow "demand".
v = region_dfs["QLD"].columns[1:]

In [34]:
# Spot-check: raw values of the "w10" predictor for QLD.
pred_ds[v].sel(region="QLD")["w10"].values

array([1.7287239 , 2.74495982, 1.70599203, ..., 4.00592282, 3.59753111,
       3.29838911])

In [37]:
# QLD predictors as a time-indexed table, without the "region" column.
qld_pred = (
    pred_ds[v]
    .sel(region="QLD")
    .to_dataframe()
    .drop(columns="region")
)
qld_pred

Unnamed: 0_level_0,t2max,msdwswrf,t2m,q,w10,cdd,t2m4
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1959-01-01,,292.675873,298.218779,14.978152,1.728724,0.735046,
1959-01-02,301.840051,212.884482,296.801772,15.334353,2.744960,0.423551,
1959-01-03,298.421748,181.831231,295.600398,14.884802,1.705992,0.393636,
1959-01-04,302.375259,303.834739,296.689620,14.142507,0.616126,0.615129,296.827806
1959-01-05,303.886801,316.353570,298.724972,14.662342,1.621658,1.297326,296.954352
...,...,...,...,...,...,...,...
2022-12-27,298.324602,262.989052,295.416537,12.215562,4.001738,0.183803,295.876541
2022-12-28,298.237416,273.667103,295.445376,12.259319,2.882962,0.245857,295.805191
2022-12-29,299.607831,213.382467,296.790060,13.750696,4.005923,0.395849,295.922065
2022-12-30,298.230718,167.622519,296.221529,14.319748,3.597531,0.492517,295.968212


In [38]:
# Export the QLD predictor table to CSV.
qld_pred.to_csv(
    "/g/data/w42/dr6273/work/projects/Aus_energy/for_SH/predictors_QLD_1959-2022.csv"
)

In [21]:
# Fit one ExtraTreesRegressor per region using the tuned hyperparameters
# loaded into `hyps`, and keep the fitted models keyed by region.
models = {}

for r in regions:
    cal = fn.get_calendar(MARKET, r)
    # Filter with the notebook-level flags instead of literals so the test
    # length is derived from the same configuration as the rest of the run.
    # NOTE(review): the positional order (weekend, xmas, month) is assumed to
    # mirror fn.read_results -- confirm against functions.py.
    dem = fn.remove_time(dem_da, REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, cal)
    # Number of retained time steps that fall inside the test years.
    test_len = dem.sel(time=slice(str(FIRST_TEST_YEAR), str(LAST_TEST_YEAR))).time.values.shape[0]

    # test_X / test_y are not used further in this cell.
    train_X, test_X, train_y, test_y = fn.split(
        fn.sel_train_test(region_dfs[r], FIRST_TRAIN_YEAR, LAST_TEST_YEAR),
        "demand",
        test_size=test_len,
        random_state=0,
        shuffle=False
    )

    # Finalise model.
    # .item() extracts the single stored value as a Python scalar; calling
    # int() directly on a 1-D array is deprecated since NumPy 1.25.
    tuned = {
        name: int(hyps[r].loc[name].values.item())
        for name in ("n_estimators", "min_samples_leaf", "max_depth", "max_leaf_nodes")
    }
    rf = ExtraTreesRegressor(random_state=0, **tuned)

    # Only the first return value (stored as the region's model) is kept;
    # the second is discarded. train_X is deliberately passed twice here.
    models[r], _ = fn.predict_forest(train_y, train_X, train_X, rf)

### Predict and write

In [22]:
# Predict demand from the full predictor record for each region and write
# the predictors plus prediction to CSV.
predictions = {}

for r in regions:
    # Stack the region's predictors into a (time x variable) table
    region_arr = pred_ds.sel(region=r).to_array("variable")
    all_preds = pd.DataFrame(
        region_arr.transpose(),
        columns=region_arr["variable"],
        index=region_arr["time"],
    )

    # Same reversed feature ordering that the training frames use
    feature_cols = fn.parse_features(fn.sel_model(features[r])["feature_names"])[::-1]
    print(feature_cols)

    # Keep only complete rows of the selected predictors
    inputs = all_preds[feature_cols].dropna()

    # Predict from the predictor columns only, then attach the result
    df = inputs.assign(prediction=models[r].predict(np.array(inputs)))
    predictions[r] = df

    filename = fn.get_filename(
        "extrapolated", MARKET, r, MASK_NAME,
        FIRST_TRAIN_YEAR, LAST_TRAIN_YEAR, "1959", "2022",
        REMOVE_WEEKEND, REMOVE_XMAS, REMOVE_MONTH, N_FEATURES
    )
    if DETREND:
        filename += "_detrended"
    df.to_csv(RESULTS_PATH + "/extrapolated/random_forest/" + filename + ".csv")

['t2max', 'msdwswrf', 't2m', 'w10', 'cdd', 't2m3']
['t2max', 'msdwswrf', 't2m', 'q', 'w10', 'cdd', 't2m4']
['t2max', 'msdwswrf', 't2m', 'mtpr', 'w10', 't2m4', 't2m3']
['t2max', 'msdwswrf', 't2m', 'w10', 't2m4', 't2m3']
['t2max', 'msdwswrf', 't2m', 'q', 'w10', 't2m4', 't2m3']
['t2m', 'w10', 'rh', 't2m3']
