In [1]:
from datetime import datetime, timedelta
from pathlib import Path
import multiprocessing as mp

# data wrangling libraries
import numpy as np
import pandas as pd

# NEM data libraries
# NEMOSIS for actual demand data
# NEMSEER for forecast demand data
import nemosis
from nemseer import compile_data, download_raw_data, generate_runtimes

# interactive plotting
import plotly.express as px
import plotly.io as pio

# progress bar for error computation
from tqdm.notebook import tqdm



In [2]:
# Analysis start and end times
analysis_start = "2021/01/01 00:30:00"
analysis_end = "2022/01/01 00:00:00"
times = pd.date_range(analysis_start, analysis_end, freq="30T")

In [3]:
# create nemosis cache path and compile cache
nemosis_cache = Path("nemosis_cache/")
if not nemosis_cache.exists():
    nemosis_cache.mkdir()
    
nemosis.cache_compiler(
    analysis_start, analysis_end, "TRADINGPRICE", nemosis_cache, fformat="parquet")

Caching data for table TRADINGPRICE
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.
Cache for TRADINGPRICE in date range already compiled in nemosis_cache.


In [4]:
# download raw data from NEMWEB
download_raw_data(
    "PREDISPATCH",
    "PRICE",
    "nemseer_cache/",
    forecasted_start=analysis_start,
    forecasted_end=analysis_end,)

INFO: Query raw data already downloaded to nemseer_cache


In [5]:
# Calculate predispatch price forecast error for single time period 
def calculate_pd_price_forecast_error(forecasted_time: str) -> pd.DataFrame:
    """
    Calculates P5MIN demand forecast error (Actual - Forecast) for all forecasts
    that are run for a given forecasted_time.

    Args:
        forecasted_time: Datetime string in the form YYYY/mm/dd HH:MM:SS
    Returns:
        pandas DataFrame with forecast error in `TOTALDEMAND` columns, the ahead time
        of the forecast run in `ahead_time`, and the forecasted time in
        `forecasted_time`.
    """
    # necessary for datetime indexing with pandas and xarray
    time = str(forecasted_time).replace("-", "/")
    
    # get forecast data for forecasted_time
    run_start, run_end = generate_runtimes(time, time, "PREDISPATCH")
    nemseer_data = compile_data(
        run_start,
        run_end,
        time,
        time,
        "PREDISPATCH",
        "PRICE",
        "nemseer_cache/",
        data_format="xr",
    )
    price_forecasts = nemseer_data["PRICE"]
    price_forecasts= price_forecasts.where(price_forecasts.INTERVENTION == 0) 
    
    # get actual demand data for forecasted_time
    # nemosis start time must precede end of interval of interest by 5 minutes
    nemosis_start = (
        datetime.strptime(time, "%Y/%m/%d %H:%M:%S") - timedelta(minutes=30)
    ).strftime("%Y/%m/%d %H:%M:%S")
    # compile data using nemosis, using cached parquet and filtering out interventions
    # select appropriate region
    nemosis_data = nemosis.dynamic_data_compiler(
        nemosis_start,
        time,
        "TRADINGPRICE",
        nemosis_cache,
        filter_cols=["REGIONID"],
        filter_values=(["SA1"],),
        fformat="parquet",
    )
    
    # select relevant price index from:
    # RRP, RAISE6SECRRP, RAISE60SECRRP, RAISE5MINRRP , RAISEREGRRP
    #      LOWER6SECRRP, LOWER60SECRRP, LOWER5MINRRP , LOWERREGRRP 
    actual_price = nemosis_data.groupby("SETTLEMENTDATE")["RRP"].sum()[time]
    
    # sum forecast price for relevant region: QLD1 SA1 NSW1 VIC1 TAS1
    price_forecasts=price_forecasts.sel(REGIONID="QLD1")
    query_forecasts = price_forecasts.sel(forecasted_time=time)["RRP"]
    
    # calculate error and return as a pandas DataFrame
    error = (actual_price - query_forecasts).to_dataframe()
    # calculate number of minutes ahead
    error["ahead_time"] = error["forecasted_time"] - error.index
    error = error.set_index("forecasted_time")
    return error

In [6]:
times = pd.date_range(analysis_start, analysis_end, freq="30T")

In [7]:
result = map(calculate_pd_price_forecast_error, times)

In [8]:
results = list(result)
#quiet

INFO: Query raw data already downloaded to nemseer_cache
INFO: Converting PRICE data to xarray.
Compiling data for table TRADINGPRICE.
Returning TRADINGPRICE.


TypeError: datetime64 type does not support add operations