In [None]:
# Calendar features derived from the index. Nothing here creates or validates
# the index: df_filtered must already carry a DatetimeIndex, otherwise the
# attribute lookups below raise.
ts_index = df_filtered.index
df_filtered["DATE"] = ts_index.date  # plain datetime.date objects (object dtype)
df_filtered["MONTH"] = ts_index.month
df_filtered["DAYOFWEEK"] = ts_index.dayofweek  # Monday=0 ... Sunday=6
iso_week = ts_index.isocalendar()["week"]  # ISO 8601 week number
df_filtered["WEEKOFYEAR"] = iso_week.astype(int)
df_filtered["HOUR"] = ts_index.hour


# Lagged demand features. shift(n) moves the series by n rows, so the
# "24H"/"48H" names are only accurate if `df` has exactly one row per hour
# -- TODO confirm the sampling interval.
# NOTE(review): the lags are read from `df`, not `df_filtered`; the assignment
# aligns on index labels. Confirm that sourcing them from `df` is intended.
lag_rows = {"LAG_24H": 24, "LAG_48H": 48}
for lag_column, n_rows in lag_rows.items():
    df_filtered[lag_column] = df["TOTALDEMAND"].shift(n_rows)

# Discard rows for which either lag value is missing.
df_filtered = df_filtered.dropna(subset=list(lag_rows))


# Weekend flag: True where DAYOFWEEK is 5 or 6 (Saturday/Sunday when the
# column uses pandas' Monday=0 numbering). `isin` already yields booleans.
weekend_days = (5, 6)
df_filtered["IS_WEEKEND"] = df_filtered["DAYOFWEEK"].isin(weekend_days)

# Season column: label each row from its MONTH with a vectorised lookup
# instead of a row-wise apply over the whole frame.
# NOTE(review): "Summer" spans November-March (five months) and "Spring" only
# September-October, so these are not the calendar seasons. The grouping is
# reproduced exactly as it was; confirm it is deliberate.
month_to_season = {
    11: "Summer", 12: "Summer", 1: "Summer", 2: "Summer", 3: "Summer",
    4: "Autumn", 5: "Autumn",
    6: "Winter", 7: "Winter", 8: "Winter",
    9: "Spring", 10: "Spring",
}
# Every month 1-12 has an entry, so no valid month is left unlabelled.
df_filtered["SEASON"] = df_filtered["MONTH"].map(month_to_season)

# Public holidays for NSW from the `holidays` package, years 2010-2020.
# NOTE(review): recent `holidays` releases spell this argument `subdiv=`
# rather than `prov=`; confirm which one the installed version expects.
nsw_holidays = holidays.Australia(prov="NSW", years=range(2010, 2021))
holiday_dates = list(nsw_holidays.keys())

# New Year extension (Dec 31 – Jan 2 around each New Year in the range)
new_year_ranges = [(f"{year-1}-12-31", f"{year}-01-02") for year in range(2010, 2021)]
new_year_dates = []
for start, end in new_year_ranges:
    # Keep datetime.date objects, the same type the DATE column holds.
    # pd.date_range yields Timestamps, which never match date objects in
    # `isin`, so without `.date` this extension would flag nothing.
    new_year_dates.extend(pd.date_range(start=start, end=end).date)

# Public holiday flag: True when the row's DATE is in either collection.
# `isin` already returns booleans, so no cast is needed.
flagged_dates = set(holiday_dates) | set(new_year_dates)
df_filtered["IS_PUBLIC_HOLIDAY"] = df_filtered["DATE"].isin(flagged_dates)

# Temperature-based features
df_filtered["TEMP_SQUARED"] = df_filtered["TEMPERATURE"] ** 2

# Step 1: 20 equal-width bins spanning the observed temperature range.
temp_min = df_filtered["TEMPERATURE"].min()
temp_max = df_filtered["TEMPERATURE"].max()

bins = np.linspace(temp_min, temp_max, num=21)  # 21 edges -> 20 bins
# pd.cut intervals are left-open by default, so the first edge (temp_min)
# would belong to no bin; include_lowest=True keeps the coldest reading(s)
# from getting a NaN TEMP_BIN and, downstream, a NaN SEASONAL_AVG_DEMAND.
df_filtered["TEMP_BIN"] = pd.cut(
    df_filtered["TEMPERATURE"], bins=bins, include_lowest=True
)

# Step 2: median TOTALDEMAND for every (SEASON, TEMP_BIN) pair. The column is
# named "AVG" but holds a median. observed=False is the historical default for
# categorical group keys; it is spelled out to keep the result stable and to
# avoid the pandas FutureWarning about that default changing.
# NOTE(review): if TOTALDEMAND is the modelling target, this lookup is built
# from the target over all rows -- consider fitting it on the training split
# only.
seasonal_temp_response = (
    df_filtered.groupby(["SEASON", "TEMP_BIN"], observed=False)["TOTALDEMAND"]
    .median()
    .reset_index()
    .rename(columns={"TOTALDEMAND": "SEASONAL_AVG_DEMAND"})
)

# Step 3: attach the lookup to every row. merge() discards the left index, so
# it is restored afterwards; validate="many_to_one" guarantees the right-hand
# keys are unique, i.e. the merged frame has exactly one row per original row
# in the original order, which is what makes re-attaching the index safe.
original_index = df_filtered.index
df_filtered = df_filtered.merge(
    seasonal_temp_response,
    on=["SEASON", "TEMP_BIN"],
    how="left",
    validate="many_to_one",
).set_index(original_index)

# Step 4: one-hot encode SEASON; drop_first=True omits one level so the
# remaining indicator columns are not perfectly collinear.
df_filtered = pd.get_dummies(df_filtered, columns=["SEASON"], drop_first=True)

In [None]:
# Remove the DATE, TEMP_BIN and FORECASTING columns in place.
# errors="ignore" lets the cell run even when some of them are absent.
df_filtered.drop(
    ["DATE", "TEMP_BIN", "FORECASTING"],
    axis="columns",
    errors="ignore",
    inplace=True,
)

# Print a summary of the remaining columns.
df_filtered.info()

