In [1]:
import numpy as np
import pandas as pd
import sklearn.metrics as m
from sklearn.tree import DecisionTreeRegressor

In [2]:
def make_lags(df, cols, lags):
    return pd.concat([df[col].shift(lag).rename(f"{col}_L{lag}") for col in cols for lag in lags], axis=1)

In [3]:
df = pd.read_csv("../../../data/cleaned/wind_hourly_cleaned.csv")
# df = df.sort_values('date').reset_index(drop=True)
df['wind_speed'] = df['wind_speed'].astype(float)

In [4]:
LAGS = [1,2,3,23,24,25,48]
df = df.join(make_lags(df[['wind_speed']], ['wind_speed'], LAGS)).dropna()

In [5]:
df.head()

Unnamed: 0,datetime,wind_speed,wind_speed_L1,wind_speed_L2,wind_speed_L3,wind_speed_L23,wind_speed_L24,wind_speed_L25,wind_speed_L48
48,2024-01-03 00:00:00,0.1,0.8,1.0,0.216667,0.933333,1.05,0.716667,0.1
49,2024-01-03 01:00:00,0.616667,0.1,0.8,1.0,0.7,0.933333,1.05,0.283333
50,2024-01-03 02:00:00,0.5,0.616667,0.1,0.8,1.45,0.7,0.933333,0.2
51,2024-01-03 03:00:00,0.033333,0.5,0.616667,0.1,1.75,1.45,0.7,0.5
52,2024-01-03 04:00:00,0.116667,0.033333,0.5,0.616667,0.083333,1.75,1.45,0.966667


In [6]:
train_end = len(df) - 720
test_df   = df.iloc[train_end:].copy()


In [8]:
preds = []
for t in range(train_end, len(df)):
    X = df.iloc[t-168:t][[c for c in df.columns if '_L' in c]].values
    y = df.iloc[t-168:t]['wind_speed'].values
    model = DecisionTreeRegressor(max_depth=8, min_samples_leaf=10, random_state=42)
    model.fit(X, y)
    x_next = df.iloc[t][[c for c in df.columns if '_L' in c]].values.reshape(1,-1)
    preds.append(model.predict(x_next)[0])

test_df['pred_dt'] = preds
mae  = m.mean_absolute_error(test_df['wind_speed'], test_df['pred_dt'])
rmse = m.mean_squared_error(test_df['wind_speed'], test_df['pred_dt'])
mape = np.mean(np.abs((test_df['wind_speed'] - test_df['pred_dt'])/test_df['wind_speed']))*100

In [9]:
# --- REPORT PRINTING ---
print("\n=======================================================")
print("  DECISION TREE FORECAST PERFORMANCE REPORT")
print("=======================================================")
print(f"Total Test Observations: {len(test_df):,}")
print("-" * 55)
print("Metric                         Value")
print("-" * 55)
print(f"Mean Absolute Error (MAE):     {mae:20.4f}")
print(f"Root Mean Squared Error (RMSE):{rmse:20.4f}")
print(f"Mean Abs. Percentage Error (MAPE): {mape:18.2f} %")
print("=======================================================")


  DECISION TREE FORECAST PERFORMANCE REPORT
Total Test Observations: 720
-------------------------------------------------------
Metric                         Value
-------------------------------------------------------
Mean Absolute Error (MAE):                   0.8246
Root Mean Squared Error (RMSE):              1.3596
Mean Abs. Percentage Error (MAPE):                inf %
