In [24]:
import sys
import os
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly_express as px
from datetime import datetime, timedelta
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit


module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

In [5]:
from src.paths import TRANSFORMED_DATA_DIR, MODELS_DIR
from src.feature import train_test_kfolds


In [44]:
class BaseLineModelAverage25Hour():

    """
    Prediction = average electricity output observed in the last 24hour, 25hour and 26 hour
    """
    def fit(self, X_train: pd.DataFrame, y_train: pd.Series):
        pass
    
    def predict(self, X_test: pd.DataFrame) -> pd.Series:
        """"""
        return 0.33*( X_test[f'CF_previous_24_hours'] \
                     + X_test[f'CF_previous_25_hours'] \
                     + X_test[f'CF_previous_26_hours'] \
        )

class BaseLineModelPrevious24Hour():

    """
    Prediction = average electricity output observed in the last 24hour, 25hour and 26 hour
    """
    def fit(self, X_train: pd.DataFrame, y_train: pd.Series):
        pass
    
    def predict(self, X_test: pd.DataFrame) -> pd.Series:
        """"""
        return X_test[f'CF_previous_24_hours']*0.85
        

In [45]:
# Read the transformed data
wind_farms_data_features_target = pd.read_parquet(f'{TRANSFORMED_DATA_DIR}/wind_farm_all_features.parquet')

In [46]:
time_split = TimeSeriesSplit(gap=24, test_size=93, n_splits=10, max_train_size=720)
target_features_required = ['CF'] + ['CF_previous_24_hours'] + ['CF_previous_25_hours'] + ['CF_previous_26_hours']                                                  
train_test_folds= train_test_kfolds(wind_farms_data_features_target, time_split=time_split, features_target=target_features_required)

In [47]:

result = {}

fig = px.line(
    wind_farms_data_features_target,
    x=wind_farms_data_features_target.index,
    y="CF",
    markers=True,
    hover_data=["CF"]
)
for key, value in train_test_folds.items():

    X_train, y_train, X_test, y_test = train_test_folds[key]
    model = BaseLineModelAverage25Hour()
    model.fit(X_train,y_train)
    predictions = model.predict(X_test)
    result[key]= (mean_absolute_error(y_test, predictions), mean_absolute_percentage_error(y_test, predictions))

    fig.add_trace(go.Scatter(x=y_test.index, y=predictions, mode='lines', name=str(key), line=dict(color='red')))

print(f"Mean absolute percentage error:{np.mean([v[1] for v in result.values()])}")

print(f"Mean absolute error:{np.mean([v[0] for v in result.values()])})")
fig.show()

Mean absolute percentage error:2.809699775968968
Mean absolute error:0.15656032430767483)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [48]:

result = {}

fig = px.line(
    wind_farms_data_features_target,
    x=wind_farms_data_features_target.index,
    y="CF",
    markers=True,
    hover_data=["CF"]
)
for key, value in train_test_folds.items():

    X_train, y_train, X_test, y_test = train_test_folds[key]
    model = BaseLineModelPrevious24Hour()
    model.fit(X_train,y_train)
    predictions = model.predict(X_test)
    result[key]= (mean_absolute_error(y_test, predictions), mean_absolute_percentage_error(y_test, predictions))

    fig.add_trace(go.Scatter(x=y_test.index, y=predictions, mode='lines', name=str(key), line=dict(color='red')))

print(f"Mean absolute percentage error:{np.mean([v[1] for v in result.values()])}")

print(f"Mean absolute error:{np.mean([v[0] for v in result.values()])})")

fig.update_layout(title="Predicted vs Actuals")
# fig.add_annotation(dict(text="Max training date"))

fig.update_layout(annotations=[
    dict(
        x=1.05,  # x and y coordinates for positioning, adjust as needed
        y=1.02,
        xref='paper',
        yref='paper',
        text='Max Training date',  # Legend title text
        showarrow=False,
        align='right'
    )
])
fig.show()

Mean absolute percentage error:2.395995418546324
Mean absolute error:0.14372151259217242)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

