# Package Management

In [None]:
import pandas as pd
import numpy as np
from datetime import timedelta
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tqdm.auto import tqdm
from typing import Tuple

# Price-forecasting Engine

In [None]:
import pandas as pd
from datetime import timedelta
from xgboost import XGBRegressor
from typing import Tuple
from tqdm.auto import tqdm


class EnergyPriceForecaster:
    """
    A class used to forecast electricity prices for the day-ahead market.

    Attributes
    ----------
    data : pd.DataFrame
        The input dataset containing electricity prices.

    Methods
    -------
    load_data(file_path: str) -> pd.DataFrame
        Load the input dataset from the given file path.
    process_data(data: pd.DataFrame) -> pd.DataFrame
        Pre-processes the input dataset.
    prepare_features(data: pd.DataFrame) -> pd.DataFrame
        Pre-processes the input dataset and creates additional features.
    train_and_predict(train_end: pd.Timestamp, forecast_start: pd.Timestamp, forecast_end: pd.Timestamp) -> Tuple[pd.Series, np.ndarray]
        Trains the XGBoost model and predicts the energy prices.
    forecast(start_date: pd.Timestamp, end_date: pd.Timestamp) -> pd.DataFrame
        Forecasts the energy prices for a given range of dates.
    """

    def __init__(self, file_path: str):
        """
        Load the dataset at ``file_path`` and store its pre-processed form.

        Parameters
        ----------
        file_path : str
            Location of the input file; passed unchanged to ``load_data``.
        """
        # The raw table is loaded first and then replaced by the processed frame,
        # so ``self.data`` ends up holding only the output of ``process_data``.
        self.data = self.load_data(file_path)
        self.data = self.process_data(self.data)

    @staticmethod
    def load_data(file_path: str) -> pd.DataFrame:
        """
        Read the input dataset from an Excel file.

        Parameters
        ----------
        file_path : str
            The file path of the input data, handed to ``pd.read_excel``.

        Returns
        -------
        pd.DataFrame
            The input dataset as read, with the 'timestamp' column requested
            to be parsed as dates.
        """
        # parse_dates asks pandas to convert the 'timestamp' column while reading.
        data = pd.read_excel(file_path, parse_dates=['timestamp'])
        return data

    @staticmethod
    def process_data(data: pd.DataFrame) -> pd.DataFrame:
        """
        Reduce the raw auction table to an hourly 'price' series for the DAM auction.

        Parameters
        ----------
        data : pd.DataFrame
            The input dataset containing energy prices. The code reads the
            'auction', 'timestamp' and 'price_eur' columns.

        Returns
        -------
        pd.DataFrame
            A single-column frame ('price') indexed by 'timestamp', resampled
            to hourly frequency with gaps forward-filled.
        """
        data = (
            # Keep only the rows whose 'auction' value is 'DAM'.
            data[data['auction'] == 'DAM']
            .set_index('timestamp')
            .rename(columns={"price_eur": "price"})
            # Discard every column except the renamed price.
            .loc[:, ['price']]
            # Hours without an observation take the previous available price.
            # NOTE(review): recent pandas releases deprecate the upper-case 'H'
            # alias in favour of 'h' - confirm against the installed version.
            .resample('H').ffill()
        )
        return data

    @staticmethod
    def prepare_features(data: pd.DataFrame) -> pd.DataFrame:
        """
        Pre-processes the input dataset and creates additional features.

        Parameters
        ----------
        data : pd.DataFrame
            The input dataset containing energy prices.

        Returns
        -------
        pd.DataFrame
            The input dataset with additional features.
        """
        data = data.copy()
        data['hour'] = data.index.hour
        data['day_of_week'] = data.index.dayofweek
        data['day_of_month'] = data.index.day
        data['month'] = data.index.month

        # Create rolling window averages
        data['3_day


In [None]:
class EnergyPriceForecaster:
    """
    A class used to forecast electricity prices for the day-ahead market.

    Attributes
    ----------
    data : pd.DataFrame
        The input dataset containing electricity prices.

    Methods
    -------
    load_data(file_path)
        Load the input dataset from the given file path.
    process_data(data)
        Pre-processes the input dataset.
    pivot_data(data)
        Pivot the auction and price_eur columns and join them back to the original DataFrame.
    prepare_features(data)
        Pre-processes the input dataset and creates additional features.
    train_and_predict(train_end, forecast_start, forecast_end)
        Trains the XGBoost model and predicts the energy prices.
    forecast(start_date, end_date)
        Forecasts the energy prices for a given range of dates.
    """

    def __init__(self, file_path: str):
        """
        Load the dataset at ``file_path`` and store its pre-processed form.

        Parameters
        ----------
        file_path : str
            The file path of the input data.
        """
        self.data = self.load_data(file_path)
        self.data = self.process_data(self.data)

    def load_data(self, file_path: str) -> pd.DataFrame:
        """
        Load the input dataset from the given Excel file.

        Parameters
        ----------
        file_path : str
            The file path of the input data.

        Returns
        -------
        pd.DataFrame
            The input dataset, with the 'timestamp' column requested to be
            parsed as dates.
        """
        return pd.read_excel(file_path, parse_dates=['timestamp'])

    def process_data(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Reduce the raw auction table to an hourly 'price' series for the DAM auction.

        Parameters
        ----------
        data : pd.DataFrame
            The input dataset containing energy prices. The 'auction',
            'timestamp' and 'price_eur' columns are read.

        Returns
        -------
        pd.DataFrame
            A single-column frame ('price') indexed by 'timestamp', resampled
            to hourly frequency with gaps forward-filled.
        """
        dam_prices = (
            data[data['auction'] == 'DAM']
            .set_index('timestamp')
            .rename(columns={"price_eur": "price"})
            .loc[:, ['price']]
        )
        # Lower-case 'h' is the hourly alias; the upper-case 'H' spelling is
        # deprecated in recent pandas releases.
        return dam_prices.resample('h').ffill()

    def pivot_data(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Pivot the auction and price_eur columns and join them back to the original DataFrame.

        Parameters
        ----------
        data : pd.DataFrame
            The input dataset containing energy prices. The 'timestamp',
            'auction' and 'price_eur' columns are read.

        Returns
        -------
        pd.DataFrame
            The input rows with 'auction' and 'price_eur' replaced by one
            price column per auction value.

        Notes
        -----
        The join keeps one output row per input row, so a timestamp that
        appears once per auction in the input is repeated in the result.
        NOTE(review): confirm whether callers expect de-duplicated rows.
        """
        # One column per distinct auction value, indexed by timestamp.
        pivoted_data = data.pivot(index='timestamp', columns='auction', values='price_eur')
        # Match each row's 'timestamp' column against the pivot's index.
        joined = data.join(pivoted_data, on='timestamp')
        return joined.drop(columns=['auction', 'price_eur'])

    # ... Rest of the class remains the same ...


# Build the forecaster straight from the Excel export.
file_path = '/content/Auctions (DAM-IDA3).xlsx'
forecaster = EnergyPriceForecaster(file_path)

# Forecast window: first day of 2019 through last day of 2021.
start_date, end_date = pd.Timestamp('2019-01-01'), pd.Timestamp('2021-12-31')
results_df = forecaster.forecast(start_date, end_date)

print(results_df)


In [None]:


# Read the raw auction table and make sure 'timestamp' holds datetimes.
data = pd.read_excel('/content/Auctions (DAM-IDA3).xlsx')
data = data.assign(timestamp=pd.to_datetime(data['timestamp']))

# Spread the prices into one column per auction, then attach those columns
# to the original rows and drop the two columns they replace.
pivoted_data = data.pivot(index='timestamp', columns='auction', values='price_eur')
data = (
    data
    .join(pivoted_data, on='timestamp')
    .drop(columns=['auction', 'price_eur'])
)

# Print a summary of the resulting frame.
data.info()

FileNotFoundError: ignored

In [None]:
# Load and prepare data (example)



# Load the auction table; parse_dates asks pandas to convert 'timestamp' while reading.
data = pd.read_excel('/content/Auctions (DAM-IDA3).xlsx', parse_dates=['timestamp'])

# Reduce the table to an hourly DAM price series:
#   1. keep only the rows of the DAM auction,
#   2. index by timestamp,
#   3. rename price_eur -> price and keep only that column,
#   4. resample to hourly frequency, forward-filling hours without a price.
# Lower-case 'h' is the hourly alias; the upper-case 'H' spelling is deprecated
# in recent pandas releases.
data = (
    data[data['auction'] == 'DAM']
    .set_index('timestamp')
    .rename(columns={"price_eur": "price"})
    .loc[:, ['price']]
    .resample('h')
    .ffill()
)

# Check the data
data.info()

In [None]:



class EnergyPriceForecaster:
    """
    Walk-forward electricity price forecaster built on XGBoost.

    Attributes
    ----------
    data : pd.DataFrame
        Frame with a 'price' column and a datetime-like index
        (``prepare_features`` reads ``index.hour``, ``index.dayofweek``, ...).
    """

    # Lengths, in days, of the trailing price averages used as features.
    ROLLING_WINDOW_DAYS = (3, 7, 14, 21, 30, 60, 90)

    def __init__(self, data):
        self.data = data

    @staticmethod
    def _align_to_index_tz(moment, index):
        """Return ``moment`` expressed in the time zone of ``index``.

        Slicing a tz-aware DatetimeIndex with a tz-naive bound is deprecated
        in older pandas releases and an error in newer ones, so naive datetimes are
        localized to the index's zone and aware ones are converted to it.
        Values without a ``tzinfo`` attribute (e.g. strings) and bounds for a
        tz-naive index are returned untouched.
        """
        index_tz = getattr(index, 'tz', None)
        if index_tz is None or not hasattr(moment, 'tzinfo'):
            return moment
        moment = pd.Timestamp(moment)
        if moment.tzinfo is None:
            return moment.tz_localize(index_tz)
        return moment.tz_convert(index_tz)

    def prepare_features(self, data):
        """
        Add calendar and rolling-average features to a copy of ``data``.

        Parameters
        ----------
        data : pd.DataFrame
            Frame with a 'price' column and a datetime-like index.

        Returns
        -------
        pd.DataFrame
            Copy of ``data`` with 'hour', 'day_of_week', 'day_of_month',
            'month' and one '<N>_day_avg' column per entry of
            ``ROLLING_WINDOW_DAYS``; rows containing NaN are dropped.
        """
        data = data.copy()
        data['hour'] = data.index.hour
        data['day_of_week'] = data.index.dayofweek
        data['day_of_month'] = data.index.day
        data['month'] = data.index.month

        # The window is counted in rows; days * 24 assumes one row per hour.
        # NOTE(review): each average includes the current row's own price,
        # i.e. the value being predicted - consider shifting the series.
        for days in self.ROLLING_WINDOW_DAYS:
            data[f'{days}_day_avg'] = data['price'].rolling(window=days * 24).mean()

        # The first rows have incomplete windows and therefore NaN averages.
        return data.dropna()

    def train_and_predict(self, train_end, forecast_start, forecast_end, prepared_data=None):
        """
        Fit an XGBoost model on history and predict one forecast window.

        Parameters
        ----------
        train_end : datetime-like
            Last label (inclusive) of the training slice.
        forecast_start, forecast_end : datetime-like
            First and last label (both inclusive) of the forecast slice.
        prepared_data : pd.DataFrame, optional
            Output of ``prepare_features``. Lets a caller that predicts many
            windows compute the features once; when omitted they are derived
            from ``self.data``.

        Returns
        -------
        tuple
            ``(y_test, y_pred)``: the actual prices of the forecast slice as a
            Series and the model's predictions as an array. Both are empty,
            and no model is trained, when the forecast slice has no rows.

        Raises
        ------
        ValueError
            If there are rows to forecast but none to train on.
        """
        if prepared_data is None:
            prepared_data = self.prepare_features(self.data)

        index = prepared_data.index
        train_end = self._align_to_index_tz(train_end, index)
        forecast_start = self._align_to_index_tz(forecast_start, index)
        forecast_end = self._align_to_index_tz(forecast_end, index)

        # Label-based slices: both ends are inclusive.
        train_data = prepared_data.loc[:train_end]
        test_data = prepared_data.loc[forecast_start:forecast_end]

        y_test = test_data['price']
        if test_data.empty:
            # Nothing to forecast: skip the (expensive) model fit.
            return y_test, y_test.to_numpy()
        if train_data.empty:
            raise ValueError(
                f"No training rows available up to {train_end}; "
                "the rolling features need a longer price history."
            )

        model = XGBRegressor(n_estimators=1000, learning_rate=0.1)
        model.fit(train_data.drop(columns='price'), train_data['price'])
        y_pred = model.predict(test_data.drop(columns='price'))

        return y_test, y_pred

    def forecast(self, start_date, end_date):
        """
        Produce a day-by-day walk-forward forecast.

        For every calendar day in ``[start_date, end_date]`` a fresh model is
        trained on the rows up to and including the timestamp one day earlier
        and used to predict that day.

        Parameters
        ----------
        start_date, end_date : datetime-like
            First and last day to forecast (inclusive).

        Returns
        -------
        pd.DataFrame
            Columns 'timestamp', 'actual_price' and 'forecast_price', one row
            per forecast timestamp, with each timestamp appearing once.
        """
        columns = ['timestamp', 'actual_price', 'forecast_price']
        date_range = pd.date_range(start=start_date, end=end_date, freq='D')

        # Features do not depend on the forecast day, so build them once.
        prepared_data = self.prepare_features(self.data)

        daily_frames = []
        for date in tqdm(date_range, desc="Forecasting"):
            next_day = date + timedelta(days=1)
            y_test, y_pred = self.train_and_predict(
                date - timedelta(days=1), date, next_day,
                prepared_data=prepared_data,
            )

            # The forecast slice is end-inclusive, so it also contains the
            # first timestamp of the next day; drop it here, otherwise every
            # midnight would show up twice in the combined result.
            keep = y_test.index < self._align_to_index_tz(next_day, y_test.index)
            if not keep.any():
                continue

            daily_frames.append(pd.DataFrame({
                'timestamp': y_test.index[keep],
                'actual_price': y_test.values[keep],
                'forecast_price': y_pred[keep],
            }))

        if not daily_frames:
            return pd.DataFrame(columns=columns)
        # A single concat avoids re-copying the growing result on every day.
        return pd.concat(daily_frames, ignore_index=True)


# Wrap the prepared price frame in a forecaster.
forecaster = EnergyPriceForecaster(data)

# Forecast window: first day of 2019 through last day of 2021.
start_date, end_date = pd.Timestamp('2019-01-01'), pd.Timestamp('2021-12-31')
results_df = forecaster.forecast(start_date, end_date)

print(results_df)



Forecasting:   0%|          | 0/1096 [00:00<?, ?it/s]

  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepared_data[:train_end]
  test_data = prepared_data[forecast_start:forecast_end]
  train_data = prepa

                      timestamp  actual_price  forecast_price
0     2019-01-01 00:00:00+00:00         70.28       57.093227
1     2019-01-01 01:00:00+00:00         65.18       55.203873
2     2019-01-01 02:00:00+00:00         60.91       51.189728
3     2019-01-01 03:00:00+00:00         65.16       51.895824
4     2019-01-01 04:00:00+00:00         65.16       48.271999
...                         ...           ...             ...
27395 2021-12-31 20:00:00+00:00        151.15      177.121399
27396 2021-12-31 21:00:00+00:00        140.58      153.833069
27397 2021-12-31 22:00:00+00:00         79.97      164.477814
27398 2021-12-31 23:00:00+00:00          0.27      141.865265
27399 2022-01-01 00:00:00+00:00         -0.01       99.224335

[27400 rows x 3 columns]


In [None]:
import matplotlib.pyplot as plt

# Reference the forecast results produced earlier in the notebook.
results_df

# Draw the actual and the forecast price series against time on one figure.
plt.figure(figsize=(12, 6))
for column, label in (
    ('actual_price', 'Actual Price'),
    ('forecast_price', 'Forecast Price'),
):
    plt.plot(results_df['timestamp'], results_df[column], label=label)

# Axis labels and title.
plt.xlabel('Timestamp')
plt.ylabel('Price')
plt.title('Actual Price vs Forecast Price')

# Legend, then render.
plt.legend()
plt.show()


NameError: ignored