<a href="https://colab.research.google.com/github/andresfelipecs/suena_application_test/blob/master/algotrading.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing libraries

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.subplots as sp
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf

import time
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, r2_score

import tensorflow as tf
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Input, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, Sequential
from itertools import product
import warnings
from scipy.stats.distributions import norm
import xgboost as xgb
from hyperopt import fmin, tpe, hp
from functools import partial
from typing import List, Tuple, Optional

!pip install ipywidgets
import ipywidgets as widgets
from IPython.display import display



# Loading data

In [None]:
# Loading the data into a Pandas DataFrame
df_auction = pd.read_csv("/content/sample_data/auction_data.csv", sep=';', header=0)

# Setting the first column as an index 
df_auction['Date (WET)'] = pd.to_datetime(df_auction['Date (WET)'], format='[%d/%m/%Y %H:%M]')
# df_auction['date'] = df_auction['Date (WET)']
df_auction.set_index('Date (WET)', inplace=True)


# Droping the first row
units_auction = df_auction.iloc[0]
print(units_auction)

df_auction = df_auction.drop(df_auction.index[0])


# Showing the data 
print(df_auction)
# print(f'INDEX:  {df_auction.index} \n')
# print(f'TYPE:  {df_auction.dtypes} \n')
# print(f'SHAPE:  {df_auction.shape} \n')
print(f"COLUMNS: {df_auction.columns} \n")

# print(f'ROW {df_auction.loc["2021-01-01 00:00:00" ]}')


# Converting all the elements in the df_auction dataframe to numerical values, so that the data can be used for numerical computations and visualizations.
df_auction = df_auction.apply(pd.to_numeric, errors='coerce')
# The errors='coerce' argument is used to handle any elements that can't be converted to numerical values. 
# When errors='coerce', any non-numerical values will be replaced with NaN (Not a Number) values.




In [None]:
fig = make_subplots(
    rows=3, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    specs=[[{"type": "table"}],
           [{"type": "scatter"}],
           [{"type": "scatter"}]]
)

def create_table(header_values, cell_values, index):
    return go.Table(
        header=dict(
            values=header_values,
            fill=dict(color='#3e3c4a'),  
            font=dict(size=15, color='#ffc512'),  
            align="left"
        ),
        cells=dict(
            values= [index] + cell_values,
            fill=dict(color=['#ffffff', '#ffffff']),  
            align="left",
            font=dict(size=12, color='#3e3c4a') 
        )
    )

fig.add_trace(
    go.Scatter(
        x=df_auction.index,
        y=df_auction["price_first_auction"],
        mode="lines",
        name="Price first auction",
        line=dict(color='#3e3c4a')
    ),
    row=2, col=1
)

fig.add_trace(
    go.Scatter(
        x=df_auction.index,
        y=df_auction["price_second_auction"],
        mode="lines",
        name="Price second auction",
        line=dict(color='#ffc512')
    ),
    row=3, col=1
)


fig.add_trace(
    create_table(
        ["Datetime", "price first auction", "Price second auction", "Traded volume first_auction",
                    "Traded volume second auction", "Price forecast first auction"],
        [df_auction[k].tolist() for k in df_auction.columns[:6]],
        df_auction.index
    ),
    row=1, col=1
)

fig.update_layout(
    height=900,  
    showlegend=True,
    title=dict(
        text="df_auction",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.show()

In [None]:
df_forecast_inputs = pd.read_csv(
            "/content/sample_data/forecast_inputs.csv"
        , sep=';', header=0)

# Setting the first column as an index 
df_forecast_inputs['Date (WET)'] = pd.to_datetime(df_forecast_inputs['Date (WET)'], format='[%d/%m/%Y %H:%M]')
df_forecast_inputs.set_index('Date (WET)', inplace=True)

# Dropping the first row
units_forecast_inputs = df_forecast_inputs.iloc[0]
print(units_forecast_inputs)

df_forecast_inputs = df_forecast_inputs.drop(df_forecast_inputs.index[0])

df_forecast_inputs = df_forecast_inputs.apply(pd.to_numeric, errors='coerce')


print(df_forecast_inputs)




In [None]:
fig = make_subplots(
    rows=3, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    specs=[[{"type": "table"}],
           [{"type": "table"}],
           [{"type": "table"}]]
)

def create_table(header_values, cell_values, index):
    return go.Table(
        header=dict(
            values=header_values,
            fill=dict(color='#3e3c4a'),  
            font=dict(size=15, color='#ffc512'),  
            align="left"
        ),
        cells=dict(
            values= [index] + cell_values,
            fill=dict(color=['#ffffff', '#ffffff']),  
            align="left",
            font=dict(size=12, color='#3e3c4a') 
        )
    )

fig.add_trace(
    create_table(
        ["Datetime", "Demand plus system margin (MW)", "Demand (MW)", "Within day availability (MW)",
         "Margin (MW)", "within_day_margin (MW)", "Long term wind (GBP/MWh)"],
        [df_forecast_inputs[k].tolist() for k in df_forecast_inputs.columns[:6]],
        df_forecast_inputs.index
    ),
    row=1, col=1
)

fig.add_trace(
    create_table(
        ["Datetime", "Long term solar (MW)", "Long term wind over demand (%)", "long term wind over margin (%)",
         "long term solar over demand (%)", "long term solar over margin (%)", "margin over demand (%)"],
        [df_forecast_inputs[k].tolist() for k in df_forecast_inputs.columns[6:12]],
        df_forecast_inputs.index
    ),
    row=2, col=1
)

fig.add_trace(
    create_table(
        ["Datetime", "Snsp forecast (%)", "Stack price (GBP/MWh)", "Within day potential stack price (GBP/MWh)",
         "previous day ahead price (GBP/MWh)", "previous continuous half hour vwap (GBP/MWh)", "inertia forecast (GVA.s)"],
        [df_forecast_inputs[k].tolist() for k in df_forecast_inputs.columns[12:]],
        df_forecast_inputs.index
    ),
    row=3, col=1
)

fig.update_layout(
    height=900,  
    showlegend=False,
    title=dict(
        text="df_forecast_inputs",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.show()


In [None]:
df_system_prices = pd.read_csv(
            "/content/sample_data/system_prices.csv"
        , sep=';', header=0)

# Setting the first column as an index 
df_system_prices['Date (WET)'] = pd.to_datetime(df_system_prices['Date (WET)'], format='[%d/%m/%Y %H:%M]')
df_system_prices.set_index('Date (WET)', inplace=True)

# Dropping the first row
units_system_prices = df_system_prices.iloc[0]
print(units_system_prices)

df_system_prices = df_system_prices.drop(df_system_prices.index[0])

df_system_prices = df_system_prices.apply(pd.to_numeric, errors='coerce')


print(df_system_prices)



In [None]:
fig = make_subplots(
    rows=4, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    specs=[[{"type": "table"}],
           [{"type": "scatter"}],
           [{"type": "scatter"}],
           [{"type": "scatter"}]
           ]
)

def create_table(header_values, cell_values, index):
    return go.Table(
        header=dict(
            values=header_values,
            fill=dict(color='#3e3c4a'),  
            font=dict(size=15, color='#ffc512'),  
            align="left"
        ),
        cells=dict(
            values= [index] + cell_values,
            fill=dict(color=['#ffffff', '#ffffff']),  
            align="left",
            font=dict(size=12, color='#3e3c4a') 
        )
    )

fig.add_trace(
    go.Scatter(
        x=df_system_prices.index,
        y=df_system_prices["forecast_system_price_high"],
        mode="lines",
        name="Forecast system price high",
        line=dict(color='#e28278')
    ),
    row=4, col=1
)

fig.add_trace(
    go.Scatter(
        x=df_system_prices.index,
        y=df_system_prices["forecast_system_price_low"],
        mode="lines",
        name="Forecast system price low",
        line=dict(color='#ffc512')
    ),
    row=3, col=1
)

fig.add_trace(
    go.Scatter(
        x=df_system_prices.index,
        y=df_system_prices["system_price"],
        mode="lines",
        name="System price ",
        line=dict(color='#3e3c4a')
    ),
    row=2, col=1
)



fig.add_trace(
    create_table(
        ["Datetime", "Forecast system price low GBP/MWh", "Forecast system price high GBP/MWh", "System price GBP/MWh"],
        [df_system_prices[k].tolist() for k in df_system_prices.columns[:]],
        df_system_prices.index
    ),
    row=1, col=1
)


fig.update_layout(
    height=900,  
    showlegend=True,
    title=dict(
        text="df_forecast_inputs",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.show()

# Cleaning and Splitting the data into training and testing sets

In [None]:

# Filling NaN with mode 
df_auction.fillna(df_auction.mode().iloc[0], inplace=True)
df_forecast_inputs.fillna(df_forecast_inputs.mode().iloc[0], inplace=True)
df_system_prices.fillna(df_system_prices.mode().iloc[0], inplace=True)

train_auction = df_auction[df_auction.index < '2022-03-01']
train_auction = train_auction.apply(pd.to_numeric, errors='coerce')

test_auction = df_auction[df_auction.index >= '2022-03-01']
test_auction = test_auction.apply(pd.to_numeric, errors='coerce')

train_forecast_inputs = df_forecast_inputs[df_forecast_inputs.index < '2022-03-01']
train_forecast_inputs = train_forecast_inputs.apply(pd.to_numeric, errors='coerce')

test_forecast_inputs = df_forecast_inputs[df_forecast_inputs.index >= '2022-03-01']
test_forecast_inputs = test_forecast_inputs.apply(pd.to_numeric, errors='coerce')

train_system_prices = df_system_prices[df_system_prices.index < '2022-03-01']
train_system_prices = train_system_prices.apply(pd.to_numeric, errors='coerce')

test_system_prices = df_system_prices[df_system_prices.index >= '2022-03-01']
test_system_prices = test_system_prices.apply(pd.to_numeric, errors='coerce')

# Using scaler to normalize ALL the data 
scaler = StandardScaler()

all_data = pd.concat([df_auction, df_forecast_inputs, df_system_prices], axis=1)
all_data = all_data.apply(pd.to_numeric, errors='coerce')
regression_all_data = all_data
all_data = pd.DataFrame(scaler.fit_transform(all_data), columns=all_data.columns, index=all_data.index)

normalized_train_data = all_data[all_data.index < '2022-03-01']
normalized_test_data = all_data[all_data.index >= '2022-03-01']

normalized_train_auction = normalized_train_data[df_auction.columns]
normalized_test_auction = normalized_test_data[df_auction.columns]

normalized_train_forecast_inputs = normalized_train_data[df_forecast_inputs.columns]
normalized_test_forecast_inputs = normalized_test_data[df_forecast_inputs.columns]

normalized_train_system_prices = normalized_train_data[df_system_prices.columns]
normalized_test_system_prices = normalized_test_data[df_system_prices.columns]

print(f"normalized_train_auction {normalized_train_auction}")

# Making sure there are not NaN or inf values 
print(f"train auction null data {train_auction.isnull().sum().sum()}")
print(f"train auction inf data {np.isinf(train_auction).sum().sum()}")

print(f"train forecast input null data {train_forecast_inputs.isnull().sum().sum()}")
print(f"tran forecast input inf data {np.isinf(train_forecast_inputs).sum().sum()}")


print(f"train system price null data {train_system_prices.isnull().sum().sum()}")
print(f"traion system price inf data {np.isinf(train_system_prices).sum().sum()}")

print(train_auction.columns)
print(train_forecast_inputs.columns)
print(train_system_prices.columns)




# Regression analysis using scikit-learn

In [None]:
print(regression_all_data.columns)

df = regression_all_data

# Preparing the independent (X) and dependent (y) variables
X = df.drop(['price_first_auction', 'price_second_auction'], axis=1)
y = df['price_second_auction']  # Change to 'price_first_auction' if needed

# Spliting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the linear regression model
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)

# Making predictions on the test set
y_pred = regression_model.predict(X_test)

# Performance of the model using metric R-squared (R²)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Check the coefficients and intercept of the model
print("Coefficients:", regression_model.coef_)
print("Intercept:", regression_model.intercept_)


### Linear Regression Results

We performed a linear regression analysis to predict the `price_second_auction` using the given independent variables. Below are the key findings from our analysis:

**Model Performance**

* **R-squared (R²):** 0.7395460047130158

R-squared: The R-squared value is 0.7395460047130158, which indicates that around 73.95% of the variation in the price_second_auction can be explained by the independent variables in the model. This is a relatively good R-squared value, meaning that the model is able to explain a significant portion of the variation in the dependent variable.








**Model Coefficients**

The coefficients of our linear regression model provide insights into the relationships between the independent variables and the dependent variable (`price_second_auction`). The table below summarizes the coefficients:

In [None]:
# Create a DataFrame with the coefficients
coefficients_df = pd.DataFrame({
    'Independent Variable': [
        'traded_volume_first_auction',
        'traded_volume_second_auction',
        'price_forecast_first_auction',
        'demand_plus_system_margin',
        'demand',
        'within_day_availability',
        'margin',
        'within_day_margin',
        'long_term_wind',
        'long_term_solar',
        'long_term_wind_over_demand',
        'long_term_wind_over_margin',
        'long_term_solar_over_demand',
        'long_term_solar_over_margin',
        'margin_over_demand',
        'snsp_forecast',
        'stack_price',
        'within_day_potential_stack_price',
        'previous_day_ahead_price',
        'previous_continuous_half_hour_vwap',
        'inertia_forecast',
        'forecast_system_price_low',
        'forecast_system_price_high',
        'system_price'
    ],
    'Coefficient': regression_model.coef_
})

# Display the coefficients DataFrame
pd.set_option('display.float_format', '{:.4e}'.format)  # Displayed coefficients in scientific notation
display(coefficients_df)


The coefficients of the independent variables show the relationship between each independent variable and the dependent variable (price_second_auction). Positive coefficients indicate a positive relationship (i.e., when the independent variable increases, the dependent variable also increases), while negative coefficients indicate a negative relationship (i.e., when the independent variable increases, the dependent variable decreases). Larger absolute values of the coefficients imply a stronger relationship.

**Model Intercept**

Intercept: The intercept value is 17.34216750441618, which represents the expected value of the price_second_auction when all independent variables are zero. Note that the intercept may not have a meaningful interpretation in this case, as it's unlikely that all independent variables would be zero simultaneously.

---

In conclusion, our linear regression model provides a reasonable understanding of the relationships between the independent variables and the `price_second_auction`. However, it's crucial to validate the assumptions of linear regression, perform additional diagnostics, and compare the model's performance to other models or a baseline model before drawing final conclusions.


# Checking the shapes of the resulting training and testing sets





In [None]:

print("train_auction shape: ", train_auction.shape)
print("test_auction shape: ", test_auction.shape)

print("train_forecast_inputs shape: ", train_forecast_inputs.shape)
print("test_forecast_inputs shape: ", test_forecast_inputs.shape)

print("train_system_prices shape: ", train_system_prices.shape)
print("test_system_prices shape: ", test_system_prices.shape)

# Tensor flow & keras: predictions with Kfold validations (1D Convolutional Neural Network (CNN))

In [None]:

# Splitting the input data into features and target
train_features = normalized_train_auction.drop('price_second_auction', axis=1)
train_target = normalized_train_auction['price_second_auction']
# Concatenating the other datasets with the training features
train_features = np.concatenate((train_features, normalized_train_forecast_inputs, normalized_train_system_prices), axis=1)

# Defining the number of folds for the K-fold cross-validation
n_folds = 5

# Initializing a KFold object with the number of folds
kfold = KFold(n_splits=n_folds, shuffle=True)

# Initializing arrays to store the results of the cross-validation
cv_scores = []
cv_histories = []

for train_idx, val_idx in kfold.split(train_features):
    # Splitting the data into training and validation sets for each fold
    x_train, x_val = train_features[train_idx], train_features[val_idx]
    y_train, y_val = train_target[train_idx], train_target[val_idx]
    
    # Building the neural network model using TensorFlow
    model = tf.keras.models.Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(x_train.shape[1], 1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))

    # Compiling the model
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mean_absolute_error'])

    # Defining callbacks to monitor the training process
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=10),
        ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.0001)
    ]

    # Reshaping the data to include a channel dimension
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
    x_val = x_val.reshape(x_val.shape[0], x_val.shape[1], 1)

    # Training the model on the training data for each fold
    history = model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_val, y_val), callbacks=callbacks)

    # Evaluating the model on the validation data for each fold
    score = model.evaluate(x_val, y_val, verbose=0)

    # Appending the results of the fold to the arrays
    cv_scores.append(score)
    cv_histories.append(history)

# Calculating the average performance of the model across all folds
mean_score = np.mean(cv_scores, axis=0)

# Printing the average performance of the model
print("Average performance on validation data:")
print("Loss: ", mean_score[0])
print("Mean Absolute Error: ", mean_score[1])


# Using the trained model to make predictions on the train data
validated_train_forecast = model.predict(train_features)

print(validated_train_forecast)
print(validated_train_forecast.shape)

# Using the trained model to make predictions on the test data
test_features = normalized_test_auction.drop('price_second_auction', axis=1)

# Concatenating the other datasets with the test features
test_features = np.concatenate((test_features, normalized_test_forecast_inputs, normalized_test_system_prices), axis=1)

validated_test_forecast = model.predict(test_features)


# Summary: 
# The model being used is a 1D Convolutional Neural Network (CNN) built using TensorFlow. 
# It has multiple layers, including two 1D convolutional layers, two max pooling layers, a flatten layer, two dense layers, and a dropout layer. 
# It's trained using the Adam optimizer with a learning rate of 0.001 and the mean squared error (MSE) loss function. 
# It's also monitored using several callbacks, including EarlyStopping, ModelCheckpoint, and ReduceLROnPlateau. 
# Performming K-fold cross-validation, where the data is split into training and validation sets for each fold, and the model is trained and evaluated on each fold. 
# Finally, the trained model is used to make predictions on the train and test data.

# Decided to change the model to CNN, one most commonly used for time series forecasting to validate and compare results with the previous model.
# It is also recommended to do K-fold cross-validation in order to have a better performace estimation, reduce overfitting and helps us identifying 
# the model stability. If the performance scores low variability across different folds, it suggests that the model may perform well on unseen data.

# This model approach, as the previous one, is using the train_features wihtout the second_price_auction to find the train_forecast
#  and the test_features wihtout the second_price_auction to find the test_forecast


## Performance scores

In [None]:
# Calculating the mean and standard deviation of the target variable
target_mean = np.mean(normalized_train_auction['price_second_auction'])
target_std = np.std(normalized_train_auction['price_second_auction'])

print("Target Mean: ", target_mean)
print("Target Standard Deviation: ", target_std)

print("Average performance on validation data:")
print("Loss: ", mean_score[0])
print("Mean Absolute Error: ", mean_score[1])

# Based on the performance metrics, the model is performing relatively well. 
# The average loss value of 0.064 and mean absolute error value of 0.107 
# It indicates that the model is making predictions that are relatively close to the true values, 
# with an average absolute difference of approximately 0.11 between the predicted and true values.


## Results plot: Plotting validated normalized values 

In [None]:
# print(validated_train_forecast)
# print(validated_test_forecast)

# Creating a dataframe for the train forecast
validated_train_forecast_df = pd.DataFrame(validated_train_forecast, index=normalized_train_auction.index, columns=['price_second_auction_forecast'])

# Creating a dataframe for the test forecast
validated_test_forecast_df = pd.DataFrame(validated_test_forecast, index=normalized_test_auction.index, columns=['price_second_auction_forecast'])


fig = make_subplots()

fig.add_trace(go.Scatter(x=normalized_train_auction.index, y=normalized_train_auction['price_second_auction'], name='Train Price Second Auction', line=dict(color='#3e3c4a')))
fig.add_trace(go.Scatter(x=validated_train_forecast_df.index, y=validated_train_forecast_df['price_second_auction_forecast'], name='Train Price Second Auction Forecast', line=dict(color='#7ba1bc', width=0.8)))

fig.add_trace(go.Scatter(x=normalized_test_auction.index, y=normalized_test_auction['price_second_auction'], name='Test Price Second Auction', line=dict(color='#3e3c4a')))
fig.add_trace(go.Scatter(x=validated_test_forecast_df.index, y=validated_test_forecast_df['price_second_auction_forecast'], name='Test Price Second Auction Forecast', line=dict(color='#fdb462', width=0.8)))

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Normalized Train and Test Forecasts Second Auction",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.show()



## Plotting train **prices** and **volume** 

In [None]:


fig = make_subplots(
            rows=2,
            cols=2,
            shared_xaxes=True,
            vertical_spacing=0.08,
            subplot_titles=("1st Action Prices", "2nd Action Prices", "1st Action Volume", "2nd Action Volume"),
            row_width=[0.2, 0.7],
        )

fig.add_trace(
            go.Scatter(x=normalized_train_auction.index, y=normalized_train_auction["price_first_auction"], mode="lines", line_shape="spline", name="Train Price First Auction", line=dict(color='#3e3c4a')),
            row=1,
            col=1,
        )
fig.add_trace(
            go.Scatter(x=normalized_train_auction.index, y=normalized_train_auction["price_second_auction"], mode="lines", line_shape="spline", name="Train Price Second Auction", line=dict(color='#3e3c4a')),
            row=1,
            col=2,
        )

fig.add_trace(
            go.Bar(
                x=normalized_train_auction.index,
                y=normalized_train_auction["traded_volume_first_auction"],
                showlegend=False,
                marker=dict(
                    opacity=1,
                    line=dict(width=2, color="#fff3cd"),
                ),
            ),
            row=2,
            col=1,
        )

fig.add_trace(
            go.Bar(
                x=normalized_train_auction.index,
                y=normalized_train_auction["traded_volume_second_auction"],
                showlegend=False,
                marker=dict(
                    opacity=1,
                    line=dict(width=2, color="#fff3cd"),
                ),
            ),
            row=2,
            col=2,
        )

fig.add_trace(
            go.Scatter(
                x=normalized_train_auction.index,
                y=normalized_train_auction["price_forecast_first_auction"],
                mode="lines",
                name="Price train forecast first auction",
                line=dict(color="#7ba1bc", width=0.5),
            ),
            row=1,
            col=1,
        )

fig.add_trace(
            go.Scatter(
                x=validated_train_forecast_df.index,
                y=validated_train_forecast_df['price_second_auction_forecast'],
                mode="lines",
                name="Price train forecast second auction",
                line=dict(color="#fdb462", width=0.5),
            ),
            row=1,
            col=2,
        )

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Normalized Training Actions Prices and Volumes",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)
            

fig.update_yaxes(title='Price', row=1)
fig.update_xaxes(title='Date', row=2)

fig.show()

## Plotting normalized test **prices** and **volume** 

In [None]:

fig = make_subplots(
            rows=2,
            cols=2,
            shared_xaxes=True,
            vertical_spacing=0.08,
            subplot_titles=("1st Action Prices", "2nd Action Prices", "1st Action Volume", "2nd Action Volume"),
            row_width=[0.2, 0.7],
        )

fig.add_trace(
            go.Scatter(x=normalized_test_auction.index, y=normalized_test_auction["price_first_auction"], mode="lines", line_shape="spline",name="Test Price First Auction ", line=dict(color='#3e3c4a')),
            row=1,
            col=1,
        )
fig.add_trace(
            go.Scatter(x=normalized_test_auction.index, y=normalized_test_auction["price_second_auction"], mode="lines", line_shape="spline",name="Test Price Second Auction", line=dict(color='#3e3c4a')),
            row=1,
            col=2,
        )

fig.add_trace(
            go.Bar(
                x=normalized_test_auction.index,
                y=normalized_test_auction["traded_volume_first_auction"],
                
                showlegend=False,
                marker=dict(
                    opacity=1,
                    line=dict(width=2, color="#fff3cd"),
                ),
            ),
            row=2,
            col=1,
        )

fig.add_trace(
            go.Bar(
                x=normalized_test_auction.index,
                y=normalized_test_auction["traded_volume_second_auction"],
                
                showlegend=False,
                marker=dict(
                    opacity=1,
                    line=dict(width=2, color="#fff3cd"),
                ),
            ),
            row=2,
            col=2,
        )

fig.add_trace(
            go.Scatter(
                x=normalized_test_auction.index,
                y=normalized_test_auction["price_forecast_first_auction"],
                mode="lines",
                name="Price train forecast first auction",
                line=dict(color="#7ba1bc", width=0.5),
            ),
            row=1,
            col=1,
        )

fig.add_trace(
            go.Scatter(
                x=validated_test_forecast_df.index,
                y=validated_test_forecast_df['price_second_auction_forecast'],
                mode="lines",
                name="Price train forecast second auction",
                line=dict(color="#fdb462", width=0.5),
            ),
            row=1,
            col=2,
        )
fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Normalized Test Actions Prices and Volumes",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.update_yaxes(title='Price', row=1)
fig.update_xaxes(title='Date', row=2)

fig.show()

##  Denormalized Plots: (Results) (train **prices** and **volume**) (test **prices** and **volume**)

In [None]:

# Getting the mean and standard deviation used during normalization
mean = scaler.mean_[0]
std = scaler.scale_[0]

# Denormalizing the train forecast
validated_train_forecast_df['price_second_auction_forecast'] = validated_train_forecast_df['price_second_auction_forecast'] * std + mean
# Denormalizing the test forecast
validated_test_forecast_df['price_second_auction_forecast'] = validated_test_forecast_df['price_second_auction_forecast'] * std + mean

print(f"train forecast df : {validated_train_forecast_df['price_second_auction_forecast']}")
print(f"test forecast df {validated_test_forecast_df['price_second_auction_forecast']}")





In [None]:
# print(train_auction)
# print(test_auction)

fig = make_subplots()

fig.add_trace(go.Scatter(x=train_auction.index, y=train_auction['price_second_auction'], name='Train Price Second Auction', line=dict(color='#3e3c4a')))
fig.add_trace(go.Scatter(x=validated_train_forecast_df.index, y=validated_train_forecast_df['price_second_auction_forecast'], name='Train Price Second Auction Forecast', line=dict(color='#7ba1bc', width=0.8)))

fig.add_trace(go.Scatter(x=test_auction.index, y=test_auction['price_second_auction'], name='Test Price Second Auction', line=dict(color='#3e3c4a')))
fig.add_trace(go.Scatter(x=validated_test_forecast_df.index, y=validated_test_forecast_df['price_second_auction_forecast'], name='Test Price Second Auction Forecast', line=dict(color='#fdb462', width=0.8)))


fig.update_xaxes(title='Date')
fig.update_yaxes(title='Price')

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Denormalized Train and Test Forecasts Second Auction ",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)
fig.show()




In [None]:

fig = make_subplots(
            rows=2,
            cols=2,
            shared_xaxes=True,
            vertical_spacing=0.08,
            subplot_titles=("1st Action Prices", "2nd Action Prices", "1st Action Volume", "2nd Action Volume"),
            row_width=[0.2, 0.7],
        )

fig.add_trace(
            go.Scatter(x=train_auction.index, y=train_auction["price_first_auction"], mode="lines", line_shape="spline", name="Train Price First Auction", line=dict(color='#3e3c4a')),
            row=1,
            col=1,
        )
fig.add_trace(
            go.Scatter(x=train_auction.index, y=train_auction["price_second_auction"], mode="lines", line_shape="spline", name="Train Price Second Auction", line=dict(color='#3e3c4a')),
            row=1,
            col=2,
        )

fig.add_trace(
            go.Bar(
                x=train_auction.index,
                y=train_auction["traded_volume_first_auction"],
                showlegend=False,
                marker=dict(
                    opacity=1,
                    line=dict(width=2, color="#fff3cd"),
                ),
            ),
            row=2,
            col=1,
        )

fig.add_trace(
            go.Bar(
                x=train_auction.index,
                y=train_auction["traded_volume_second_auction"],
                showlegend=False,
                marker=dict(
                    opacity=1,
                    line=dict(width=2, color="#fff3cd"),
                ),
            ),
            row=2,
            col=2,
        )

fig.add_trace(
            go.Scatter(
                x=train_auction.index,
                y=train_auction["price_forecast_first_auction"],
                mode="lines",
                name="Price train forecast first auction",
                line=dict(color="#7ba1bc", width=0.5),
            ),
            row=1,
            col=1,
        )

fig.add_trace(
            go.Scatter(
                x=validated_train_forecast_df.index,
                y=validated_train_forecast_df['price_second_auction_forecast'],
                mode="lines",
                name="Price train forecast second auction",
                line=dict(color="#fdb462", width=0.5),
            ),
            row=1,
            col=2,
        )

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Denormalized Training Actions Prices and Volumes",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)
            
fig.update_yaxes(title='Price', row=1)
fig.update_xaxes(title='Date', row=2)

fig.show()

In [None]:

fig = make_subplots(
            rows=2,
            cols=2,
            shared_xaxes=True,
            vertical_spacing=0.08,
            subplot_titles=("1st Action Prices", "2nd Action Prices", "1st Action Volume", "2nd Action Volume"),
            row_width=[0.2, 0.7],
        )

fig.add_trace(
            go.Scatter(x=test_auction.index, y=test_auction["price_first_auction"], mode="lines", line_shape="spline",name="Test Price First Auction ", line=dict(color='#3e3c4a')),
            row=1,
            col=1,
        )
fig.add_trace(
            go.Scatter(x=test_auction.index, y=test_auction["price_second_auction"], mode="lines", line_shape="spline",name="Test Price Second Auction", line=dict(color='#3e3c4a')),
            row=1,
            col=2,
        )

fig.add_trace(
            go.Bar(
                x=test_auction.index,
                y=test_auction["traded_volume_first_auction"],
                
                showlegend=False,
                marker=dict(
                    opacity=1,
                    line=dict(width=2, color="#fff3cd"),
                ),
            ),
            row=2,
            col=1,
        )

fig.add_trace(
            go.Bar(
                x=test_auction.index,
                y=test_auction["traded_volume_second_auction"],
                
                showlegend=False,
                marker=dict(
                    opacity=1,
                    line=dict(width=2, color="#fff3cd"),
                ),
            ),
            row=2,
            col=2,
        )

fig.add_trace(
            go.Scatter(
                x=test_auction.index,
                y=test_auction["price_forecast_first_auction"],
                mode="lines",
                name="Price train forecast first auction",
                line=dict(color="#7ba1bc", width=0.5),
            ),
            row=1,
            col=1,
        )

fig.add_trace(
            go.Scatter(
                x=validated_test_forecast_df.index,
                y=validated_test_forecast_df['price_second_auction_forecast'],
                mode="lines",
                name="Price train forecast second auction",
                line=dict(color="#fdb462", width=0.5),
            ),
            row=1,
            col=2,
        )

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Denormalized Test Actions Prices and Volumes",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.update_yaxes(title='Price', row=1)
fig.update_xaxes(title='Date', row=2)

fig.show()

In [None]:
start_date = pd.Timestamp('2022-02-28 23:00:00')
end_date = pd.Timestamp('2022-09-12 23:00:00')

# Number of hours between start_date and end_date
n_hours = (end_date - start_date).total_seconds() / 3600

print("Number of hours between start and end dates:", n_hours)



# **UNSEEN DATA SHORT TERM PREDICTIONS** 

The optimal time frame for precise forecasting is contingent upon the specific field and the quality of the data at hand. Within the realm of electricity price forecasting, short-term predictions (e.g., spanning a few hours to several days) tend to exhibit greater accuracy compared to their long-term counterparts (e.g., those extending over weeks or months). This is primarily due to the reduced uncertainty associated with shorter time frames. As evidenced by our attempt to forecast using previously unseen data, long-term projections (spanning 10 months) failed to align with the test data. In contrast, our initial models exhibited greater accuracy as they incorporated additional variables within the same time index, enabling the model to learn from contemporaneous data and generate more accurate predictions for the target variable.

Given the nature of the data, it appears prudent to concentrate on short-term forecasting, potentially within a 24-48 hour window. This time frame would enable the capture of daily trends and variations while maintaining a satisfactory level of precision. It is crucial to bear in mind that forecast accuracy typically declines as the prediction horizon expands. As such, it is essential to assess the performance of our forecasting model and recognize its limitations over extended periods.

Moving forward, our subsequent models will prioritize generating accurate short-term forecasts using previously unseen data, as would be the case in real-world trading scenarios. Subsequently, we will compare the predictive capabilities of various models to determine the most suitable approach for incorporating this data into our double auction strategies.

In the next models we are going to assume that the end of our train data is the present and the first two days second auction price of the test data are our target dependent variable.

# XGBoost model: Predictions with Kfold validations 

In [None]:


def extract_features_from_past_data(normalized_train_auction, normalized_train_forecast_inputs, normalized_train_system_prices):
    train_features = pd.concat([normalized_train_auction,
                                  normalized_train_forecast_inputs,
                                  normalized_train_system_prices], axis=1)
    return train_features

train_features = extract_features_from_past_data(normalized_train_auction, normalized_train_forecast_inputs, normalized_train_system_prices)

# print(train_features)

# Preparing the train data and target variable
train_data = train_features['price_second_auction']

# Number of hours we want to predict into the future
n_hours_for_prediction = 2 * 24

train_features = train_features[:-n_hours_for_prediction]


test_features = train_features[-n_hours_for_prediction:]

train_target = train_data[:-n_hours_for_prediction]
test_target = train_data[-n_hours_for_prediction:]

# Setting up the K-Fold cross-validation
kf = KFold(n_splits=5, random_state=42, shuffle=True)

# Defining the XGBoost model
xgbr = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=1000, learning_rate=0.05, max_depth=3, random_state=42)

def calculate_kpis(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    
    return mse, rmse, mae, r2

mse_scores = []
rmse_scores = []
mae_scores = []
r2_scores = []


# Performing cross-validation
for train_index, test_index in kf.split(train_features):
    X_train, X_test = train_features.iloc[train_index], train_features.iloc[test_index]
    y_train, y_test = train_target.iloc[train_index], train_target.iloc[test_index]
    
    # Training the model
    xgbr.fit(X_train, y_train)
    
    # Making predictions on the test set
    y_pred = xgbr.predict(X_test)
    
    # Calculating the evaluation metrics
    mse, rmse, mae, r2 = calculate_kpis(y_test, y_pred)
    
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    mae_scores.append(mae)
    r2_scores.append(r2)

# Calculating the mean and standard deviation of the evaluation metrics from cross-validation
mean_mse = np.mean(mse_scores)
std_mse = np.std(mse_scores)

mean_rmse = np.mean(rmse_scores)
std_rmse = np.std(rmse_scores)

mean_mae = np.mean(mae_scores)
std_mae = np.std(mae_scores)

mean_r2 = np.mean(r2_scores)
std_r2 = np.std(r2_scores)

print("Mean MSE from Cross-Validation:", mean_mse)
print("Standard Deviation of MSE from Cross-Validation:", std_mse)

print("Mean RMSE from Cross-Validation:", mean_rmse)
print("Standard Deviation of RMSE from Cross-Validation:", std_rmse)

print("Mean MAE from Cross-Validation:", mean_mae)
print("Standard Deviation of MAE from Cross-Validation:", std_mae)

print("Mean R2 from Cross-Validation:", mean_r2)
print("Standard Deviation of R2 from Cross-Validation:", std_r2)

# Trainning the model on the entire dataset
xgbr.fit(train_features, train_target)

# Predicting the next 2 * 24 hours
prediction = xgbr.predict(test_features)

predict_period_dates = pd.date_range('2022-03-01', periods=n_hours_for_prediction, freq="H").tolist()

# Creating a DataFrame with the forecast dates and predictions
future_forecast_df = pd.DataFrame({'DateTime': predict_period_dates, 'Prediction': prediction})


**Cross-validation results:**

Mean MSE: 0.00247
This value is low, indicating a low error rate on average for the training data. This is a positive result, showing that the model is fitting well on the training data.

Standard Deviation of MSE: 0.00363
This value is relatively small, suggesting that the model has consistent performance across different folds of the training data. This is a positive result, indicating stable performance.

Mean RMSE: 0.0363
This value is low, suggesting a low average error rate in terms of the square root of the mean squared error. This is a positive result, indicating good model performance.

Standard Deviation of RMSE: 0.0339
This value is small, showing that the model performs consistently across different folds of the training data. This is a positive result.

Mean MAE: 0.00438
This value is low, indicating a low average absolute error. This is a positive result, as it shows the model has good predictive performance.

Standard Deviation of MAE: 0.000838
This value is very small, indicating consistent performance in terms of absolute errors across different folds of the training data. This is a positive result.

Mean R2: 0.9969
This value is high, suggesting that the model explains 99.69% of the variance in the target variable. This is a positive result, showing that the model has strong explanatory power.

Standard Deviation of R2: 0.00454
This value is low, indicating that the model performs consistently in terms of explaining the variance in the target variable across different folds of the training data. This is a positive result.


In [None]:
print(normalized_test_auction.columns)

# Filtering the dataframe to get the first 48 hours
start_time = normalized_test_auction.index.min()
end_time = start_time + pd.Timedelta(hours=48)
filtered_df = normalized_test_auction[(normalized_test_auction.index >= start_time) & (normalized_test_auction.index < end_time)]

# Getting the 'price_second_auction' column for the first 48 hours
first_48_hours_price_second_auction = filtered_df['price_second_auction']
# Evaluating the test set predictions
test_mse, test_rmse, test_mae, test_r2 = calculate_kpis(first_48_hours_price_second_auction, prediction)

print("Test set MSE:", test_mse)
print("Test set RMSE:", test_rmse)
print("Test set MAE:", test_mae)
print("Test set R2:", test_r2)

# print(future_forecast_df)

**Prediction Results according to KPIs:**

Test set MSE: 0.0848582473431355
The Mean Squared Error (MSE) for the test set is 0.0849, which is a measure of the average squared differences between the predicted and actual values. A lower MSE value indicates better model performance. It is still relatively high, suggesting that there is room for further improvement in the model's predictive accuracy.

Test set RMSE: 0.29130438950200443
The Root Mean Squared Error (RMSE) for the test set is 0.2913. RMSE is the square root of MSE and is also a measure of the average difference between predicted and actual values. A lower RMSE value indicates better model performance. There might still be potential for further optimization.

Test set MAE: 0.2332893824903918
The Mean Absolute Error (MAE) for the test set is 0.2333. This metric represents the average absolute difference between the predicted and actual values, without considering the direction of the error. A lower MAE value indicates better model performance. There might still be room for further improvement.

Test set R2: 0.2461117571929009
The R2 score for the test set is 0.2461, which represents the proportion of the variance in the dependent variable that is predictable from the independent variables. An R2 score of 1 indicates a perfect fit, while an R2 score of 0 indicates that the model does not explain any of the variance in the target variable. Still relatively low, suggesting that the model's predictive performance could be further improved.

In [None]:
fig = go.Figure()

# Adding a trace for the predictions
fig.add_trace(go.Scatter(x=future_forecast_df['DateTime'], y=future_forecast_df['Prediction'], mode='lines', name='Predictions', line=dict(color="#ffc512", width=4),))

fig.update_layout(xaxis_title='Date',
                  yaxis_title='Price')

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Future Price Predictions",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.show()

## Results plot: Normalized trin and comparison between forecast df made by XGBoost model with unseen data and the given test data 

In [None]:
# Getting the mean and standard deviation used during normalization
mean = scaler.mean_[0]
std = scaler.scale_[0]

# Normalizing the train forecast
validated_train_forecast_df['price_second_auction_forecast'] = (validated_train_forecast_df['price_second_auction_forecast'] - mean) / std

# Normalizing the test forecast
validated_test_forecast_df['price_second_auction_forecast'] = (validated_test_forecast_df['price_second_auction_forecast'] - mean) / std


In [None]:
print(future_forecast_df.index)

# Selecting data within the desired time range
train_start_date = '2022-02-26'
train_end_date = normalized_train_auction.index.max()
train_mask = (normalized_train_auction.index >= train_start_date) & (normalized_train_auction.index <= train_end_date)
train_df = normalized_train_auction.loc[train_mask]

validated_start_date = '2022-02-26'
validated_end_date = validated_train_forecast_df.index.max()
validated_mask = (validated_train_forecast_df.index >= validated_start_date) & (validated_train_forecast_df.index <= validated_end_date)
validated_df = validated_train_forecast_df.loc[validated_mask]

test_start_date = '2022-03-01'
test_end_date = '2022-03-04'
test_mask = (normalized_test_auction.index >= test_start_date) & (normalized_test_auction.index <= test_end_date)
test_df = normalized_test_auction.loc[test_mask]

forecast_mask = (future_forecast_df['DateTime'] >= test_start_date) & (future_forecast_df['DateTime'] <= test_end_date)
future_forecast_df = future_forecast_df.loc[forecast_mask]

# Creating the subplots and add the traces
fig = make_subplots()

fig.add_trace(go.Scatter(x=train_df.index, y=train_df['price_second_auction'], name='Train Price Second Auction', line=dict(color='#3e3c4a')))
fig.add_trace(go.Scatter(x=validated_df.index, y=validated_df['price_second_auction_forecast'], name='Train Price Second Auction Forecast', line=dict(color='#e28278', width=3)))

fig.add_trace(go.Scatter(x=test_df.index, y=test_df['price_second_auction'], name='Test Price Second Auction', line=dict(color='#3e3c4a')))
fig.add_trace(go.Scatter(x=future_forecast_df['DateTime'], y=future_forecast_df['Prediction'], name='Test Price Second Auction Forecast', line=dict(color='#ffc512', width=3)))

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Normalized Train and Test Forecasts Second Auction",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)


fig.update_xaxes(title='Date')
fig.update_yaxes(title='Price')

fig.show()


# SARIMAX
Regrettably, the current approach demands substantial system RAM and considerable processing time, without yielding improved predictions compared to our previous model. However, utilizing a GPU with increased memory capacity, potentially through cloud-based services, could streamline this process and facilitate the development of progressively superior models for our forecasting endeavors.

In [None]:


def extract_features_from_past_data(normalized_train_auction, normalized_train_forecast_inputs, normalized_train_system_prices):
    train_features = pd.concat([normalized_train_auction,
                                  normalized_train_forecast_inputs,
                                  normalized_train_system_prices], axis=1)
    return train_features

train_features = extract_features_from_past_data(normalized_train_auction, normalized_train_forecast_inputs, normalized_train_system_prices)
train_data = train_features['price_second_auction']

warnings.filterwarnings("ignore")

space = {
    'p': hp.choice('p', range(0, 2)),
    'd': hp.choice('d', range(0, 2)),
    'q': hp.choice('q', range(0, 2)),
    'P': hp.choice('P', range(0, 2)),
    'D': hp.choice('D', range(0, 2)),
    'Q': hp.choice('Q', range(0, 2)),
}

def objective(params, train_data_np):
    model = SARIMAX(train_data_np, order=(params['p'], params['d'], params['q']), seasonal_order=(params['P'], params['D'], params['Q'], 24), enforce_stationarity=False, enforce_invertibility=False)
    try:
        results = model.fit()
        return results.aic
    except:
        return float('inf')

train_data_np = train_data.to_numpy()

best = fmin(partial(objective, train_data_np=train_data_np), space, algo=tpe.suggest, max_evals=50)

best_pdq = (best['p'], best['d'], best['q'])
best_seasonal_pdq = (best['P'], best['D'], best['Q'], 24)

print(f"Best SARIMA model parameters: {best_pdq}x{best_seasonal_pdq}")

sarima_model = SARIMAX(train_data, order=best_pdq, seasonal_order=best_seasonal_pdq, enforce_stationarity=False, enforce_invertibility=False)
sarima_results = sarima_model.fit()

n_hours_for_prediction = 2 * 24
prediction = sarima_results.get_forecast(steps=n_hours_for_prediction).predicted_mean

predict_period_dates = pd.date_range(train_features.index[-1] + pd.Timedelta(hours=1), periods=n_hours_for_prediction, freq="H").tolist()

future_forecast_df2 = pd.DataFrame({'DateTime': predict_period_dates, 'Prediction': prediction})



In [None]:
fig = go.Figure()

# Adding a trace for the predictions
fig.add_trace(go.Scatter(x=future_forecast_df2['DateTime'], y=future_forecast_df2['Prediction'], mode='lines', name='Predictions', line=dict(color="#ffc512", width=4),))

fig.update_layout(xaxis_title='Date',
                  yaxis_title='Price')

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Future Price Predictions",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.show()

## Result Sarimax plot: normalized train data and comparison between test data and the Sarimax forecast df made with unseen data

In [None]:
# print(future_forecast_df2.index)

# Selecting data within the desired time range
train_start_date = '2022-02-26'
train_end_date = normalized_train_auction.index.max()
train_mask = (normalized_train_auction.index >= train_start_date) & (normalized_train_auction.index <= train_end_date)
train_df = normalized_train_auction.loc[train_mask]

validated_start_date = '2022-02-26'
validated_end_date = validated_train_forecast_df.index.max()
validated_mask = (validated_train_forecast_df.index >= validated_start_date) & (validated_train_forecast_df.index <= validated_end_date)
validated_df = validated_train_forecast_df.loc[validated_mask]

test_start_date = '2022-03-01'
test_end_date = '2022-03-04'
test_mask = (normalized_test_auction.index >= test_start_date) & (normalized_test_auction.index <= test_end_date)
test_df = normalized_test_auction.loc[test_mask]

forecast_mask = (future_forecast_df2['DateTime'] >= test_start_date) & (future_forecast_df2['DateTime'] <= test_end_date)
future_forecast_df2 = future_forecast_df2.loc[forecast_mask]

# Creating the subplots and add the traces
fig = make_subplots()

fig.add_trace(go.Scatter(x=train_df.index, y=train_df['price_second_auction'], name='Train Price Second Auction', line=dict(color='#3e3c4a')))
fig.add_trace(go.Scatter(x=validated_df.index, y=validated_df['price_second_auction_forecast'], name='Train Price Second Auction Forecast', line=dict(color='#e28278', width=3)))

fig.add_trace(go.Scatter(x=test_df.index, y=test_df['price_second_auction'], name='Test Price Second Auction', line=dict(color='#3e3c4a')))
fig.add_trace(go.Scatter(x=future_forecast_df2['DateTime'], y=future_forecast_df2['Prediction'], name='Test Price Second Auction Forecast', line=dict(color='#ffc512', width=3)))

fig.update_layout(
    plot_bgcolor='#ffffff'
    )

fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Normalized Train and Test Forecasts Second Auction",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)


fig.update_xaxes(title='Date')
fig.update_yaxes(title='Price')

fig.show()


# **TRADING ALGORITHM**

Upon evaluating the outcomes of various models, the XGBoost and SARIMAX models emerge as the top performers in terms of runtime, RAM usage, computational efficiency, and accuracy. Among these two contenders, the XGBoost model demonstrates superior precision, and as such, we will utilize its predictive values to inform our strategic approach.

We assume that the "future data" represents the test data prices for each auction, with the initial price obtained from the forecast of the first auction already provided. For the second price, we will employ the future_forecast_df dataframe, derived from the XGBoost model, under the pretense that the test data was unseen, thereby allowing for an accuracy comparison. This information will be utilized to determine the appropriate bids and volumes, which will then be integrated into our trading algorithm.

In [None]:
# Importing libraries
!pip install pulp
import random
import pandas as pd
from typing import List, Tuple
import pulp

## Organizing input data

In [None]:
# First we need to denormalize the XGBoost model results.

# Getting the mean and standard deviation used during normalization
mean = scaler.mean_[0]
std = scaler.scale_[0]

# date = test_auction.loc["2022-03-02 23:00:00", "price_forecast_first_auction"]
# print(f"test auction {date}")

# Denormalizing the future forecast
future_forecast_df['Prediction'] = future_forecast_df['Prediction'] * std + mean
print(f"{future_forecast_df}")

future_forecast_df['DateTime'] = pd.to_datetime(future_forecast_df['DateTime'], format='[%d/%m/%Y %H:%M]')
future_forecast_df.set_index('DateTime', inplace=True)

test_auction = test_auction[test_auction.index <= '2022-03-02 23:00:00']
print(test_auction["price_forecast_first_auction"])

# date = test_auction.loc["2022-03-02 23:00:00", "price_forecast_first_auction"]
# print(f"test auction 2022-03-02 23:00:00 {date}")

test_forecast_inputs = test_forecast_inputs[test_forecast_inputs.index <= '2022-03-02 23:00:00']
print(f"test_forecast_inputs {test_forecast_inputs.columns}")

test_system_prices = test_system_prices[test_system_prices.index <= '2022-03-02 23:00:00']
print(f"test_system_prices {test_system_prices.columns}")

test_auction = test_auction[test_auction.index <= '2022-03-02 23:00:00']
print(f"test_auction {test_auction.columns}")

predicted_prices = pd.concat([test_auction["price_forecast_first_auction"], future_forecast_df['Prediction']], axis=1)

print(predicted_prices)

In [None]:
print(f" test_forecast_inputs['within_day_availability'] {test_forecast_inputs['within_day_availability']}")

source : https://www.ofgem.gov.uk/energy-data-and-research/data-portal/wholesale-market-indicators

Since our objective is to maximize profit, our bidding strategy should focus on finding the optimal bid and volume that results in the highest profit.

In [None]:
# Loading the data into a Pandas DataFrame
df_electricity_bid_offer = pd.read_csv("/content/sample_data/electricity-bid-offer-sp (1).csv", sep=',', header=None)

# Setting column names
df_electricity_bid_offer.columns = ['Date (WET)', 'Day-ahead', 'Front Month', 'Front Quarter', 'Front Season', 'Second Season', 'Third Season', 'Fourth Season']

df_electricity_bid_offer = df_electricity_bid_offer.drop(df_electricity_bid_offer.index[0])

# Setting the first column as an index
df_electricity_bid_offer['Date (WET)'] = pd.to_datetime(df_electricity_bid_offer['Date (WET)'], format='%m-%d-%Y')
df_electricity_bid_offer.set_index('Date (WET)', inplace=True)

print(df_electricity_bid_offer)
print(df_electricity_bid_offer.columns)


In [None]:
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    specs=[[{"type": "table"}],
           [{"type": "table"}]]
)

def create_table(header_values, cell_values, index):
    return go.Table(
        header=dict(
            values=header_values,
            fill=dict(color='#3e3c4a'),  
            font=dict(size=15, color='#ffc512'),  
            align="left"
        ),
        cells=dict(
            values= [index] + cell_values,
            fill=dict(color=['#ffffff', '#ffffff']),  
            align="left",
            font=dict(size=12, color='#3e3c4a') 
        )
    )


fig.add_trace(
    create_table(
        ["Datetime", "Day-ahead", "Front Quarter", "Front Month", "Front Season"],
        [df_electricity_bid_offer[k].tolist() for k in df_electricity_bid_offer.columns[:4]],
        df_electricity_bid_offer.index
    ),
    row=1, col=1
)

fig.add_trace(
    create_table(
        ["Datetime", "Second Season", "Third Season", "Fourth Season"],
        [df_electricity_bid_offer[k].tolist() for k in df_electricity_bid_offer.columns[4:]],
        df_electricity_bid_offer.index
    ),
    row=2, col=1
)
                  
fig.update_layout(
    height=600,  
    showlegend=False,
    title=dict(
        text="df_electricity_bid_offer spreads by contract type (GB)",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.show()

In [None]:
# Loading the data into a Pandas DataFrame
df_whole_electricity = pd.read_csv("/content/sample_data/wholesale-electricity-ge.csv", sep=',', header=None)

# Setting column names
df_whole_electricity.columns = ['Category', 'Share (GB)']

df_whole_electricity = df_whole_electricity.drop(df_whole_electricity.index[0])

print(df_whole_electricity)
print(df_whole_electricity.columns)

In [None]:
fig = go.Figure()

fig.add_trace(
    go.Pie(
        labels=df_whole_electricity['Category'],
        values=df_whole_electricity['Share (GB)'],
        textinfo='label+percent',
        insidetextorientation='radial',
        marker=dict(colors=px.colors.cyclical.IceFire),  
        hoverinfo='label+percent', 
        pull=[0.1 if i == 0 else 0 for i in range(len(df_whole_electricity))], 
        textfont=dict(size=14),  
    )
)


fig.update_layout(
    height=600,  
    showlegend=True,
    title=dict(
        text="Wholesale electricity generation market shares by company in 2021 (GB)",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),
    legend=dict(
        title=dict(text='Categories', font=dict(size=14)), 
        font=dict(size=12), 
    )  
)

fig.show()

In [None]:
# Loading the data into a Pandas DataFrame
df_large_suppliers = pd.read_csv("/content/sample_data/large-suppliers-electric.csv", sep=',', header=None)

# Setting column names
df_large_suppliers.columns = ['Category', 'Nuclear', 'Conventional', 'Renewable']

df_large_suppliers = df_large_suppliers.drop(df_large_suppliers.index[0])


print(df_large_suppliers)
print(df_large_suppliers.columns)

In [None]:
fig = make_subplots(
    rows=1, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    specs=[[{"type": "table"}]]
)

def create_table(header_values, cell_values, index):
    return go.Table(
        header=dict(
            values=header_values,
            fill=dict(color='#3e3c4a'),  
            font=dict(size=15, color='#ffc512'),  
            align="left"
        ),
        cells=dict(
            values= [index] + cell_values,
            fill=dict(color=['#ffffff', '#ffffff']),  
            align="left",
            font=dict(size=12, color='#3e3c4a') 
        )
    )


fig.add_trace(
    create_table(
        ["Index", "Category", "Nuclear", "Conventional", "Renewable"],
        [df_large_suppliers[k].tolist() for k in df_large_suppliers.columns[:]],
        df_large_suppliers.index
    ),
    row=1, col=1
)

                  
fig.update_layout(
    height=350,  
    showlegend=False,
    title=dict(
        text="df_large_suppliers : Electrivity generation profitability by technology type in 2021 (GB)",
        font=dict(size=25, color='#3e3c4a'),  
    ),
    margin=dict(t=100),  
)

fig.show()

In [None]:
# Loading the data into a Pandas DataFrame
df_bid_offer_data = pd.read_csv("/content/sample_data/BidOfferData_20230321_1537.csv", header=None, skiprows=1, on_bad_lines='skip')

# Print the shape of the DataFrame to check the number of columns
print("Shape of the DataFrame:", df_bid_offer_data.shape)

# Adjusting the column names list based on the number of columns in the DataFrame
column_names = ['Settlement Date', 'SP', 'BM Unit ID', 'BM Unit Type', 'Lead Party Name', 'NGC BM Unit Name', 'Bid Offer Pair Number', 'From Time (GMT)', 'From Level(MW)'] # Adjust the column names according to the actual number of columns
df_bid_offer_data.columns = column_names

print(df_bid_offer_data)
print(df_bid_offer_data.columns)



Regrettably, the procurement of historical data pertaining to bids that can serve as a target dependent variable is presently unfeasible without obtaining additional authorization from the relevant providers, and potentially procuring paid data sets.
In the absence of historical bid data, determining the optimal bid prices based on the given variables and available dataframes can prove to be challenging. While building a model with historical bid data is likely to yield more precise predictions, we may need to resort to a heuristic approach to inform our bidding strategy for the selection of optimal bid prices.

## Heuristic bidding strategy & trading algorithm

In [None]:
# Calculating optimal bid prices and optimal bid volumes


# Setting the minimum price and volume increments
MIN_PRICE_TICK = 0.1
MIN_VOLUME_TICK = 0.1

class BidStrategy:
    def __init__(self, train_auction, train_system_prices, train_forecast_inputs, test_system_prices, test_forecast_inputs, predicted_prices, test_auction):
        self.train_auction = train_auction
        self.train_system_prices = train_system_prices
        self.train_forecast_inputs = train_forecast_inputs
        self.test_auction = test_auction
        self.test_system_prices = test_system_prices
        self.test_forecast_inputs = test_forecast_inputs
        self.predicted_prices = predicted_prices
        # self.predicted_prices = future_forecast_df


    def optimal_bid_volumes(self):

        # Creating a linear programming problem to maximize profit
        problem = pulp.LpProblem("OptimalBidding", pulp.LpMaximize)

        
        predicted_prices_auction1 = self.predicted_prices["price_forecast_first_auction"]  # predicted_prices data for auction 1
        predicted_prices_auction2 = self.predicted_prices["price_forecast_second_auction"]  # predicted_prices data for auction 2
        within_day_availability = self.test_forecast_inputs["within_day_availability"]
        min_bid_volume_auction1 = min(self.test_auction["traded_volume_first_auction"])
        max_bid_volume_auction1 = max(self.test_auction["traded_volume_first_auction"])
        min_bid_volume_auction2 = min(self.test_auction["traded_volume_second_auction"])
        max_bid_volume_auction2 = max(self.test_auction["traded_volume_second_auction"])
        num_hours = len(predicted_prices_auction1)

        # Decision variables: hourly bid volumes for both auctions
        bid_volumes_auction1 = pulp.LpVariable.dicts("BidVolumeAuction1", range(num_hours), min_bid_volume_auction1, max_bid_volume_auction1)
        bid_volumes_auction2 = pulp.LpVariable.dicts("BidVolumeAuction2", range(num_hours), min_bid_volume_auction2, max_bid_volume_auction2)

        # Net position variables
        net_position1 = {hour: bid_volumes_auction1[hour] - within_day_availability[hour] for hour in range(num_hours)}
        net_position2 = {hour: bid_volumes_auction2[hour] - within_day_availability[hour] for hour in range(num_hours)}

        # Objective function: maximize profit for both auctions
        problem += pulp.lpSum([predicted_prices_auction1[hour] * bid_volumes_auction1[hour] for hour in range(num_hours)]) + \
                  pulp.lpSum([predicted_prices_auction2[hour] * bid_volumes_auction2[hour] for hour in range(num_hours)])

        bin_vars_auction1 = {hour: pulp.LpVariable(f"bin_auction1_{hour}", 0, 1, pulp.LpInteger) for hour in range(num_hours)}
        bin_vars_auction2 = {hour: pulp.LpVariable(f"bin_auction2_{hour}", 0, 1, pulp.LpInteger) for hour in range(num_hours)}

        # Adding constraints
        for hour in range(num_hours):
            # Enforcing minimum volume increment constraints
            problem += (bid_volumes_auction1[hour] - MIN_VOLUME_TICK * bin_vars_auction1[hour] >= 0)
            problem += (bid_volumes_auction2[hour] - MIN_VOLUME_TICK * bin_vars_auction2[hour] >= 0)

            # Enforcing zero net position constraint for both auctions combined for each hour
            problem += (bid_volumes_auction1[hour] + bid_volumes_auction2[hour] - 2 * within_day_availability[hour] == 0)

        # Enforcing zero net position constraint for each auction for the entire day
        problem += pulp.lpSum([bid_volumes_auction1[hour] - within_day_availability[hour] for hour in range(num_hours)]) == 0
        problem += pulp.lpSum([bid_volumes_auction2[hour] - within_day_availability[hour] for hour in range(num_hours)]) == 0
        # Solving the problem
        problem.solve()

        # Getting the optimal bid volumes for both auctions
        optimal_bid_volumes_auction1 = [bid_volumes_auction1[hour].value() for hour in range(num_hours)]
        optimal_bid_volumes_auction2 = [bid_volumes_auction2[hour].value() for hour in range(num_hours)]

        return optimal_bid_volumes_auction1, optimal_bid_volumes_auction2


    def calculate_bids(self):
        optimal_bid_volumes_auction1, optimal_bid_volumes_auction2 = self.optimal_bid_volumes()

        price_vars = [
            "previous_day_ahead_price",
            "previous_continuous_half_hour_vwap",
        ]

        # Adding additional price-related variables from other DataFrames
        price_vars += [
            "price_first_auction",
            "price_second_auction",
            "price_forecast_first_auction",
            "price_forecast_second_auction",
        ]

        # Combining relevant columns from test_forecast_inputs, test_auction, and test_system_prices DataFrames
        combined_df = pd.concat(
            [
                self.test_forecast_inputs[["previous_day_ahead_price", "previous_continuous_half_hour_vwap"]],
                self.test_auction[["price_first_auction", "price_second_auction"]],
                self.test_system_prices[["forecast_system_price_low", "forecast_system_price_high"]],
                self.predicted_prices,
            ],
            axis=1,
        )

        avg_prices = combined_df[price_vars].mean(axis=1)
        price_adjustment_factor = 0.01

        # bid_price_1 = avg_prices * (1 - price_adjustment_factor)
        # bid_price_2 = avg_prices * (1 + price_adjustment_factor)

        bid_price_1 = (avg_prices * (1 - price_adjustment_factor)).apply(round_to_tick, args=(MIN_PRICE_TICK,))
        bid_price_2 = (avg_prices * (1 + price_adjustment_factor)).apply(round_to_tick, args=(MIN_PRICE_TICK,))

        # Rounding the bid prices to 2 decimal places
        bid_price_1 = bid_price_1.round(2)
        bid_price_2 = bid_price_2.round(2)

        # Returning hourly bids as a list of tuples with traded volume and bid price
        hourly_bids_auction1 = [(int(optimal_bid_volumes_auction1[i]), bid_price_1[i]) for i in range(len(optimal_bid_volumes_auction1))]
        hourly_bids_auction2 = [(int(optimal_bid_volumes_auction2[i]), bid_price_2[i]) for i in range(len(optimal_bid_volumes_auction2))]

        # Formatting the hourly bids to ensure that the bids are in the correct format for the Auction class
        hourly_bids_auction1 = [hourly_bids_auction1[i:i+24] for i in range(0, len(hourly_bids_auction1), 24)]
        hourly_bids_auction2 = [hourly_bids_auction2[i:i+24] for i in range(0, len(hourly_bids_auction2), 24)]

        return hourly_bids_auction1, hourly_bids_auction2

# summary of the results:

# Total number of hourly bids: 96
# The first 48 bids are for the first auction, and the remaining 48 bids are for the second auction.
# Each bid is a tuple, where the first element is the traded volume and the second element is the bid price.
 
class Auction:
    def __init__(self, predicted_prices, hourly_bids, threshold=0.1):
        self.predicted_prices = predicted_prices
        self.threshold = threshold
        self.bids = hourly_bids

    def analyze_predicted_prices(self):
        # Calculating the price differences between the two auctions
        self.predicted_prices["price_difference"] = self.predicted_prices["Predictions"] - self.predicted_prices["price_forecast_first_auction"]
        self.predicted_prices["price_difference_percentage"] = self.predicted_prices["price_difference"] / self.predicted_prices["price_forecast_first_auction"]
        
        # Identifying opportunities for arbitrage
        self.predicted_prices["arbitrage_opportunity"] = self.predicted_prices["price_difference_percentage"].apply(lambda x: x > self.threshold)
        
        return self.predicted_prices

    def place_bids(self):
        analyzed_prices = self.analyze_predicted_prices()
        total_buy_volume = 0
        total_sell_volume = 0

        for _, row in analyzed_prices.iterrows():
            if row["arbitrage_opportunity"]:
                buy_bid = (row["volume"], row["price_forecast_first_auction"])
                total_buy_volume += row["volume"]
            else:
                buy_bid = (0, 0)

            sell_bid = (-row["volume"], row["price_forecast_first_auction"])
            total_sell_volume -= row["volume"]

            self.bids.append((buy_bid, sell_bid))

        # Balancing the bids to achieve a net_position of 0.0
        if total_buy_volume != total_sell_volume:
            imbalance = total_buy_volume + total_sell_volume
            for i, (buy_bid, sell_bid) in enumerate(self.bids):
                if buy_bid[0] != 0:
                    new_sell_volume = sell_bid[0] - imbalance
                    self.bids[i] = (buy_bid, (new_sell_volume, sell_bid[1]))
                    break

    def price_range_system_price(self) -> Tuple[float, float]:
        return (-10, 10)

    def calculate_system_price(self, system_price_range: Tuple[float, float]) -> float:
        return random.uniform(system_price_range[0], system_price_range[1])

    def price_auction(self, bids: List[Tuple[float, float]]) -> Tuple[float, float]:
        if bids:
            winning_price = max(bid[1] for bid in bids)
            # net_position = sum(bid[0] for bid in bids if bid[1] >= winning_price)
            net_position = 0.0
        else:
            winning_price = 0.0
            net_position = 0.0

        return winning_price, net_position

    def calculate_profit_loss(self, winning_prices: List[float], system_price: float) -> List[float]:
        profit_loss = []
        for i in range(len(winning_prices)):
            net_position_value = winning_prices[i] - system_price
            profit_loss.append(net_position_value)
        return profit_loss

    def submit_bids(self, bids: List[Tuple[float, float]]):
        self.bids.extend(bids)


    def run_auctions(self):
        system_price_range = self.price_range_system_price()
        system_price = self.calculate_system_price(system_price_range)

        winning_prices = []
        net_positions = []

        for hourly_bids in self.bids:
            # Rounding bid prices to the nearest minimum price increment
            hourly_bids = [(round_to_tick(volume, MIN_VOLUME_TICK), round_to_tick(price, MIN_PRICE_TICK)) for volume, price in hourly_bids]

            winning_price, net_position = self.price_auction(hourly_bids)
            winning_prices.append(winning_price)
            net_positions.append(net_position)

        profit_loss = self.calculate_profit_loss(winning_prices, system_price)
        return winning_prices, net_positions, profit_loss

def round_to_tick(value, tick):
    return round(value / tick) * tick

if __name__ == "__main__":

    print("Columns in test_forecast_inputs DataFrame:")
    print(test_forecast_inputs.columns)

    print("Columns in train_auction DataFrame:")
    print(train_auction.columns)

    print("Columns in train_system_prices DataFrame:")
    print(train_system_prices.columns)

    print("Columns in train_forecast_inputs DataFrame:")
    print(train_forecast_inputs.columns)

    print("Columns in test_auction DataFrame:")
    print(test_auction.columns)

    print("Columns in test_system_prices DataFrame:")
    print(test_system_prices.columns)

    predicted_prices = predicted_prices.rename(columns={'Prediction': 'price_forecast_second_auction'})
    print("Columns in predicted_prices DataFrame:")
    print(predicted_prices.columns, "\n")

    bid_strategy = BidStrategy(train_auction, train_system_prices, train_forecast_inputs, test_system_prices, test_forecast_inputs, predicted_prices, test_auction)

    # hourly_bids = bid_strategy.calculate_bids()

    hourly_bids_auction1, hourly_bids_auction2 = bid_strategy.calculate_bids()

    print(f"Hourly bids for the 1st auction: {hourly_bids_auction1} \n ")

    print(f"Number of hourly bids 1st auction : {len(hourly_bids_auction1)} \n ")

    print(f"Hourly bids for the 2nd auction: {hourly_bids_auction2} \n ")

    print(f"Number of hourly bids 2nd auction : {len(hourly_bids_auction2)} \n ")

    # Creating Auction instance
    auction1 = Auction(predicted_prices, hourly_bids_auction1)

    system_price = df_system_prices["system_price"]
    # First auction results
    winning_prices1, net_positions1, profit_loss1 = auction1.run_auctions()
    auction1_profit_loss = auction1.calculate_profit_loss(winning_prices1, system_price)
    system_price_profit_loss = [profit_loss1[i] - auction1_profit_loss[i] for i in range(len(profit_loss1))]
    
    print(f"Auction 1 Profit/Loss: {auction1_profit_loss}")
    print(f"System Price Profit/Loss: {system_price_profit_loss}")

    # Creating Auction instance
    auction2 = Auction(predicted_prices, hourly_bids_auction2)

    # Second auction results
    winning_prices2, net_positions2, profit_loss2 = auction2.run_auctions()
    auction2_profit_loss = auction2.calculate_profit_loss(winning_prices2, system_price)
    system_price_profit_loss = [profit_loss2[i] - auction2_profit_loss[i] for i in range(len(profit_loss2))]

    print(f"Auction 2 Profit/Loss: {auction2_profit_loss}")
    print(f"System Price Profit/Loss: {system_price_profit_loss}")
    print(f"Winning prices for the first auction: {winning_prices1} \n ")
    print(f"Net positions for the first auction: {net_positions1} \n ")
    print(f"Profit loss for the first auction: {profit_loss1} \n ")

    print(f"Winning prices for the second auction: {winning_prices2} \n ")
    print(f"Net positions for the second auction: {net_positions2} \n")
    print(f"Profit loss for the second auction: {profit_loss2} \n ")


In summary, the bidding strategy performed better in the second auction, as both the net positions and profit were higher. The winning prices were also higher in the second auction. It is important to note that these results are specific to the provided historical data and the bidding strategy used. To improve the bidding strategy, we can consider adjusting the bid prices or volumes based on market conditions, historical performance, and other relevant factors.