# Day ahead of You!
- wind energy

In [2]:
# Importing required libraries
import numpy as np
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.4f' % x)

import datetime as dt
import time

import math

#from scipy import stats

#from statsmodels.tsa.stattools import adfuller
#from statsmodels.tsa.stattools import pacf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense, Activation, Dropout
from tensorflow.keras.callbacks import EarlyStopping


from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
#from keras_tuner import RandomSearch
#from keras_tuner.engine.hyperparameters import HyperParameters

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

#sns.set_context("paper", font_scale=1.3)
#sns.set_style('white')
#
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

import mlflow
from mlflow import tensorflow

import warnings
warnings.filterwarnings('ignore')
#!pip install keras-tuner -q
#!pip install keras-tuner --upgrade
# defining the user defined colors
clrs = ['#FAB800','#93BE3E','#35B179','#009A9A','#4E7E91']


# Loading data

display(dbutils.fs.ls("FileStore/tables"))

file_location = "/FileStore/tables/Actual_EQ.csv"
file_type = "csv"

df = spark.read.csv(file_location,header = True, inferSchema=True)
df = df.toPandas()

In [3]:
df = pd.read_csv('../data/Actual_EQ.csv')

In [4]:
# check the size of dataset
print(f'Dataset has {df.shape[0]} rows and {df.shape[1]} columns')
print(f"columns in raw dataset: ", list(df.columns))

Dataset has 16126 rows and 3 columns
columns in raw dataset:  ['dt_start_utc', 'ac_price', 'eq_rank_val_01']


In [5]:
cols= ['ds','act_price','pre_price']
df.columns = cols
# ds data type is converting to datetime type
df['ds'] = pd.to_datetime(df['ds'], format='%Y-%m-%d %H:%M:%S')
# ds column is set as index
df.reset_index(drop=True,inplace=True)
#df.head(2)

In [6]:
print('The time series starts from : ', df.ds.min())
print('The time series ends on :     ', df.ds.max())
print(f'intervals of the dataset :   ', df.ds[1] - df.ds[0])

The time series starts from :  2019-09-01 00:00:00
The time series ends on :      2021-07-03 21:00:00
intervals of the dataset :    0 days 01:00:00


In [7]:
df=df.loc[:,['ds','pre_price','act_price']]
df.sort_values('ds', inplace=True, ascending=True)
df = df.reset_index(drop=True)
#df.head(2)

In [8]:
df = df.query("ds>='2021-01-01 00:00:00'")
df.reset_index(drop=True,inplace=True)
#df.head(2)

# data split

## train and test data split

In [9]:
 """Create training and test dataset. Training dataset is
    80% of the total data and the remaining 20% will be predicted"""
lookBack = 168   # 1 week = 7*24 = 168hours considered to predict
n_ahead = 1      # forcast price at 1 hours ahead
train_size = int(len(df) * 0.80) #80% of data used for training
test_size = len(df) - train_size
df_train, df_test = df[0:train_size], df[train_size-lookBack-1:]
print(df_train.shape, df_test.shape, df_test.shape)
print('The train data starts from :     ', df_train.ds.min())
print('The train data ends on :         ', df_train.ds.max())
print(f'intervals of the train dataset :', df_train.ds[1] - df_train.ds[0])
print('The test data starts from :      ', df_test.ds.min())
print('The test data ends on :          ', df_test.ds.max())
print(f'intervals of the test dataset : ', df_test.ds[train_size-lookBack+1+1] - df_test.ds[train_size-lookBack+1])

(3531, 3) (1052, 3) (1052, 3)
The train data starts from :      2021-01-01 00:00:00
The train data ends on :          2021-05-28 02:00:00
intervals of the train dataset : 0 days 01:00:00
The test data starts from :       2021-05-21 02:00:00
The test data ends on :           2021-07-03 21:00:00
intervals of the test dataset :  0 days 01:00:00


## splitting the dates from detasets

In [10]:
train_dates = pd.to_datetime(df_train['ds'])
test_dates = pd.to_datetime(df_test['ds'])


In [11]:
df_train = df_train.drop(['ds','pre_price'],axis = 1)
df_test = df_test.drop(['ds','pre_price'],axis = 1)

In [12]:
train_data = df_train.reset_index()['act_price']
test_data = df_test.reset_index()['act_price']
print('train_data shape:', train_data.shape)
print('test_data shape:', test_data.shape)

train_data shape: (3531,)
test_data shape: (1052,)


## normalization of data

In [13]:
scaler=MinMaxScaler(feature_range=(-1,1))
train_data=scaler.fit_transform(np.array(train_data).reshape(-1,1))
test_data=scaler.transform(np.array(test_data).reshape(-1,1))


## features and target data set split

In [14]:
# convert an array of values into a dataset matrix
def create_datasets(dataset,target_index,lookback = 1,n_ahead = 0):
    X, y = [], []
    for i in range(lookback, dataset.shape[0]- n_ahead+1):
        X.append(dataset[i-lookback:i,0:dataset.shape[1]])
        y.append(dataset[i+n_ahead-1:i+n_ahead,target_index])
    return np.array(X), np.array(y)

In [15]:
# Splitting the dataset into features and targets
X_train, y_train= create_datasets(train_data, target_index = 0,lookback = lookBack,n_ahead = n_ahead)
X_test, y_test = create_datasets(test_data, target_index = 0,lookback = lookBack,n_ahead = n_ahead)


In [16]:
print(X_train.shape), print(y_train.shape)

(3363, 168, 1)
(3363, 1)


(None, None)

# building LSTM regression model
 Long Short Time Memory

In [17]:
import tensorflow as tf
checkpoint_path = "training/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,save_weights_only=True,verbose=1)
mlflow.tensorflow.autolog()

In [38]:
def run_model(params):
    with mlflow.start_run(run_name="forecasting_tracker") as run:
        # create model framework and fit the LSTM network
        checkpoint_path = "training/cp.ckpt"
        checkpoint_dir = os.path.dirname(checkpoint_path)
        cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,save_best_only=True,save_weights_only=True,verbose=1)
        model=Sequential()

        model.add(LSTM(units=params['units'], activation =params['activation'], input_shape=(X_train.shape[1],X_train.shape[2]),return_sequences = True))
        model.add(Dropout(0.2))

        model.add(LSTM(units=params['units'],return_sequences=True))
        model.add(Dropout(0.2))

        model.add(LSTM(units=int(params['units']*0.5),return_sequences=True))
        model.add(Dropout(0.2))

        model.add(LSTM(units=int(params['units']*0.5)))
        model.add(Dropout(0.2))

        model.add(Dense(1))
        """Compile and fit the model"""
        model.compile(loss='mean_squared_error',optimizer='adam')
        stop_early = EarlyStopping(monitor='val_loss', patience=3)
        history = model.fit(X_train,y_train,validation_split=0.2,epochs=params['epochs'],batch_size=params['batchsize'],callbacks=[stop_early,cp_callback],verbose=1,shuffle=False)
        model.load_weights(checkpoint_path)
        run = mlflow.active_run()
        #print("Run with id %s finished" % run.info.run_id)
        return (run.info.experiment_id, run.info.run_id)
    

In [39]:
for units, activation, epochs, batchsize in [[100,'relu',50,24], [100,'tanh',50,24]]:
  params = {"units":units, "activation": activation, "epochs": epochs, "batchsize":batchsize}
  run_model(params)

Epoch 1/50
Epoch 00001: val_loss improved from inf to 0.06599, saving model to training/cp.ckpt
Epoch 2/50
Epoch 00002: val_loss improved from 0.06599 to 0.05847, saving model to training/cp.ckpt
Epoch 3/50
Epoch 00003: val_loss improved from 0.05847 to 0.05559, saving model to training/cp.ckpt
Epoch 4/50
Epoch 00004: val_loss improved from 0.05559 to 0.05296, saving model to training/cp.ckpt
Epoch 5/50
Epoch 00005: val_loss improved from 0.05296 to 0.04798, saving model to training/cp.ckpt
Epoch 6/50
Epoch 00006: val_loss improved from 0.04798 to 0.04685, saving model to training/cp.ckpt
Epoch 7/50
Epoch 00007: val_loss improved from 0.04685 to 0.03287, saving model to training/cp.ckpt
Epoch 8/50
Epoch 00008: val_loss improved from 0.03287 to 0.02933, saving model to training/cp.ckpt
Epoch 9/50
Epoch 00009: val_loss improved from 0.02933 to 0.01826, saving model to training/cp.ckpt
Epoch 10/50
Epoch 00010: val_loss improved from 0.01826 to 0.00753, saving model to training/cp.ckpt
Epo



INFO:tensorflow:Assets written to: /var/folders/tp/frdkj__12p57wvtnhqk3pylr0000gn/T/tmpvbof7zmx/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/tp/frdkj__12p57wvtnhqk3pylr0000gn/T/tmpvbof7zmx/model/data/model/assets


Epoch 1/50
Epoch 00001: val_loss improved from inf to 0.06225, saving model to training/cp.ckpt
Epoch 2/50
Epoch 00002: val_loss improved from 0.06225 to 0.05960, saving model to training/cp.ckpt
Epoch 3/50
Epoch 00003: val_loss improved from 0.05960 to 0.05418, saving model to training/cp.ckpt
Epoch 4/50
Epoch 00004: val_loss improved from 0.05418 to 0.04916, saving model to training/cp.ckpt
Epoch 5/50
Epoch 00005: val_loss improved from 0.04916 to 0.04043, saving model to training/cp.ckpt
Epoch 6/50
Epoch 00006: val_loss improved from 0.04043 to 0.02137, saving model to training/cp.ckpt
Epoch 7/50
Epoch 00007: val_loss improved from 0.02137 to 0.00868, saving model to training/cp.ckpt
Epoch 8/50
Epoch 00008: val_loss improved from 0.00868 to 0.00612, saving model to training/cp.ckpt
Epoch 9/50
Epoch 00009: val_loss improved from 0.00612 to 0.00563, saving model to training/cp.ckpt
Epoch 10/50
Epoch 00010: val_loss improved from 0.00563 to 0.00534, saving model to training/cp.ckpt
Epo



INFO:tensorflow:Assets written to: /var/folders/tp/frdkj__12p57wvtnhqk3pylr0000gn/T/tmpl3n7ih9j/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/tp/frdkj__12p57wvtnhqk3pylr0000gn/T/tmpl3n7ih9j/model/data/model/assets


In [34]:
model.summary()

NameError: name 'model' is not defined

In [40]:
import mlflow
logged_model = 'runs:/b011a8be58ee47388ee2e5dd9010f0c7/model'

In [41]:
# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)


In [42]:
### Lets Do the prediction and check performance metrics
train_predict=loaded_model.predict(X_train)
test_predict=loaded_model.predict(X_test)

In [46]:
# Saving model to disk
import pickle
pickle.dump(loaded_model, open('forecasting.pkl','wb'))
#model = pickle.load(open('forecasting.pkl','rb'))



INFO:tensorflow:Assets written to: ram://8fbe3bf9-8faa-464e-81b2-f237bef2dee8/assets


INFO:tensorflow:Assets written to: ram://8fbe3bf9-8faa-464e-81b2-f237bef2dee8/assets


In [28]:
hist = pd.DataFrame(history.history)
fig = go.Figure()
fig.add_trace(go.Scatter(x=hist.index, y=hist["loss"],
              name="Train Loss", line=dict(color=clrs[4], width=4)))
fig.add_trace(go.Scatter(x=hist.index, y=hist["val_loss"],
              name="Valiation Loss", line=dict(color=clrs[0], width=4)))

fig.update_layout(
    title='Training and validation loss over epochs',
    xaxis_nticks=10,
    yaxis_nticks=10,
    xaxis_title=" date time",
    yaxis_title=" Loss",
    autosize=False,
    width=1000,
    height=600,
    legend_title="prices",
    font=dict(size=12),
    legend=dict(yanchor="top", y=0.98, xanchor="right", x=0.98))

fig.show()

## Model Evaluation

### prediction

In [29]:
### Lets Do the prediction and check performance metrics
train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

### denormalizing ( inverse scaling)

In [43]:
##Transform back to original form
train_predict=scaler.inverse_transform(train_predict)
test_predict=scaler.inverse_transform(test_predict)
y_train = scaler.inverse_transform(y_train)
y_test = scaler.inverse_transform(y_test)

In [44]:
# Trained Model errors
train_rmse  = np.sqrt(mean_squared_error(y_train, train_predict)).round(2)
test_rmse   = np.sqrt(mean_squared_error(y_test, test_predict)).round(2)
# Testing Model errors
train_mse   = mean_squared_error(y_train, train_predict).round(2)
test_mse    = mean_squared_error(y_test, test_predict).round(2)

In [45]:
print('Train Mean Absolute Error:', mean_absolute_error(y_train, train_predict).round(2))
print('Train Root Mean Squared Error:',np.sqrt(mean_squared_error(y_train, train_predict).round(2)))
print('Train Mean Squared Error:',mean_squared_error(y_train, train_predict).round(2))

print('Test Mean Absolute Error:', mean_absolute_error(y_test, test_predict))
print('Test Mean Squared Error:',mean_squared_error(y_test, test_predict))
print('Test Root Mean Squared Error:',np.sqrt(mean_squared_error(y_test, test_predict)))

Train Mean Absolute Error: 4.2
Train Root Mean Squared Error: 6.153048025166064
Train Mean Squared Error: 37.86
Test Mean Absolute Error: 4.95547523462125
Test Mean Squared Error: 45.404153765466134
Test Root Mean Squared Error: 6.738260440608253


# Forcasting next 24 hours Prices

## Take last 1 week (168 hours) data from test data to predict next 24hours prices:

## preparing the input data for forecasting

In [None]:
print(test_data.shape)
x_input=test_data[len(test_data)-lookBack:].reshape(1,-1)
print(x_input.shape)
temp_input=list(x_input)
temp_input=temp_input[0].tolist()


## forcasting for next 24 hours

In [None]:
# demonstrate prediction for next 24 hours
from numpy import array

lst_output=[]
n_steps=lookBack
i=0
while(i<=24):
    
    if(len(temp_input)>lookBack):
        #print(temp_input)
        x_input=np.array(temp_input[1:])
        #print(f"{i} hour input {x_input}")#.format(i,x_input))
        x_input=x_input.reshape(1,-1)
        x_input = x_input.reshape((1, n_steps, 1))
        #print(x_input)
        yhat = model.predict(x_input, verbose=0)
        #print(f"{i} hour output {yhat}")#.format(i,yhat))
        temp_input.extend(yhat[0].tolist())
        temp_input=temp_input[1:]
        #print(temp_input)
        lst_output.extend(yhat.tolist())
        i=i+1
    else:
        x_input = x_input.reshape((1, n_steps,1))
        yhat = model.predict(x_input, verbose=0)
        #print(yhat[0])
        temp_input.extend(yhat[0].tolist())
        #print(len(temp_input))
        lst_output.extend(yhat.tolist())
        i=i+1
    
lst_output = scaler.inverse_transform(lst_output)

#print(lst_output)

## creating a future time for given period

In [None]:
future_time = pd.date_range(list(test_dates)[-1],periods = 24,freq='1H')#.tolist()
future_time = pd.DataFrame(future_time)[:]

## merging and maping with the forcasted data with date and time

In [None]:
df_forecast = pd.merge(future_time,pd.DataFrame(lst_output[0:-1]),left_index = True,right_index = True,how='left')
df_forecast.columns = ['ds','lstm_pred']
df_forecast['ds'] = pd.to_datetime(df_forecast['ds'], format='%Y-%m-%d %H:%M:%S')

In [None]:
df_forecast.set_index('ds').to_csv('forcasted_prices.csv')

In [None]:
df_train_predicted = pd.DataFrame({'ds':train_dates[lookBack:].values,'lstm_pred':train_predict[:,0]})
df_test_predicted =pd.DataFrame({'ds':test_dates[lookBack:].values,'lstm_pred':test_predict[:,0]})

## plotting results

In [None]:
df_test_predicted['y_test'] = y_test[:,0]
df_test_predicted['residual']  = df_test_predicted['y_test'] - df_test_predicted['lstm_pred'] 


In [None]:
df_train_predicted['y_train'] = y_train[:,0]
df_train_predicted['residual']  = df_train_predicted['y_train'] - df_train_predicted['lstm_pred'] 


In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['ds'], y=df["act_price"],
              name="actual price", line=dict(color=clrs[0], width=4)))
#fig.add_trace(go.Scatter(x=df_train_predicted['ds'], y=df_train_predicted["lstm_pred"],
#              name="trained prediction", line=dict(color=clrs[2], width=1)))
#fig.add_trace(go.Scatter(x=df_test_predicted['ds'], y=df_test_predicted["lstm_pred"],
#              name="test prediction", line=dict(color=clrs[4], width=1)))
#fig.add_trace(go.Scatter(x=df_forecast['ds'], y=df_forecast["lstm_pred"],
#              name="forcasted", line=dict(color=clrs[3], width=1)))
fig.add_trace(go.Bar(x=df_train_predicted['ds'][:-1], y=df_train_predicted['lstm_pred'][:-1], name = 'train predicted price', marker_color=clrs[2]))
fig.add_trace(go.Bar(x=df_test_predicted['ds'][:-1], y=df_test_predicted['lstm_pred'][:-1], name = 'test predicted price', marker_color=clrs[4]))
fig.add_trace(go.Bar(x=df_forecast['ds'], y=df_forecast['lstm_pred'], name = 'forecasted price', marker_color=clrs[3]))


fig.update_layout(
    title='actual and predicted prices using LSTM',
    xaxis_nticks=25,
    yaxis_nticks=10,
    xaxis_title=" date time",
    yaxis_title=" Price in €/mwh",
    autosize=False,
    width=1000,
    height=600,
    legend_title="prices",
    font=dict(size=10),
    legend=dict(yanchor="top", y=0.3, xanchor="left", x=0.01)
)
fig.update_xaxes(
    tickangle=-45,
    title_text="Date time",
    title_font={"size": 12},
    title_standoff=25,
    tickformat='%Y-%m-%d<br>%H:%M')

fig.update_yaxes(
    title_text="price €/mwh",
    title_standoff=25)
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=8, label="8H", step="hour", stepmode="backward"),
            dict(count=1, label="1D", step="day", stepmode="backward"),
            dict(count=2, label="2D", step="day", stepmode="backward"),
            dict(count=1, label="1w", step="month", stepmode="backward"),
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=2, label="2m", step="month", stepmode="backward"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    ))

fig.show()


# Residuals

In [None]:
fig = go.Figure()
#fig.add_trace(go.Scatter(x=df['ds'], y=df["act_price"]-df["pre_price"],
#              name="actual price", line=dict(color=clrs[0], width=1)))
#
#fig.add_trace(go.Scatter(x=df_train_predicted['ds'], y=df_train_predicted["residual"],
#              name="trained prediction", line=dict(color=clrs[2], width=2)))
#fig.add_trace(go.Scatter(ç,
#              name="test prediction", line=dict(color=clrs[0], width=4)))
fig.add_trace(go.Bar(x=df_test_predicted['ds'], y=df_test_predicted["residual"], name = 'lstm model residuals', marker_color=clrs[4]))
fig.add_trace(go.Bar(x=df['ds'], y=df["act_price"]-df["pre_price"], name = 'original model residuals', marker_color=clrs[0]))


fig.update_layout(
    title='actual and predicted residuals',
    xaxis_nticks=25,
    yaxis_nticks=10,
    xaxis_title=" date time",
    yaxis_title=" residual in price €/mwh",
    autosize=True,
    width=1000,
    height=600,
    legend_title="price residual",
    font=dict(size=10),
    legend=dict(yanchor="top", y=1.0, xanchor="right", x=0.98),
    yaxis_range = [-5,30]
)
fig.update_xaxes(
    tickangle=-45,
    title_text="Date time",
    title_font={"size": 16},
    title_standoff=25,
    tickformat='%Y-%m-%d<br>%H:%M')

fig.update_yaxes(
    title_text="price €/mwh",
    title_standoff=10
    )

fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=8, label="8H", step="hour", stepmode="backward"),
            dict(count=2, label="2D", step="day", stepmode="backward"),
            dict(count=1, label="1w", step="month", stepmode="backward"),
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(step="all")
        ])
    ))

st.plotly_chart(fig)

In [None]:
fig = px.scatter(x= df.act_price, y = df.pre_price,labels={'x':'Actual price','y':'Predicted price'},opacity=0.65,
                 trendline='ols', trendline_color_override=clrs[0],
                 title='Previous predictions')
fig.update_layout(
    title='Existing Model Residuals ',
    xaxis_nticks=25,
    yaxis_nticks=10,
    xaxis_title=" date time",
    yaxis_title=" Price in €/mwh",
    autosize=False,
    width=1000,
    height=600,
    legend_title="prices",
    font=dict(size=10),
    legend=dict(yanchor="top", y=0.4, xanchor="left", x=0.01))
fig.show()

In [None]:
fig = px.scatter(x= df_train_predicted.y_train, y = df_train_predicted.lstm_pred,labels={'x':'Actual price','y':'Predicted price'},opacity=0.65,
                 trendline='ols', trendline_color_override=clrs[0],
                 title='Trained Model Residuals'
)
fig.update_layout(width=1000,height=600,)
fig.show()

In [None]:
fig = px.scatter(x= df_test_predicted.y_test, y = df_test_predicted.lstm_pred,labels={'x':'Actual price','y':'Predicted price'},opacity=0.65,
                 trendline='ols', trendline_color_override=clrs[0],
                 title='Test Model Residuals'
                 )
fig.update_layout(width=1000,height=600,)
fig.show()

# Results Comparision

In [None]:
def oppertunity_analyses(dataset, dataset_name,col_y, col_yhat):
    df = dataset.copy()
    df['error'] = df[col_y]-df[col_yhat]
    total = int(df[col_y].sum().round(2))
    opportunity = int(abs(df.query("error < 0")[col_yhat].sum()))
    opportunity = round(opportunity,2)
    overbidding = abs(df.query("error >= 0")[col_y].sum()).round(2)
    income = (total - opportunity - overbidding).round(2)
    mse = mean_squared_error(df[col_y],df[col_yhat]).round(2)
    rsme = abs(mse**0.5).round(2)
    
    #print(f'total :      €{total}')
    #print(f'opportunity: €{opportunity}')
    #print(f'overbidding: €{overbidding}')
    #print(f'income:      €{income}')
    #print(f'MSE:         €{mse}')
    #print(f'RSME:        €{rsme}')
    df_temp = pd.DataFrame(
        [[total,opportunity,overbidding,income,mse,rsme]],columns = ['total','opportunity','overbidding','income','mse','rsme'],index=[dataset_name])
    #print (df_temp)
    return df_temp#[total,opportunity,overbidding,income,mse,rsme]


In [None]:
df_final_evaluation = pd.DataFrame(columns = ['total','opportunity','overbidding','income','mse','rsme'])
df_final_evaluation

In [None]:
df_final_evaluation = df_final_evaluation.append(oppertunity_analyses(df,'Base Model','act_price','pre_price'))
df_final_evaluation = df_final_evaluation.append(oppertunity_analyses(df_train_predicted,'Trained Model','y_train','lstm_pred'))
df_final_evaluation = df_final_evaluation.append(oppertunity_analyses(df_test_predicted,'Test Model','y_test','lstm_pred'))

In [None]:
df_final_evaluation= df_final_evaluation.reset_index()

In [None]:
cols = df_final_evaluation.columns[1:]
cols

In [None]:
df2 = df_final_evaluation.copy()
df2

In [None]:
for col in list(cols):
    print(col)
    #df2[col]
    df2[col] = df2[col]/df_final_evaluation['total']*1000000
df2

In [None]:
x = list(df2["index"].values)
fig = go.Figure(go.Bar(x=x, y=df2['opportunity'], name='Opportunity',marker_color=clrs[4]))
fig.add_trace(go.Bar(x=x, y=df2['overbidding'], name='Overbiddin',marker_color=clrs[0]))
fig.add_trace(go.Bar(x=x, y=-df2['income'], name='Income',marker_color=clrs[3]))
fig.update_layout(barmode='stack', width = 1000, height=600)
fig.show()

In [None]:
df2.plot()

In [None]:
# Loading model to compare the results
model2 = pickle.load(open('model.pkl','rb'))


def forecasting(model,n_steps,n_ahead=1 ):
    lst_output=[]
    n_steps=n_steps # lookBack
    global temp_input
    i=0
    while(i<=n_ahead): # number of units ahead
        
        if(len(temp_input)>lookBack):
            #print(temp_input)
            x_input=np.array(temp_input[1:])
            #print(f"{i} hour input {x_input}")#.format(i,x_input))
            x_input=x_input.reshape(1,-1)
            x_input = x_input.reshape((1, n_steps, 1))
            #print(x_input)
            yhat = model.predict(x_input, verbose=0)
            #print(f"{i} hour output {yhat}")#.format(i,yhat))
            temp_input.extend(yhat[0].tolist())
            temp_input=temp_input[1:]
            #print(temp_input)
            lst_output.extend(yhat.tolist())
            i=i+1
        else:
            x_input = x_input.reshape((1, n_steps,1))
            yhat = model.predict(x_input, verbose=0)
            #print(yhat[0])
            temp_input.extend(yhat[0].tolist())
            #print(len(temp_input))
            lst_output.extend(yhat.tolist())
            i=i+1
        
    lst_output = scaler.inverse_transform(lst_output)
    return lst_output

In [None]:
def prediction(model, dataset):
    global lookBack
    X_test = []
    inputs = df_new.iloc[int(len(df_new)*0.2)-lookBack:,-1:].values
    inputs=inputs.reshape(-1,1)
    inputs = scaler.transform(inputs)
    #inputs.shape
    # Creation of dataset for prediction
    for i in range(lookBack, inputs.shape[0]):
        X_test.append(inputs[i-lookBack:i, 0])
    X_test = np.array(X_test)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    # print(X_test.shape)
    prediction = model.predict(X_test)
    prediction = scaler.inverse_transform(prediction)[-1]
    return prediction

In [None]:
df_new = df.copy()
i = 0
N = 24
while (i<=N-1):
    df_new.loc[len(df_new)] = list(df_new.iloc[len(df_new)-1])
    new_price = prediction(model,df_new)
    df_new.iloc[-1:,1:2] = new_price
    #df_new.iloc[-1:,2:] = None
    df_new.iloc[-1:,0:1] = df_new['ds'].iloc[-1]+pd.DateOffset(hours=1) 
    #inputs = df_new
    i = i+1
#df_new[int(len(df_new)*0.8)-lookBack+1:]
df_new[-N:]

df_new = df.copy()
i = 0
N = 24
while (i<=N-1):
    df_new.loc[len(df_new)] = list(df_new.iloc[len(df_new)-1])
    new_price = prediction(model2,df_new)
    df_new.iloc[-1:,1:2] = new_price
    #df_new.iloc[-1:,2:] = None
    df_new.iloc[-1:,0:1] = df_new['ds'].iloc[-1]+pd.DateOffset(hours=1) 
    #inputs = df_new
    i = i+1
#df_new[int(len(df_new)*0.8)-lookBack+1:]
df_new[-N:]