In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.model_selection import KFold
import pmdarima as pm
from pmdarima.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt


2023-11-15 13:58:48.009700: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


<h1>Hyperparameters<h1>


In [149]:
#Add here

<h1>Creating DataFrames<h1>

In [2]:
spot_prices = pd.read_csv("spotpriser.csv").set_index("time")
consumption = pd.read_csv("consumption_temp_with_flags.csv").set_index("time")

oslo_consumption = consumption[consumption["location"] == "oslo"]
oslo_consumption = oslo_consumption.merge(spot_prices["oslo"], left_index=True, right_index=True, how="left")
oslo_consumption=oslo_consumption.drop(['location'],axis=1).rename(columns={'oslo':'spot_price'})

stavanger_consumption = consumption[consumption["location"] == "stavanger"]
stavanger_consumption = stavanger_consumption.merge(spot_prices["stavanger"], left_index=True, right_index=True, how="left")
stavanger_consumption=stavanger_consumption.drop(['location'],axis=1).rename(columns={'stavanger':'spot_price'})

trondheim_consumption = consumption[consumption["location"] == "trondheim"]
trondheim_consumption = trondheim_consumption.merge(spot_prices["trondheim"], left_index=True, right_index=True, how="left")
trondheim_consumption=trondheim_consumption.drop(['location'],axis=1).rename(columns={'trondheim':'spot_price'})

tromso_consumption = consumption[consumption["location"] == "tromsø"]
tromso_consumption = tromso_consumption.merge(spot_prices["tromsø"], left_index=True, right_index=True, how="left")
tromso_consumption=tromso_consumption.drop(['location'],axis=1).rename(columns={'tromsø':'spot_price'})

bergen_consumption = consumption[consumption["location"] == "bergen"]
bergen_consumption = bergen_consumption.merge(spot_prices["bergen"], left_index=True, right_index=True, how="left")
bergen_consumption=bergen_consumption.drop(['location'],axis=1).rename(columns={'bergen':'spot_price'})
    

<h1>Feature engineering<h1>

In [3]:
CONSUMPTION_DFS = [oslo_consumption,stavanger_consumption,trondheim_consumption,tromso_consumption,bergen_consumption]

cols_to_standard_normal = ["temperature","delta_temp"]
cols_to_normalize_01 = ["consumption","spot_price"]

for df in CONSUMPTION_DFS:
    df["delta_temp"] = df["temperature"]-df["temperature"].shift(1)
    df["is_holiday"] = df["is_holiday"].astype(int)
    df["is_weekend"] = df["is_weekend"].astype(int)

    for col in cols_to_standard_normal:
        mu, sig = df[col].mean(), df[col].std()
        df[col] = (df[col]-mu)/sig
    for col in cols_to_normalize_01:
        df[col] = (df[col]-df[col].min())/(df[col].max()-df[col].min())
    df = df.iloc[1:,:]


<h1>Splitting datasets<h1>

In [4]:
oslo_with_price = oslo_consumption.dropna()
oslo_no_price = pd.concat([oslo_consumption,oslo_with_price]).drop_duplicates(keep=False).drop(columns=["spot_price"])

stavanger_with_price = stavanger_consumption.dropna()
stavanger_no_price = pd.concat([stavanger_consumption,stavanger_with_price]).drop_duplicates(keep=False).drop(columns=["spot_price"])

trondheim_with_price = trondheim_consumption.dropna()
trondheim_no_price = pd.concat([trondheim_consumption,trondheim_with_price]).drop_duplicates(keep=False).drop(columns=["spot_price"])

tromso_with_price = tromso_consumption.dropna()
tromso_no_price = pd.concat([tromso_consumption,tromso_with_price]).drop_duplicates(keep=False).drop(columns=["spot_price"])

bergen_with_price = bergen_consumption.dropna()
bergen_no_price = pd.concat([bergen_consumption,bergen_with_price]).drop_duplicates(keep=False).drop(columns=["spot_price"])



In [5]:
ALL_DFS = [oslo_with_price,oslo_no_price,stavanger_with_price,stavanger_no_price,trondheim_with_price,trondheim_no_price,tromso_with_price,tromso_no_price,bergen_with_price,bergen_no_price]

for i in range(len(ALL_DFS)):
    ALL_DFS[i].index = pd.to_datetime(ALL_DFS[i].index)
    ALL_DFS[i].loc[:,"hour"] = ALL_DFS[i].index.hour

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ALL_DFS[i].loc[:,"hour"] = ALL_DFS[i].index.hour
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ALL_DFS[i].loc[:,"hour"] = ALL_DFS[i].index.hour
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ALL_DFS[i].loc[:,"hour"] = ALL_DFS[i].index.hour
A value is trying to be set on a copy of a slice from a Da

<h1>Making training data<h1>

In [6]:
Y = oslo_with_price["consumption"]
X = oslo_with_price.drop("consumption", axis = 1)
X = X - X-X.shift(24)
X.dropna(inplace=True, axis = 0)
Y = Y.iloc[24:]

predictions = {}
for column in df.columns:
    # Split data into train and test
    train, test = train_test_split(df[column], train_size=0.8)

    # Fit an auto-ARIMA model
    model = pm.auto_arima(y=Y,X=X, seasonal=True, m=24, start_P=120, start_Q=120, start_p = 120, start_q=120, max_p=122,max_q=122,max_P=122,max_Q=122, information_criterion='bic') # m is the seasonal order

    # Make predictions
    forecast = model.predict(n_periods=len(test))

    # Store predictions
    predictions[column] = forecast

    # Plot the results (optional)
    plt.figure(figsize=(10, 6))
    plt.plot(train.index, train, label='Train')
    plt.plot(test.index, test, label='Test')
    plt.plot(test.index, forecast, label='Forecast')
    plt.title(f'Forecast vs Actuals for {column}')
    plt.legend()
    plt.show()



for column in df.columns:
    test = df[column][-len(predictions[column]):]  # Get the corresponding test set
    rmse = sqrt(mean_squared_error(test, predictions[column]))
    print(f'RMSE for {column}: {rmse}')

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
