In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm


In [None]:
# Load the dataset and index it by its timestamp column.
df = pd.read_excel("/Users/evenbakke/Documents/Master Thesis/MasterThesis-/Data 2.0/Final Data with 2024.xlsx")
df.set_index("DateTime", inplace=True)
# sort_index() returns a new frame, so the result has to be assigned back;
# the later label slice df.loc["2023":"2024"] and the lag-based differencing
# both rely on the rows being in chronological order.
df = df.sort_index(ascending=True)
df

# System Price time series analysis

## Overview of the time series 

In [None]:

# Count, mean, spread and quartiles of the system price over the whole sample.
statistics_summary = df.loc[:, 'System Price'].describe()
print(statistics_summary)


In [None]:
# Bucket the system price by the calendar year of its timestamp ...
grouped_by_year = df.groupby(df.index.year)['System Price']

# ... and compute the same descriptive statistics once per year.
yearly_descriptive_stats = grouped_by_year.describe()
yearly_descriptive_stats


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Line chart of the full price history.
fig_ts, ax_ts = plt.subplots(figsize=(14, 7))
ax_ts.plot(df.index, df['System Price'], label='System Price')
ax_ts.set_title('Day-Ahead System Price Over Time')
ax_ts.set_xlabel('Date')
ax_ts.set_ylabel('Price (EUR/MWh)')
ax_ts.legend()
ax_ts.grid(True)
plt.show()

# Histogram (30 bins) with a kernel density estimate overlaid.
fig_hist, ax_hist = plt.subplots(figsize=(10, 6))
sns.histplot(df['System Price'], bins=30, kde=True, ax=ax_hist)
ax_hist.set_title('Distribution of System Prices')
ax_hist.set_xlabel('System Price')
ax_hist.set_ylabel('Frequency')
plt.show()

# Horizontal box plot to make outliers visible.
fig_box, ax_box = plt.subplots(figsize=(8, 6))
sns.boxplot(x=df['System Price'], ax=ax_box)
ax_box.set_title('Box Plot of System Prices')
ax_box.set_xlabel('System Price')
plt.show()


## Subsetting 2023/2024

In [None]:
# Keep only 2023-2024. The explicit copy detaches the subset from the full
# frame, so the columns added to it later do not trigger pandas'
# SettingWithCopyWarning. NOTE: slicing by year strings assumes the
# DatetimeIndex is sorted.
df = df.loc["2023":"2024"].copy()

In [None]:
import matplotlib.pyplot as plt


# Large single-panel view of the price series for the selected period.
fig_hourly, ax_hourly = plt.subplots(figsize=(20, 10))
ax_hourly.plot(df.index, df['System Price'], label='System Price')
ax_hourly.set_title('Hourly Day-Ahead System Price')
ax_hourly.set_xlabel('DateTime')
ax_hourly.set_ylabel('System Price')
ax_hourly.legend()
ax_hourly.grid(True)
plt.tight_layout()

plt.show()

## Seasonal decompose from statsmodels

In [None]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

y = df["System Price"]

# Additive decomposition with a 24-observation seasonal period
# (presumably one day of hourly data, going by the plot titles above).
decomposition_additive = sm.tsa.seasonal_decompose(y, model='additive', period=24)

# One stacked panel per component, in the usual observed/trend/seasonal/residual order.
fig, axes = plt.subplots(4, 1, figsize=(10, 8))
panels = (
    ('Observed', decomposition_additive.observed),
    ('Trend', decomposition_additive.trend),
    ('Seasonal', decomposition_additive.seasonal),
    ('Residual', decomposition_additive.resid),
)
for axis, (panel_label, component) in zip(axes, panels):
    axis.plot(component)
    axis.set_ylabel(panel_label)
plt.tight_layout()


In [None]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.dates as mdates


y = df["System Price"]

# Same additive decomposition as in the previous cell (24-observation period),
# recomputed here so this cell can be run on its own.
decomposition_additive = sm.tsa.seasonal_decompose(y, model='additive', period=24)

fig, axes = plt.subplots(4, 1, figsize=(12, 10))
fig.suptitle('Decomposition of Electricity Prices', fontsize=16)

# Draw each component on its own panel with a grid.
components = (
    ('Observed', decomposition_additive.observed),
    ('Trend', decomposition_additive.trend),
    ('Seasonal', decomposition_additive.seasonal),
    ('Residual', decomposition_additive.resid),
)
for ax, (panel_label, component) in zip(axes, components):
    ax.plot(component)
    ax.set_ylabel(panel_label, fontsize=12)
    ax.grid(True)

# Leave the top 4% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])

# Monthly ticks labelled as YYYY-MM on every panel. The earlier
# plt.gca() formatter call was removed: it only touched the last panel and
# was overwritten by this loop anyway.
for ax in axes:
    ax.xaxis.set_major_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

plt.show()



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import adfuller


# Residual component of the decomposition; the NaN entries it contains are
# dropped before testing.
residuals = decomposition_additive.resid.dropna()

# Residuals over time with a zero reference line.
fig_resid, ax_resid = plt.subplots(figsize=(10, 4))
ax_resid.plot(residuals)
ax_resid.set_title('Residuals')
ax_resid.axhline(0, linestyle='--', color='gray')
plt.show()

# ACF on the top panel, PACF on the bottom panel, 50 lags each.
fig, ax = plt.subplots(2, 1, figsize=(14, 8))
for draw_correlogram, panel in zip((plot_acf, plot_pacf), ax):
    draw_correlogram(residuals, lags=50, ax=panel)
plt.show()

# Ljung-Box test for remaining autocorrelation up to lag 24.
lb_results = acorr_ljungbox(residuals, lags=[24], return_df=True)
print(lb_results)

# Augmented Dickey-Fuller test on the residuals.
adf_result = adfuller(residuals)
adf_statistic, adf_pvalue, adf_critical = adf_result[0], adf_result[1], adf_result[4]
print(f'ADF Statistic: {adf_statistic}')
print(f'p-value: {adf_pvalue}')
print(f'Critical Values: {adf_critical}')




### Zooming in on the seasonal component 

In [None]:

# Two days of the estimated seasonal component, enough to see the repeating cycle.
seasonal_component = decomposition_additive.seasonal.loc['2023-01-01':'2023-01-02']

fig_seasonal, ax_seasonal = plt.subplots(figsize=(10, 6))
ax_seasonal.plot(seasonal_component)
ax_seasonal.set_title('Seasonal Component for the First Two Days of January 2023')
ax_seasonal.set_ylabel('Seasonal')
ax_seasonal.set_xlabel('Date')
# Rotate the tick labels so the timestamps do not overlap.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt


# Restrict the price series to January 2023 and decompose that month alone.
y_january = df["System Price"].loc['2023-01-01':'2023-01-31']

decomposition_additive_january = sm.tsa.seasonal_decompose(y_january, model='additive', period=24)

fig, axes = plt.subplots(4, 1, figsize=(10, 8))
january_panels = (
    ('Observed', decomposition_additive_january.observed),
    ('Trend', decomposition_additive_january.trend),
    ('Seasonal', decomposition_additive_january.seasonal),
    ('Residual', decomposition_additive_january.resid),
)
for axis, (panel_label, component) in zip(axes, january_panels):
    axis.plot(component)
    axis.set_ylabel(panel_label)
# Only the top panel carries the month title.
axes[0].set_title('January 2023')
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt


# Mean of every column per hour of day (despite its name, `daily_avg` is an
# hour-of-day profile).
daily_avg = df.groupby(df.index.hour).mean()

# Hours 0-23 from the index are drawn at x positions 1-24.
fig_daily, ax_daily = plt.subplots(figsize=(12, 6))
ax_daily.plot(range(1, 25), daily_avg['System Price'])
ax_daily.set_title('24 hours')
ax_daily.set_xlabel('Hour')
ax_daily.set_ylabel('System Price (EUR/MWh)')
ax_daily.set_xticks([0, 5, 10, 15, 20, 25])
ax_daily.grid(True)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt


# Mean of every column for each (day of week, hour of day) pair.
weekly_avg = df.groupby([df.index.dayofweek, df.index.hour]).mean()

# Flatten into one 168-value list, ordered day by day and hour by hour.
hourly_weekly_avg = []
for day in range(7):
    for hour in range(24):
        hourly_weekly_avg.append(weekly_avg.loc[(day, hour), 'System Price'])

# Average weekly profile, drawn at x positions 1-168.
fig_weekly, ax_weekly = plt.subplots(figsize=(12, 6))
ax_weekly.plot(range(1, 169), hourly_weekly_avg, linestyle='-')
ax_weekly.set_title('168 hours')
ax_weekly.set_xlabel('Hour')
ax_weekly.set_ylabel('System Price (EUR/MWh)')
ax_weekly.set_xticks(range(0, 169, 50))
ax_weekly.grid(True)

plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt


# Hour-of-day means and (day-of-week, hour) means of every column.
daily_avg = df.groupby(df.index.hour).mean()
weekly_avg = df.groupby([df.index.dayofweek, df.index.hour]).mean()

# Weekly profile flattened to 168 values, ordered day by day and hour by hour.
hourly_weekly_avg = []
for day in range(7):
    for hour in range(24):
        hourly_weekly_avg.append(weekly_avg.loc[(day, hour), 'System Price'])

# Side-by-side panels: (x positions, y values, title, x ticks).
fig, axes = plt.subplots(1, 2, figsize=(16, 4))
profile_panels = (
    (range(1, 25), daily_avg['System Price'], '24 hours', [0, 5, 10, 15, 20, 25]),
    (range(1, 169), hourly_weekly_avg, '168 hours', range(0, 169, 50)),
)
for axis, (x_positions, profile, panel_title, ticks) in zip(axes, profile_panels):
    axis.plot(x_positions, profile, linestyle='-')
    axis.set_title(panel_title)
    axis.set_xlabel('Hour')
    axis.set_ylabel('System Price (EUR/MWh)')
    axis.set_xticks(ticks)
    axis.grid(True)

plt.tight_layout()
plt.show()


# ACF and PACF (Autocorrelation)

In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


# Figure 1: ACF and PACF of the price level, 50 lags, 95% confidence band.
fig1, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))
for plot_correlogram, panel, panel_title in (
    (plot_acf, ax1, 'ACF'),
    (plot_pacf, ax2, 'PACF'),
):
    plot_correlogram(df['System Price'], lags=50, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()

# Figure 2: the same pair over 170 lags (just over one week of hourly lags).
fig2, (ax3, ax4) = plt.subplots(1, 2, figsize=(16, 6))
for plot_correlogram, panel, panel_title in (
    (plot_acf, ax3, 'Autocorrelation Function (ACF) for System Price - 170 Lags'),
    (plot_pacf, ax4, 'Partial Autocorrelation Function (PACF) for System Price - 170 Lags'),
):
    plot_correlogram(df['System Price'], lags=170, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()



From the ACF plot, we see strong autocorrelation that decays only slowly as the lag increases, which indicates non-stationarity in the data.

In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


# Stacked layout: ACF on top, PACF below, 50 lags each.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
for plot_correlogram, panel, panel_title in (
    (plot_acf, ax1, 'ACF'),
    (plot_pacf, ax2, 'PACF'),
):
    plot_correlogram(df['System Price'], lags=50, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()


## Tests for stationarity 

In [None]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.stattools import zivot_andrews  



# Series under test: the system price in levels.
ts = df['System Price']

# Augmented Dickey-Fuller test (null: unit root), lag length chosen by AIC.
adf_result = adfuller(ts, autolag='AIC')
adf_critical_values = adf_result[4]
print("ADF Statistic: %f" % adf_result[0])
print("p-value: %f" % adf_result[1])
print("Critical Values:")
for level, threshold in adf_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

# KPSS test (null: stationarity around a constant), automatic lag selection.
kpss_result = kpss(ts, regression='c', nlags='auto')
kpss_critical_values = kpss_result[3]
print("\nKPSS Statistic: %f" % kpss_result[0])
print("p-value: %f" % kpss_result[1])
print("Critical Values:")
for level, threshold in kpss_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

# Zivot-Andrews unit-root test allowing for one structural break
# (statsmodels implementation, tuple result).
za_result = zivot_andrews(ts)
za_critical_values = za_result[2]
print("\nZivot-Andrews Statistic: %f" % za_result[0])
print("p-value: %f" % za_result[1])
print("Critical Values:")
for level, threshold in za_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))


### Removing trend component

In [None]:

# Add the first difference of the price as 'detrended', then drop every row
# that holds a NaN in any column (this includes the first row, whose
# difference is undefined).
df = df.assign(detrended=df['System Price'].diff()).dropna()

In [None]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, kpss
from arch.unitroot import ZivotAndrews


# Series under test: the first-differenced price.
ts = df['detrended']

# Augmented Dickey-Fuller test (null: unit root), lag length chosen by AIC.
adf_result = adfuller(ts, autolag='AIC')
adf_critical_values = adf_result[4]
print("ADF Statistic: %f" % adf_result[0])
print("p-value: %f" % adf_result[1])
print("Critical Values:")
for level, threshold in adf_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

# KPSS test (null: stationarity around a constant), automatic lag selection.
kpss_result = kpss(ts, regression='c', nlags='auto')
kpss_critical_values = kpss_result[3]
print("\nKPSS Statistic: %f" % kpss_result[0])
print("p-value: %f" % kpss_result[1])
print("Critical Values:")
for level, threshold in kpss_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

# Zivot-Andrews test via the `arch` package, which exposes the results as
# attributes rather than a tuple.
za_result = ZivotAndrews(ts)
za_critical_values = za_result.critical_values
print("\nZivot-Andrews Statistic: %f" % za_result.stat)
print("p-value: %f" % za_result.pvalue)
print("Critical Values:")
for level, threshold in za_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))


In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


# ACF of the differenced series at two horizons: 50 and 170 lags.
fig1, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))
for panel, n_lags, panel_title in (
    (ax1, 50, 'ACF 50 Lags'),
    (ax2, 170, 'ACF 170 Lags'),
):
    plot_acf(df['detrended'], lags=n_lags, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()

# PACF of the differenced series at the same two horizons.
fig1, (ax3, ax4) = plt.subplots(1, 2, figsize=(18, 6))
for panel, n_lags, panel_title in (
    (ax3, 50, 'PACF 50 Lags'),
    (ax4, 170, 'PACF 170 Lags'),
):
    plot_pacf(df['detrended'], lags=n_lags, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


# Stacked ACF (top) and PACF (bottom) of the differenced series, 50 lags.
fig1, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
for plot_correlogram, panel, panel_title in (
    (plot_acf, ax1, 'ACF 50 Lags (Detrended)'),
    (plot_pacf, ax2, 'PACF 50 Lags (Detrended)'),
):
    plot_correlogram(df['detrended'], lags=50, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()


### Removing seasonal component 

In [None]:

# Seasonal difference at lag 24 of the already differenced series, stored as
# 'detrend_deseason'; rows with a NaN in any column (including the first 24,
# whose seasonal difference is undefined) are then dropped.
df = df.assign(detrend_deseason=df['detrended'].diff(24)).dropna()

In [None]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, kpss
from arch.unitroot import ZivotAndrews


# Series under test: the price after first and lag-24 differencing.
ts = df['detrend_deseason']

# Augmented Dickey-Fuller test (null: unit root), lag length chosen by AIC.
adf_result = adfuller(ts, autolag='AIC')
adf_critical_values = adf_result[4]
print("ADF Statistic: %f" % adf_result[0])
print("p-value: %f" % adf_result[1])
print("Critical Values:")
for level, threshold in adf_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

# KPSS test (null: stationarity around a constant), automatic lag selection.
kpss_result = kpss(ts, regression='c', nlags='auto')
kpss_critical_values = kpss_result[3]
print("\nKPSS Statistic: %f" % kpss_result[0])
print("p-value: %f" % kpss_result[1])
print("Critical Values:")
for level, threshold in kpss_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

# Zivot-Andrews test via the `arch` package (attribute-style results).
za_result = ZivotAndrews(ts)
za_critical_values = za_result.critical_values
print("\nZivot-Andrews Statistic: %f" % za_result.stat)
print("p-value: %f" % za_result.pvalue)
print("Critical Values:")
for level, threshold in za_critical_values.items():
    print('\t%s: %.3f' % (level, threshold))


In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


# NOTE(review): this cell still plots the 'detrended' column (first difference
# only), now on the rows left after the seasonal-differencing dropna. It looks
# like a before/after reference for the 'detrend_deseason' plots - confirm.
fig1, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))
for panel, n_lags, panel_title in (
    (ax1, 50, 'ACF 50 Lags'),
    (ax2, 170, 'ACF 170 Lags'),
):
    plot_acf(df['detrended'], lags=n_lags, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()

# PACF of the same column at 50 and 170 lags.
fig1, (ax3, ax4) = plt.subplots(1, 2, figsize=(18, 6))
for panel, n_lags, panel_title in (
    (ax3, 50, 'PACF 50 Lags'),
    (ax4, 170, 'PACF 170 Lags'),
):
    plot_pacf(df['detrended'], lags=n_lags, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


# ACF of the detrended and deseasonalised series at 50 and 170 lags.
fig1, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))
for panel, n_lags, panel_title in (
    (ax1, 50, 'ACF 50 Lags'),
    (ax2, 170, 'ACF 170 Lags'),
):
    plot_acf(df['detrend_deseason'], lags=n_lags, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()

# PACF of the same series at the same two horizons.
fig1, (ax3, ax4) = plt.subplots(1, 2, figsize=(18, 6))
for panel, n_lags, panel_title in (
    (ax3, 50, 'PACF 50 Lags'),
    (ax4, 170, 'PACF 170 Lags'),
):
    plot_pacf(df['detrend_deseason'], lags=n_lags, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


# Stacked ACF (top) and PACF (bottom) of the fully differenced series, 50 lags.
fig1, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
for plot_correlogram, panel, panel_title in (
    (plot_acf, ax1, 'ACF 50 Lags (Detrended & Deseasonalized)'),
    (plot_pacf, ax2, 'PACF 50 Lags (Detrended & Deseasonalized)'),
):
    plot_correlogram(df['detrend_deseason'], lags=50, alpha=0.05, ax=panel)
    panel.set_title(panel_title, fontsize=14)
    panel.grid(True)

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
from scipy.stats import boxcox
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Box-Cox transformation of the differenced series.
# scipy's boxcox() requires strictly positive input, but a differenced series
# contains negative values, so a fixed "+ 1" is not enough and boxcox() would
# raise ValueError. The shift stays at 1 whenever that already makes every
# value positive (the original behaviour); otherwise it is enlarged so that
# the smallest shifted value is exactly 1.
series_min = df['detrend_deseason'].min()
boxcox_shift = 1.0 if series_min > -1.0 else 1.0 - series_min
# boxcox_shift and lambda_value are both needed to invert the transform later.
df['detrend_deseason_boxcox'], lambda_value = boxcox(df['detrend_deseason'] + boxcox_shift)

In [None]:
import matplotlib.pyplot as plt


# Before/after comparison: raw price on top, transformed series below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16, 16))

ax1.plot(df.index, df["System Price"], label='System Price')
ax1.set_title('System Price', fontsize=14)
ax1.set_xlabel('Date', fontsize=12)
ax1.set_ylabel('System Price', fontsize=12)
ax1.legend()
ax1.grid(True)

# The lower panel shows the differenced and Box-Cox-transformed values, not
# the price itself, so its legend entry and y-axis label say so (previously
# both read 'System Price').
ax2.plot(df.index, df["detrend_deseason_boxcox"], label='Transformed System Price')
ax2.set_title('System Price after applying Detrending, Deseasonalization & Box-Cox Transformation', fontsize=14)
ax2.set_xlabel('Date', fontsize=12)
ax2.set_ylabel('Transformed System Price', fontsize=12)
ax2.legend()
ax2.grid(True)

plt.tight_layout()

plt.show()


# Detrend/deseason all features

In [None]:
# List the column labels currently in the frame, including the derived
# 'detrended', 'detrend_deseason' and 'detrend_deseason_boxcox' series.
df.columns

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf


# Columns whose autocorrelation function is plotted below. Per the original
# note, the dummy variables are deliberately left out; the lagged system-price
# columns are included.
features = ['System Price', 'NO1 Price', 'NO2 Price', 'NO3 Price', 'NO4 Price',
            'NO5 Price', 'SE1 Price', 'SE2 Price', 'SE3 Price', 'SE4 Price',
            'DK1 Price', 'DK2 Price', 'FI Price', 'Total_Volume', 'Settled wind production SE',
            'Settled wind production NO','Settled wind production DK', 'Settled wind production FI', 'Oil',
            'Coal', 'Gas', 'Permit', 'Temp NO', 'Precip NO', 'Total Consumption',
            'Total Production', 'System Price Lag 1', 'System Price Lag 2',
            'System Price Lag 3', 'System Price Lag 24', 'System Price Lag 48',
            'System Price Lag 168']


def plot_acf_for_feature(df, feature, lags=168):
    """Plot the autocorrelation function of one column.

    Parameters
    ----------
    df : DataFrame holding the column.
    feature : label of the column to plot.
    lags : number of lags to show (default 168).
    """
    # plot_acf() opens its own figure unless it is given an axis, which used to
    # leave the (10, 6) figure created here empty and draw the ACF on a second,
    # default-sized figure. Passing `ax` keeps everything on one figure.
    fig, ax = plt.subplots(figsize=(10, 6))
    plot_acf(df[feature], lags=lags, ax=ax)
    ax.set_title(f'ACF for {feature}')
    plt.show()
    # Release the figure; this function is called once per feature in a loop.
    plt.close(fig)


# One ACF figure per listed feature.
for feature in features:
    plot_acf_for_feature(df, feature)


### Removing trend and seasonality from the features with the same trend/seasonality

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf

# Features to be replaced by their first difference.
features_to_diff = [
    'Gas', 'Oil', 'Permit', 'Coal', 'Settled wind production SE', 'Settled wind production DK', 
    'Settled wind production FI', 'Precip NO', 'Settled wind production NO'
]

# Overwrite each column with its first difference.
# NOTE: this is not idempotent - re-running the cell differences the columns again.
for feature in features_to_diff:
    df[feature] = df[feature].diff()

# Drop the rows left with NaN values by the differencing.
df.dropna(inplace=True)

# ACF of each differenced feature. The axis is created explicitly and handed
# to plot_acf(); otherwise plot_acf() opens a second figure and the sized one
# created here stays empty.
for feature in features_to_diff:
    fig, ax = plt.subplots(figsize=(10, 5))
    plot_acf(df[feature].dropna(), lags=50, ax=ax)
    ax.set_title(f'ACF for {feature}')
    plt.show()
    plt.close(fig)



In [None]:
# Display the frame as it stands after the in-place differencing and NaN removal.
df

In [None]:
import pandas as pd

# Destination workbook for the transformed dataset (absolute, machine-specific path).
file_path = "/Users/evenbakke/Documents/Master Thesis/MasterThesis-/Complete Data 2024 DTDS.xlsx"

# Write the frame with its DateTime index as the first column.
df.to_excel(excel_writer=file_path, index=True)

print(f"DataFrame has been exported to {file_path}")