In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [None]:
# Relative location of the pre-processed CSV that the notebook loads.
data_src = (
    '../dat/bundesnetzagentur/pre_processed_data/'
    'prod_price_de_lu_2019_2021.csv'
)

# Hourly data from Bundesnetzagentur (DE/LU bidding zone, 2019-2021)

In [None]:
# Read the CSV, parse the 'time' column as datetimes and use it as the index
# so that later cells can slice by date string and resample.
combined = pd.read_csv(data_src, parse_dates=['time']).set_index('time')
combined

In [None]:
# Overview of the whole time range: price and wind are drawn against the same
# time axis, each with its own y-axis (left: price, right: wind).
fig, ax1 = plt.subplots(figsize=(20,10))

# Second y-axis that shares ax1's x-axis.
ax2 = ax1.twinx()

ax1.plot(combined['Price'], color='red')
ax1.set_ylabel('Price [€/MWh]')

ax2.plot(combined['Wind'], color='blue')
ax2.set_ylabel('Wind [MWh]')

# Render the figure. (plt.plot() without data draws nothing and only echoes
# an empty list as the cell output.)
plt.show()

In [None]:
# Zoom into a single day: price on the left y-axis, wind on the right one.
fig, ax1 = plt.subplots(figsize=(12,6))

# Second y-axis that shares ax1's x-axis.
ax2 = ax1.twinx()

# Date string used to select that day's rows from the datetime index.
day = '2019-08-06'

ax1.plot(combined.loc[day, 'Price'], color='red')
ax1.set_ylabel('Price [€/MWh]')

ax2.plot(combined.loc[day, 'Wind'], color='blue')
ax2.set_ylabel('Wind [MWh]')

# Render the figure. (plt.plot() without data draws nothing and only echoes
# an empty list as the cell output.)
plt.show()

## Use Regression

### Deal with missing values

In [None]:
# Show the 'Wind' entries that are NaN (one label-based lookup, no chaining).
combined.loc[combined['Wind'].isna(), 'Wind']

In [None]:
# Show the 'Price' entries that are NaN (one label-based lookup, no chaining).
combined.loc[combined['Price'].isna(), 'Price']

In [None]:
# Discard every row that has a NaN in any column; the regressions below
# cannot be fitted on missing values.
combined = combined.dropna(axis='index', how='any')

In [None]:
# Simple linear regression of price on the 'Solar & Wind' column.
# Double brackets keep the feature as a 2-D (n_samples, 1) array, which is
# the layout LinearRegression.fit expects.
X = combined[['Solar & Wind']].to_numpy()
y = combined['Price'].to_numpy()

reg = LinearRegression().fit(X, y)

# Slope (array with one entry) and intercept of the fitted line.
m, b = reg.coef_, reg.intercept_

fig, ax = plt.subplots(figsize=(10, 5))

# Raw observations as markers, fitted line in red on top.
ax.plot(X, y, 'o')
ax.plot(X, m * X + b, 'r')

ax.set_xlabel('Solar & Wind [MWh]')
ax.set_ylabel('Price [€/MWh]')

fig.tight_layout()
# Uncomment to export the figure:
# plt.savefig('regression_test.pdf')
plt.show()

### Pumped-Storage Hydro is used when prices are high
Pumped-storage plants perform arbitrage over time: they store energy when prices are low and generate when prices are high.

In [None]:
# Simple linear regression of price on pumped-storage hydro generation.
# reshape(-1, 1) turns the 1-D column into the (n_samples, 1) feature matrix
# that LinearRegression.fit expects.
X = combined['Pumped-Storage Hydro'].to_numpy().reshape(-1,1)
y = combined['Price'].to_numpy()

reg = LinearRegression()
reg.fit(X, y)

# Slope (array with one entry) and intercept of the fitted line.
m = reg.coef_
b = reg.intercept_

plt.figure(figsize=(10,5))

# Raw observations as markers, fitted line in red on top.
plt.plot(X, y, 'o')
plt.plot(X, m*X + b, 'r')

# Name the plotted quantity on the x-axis (previously only the unit was shown).
plt.xlabel('Pumped-Storage Hydro [MWh]')
plt.ylabel('Price [€/MWh]')

plt.tight_layout()
# plt.savefig('regression_test.pdf')
plt.show()

## Take a look at daily values

In [None]:
# Aggregate to daily means and regress the daily price on daily wind.
# resample('D') emits one row per calendar day in the index range; a day with
# no remaining hourly rows becomes NaN, which LinearRegression.fit rejects,
# so such days are dropped before fitting.
daily = combined.resample('D').mean().dropna(subset=['Wind', 'Price'])

# reshape(-1, 1) turns the 1-D column into the (n_samples, 1) feature matrix
# that LinearRegression.fit expects.
X = daily['Wind'].to_numpy().reshape(-1,1)
y = daily['Price'].to_numpy()

reg = LinearRegression()
reg.fit(X, y)

# Slope (array with one entry) and intercept of the fitted line.
m = reg.coef_
b = reg.intercept_

plt.figure(figsize=(10,5))

# Daily observations as markers, fitted line in red on top.
plt.plot(X, y, 'o')
plt.plot(X, m*X + b, 'r')

plt.xlabel('Wind [MWh]')
plt.ylabel('Price [€/MWh]')

plt.tight_layout()
# plt.savefig('regression_test.pdf')
plt.show()