In [None]:
import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
# lasso model
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

from models.create_dataset import create_dataset


In [None]:
# Configure logging for the notebook: INFO and above, each record stamped with
# time, logger name and level. The module-level logger is used by later cells.
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

In [None]:
# Load the modelling data. `target_col` names the column to predict; it is passed
# to the project's dataset builder here and reused below to separate features from
# the target.
# NOTE(review): presumably `data` is a DataFrame with a datetime index (later cells
# call `.drop` on it and compare its index against a datetime cutoff) — confirm in
# create_dataset.
target_col = 'DK1_price'
data = create_dataset(target_col=target_col)


In [None]:
# Separate the target series from the feature matrix (every remaining column).
y = data[target_col]
X = data.drop(columns=target_col)


In [None]:
# Chronological hold-out: rows stamped before the cutoff are used for fitting,
# rows at or after the cutoff form the test period.
test_cutoff = pd.Timestamp('2021-01-01')
X_train, y_train = X.loc[X.index < test_cutoff], y.loc[y.index < test_cutoff]
X_test, y_test = X.loc[X.index >= test_cutoff], y.loc[y.index >= test_cutoff]

# Carve a validation set off the end of the pre-cutoff data. shuffle=False keeps
# row order, so the validation rows are the last 20% of the training rows.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    shuffle=False,
    test_size=0.2,
)

In [None]:
# Baseline: ordinary least squares fitted on the training split.
reg = LinearRegression()
reg.fit(X_train, y_train)


In [None]:
# Walk-forward evaluation: for each calendar day from the test cutoff onward, fit a
# Lasso model on every row stamped strictly before that day, then predict that
# day's rows.
# NOTE(review): using y.index[-1] as the end date assumes the index is sorted
# chronologically — confirm.
day_range = pd.date_range(start=test_cutoff, end=y.index[-1], freq='D')

# Create the prediction column as float up front; without an explicit dtype the
# column would be object-typed.
preds = pd.DataFrame(index=y_test.index, columns=['pred'], dtype=float)
one_day = pd.Timedelta(days=1)

for i, day in enumerate(day_range):
    if day.day == 1:
        # Log only on the first day of each month to show progress without
        # flooding the cell output.
        logger.info(f'Predicting day {day} ({i+1}/{len(day_range)})')

    # Rows belonging to the day being predicted.
    X_day = X[(X.index >= day) & (X.index < day + one_day)]
    if X_day.empty:
        # Gap in the data for this day: nothing to predict, and predict()
        # raises on an input with zero rows.
        continue

    # Expanding training window. Loop-local names are used so the X_train /
    # y_train / X_test produced by the split cell are not overwritten.
    X_fit = X[X.index < day]
    y_fit = y[y.index < day]

    # Refit from scratch each day on the expanded window.
    reg = Lasso(alpha=1, tol=1e-2).fit(X_fit, y_fit)

    preds.loc[X_day.index, 'pred'] = reg.predict(X_day)

In [None]:
# Score the day-by-day predictions against the held-out targets: mean absolute
# error and root mean squared error.
# Cast to float so scoring does not depend on the dtype the prediction column was
# created with.
y_pred = preds['pred'].astype(float)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [None]:
# Display the mean absolute error as this cell's output.
mae

In [None]:
# Display the root mean squared error as this cell's output.
rmse

In [None]:
# Plot the tail of the test period: actual values against predictions.
# NOTE(review): 24 * 30 rows corresponds to 30 days only if the data is hourly —
# confirm the sampling frequency.
n_last = 24 * 30

fig, ax = plt.subplots()
ax.plot(y_test[-n_last:], label='true')
# Cast to float so plotting does not depend on the prediction column's dtype.
ax.plot(preds['pred'][-n_last:].astype(float), label='pred')
ax.set(
    title=f'{target_col}: last {n_last} rows of the test set',
    xlabel='timestamp',
    ylabel=target_col,
)
ax.legend()
plt.show()

In [None]:
# NOTE(review): leftover scratch cell — it only evaluates the literal 0 and nothing
# in the notebook reads the result. Safe to delete.
0