In [1]:
import requests
import pandas as pd
import hopsworks
import datetime
import matplotlib.pyplot as plt
import json
import re
import os
import sys
from dotenv import load_dotenv
from datetime import datetime, timedelta
import warnings
sys.path.append(os.path.abspath(os.path.join('..', 'functions')))
import util
sys.path.append(os.path.abspath(os.path.join('..', 'model')))
import nn
import torch
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split


warnings.filterwarnings("ignore")

In [2]:
load_dotenv()
proj = hopsworks.login()
fs = proj.get_feature_store("KTH_ID2223") 

2024-12-29 13:09:14,411 INFO: Initializing external client
2024-12-29 13:09:14,411 INFO: Base URL: https://c.app.hopsworks.ai:443
2024-12-29 13:09:17,866 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1164449


In [3]:
el_prices_fg = fs.get_feature_group(
    name='el_prices',
    version=1,
)
weather_fg = fs.get_feature_group(
    name='weather',
    version=1,
)
power_fg = fs.get_feature_group(
    name='power',
    version=1,
)

In [4]:
el_prices_fg.show(10)
# weather_fg.show(10)
# power_fg.show(10)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.95s) 


Unnamed: 0,date,price
0,2023-01-23 00:00:00+00:00,1.662196
1,2023-12-20 00:00:00+00:00,0.535002
2,2023-05-27 00:00:00+00:00,0.064113
3,2023-03-15 00:00:00+00:00,0.975358
4,2022-11-01 00:00:00+00:00,0.655705
5,2023-07-27 00:00:00+00:00,0.381928
6,2023-01-26 00:00:00+00:00,1.101363
7,2024-06-02 00:00:00+00:00,0.097737
8,2023-03-22 00:00:00+00:00,0.488279
9,2024-09-04 00:00:00+00:00,0.209347


In [5]:
weather_df = weather_fg.read()
power_df = power_fg.read()
el_prices_df = el_prices_fg.read()

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.66s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.65s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.61s) 


In [6]:
merged_df = (
    weather_df
    .merge(power_df, on="date", how="inner")
    .merge(el_prices_df, on="date", how="inner")
)

merged_df['date'] = pd.to_datetime(merged_df['date'])
merged_df = merged_df.sort_values(by="date").reset_index(drop=True)

dates = merged_df['date']
features = merged_df.drop(columns=['date', 'price']).iloc[:-1]
target = merged_df['price'].iloc[1:]
normalizer = Normalizer()
normalized_continuous_features = normalizer.fit_transform(features)
normalized_features_df = pd.DataFrame(normalized_continuous_features, columns=features.columns)

print(normalized_features_df.head())

   temperature_2m_mean  precipitation_sum  wind_speed_10m_max  \
0             0.000806           0.000110            0.001211   
1             0.000430           0.000040            0.000594   
2             0.000292           0.000000            0.000538   
3             0.000338           0.000000            0.000867   
4             0.000726           0.000522            0.001808   

   wind_direction_10m_dominant  sunshine_duration  hydro_mw  nuclear_mw  \
0                     0.019292           0.000000  0.807575    0.522701   
1                     0.008403           0.877442  0.357346    0.254733   
2                     0.007029           0.917855  0.288438    0.207158   
3                     0.005856           0.905648  0.273211    0.228957   
4                     0.014766           0.000000  0.630715    0.531639   

   other_mw   wind_mw  
0  0.082356  0.259709  
1  0.040314  0.189246  
2  0.034096  0.173835  
3  0.035613  0.226786  
4  0.072164  0.560472  


In [7]:
X = torch.from_numpy(features.values)
y = torch.from_numpy(target.values)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

In [8]:
input_features = [
        "temperature_2m_mean", "precipitation_sum", "wind_speed_10m_max",
        "wind_direction_10m_dominant", "sunshine_duration",
        "Hydro Water Reservoir", "Nuclear", "Other", "Wind Onshore", 
]
input_size = len(input_features)
model = nn.EnergyPricePredictorLSTM(input_size=input_size)

In [90]:
from torch.utils.data import Dataset, DataLoader

class SequenceDataset(Dataset):
    def __init__(self, X, y, seq_length):
        self.X = X
        self.y = y
        self.seq_length = seq_length

    def __len__(self):
        return len(self.X) // self.seq_length

    def __getitem__(self, idx):
        x_seq = self.X[idx:idx + self.seq_length, :]
        y_seq = self.y[idx + self.seq_length]
        return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_seq, dtype=torch.float32)


In [163]:
seq_length = 3
batch_size = 16
train_dataset = SequenceDataset(X_train, y_train, seq_length)
test_dataset = SequenceDataset(X_test, y_test, seq_length)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


In [164]:
for i, (X, y) in enumerate(train_loader):
    print(X.size())
    print(y.size())
    break

for i, (X, y) in enumerate(test_loader):
    print(X.size())
    print(y.size())
    break

torch.Size([16, 3, 9])
torch.Size([16])
torch.Size([16, 3, 9])
torch.Size([16])


In [165]:
hidden_size = 64        
num_layers = 2         
output_size = 1

In [None]:
import torch.optim as optim
import torch.nn as nn

class PriceLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(PriceLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  
        out, _ = self.lstm(x, (h0, c0))  
        out = self.fc(out[:, -1, :]) 
        return out

num_epochs = 200
model = PriceLSTM(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)
train_loss_list = []
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred.squeeze(), y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    

    print(f"Epoch {epoch+1}, Training Loss: {train_loss/len(train_loader):.4f}")
    train_loss_list.append(train_loss/len(train_loader))



Epoch 1, Training Loss: 1.1233
Epoch 2, Training Loss: 1.1173
Epoch 3, Training Loss: 1.1117
Epoch 4, Training Loss: 1.1061
Epoch 5, Training Loss: 1.1005
Epoch 6, Training Loss: 1.0951
Epoch 7, Training Loss: 1.0897
Epoch 8, Training Loss: 1.0844
Epoch 9, Training Loss: 1.0791
Epoch 10, Training Loss: 1.0742
Epoch 11, Training Loss: 1.0688
Epoch 12, Training Loss: 1.0640
Epoch 13, Training Loss: 1.0589
Epoch 14, Training Loss: 1.0540
Epoch 15, Training Loss: 1.0488
Epoch 16, Training Loss: 1.0439
Epoch 17, Training Loss: 1.0388
Epoch 18, Training Loss: 1.0338
Epoch 19, Training Loss: 1.0289
Epoch 20, Training Loss: 1.0240
Epoch 21, Training Loss: 1.0193
Epoch 22, Training Loss: 1.0145
Epoch 23, Training Loss: 1.0092
Epoch 24, Training Loss: 1.0036
Epoch 25, Training Loss: 0.9988
Epoch 26, Training Loss: 0.9940
Epoch 27, Training Loss: 0.9890
Epoch 28, Training Loss: 0.9841
Epoch 29, Training Loss: 0.9796
Epoch 30, Training Loss: 0.9747
Epoch 31, Training Loss: 0.9701
Epoch 32, Trainin

In [177]:
model.eval()
test_loss = 0
y_preds = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        y_pred = model(X_batch)
        loss = criterion(y_pred.squeeze(), y_batch)
        y_preds += y_pred.squeeze().tolist()
        test_loss += loss.item()

print(f"Test Loss: {test_loss/len(test_loader):.4f}")

Test Loss: 0.5507


download model hopswork

In [63]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

xgb_model = XGBRegressor()
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("MSE:", mse)

r2 = r2_score(y_test, y_pred)
print("R squared:", r2)

MSE: 0.13796250595005646
R squared: 0.6951807681678888
