In [2]:
import requests
import pandas as pd
import hopsworks
import datetime
import matplotlib.pyplot as plt
import json
import re
import os
import sys
from dotenv import load_dotenv
from datetime import datetime, timedelta
import warnings
sys.path.append(os.path.abspath(os.path.join('..', 'functions')))
import util
sys.path.append(os.path.abspath(os.path.join('..', 'model')))
import nn
import torch
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split


warnings.filterwarnings("ignore")

In [3]:
load_dotenv()
proj = hopsworks.login()
fs = proj.get_feature_store("KTH_ID2223") 

2024-12-30 17:06:42,911 INFO: Initializing external client
2024-12-30 17:06:42,911 INFO: Base URL: https://c.app.hopsworks.ai:443
2024-12-30 17:06:46,550 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1164449


In [4]:
el_prices_fg = fs.get_feature_group(
    name='el_prices',
    version=1,
)
weather_fg = fs.get_feature_group(
    name='weather',
    version=1,
)
power_fg = fs.get_feature_group(
    name='power',
    version=1,
)

In [491]:
el_prices_fg.show(10)
# weather_fg.show(10)
# power_fg.show(10)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.08s) 


Unnamed: 0,date,price
0,2023-01-23 00:00:00+00:00,1.662196
1,2023-12-20 00:00:00+00:00,0.535002
2,2023-05-27 00:00:00+00:00,0.064113
3,2023-03-15 00:00:00+00:00,0.975358
4,2022-11-01 00:00:00+00:00,0.655705
5,2023-07-27 00:00:00+00:00,0.381928
6,2023-01-26 00:00:00+00:00,1.101363
7,2024-06-02 00:00:00+00:00,0.097737
8,2023-03-22 00:00:00+00:00,0.488279
9,2024-09-04 00:00:00+00:00,0.209347


In [5]:
weather_df = weather_fg.read()
power_df = power_fg.read()
el_prices_df = el_prices_fg.read()

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.63s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.61s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.59s) 


In [6]:
merged_df = (
    weather_df
    .merge(power_df, on="date", how="inner")
    .merge(el_prices_df, on="date", how="inner")
)

merged_df['date'] = pd.to_datetime(merged_df['date'])
merged_df = merged_df.sort_values(by="date").reset_index(drop=True)

dates = merged_df['date']
features = merged_df.drop(columns=['date', 'price']).iloc[:-1]
target = merged_df['price'].iloc[1:]
normalizer = Normalizer()
normalized_continuous_features = normalizer.fit_transform(features)
normalized_features_df = pd.DataFrame(normalized_continuous_features, columns=features.columns)

In [7]:
normalized_features_df

Unnamed: 0,temperature_2m_mean,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,sunshine_duration,hydro_mw,nuclear_mw,other_mw,wind_mw
0,0.000806,0.000110,0.001211,0.019292,0.000000,0.807575,0.522701,0.082356,0.259709
1,0.000430,0.000040,0.000594,0.008403,0.877442,0.357346,0.254733,0.040314,0.189246
2,0.000292,0.000000,0.000538,0.007029,0.917855,0.288438,0.207158,0.034096,0.173835
3,0.000338,0.000000,0.000867,0.005856,0.905648,0.273211,0.228957,0.035613,0.226786
4,0.000726,0.000522,0.001808,0.014766,0.000000,0.630715,0.531639,0.072164,0.560472
...,...,...,...,...,...,...,...,...,...
775,-0.000047,0.000126,0.002167,0.021913,0.000000,0.638038,0.459011,0.046921,0.616060
776,0.000180,0.000077,0.001704,0.014277,0.695744,0.358687,0.299630,0.031855,0.544321
777,0.000054,0.000000,0.001275,0.015362,0.723997,0.431470,0.313577,0.045410,0.434778
778,0.000095,0.000404,0.001726,0.014557,0.000000,0.676140,0.452456,0.056168,0.578575


In [8]:
target

1      0.607735
2      0.550615
3      0.292413
4      0.422040
5      0.351532
         ...   
776    0.270241
777    0.735706
778    0.384965
779    0.230300
780    0.324242
Name: price, Length: 780, dtype: float64

Testing Lagged Prices

In [532]:
# normalized_features_df['price_lag_1'] = target.shift(1).values
# normalized_features_df['price_lag_2'] = target.shift(2).values
# normalized_features_df['price_lag_3'] = target.shift(3).values

In [9]:
X = torch.from_numpy(features.values)
y = torch.from_numpy(target.values)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

In [300]:
X_train.size()
y_train.size()

torch.Size([624])

In [10]:
input_features = [
        "temperature_2m_mean", "precipitation_sum", "wind_speed_10m_max",
        "wind_direction_10m_dominant", "sunshine_duration",
        "Hydro Water Reservoir", "Nuclear", "Other", "Wind Onshore", 
]
input_size = len(input_features)
# model = nn.EnergyPricePredictorLSTM(input_size=input_size)

In [11]:
from torch.utils.data import Dataset, DataLoader
import numpy as np
class SequenceDataset(Dataset):
    def __init__(self, X, y, seq_length):
        self.X = X
        self.y = y
        self.seq_length = seq_length

    def __len__(self):
        return len(self.X) // self.seq_length

    def __getitem__(self, idx):
        x_seq = self.X[idx:idx + self.seq_length, :]
        y_hist = self.y[idx:idx + self.seq_length].reshape(-1, 1) 
        x_combined = torch.tensor(np.hstack((x_seq, y_hist)), dtype=torch.float32)
        y_target = torch.tensor(self.y[idx + self.seq_length], dtype=torch.float32)
        return x_combined, y_target


In [12]:
input_size = 10


In [13]:
seq_length = 3
batch_size = 16
train_dataset = SequenceDataset(X_train, y_train, seq_length)
test_dataset = SequenceDataset(X_test, y_test, seq_length)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


In [14]:
for i, (X, y) in enumerate(train_loader):
    print(X.size())
    print(y.size())
    break

for i, (X, y) in enumerate(test_loader):
    print(X.size())
    print(y.size())
    break

torch.Size([16, 3, 10])
torch.Size([16])
torch.Size([16, 3, 10])
torch.Size([16])


In [15]:
hidden_size = 64        
num_layers = 2         
output_size = 1

In [16]:
import torch.optim as optim
import torch.nn as nn

class PriceLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(PriceLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(p=0.4)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  
        out, _ = self.lstm(x, (h0, c0))  
        out = self.dropout(out)
        out = self.fc(out[:, -1, :]) 
        return out

num_epochs = 100
model = PriceLSTM(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
train_loss_list = []
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred.squeeze(), y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    

    print(f"Epoch {epoch+1}, Training Loss: {train_loss/len(train_loader):.4f}")
    train_loss_list.append(train_loss/len(train_loader))


Epoch 1, Training Loss: 1.1119
Epoch 2, Training Loss: 1.0496
Epoch 3, Training Loss: 1.0083
Epoch 4, Training Loss: 0.9589
Epoch 5, Training Loss: 0.9224
Epoch 6, Training Loss: 0.8722
Epoch 7, Training Loss: 0.8289
Epoch 8, Training Loss: 0.8035
Epoch 9, Training Loss: 0.7722
Epoch 10, Training Loss: 0.7040
Epoch 11, Training Loss: 0.6965
Epoch 12, Training Loss: 0.6765
Epoch 13, Training Loss: 0.6547
Epoch 14, Training Loss: 0.6188
Epoch 15, Training Loss: 0.6033
Epoch 16, Training Loss: 0.6050
Epoch 17, Training Loss: 0.6007
Epoch 18, Training Loss: 0.6098
Epoch 19, Training Loss: 0.5907
Epoch 20, Training Loss: 0.5949
Epoch 21, Training Loss: 0.6181
Epoch 22, Training Loss: 0.6047
Epoch 23, Training Loss: 0.6053
Epoch 24, Training Loss: 0.5991
Epoch 25, Training Loss: 0.5794
Epoch 26, Training Loss: 0.5877
Epoch 27, Training Loss: 0.5921
Epoch 28, Training Loss: 0.5853
Epoch 29, Training Loss: 0.5885
Epoch 30, Training Loss: 0.5902
Epoch 31, Training Loss: 0.5947
Epoch 32, Trainin

In [25]:
from sklearn.metrics import mean_squared_error, r2_score

model.eval()
test_loss = 0
y_preds = []
y_true = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        y_pred = model(X_batch)
        y_preds += y_pred.tolist()
        y_true += y_batch.tolist()

In [26]:
mse = mean_squared_error(y_true, y_preds)
print("MSE:", mse)

r2 = r2_score(y_true, y_preds)
print("R squared:", r2)


MSE: 0.600434302834533
R squared: -0.09112162843008287


In [534]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

xgb_model = XGBRegressor(n_estimators=1000, learning_rate=0.01)
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("MSE:", mse)

r2 = r2_score(y_test, y_pred)
print("R squared:", r2)

eval_dict = { 
    "MSE": str(mse),
    "R squared": str(r2),
}

MSE: 0.1194033606000809
R squared: 0.7361859991912219


Upload model to Hopsworks

In [499]:
model_dir = "price_prediction_model"
if not os.path.exists(model_dir):
    os.mkdir(model_dir)

In [500]:
xgb_model.save_model(model_dir + "/model.json")

In [501]:
from hsml.schema import Schema
from hsml.model_schema import ModelSchema

input_schema = Schema(normalized_features_df)
output_schema = Schema(target)

model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)

mr = proj.get_model_registry()

price_prediction_model = mr.python.create_model(
    name="price_prediction_model", 
    metrics=eval_dict,
    model_schema=model_schema,
    input_example=target.sample().values, 
    description="Electricity Price Predicition Model",
)

price_prediction_model.save(model_dir)

  0%|          | 0/6 [00:00<?, ?it/s]

Uploading: 0.000%|          | 0/4063513 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/20 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/928 elapsed<00:00 remaining<?

Model created, explore it at https://c.app.hopsworks.ai:443/p/1164449/models/price_prediction_model/1


Model(name: 'price_prediction_model', version: 1)