In [1]:
import pandas as pd
import hopsworks
import datetime
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
import os
import sys
from dotenv import load_dotenv
from datetime import datetime, timedelta, timezone
import warnings
# Add ../functions (resolved against the current working directory) to the
# module search path so that modules stored there can be imported.
functions_dir = os.path.abspath(os.path.join('..', 'functions'))
sys.path.append(functions_dir)
import util  # presumably lives in ../functions, hence placed after the path tweak

# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

In [2]:
# Log in to Hopsworks; the returned project handle is reused below to reach
# the model registry.
project = hopsworks.login()
# Feature store handle used by later cells to read and write feature groups.
fs = project.get_feature_store()

2025-01-03 15:57:03,763 INFO: Initializing external client
2025-01-03 15:57:03,764 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-03 15:57:05,028 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1164449


In [3]:
# Fetch version 4 of the registered "price_prediction_model" from the
# project's model registry and download its artifact; `saved_model_dir` is
# the path that the download call returns.
mr = project.get_model_registry()
retrieved_model = mr.get_model(name="price_prediction_model", version=4)
saved_model_dir = retrieved_model.download()

Downloading model artifact (0 dirs, 1 files)... DONE

In [4]:
# Start from an empty regressor and restore its state from the downloaded
# model.json file.
retrieved_xgboost_model = XGBRegressor()
retrieved_xgboost_model.load_model(f"{saved_model_dir}/model.json")

# Last expression of the cell: renders the restored regressor as cell output.
retrieved_xgboost_model

In [5]:
# `today` is midnight (00:00:00.000000) of the current UTC calendar day as a
# timezone-aware datetime; `yesterday` is exactly one day earlier.
now_utc = datetime.now(timezone.utc)
today = datetime(now_utc.year, now_utc.month, now_utc.day, tzinfo=timezone.utc)
yesterday = today + timedelta(days=-1)

In [6]:
# Build the single feature row fed to the model: the most recent date that has
# weather, power and price data, with three price-lag features attached.

# Handles to version 1 of the three source feature groups.
weather_fg = fs.get_feature_group(
    name='weather',
    version=1,
)
power_fg = fs.get_feature_group(
    name='power',
    version=1,
)

el_prices_fg = fs.get_feature_group(
    name='el_prices',
    version=1,
)

# Read each feature group into a DataFrame.
weather_batch_data = weather_fg.read()
power_batch_data = power_fg.read()
el_prices_data = el_prices_fg.read()

# Keep only the dates present in both weather and power, newest first.
batch_data = weather_batch_data.merge(power_batch_data, on="date", how="inner")
batch_data['date'] = pd.to_datetime(batch_data['date'])
batch_data = batch_data.sort_values(by="date", ascending=False).reset_index(drop=True)

# Prices are ordered oldest first so that shift(k) looks k rows back in time.
el_prices_data['date'] = pd.to_datetime(el_prices_data['date'])
el_prices_data = el_prices_data.sort_values(by="date", ascending=True).reset_index(drop=True)

# price_lag_1 is the price on the row's own date (shift(0)); lag_2 and lag_3
# are the prices one and two rows earlier. Lags are row-based, so a gap in the
# price history shifts them by more than one calendar step.
# NOTE(review): presumably the newest row is used to predict the *next*
# period, which makes its own price "lag 1" -- confirm that this matches how
# the lag features were built when the model was trained.
el_prices_data['price_lag_1'] = el_prices_data['price'].shift(0)
el_prices_data['price_lag_2'] = el_prices_data['price'].shift(1)
el_prices_data['price_lag_3'] = el_prices_data['price'].shift(2)

# Drops every row containing a NaN in any column; this always includes the
# first two rows, whose lag_2 / lag_3 values are undefined.
el_prices_data = el_prices_data.dropna().reset_index(drop=True)

# An inner merge keeps the left frame's key order, so row 0 is the newest date
# that exists in all three sources.
batch_data_with_lags = batch_data.merge(el_prices_data, on="date", how="inner")

# Fail with an actionable message instead of an opaque IndexError from iloc[0]
# when the three feature groups have no date in common.
if batch_data_with_lags.empty:
    raise ValueError(
        "No date is present in the 'weather', 'power' and 'el_prices' feature "
        "groups at the same time; cannot build the feature row for prediction."
    )

# The model input must contain neither the target ('price') nor the join key.
latest_data = batch_data_with_lags.iloc[0].drop(['price', 'date'])
print(latest_data)

# The row is an object-dtype Series at this point; convert it to numeric.
# Values that cannot be parsed become NaN.
latest_data = pd.to_numeric(latest_data, errors='coerce')
print(latest_data)

# Make NaN features visible instead of passing them to the model silently.
nan_features = latest_data.index[latest_data.isna()].tolist()
if nan_features:
    print(f"WARNING: missing or non-numeric features (NaN after coercion): {nan_features}")

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.59s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.64s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.59s) 
temperature_2m_mean               -4.347083
precipitation_sum                       0.0
wind_speed_10m_max                21.959999
wind_direction_10m_dominant      283.490631
sunshine_duration              14318.928711
hydro_mw                            9434.75
nuclear_mw                           5827.5
other_mw                        1057.208333
wind_mw                         6998.458333
price_lag_1                        0.075346
price_lag_2                        0.324242
price_lag_3                          0.2303
Name: 0, dtype: object
temperature_2m_mean               -4.347083
precipitation_sum                  0.000000
wind_speed_10m_max                21.959999
wind_direction_10m_dominant      283.490631
sunshine_durati

In [7]:
# Print the feature values in an explicit, fixed column order.
print(latest_data[[
    'temperature_2m_mean',
    'precipitation_sum',
    'wind_speed_10m_max',
    'wind_direction_10m_dominant',
    'sunshine_duration',
    'hydro_mw',
    'nuclear_mw',
    'other_mw',
    'wind_mw',
    'price_lag_1',
    'price_lag_2',
    'price_lag_3',
]])

temperature_2m_mean               -4.347083
precipitation_sum                  0.000000
wind_speed_10m_max                21.959999
wind_direction_10m_dominant      283.490631
sunshine_duration              14318.928711
hydro_mw                        9434.750000
nuclear_mw                      5827.500000
other_mw                        1057.208333
wind_mw                         6998.458333
price_lag_1                        0.075346
price_lag_2                        0.324242
price_lag_3                        0.230300
Name: 0, dtype: float64


In [8]:
# Columns handed to the model, in this exact order. The values are passed as a
# bare NumPy array, so only the position of each feature is conveyed.
# NOTE(review): assumes this order matches the column order used in training
# -- TODO confirm against the training pipeline.
feature_order = [
    'temperature_2m_mean',
    'precipitation_sum',
    'wind_speed_10m_max',
    'wind_direction_10m_dominant',
    'sunshine_duration',
    'hydro_mw',
    'nuclear_mw',
    'other_mw',
    'wind_mw',
    'price_lag_1',
    'price_lag_2',
    'price_lag_3',
]

# Select the features and reshape them into a single-row 2-D array.
latest_data_df = latest_data[feature_order].values.reshape(1, -1)

# predict() returns one value per input row; take the only one.
prediction = retrieved_xgboost_model.predict(latest_data_df)[0]

print(prediction)

0.6953426


In [12]:
# One-row frame holding the predicted price, stamped with today's UTC midnight.
prediction_df = pd.DataFrame(
    {
        "date": [today],
        "price": [prediction],
    }
)

Upload the predicted price to Hopsworks

In [13]:
# Get (or create on first use) version 1 of the feature group that stores the
# predictions; 'date' serves as both primary key and event time.
# NOTE(review): "Electricty" in the description is a typo for "Electricity";
# the string is left unchanged here.
prediction_fg = fs.get_or_create_feature_group(
    name='price_predictions',
    version=1,
    primary_key=['date'],
    event_time="date",
    description='Electricty price predictions',
)

In [14]:
# Write the prediction row to the feature group, passing wait_for_job=True as
# a write option. The call is the cell's last expression, so its return value
# is shown as cell output.
insert_options = {"wait_for_job": True}
prediction_fg.insert(prediction_df, write_options=insert_options)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1164449/fs/1155152/fg/1393491


Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: price_predictions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1164449/jobs/named/price_predictions_1_offline_fg_materialization/executions
2025-01-03 15:54:23,985 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-01-03 15:54:27,162 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-01-03 15:56:08,660 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-01-03 15:56:08,808 INFO: Waiting for log aggregation to finish.
2025-01-03 15:56:17,378 INFO: Execution finished successfully.


(Job('price_predictions_1_offline_fg_materialization', 'SPARK'), None)