In [65]:
import pandas as pd
from datetime import datetime
from sklearn.metrics import mean_absolute_error
import dataframe_image as dfi
import requests
import hopsworks
import joblib
import os
%load_ext dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [2]:
# Load environment variables from the local .env file; EIA_API_KEY is read from
# the environment further down when the EIA request URL is built.
# NOTE(review): -v / -o are presumably python-dotenv's "verbose" and "override
# existing variables" flags - confirm against the installed version.
%dotenv -vo .env

In [3]:
# TODO: this notebook currently runs locally only; add a (daily) Modal function export/creation.

In [4]:
# Log in to Hopsworks and grab a handle on the project's feature store; both
# `project` and `fs` are reused by the cells below.
# NOTE(review): login() presumably picks up its credentials from the environment
# loaded via %dotenv - confirm.
project = hopsworks.login()
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/5300




Connected. Call `.close()` to terminate connection gracefully.


In [26]:
# Get the feature group.
# TODO: we could read the feature view instead, but it does not include the
#  date of the latest entry. That should not matter when both pipelines run on
#  the same day, but to be safe we take the complete latest entry (including
#  its date) from the feature group (see below).
feature_group = fs.get_feature_group(name="ny_elec", version=1)
# Preview a few rows to sanity-check the connection and the columns.
display(feature_group.show(5))

2023-01-07 14:01:04,599 INFO: USE `iris_featurestore`
2023-01-07 14:01:05,146 INFO: SELECT `fg0`.`date` `date`, `fg0`.`temperature` `temperature`, `fg0`.`demand` `demand`, `fg0`.`day` `day`, `fg0`.`month` `month`, `fg0`.`holiday` `holiday`
FROM `iris_featurestore`.`ny_elec_1` `fg0`




Unnamed: 0,date,temperature,demand,day,month,holiday
0,2017-06-25,25.7,441658,6,6,0
1,2019-07-17,30.8,600695,2,7,0
2,2020-04-04,10.9,328054,5,4,0
3,2019-10-16,15.8,387485,2,10,0
4,2019-11-21,7.4,409803,3,11,0


In [9]:
# model
# Download version 1 of the registered model and load the serialized estimator.
mr = project.get_model_registry()
# Keep the registry entry under its own name so `model` only ever refers to the
# loaded estimator (the original bound `model` to two different kinds of object).
model_meta = mr.get_model("ny_elec_model", version=1)
model_dir = model_meta.download()
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load artifacts from a registry you trust.
model = joblib.load(os.path.join(model_dir, "ny_elec_model.pkl"))

Connected. Call `.close()` to terminate connection gracefully.
Downloading file ... 



In [31]:
# Number of most recent daily rows to predict on.
offset = 1
# feature_group.read() comes back in no particular order (the show(5) preview of
# this group is not in date order), so sort by date before taking the tail;
# otherwise tail() is not guaranteed to return the latest entry.
X_pred = feature_group.read().sort_values('date').tail(offset)
display(X_pred)

2023-01-07 14:04:07,953 INFO: USE `iris_featurestore`
2023-01-07 14:04:08,455 INFO: SELECT `fg0`.`date` `date`, `fg0`.`temperature` `temperature`, `fg0`.`demand` `demand`, `fg0`.`day` `day`, `fg0`.`month` `month`, `fg0`.`holiday` `holiday`
FROM `iris_featurestore`.`ny_elec_1` `fg0`




Unnamed: 0,date,temperature,demand,day,month,holiday
1826,2023-01-05,11.2,395100,3,1,0


In [32]:
# Predict demand for the latest (daily) feature row.
# The target ('demand') and the 'date' column are dropped before the frame is
# handed to the model.
X_features = X_pred.drop(columns=['demand', 'date'])
y_pred = model.predict(X_features)
display(y_pred)

array([405542.53], dtype=float32)

In [44]:
# Calendar date (time component stripped) of the row we predicted on; it is
# used below to query the EIA forecast and is stored with the monitoring row.
prediction_date = X_pred['date'].iloc[0].date()
display(prediction_date)

datetime.date(2023, 1, 5)

In [48]:
# get demand (forecast)
# Build the EIA v2 query for NY's day-ahead demand forecast (type 'DF') in the
# Eastern timezone, newest period first.
url = ('https://api.eia.gov/v2/electricity/rto/daily-region-data/data/'
       '?frequency=daily'
       '&data[0]=value'
       '&facets[respondent][]=NY'
       '&facets[timezone][]=Eastern'
       '&facets[type][]=DF'
       '&sort[0][column]=period'
       '&sort[0][direction]=desc'
       '&offset=0'
       '&length=5000')

# Restrict the query to prediction_date only (start == end).
# os.environ[...] raises KeyError when EIA_API_KEY is not set; the previous
# os.environ.get(...) silently formatted 'api_key=None' into the URL instead.
url = url + '&start={}&end={}&api_key={}'.format(prediction_date, prediction_date, os.environ['EIA_API_KEY'])

In [49]:
# Fetch the forecast rows from EIA.
# A timeout keeps the notebook/job from hanging indefinitely, and
# raise_for_status() surfaces HTTP errors (bad key, rate limit, ...) directly
# instead of as a confusing KeyError when indexing the error payload.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()['response']['data']

In [50]:
# Inspect the raw EIA response rows before the forecast value is pulled out.
display(data)

[{'period': '2023-01-05',
  'respondent': 'NY',
  'respondent-name': 'New York',
  'type': 'DF',
  'type-name': 'Day-ahead demand forecast',
  'timezone': 'Eastern',
  'timezone-description': 'Eastern',
  'value': 385451,
  'value-units': 'megawatthours'}]

In [55]:
# Guard against an empty result list (presumably what EIA returns when it has
# no forecast for the requested day - confirm); without this the failure is a
# bare IndexError with no hint about the cause.
if not data:
    raise ValueError('EIA returned no demand forecast for {}'.format(prediction_date))
forecast = data[0]['value']
# Side-by-side sanity check: EIA forecast, actual demand, our prediction.
display(forecast)
display(X_pred.iloc[0]['demand'])
display(y_pred[0])

385451

395100

405542.53

In [59]:
# DF for monitoring data
# One row per run: our prediction, the actual demand, the EIA forecast, the date
# the prediction refers to, and the (timezone-naive) time of this run.
now = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
# Built under its own name so the EIA response list held in `data` is not
# replaced by a dict, which would break re-running the forecast cell.
monitor_data = {
    'prediction': y_pred,
    'actual': [X_pred.iloc[0]['demand']],
    'forecast_eia': [forecast],
    'prediction_date': [prediction_date],
    'datetime': [now],
}
monitor_df = pd.DataFrame(monitor_data)
display(monitor_df)

Unnamed: 0,prediction,actual,forecast_eia,prediction_date,datetime
0,405542.53125,395100,385451,2023-01-05,"01/07/2023, 14:16:12"


In [60]:
# Monitoring feature group: fetched if it already exists, created otherwise.
# Rows are keyed on the run timestamp column ("datetime").
monitor_fg = fs.get_or_create_feature_group(
    name="ny_elec_predictions",
    version=1,
    primary_key=["datetime"],
    description="NY Electricity Prediction/Outcome Monitoring",
)

# Insert this run's row without waiting for the write job to finish.
monitor_fg.insert(monitor_df, write_options={"wait_for_job": False})

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/5300/fs/5220/fg/14715


Uploading Dataframe: 0.00% |          | Rows 0/1 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/5300/jobs/named/ny_elec_predictions_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x2742ec88a90>, None)

In [62]:
history_df = monitor_fg.read()
# The insert was started with wait_for_job=False, so this run's row may or may
# not be readable yet. Append it locally and de-duplicate on the primary key
# ('datetime'): the history is then the same whether the notebook was run
# slowly by hand or straight through (e.g. in a Modal job), and no manual
# (un)commenting is needed. keep='first' prefers the stored copy of the row.
history_df = (
    pd.concat([history_df, monitor_df], ignore_index=True)
    .drop_duplicates(subset=['datetime'], keep='first')
    .reset_index(drop=True)
)
display(history_df)

2023-01-07 14:19:31,672 INFO: USE `iris_featurestore`
2023-01-07 14:19:32,167 INFO: SELECT `fg0`.`prediction` `prediction`, `fg0`.`actual` `actual`, `fg0`.`forecast_eia` `forecast_eia`, `fg0`.`prediction_date` `prediction_date`, `fg0`.`datetime` `datetime`
FROM `iris_featurestore`.`ny_elec_predictions_1` `fg0`




Unnamed: 0,prediction,actual,forecast_eia,prediction_date,datetime
0,405542.53,395100,385451,2023-01-05,"01/07/2023, 14:16:12"


In [64]:
# MAE
# Mean absolute error between stored predictions and actual demand over the
# whole monitoring history.
# Dedicated names are used on purpose: rebinding `y_pred` here would overwrite
# the model's prediction array that the monitoring-row cell reads, so that
# cell would produce wrong data if re-run.
history_pred = history_df['prediction']
history_actual = history_df['actual']
mean_error = mean_absolute_error(history_actual, history_pred)
display(mean_error) # in MWh
# TODO: compute "live" in UI

10442.530000000028

In [67]:
# create "recents" table for UI and upload
# Render the last five history rows to a PNG, then push that file to the
# project's "Resources/images" dataset folder, overwriting any previous upload.
recent_png = './df_ny_elec_recent.png'
dfi.export(history_df.tail(5), recent_png, table_conversion='matplotlib')
dataset_api = project.get_dataset_api()
dataset_api.upload(recent_png, "Resources/images", overwrite=True)

Uploading: 0.000%|          | 0/17611 elapsed<00:00 remaining<?

'Resources/images/df_ny_elec_recent.png'