In [1]:
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import datetime
import requests
import hopsworks
import os
%load_ext dotenv

In [2]:
# Load the variables defined in .env into the process environment
# (-v: verbose, -o: override variables that are already set).
%dotenv -vo .env

In [None]:
# TODO: this notebook currently runs locally only; add export/creation of a (daily) modal function

In [3]:
# Demand and the demand forecast are published two days behind, so the day
# to fetch is the day before yesterday (local time).
prediction_date = datetime.date.today() - datetime.timedelta(days=2)
print(prediction_date)

2023-01-05


In [4]:
# Build the EIA v2 daily-region-data query for respondent NY (Eastern
# timezone): demand (type D) and day-ahead demand forecast (type DF) for the
# single day `prediction_date`, newest period first.
eia_api_key = os.environ.get('EIA_API_KEY')
if not eia_api_key:
    # Without this check a missing key is formatted into the URL as the literal
    # text "None" and only surfaces later as an opaque API error.
    raise RuntimeError('EIA_API_KEY is not set; add it to .env or the environment')

url = ('https://api.eia.gov/v2/electricity/rto/daily-region-data/data/'
       '?frequency=daily'
       '&data[0]=value'
       '&facets[respondent][]=NY'
       '&facets[timezone][]=Eastern'
       '&facets[type][]=D'
       '&facets[type][]=DF'
       '&sort[0][column]=period'
       '&sort[0][direction]=desc'
       '&offset=0'
       '&length=5000')

# Restrict the query to the one day of interest and authenticate.
url = url + '&start={}&end={}&api_key={}'.format(prediction_date, prediction_date, eia_api_key)

In [5]:
# Fetch the EIA records; the rows of interest sit under response -> data.
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
eia_response = requests.get(url, timeout=30)
# Fail here on HTTP errors (bad key, rate limit, outage) instead of hitting a
# confusing KeyError on the JSON body below.
eia_response.raise_for_status()
data = eia_response.json()['response']['data']

In [6]:
# To be used in inference
# Select the day-ahead forecast row by its `type` code rather than by position:
# the query sorts on `period` only, and the demand and forecast rows share the
# same period, so their relative order in `data` is not guaranteed.
forecast_record = next((record for record in data if record.get('type') == 'DF'), None)
if forecast_record is None:
    raise ValueError('EIA response contains no day-ahead demand forecast (type "DF") record')
# index=[0] is required because the record is a dict of scalars (one row).
data_forecast = pd.DataFrame(forecast_record, index=[0])
data_forecast.head()

Unnamed: 0,period,respondent,respondent-name,type,type-name,timezone,timezone-description,value,value-units
0,2023-01-05,NY,New York,DF,Day-ahead demand forecast,Eastern,Eastern,385451,megawatthours


In [7]:
# Select the actual-demand row by its `type` code rather than by position:
# the query sorts on `period` only, and the demand and forecast rows share the
# same period, so their relative order in `data` is not guaranteed.
demand_record = next((record for record in data if record.get('type') == 'D'), None)
if demand_record is None:
    raise ValueError('EIA response contains no demand (type "D") record')
# index=[0] is required because the record is a dict of scalars (one row).
data_demand = pd.DataFrame(demand_record, index=[0])
data_demand.head()

Unnamed: 0,period,respondent,respondent-name,type,type-name,timezone,timezone-description,value,value-units
0,2023-01-05,NY,New York,D,Demand,Eastern,Eastern,395100,megawatthours


In [8]:
# TODO: we don't need this here (used to compare predictions, UI only?)
# Reduce the forecast record to (date, forecast) with a datetime `date` column.
# Rename before selecting so the cell can be re-run: rename() ignores keys that
# are already gone, whereas selecting 'period'/'value' a second time would
# raise KeyError.
# `infer_datetime_format` is dropped: it is deprecated in pandas 2.x and the
# plain call parses the same date strings.
data_forecast = (
    data_forecast
    .rename(columns={'period': 'date', 'value': 'forecast'})
    .loc[:, ['date', 'forecast']]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
display(data_forecast.head(5))

Unnamed: 0,date,forecast
0,2023-01-05,385451


In [9]:
# Clean DF to same format as fg
# Reduce the demand record to (date, demand) with a datetime `date` column.
# Rename before selecting so the cell can be re-run: rename() ignores keys that
# are already gone, whereas selecting 'period'/'value' a second time would
# raise KeyError.
# `infer_datetime_format` is dropped: it is deprecated in pandas 2.x and the
# plain call parses the same date strings.
data_demand = (
    data_demand
    .rename(columns={'period': 'date', 'value': 'demand'})
    .loc[:, ['date', 'demand']]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
display(data_demand.head())

Unnamed: 0,date,demand
0,2023-01-05,395100


In [10]:
# Get temperature
# Build the WeatherAPI history query for New York on `prediction_date`.
weather_api_key = os.environ.get('WEATHER_API_KEY')
if not weather_api_key:
    # Without this check a missing key is formatted into the URL as the literal
    # text "None" and only surfaces later as an opaque API error.
    raise RuntimeError('WEATHER_API_KEY is not set; add it to .env or the environment')
# HTTPS so the API key in the query string is not sent in clear text.
weather_url = ('https://api.weatherapi.com/v1/history.json'
               '?key={}'
               '&q=New%20York,%20USA'
               '&dt={}').format(weather_api_key, prediction_date)

In [11]:
# Fetch the historical weather; a timeout keeps the cell from hanging
# indefinitely on a stalled connection.
weather_response = requests.get(weather_url, timeout=30)
# Fail here on HTTP errors (bad key, quota, outage) instead of hitting a
# confusing KeyError on the JSON body below.
weather_response.raise_for_status()
# Average temperature (avgtemp_c) of the first, and only requested, day.
weather_data = weather_response.json()['forecast']['forecastday'][0]['day']['avgtemp_c']
print(weather_data)
# One-row frame keyed by date so it can be merged with the demand frame.
weather_df = pd.DataFrame({'date': [prediction_date], 'temperature': [weather_data]})
# `infer_datetime_format` is dropped: it is deprecated in pandas 2.x and is
# not needed to convert date objects.
weather_df['date'] = pd.to_datetime(weather_df['date'])
print(weather_df)

11.2
        date  temperature
0 2023-01-05         11.2


In [12]:
# Join temperature and demand on their shared date, then derive the calendar
# features from that date (dayofweek counts from Monday=0).
merged_df = (
    weather_df
    .merge(data_demand, how='inner', on='date')
    .assign(
        day=lambda frame: frame['date'].dt.dayofweek,
        month=lambda frame: frame['date'].dt.month,
    )
)
merged_df.head()

Unnamed: 0,date,temperature,demand,day,month
0,2023-01-05,11.2,395100,3,1


In [13]:
# Get bank holidays
# Flag each row with 1 when its date is a US federal holiday, else 0.
first_date = merged_df['date'].min()
last_date = merged_df['date'].max()
holidays = calendar().holidays(start=first_date, end=last_date)
is_holiday = merged_df['date'].isin(holidays)
merged_df['holiday'] = is_holiday.astype(int)
display(merged_df.head())

Unnamed: 0,date,temperature,demand,day,month,holiday
0,2023-01-05,11.2,395100,3,1,0


In [14]:
# Log in to Hopsworks and get a handle to the project's feature store.
# NOTE(review): credentials are presumably picked up from the environment
# loaded from .env earlier — confirm how hopsworks.login() authenticates here.
project = hopsworks.login()
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/5300




Connected. Call `.close()` to terminate connection gracefully.


In [16]:
# Insert the engineered row(s) into version 1 of the "ny_elec" feature group.
# NOTE(review): wait_for_job=False presumably returns once the upload job has
# been launched rather than blocking until it finishes — confirm against the
# hsfs documentation.
fg = fs.get_feature_group(name="ny_elec", version=1)
fg.insert(merged_df, write_options={"wait_for_job": False})

Uploading Dataframe: 0.00% |          | Rows 0/1 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/5300/jobs/named/ny_elec_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x1afdf511eb0>, None)