In [4]:
# Load (or reload) IPython's autoreload extension and use mode 2, which re-imports
# modules before each cell executes so edits to project code such as src.config
# are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [30]:
import src.config as config
import pandas as pd

In [6]:
import hopsworks

# Log in to the Hopsworks project; project name and API key are read from src.config
project = hopsworks.login(
    api_key_value=config.HOPSWORKS_API_KEY,
    project=config.HOPSWORKS_PROJECT_NAME,
)

# Handle to the project's feature store
feature_store = project.get_feature_store()

# Handle to the configured feature group (name + version)
feature_group = feature_store.get_feature_group(
    version=config.FEATURE_GROUP_VERSION,
    name=config.FEATURE_GROUP_NAME,
)

2024-12-12 12:41:58,074 INFO: Closing external client and cleaning up certificates.
Connection closed.
2024-12-12 12:41:58,078 INFO: Initializing external client
2024-12-12 12:41:58,078 INFO: Base URL: https://c.app.hopsworks.ai:443
2024-12-12 12:41:59,137 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1051798


In [None]:
# Create the feature view from every feature in the feature group.
# Creation is expected to fail on re-runs once the view exists, so the error is
# reported instead of aborting the notebook.
try:
    feature_store.create_feature_view(
        name=config.FEATURE_VIEW_NAME,
        # Use the feature *view* version so the view created here is the same one
        # that get_feature_view() looks up with config.FEATURE_VIEW_VERSION.
        version=config.FEATURE_VIEW_VERSION,
        query=feature_group.select_all(),  # create from all features in the FG
    )
except Exception as err:
    # Catch Exception instead of using a bare except so KeyboardInterrupt and
    # SystemExit still propagate, and print the underlying error so failures
    # other than "already exists" (credentials, network) are not mislabelled.
    print(
        "Feature view was not created (it may already exist), skip creation. "
        f"Reason: {type(err).__name__}: {err}"
    )

Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1051798/fs/1043525/fv/daily_demand_feature_view/version/1


In [8]:
# Look up the feature view by the name and version configured in src.config
feature_view = feature_store.get_feature_view(
    version=config.FEATURE_VIEW_VERSION,
    name=config.FEATURE_VIEW_NAME,
)

In [76]:
# Read the feature view into memory; only the first element of the returned
# pair is used, the second is discarded.
data, _ = feature_view.training_data(description='Daily demand')

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (3.10s) 

2024-12-12 15:23:21,171 INFO: Provenance cached data - overwriting last accessed/created training dataset from 3 to 4.


In [77]:
# Order rows by ba_code and then by datetime. The sorted result is reassigned
# instead of using inplace=True, so the frame is never mutated behind another
# reference and the statement reads the same way as the rest of the pipeline.
data = data.sort_values(by=['ba_code', 'datetime'])
data.head()

Unnamed: 0,datetime,demand,ba_code
10877,2022-10-01 00:00:00+00:00,51628,AECI
43376,2022-10-02 00:00:00+00:00,53127,AECI
23568,2022-10-03 00:00:00+00:00,54708,AECI
27303,2022-10-04 00:00:00+00:00,53345,AECI
35516,2022-10-05 00:00:00+00:00,53356,AECI


In [78]:
# Show the dtype of every column together with the (rows, columns) shape of `data`
data.dtypes, data.shape

(datetime    object
 demand       int64
 ba_code     object
 dtype: object,
 (53801, 3))

In [87]:
def prepare_feature_store_data_for_training(
    data: pd.DataFrame, bas: "list[str] | None" = None
) -> pd.DataFrame:
    """
    Prepares feature store data for training with SKForecast. Target series are
    moved to individual columns and the timestamp is set as the index.

    The input is expected in long format with the columns ``datetime``,
    ``demand`` and ``ba_code``. The input frame itself is not modified.

    Args:
        data: dataframe from Hopsworks feature store
        bas: BA codes to keep. When omitted, ``src.config.BAS`` is used, which
            is the behaviour of the original single-argument call.

    Returns:
        pd.DataFrame: one ``ba_<code>`` column per retained BA, indexed by a
        tz-naive daily ``DatetimeIndex`` named ``datetime`` with frequency
        ``D``. Days missing from the input appear as NaN rows; several rows
        for the same BA and day are averaged.
    """
    if bas is None:
        # Deferred import: the project config is only required when the caller
        # does not supply the list of BAs explicitly.
        from src.config import BAS as bas

    # Filter out unwanted BAs; copy so the caller's frame is left untouched
    selected = data[data["ba_code"].isin(list(bas))].copy()

    # Reduce every timestamp to its calendar day while keeping a real datetime64
    # dtype: drop the timezone (wall-clock time is preserved) and truncate to
    # midnight. A proper DatetimeIndex lets asfreq() work directly instead of
    # depending on implicit coercion of Python date objects.
    timestamps = pd.to_datetime(selected["datetime"])
    selected["datetime"] = timestamps.dt.tz_localize(None).dt.normalize()

    # Long -> wide: one row per day, one column per BA ("mean" is pivot_table's
    # default aggregation, spelled out here because it decides how duplicates
    # are combined)
    wide = pd.pivot_table(
        data=selected,
        values="demand",
        index="datetime",
        columns="ba_code",
        aggfunc="mean",
    )

    # Assigning a plain list also clears the "ba_code" name on the column axis
    wide.columns = [f"ba_{ba_code}" for ba_code in wide.columns]

    # Sort chronologically, then explicitly set the frequency of the index;
    # asfreq() inserts NaN rows for any missing day
    wide = wide.sort_index().asfreq("1D")

    return wide


# Build the wide training frame from the long-format feature store data
data_train = prepare_feature_store_data_for_training(data=data)

In [88]:
# Preview the first rows of the prepared training frame
data_train.head()

Unnamed: 0_level_0,ba_AECI,ba_AVA,ba_AZPS,ba_BANC,ba_BPAT,ba_CHPD,ba_CISO,ba_CPLE,ba_CPLW,ba_DOPD,...,ba_SWPP,ba_TAL,ba_TEC,ba_TEPC,ba_TIDC,ba_TPWR,ba_TVA,ba_WACM,ba_WALC,ba_WAUW
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-10-01,51628.0,27070.0,96193.0,46398.0,123905.0,3745.0,592567.0,123640.0,11797.0,4617.0,...,649266.0,6601.0,49325.0,34960.0,8222.0,10804.0,348678.0,80299.0,24633.0,1820.0
2022-10-02,53127.0,28039.0,97208.0,42814.0,125317.0,3655.0,560074.0,124418.0,11759.0,4583.0,...,647709.0,6682.0,51702.0,37336.0,7194.0,10773.0,345900.0,79702.0,26100.0,1749.0
2022-10-03,54708.0,30110.0,96570.0,47041.0,133353.0,3790.0,623658.0,132803.0,12155.0,4732.0,...,689771.0,6886.0,54270.0,35662.0,8346.0,11228.0,373596.0,80536.0,25310.0,1915.0
2022-10-04,53345.0,30764.0,88963.0,48332.0,134664.0,3831.0,654561.0,134430.0,12294.0,4829.0,...,687579.0,6781.0,53101.0,31415.0,8765.0,10927.0,378071.0,80522.0,23938.0,1838.0
2022-10-05,53356.0,30421.0,91984.0,50362.0,135464.0,3815.0,664304.0,136348.0,12482.0,4854.0,...,687179.0,7009.0,54212.0,30993.0,8457.0,10962.0,376763.0,81362.0,23839.0,1828.0
