In [2]:
import logging
import os
import sys
from datetime import datetime, timedelta, timezone
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
from datetime import timedelta
import pandas as pd
import config.config as config
from src.utils.inference_utils.inference_utils import (
    get_feature_store,
    get_model_predictions,
    load_model_from_registry,
)
from src.data_fetching_and_processing.transform_ts_data_to_features_and_target import transform_ts_data_info_features


# Get the current datetime64[us, Etc/UTC]
# for number in range(22, 24 * 29):
# current_date = pd.Timestamp.now(tz="Etc/UTC") - timedelta(hours=number)
current_date = pd.Timestamp.now(tz="Etc/UTC")
feature_store = get_feature_store()

# read time-series data from the feature store
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=1 * 29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")
feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME, version=config.FEATURE_VIEW_VERSION
)

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["pickup_location_id", "pickup_hour"]).reset_index(drop=True)
ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)


2025-03-01 01:06:10,884 INFO: Initializing external client
2025-03-01 01:06:10,885 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-01 01:06:11,827 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214648
Fetching data from 2025-01-31 06:06:10.882794+00:00 to 2025-03-01 05:06:10.882794+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (4.58s) 


In [3]:
ts_data

Unnamed: 0,pickup_hour,pickup_location_id,zone,rides
1,2025-01-31 23:00:00,36,"Bushwick North, Brooklyn",0
2,2025-01-31 19:00:00,154,"Marine Park/Floyd Bennett Field, Brooklyn",0
3,2025-02-21 17:00:00,5,"Arden Heights, Staten Island",0
5,2025-02-13 13:00:00,44,"Charleston/Tottenville, Staten Island",0
8,2025-02-01 01:00:00,247,"West Concourse, Bronx",0
...,...,...,...,...
185311,2025-02-01 11:00:00,156,"Mariners Harbor, Staten Island",0
185313,2025-02-28 19:00:00,5,"Arden Heights, Staten Island",0
185314,2025-02-25 12:00:00,109,"Great Kills, Staten Island",0
185315,2025-02-12 04:00:00,204,"Rossville/Woodrow, Staten Island",0


In [None]:
features = transform_ts_data_info_features(ts_data, window_size=24 * 28, step_size=23)

model = load_model_from_registry()

predictions = get_model_predictions(model, features)
predictions["pickup_hour"] = current_date.ceil("h")
print(predictions)

feature_group = get_feature_store().get_or_create_feature_group(
    name=config.FEATURE_GROUP_MODEL_PREDICTION,
    version=1,
    description="Predictions from LGBM Model V2",
    primary_key=["pickup_location_id", "pickup_hour",'zone'],
    event_time="pickup_hour",
)

feature_group.insert(predictions, write_options={"wait_for_job": False})
