In [6]:
import os
# import config as config
from pathlib import Path
import hopsworks
from dotenv import load_dotenv

load_dotenv()
project = hopsworks.login(
    project=os.getenv("HOPSWORKS_PROJECT_NAME"),
    api_key_value=os.getenv("HOPSWORKS_API_KEY")
)
feature_store = project.get_feature_store()
feature_group=feature_store.get_or_create_feature_group(
    name=os.getenv("FEATURE_GROUP_NAME"),
    version=os.getenv("FEATURE_GROUP_VERSION"),
    description= "Time-series Data for Bike at six hour frequency",
    primary_key=["location_id","pickup_hour"],
    event_time="pickup_hour"
)

2025-05-11 12:08:27,818 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-11 12:08:27,822 INFO: Initializing external client
2025-05-11 12:08:27,822 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-11 12:08:28,657 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1228972


In [7]:
from dotenv import load_dotenv

load_dotenv()
# Create a feature view if it doesn't already exist
try:
    feature_store.create_feature_view(
        name=os.getenv('FEATURE_VIEW_NAME'),
        version=os.getenv('FEATURE_VIEW_VERSION'),
        query=feature_group.select_all(),
    )
    print(f"Feature view '{os.getenv('FEATURE_VIEW_NAME')}' (version {os.getenv('FEATURE_VIEW_VERSION')}) created successfully.")
except Exception as e:
    print(f"Error creating feature view: {e}")

# Retrieve the feature view
try:
    feature_view = feature_store.get_feature_view(
        name=os.getenv('FEATURE_VIEW_NAME'),
        version=os.getenv('FEATURE_VIEW_VERSION'),
    )
    print(f"Feature view '{os.getenv('FEATURE_VIEW_NAME')}' (version {os.getenv('FEATURE_VIEW_VERSION')}) retrieved successfully.")
except Exception as e:
    print(f"Error retrieving feature view: {e}")

Error creating feature view: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1228972/featurestores/1213539/featureview). Server response: 
HTTP code: 400, HTTP reason: Bad Request, body: b'{"errorCode":270179,"usrMsg":"Feature view: time_series_six_hourly_feature_view_bike, version: 1","errorMsg":"The provided feature view name and version already exists"}', error code: 270179, error msg: The provided feature view name and version already exists, user msg: Feature view: time_series_six_hourly_feature_view_bike, version: 1
Feature view 'time_series_six_hourly_feature_view_bike' (version 1) retrieved successfully.


In [8]:
import joblib
def get_hopsworks_project() -> hopsworks.project.Project:
    return hopsworks.login(
        project=os.getenv('HOPSWORKS_PROJECT_NAME'), api_key_value=os.getenv('HOPSWORKS_API_KEY')
    )
project = get_hopsworks_project()
model_registry = project.get_model_registry()

models = model_registry.get_models(name='Bike_demand_predictor_next_hour')
model = max(models, key=lambda model: model.version)
model_dir = model.download()
model = joblib.load(Path(model_dir) / "lightgbm_bikeride_model.joblib")

2025-05-11 12:08:33,266 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-11 12:08:33,269 INFO: Initializing external client
2025-05-11 12:08:33,271 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-11 12:08:34,344 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1228972


Downloading: 100.000%|██████████| 279868/279868 elapsed<00:00 remaining<00:00

Downloading model artifact (0 dirs, 1 files)... DONE




In [9]:
from datetime import timedelta
import pandas as pd

# Step 1: Load feature view data from Hopsworks
ts_data, _ = feature_view.training_data(
    description="time_series_six_hourly_bike_ride"
)

# Step 2: Preprocess location_id
ts_data["location_id"] = ts_data["location_id"].astype(str).str.replace('.', '', regex=False)
ts_data["pickup_hour"] = pd.to_datetime(ts_data["pickup_hour"])

# Keep only the 3 exact stations
valid_ids = {"614005", "590514", "532903"}
ts_data = ts_data[ts_data["location_id"].isin(valid_ids)]

# Step 3: Setup for prediction
full_df = ts_data.copy()
predictions = []

# Define prediction timeline and cleaned location IDs
future_dates = pd.date_range("2025-01-01 00:00:00", "2025-12-31 18:00:00", freq="6H", tz="UTC")
location_ids = sorted(valid_ids)  # keep it ordered

# Step 4: LightGBM expects these exact features
reg_features = [f"target_lag_{i+1}" for i in range(112)] + ["hour", "day_of_week", "month", "is_weekend", "location_id"]

print("🔮 Generating predictions for 2025...")

# Step 5: Rolling prediction loop
for ts in future_dates:
    for loc in location_ids:
        # Get latest 112 lag entries for this station
        hist = full_df[full_df["location_id"] == loc].sort_values("pickup_hour").tail(112)
        if len(hist) < 112:
            continue

        # Create lag features
        feature_row = {
            f"target_lag_{i+1}": hist.iloc[-(i+1)]["target"] for i in range(112)
        }

        # Add time-based features
        feature_row["hour"] = ts.hour
        feature_row["day_of_week"] = ts.dayofweek
        feature_row["month"] = ts.month
        feature_row["is_weekend"] = int(ts.dayofweek in [5, 6])
        feature_row["pickup_hour"] = ts
        feature_row["location_id"] = loc

        # Prepare DataFrame for prediction
        X_pred = pd.DataFrame([feature_row])[reg_features]
        X_pred["location_id"] = X_pred["location_id"].astype(float)  # ensure numeric for LGBM

        # Predict
        pred = model.predict(X_pred)[0]

        # Store prediction
        predictions.append({
            "pickup_hour": ts,
            "location_id": loc,
            "predicted_rides": round(pred)
        })

        # Append predicted row to history for future lags
        full_df = pd.concat([
            full_df,
            pd.DataFrame([{
                **feature_row,
                "target": pred
            }])
        ], ignore_index=True)

print("✅ 2025 predictions complete.")

# Step 6: Save predictions
pred_df = pd.DataFrame(predictions)
pred_df.to_csv("bike_predictions_2025_6hr.csv", index=False)
print("📁 Saved as bike_predictions_2025_6hr.csv")


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.26s) 




🔮 Generating predictions for 2025...
✅ 2025 predictions complete.
📁 Saved as bike_predictions_2025_6hr.csv


In [10]:
# Get Hopsworks feature store
fs = project.get_feature_store()

# Create a new feature group
fg_pred = fs.create_feature_group(
    name="bike_demand_predictions",
    version=1,
    description="6-hourly predicted demand for 2025",
    primary_key=["pickup_hour", "location_id"],
    event_time="pickup_hour"
)

# Save data to the new feature group
fg_pred.insert(pred_df, write_options={"wait_for_job": True})
print("✅ Predictions uploaded to Hopsworks Feature Group: bike_demand_predictions v1")


Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1228972/fs/1213539/fg/1454690


Uploading Dataframe: 100.00% |██████████| Rows 4380/4380 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: bike_demand_predictions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1228972/jobs/named/bike_demand_predictions_1_offline_fg_materialization/executions
2025-05-11 12:12:05,178 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-05-11 12:13:15,268 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-05-11 12:14:44,253 INFO: Waiting for execution to finish. Current state: SUCCEEDING. Final status: UNDEFINED
2025-05-11 12:14:47,352 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-05-11 12:14:47,440 INFO: Waiting for log aggregation to finish.
2025-05-11 12:14:59,597 INFO: Execution finished successfully.
✅ Predictions uploaded to Hopsworks Feature Group: bike_demand_predictions v1
