In [2]:
import datetime
import pandas as pd
from xgboost import XGBRegressor
import hopsworks
import json

In [11]:
# Reference dates for the run, all truncated to midnight: today plus the two
# preceding days (offsets of one and two days respectively).
today = pd.Timestamp.today().normalize()
yesterday, day_before_yesterday = (
    today - pd.Timedelta(days=offset) for offset in (1, 2)
)
today, yesterday, day_before_yesterday

(Timestamp('2026-01-11 00:00:00'),
 Timestamp('2026-01-10 00:00:00'),
 Timestamp('2026-01-09 00:00:00'))

In [4]:
import os
import hopsworks

# Log in to Hopsworks. HOPSWORKS_API_KEY is expected to be set in GitHub
# Actions; when it is absent (local runs) login() is called without arguments
# and falls back to the existing local authentication.
api_key = os.environ.get("HOPSWORKS_API_KEY")
login_kwargs = {"api_key_value": api_key} if api_key else {}
project = hopsworks.login(**login_kwargs)

2026-01-11 16:32:49,983 INFO: Initializing external client
2026-01-11 16:32:49,985 INFO: Base URL: https://c.app.hopsworks.ai:443
2026-01-11 16:32:51,860 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1286325


In [5]:
# Look up version 1 of the registered flight model in the project's registry.
mr = project.get_model_registry()
model_spec = {"name": "flight_xgboost_model", "version": 1}
retrieved_model = mr.get_model(**model_spec)

# Feature view associated with the retrieved model.
fv = retrieved_model.get_feature_view()

# Fetch the model artifacts; download() returns the local directory path.
saved_model_dir = retrieved_model.download()

2026-01-11 16:32:57,690 INFO: Initializing for batch retrieval of feature vectors


Downloading: 0.000%|          | 0/448339 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... DONE

In [6]:
# Rebuild the XGBoost regressor from the "model.json" artifact found in the
# downloaded model directory.
model_path = f"{saved_model_dir}/model.json"

retrieved_xgboost_model = XGBRegressor()
retrieved_xgboost_model.load_model(model_path)

# Last expression of the cell: display the loaded estimator.
retrieved_xgboost_model

## How the pipeline should look

The idea is to take yesterday's weather and calendar data and use it to predict the total number of flights that will occur today.

In [None]:
# Retrieve the most recent weather and calendar features; they are the model
# input used to predict today's flights.
fs = project.get_feature_store()

weather_cal_data_fg = fs.get_feature_group(
    name='stockholm_weather_calendar_features',
    version=1,
)

# The query window starts at the day before yesterday, so it can match more
# than one day of data (presumably to tolerate a late ingestion of yesterday's
# row -- TODO confirm).
recent_features = weather_cal_data_fg.filter(
    weather_cal_data_fg.date >= day_before_yesterday
).read()

# Fail early with a clear message instead of a length-mismatch error later on.
if recent_features.empty:
    raise ValueError(
        "No weather/calendar features found on or after "
        f"{day_before_yesterday.date()}; cannot build today's forecast."
    )

# Exactly one forecast is produced per run, so keep only the latest row.
batch_data = (
    recent_features
    .sort_values("date")
    .tail(1)
    .reset_index(drop=True)
)
batch_data

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.17s) 


Unnamed: 0,date,day_of_week,is_weekend,week_of_year,month,is_holiday,tavg,prcp,snow,wspd
0,2026-01-10 00:00:00+00:00,5,1,2,1,0,-6.1,0.0,0.0,26.6


# Prediction

In [13]:
# Model input: every column of the batch except the 'date' column.
X = batch_data.drop(labels=["date"], axis="columns")
X

Unnamed: 0,day_of_week,is_weekend,week_of_year,month,is_holiday,tavg,prcp,snow,wspd
0,5,1,2,1,0,-6.1,0.0,0.0,26.6


In [14]:
# Predict with the loaded XGBoost regressor on the feature matrix X.
y_pred = retrieved_xgboost_model.predict(X)

In [15]:
# Forecast table: today's date paired with the model output.
forecast_columns = {
    "date": [today],
    "predicted_landings": y_pred,
}
forecast_row = pd.DataFrame(forecast_columns)

forecast_row

Unnamed: 0,date,predicted_landings
0,2026-01-11,264.767456


In [16]:
# Website-facing copy of the forecast (forecast_row itself is left unchanged).
# - Rename by explicit mapping rather than by position, so a change in column
#   order cannot silently mislabel the data; errors="raise" makes a missing
#   source column fail loudly.
# - Format the date explicitly as YYYY-MM-DD instead of relying on the default
#   string conversion of a datetime column.
web_df = (
    forecast_row
    .rename(columns={"predicted_landings": "prediction"}, errors="raise")
    .assign(date=lambda frame: frame["date"].dt.strftime("%Y-%m-%d"))
)

# Written without the index; the path is relative to the working directory.
web_df.to_csv("../data/predictions.csv", index=False)