# Daily Feature Pipeline
* Retrieve today's data for flights and Google Trends
* Add these new data to the Feature Store 

## OpenSky Recent Data 
* Use the OpenSky API to retrieve the most recent flight landing data and update our feature group

In [1]:
import pandas as pd 
import os
import datetime
import requests 
import hopsworks

In [2]:
# Log in to Hopsworks and keep the project handle for the cells below.
project = hopsworks.login()

# Secrets API: used further down to read the OpenSky client credentials.
secrets = hopsworks.get_secrets_api()

# Feature store handle: used to look up the feature groups that get updated.
fs = project.get_feature_store()

2026-01-09 16:10:45,462 INFO: Initializing external client
2026-01-09 16:10:45,462 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2026-01-09 16:10:47,372 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1296539


In [3]:
# ICAO code of the airport whose arrivals are counted.
ICAO = "ESSA"

# OpenSky OAuth2 client credentials, read from the Hopsworks secrets store
# so they never appear in the notebook itself.
CLIENT_ID = secrets.get_secret("OPENSKY_CLIENT_ID").value
CLIENT_SECRET = secrets.get_secret("OPENSKY_CLIENT_SECRET").value

def get_access_token(timeout=30):
    """Request an OAuth2 bearer token from OpenSky (client-credentials grant).

    Args:
        timeout: Seconds to wait for the auth server before giving up, so a
            stalled connection cannot hang the daily job indefinitely.

    Returns:
        The value of the ``access_token`` field of the JSON response.

    Raises:
        requests.HTTPError: If the auth endpoint answers with an error status.
        requests.Timeout: If the auth endpoint does not answer in time.
        RuntimeError: If the response contains no ``access_token``.
    """
    auth_url = "https://auth.opensky-network.org/auth/realms/opensky-network/protocol/openid-connect/token"
    payload = {
        "grant_type": "client_credentials",
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
    }
    response = requests.post(auth_url, data=payload, timeout=timeout)
    response.raise_for_status()

    token = response.json().get("access_token")
    if not token:
        # Fail here rather than sending "Bearer None" to the flights API later.
        raise RuntimeError("OpenSky auth response did not contain an access_token")
    return token

def fetch_yesterday_data(token, timeout=60):
    """Count yesterday's arrivals at the airport identified by ``ICAO``.

    "Yesterday" is the previous calendar day in Europe/Stockholm, independent
    of the timezone of the machine running the notebook, so the window agrees
    with the Stockholm-based ``run_date`` used for the weather features.

    Args:
        token: OpenSky bearer token, as returned by ``get_access_token``.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A ``(date, count)`` tuple: yesterday as a ``datetime.date`` and the
        number of flights returned by the arrivals endpoint for that day.

    Raises:
        requests.HTTPError: For any error status other than 404. A failed call
            must not be reported as "0 landings", because the result is
            written to the feature store.
        requests.Timeout: If the API does not answer in time.
    """
    tz = "Europe/Stockholm"

    # Work on calendar dates and localize afterwards, so DST changes cannot
    # shift the window.
    yesterday = pd.Timestamp.now(tz=tz).date() - datetime.timedelta(days=1)
    day_start = pd.Timestamp(yesterday).tz_localize(tz)
    next_day_start = pd.Timestamp(yesterday + datetime.timedelta(days=1)).tz_localize(tz)

    start_ts = int(day_start.timestamp())
    end_ts = int(next_day_start.timestamp()) - 1  # 23:59:59 local time

    response = requests.get(
        "https://opensky-network.org/api/flights/arrival",
        params={"airport": ICAO, "begin": start_ts, "end": end_ts},
        headers={"Authorization": f"Bearer {token}"},
        timeout=timeout,
    )

    # NOTE(review): OpenSky documents 404 as "no flights found for the
    # interval", so only that status is treated as a genuine zero — confirm.
    if response.status_code == 404:
        return yesterday, 0

    response.raise_for_status()
    flights = response.json()
    return yesterday, len(flights)

# Authenticate first, then count yesterday's arrivals with the fresh token.
token = get_access_token()
flight_date, total_landings = fetch_yesterday_data(token)

# Report the figure that is about to be written to the feature store.
summary = f"Arlanda had {total_landings} landings on date: {flight_date}"
print(summary)

Arlanda had 2 landings on date: 2026-01-08


## Uploading new data to the Feature Store 

**New Flight Data**

In [4]:
# One-row frame holding yesterday's date and its landing count.
new_flight_data = pd.DataFrame(
    [{"date": flight_date, "total_landings": total_landings}]
)

In [5]:
# Look up version 1 of the flight feature group that receives the daily row.
flight_data_fg = fs.get_feature_group(name="flight_data_arlanda", version=1)

In [6]:
# Write the new row to the feature group. wait=True is passed so the call
# stays in this cell while the materialization job runs (see the log below).
flight_data_fg.insert(new_flight_data, wait=True)

Uploading Dataframe: 100.00% |█| Rows 1/1 | Elapsed Time: 00:00 | Remaining Time


Launching job: flight_data_arlanda_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1296539/jobs/named/flight_data_arlanda_1_offline_fg_materialization/executions
2026-01-09 16:11:22,824 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2026-01-09 16:11:26,033 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2026-01-09 16:11:29,238 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-09 16:13:05,699 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2026-01-09 16:13:05,884 INFO: Waiting for log aggregation to finish.
2026-01-09 16:13:14,555 INFO: Execution finished successfully.


(Job('flight_data_arlanda_1_offline_fg_materialization', 'SPARK'), None)

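## Google Trends Recent Data
* Download the most recent Google search data for the search terms
* Note: the cells below currently fetch Open-Meteo weather and Swedish calendar features for the previous day, not Google Trends data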

In [76]:
import holidays
import numpy as np

In [77]:
# "Yesterday" as a calendar date in Stockholm local time.
# Take the local date first and subtract a calendar day afterwards:
# subtracting pd.Timedelta(days=1) from a tz-aware midnight removes exactly
# 24 hours, which lands two dates back on the day after the spring DST change.
run_date = (
    pd.Timestamp.now(tz="Europe/Stockholm").date()
    - datetime.timedelta(days=1)
)

In [78]:
def fetch_weather_for_date(date, timeout=30):
    """Fetch one day of daily weather aggregates from the Open-Meteo archive API.

    Args:
        date: Object with ``strftime`` (e.g. ``datetime.date``); used as both
            the start and the end of the requested range.
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the daily job indefinitely.

    Returns:
        A DataFrame built from the response's ``daily`` section, with columns
        renamed to ``date``, ``tavg``, ``prcp``, ``snow`` and ``wspd`` and
        ``date`` converted to pandas datetimes.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer in time.
        KeyError: If the response has no ``daily`` section.
    """
    url = "https://archive-api.open-meteo.com/v1/archive"
    day = date.strftime("%Y-%m-%d")

    params = {
        # NOTE(review): coordinates look like central Stockholm — confirm.
        "latitude": 59.3293,
        "longitude": 18.0686,
        "start_date": day,
        "end_date": day,
        "daily": [
            "temperature_2m_mean",  # daily mean air temperature, 2 m above ground
            "precipitation_sum",  # total precipitation for the day
            "snowfall_sum",  # total snowfall for the day
            "windspeed_10m_max",  # maximum wind speed of the day, 10 m above ground
        ],
        # Daily aggregates are computed over Stockholm-local days.
        "timezone": "Europe/Stockholm",
    }

    r = requests.get(url, params=params, timeout=timeout)
    r.raise_for_status()

    daily = r.json()["daily"]

    # rename() returns a new frame; no in-place mutation is needed.
    df = pd.DataFrame(daily).rename(columns={
        "time": "date",
        "temperature_2m_mean": "tavg",
        "precipitation_sum": "prcp",
        "snowfall_sum": "snow",
        "windspeed_10m_max": "wspd",
    })

    df["date"] = pd.to_datetime(df["date"])
    return df

In [83]:
# Swedish holiday calendar used for the is_holiday flag.
se_holidays = holidays.Sweden()


def calendar_features(date):
    """Derive calendar features for a single date.

    Args:
        date: Date-like object providing ``weekday()``, ``isocalendar()`` and
            ``month``.

    Returns:
        Dict of ``np.int64`` values: ``day_of_week`` (``weekday()`` result),
        ``is_weekend`` (1 when ``weekday() >= 5``), ``week_of_year`` (ISO
        week), ``month`` and ``is_holiday`` (1 when the date is found in
        ``se_holidays``).
    """
    weekday = date.weekday()
    iso_week = date.isocalendar().week
    weekend_flag = weekday >= 5
    holiday_flag = date in se_holidays

    return {
        "day_of_week": np.int64(weekday),
        "is_weekend": np.int64(weekend_flag),
        "week_of_year": np.int64(iso_week),
        "month": np.int64(date.month),
        "is_holiday": np.int64(holiday_flag),
    }

In [84]:
# Gather the two feature sources for run_date.
weather = fetch_weather_for_date(run_date)
cal = calendar_features(run_date)

# Pull the weather values out of the first (only requested) row one scalar at
# a time, so each value keeps the type it has in its own column.
weather_values = {
    column: weather.loc[0, column]
    for column in ("tavg", "prcp", "snow", "wspd")
}

# Single-row frame: date, weather columns, then calendar columns.
features = pd.DataFrame([{"date": run_date, **weather_values, **cal}])

features

Unnamed: 0,date,tavg,prcp,snow,wspd,day_of_week,is_weekend,week_of_year,month,is_holiday
0,2026-01-08,-0.7,1.2,0.77,21.8,3,0,2,1,0


In [85]:
# Retrieve feature group
weather_cal_fg = fs.get_feature_group(
    name="stockholm_weather_calendar_features",
    version=2
)

In [86]:
# Convert the plain date column to pandas datetimes before inserting.
features["date"] = pd.to_datetime(features["date"])

# "wait_for_job" is set so the call stays in this cell while the
# materialization job runs (see the log below).
insert_options = {"wait_for_job": True}
weather_cal_fg.insert(features, write_options=insert_options)

Uploading Dataframe: 100.00% |█| Rows 1/1 | Elapsed Time: 00:00 | Remaining Time


Launching job: stockholm_weather_calendar_features_2_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1296539/jobs/named/stockholm_weather_calendar_features_2_offline_fg_materialization/executions
2026-01-09 18:22:37,990 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2026-01-09 18:22:41,196 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2026-01-09 18:22:44,411 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-09 18:24:26,914 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2026-01-09 18:24:27,096 INFO: Waiting for log aggregation to finish.
2026-01-09 18:24:49,266 INFO: Execution finished successfully.


(Job('stockholm_weather_calendar_features_2_offline_fg_materialization', 'SPARK'),
 None)