In [1]:
import pandas as pd
import requests
from datetime import datetime
import holidays
import hopsworks

In [2]:
START_DATE = "2022-01-01"
END_DATE = "2026-01-01"

dates = pd.date_range(START_DATE, END_DATE, freq="D")

In [3]:
def fetch_weather(start_date, end_date):
    url = "https://archive-api.open-meteo.com/v1/archive"

    params = {
        "latitude": 59.3293, # Stockholm 
        "longitude": 18.0686,
        "start_date": start_date,
        "end_date": end_date,
        "daily": [
            "temperature_2m_mean",
            "precipitation_sum",
            "snowfall_sum",
            "windspeed_10m_max"
        ],
        "timezone": "Europe/Stockholm"
    }

    r = requests.get(url, params=params)
    r.raise_for_status()
    data = r.json()["daily"]

    df = pd.DataFrame(data)
    df.rename(columns={
        "time": "date",
        "temperature_2m_mean": "tavg",
        "precipitation_sum": "prcp",
        "snowfall_sum": "snow",
        "windspeed_10m_max": "wspd"
    }, inplace=True)

    df["date"] = pd.to_datetime(df["date"])
    return df

In [4]:
se_holidays = holidays.Sweden()

calendar = pd.DataFrame({"date": dates})
calendar["day_of_week"] = calendar["date"].dt.weekday.astype(int)
calendar["is_weekend"] = calendar["day_of_week"].isin([5, 6]).astype(int)
calendar["week_of_year"] = calendar["date"].dt.isocalendar().week.astype(int)
calendar["month"] = calendar["date"].dt.month.astype(int)
calendar["is_holiday"] = calendar["date"].isin(se_holidays).astype(int)

In [6]:
weather = fetch_weather(START_DATE, END_DATE)

features = (
    calendar
    .merge(weather, on="date", how="left")
    .sort_values("date")
    .ffill()
)
features 

Unnamed: 0,date,day_of_week,is_weekend,week_of_year,month,is_holiday,tavg,prcp,snow,wspd
0,2022-01-01,5,1,52,1,0,-1.6,0.8,0.56,16.3
1,2022-01-02,6,1,52,1,0,1.9,7.2,4.20,18.1
2,2022-01-03,0,0,1,1,0,3.0,2.0,0.00,14.6
3,2022-01-04,1,0,1,1,0,0.4,0.9,0.00,19.5
4,2022-01-05,2,0,1,1,0,-1.9,0.9,0.63,15.8
...,...,...,...,...,...,...,...,...,...,...
1457,2025-12-28,6,1,52,12,0,0.8,0.0,0.00,27.1
1458,2025-12-29,0,0,1,12,0,-1.1,0.1,0.07,25.5
1459,2025-12-30,1,0,1,12,0,-3.3,0.0,0.00,28.0
1460,2025-12-31,2,0,1,12,0,-6.3,0.2,0.14,9.5


In [7]:
project = hopsworks.login()
fs = project.get_feature_store()

fg = fs.get_or_create_feature_group(
    name="stockholm_weather_calendar_features",
    version=2,
    primary_key=["date"],
    event_time="date",
    description="Daily Stockholm weather + calendar features"
)

fg.insert(features, write_options={"wait_for_job": True})

2026-01-09 19:08:33,639 INFO: Initializing external client
2026-01-09 19:08:33,640 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2026-01-09 19:08:35,346 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1296539


Uploading Dataframe: 100.00% |█| Rows 1462/1462 | Elapsed Time: 00:01 | Remainin


Launching job: stockholm_weather_calendar_features_2_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1296539/jobs/named/stockholm_weather_calendar_features_2_offline_fg_materialization/executions
2026-01-09 19:08:52,606 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2026-01-09 19:08:55,829 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-09 19:10:48,093 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2026-01-09 19:10:48,269 INFO: Waiting for log aggregation to finish.
2026-01-09 19:11:00,348 INFO: Execution finished successfully.


(Job('stockholm_weather_calendar_features_2_offline_fg_materialization', 'SPARK'),
 None)