#### Imports

In [1]:
import datetime
import pandas as pd
import hopsworks
from functions import util
import os

In [2]:
# Location whose weather forecast this notebook ingests. Coordinates are kept
# as strings, exactly as they are handed to the forecast helper later on.
country = "Sweden"
city = "Stockholm"
latitude = "59.3294"
longitude = "18.0687"

# Load the API key from a local file and expose it through the environment
# (presumably where hopsworks.login() looks for it -- confirm against the
# Hopsworks client docs). rstrip() drops the trailing newline from the file.
with open('../data/hopsworks-api-key.txt', 'r') as file:
    os.environ["HOPSWORKS_API_KEY"] = file.read().rstrip()

# Connect to the Hopsworks project and grab a handle to its feature store.
project = hopsworks.login()
fs = project.get_feature_store()

# Date of this run.
today = datetime.date.today()

2025-01-08 14:19:13,179 INFO: Initializing external client
2025-01-08 14:19:13,180 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-08 14:19:15,128 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1207502


#### Get references to the Feature Groups

In [3]:
# Look up version 1 of the two feature groups by name in the feature store.
accidents_fg = fs.get_feature_group(name='accidents', version=1)
weather_fg = fs.get_feature_group(name='weather', version=1)

#### Get Weather Forecast data

In [4]:
# Hourly forecast for the configured location, indexed by timestamp so that
# rows can be filtered by time of day.
hourly_df = util.get_hourly_weather_forecast(city, latitude, longitude).set_index('date')

# Only one prediction is made per day, so each day is represented by a single
# forecast row: the one timestamped 12:00 (selected with an 11:59-12:01 window).
daily_df = (
    hourly_df
    .between_time('11:59', '12:01')
    .reset_index()
    .assign(
        # Round-trip through datetime.date to drop the time of day while
        # ending up with a datetime column again.
        date=lambda frame: pd.to_datetime(pd.to_datetime(frame['date']).dt.date),
        city=city,
        # Computed from the already-truncated date column (0 = Monday).
        day_of_week=lambda frame: frame['date'].dt.dayofweek,
    )
)

daily_df

Coordinates 59.25°N 18.0°E
Elevation 24.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


Unnamed: 0,date,temperature_2m_mean,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,city,day_of_week
0,2025-01-08,1.6,0.2,16.595179,183.731323,Stockholm,2
1,2025-01-09,-2.4,0.0,6.952754,248.74942,Stockholm,3
2,2025-01-10,-0.1,0.1,18.861387,13.240531,Stockholm,4
3,2025-01-11,-2.4,0.0,27.002399,359.236115,Stockholm,5
4,2025-01-12,-3.75,0.0,14.081477,327.528839,Stockholm,6
5,2025-01-13,2.7,0.3,23.507753,242.650208,Stockholm,0
6,2025-01-14,3.0,0.1,16.267857,294.863678,Stockholm,1
7,2025-01-15,1.55,0.0,4.334974,265.23645,Stockholm,2
8,2025-01-16,3.2,0.0,8.759178,260.53775,Stockholm,3
9,2025-01-17,2.85,0.0,12.864649,287.928009,Stockholm,4


In [5]:
# Print the column dtypes and non-null counts of the daily forecast frame as a
# sanity check on its schema.
daily_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 7 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   date                         10 non-null     datetime64[ns]
 1   temperature_2m_mean          10 non-null     float32       
 2   precipitation_sum            10 non-null     float32       
 3   wind_speed_10m_max           10 non-null     float32       
 4   wind_direction_10m_dominant  10 non-null     float32       
 5   city                         10 non-null     object        
 6   day_of_week                  10 non-null     int32         
dtypes: datetime64[ns](1), float32(4), int32(1), object(1)
memory usage: 488.0+ bytes


In [6]:
# Write the daily forecast rows into the 'weather' feature group. The returned
# value is shown as the cell output.
weather_fg.insert(daily_df)

Uploading Dataframe: 100.00% |██████████| Rows 10/10 | Elapsed Time: 00:02 | Remaining Time: 00:00


Launching job: weather_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1207502/jobs/named/weather_1_offline_fg_materialization/executions


(Job('weather_1_offline_fg_materialization', 'SPARK'), None)