## <span style='color:#ff5f27'> 📝 Imports

In [2]:
import pandas as pd

from functions.functions import *

---

## <span style='color:#ff5f27'> 💽 Loading Historical Data</span>


#### <span style='color:#ff5f27'> 👩🏻‍🔬 Air Quality Data

In [14]:
df_air_quality = pd.read_csv('data_poland/air_quality/air_quality_merged.csv')
df_air_quality.head()

Unnamed: 0,date,pm10,o3,no2,so2,co,city,pm25
0,2023-1-2,23.0,18.0,,3.0,,bielsko biała,
1,2023-1-3,25.0,27.0,7.0,2.0,,bielsko biała,
2,2023-1-4,19.0,24.0,4.0,1.0,,bielsko biała,
3,2023-1-5,9.0,20.0,3.0,1.0,,bielsko biała,
4,2023-1-6,13.0,17.0,,2.0,,bielsko biała,


In [15]:
df_air_quality.city.unique()

array(['bielsko biała', 'cracow', 'poznań', 'gdańsk', 'wroclaw',
       'biała podlaska', 'katowice', 'warsaw'], dtype=object)

In [16]:
df_air_quality.date = df_air_quality.date.apply(timestamp_2_time)
df_air_quality.sort_values(by = ['city','date'],inplace = True,ignore_index = True)

df_air_quality.head()

Unnamed: 0,date,pm10,o3,no2,so2,co,city,pm25
0,1411077600000,45.0,34.0,3.0,4.0,4.0,biała podlaska,
1,1411164000000,,26.0,4.0,2.0,,biała podlaska,99.0
2,1411250400000,,32.0,,1.0,,biała podlaska,
3,1411336800000,,21.0,,1.0,,biała podlaska,
4,1411423200000,,23.0,,1.0,,biała podlaska,


#### <span style='color:#ff5f27'> 🌦 Weather Data

In [21]:
df_weather = pd.read_csv('data_poland/weather/weather_merged.csv')

df_weather.head(3)

Unnamed: 0,city,date,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,windgust,windspeed,winddir,sealevelpressure,cloudcover,visibility,solarradiation,solarenergy,uvindex,conditions
0,bielsko biała,2022-01-01,10.7,8.3,9.1,10.7,5.4,6.6,8.3,95.1,...,46.7,32.2,245.6,1022.4,99.0,7.4,11.6,1.0,1,"Rain, Overcast"
1,bielsko biała,2022-01-02,12.0,6.1,9.6,12.0,4.8,9.2,5.2,75.1,...,54.0,39.0,197.7,1018.8,92.6,40.5,30.9,2.6,2,"Rain, Overcast"
2,bielsko biała,2022-01-03,10.5,7.1,8.6,10.5,3.0,4.9,3.2,69.3,...,68.1,35.8,230.0,1011.3,83.1,46.2,25.5,2.2,2,"Rain, Partially cloudy"


In [22]:
df_weather.date = df_weather.date.apply(timestamp_2_time)
df_weather.sort_values(by=['city', 'date'],inplace=True, ignore_index=True)

df_weather.head(3)

Unnamed: 0,city,date,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,windgust,windspeed,winddir,sealevelpressure,cloudcover,visibility,solarradiation,solarenergy,uvindex,conditions
0,biała podlaska,1640991600000,9.4,2.7,6.6,6.8,-0.5,3.6,4.6,87.9,...,50.4,22.4,303.9,1015.5,95.7,27.3,14.8,1.3,1,"Rain, Overcast"
1,biała podlaska,1641078000000,8.0,-0.6,3.2,5.2,-3.2,0.6,1.6,89.2,...,39.6,18.7,200.3,1016.6,90.5,21.6,15.4,1.2,1,"Rain, Overcast"
2,biała podlaska,1641164400000,8.5,6.0,7.4,5.7,2.7,4.5,5.1,85.7,...,57.6,24.4,260.6,1002.8,87.1,21.3,12.7,1.1,1,"Rain, Partially cloudy"


---

## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [12]:
import hopsworks

project = hopsworks.login()

fs = project.get_feature_store() 

Connected. Call `.close()` to terminate connection gracefully.

Multiple projects found. 

	 (1) ID2223_Anton
	 (2) ID2223_Ernest

Enter project to access: 2

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/5476
Connected. Call `.close()` to terminate connection gracefully.


---

## <span style="color:#ff5f27;">🪄 Creating Feature Groups</span>

#### <span style='color:#ff5f27'> 👩🏻‍🔬 Air Quality Data

In [17]:
air_quality_fg = fs.get_or_create_feature_group(
        name = 'pl_air_quality_fg',
        description = 'Air Quality characteristics of each day for Polish cities',
        version = 1,
        primary_key = ['city','date'],
        online_enabled = True,
        event_time = 'date'
    )    

air_quality_fg.insert(df_air_quality)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/5476/fs/5383/fg/14945


Uploading Dataframe: 0.00% |          | Rows 0/22362 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/5476/jobs/named/pl_air_quality_fg_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f8ab5087b50>, None)

#### <span style='color:#ff5f27'> 🌦 Weather Data

In [23]:
weather_fg = fs.get_or_create_feature_group(
        name = 'pl_weather_fg',
        description = 'Weather characteristics of each day for Polish cities',
        version = 1,
        primary_key = ['city','date'],
        online_enabled = True,
        event_time = 'date'
    )    

weather_fg.insert(df_weather)

FeatureStoreException: Features are not compatible with Feature Group schema: 
 - conditions (type: 'string') does not exist in feature group.

---