## <span style='color:#ff5f27'> 📝 Imports </span>

In [1]:
import os
import time
from datetime import datetime

import hopsworks
import pandas as pd
import requests

from functions.functions import *

---

In [2]:
# Current local calendar date as a YYYY-MM-DD string.
date_today = f"{datetime.now():%Y-%m-%d}"

---

## <span style='color:#ff5f27'> 🧙🏼‍♂️ Parsing Data </span>

In [3]:
# Cities to fetch; each string is passed unchanged to both fetch helpers.
cities = [
    'bielsko-biała', 'cracow', 'poznań', 'gdańsk',
    'wroclaw', 'biała-podlaska', 'katowice', 'warsaw',
]

# One air-quality record per city, in the same order as `cities`.
data_air_quality = list(map(get_air_quality_data, cities))

# One weather record per city, requested for `date_today`.
data_weather = list(
    map(lambda city_name: get_weather_data(city_name, date_today), cities)
)

---

## <span style='color:#ff5f27'> 🧑🏻‍🏫 Dataset Preparation </span>

In [9]:
# Show the raw per-city air-quality rows before they are turned into a DataFrame.
data_air_quality

[['bielsko-biała', 46, '2023-01-12', 46, nan, 10.6, nan, 0.1, nan],
 ['cracow', 103, '2023-01-09', 103, 58, 23.8, 5.4, 0.1, 4],
 ['poznań', 29, '2023-01-12', 29, 7, 12.5, 5, 0.1, 1],
 ['gdańsk', 36, '2023-01-12', 36, 9, 5.5, nan, 2.3, nan],
 ['wroclaw', 33, '2023-01-12', 33, nan, 21.6, nan, 0.1, nan],
 ['biała-podlaska', 25, '2023-01-12', nan, 25, 7.5, 8, nan, 10.2],
 ['katowice', 48, '2023-01-12', 48, 14, 8.6, 3.6, nan, 16.3],
 ['warsaw', 65, '2023-01-12', 65, 30, 22.5, 2, 0.1, 12]]

#### <span style='color:#ff5f27'> 👩🏻‍🔬 Air Quality Data </span>

In [5]:
# Convert the raw per-city rows into a DataFrame using the project helper.
# NOTE(review): the saved output of this cell is
# "ValueError: 6 columns passed, passed data had 9 columns", and each raw row
# shown earlier has 9 fields. Presumably the column list inside
# get_air_quality_df (star-imported from functions.functions) is out of date
# with what get_air_quality_data returns -- confirm and fix in that module.
df_air_quality = get_air_quality_df(data_air_quality)

df_air_quality

ValueError: 6 columns passed, passed data had 9 columns

#### <span style='color:#ff5f27'> 🌦 Weather Data </span>

In [5]:
# Build the weather DataFrame from the fetched per-city records, then preview
# its first rows.
df_weather = get_weather_df(data_weather)

df_weather.head()

Unnamed: 0,city,date,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,windgust,windspeed,winddir,pressure,cloudcover,visibility,solarradiation,solarenergy,uvindex,conditions
0,Bielsko-biała,1673478000000,8.4,3.4,5.0,5.4,0.0,2.2,2.2,83.0,...,36.0,23.6,218.7,1017.0,66.3,10.4,14.4,1.2,1.0,"Rain, Partially cloudy"
1,Cracow,1673478000000,8.9,2.0,4.8,6.6,0.2,3.3,1.8,81.4,...,25.9,24.1,225.6,1017.0,64.4,11.1,70.6,6.0,6.0,"Rain, Partially cloudy"
2,Poznań,1673478000000,9.0,5.0,7.0,6.2,1.2,4.1,4.5,85.0,...,41.4,27.7,212.1,1011.0,81.1,12.4,37.5,3.2,2.0,"Rain, Partially cloudy"
3,Gdańsk,1673478000000,7.0,4.0,5.9,5.0,0.8,2.5,4.1,88.4,...,50.0,29.5,210.0,1006.5,84.2,12.9,12.4,1.1,1.0,"Rain, Partially cloudy"
4,Wroclaw,1673478000000,10.0,5.0,7.3,6.8,2.8,4.7,3.3,76.3,...,38.2,25.9,220.1,1014.2,67.2,12.9,16.8,1.4,1.0,"Rain, Partially cloudy"


---

## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [6]:
# Log in to Hopsworks (`hopsworks` and `os` are imported in the top import cell).
# The project name is taken from the HOPSWORKS_PROJECT environment variable so
# that "Restart & Run All" does not stall on the interactive
# "Enter project to access" prompt when the account has several projects.
# If the variable is unset, None is passed and login() behaves as it did before.
project = hopsworks.login(project=os.environ.get("HOPSWORKS_PROJECT"))

fs = project.get_feature_store()

# Handles to version 1 of the two feature groups this notebook inserts into.
air_quality_fg = fs.get_or_create_feature_group(
    name='poland_air_quality_fg',
    version=1,
)
weather_fg = fs.get_or_create_feature_group(
    name='poland_weather_fg',
    version=1,
)

Connected. Call `.close()` to terminate connection gracefully.

Multiple projects found. 

	 (1) ID2223_Anton
	 (2) ID2223_Ernest

Enter project to access: 2

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/5476
Connected. Call `.close()` to terminate connection gracefully.


---

## <span style="color:#ff5f27;">⬆️ Uploading new data to the Feature Store</span>

In [7]:
# Insert the prepared air-quality DataFrame into its feature group.
# NOTE(review): the saved output is a FeatureStoreException. The DataFrame
# carries columns such as aqi, iaqi_*, o3_avg/max/min, pm10_avg/max/min and
# pm25_avg/max/min, while the feature group schema expects pm10, o3, no2, so2,
# co and pm25. The frame has to be aligned with the schema (or the feature
# group version bumped) before this insert can succeed -- confirm which is intended.
air_quality_fg.insert(df_air_quality)

FeatureStoreException: Features are not compatible with Feature Group schema: 
 - pm10 (type: 'double') is missing from input dataframe.
 - o3 (type: 'double') is missing from input dataframe.
 - no2 (type: 'double') is missing from input dataframe.
 - so2 (type: 'double') is missing from input dataframe.
 - co (type: 'double') is missing from input dataframe.
 - pm25 (type: 'double') is missing from input dataframe.
 - aqi (type: 'bigint') does not exist in feature group.
 - iaqi_h (type: 'double') does not exist in feature group.
 - iaqi_p (type: 'double') does not exist in feature group.
 - iaqi_pm10 (type: 'double') does not exist in feature group.
 - iaqi_t (type: 'double') does not exist in feature group.
 - o3_avg (type: 'bigint') does not exist in feature group.
 - o3_max (type: 'bigint') does not exist in feature group.
 - o3_min (type: 'bigint') does not exist in feature group.
 - pm10_avg (type: 'bigint') does not exist in feature group.
 - pm10_max (type: 'bigint') does not exist in feature group.
 - pm10_min (type: 'bigint') does not exist in feature group.
 - pm25_avg (type: 'bigint') does not exist in feature group.
 - pm25_max (type: 'bigint') does not exist in feature group.
 - pm25_min (type: 'bigint') does not exist in feature group.

In [None]:
# Insert the prepared weather DataFrame into the weather feature group.
weather_fg.insert(df_weather)

Uploading Dataframe: 0.00% |          | Rows 0/4 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/5476/jobs/named/weather_fg_1_offline_fg_backfill/executions


---