In [1]:
import json
import requests
from datetime import datetime, timedelta
import pickle
import xgboost
import pandas as pd

In [2]:
# Maps each numeric object ID to a human-readable label for that object.
# In the cells visible here only the keys are used (to replicate the feature
# frame once per object before prediction); the labels serve as reference.
# NOTE(review): "MP" presumably stands for "measurement point" and the labels
# look like street addresses in Leuven — confirm with the data owner.
object_id_dict = {
    255439: "MP 01: Naamsestraat 35 Maxim",
    255440: "MP 02: Naamsestraat 57 Xior",
    255441: "MP 03: Naamsestraat 62 Taste",
    255442: "MP 05: Calvariekapel KU Leuven",
    255443: "MP 06: Parkstraat 2 La Filosovia",
    255444: "MP 07: Naamsestraat 81",
    280324: "MP08bis - Vrijthof",
}

In [3]:
def get_forecast_hourly_weather(timeout=30):
    """Download the hourly weather forecast from the Open-Meteo forecast API.

    The request is fixed to latitude 50.88 / longitude 4.70, the
    ``best_match`` model and the ``Europe/Berlin`` timezone; the list of
    hourly variables is hard-coded in the URL below.

    Args:
        timeout: Seconds to wait for the server before aborting the request.
            Without a timeout ``requests`` may block indefinitely on a
            stalled connection.

    Returns:
        pandas.DataFrame built from the ``"hourly"`` entry of the JSON
        response (callers rely on it containing a ``"time"`` column).

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-2xx HTTP status.
        KeyError: If the JSON response has no ``"hourly"`` entry.
    """
    url = "https://api.open-meteo.com/v1/forecast?latitude=50.88&longitude=4.70&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,rain,showers,snowfall,weathercode,pressure_msl,surface_pressure,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,evapotranspiration,et0_fao_evapotranspiration,vapor_pressure_deficit,windspeed_10m,winddirection_10m,windgusts_10m,shortwave_radiation,direct_radiation,diffuse_radiation,direct_normal_irradiance&models=best_match&timezone=Europe%2FBerlin"
    resp = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of a confusing KeyError on
    # "hourly" when the API answers with an error payload.
    resp.raise_for_status()
    data = resp.json()
    return pd.DataFrame(data["hourly"])


def get_forecast_hourly_air(timeout=30):
    """Download the hourly air-quality forecast from the Open-Meteo API.

    The request is fixed to latitude 50.88 / longitude 4.70 and the
    ``Europe/Berlin`` timezone; the list of hourly pollutants is hard-coded
    in the URL below.

    Args:
        timeout: Seconds to wait for the server before aborting the request.
            Without a timeout ``requests`` may block indefinitely on a
            stalled connection.

    Returns:
        pandas.DataFrame built from the ``"hourly"`` entry of the JSON
        response (callers rely on it containing a ``"time"`` column).

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-2xx HTTP status.
        KeyError: If the JSON response has no ``"hourly"`` entry.
    """
    url = "https://air-quality-api.open-meteo.com/v1/air-quality?latitude=50.88&longitude=4.70&hourly=pm10,pm2_5,carbon_monoxide,nitrogen_dioxide,sulphur_dioxide,ozone,ammonia&timezone=Europe%2FBerlin"
    resp = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of a confusing KeyError on
    # "hourly" when the API answers with an error payload.
    resp.raise_for_status()
    data = resp.json()
    return pd.DataFrame(data["hourly"])

In [4]:
# Forecast window: today plus the three following calendar dates.
# NOTE(review): "today" is taken from the machine's local clock, while the
# forecast timestamps are requested in Europe/Berlin — confirm this matches
# when the notebook runs in another timezone.
today = datetime.now().date()
next_4_days = [today + timedelta(days=offset) for offset in range(4)]

In [5]:
# Build the hourly feature frame: download weather and air-quality forecasts,
# keep only the rows inside the forecast window, and join both on "time".

# Get weather
hourly_weather = get_forecast_hourly_weather()
hourly_weather["time"] = pd.to_datetime(hourly_weather["time"])
# .copy() detaches the filtered rows from the full download so that the
# column assignment below does not operate on a slice
# (avoids pandas' SettingWithCopyWarning).
hourly_weather = hourly_weather[
    hourly_weather["time"].dt.date.isin(next_4_days)
].copy()
# NOTE(review): the three columns are summed as delivered by the API —
# confirm their units match what the model was trained on.
hourly_weather["precipitation"] = (
    hourly_weather["rain"] + hourly_weather["showers"] + hourly_weather["snowfall"]
)
# Reassign instead of inplace=True so re-running the cell stays predictable.
hourly_weather = hourly_weather.drop(
    columns=[
        "showers",
        "et0_fao_evapotranspiration",
        "evapotranspiration",
        "vapor_pressure_deficit",
    ]
)

# Get air
hourly_air = get_forecast_hourly_air()
hourly_air["time"] = pd.to_datetime(hourly_air["time"])
hourly_air = hourly_air[hourly_air["time"].dt.date.isin(next_4_days)].copy()
# Shorten the pollutant column names.
hourly_air = hourly_air.rename(
    columns={
        "carbon_monoxide": "co",
        "nitrogen_dioxide": "no2",
        "sulphur_dioxide": "so2",
        "ozone": "o3",
        "ammonia": "nh3",
    }
)


# Merge
# Inner join: only timestamps present in both forecasts are kept.
hourly_df = pd.merge(hourly_weather, hourly_air, on="time")
# Derive calendar features from the timestamp, then drop the timestamp itself.
hourly_df["date"] = hourly_df["time"].dt.date
hourly_df["hour"] = hourly_df["time"].dt.hour
hourly_df["month"] = hourly_df["time"].dt.month
hourly_df["weekday"] = hourly_df["time"].dt.strftime("%a")
hourly_df = hourly_df.drop(columns=["time"])
hourly_df.head(3)

Unnamed: 0,temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,rain,snowfall,weathercode,pressure_msl,surface_pressure,cloudcover,...,pm2_5,co,no2,so2,o3,nh3,date,hour,month,weekday
0,13.6,69,8.0,12.1,0.0,0.0,1,1020.7,1017.5,40,...,7.8,135.0,12.1,1.2,62.0,4.7,2023-06-03,0,6,Sat
1,12.8,74,8.3,11.4,0.0,0.0,2,1020.6,1017.4,44,...,8.6,140.0,15.4,0.9,54.0,5.7,2023-06-03,1,6,Sat
2,12.0,82,9.0,10.8,0.0,0.0,0,1020.9,1017.7,48,...,10.2,149.0,14.0,0.7,47.0,6.2,2023-06-03,2,6,Sat


In [6]:
def add_object_id(hourly_df, object_ids):
    """Replicate the hourly frame once per object ID.

    Args:
        hourly_df: Frame to repeat; it is left unmodified.
        object_ids: Iterable of IDs. Each ID gets its own full copy of
            ``hourly_df`` with the ID stored in an ``object_id`` column.

    Returns:
        One frame with the per-ID copies stacked in the order of
        ``object_ids`` and a fresh 0..n-1 index.

    Raises:
        ValueError: Raised by ``pd.concat`` when ``object_ids`` is empty.
    """
    per_object_frames = [
        hourly_df.assign(object_id=object_id) for object_id in object_ids
    ]
    return pd.concat(per_object_frames, ignore_index=True)

In [7]:
# Load the pickled feature encoder; its transform() is applied to the feature
# frame before prediction.
# NOTE: pickle.load can execute arbitrary code — only load files from a
# trusted source.
with open("../model/model_noise_level_file42/encoder_model_file42.pkl", "rb") as file:
    encoder = pickle.load(file)

In [8]:
# Load the pickled model used for the LAeq predictions.
# NOTE(review): loading this pickle emits an XGBoost warning (see the cell
# output) recommending to export the model with `Booster.save_model` from the
# XGBoost version that created it and to load that export instead.
# NOTE: pickle.load can execute arbitrary code — only load files from a
# trusted source.
with open("../model/model_noise_level_file42/xgb_laeq.pkl", "rb") as file:
    laeq_model = pickle.load(file)

  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.



In [9]:
# Predict LAeq for every object: repeat the hourly features once per object
# ID, run them through the fitted encoder, then through the model.
object_ids = list(object_id_dict)
df = add_object_id(hourly_df, object_ids)
transformed_df = encoder.transform(df)
laeq_pred = laeq_model.predict(transformed_df)

In [10]:
# Show only the first ten predictions rather than the full array.
laeq_pred[:10]

array([51.32492 , 51.464172, 41.475246, 46.360638, 46.48347 , 46.164146,
       43.95164 , 46.30882 , 50.368435, 51.257973], dtype=float32)