In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import requests
import seaborn as sns
import json
import sys
import os

# Make the modules defined in ../src importable from this notebook
sys.path.append(os.path.abspath('../src'))

%matplotlib qt

In [None]:
# Set seaborn theme: apply seaborn's default styling to the figures drawn
# later in this notebook.
sns.set_theme()

# Get data

Read your personal API key from the `api_key` file located one directory above this notebook.

In [None]:
# Read the API token from disk. Surrounding whitespace is stripped because
# text editors usually leave a trailing newline in the file, which would
# otherwise be sent to the API as part of the token.
with open("../api_key", "r", encoding="utf-8") as file:
    API_KEY = file.read().strip()

In [None]:
def get_data(
    code_polluant: int,
    site_ids=("FR15038", "FR15043", "FR15053"),
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Download hourly raw values for one pollutant and return them as one frame.

    Each site is queried separately on the Atmo AURA hourly endpoint with
    ``date_debut="-2 years"``, ascending date order and ``valeur_brute="1"``.
    Paginated answers are followed through ``links.next`` until it is ``None``.

    Parameters
    ----------
    code_polluant : int
        Pollutant code sent to the API (converted to ``str`` in the query).
    site_ids : iterable of str, optional
        Site identifiers to query. Defaults to the three sites this notebook
        has always used.
    timeout : float, optional
        Per-request timeout in seconds, so a stalled connection cannot hang
        the notebook forever.

    Returns
    -------
    pd.DataFrame
        All rows from all pages and sites, with a fresh ``RangeIndex`` and a
        ``date`` column converted to datetimes without timezone information.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    ValueError
        If no row at all was returned for the requested sites.
    """
    # Define the API endpoint
    url = "https://api.atmo-aura.fr/api/v1/valeurs/horaire"

    # Collect every page first and concatenate once at the end: concatenating
    # inside the loop copies the accumulated frame on every page.
    pages = []

    for site_id in site_ids:
        params = {
            "api_token": API_KEY,
            "format": "json",
            "sites": site_id,
            "date_debut": "-2 years",
            "code_polluant": str(code_polluant),
            "order_by_date": "asc",
            "valeur_brute": "1"
        }

        next_url = url
        while next_url is not None:
            response = requests.get(next_url, params=params, timeout=timeout)
            # Fail on HTTP errors instead of trying to parse an error body.
            response.raise_for_status()
            payload = response.json()

            page = pd.DataFrame(payload["data"])
            if not page.empty:
                pages.append(page)

            next_url = payload["links"]["next"]
            # Follow-up pages are fetched from the "next" link as-is,
            # without re-sending the query parameters.
            params = None

    if not pages:
        raise ValueError(
            f"No data returned for pollutant code {code_polluant} "
            f"and sites {list(site_ids)}"
        )

    data = pd.concat(pages, ignore_index=True)

    # Parse the timestamps, then drop any timezone information.
    data["date"] = pd.to_datetime(data["date"]).dt.tz_localize(None)

    return data

In [None]:
class site():
    """Measurements of a single site, split by measuring device type.

    Rows of ``data`` whose ``site_id`` equals the requested one are kept in
    ``self.data``. Rows with ``type_appareil_id == 11`` are exposed as the
    reference series and rows with ``type_appareil_id == 57`` as the
    microsensor series.

    Raises
    ------
    ValueError
        If ``data`` holds no row for ``site_id``.
    """

    def __init__(self, data, site_id):
        self.data = data[data["site_id"] == site_id]

        if self.data.empty:
            raise ValueError("No data available for this site")

        self.site_id = site_id

        # Full rows and (value, date) pairs for the reference device.
        self.reference_data = self.data.query("type_appareil_id == 11")
        self.reference_values = self.reference_data.loc[:, ["valeur", "date"]]

        # Full rows and (value, date) pairs for the microsensor device.
        self.microsensor_data = self.data.query("type_appareil_id == 57")
        self.microsensor_values = self.microsensor_data.loc[:, ["valeur", "date"]]

    def get_reference_data(self):
        """Return all columns of the reference-device rows."""
        return self.reference_data

    def get_microsensor_data(self):
        """Return all columns of the microsensor rows."""
        return self.microsensor_data

    def get_reference_values(self):
        """Return the ``valeur`` and ``date`` columns of the reference rows."""
        return self.reference_values

    def get_microsensor_values(self):
        """Return the ``valeur`` and ``date`` columns of the microsensor rows."""
        return self.microsensor_values

    def filter_date(self, start_date, end_date):
        """Return ``(reference, microsensor)`` rows with start <= date <= end.

        Both bounds are inclusive. The frames stored on the instance are not
        modified.
        """
        reference = self.get_reference_data()
        microsensor = self.get_microsensor_data()

        reference = reference[(reference["date"] >= start_date) & (reference["date"] <= end_date)]
        microsensor = microsensor[(microsensor["date"] >= start_date) & (microsensor["date"] <= end_date)]

        return reference, microsensor

    def _first_value(self, column):
        """Return the value of ``column`` in the first row of this site's data.

        Positional access is required: ``self.data`` keeps the index labels of
        the frame it was filtered from, so label ``0`` only exists when the
        site's rows happen to start at the top of that frame.
        """
        return self.data[column].iloc[0]

    def plot(self, ax = None, *, kwargs = None):
        """Plot the reference and microsensor series against time.

        Parameters
        ----------
        ax : matplotlib Axes, optional
            Axes to draw on. Defaults to the current axes (``plt.gca()``).
        kwargs : dict, optional
            Extra keyword arguments forwarded to both ``ax.plot`` calls.
            A ``label`` entry overrides the default legend labels.

        Returns
        -------
        The axes that were drawn on.
        """
        # Retrieve pollutant label
        polutant_label = self._first_value("label_polluant")
        # Retrieve measure unit
        measure_unit = self._first_value("unite")

        if ax is None:
            ax = plt.gca()

        # Copy so the caller's dict is never mutated or shared between calls.
        line_kwargs = dict(kwargs) if kwargs else {}

        x_ref = self.reference_values["date"]
        y_ref = self.reference_values["valeur"]

        x_micro = self.microsensor_values["date"]
        y_micro = self.microsensor_values["valeur"]

        ax.plot(x_ref, y_ref, **{"label": "Reference values", **line_kwargs})
        ax.plot(x_micro, y_micro, **{"label": "Microsensor values", **line_kwargs})
        ax.set_title(f"{polutant_label} values for site {self.site_id}")
        ax.set_xlabel("Date")
        ax.set_ylabel(measure_unit)
        ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(ax.xaxis.get_major_locator()))
        ax.legend()

        return ax

In [None]:
# Pollutant code 54: the plot of this dataset further down is titled
# "température".
code_polluant = 54
temperature = get_data(code_polluant=code_polluant)

In [None]:
# Pollutant code 58: the plot of this dataset further down is titled
# "humidité relative".
code_polluant = 58
humidity = get_data(code_polluant=code_polluant)

In [None]:
# Human-readable site name -> site identifier used by the API.
site_id_dict = {
    "Saint Martin d'Hères": "FR15038",
    "Les Frenes": "FR15043",
    "Rocade Sud": "FR15053",
}

# Build the temperature and humidity views of the Saint Martin d'Hères site
# from their respective datasets, in that order.
SMH_temp, SMH_humidity = (
    site(measurements, site_id_dict["Saint Martin d'Hères"])
    for measurements in (temperature, humidity)
)

In [None]:
# Draw on a dedicated figure. Without an explicit axes, plot() falls back to
# plt.gca(), which reuses whatever figure is still open (e.g. when the cell
# is re-run with the interactive backend) and overlays the new lines on it.
fig_temp, ax_temp = plt.subplots()
SMH_temp.plot(ax=ax_temp)

<Axes: title={'center': 'température values for site FR15038'}, xlabel='Date', ylabel='degré C'>

In [None]:
# Draw on a dedicated figure. Without an explicit axes, plot() falls back to
# plt.gca(), so the humidity curves would be added to the temperature figure
# that is still open and its title and labels would be overwritten.
fig_humidity, ax_humidity = plt.subplots()
SMH_humidity.plot(ax=ax_humidity)

<Axes: title={'center': 'humidité relative values for site FR15038'}, xlabel='Date', ylabel='%'>

# Merge datasets

In [None]:
# Join temperature (left, "_x" suffix) with humidity (right, "_y" suffix) on
# the measurement timestamp; only timestamps present in both are kept.
# NOTE(review): the join key is "date" alone, so if a timestamp carries rows
# for several device types on each side, every combination is produced.
# Confirm this is intended.
humidity_temperature = SMH_temp.data.merge(SMH_humidity.data, on="date")

In [None]:
# List the merged column names to see which suffixed columns are available.
humidity_temperature.columns

Index(['mesure_id_x', 'date', 'valeur_x', 'validation_x', 'site_id_x',
       'type_appareil_id_x', 'type_appareil_label_x', 'code_polluant_x',
       'label_polluant_x', 'id_poll_ue_x', 'label_court_polluant_x',
       'label_unite_x', 'label_court_unite_x', 'unite_x', 'site_label_x',
       'mesure_id_y', 'valeur_y', 'validation_y', 'site_id_y',
       'type_appareil_id_y', 'type_appareil_label_y', 'code_polluant_y',
       'label_polluant_y', 'id_poll_ue_y', 'label_court_polluant_y',
       'label_unite_y', 'label_court_unite_y', 'unite_y', 'site_label_y'],
      dtype='object')

In [None]:
# Keep useful columns: the timestamp plus the two measurements
# (valeur_x comes from the temperature frame, valeur_y from the humidity one).
# Selecting the wanted columns into a new frame avoids in-place mutation and
# raises a KeyError if one of them is missing instead of silently carrying on.
humidity_temperature = humidity_temperature.loc[:, ["date", "valeur_x", "valeur_y"]]

In [None]:
# Preview the first rows of the reduced frame.
humidity_temperature.head()

Unnamed: 0,date,valeur_x,valeur_y
0,2022-12-16 12:00:00,5.18,99.9
1,2022-12-16 13:00:00,4.87,99.9
2,2022-12-16 14:00:00,4.73,99.9
3,2022-12-16 15:00:00,4.55,99.9
4,2022-12-16 16:00:00,4.1,99.9


In [None]:
# Save the merged temperature/humidity table as CSV.
# NOTE(review): index=False is not passed, so the row index is written as an
# unnamed first column. Confirm that readers of this file expect it.
humidity_temperature.to_csv("../data/smh_23-24_humidity_temp.csv")