# Nice try
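Però, a quanto pare, questa sorgente registra dati solo a partire dal 2022-10-16 22:15:00: anche richiedendo le misure dal 2018, la prima misura restituita ha quella data (vedi gli output più sotto).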

In [138]:
import requests
import bs4
import pandas as pd
import polars as pl
from pathlib import Path
from tqdm.notebook import tqdm
import time
from datetime import datetime

# Local directory used by this notebook for the files it writes.
base = Path.home().joinpath("Local_Workspace", "Datasets", "ARPA", "FVG", "DPC")
# Base URL prefixed to every endpoint path requested below (it ends with a slash).
api_url = "https://monitor.protezionecivile.fvg.it/api/"

In [35]:
# Column names and dtypes enforced when loading the station list.
stations_schema = {
    "id": pl.Utf8(),
    "name": pl.Utf8(),
    "istat": pl.Utf8(),
    "lat": pl.Float64(),
    "lon": pl.Float64(),
    "alt": pl.Float64(),
    "status": pl.Utf8(),
}

# Download the station list and load it with the explicit schema above.
stations = pl.from_records(
    requests.get(f"{api_url}/stations").json()["stations"],
    schema=stations_schema,
)

# Download the sensor list; no schema is passed, so dtypes are inferred by polars.
sensors = pl.from_records(requests.get(f"{api_url}/sensors").json()["sensors"])

In [39]:
# Persist the station and sensor tables as CSV files under the dataset directory.
# Create the directory first: writing fails if it does not exist yet, which is
# the case on a fresh machine or after moving the notebook.
base.mkdir(parents=True, exist_ok=True)
stations.write_csv(base / "stations.csv")
sensors.write_csv(base / "sensors.csv")

In [57]:
def get_station_sensors(station_id, session=None):
    """Fetch the sensors listed for one station.

    Args:
        station_id: Identifier interpolated into the
            ``stations/{station_id}/sensors`` endpoint path.
        session: Optional object exposing ``get`` (e.g. a
            ``requests.Session``). When ``None``, ``requests.get`` is used.

    Returns:
        The table built by ``pl.from_records`` from the ``"sensors"`` entry
        of the JSON response.
    """
    client = requests if session is None else session
    endpoint = f"{api_url}/stations/{station_id}/sensors"
    payload = client.get(endpoint).json()
    return pl.from_records(payload["sensors"])

In [186]:
def get_measures(station_id, sensor_id, from_time, to_time, req=None):
    """Request the measures of one sensor of one station within a time window.

    Args:
        station_id: Station identifier interpolated into the endpoint path.
        sensor_id: Sensor identifier interpolated into the endpoint path.
        from_time: Object with ``strftime`` (e.g. ``datetime``); sent as the
            ``from`` query parameter.
        to_time: Object with ``strftime`` (e.g. ``datetime``); sent as the
            ``to`` query parameter.
        req: Optional object exposing ``get`` (e.g. a ``requests.Session``).
            Defaults to ``None``, in which case the ``requests`` module is
            used; this matches the optional ``session`` argument of the
            sibling helpers.

    Returns:
        A ``(metadata, measures)`` pair: ``metadata`` is a dict holding the
        ``"result"``, ``"count"`` and ``"total"`` entries of the JSON
        response, and ``measures`` is the table built by ``pl.from_records``
        from its ``"measures"`` entry.

    Raises:
        KeyError: If the JSON response lacks one of the expected entries.
    """
    timestamp_format = r"%Y-%m-%d %H:%M:%S"
    payload = {
        "from": from_time.strftime(timestamp_format),
        "to": to_time.strftime(timestamp_format),
    }
    client = requests if req is None else req
    response = client.get(
        f"{api_url}/stations/{station_id}/sensors/{sensor_id}/measures",
        params=payload,
    ).json()
    metadata = {key: response[key] for key in ("result", "count", "total")}
    return metadata, pl.from_records(response["measures"])

def no_more(response):
    """Tell whether a ``get_measures``-style result reports zero measures.

    ``response`` is indexed as a sequence whose first item is a mapping with
    a ``"count"`` key; the function returns whether that count equals 0.
    """
    metadata = response[0]
    return metadata["count"] == 0


def get_n_measures(station_id, sensor_id, from_time, to_time, session=None):
    """Return the ``"total"`` entry reported by the measures endpoint for a window.

    Args:
        station_id: Station identifier interpolated into the endpoint path.
        sensor_id: Sensor identifier interpolated into the endpoint path.
        from_time: Object with ``strftime``; sent as the ``from`` parameter.
        to_time: Object with ``strftime``; sent as the ``to`` parameter.
        session: Optional object exposing ``get``. When ``None``, the
            ``requests`` module is used.

    Returns:
        The value stored under ``"total"`` in the JSON response.
    """
    timestamp_format = r"%Y-%m-%d %H:%M:%S"
    client = session if session is not None else requests
    # NOTE(review): "last": 1 presumably keeps the returned payload minimal,
    # since only the total is read here. Confirm against the API docs.
    query = {
        "last": 1,
        "from": from_time.strftime(timestamp_format),
        "to": to_time.strftime(timestamp_format),
    }
    endpoint = f"{api_url}/stations/{station_id}/sensors/{sensor_id}/measures"
    reply = client.get(endpoint, params=query)
    return reply.json()["total"]

In [69]:
# Query the sensor list of every station, reusing one HTTP session for all requests.
stat_ids = stations["id"]
with requests.Session() as session:
    progress = tqdm(stat_ids.to_list())
    sensors_data = [get_station_sensors(stat_id, session) for stat_id in progress]

  0%|          | 0/327 [00:00<?, ?it/s]

In [80]:
# Discard the empty per-station tables, stack the remaining ones vertically
# and save the combined table.
non_empty_tables = [tab for tab in sensors_data if len(tab) > 0]
sensors_data = pl.concat(non_empty_tables, how="vertical")
sensors_data.write_csv(base / "sensors_data.csv")

In [84]:
# Keep the sensor rows whose id is 2 and collect the distinct stations that own one.
# NOTE(review): judging by the variable name, sensor id 2 is the thermometer. Confirm.
stations_with_thermo = sensors_data.filter(pl.col("id") == 2)
station_ids = stations_with_thermo.get_column("station_id").unique()

In [191]:
# Preview the selected station ids, leaving out the last 25 entries.
station_ids[:-25]

station_id
i64
2
4
5
6
8
9
10
13
14
16


In [187]:
def measures_iter(station_id, start, end, session=None):
    """Yield the measures of sensor ``"2"`` of a station, block by block.

    The window ``[start, end]`` is walked backwards in time: after each
    block, the oldest timestamp it contains becomes the next ``to`` bound.
    This works around the limit of 10000 measures per request.

    Args:
        station_id: Station identifier forwarded to ``get_measures``.
        start: ``datetime`` used as the ``from`` bound of every request.
        end: ``datetime`` used as the ``to`` bound of the first request.
        session: Optional object exposing ``get``. When ``None``, the
            ``requests`` module is used.

    Yields:
        ``(response, data)`` pairs exactly as returned by ``get_measures``.
    """
    req = requests if session is None else session

    from_time = start
    to_time = end
    first_block = True
    while True:
        response, data = get_measures(station_id, sensor_id="2", from_time=from_time, to_time=to_time, req=req)
        # After the first block, a count of at most one is the stop condition:
        # presumably only the boundary measure, already yielded, comes back.
        # On the first block a single measure is genuine data and must not be
        # dropped, so only an empty block stops the iteration there.
        exhausted_at = 0 if first_block else 1
        if response["count"] <= exhausted_at:
            break
        first_block = False
        oldest = data["dt"].str.to_datetime().min()
        yield response, data
        if oldest == to_time:
            # The window did not move, so the next request would return the
            # same block again; stop instead of looping forever.
            break
        to_time = oldest

def get_station_measures(station_id, start = datetime(2000, 1, 1), end = datetime(2023, 1, 1)):
    """Download all measures of sensor ``"2"`` of a station within a time window.

    Args:
        station_id: Station identifier forwarded to the request helpers.
        start: Beginning of the window (default ``datetime(2000, 1, 1)``).
        end: End of the window (default ``datetime(2023, 1, 1)``).

    Returns:
        A ``(responses, data)`` pair of tuples with one entry per block
        yielded by ``measures_iter``. Both tuples are empty when no block
        is yielded.
    """
    with requests.Session() as session:
        n_measures = get_n_measures(station_id, sensor_id="2", from_time=start, to_time=end, session=session)
        # Expected number of blocks, assuming at most 10000 measures per
        # request; it is only used as the progress-bar total.
        iter_length = n_measures // 10000 + 1
        blocks = list(tqdm(measures_iter(station_id, start, end, session=session), total=iter_length))
    if not blocks:
        # zip(*[]) yields nothing, so the two-name unpacking below would
        # raise ValueError; return empty tuples instead.
        return (), ()
    responses, data = zip(*blocks)
    return responses, data


In [193]:
# Download the measures of station "553" from 2018-01-01 up to the function's default end date.
res5 = get_station_measures(station_id="553", start=datetime(2018, 1, 1))

  0%|          | 0/1 [00:00<?, ?it/s]

In [194]:
# Second element of the result: the tuple of measure tables, one per downloaded block.
res5[1]

(shape: (7_304, 6)
 ┌────────────┬───────────┬───────────┬───────────┬─────────────────────┬──────────┐
 │ station_id ┆ sensor_id ┆ lat       ┆ lon       ┆ dt                  ┆ value    │
 │ ---        ┆ ---       ┆ ---       ┆ ---       ┆ ---                 ┆ ---      │
 │ i64        ┆ i64       ┆ f64       ┆ f64       ┆ str                 ┆ f64      │
 ╞════════════╪═══════════╪═══════════╪═══════════╪═════════════════════╪══════════╡
 │ 553        ┆ 2         ┆ 45.917922 ┆ 12.544995 ┆ 2022-10-16 22:15:00 ┆ 12.8     │
 │ 553        ┆ 2         ┆ 45.917922 ┆ 12.544995 ┆ 2022-10-16 22:30:00 ┆ 12.8     │
 │ 553        ┆ 2         ┆ 45.917922 ┆ 12.544995 ┆ 2022-10-16 22:45:00 ┆ 12.4     │
 │ 553        ┆ 2         ┆ 45.917922 ┆ 12.544995 ┆ 2022-10-16 23:00:00 ┆ 12.56    │
 │ …          ┆ …         ┆ …         ┆ …         ┆ …                   ┆ …        │
 │ 553        ┆ 2         ┆ 45.917922 ┆ 12.544995 ┆ 2022-12-31 23:15:00 ┆ 7.9      │
 │ 553        ┆ 2         ┆ 45.917922 ┆ 12.544

In [185]:
# NOTE(review): `res2` is not assigned in any cell visible here. Presumably it
# came from get_station_measures("2", ...), since the output shows station_id 2.
# Confirm, and add the defining call so the notebook runs on a fresh kernel.
res2[1]

(shape: (7_304, 6)
 ┌────────────┬───────────┬───────────┬───────────┬─────────────────────┬───────┐
 │ station_id ┆ sensor_id ┆ lat       ┆ lon       ┆ dt                  ┆ value │
 │ ---        ┆ ---       ┆ ---       ┆ ---       ┆ ---                 ┆ ---   │
 │ i64        ┆ i64       ┆ f64       ┆ f64       ┆ str                 ┆ f64   │
 ╞════════════╪═══════════╪═══════════╪═══════════╪═════════════════════╪═══════╡
 │ 2          ┆ 2         ┆ 46.309217 ┆ 13.055607 ┆ 2022-10-16 22:15:00 ┆ 13.3  │
 │ 2          ┆ 2         ┆ 46.309217 ┆ 13.055607 ┆ 2022-10-16 22:30:00 ┆ 13.8  │
 │ 2          ┆ 2         ┆ 46.309217 ┆ 13.055607 ┆ 2022-10-16 22:45:00 ┆ 13.8  │
 │ 2          ┆ 2         ┆ 46.309217 ┆ 13.055607 ┆ 2022-10-16 23:00:00 ┆ 13.7  │
 │ …          ┆ …         ┆ …         ┆ …         ┆ …                   ┆ …     │
 │ 2          ┆ 2         ┆ 46.309217 ┆ 13.055607 ┆ 2022-12-31 23:15:00 ┆ 9.5   │
 │ 2          ┆ 2         ┆ 46.309217 ┆ 13.055607 ┆ 2022-12-31 23:30:00 ┆ 9.7  