In [2]:
import requests
import polars as pl
import pandas as pd
from pathlib import Path
import geopandas as gpd
from tqdm.notebook import tqdm

from datetime import datetime

In [3]:
# Root of the local dataset tree, located under the current user's home directory.
base = Path.home().joinpath(
    "Local_Workspace", "Datasets", "ARPA", "TRENTINO", "bolzano"
)

In [4]:
# Read the sensor catalogue from JSON and store a CSV copy alongside it,
# then read the stations GeoJSON with geopandas.
sensors = pl.read_json(base.joinpath("sensors.json"))
sensors.write_csv(base.joinpath("sensors.csv"))
stations = gpd.read_file(base.joinpath("stations.geojson"))

In [5]:
def query_payload(
    station_code,
    from_date: datetime,
    to_date: datetime,
    sensor_code: str = "LT",
) -> dict:
    """Build the query parameters for one timeseries request.

    Args:
        station_code: Station identifier, sent as ``station_code``.
        from_date: Start of the requested period; only its date part is sent.
        to_date: End of the requested period; only its date part is sent.
        sensor_code: Sensor type to request. Defaults to ``"LT"``, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        A dict of request parameters with both dates formatted as
        ``YYYYMMDD`` and ``output_format`` fixed to ``"CSV"``.
    """
    date_format = r"%Y%m%d"
    return {
        "station_code": station_code,
        "sensor_code": sensor_code,
        "date_from": from_date.strftime(date_format),
        "date_to": to_date.strftime(date_format),
        "output_format": "CSV",
    }

In [6]:
# Station codes (SCODE) of every sensor row whose TYPE is "LT".
stat_ids = (
    sensors
    .filter(pl.col("TYPE") == "LT")
    .get_column("SCODE")
    .to_list()
)

In [21]:
from time import sleep
from random import uniform

def file_path(station_id, year):
    """Return the CSV path for one station/year, creating its folder if needed.

    Args:
        station_id: Station code; used as the sub-folder name under
            ``base / "fragments"``.
        year: Year; used as the file stem.

    Returns:
        ``base / "fragments" / <station_id> / <year>.csv``. Only the parent
        directory is created; the file itself is not touched.
    """
    path = base / "fragments" / f"{station_id}/{year}.csv"
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the same folder between check and mkdir.
    path.parent.mkdir(parents=True, exist_ok=True)
    return path

def get_year_data(station_id, year):
    """Download one year of data for a station from the timeseries service.

    Args:
        station_id: Station code forwarded to ``query_payload``.
        year: Year to fetch; the request spans January 1st of ``year`` to
            January 1st of ``year + 1``.

    Returns:
        The response body as text when the server answers HTTP 200,
        otherwise ``False`` after printing what went wrong. Transport
        errors raised by ``requests`` are reported the same way instead of
        propagating.
    """
    try:
        r = requests.get(
            "http://daten.buergernetz.bz.it/services/meteo/v1/timeseries",
            params=query_payload(
                station_id, datetime(year, 1, 1), datetime(year + 1, 1, 1)
            ),
            # Without a timeout a stalled connection would block the whole run.
            timeout=120,
        )
    except requests.RequestException as exc:
        # No response object exists here, so report the exception itself
        # (the old handler referenced ``r`` and crashed with NameError).
        sleep(uniform(3, 5))
        print(f"There was an error: {exc}. Continuing...")
        return False

    if r.status_code != 200:
        # Back off a little before the caller moves on to the next request.
        sleep(uniform(3, 5))
        print(f"There was an error: {r.status_code}. Continuing...")
        return False
    return r.text

def get_station_data(station_id, bar: tqdm, start_year: int = 2000, end_year: int = 2023):
    """Download every yearly CSV that is still missing for one station.

    For each year in ``range(start_year, end_year)`` the target file from
    ``file_path`` is checked; existing files are skipped, otherwise the year
    is fetched with ``get_year_data`` and written to disk. Failed requests
    and empty/whitespace-only responses are skipped without writing a file,
    so a later run retries them.

    Args:
        station_id: Station code to download.
        bar: Progress bar advanced once per year, whatever the outcome.
        start_year: First year to fetch (inclusive). Defaults to 2000.
        end_year: Year to stop at (exclusive). Defaults to 2023.
    """
    # Reset before the loop rather than after it: the bar then matches this
    # call's year count and ends full, for both a fresh and a reused bar.
    # Resetting at the end would zero a bar that has just completed.
    bar.reset(total=end_year - start_year)

    for year in range(start_year, end_year):
        path = file_path(station_id, year)
        downloaded = False
        if not path.exists():
            data = get_year_data(station_id, year)
            # get_year_data returns False on failure; blank bodies carry no data.
            if data and data.strip():
                # Explicit encoding so the file does not depend on the OS locale.
                with open(path, "wt", encoding="utf-8") as file:
                    file.write(data)
                downloaded = True
        bar.update()
        if downloaded:
            # Pause only after a real download, to go easy on the server.
            sleep(uniform(0.5, 1.5))

In [22]:
# Outer bar counts stations; every station gets its own inner bar with one
# step per year in 2000..2022.
for station_id in tqdm(stat_ids):
    with tqdm(leave=True, total=len(range(2000, 2023))) as bar:
        get_station_data(station_id, bar=bar)

  0%|          | 0/97 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

In [84]:
# Convert every CSV found directly under fragments/ into a Parquet file in
# dataset/, parsing the DATE column and tagging each row with the file stem
# as "original_id".
# NOTE(review): the download step writes fragments/<station>/<year>.csv, which
# this non-recursive glob does not match — presumably a merge step that is not
# shown here produces the per-station CSVs; confirm.

# Create the output folder first so write_parquet does not fail on a fresh
# workspace where dataset/ does not exist yet.
(base / "dataset").mkdir(parents=True, exist_ok=True)

for file in (base / "fragments").glob("*.csv"):
    station_id = file.stem
    pl.read_csv(
        file, try_parse_dates=False
    ).with_columns(
        pl.col("DATE").str.to_datetime(format=r"%Y-%m-%dT%H:%M:%S%Z"),
        pl.lit(station_id).alias("original_id"),
    ).write_parquet(
        base / "dataset" / f"{station_id}.parquet"
    )

In [78]:
# Preview the first rows of one per-station CSV with dates left unparsed.
pl.read_csv(
    base.joinpath("fragments", "00390SF.csv"),
    try_parse_dates=False,
).head().to_pandas()

Unnamed: 0,DATE,VALUE
0,2022-12-31T00:00:00CET,-1.8
1,2022-12-30T23:50:00CET,-1.7
2,2022-12-30T23:40:00CET,-1.8
3,2022-12-30T23:30:00CET,-1.7
4,2022-12-30T23:20:00CET,-1.7


DATE,VALUE
datetime[μs],f64
2022-12-31 00:00:00,-1.8
2022-12-30 23:50:00,-1.7
2022-12-30 23:40:00,-1.8
2022-12-30 23:30:00,-1.7
2022-12-30 23:20:00,-1.7
2022-12-30 23:10:00,-1.7
2022-12-30 23:00:00,-1.6
2022-12-30 22:50:00,-1.6
2022-12-30 22:40:00,-1.5
2022-12-30 22:30:00,-1.3
