In [1]:
import os

import pandas as pd
import requests


In [26]:
# WAQI API token, read from the environment so the secret never lives in the
# notebook or its saved outputs. Set it before starting Jupyter, e.g.
#   export WAQI_TOKEN=<your token>
TOKEN = os.environ.get("WAQI_TOKEN", "")
if not TOKEN:
    print("WARNING: WAQI_TOKEN is not set; the WAQI API calls below need a valid token.")

# City name: used as the search keyword, in table captions and in output file names.
CITY = "Vienna"
# Country name: used in table captions.
COUNTRY = "Austria"

In [37]:
# --- Step 1: Search for stations in the city ---
# Query the WAQI search endpoint using the city name as the keyword.
keyword = f"{CITY}"
search_url = "https://api.waqi.info/search/"
search_params = {"token": TOKEN, "keyword": keyword}

search_resp = requests.get(search_url, params=search_params, timeout=15)
search_resp.raise_for_status()
search_data = search_resp.json()

# WAQI can report a failure inside the JSON body (status "error", with "data"
# holding a message string). Stop here instead of iterating over that string.
if search_data.get("status") == "error":
    raise RuntimeError(f"WAQI search failed: {search_data.get('data')}")

stations = search_data.get("data", [])
# Count the list we just fetched (the old code referenced a stale, undefined name).
print(f"Found {len(stations)} stations matching '{keyword}'")

# --- Step 2: Convert to Pandas DataFrame ---
# Flatten each search hit into one row; missing fields become None.
records = []
for s in stations:
    station = s.get("station", {})
    # "geo" is read as [lat, lon]; pad so a missing/short value cannot raise.
    lat, lon = (list(station.get("geo") or []) + [None, None])[:2]
    slug = station.get("url")
    records.append({
        "uid": s.get("uid"),
        "name": station.get("name"),
        "country": station.get("country"),
        "lat": lat,
        "lon": lon,
        "aqi": s.get("aqi"),
        # Only build a link when the API actually returned a URL slug.
        "url": f"https://aqicn.org/city/{slug}" if slug else None,
    })

df = pd.DataFrame(records)

# --- Step 3: Display and save ---
display(df.style.set_caption(f"WAQI stations found for {CITY}"))
stations_csv = f"../../data/list_stations_{CITY}.csv"
df.to_csv(stations_csv, index=False)
# Report the path that was really written.
print(f"Saved results to {stations_csv}")

Found 14 stations matching 'Vienna'


Unnamed: 0,uid,name,country,lat,lon,aqi,url
0,2855,"1, Hausgrundweg 23, Gstr. 254, Austria",AT,48.226361,16.458345,46,https://aqicn.org/city/austria/hausgrundweg-23--gstr.-254/1
1,2857,"Umspannwerk Gaudenzdorfer Gürtel, Austria",AT,48.187147,16.339331,46,https://aqicn.org/city/austria/umspannwerk-gaudenzdorfer-gurtel
2,14537,"Allgemeines Krankenhaus, Ostringweg (zwischen Gebäuden BT25), Austria",AT,48.21911,16.349818,42,https://aqicn.org/city/austria/allgemeines-krankenhaus--ostringweg-zwischen-gebauden-bt25
3,2870,"252, Belgradplatz (Südostecke), Gstr.Nr. 816, Austria",AT,48.174353,16.361417,42,https://aqicn.org/city/austria/belgradplatz-sudostecke--gstr.nr.-816/252
4,4736,"250, Wehlistraße 366, Gstr.Nr.2157, Austria",AT,48.20306,16.43455,38,https://aqicn.org/city/austria/wehlistrasse-366--gstr.nr.2157/250
5,2860,"Ecke Taborstraße - Glockengasse, Austria",AT,48.216739,16.380918,38,https://aqicn.org/city/austria/ecke-taborstrasse-glockengasse
6,4738,"Floridsdorf, Gerichtsgasse 1a (Prager Str. 65m), Austria",AT,48.261086,16.396954,38,https://aqicn.org/city/austria/floridsdorf--gerichtsgasse-1a-prager-str.-65m
7,2850,"Kendlerstraße 40 (Umspannwerk), Austria",AT,48.205,16.30975,34,https://aqicn.org/city/austria/kendlerstrasse-40-umspannwerk
8,4739,"4, Schafbergbad, Josef Redl Gasse 2, Gstr.Nr. 698, Austria",AT,48.23537,16.301563,34,https://aqicn.org/city/austria/schafbergbad--josef-redl-gasse-2--gstr.nr.-698/4
9,2813,"Laaer Berg, Theodor Sickel-Gasse 1, Austria",AT,48.161036,16.39292,-,https://aqicn.org/city/austria/laaer-berg--theodor-sickel-gasse-1


Saved results to waqi_stations_Vienna.csv


In [35]:
# --- Check which stations report PM2.5 ---
# For every station returned by the search, fetch its live feed and keep the
# ones whose "iaqi" section contains a "pm25" entry.
stations_with_pm25 = []

for item in stations:
    uid = item["uid"]
    name = item["station"]["name"]
    geo = item["station"]["geo"]

    feed_url = f"https://api.waqi.info/feed/@{uid}/"
    feed_resp = requests.get(feed_url, params={"token": TOKEN}, timeout=15)
    feed_resp.raise_for_status()
    feed_data = feed_resp.json().get("data")

    # When the API reports an error, "data" is a message string rather than a
    # dict; skip that station instead of crashing on .get().
    if not isinstance(feed_data, dict):
        print(f"Skipping {uid} {name}: {feed_data}")
        continue

    iaqi = feed_data.get("iaqi") or {}
    if "pm25" not in iaqi:
        continue

    pm25 = iaqi["pm25"]
    stations_with_pm25.append({
        "uid": uid,
        "name": name,
        "latitude": geo[0],
        "longitude": geo[1],
        "AQI": feed_data.get("aqi"),
        # The reading is normally wrapped as {"v": value}; accept a bare value too.
        "PM2.5": pm25.get("v") if isinstance(pm25, dict) else pm25,
        "URL": f"https://aqicn.org/city/@{uid}"
    })

# NOTE: this rebinds `df`, which the following cell iterates over.
df = pd.DataFrame(stations_with_pm25)
display(df.style.set_caption(f"PM2.5 Stations in {CITY}, {COUNTRY}"))

Unnamed: 0,uid,name,latitude,longitude,AQI,PM2.5,URL
0,2855,"1, Hausgrundweg 23, Gstr. 254, Austria",48.226361,16.458345,46,46,https://aqicn.org/city/@2855
1,2857,"Umspannwerk Gaudenzdorfer Gürtel, Austria",48.187147,16.339331,46,46,https://aqicn.org/city/@2857
2,14537,"Allgemeines Krankenhaus, Ostringweg (zwischen Gebäuden BT25), Austria",48.21911,16.349818,42,42,https://aqicn.org/city/@14537
3,2870,"252, Belgradplatz (Südostecke), Gstr.Nr. 816, Austria",48.174353,16.361417,42,42,https://aqicn.org/city/@2870
4,4736,"250, Wehlistraße 366, Gstr.Nr.2157, Austria",48.20306,16.43455,38,38,https://aqicn.org/city/@4736
5,2860,"Ecke Taborstraße - Glockengasse, Austria",48.216739,16.380918,38,38,https://aqicn.org/city/@2860
6,4738,"Floridsdorf, Gerichtsgasse 1a (Prager Str. 65m), Austria",48.261086,16.396954,38,38,https://aqicn.org/city/@4738
7,2850,"Kendlerstraße 40 (Umspannwerk), Austria",48.205,16.30975,34,34,https://aqicn.org/city/@2850
8,4739,"4, Schafbergbad, Josef Redl Gasse 2, Gstr.Nr. 698, Austria",48.23537,16.301563,34,34,https://aqicn.org/city/@4739
9,2813,"Laaer Berg, Theodor Sickel-Gasse 1, Austria",48.161036,16.39292,89,89,https://aqicn.org/city/@2813


In [44]:
# --- Build uid -> HID mapping by scraping station pages ---
import re
import time
import os
import requests
import pandas as pd

# Browser-like User-Agent sent with every page request.
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari"
BASE = "https://aqicn.org/city/@{uid}/"
# Captures the "H<digits>" identifier from a data-platform API link in the page HTML.
HID_RE = re.compile(r"/data-platform/api/(H\d+)", re.IGNORECASE)

session = requests.Session()
session.headers.update({"User-Agent": UA, "Accept-Language": "en"})

# If the Data Platform link only appears when logged in, set your cookie here:
# session.headers.update({"Cookie": "aqi_session=YOUR_SESSION_COOKIE_VALUE"})

mapping_rows = []
errors = []

# NOTE: `df` is whichever station table was built most recently by the cells
# above; both variants carry the "uid" and "name" columns used here.
for s in df.to_dict("records"):
    uid = s["uid"]
    name = s["name"]
    url = BASE.format(uid=uid)
    try:
        r = session.get(url, timeout=30)
        r.raise_for_status()
        m = HID_RE.search(r.text)
        hid = m.group(1) if m else None
        mapping_rows.append({"uid": uid, "name": name, "station_url": url, "hid": hid})
        print(f"{uid}: {name} -> {hid or 'NOT FOUND'}")
    except requests.RequestException as e:
        # Network/HTTP failure: record it and keep going with the next station.
        print(f"Error {uid} {name}: {e}")
        errors.append((uid, name, str(e)))
    time.sleep(1.0)  # be polite

if errors:
    # Failed stations are absent from the mapping; make that visible.
    print(f"{len(errors)} station(s) failed and are missing from the mapping")

map_df = pd.DataFrame(mapping_rows)
display(map_df.style.set_caption("UID → HID mapping"))
mapping_csv = "../../data/mapping_uid_to_hid.csv"
map_df.to_csv(mapping_csv, index=False)
# Report the path that was really written.
print(f"Saved mapping -> {mapping_csv}")


2855: 1, Hausgrundweg 23, Gstr. 254, Austria -> H2855
2857: Umspannwerk Gaudenzdorfer Gürtel, Austria -> H2857
14537: Allgemeines Krankenhaus, Ostringweg (zwischen Gebäuden BT25), Austria -> H14537
2870: 252, Belgradplatz (Südostecke), Gstr.Nr. 816, Austria -> H2870
4736: 250, Wehlistraße 366, Gstr.Nr.2157, Austria -> H4736
2860: Ecke Taborstraße - Glockengasse, Austria -> H2860
4738: Floridsdorf, Gerichtsgasse 1a (Prager Str. 65m), Austria -> H4738
2850: Kendlerstraße 40 (Umspannwerk), Austria -> H2850
4739: 4, Schafbergbad, Josef Redl Gasse 2, Gstr.Nr. 698, Austria -> H4739
2813: Laaer Berg, Theodor Sickel-Gasse 1, Austria -> H2813
2848: Allgemeines Krankenhaus, Südringweg, Austria -> H2848
2871: Rinnböckstraße 15, Gstr. 1092, Austria -> H2871
2849: 4, Floridsdorf, Gerichtsgasse 1a, Gstr.Nr. 438, Austria -> H2849
2851: 4, Josef Redl Gasse 2, Gstr.Nr. 698, Austria -> H2851


Unnamed: 0,uid,name,station_url,hid
0,2855,"1, Hausgrundweg 23, Gstr. 254, Austria",https://aqicn.org/city/@2855/,H2855
1,2857,"Umspannwerk Gaudenzdorfer Gürtel, Austria",https://aqicn.org/city/@2857/,H2857
2,14537,"Allgemeines Krankenhaus, Ostringweg (zwischen Gebäuden BT25), Austria",https://aqicn.org/city/@14537/,H14537
3,2870,"252, Belgradplatz (Südostecke), Gstr.Nr. 816, Austria",https://aqicn.org/city/@2870/,H2870
4,4736,"250, Wehlistraße 366, Gstr.Nr.2157, Austria",https://aqicn.org/city/@4736/,H4736
5,2860,"Ecke Taborstraße - Glockengasse, Austria",https://aqicn.org/city/@2860/,H2860
6,4738,"Floridsdorf, Gerichtsgasse 1a (Prager Str. 65m), Austria",https://aqicn.org/city/@4738/,H4738
7,2850,"Kendlerstraße 40 (Umspannwerk), Austria",https://aqicn.org/city/@2850/,H2850
8,4739,"4, Schafbergbad, Josef Redl Gasse 2, Gstr.Nr. 698, Austria",https://aqicn.org/city/@4739/,H4739
9,2813,"Laaer Berg, Theodor Sickel-Gasse 1, Austria",https://aqicn.org/city/@2813/,H2813


Saved mapping -> waqi_uid_to_hid.csv


In [None]:
# --- Step 2: Download historical PM2.5 CSV per station using HID ---