In [0]:
import requests, json, time
from datetime import datetime, timezone

In [0]:
# --- API setup: endpoint, request headers and query parameters ---

# Endpoint that every request in this notebook is sent to.
BASE = "https://api.met.no/weatherapi/airqualityforecast/0.1/"

# Identifying User-Agent string (application name/version plus a contact address).
UA = "HermanAirQualityDemo/1.0 herman.lillejord@gmail.com"
HEADERS = {
    "User-Agent": UA,
    "Accept": "application/json",
}

# Query location, sent as the `lat` / `lon` request parameters.
lat = 60.46
lon = 5.35

# Value sent as the `areaclass` request parameter.
areaclass = "kommune"

# Variables to fetch; each one is requested separately via `filter_vars`.
vars_to_pull = [
    "AQI",
    "pm25_concentration",
    "pm10_concentration",
    "no2_concentration",
    "o3_concentration",
    "so2_concentration",
]

In [0]:
# --- Output location inside the volume ---
# Root directory of the volume that receives the JSON dumps
# (adjust this if your volume lives at a different path).
volume_root = "/Volumes/main_uc/bronze/met_bergen_airquality_jsondumps"

# UTC timestamp of this run, laid out as nested folders: YYYY/MM/DD/HHMMSS.
run_started_utc = datetime.now(timezone.utc)
ts_part = run_started_utc.strftime("%Y/%m/%d/%H%M%S")

# Each run writes exactly one newline-delimited JSON file into its own timestamped folder.
target_dir = "/".join((volume_root, ts_part))
target_file = "/".join((target_dir, "airquality.jsonl"))


In [0]:
# --- Fetch forecasts & flatten them into records (one dict per variable and time step) ---
def _records_for_variable(var, payload):
    """Flatten one parsed API response into record dicts for a single variable.

    Args:
        var: Name of the variable that was requested (e.g. "AQI").
        payload: Parsed JSON body of the response. The time steps are read
            from ``payload["data"]["time"]``; a missing or null level is
            treated as "no time steps" instead of raising.

    Returns:
        A list of dicts with the keys ``time_from``, ``time_to``, ``variable``
        and ``value``. ``value`` is None when the variable is absent from a step.
    """
    time_steps = ((payload or {}).get("data") or {}).get("time") or []
    flattened = []
    for step in time_steps:
        variables = step.get("variables") or {}
        # Key casing is not relied upon: try the name as given, then the
        # upper-case and lower-case spellings.
        var_obj = (
            variables.get(var)
            or variables.get(var.upper())
            or variables.get(var.lower())
            or {}
        )
        flattened.append({
            "time_from": step.get("from"),
            "time_to":   step.get("to"),
            "variable":  var,
            "value":     var_obj.get("value"),
        })
    return flattened


records = []
# A single Session lets consecutive requests reuse the same connection.
with requests.Session() as session:
    for position, var in enumerate(vars_to_pull):
        if position:
            time.sleep(0.2)  # be polite to the API: pause between consecutive requests
        params = {"lat": lat, "lon": lon, "areaclass": areaclass, "filter_vars": var}
        response = session.get(BASE, params=params, headers=HEADERS, timeout=30)
        response.raise_for_status()  # stop on any HTTP error status
        records.extend(_records_for_variable(var, response.json()))

In [0]:

# Serialise every record to its own JSON document (non-ASCII characters kept as-is),
# then join the documents with newlines to form the JSON Lines text.
serialized_records = [json.dumps(record, ensure_ascii=False) for record in records]
jsonl_str = "\n".join(serialized_records)

# Make sure the target directory exists, then write the text as one file,
# replacing any file already at that path.
dbutils.fs.mkdirs(target_dir)
dbutils.fs.put(target_file, jsonl_str, overwrite=True)



In [0]:
# Read the file that was just written back as a DataFrame and render it
# as a quick check of what landed in the volume.
df = spark.read.json(target_file)
display(df)