# In [21]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests

# ────────────────────────────────────────────────────────────────────────────────
# CONFIGURATION
# ────────────────────────────────────────────────────────────────────────────────

API_KEY   = "f81c20bb8256d06db879269e6e9c07530db7ef28696d1aff5bfd5247fa4f6209"
HEADERS   = {"X-API-Key": API_KEY}  # must include on every call :contentReference[oaicite:3]{index=3}
MEAS_URL  = "https://api.openaq.org/v3/measurements"
PARAM_URL = "https://api.openaq.org/v3/parameters"

POLLUTANTS = ["pm25", "pm10", "no2", "o3"]
PAGE_SIZE  = 1000

# Continental U.S. bounding box: min_lon, min_lat, max_lon, max_lat :contentReference[oaicite:4]{index=4}
BBOX = "-124.7844,24.7433,-66.9514,49.3458"

# EPA short-term thresholds
EPA_THRESH = {
    "pm25": 35,   # µg/m³ (24h) :contentReference[oaicite:5]{index=5}
    "pm10":150,   # µg/m³ (24h) :contentReference[oaicite:6]{index=6}
    "no2": 100,   # ppb   (1h) :contentReference[oaicite:7]{index=7}
    "o3":  70     # ppb   (8h) :contentReference[oaicite:8]{index=8}
}


# ────────────────────────────────────────────────────────────────────────────────
# 1. FETCH PARAMETER IDs (for documentation only; we’ll use the names below)
# ────────────────────────────────────────────────────────────────────────────────

r = requests.get(PARAM_URL, headers=HEADERS, params={"limit":200})
r.raise_for_status()
params = {p["name"]: p["id"] for p in r.json().get("results", []) if p["name"] in POLLUTANTS}


# ────────────────────────────────────────────────────────────────────────────────
# 2. FETCH & VERIFY MEASUREMENTS PER POLLUTANT
# ────────────────────────────────────────────────────────────────────────────────

def fetch_us_data(parameter: str) -> pd.Series:
    """
    Query /v3/measurements?parameter=<name>&bbox=...
    Returns: Series indexed by city with mean value.
    """
    records, page = [], 1

    while True:
        resp = requests.get(
            MEAS_URL,
            headers=HEADERS,
            params={
                "parameter": parameter,  # filter by pollutant name :contentReference[oaicite:9]{index=9}
                "bbox":      BBOX,       
                "limit":     PAGE_SIZE,
                "page":      page
            }
        )
        resp.raise_for_status()
        results = resp.json().get("results", [])
        if not results:
            break

        # collect (city, value)
        for m in results:
            city = m.get("city")
            val  = m.get("value")
            if city and val is not None:
                records.append((city, val))

        if len(results) < PAGE_SIZE:
            break
        page += 1

    count = len(records)
    print(f"Fetched {count} records for '{parameter}'")  # check verify :contentReference[oaicite:10]{index=10}
    if count == 0:
        raise RuntimeError(f"No data found for '{parameter}' in U.S. bbox.")

    df = pd.DataFrame(records, columns=["city", parameter])
    return df.groupby("city")[parameter].mean()


# ────────────────────────────────────────────────────────────────────────────────
# 3. AGGREGATE ALL AND FLAG EXCEEDANCES
# ────────────────────────────────────────────────────────────────────────────────

city_data = {p: fetch_us_data(p) for p in POLLUTANTS}

df = pd.concat(city_data.values(), axis=1) \
       .reset_index().rename(columns={"index":"city"}) \
       .fillna(0)

# ensure numeric dtype
for p in POLLUTANTS:
    df[p] = pd.to_numeric(df[p], errors="coerce").fillna(0)

# flag exceedances
for p, thr in EPA_THRESH.items():
    df[f"{p}_exceeds"] = df[p] > thr
df["n_exceed"] = df[[f"{p}_exceeds" for p in POLLUTANTS]].sum(axis=1)
df.sort_values("n_exceed", ascending=False, inplace=True)


# ────────────────────────────────────────────────────────────────────────────────
# 4. PLOT TOP-10 U.S. CITIES FOR PM₂.₅ AND O₃
# ────────────────────────────────────────────────────────────────────────────────

if df.empty:
    print("No U.S. data to plot.")
else:
    # PM2.5
    top_pm25 = df.nlargest(10, "pm25")
    plt.figure(figsize=(10,5))
    plt.bar(top_pm25["city"], top_pm25["pm25"])
    plt.axhline(EPA_THRESH["pm25"], linestyle="--", label="EPA 35 µg/m³")
    plt.title("Top 10 U.S. Cities by Latest PM₂.₅")
    plt.xticks(rotation=45, ha="right")
    plt.ylabel("µg/m³")
    plt.legend()
    plt.tight_layout()
    plt.show()

    # O3
    top_o3 = df.nlargest(10, "o3")
    plt.figure(figsize=(10,5))
    plt.bar(top_o3["city"], top_o3["o3"])
    plt.axhline(EPA_THRESH["o3"], linestyle="--", label="EPA 70 ppb")
    plt.title("Top 10 U.S. Cities by Latest O₃")
    plt.xticks(rotation=45, ha="right")
    plt.ylabel("ppb")
    plt.legend()
    plt.tight_layout()
    plt.show()


# ────────────────────────────────────────────────────────────────────────────────
# 5. EXPORT TO CSV
# ────────────────────────────────────────────────────────────────────────────────

df.to_csv("us_air_quality_bbox.csv", index=False)
print("✅ Exported summary to us_air_quality_bbox.csv")


# Output recorded when this cell was run:
# HTTPError: 404 Client Error: Not Found for url: https://api.openaq.org/v3/measurements?parameter=pm25&bbox=-124.7844%2C24.7433%2C-66.9514%2C49.3458&limit=1000&page=1