In [7]:
# bronze_to_silver_pollutant.py  ──────────────────────────────────────
import pandas as pd, pathlib, glob, re, html, sys
BRONZE_DEF = pathlib.Path("/workspaces/airoute_mlops/airoute_mlops/data/bronze/defra")
OUTPUT_PATH = pathlib.Path("/workspaces/airoute_mlops/airoute_mlops/data/silver/pollutant")

# ───────── helpers ───────────────────────────────────────────────────
def normalise(col: str) -> str:
    """lower-case, strip HTML <sub>, spaces."""
    return re.sub(r"</?sub>", "", html.unescape(col)).lower().strip()

DESIRED = ["site_id", "date_time", "pm25", "no2", "o3"]
REMAP   = {
    "ozone":                                 "o3",
    "nitrogen dioxide":                      "no2",
    "pm2.5 particulate matter (hourly measured)": "pm25"
}

# ───────── iterate CSVs ──────────────────────────────────────────────
frames = []
for fp in glob.glob(str(BRONZE_DEF / "*.csv")):
    site_id = pathlib.Path(fp).stem.split("_")[0]
    print(f"· Parsing {fp}")

    # 1. read & normalise column names
    df = pd.read_csv(fp, parse_dates=[["date", "time"]])
    df.columns = [normalise(c) for c in df.columns]

    # 2. rename pollutant columns to canonical names
    df = df.rename(columns=REMAP)

    # 3. ensure 'site_id' present
    df["site_id"] = site_id

    # 4. add any missing pollutant columns as NA
    for col in ["pm25", "no2", "o3"]:
        if col not in df.columns:
            df[col] = pd.NA

    # 5. keep exactly desired columns (others are dropped)
    df = df[DESIRED]
        # after df = df[DESIRED]
    df["date_time"] = pd.to_datetime(
        df["date_time"], format="%d-%m-%Y %H:%M", errors="coerce"
    ).dt.tz_localize("UTC")


    # 6. drop rows where ALL three pollutants are NA
    df = df.dropna(subset=["pm25", "no2", "o3"], how="all")

    if df.empty:
        print(f"  ⚠  {site_id}: no pollutant values → skipped")
        continue

    frames.append(df)

# ───────── write parquet ────────────────────────────────────────────
if not frames:
    sys.exit("❌ No valid pollutant rows found in any CSV!")

silver = pd.concat(frames, ignore_index=True)
# OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
# silver.to_parquet(OUTPUT_PATH, index=False)

# print(f"\n✅ Saved {len(silver):,} rows to {OUTPUT_PATH}")

# --- keep your imports / code above unchanged -----------------------

# after you build `silver`   (the DataFrame)
OUTPUT_PATH.mkdir(parents=True, exist_ok=True)         # make the dir once

outfile = OUTPUT_PATH / "pollutant_hourly_2025.parquet"   # ← the file

silver.to_parquet(outfile, index=False)                # write here

print(f"\n✅ Saved {len(silver):,} rows to {outfile}")



· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BLWD_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WAL4_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/PEGR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SOUT_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CAM_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ECCL_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/KIEW_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/GKA8_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WIG5_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CNPR_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SHLW_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/EAGL_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/STOR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WEYB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BEL2_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/COPP_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BRAS_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CHP_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/MY1_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HARW_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BMLD_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ESK_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/COBR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/PLYM_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/NWBV_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/DESB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/TOFT_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/NTN4_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/GLA4_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SLOW_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CHS7_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/KC1_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CARD_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ABD9_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/MALA_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/NPT3_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CAEB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/DYAG_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/YK10_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HG1_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/MACK_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SWHO_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ED3_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BIRR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WFEN_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/DCST_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HULR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/DERR_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/DUMB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LEAM_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/NOTK_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SDY_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CW_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LEED_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/MAID_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CHBR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/MIMW_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SA33_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HM_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/TAMM_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WTHG_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/PEGE_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/PLYR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/OX_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HG4_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/TRAN_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SK5_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BBRD_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/MKCC_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/STOK_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/PMTH_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/POAR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CHLG_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CARM_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/TDHD_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HUL2_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LECU_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LEAR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BLAR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LOTA_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HOPE_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ROCH_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/COAL_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/TALL_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BLC2_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/THUR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BDMP_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/EX_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/OX8_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/IMGM_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/AH_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/GGWR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ABD7_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BORN_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HSAW_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/NEWC_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SWA1_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CHBO_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LH_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WAKA_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/NOTB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BIHG_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BPLE_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/NO12_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WOEA_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CANK_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CHGR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ACTH_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/PEEB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HIL_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/REA1_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ARM6_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/GLKP_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WREX_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BOTR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ABD8_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/DUMF_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CHAT_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/NCA3_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BALM_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/MID_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CLL2_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SHUN_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/PEMB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BUSH_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/PRES_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SUNR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LED6_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LN_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SOTR_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SASH_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/TH2_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HP1_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LERW_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LEIR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HORS_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/REA5_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BDMA_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/FARC_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/HONI_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/MH_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/DCC1_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/EB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/GLOT_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/TED2_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/ROED_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BOLD_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WAR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BLAP_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/INV2_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WSMR_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LIN3_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SOUS_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BURW_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BAR3_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BHA4_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/RRKL_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BEL1_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SHE_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WBKP_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/YK11_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LUTR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SHBR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BAAR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/OSY_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CWMC_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/DESA_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/WORT_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/MAN3_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BRS8_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/GLAZ_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LOFS_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SUN2_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SIB_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SHDG_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/GHSR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SV_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BRT3_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/YW_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/CA1_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/STKR_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/BR11_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/EDNS_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])


· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LVPT_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/SEND_2025.csv
· Parsing /workspaces/airoute_mlops/airoute_mlops/data/bronze/defra/LONM_2025.csv


  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  df = pd.read_csv(fp, parse_dates=[["date", "time"]])
  silver = pd.concat(frames, ignore_index=True)



✅ Saved 892,080 rows to /workspaces/airoute_mlops/airoute_mlops/data/silver/pollutant/pollutant_hourly_2025.parquet


In [None]:
data = pd.read_parquet(outfile)  # read back to check
data.info()  # print info about the DataFrame