In [None]:
# === Get AQS data ===
# Downloads sample data from the EPA AQS Data Mart API for one monitoring site,
# one calendar year at a time, and writes each year's merged response to
# ./data/downloaded/aqs/myron{year}.json as {"Header": [...], "Data": [...]}.

import json
import os
import time

import requests
import tqdm

# AQS parameter code -> short name. The keys are the codes requested from the
# API below; insertion order determines the order in which they are requested.
PARAM_DICT = {
    "44201": "o3",
    "68105": "avg_temp",
    "62101": "outdoor_temp",
    "61301": "mix_height",
    "42101": "co",
    "42601": "no",
    "42602": "no2",
    "88101": "pm25",
    "86101": "pm10_25"
}

AQS_URL = 'https://aqs.epa.gov/data/api/sampleData/bySite'
AQS_OUT_DIR = './data/downloaded/aqs'
# The AQS API documentation limits `param` to 5 comma-separated codes per
# request, so the codes in PARAM_DICT are requested in batches.
AQS_MAX_PARAMS_PER_REQUEST = 5
# The AQS terms of use ask clients to pause between requests.
AQS_PAUSE_SECONDS = 5
# Without a timeout a stalled connection would hang the notebook forever.
AQS_TIMEOUT_SECONDS = 120

# Credentials are read from the environment so they never live in the notebook.
# NOTE(review): a key was previously committed in this cell; it should be
# treated as leaked and regenerated through the AQS signup endpoint.
aqs_email = os.environ.get('AQS_EMAIL')
aqs_key = os.environ.get('AQS_KEY')
if not aqs_email or not aqs_key:
    raise RuntimeError(
        "Set the AQS_EMAIL and AQS_KEY environment variables before running this cell."
    )

os.makedirs(AQS_OUT_DIR, exist_ok=True)

param_codes = list(PARAM_DICT)
param_batches = [
    param_codes[start:start + AQS_MAX_PARAMS_PER_REQUEST]
    for start in range(0, len(param_codes), AQS_MAX_PARAMS_PER_REQUEST)
]

for year in tqdm.tqdm(range(1998,2026)):
    # Responses of all batches for this year, merged into the same
    # {"Header", "Data"} layout a single response has.
    merged = {'Header': [], 'Data': []}
    for batch in param_batches:
        r = requests.get(
            url=AQS_URL,
            params={
                'email': aqs_email,
                'key': aqs_key,
                'param': ",".join(batch),
                'bdate': f'{year}0101',
                'edate': f'{year}1231',
                'state': '44',
                'county': '007',
                'site': '1010'
            },
            timeout=AQS_TIMEOUT_SECONDS
        )
        # Stop on HTTP errors instead of saving an error payload as if it were data.
        r.raise_for_status()
        payload = r.json()
        # Assumes "Header" and "Data" are JSON arrays, as in AQS responses — TODO confirm.
        merged['Header'].extend(payload.get('Header', []))
        merged['Data'].extend(payload.get('Data', []))
        time.sleep(AQS_PAUSE_SECONDS)
    with open(f'{AQS_OUT_DIR}/myron{year}.json', 'w') as f:
        json.dump(merged, f, indent=2)

In [None]:
import pandas as pd
import os
import json

# AQS parameter code -> column name used in the combined dataset.
# Repeated here so this cell can run without the download cell.
PARAM_DICT = {
    "44201": "o3",
    "68105": "avg_temp",
    "62101": "outdoor_temp",
    "61301": "mix_height",
    "42101": "co",
    "42601": "no",
    "42602": "no2",
    "88101": "pm25",
    "86101": "pm10_25"
}

AQS_DIR = "./data/downloaded/aqs/"
# Parameters that are downloaded but not kept in the combined dataset.
DROPPED_COLUMNS = ['avg_temp', 'mix_height', 'pm10_25']

df_list:list[pd.DataFrame] = []

# sorted(): os.listdir returns entries in arbitrary order.
for dataset in sorted(os.listdir(AQS_DIR)):
    # Only real .json files; a substring test would also match e.g. "json_notes.txt".
    if not dataset.lower().endswith(".json"): continue
    with open(os.path.join(AQS_DIR, dataset)) as f:
        raw_json = json.load(f)

    records = raw_json.get('Data') or []
    if not records:
        # A file without samples would produce an empty frame that lacks the
        # date_gmt/time_gmt columns and crash below, so skip it.
        continue
    raw_df = pd.json_normalize(records)

    # Build a fresh long-format frame (one row per sample) rather than assigning
    # into a column slice, which triggers SettingWithCopyWarning.
    df = pd.DataFrame({
        'datetime_gmt': pd.to_datetime(raw_df['date_gmt'] + ' ' + raw_df['time_gmt'], utc=True),
        # Unknown codes raise KeyError instead of being dropped silently.
        'parameter_code': raw_df['parameter_code'].map(lambda code: PARAM_DICT[code]),
        # Guarantees a numeric dtype even when every measurement in the file is null.
        'sample_measurement': pd.to_numeric(raw_df['sample_measurement']),
    })

    # Long -> wide: one row per timestamp, one column per parameter. pivot_table's
    # default aggregation (mean) combines multiple samples of the same parameter
    # at the same timestamp.
    df = df.pivot_table(
        index='datetime_gmt',
        columns='parameter_code',
        values='sample_measurement',
        dropna=False
    ).reset_index()
    df = df.drop(columns=DROPPED_COLUMNS, errors='ignore')
    df_list.append(df)

if not df_list:
    raise ValueError(f"No AQS sample data found in {AQS_DIR}; run the download cell first.")

aq_ds = pd.concat(df_list)
aq_ds['datetime_gmt'] = pd.to_datetime(aq_ds['datetime_gmt'], utc=True, errors='raise')
# Sort so the saved time series is chronological regardless of file order.
aq_ds = aq_ds.set_index('datetime_gmt').sort_index()
aq_ds.to_pickle('./data/downloaded/myron_ds.pkl')
aq_ds.to_json('./data/downloaded/myron_ds.json', index=True)