In [5]:
import pandas as pd
import numpy as np
import requests
import json

In [19]:
# Utility function to flatten AQS dict structure and use state names as keys and index as values
def process_aqs_dict(d_list):
    return {d["value_represented"]: d["code"] for d in d_list}

In [15]:
states = requests.get("https://aqs.epa.gov/data/api/list/states?email=test@aqs.api&key=test")
states_data = states.json()["Data"]
states_data = process_aqs_dict(states_data)
states_data

{'Alabama': '01',
 'Alaska': '02',
 'Arizona': '04',
 'Arkansas': '05',
 'California': '06',
 'Colorado': '08',
 'Connecticut': '09',
 'Delaware': '10',
 'District Of Columbia': '11',
 'Florida': '12',
 'Georgia': '13',
 'Hawaii': '15',
 'Idaho': '16',
 'Illinois': '17',
 'Indiana': '18',
 'Iowa': '19',
 'Kansas': '20',
 'Kentucky': '21',
 'Louisiana': '22',
 'Maine': '23',
 'Maryland': '24',
 'Massachusetts': '25',
 'Michigan': '26',
 'Minnesota': '27',
 'Mississippi': '28',
 'Missouri': '29',
 'Montana': '30',
 'Nebraska': '31',
 'Nevada': '32',
 'New Hampshire': '33',
 'New Jersey': '34',
 'New Mexico': '35',
 'New York': '36',
 'North Carolina': '37',
 'North Dakota': '38',
 'Ohio': '39',
 'Oklahoma': '40',
 'Oregon': '41',
 'Pennsylvania': '42',
 'Rhode Island': '44',
 'South Carolina': '45',
 'South Dakota': '46',
 'Tennessee': '47',
 'Texas': '48',
 'Utah': '49',
 'Vermont': '50',
 'Virginia': '51',
 'Washington': '53',
 'West Virginia': '54',
 'Wisconsin': '55',
 'Wyoming': '56

In [27]:
MA_code = states_data['Massachusetts']
MA_counties = requests.get(f"https://aqs.epa.gov/data/api/list/countiesByState?email=test@aqs.api&key=test&state={MA_code}")
MA_counties_data = MA_counties.json()["Data"]
MA_counties_data = process_aqs_dict(MA_counties_data)
MA_counties_data

{'Barnstable': '001',
 'Berkshire': '003',
 'Bristol': '005',
 'Dukes': '007',
 'Essex': '009',
 'Franklin': '011',
 'Hampden': '013',
 'Hampshire': '015',
 'Middlesex': '017',
 'Nantucket': '019',
 'Norfolk': '021',
 'Plymouth': '023',
 'Suffolk': '025',
 'Worcester': '027'}

In [29]:
# Boston is in Suffolk County
Suffolk_code = MA_counties_data["Suffolk"]
Suffolk_sites = requests.get(f"https://aqs.epa.gov/data/api/list/sitesByCounty?email=test@aqs.api&key=test&state={MA_code}&county={Suffolk_code}")
Suffolk_sites_data = Suffolk_sites.json()["Data"]
Suffolk_sites_data = process_aqs_dict(Suffolk_sites_data)
Suffolk_sites_data

{None: '2002',
 'BOSTON KENMORE SQ': '0002',
 'BOSTON CITY SQUARE': '0027',
 '531A EAST FIRST STREET': '0040',
 'BOSTON LONG ISLAND': '0041',
 'DUDLEY SQUARE ROXBURY': '0042',
 'NORTH END SITE CENTRAL ARTERY': '0043',
 'VON HILLERN ST': '0044',
 'Chinatown': '0045'}

In [30]:
requests.get("https://aqs.epa.gov/data/api/list/classes?email=test@aqs.api&key=test").json()

{'Header': [{'status': 'Success',
   'request_time': '2024-04-30T00:34:05-04:00',
   'url': 'https://aqs.epa.gov/data/api/list/classes?email=test@aqs.api&key=test',
   'rows': 27}],
 'Data': [{'code': 'AIRNOW MAPS',
   'value_represented': 'The parameters represented on AirNow maps (88101, 88502, and 44201)'},
  {'code': 'ALL', 'value_represented': 'Select all Parameters Available'},
  {'code': 'AQI POLLUTANTS',
   'value_represented': 'Pollutants that have an AQI Defined'},
  {'code': 'CORE_HAPS', 'value_represented': 'Urban Air Toxic Pollutants'},
  {'code': 'CRITERIA', 'value_represented': 'Criteria Pollutants'},
  {'code': 'CSN DART',
   'value_represented': 'List of CSN speciation parameters to populate the STI DART tool'},
  {'code': 'FORECAST',
   'value_represented': 'Parameters routinely extracted by AirNow (STI)'},
  {'code': 'HAPS', 'value_represented': 'Hazardous Air Pollutants'},
  {'code': 'IMPROVE CARBON', 'value_represented': 'IMPROVE Carbon Parameters'},
  {'code': 'IM

## EDA on county level data

In [36]:
suffolk_data = requests.get(f"https://aqs.epa.gov/data/api/dailyData/byCounty?email=test@aqs.api&key=test&param=88101&bdate=20160101&edate=20160229&state={MA_code}&county={Suffolk_code}").json()["Data"]
len(suffolk_data)

2274

In [37]:
suffolk_data[0]

{'state_code': '25',
 'county_code': '025',
 'site_number': '0043',
 'parameter_code': '88101',
 'poc': 1,
 'latitude': 42.3631,
 'longitude': -71.0543,
 'datum': 'WGS84',
 'parameter': 'PM2.5 - Local Conditions',
 'sample_duration_code': '7',
 'sample_duration': '24 HOUR',
 'pollutant_standard': 'PM25 24-hour 2006',
 'date_local': '2016-01-01',
 'units_of_measure': 'Micrograms/cubic meter (LC)',
 'event_type': 'No Events',
 'observation_count': 1,
 'observation_percent': 100.0,
 'validity_indicator': 'Y',
 'arithmetic_mean': 3.0,
 'first_max_value': 3.0,
 'first_max_hour': 0,
 'aqi': 13,
 'method_code': '145',
 'method': 'R & P Model 2025 PM-2.5 Sequential Air Sampler w/VSCC - Gravimetric',
 'local_site_name': 'NORTH END SITE CENTRAL ARTERY',
 'site_address': '174 NORTH ST',
 'state': 'Massachusetts',
 'county': 'Suffolk',
 'city': 'Boston',
 'cbsa_code': '14460',
 'cbsa': 'Boston-Cambridge-Newton, MA-NH',
 'date_of_last_change': '2021-11-08'}

## Pulling site specific data

In [33]:
Kenmore_code = Suffolk_sites_data["BOSTON KENMORE SQ"]
daily_data = requests.get(f"https://aqs.epa.gov/data/api/dailyData/bySite?email=test@aqs.api&key=test&param=44201&bdate=20230618&edate=20230618&state={MA_code}&county={Suffolk_code}&site={Kenmore_code}")
daily_data.json()

{'Header': [{'status': 'No data matched your selection',
   'request_time': '2024-04-30T00:36:04-04:00',
   'url': 'https://aqs.epa.gov/data/api/dailyData/bySite?email=test@aqs.api&key=test&param=44201&bdate=20230618&edate=20230618&state=25&county=025&site=0002',
   'rows': 0}],
 'Data': []}