In [1]:
import requests
import pandas as pd
import dotenv
import os

In [2]:
# Load the AQS credentials from my .env file into the process environment.
# Importing dotenv does not read the file by itself: load_dotenv() has to be called,
# otherwise os.getenv() only sees variables that were already exported in the shell.
dotenv.load_dotenv()

email = os.getenv("AQS_EMAIL")
key = os.getenv("AQS_KEY")

# Fail fast here instead of sending empty credentials to the API in later cells.
if not email or not key:
    raise RuntimeError("AQS_EMAIL and AQS_KEY must be set (e.g. in the .env file)")

In [3]:
# Starting by looking up the parameter codes in the CRITERIA class, to find the
# parameter information needed for my first sample data query.
#
# The query values are passed via `params` so that requests URL-encodes them
# (characters such as "+" or "&" in the email/key would otherwise corrupt the query
# string), and a timeout keeps an unresponsive server from hanging the kernel.
criteria_url = "https://aqs.epa.gov/data/api/list/parametersByClass"
criteria_response = requests.get(
    criteria_url,
    params={"email": email, "key": key, "pc": "CRITERIA"},
    timeout=(10, 300),  # (connect, read) in seconds; this API has taken over a minute to respond
)
criteria_response.status_code

200

9/23/25: The connection seems to be fine, but I'm getting a 503 status code (Service Unavailable), which indicates that the server is temporarily unable to handle the request, e.g. because of high traffic or maintenance. From my research, I learned that for PM2.5 (local conditions) I'll need parameter code 88101, and that the state code for Hawaii is 15.

9/24/25: success! It took over one minute but returned a status code of 200

In [4]:
# Base url for sample data by state.
url = "https://aqs.epa.gov/data/api/sampleData/byState"
state_number = "15"  # Hawaii
param = "88101"      # PM2.5 - Local Conditions

# Query values go through `params` so requests URL-encodes them, and a timeout
# keeps a slow or unavailable server from blocking the kernel indefinitely.
response = requests.get(
    url,
    params={
        "email": email,
        "key": key,
        "param": param,
        "bdate": "20220101",
        "edate": "20220115",
        "state": state_number,
    },
    timeout=(10, 300),  # (connect, read) in seconds
)

if response.status_code != 200:
    # Stop here: otherwise `sample_data` is either undefined or left over from an
    # earlier run, and the following cells would fail or silently use stale data.
    raise RuntimeError(f"error fetching data: {response.status_code}")

sample_data = response.json()


9/23/25: substantial loading time, 503 error
9/24/25: 200 success!

In [18]:
# Inspect the top-level keys of the parsed JSON response.
sample_data.keys()

dict_keys(['Header', 'Data'])

In [8]:
# Check which container type holds the 'Data' payload.
type(sample_data['Data'])

list

In [9]:
# Count how many records the query returned.
len(sample_data['Data'])

6813

In [11]:
# Look at the first record to see which fields each entry carries.
sample_data['Data'][0]

{'state_code': '15',
 'county_code': '001',
 'site_number': '0005',
 'parameter_code': '88101',
 'poc': 1,
 'latitude': 19.4308,
 'longitude': -155.2578,
 'datum': 'WGS84',
 'parameter': 'PM2.5 - Local Conditions',
 'date_local': '2022-01-01',
 'time_local': '00:00',
 'date_gmt': '2022-01-01',
 'time_gmt': '10:00',
 'sample_measurement': 3.0,
 'units_of_measure': 'Micrograms/cubic meter (LC)',
 'units_of_measure_code': '105',
 'sample_duration': '1 HOUR',
 'sample_duration_code': '1',
 'sample_frequency': 'HOURLY',
 'detection_limit': 5.0,
 'uncertainty': None,
 'qualifier': None,
 'method_type': 'FEM',
 'method': 'Met One BAM-1020 Mass Monitor w/VSCC - Beta Attenuation',
 'method_code': '170',
 'state': 'Hawaii',
 'county': 'Hawaii',
 'date_of_last_change': '2022-03-29',
 'cbsa_code': '25900'}

From exploring, I can see that I have a list of 6813 dictionaries, each containing the fields shown above. To make the data easier to analyze and to save, I'll convert the response into a DataFrame.

In [None]:
# Build a DataFrame from the records stored under the 'Data' key.
df = pd.DataFrame(sample_data['Data'])

In [15]:
# Preview the first rows of the DataFrame.
df.head()

Unnamed: 0,state_code,county_code,site_number,parameter_code,poc,latitude,longitude,datum,parameter,date_local,...,detection_limit,uncertainty,qualifier,method_type,method,method_code,state,county,date_of_last_change,cbsa_code
0,15,1,5,88101,1,19.4308,-155.2578,WGS84,PM2.5 - Local Conditions,2022-01-01,...,5.0,,,FEM,Met One BAM-1020 Mass Monitor w/VSCC - Beta At...,170,Hawaii,Hawaii,2022-03-29,25900
1,15,1,5,88101,1,19.4308,-155.2578,WGS84,PM2.5 - Local Conditions,2022-01-01,...,5.0,,,FEM,Met One BAM-1020 Mass Monitor w/VSCC - Beta At...,170,Hawaii,Hawaii,2022-03-29,25900
2,15,1,5,88101,1,19.4308,-155.2578,WGS84,PM2.5 - Local Conditions,2022-01-01,...,5.0,,,FEM,Met One BAM-1020 Mass Monitor w/VSCC - Beta At...,170,Hawaii,Hawaii,2022-03-29,25900
3,15,1,5,88101,1,19.4308,-155.2578,WGS84,PM2.5 - Local Conditions,2022-01-01,...,5.0,,,FEM,Met One BAM-1020 Mass Monitor w/VSCC - Beta At...,170,Hawaii,Hawaii,2022-03-29,25900
4,15,1,5,88101,1,19.4308,-155.2578,WGS84,PM2.5 - Local Conditions,2022-01-01,...,5.0,,,FEM,Met One BAM-1020 Mass Monitor w/VSCC - Beta At...,170,Hawaii,Hawaii,2022-03-29,25900


In [19]:
# Save the raw pull. index=False keeps the row index out of the file, so that
# reading it back does not produce a spurious "Unnamed: 0" column.
# NOTE: the code columns (e.g. county_code '001') are strings with leading zeros;
# read the file back with dtype=str for those columns to preserve them.
df.to_csv("../../Data/Raw/aqs_sample_9_24_25.csv", index=False)