In [1]:
import os 
import requests
from bs4 import BeautifulSoup
import json

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the AQS credential entry out of the shared JSON credentials file.
with open("../../../src/api_credentials.json") as creds_file:
    aqs_creds = json.load(creds_file)["aqs_credentials"]

# Unpack the two values every AQS request needs: account email and API key.
aqs_email, aqs_pw = aqs_creds["aqs_email"], aqs_creds["aqs_password"]

In [3]:
# function to pull data by county

def aqs_by_county(state_id, county_id, bdate_, edate_, param_, aqs_email, aqs_pw, timeout=60):
    """
    Fetches sample data from the Air Quality System (AQS) API for a specified county.

    Parameters:
    - state_id (str): 2 digit FIPS code for the state (with leading zero)
    - county_id (str): 3 digit FIPS code for the county within the state (with leading zeroes)
    - bdate_ (str): The beginning date in the format 'YYYYMMDD'
    - edate_ (str): The ending date in the format 'YYYYMMDD'
    - param_ (str): The AQS parameter code for the data selection. AQS uses proprietary 5 digit codes. They may be obtained via the list parameters service.
    - aqs_email (str): The email associated with the AQS API key
    - aqs_pw (str): The AQS API key
    - timeout (float or tuple, optional): Seconds to wait for the server before
      giving up; passed straight to requests.get. Defaults to 60.

    Returns:
    dict: The decoded JSON body of the API response.

    Raises:
    requests.exceptions.Timeout: If the server does not answer within `timeout`.
    requests.exceptions.HTTPError: If the server answers with a 4xx/5xx status.
    """

    url = "https://aqs.epa.gov/data/api/sampleData/byCounty"

    params = {
        "email": aqs_email,
        "key": aqs_pw,
        "state": state_id,
        "county": county_id,
        "bdate": bdate_,
        "edate": edate_,
        "param": param_
    }

    # requests has no default timeout; without one a stalled connection
    # would block the notebook cell indefinitely.
    response = requests.get(url, params=params, timeout=timeout)

    # Surface HTTP-level failures here instead of handing the caller an
    # error payload that looks like a normal (possibly empty) result.
    response.raise_for_status()

    return response.json()

In [4]:
# Inputs for the per-county function calls. The 11 counties covered are:
# Box Elder, Cache, Rich, Weber, Morgan, Davis, Summit, Salt Lake, Tooele, Wasatch, Utah
# Codes obtained from here: https://www.census.gov/library/reference/code-lists/ansi.html

utah_state_id = "49"

# 3-digit county codes, kept as strings so the leading zeroes are preserved
box_elder_id, cache_id, rich_id = "003", "005", "033"
weber_id, morgan_id, davis_id = "057", "029", "011"
summit_id, salt_lake_id, tooele_id = "043", "035", "045"
wasatch_id, utah_county_id = "051", "049"

# the order of this list is the order in which the counties get queried
utah_county_ids = [
    box_elder_id,
    cache_id,
    rich_id,
    weber_id,
    morgan_id,
    davis_id,
    summit_id,
    salt_lake_id,
    tooele_id,
    wasatch_id,
    utah_county_id,
]

# query window, both ends in YYYYMMDD form
beginning_date, end_date = "20230101", "20231231"

pm2_5lc_param = "88101" # can be found here: https://aqs.epa.gov/aqsweb/documents/codetables/methods_all.html

In [5]:
# Pull the PM2.5 samples for every SLC-area county over the current
# beginning_date / end_date window and merge them into one dataframe.

# One API call per county; the "Data" records of each response become
# that county's dataframe, collected here in a list.
county_data_list_dfs = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm2_5lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2023 = pd.concat(county_data_list_dfs, ignore_index=True)

In [6]:
# Write the combined 2023 frame to a CSV file in the working directory.
all_slc_counties_2023.to_csv(path_or_buf="aqs-all-slc-counties-2023.csv")

In [7]:
# Parameter code for the PM10.0 (local conditions) pulls, used instead of the PM2.5 code.
# NOTE(review): the author was not sure which code to use and tagged both
# 85101 and 81102 as "WRONG" - TODO confirm against the AQS parameter list.

# Active choice; labelled by the author as "PM10 Speciated Parameters".
pm10lc_param = "85101"

# Alternatives that were tried and left disabled:
#   "81102" - labelled "PM10 24-hour standard. Violations include actual
#             exceedences and expected exceedences where data is missing."
#             (also tagged WRONG)
#   "82103" - Arsenic
#   "88101" - PM2.5

In [8]:
# Pull the pm10lc_param samples for every SLC-area county over the current
# beginning_date / end_date window (presumably still the 2023 window set
# earlier - verify when running cells out of order) and merge them.

# One API call per county; the "Data" records of each response become
# that county's dataframe, collected here in a list.
county_data_list_dfs_2023 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2023_pm10 = pd.concat(county_data_list_dfs_2023, ignore_index=True)

In [9]:
# Write the combined 2023 PM10 frame to a CSV file in the working directory.
all_slc_counties_2023_pm10.to_csv(path_or_buf="aqs-all-slc-counties-2023-pm10.csv")

In [10]:
# Move the query window to calendar year 2022.
beginning_date, end_date = "20220101", "20221231"

# Pull the pm10lc_param samples for every SLC-area county in 2022:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2022 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2022_pm10 = pd.concat(county_data_list_dfs_2022, ignore_index=True)

In [11]:
# Move the query window to calendar year 2021.
beginning_date, end_date = "20210101", "20211231"

# Pull the pm10lc_param samples for every SLC-area county in 2021:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2021 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2021_pm10 = pd.concat(county_data_list_dfs_2021, ignore_index=True)

In [12]:
# Move the query window to calendar year 2020.
beginning_date, end_date = "20200101", "20201231"

# Pull the pm10lc_param samples for every SLC-area county in 2020:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2020 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2020_pm10 = pd.concat(county_data_list_dfs_2020, ignore_index=True)

In [13]:
# Move the query window to calendar year 2019.
beginning_date, end_date = "20190101", "20191231"

# Pull the pm10lc_param samples for every SLC-area county in 2019:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2019 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2019_pm10 = pd.concat(county_data_list_dfs_2019, ignore_index=True)

In [14]:
# Move the query window to calendar year 2018.
beginning_date, end_date = "20180101", "20181231"

# Pull the pm10lc_param samples for every SLC-area county in 2018:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2018 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2018_pm10 = pd.concat(county_data_list_dfs_2018, ignore_index=True)

In [15]:
# Move the query window to calendar year 2017.
beginning_date, end_date = "20170101", "20171231"

# Pull the pm10lc_param samples for every SLC-area county in 2017:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2017 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2017_pm10 = pd.concat(county_data_list_dfs_2017, ignore_index=True)

In [16]:
# Move the query window to calendar year 2016.
beginning_date, end_date = "20160101", "20161231"

# Pull the pm10lc_param samples for every SLC-area county in 2016:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2016 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2016_pm10 = pd.concat(county_data_list_dfs_2016, ignore_index=True)

In [17]:
# Move the query window to calendar year 2015.
beginning_date, end_date = "20150101", "20151231"

# Pull the pm10lc_param samples for every SLC-area county in 2015:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2015 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2015_pm10 = pd.concat(county_data_list_dfs_2015, ignore_index=True)

In [18]:
# Move the query window to calendar year 2014.
beginning_date, end_date = "20140101", "20141231"

# Pull the pm10lc_param samples for every SLC-area county in 2014:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2014 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2014_pm10 = pd.concat(county_data_list_dfs_2014, ignore_index=True)

In [19]:
# Move the query window to calendar year 2013.
beginning_date, end_date = "20130101", "20131231"

# Pull the pm10lc_param samples for every SLC-area county in 2013:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2013 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2013_pm10 = pd.concat(county_data_list_dfs_2013, ignore_index=True)

In [20]:
# Move the query window to calendar year 2012.
beginning_date, end_date = "20120101", "20121231"

# Pull the pm10lc_param samples for every SLC-area county in 2012:
# one API call per county, each response's "Data" records in its own dataframe.
county_data_list_dfs_2012 = [
    pd.DataFrame(
        aqs_by_county(utah_state_id, fips_code, beginning_date, end_date,
                      pm10lc_param, aqs_email, aqs_pw)["Data"]
    )
    for fips_code in utah_county_ids
]

# Stack the per-county frames and renumber the rows from zero.
all_slc_counties_2012_pm10 = pd.concat(county_data_list_dfs_2012, ignore_index=True)

In [21]:
# Collect the twelve yearly PM10 frames, oldest (2012) first, newest (2023) last.
pm10_frames_by_year = [
    all_slc_counties_2012_pm10,
    all_slc_counties_2013_pm10,
    all_slc_counties_2014_pm10,
    all_slc_counties_2015_pm10,
    all_slc_counties_2016_pm10,
    all_slc_counties_2017_pm10,
    all_slc_counties_2018_pm10,
    all_slc_counties_2019_pm10,
    all_slc_counties_2020_pm10,
    all_slc_counties_2021_pm10,
    all_slc_counties_2022_pm10,
    all_slc_counties_2023_pm10,
]

# Stack them into one frame and renumber the rows from zero.
all_slc_counties_pm10_2012_2023 = pd.concat(pm10_frames_by_year, ignore_index=True)

In [22]:
# Write the combined 2012-2023 PM10 frame to a CSV file in the working directory.

all_slc_counties_pm10_2012_2023.to_csv(path_or_buf="aqs-all-slc-counties-pm10-2012-2023.csv")