In [1]:
# importing dependencies

import requests
import pandas as pd
from key import api_key, email

### <font color='#ff6600'> Collecting Daily Ozone Data for the city of Austin (Travis County) </font>

In [7]:
# function to retrieve daily ozone data


def get_daily_summary(email, api_key, param, bdate, edate, state, county):
    """Fetch daily summary rows for one county from the EPA AQS API.

    Sends a GET request to the ``dailyData/byCounty`` endpoint and turns the
    ``Data`` list of the JSON response into a DataFrame restricted to the
    columns this notebook uses.

    Parameters
    ----------
    email, api_key : str
        Sent as the ``email`` and ``key`` query parameters.
    param : str
        Sent as the ``param`` query parameter (the notebook passes "44201").
    bdate, edate : str
        Sent as ``bdate`` / ``edate`` (the notebook passes YYYYMMDD strings).
    state, county : str
        Sent as the ``state`` / ``county`` query parameters.

    Returns
    -------
    pandas.DataFrame or None
        One row per record in ``Data``. A ``state_cd`` column is appended when
        a ``convert_state_name`` callable exists in the notebook namespace.
        ``None`` when the HTTP status is not 200 or ``Data`` is empty.
    """
    url = "https://aqs.epa.gov/data/api/dailyData/byCounty"

    payload = {
        "email": email,
        "key": api_key,
        "param": param,
        "bdate": bdate,
        "edate": edate,
        "state": state,
        "county": county
    }

    response = requests.get(url, params=payload)

    if response.status_code != 200:
        return None

    # Parse the body once instead of once per row.
    records = response.json()["Data"]
    if not records:
        return None

    columns = [
        "state",
        "date_local",
        "city",
        "county",
        "parameter",
        "sample_duration",
        "observation_count",
        "aqi",
        "arithmetic_mean",
        "first_max_value",
        "units_of_measure",
        "latitude",
        "longitude",
    ]

    # Build the frame in a single call: DataFrame.append was removed in
    # pandas 2.0, and appending row by row copies the frame on every step.
    df = pd.DataFrame(records)[columns]

    # NOTE(review): convert_state_name is not defined in the visible part of
    # this notebook, so an unconditional call raises NameError on a fresh
    # kernel. The column is added only when the helper is actually in scope.
    converter = globals().get("convert_state_name")
    if converter is not None:
        df = df.assign(state_cd=df["state"].apply(converter))

    return df

In [8]:
# Retrieve Ozone (O3, param "44201") daily data for 2015-2019, one request
# per calendar year, into oz_2015_df ... oz_2019_df.

# state code for Texas and county code for city of Austin (Travis county)
state_code = "48"
county = "453"

ozone_frames_by_year = {}
for year in range(2015, 2020):
    fetched = get_daily_summary(
        email=email,
        api_key=api_key,
        param="44201",
        bdate=str(year) + "0101",
        edate=str(year) + "1231",
        state=state_code,
        county=county,
    )
    # get_daily_summary returns None when the request fails or has no rows;
    # store an empty frame then so every year's variable is a DataFrame.
    # (DataFrame.append, used previously, was removed in pandas 2.0.)
    ozone_frames_by_year[year] = pd.DataFrame() if fetched is None else fetched

oz_2015_df = ozone_frames_by_year[2015]
oz_2016_df = ozone_frames_by_year[2016]
oz_2017_df = ozone_frames_by_year[2017]
oz_2018_df = ozone_frames_by_year[2018]
oz_2019_df = ozone_frames_by_year[2019]

In [12]:
# filtering and selecting only required columns


def _daily_max_ozone(raw_frame):
    """Return the per-day maximum ``arithmetic_mean`` of Austin 8-hour rows.

    Keeps rows whose ``city`` is "Austin" and whose ``sample_duration`` is
    "8-HR RUN AVG BEGIN HOUR", groups them by ``date_local`` and takes the
    maximum ``arithmetic_mean``. The result has two columns: ``date_local``
    and ``Ozone(ppm)``.
    """
    keep = (raw_frame["city"] == "Austin") & (
        raw_frame["sample_duration"] == "8-HR RUN AVG BEGIN HOUR"
    )
    austin_8hr = raw_frame[keep]
    # Aggregate the FILTERED rows. The previous version grouped the unfiltered
    # frame, so the city / sample-duration filter had no effect on the output.
    return (
        austin_8hr.groupby("date_local")["arithmetic_mean"]
        .max()
        .reset_index()
        .rename(columns={"arithmetic_mean": "Ozone(ppm)"})
    )


oz_2015_data = _daily_max_ozone(oz_2015_df)
oz_2016_data = _daily_max_ozone(oz_2016_df)
oz_2017_data = _daily_max_ozone(oz_2017_df)
oz_2018_data = _daily_max_ozone(oz_2018_df)
oz_2019_data = _daily_max_ozone(oz_2019_df)


In [14]:
# Write each year's daily ozone table to its own CSV (row index omitted).

ozone_exports = [
    ("../data/daily_ozone_2015_austin.csv", oz_2015_data),
    ("../data/daily_ozone_2016_austin.csv", oz_2016_data),
    ("../data/daily_ozone_2017_austin.csv", oz_2017_data),
    ("../data/daily_ozone_2018_austin.csv", oz_2018_data),
    ("../data/daily_ozone_2019_austin.csv", oz_2019_data),
]

for csv_path, yearly_table in ozone_exports:
    yearly_table.to_csv(csv_path, index=False)

### <font color = '#ff6600'> Collecting Daily PM2.5 Data for the city of Austin (Travis County) </font>

In [19]:
# function to get daily average PM2.5 data

def get_daily_pm(email, api_key, param, bdate, edate, state, county):
    """Fetch daily summary rows for one county from the EPA AQS API.

    Sends a GET request to the ``dailyData/byCounty`` endpoint and turns the
    ``Data`` list of the JSON response into a DataFrame restricted to the
    columns this notebook uses.

    Parameters
    ----------
    email, api_key : str
        Sent as the ``email`` and ``key`` query parameters.
    param : str
        Sent as the ``param`` query parameter (the notebook passes "88101").
    bdate, edate : str
        Sent as ``bdate`` / ``edate`` (the notebook passes YYYYMMDD strings).
    state, county : str
        Sent as the ``state`` / ``county`` query parameters.

    Returns
    -------
    pandas.DataFrame or None
        One row per record in ``Data``; ``None`` when the HTTP status is not
        200 or ``Data`` is empty.
    """
    url = "https://aqs.epa.gov/data/api/dailyData/byCounty"

    payload = {
        "email": email,
        "key": api_key,
        "param": param,
        "bdate": bdate,
        "edate": edate,
        "state": state,
        "county": county
    }

    response = requests.get(url, params=payload)

    if response.status_code != 200:
        return None

    # Parse the body once instead of once per row.
    records = response.json()["Data"]
    if not records:
        return None

    columns = [
        "aqi",
        "state",
        "city",
        "county",
        "date_local",
        "local_site_name",
        "sample_duration",
        "parameter",
        "observation_count",
        "arithmetic_mean",
        "first_max_value",
        "units_of_measure",
        "latitude",
        "longitude",
    ]

    # Build the frame in a single call: DataFrame.append was removed in
    # pandas 2.0, and appending row by row copies the frame on every step.
    return pd.DataFrame(records)[columns]


In [32]:
# Collect daily average PM2.5 data (param "88101") for 2015-2019, one request
# per calendar year, into pm_2015_df ... pm_2019_df.

# state code for Texas and county code for city of Austin (Travis county)
state_code = "48"
county = "453"

pm25_frames_by_year = {}
for year in range(2015, 2020):
    fetched = get_daily_pm(
        email=email,
        api_key=api_key,
        param="88101",
        bdate=str(year) + "0101",
        edate=str(year) + "1231",
        state=state_code,
        county=county,
    )
    # get_daily_pm returns None when the request fails or has no rows;
    # store an empty frame then so every year's variable is a DataFrame.
    # (DataFrame.append, used previously, was removed in pandas 2.0.)
    pm25_frames_by_year[year] = pd.DataFrame() if fetched is None else fetched

pm_2015_df = pm25_frames_by_year[2015]
pm_2016_df = pm25_frames_by_year[2016]
pm_2017_df = pm25_frames_by_year[2017]
pm_2018_df = pm25_frames_by_year[2018]
pm_2019_df = pm25_frames_by_year[2019]

In [48]:
# Reduce each year's raw PM2.5 rows to one value per day for Austin.


def _daily_max_pm25(raw_frame):
    """Return the per-day maximum ``arithmetic_mean`` of the Austin rows.

    The result has two columns: ``date_local`` and ``PM2.5``.
    """
    austin_rows = raw_frame[raw_frame["city"] == "Austin"]
    per_day_max = austin_rows.groupby("date_local")["arithmetic_mean"].max()
    return per_day_max.to_frame().reset_index().rename(
        columns={"arithmetic_mean": "PM2.5"}
    )


pm_2015_data = _daily_max_pm25(pm_2015_df)
pm_2016_data = _daily_max_pm25(pm_2016_df)
pm_2017_data = _daily_max_pm25(pm_2017_df)
pm_2018_data = _daily_max_pm25(pm_2018_df)
pm_2019_data = _daily_max_pm25(pm_2019_df)


In [49]:
# Write each year's daily PM2.5 table to its own CSV (row index omitted).

pm25_exports = [
    ("../data/daily_pm_2015_austin.csv", pm_2015_data),
    ("../data/daily_pm_2016_austin.csv", pm_2016_data),
    ("../data/daily_pm_2017_austin.csv", pm_2017_data),
    ("../data/daily_pm_2018_austin.csv", pm_2018_data),
    ("../data/daily_pm_2019_austin.csv", pm_2019_data),
]

for csv_path, yearly_table in pm25_exports:
    yearly_table.to_csv(csv_path, index=False)

### <font color = '#ff6600'> Collecting Daily Nitrogen dioxide NO2 Data for the city of Austin (Travis County) </font>

In [71]:
# function to get daily average Nitrogen dioxide (NO2) data

def get_daily_no(email, api_key, param, bdate, edate, state, county):
    """Fetch daily summary rows for one county from the EPA AQS API.

    Sends a GET request to the ``dailyData/byCounty`` endpoint and turns the
    ``Data`` list of the JSON response into a DataFrame restricted to the
    columns this notebook uses.

    Parameters
    ----------
    email, api_key : str
        Sent as the ``email`` and ``key`` query parameters.
    param : str
        Sent as the ``param`` query parameter (the notebook passes "42602").
    bdate, edate : str
        Sent as ``bdate`` / ``edate`` (the notebook passes YYYYMMDD strings).
    state, county : str
        Sent as the ``state`` / ``county`` query parameters.

    Returns
    -------
    pandas.DataFrame or None
        One row per record in ``Data``; ``None`` when the HTTP status is not
        200 or ``Data`` is empty.
    """
    url = "https://aqs.epa.gov/data/api/dailyData/byCounty"

    payload = {
        "email": email,
        "key": api_key,
        "param": param,
        "bdate": bdate,
        "edate": edate,
        "state": state,
        "county": county
    }

    response = requests.get(url, params=payload)

    if response.status_code != 200:
        return None

    # Parse the body once instead of once per row.
    records = response.json()["Data"]
    if not records:
        return None

    columns = [
        "aqi",
        "state",
        "city",
        "county",
        "date_local",
        "local_site_name",
        "sample_duration",
        "parameter",
        "observation_count",
        "arithmetic_mean",
        "first_max_value",
        "units_of_measure",
        "latitude",
        "longitude",
    ]

    # Build the frame in a single call: DataFrame.append was removed in
    # pandas 2.0, and appending row by row copies the frame on every step.
    return pd.DataFrame(records)[columns]


In [8]:
# Collect daily average Nitrogen dioxide (NO2, param "42602") data for
# 2015-2019, one request per calendar year, into no2_2015_df ... no2_2019_df.

# state code for Texas and county code for city of Austin (Travis county)
state_code = "48"
county = "453"

no2_frames_by_year = {}
for year in range(2015, 2020):
    fetched = get_daily_no(
        email=email,
        api_key=api_key,
        param="42602",
        bdate=str(year) + "0101",
        edate=str(year) + "1231",
        state=state_code,
        county=county,
    )
    # get_daily_no returns None when the request fails or has no rows;
    # store an empty frame then so every year's variable is a DataFrame.
    # (DataFrame.append, used previously, was removed in pandas 2.0.)
    no2_frames_by_year[year] = pd.DataFrame() if fetched is None else fetched

no2_2015_df = no2_frames_by_year[2015]
no2_2016_df = no2_frames_by_year[2016]
no2_2017_df = no2_frames_by_year[2017]
no2_2018_df = no2_frames_by_year[2018]
no2_2019_df = no2_frames_by_year[2019]

In [12]:
# Reduce each year's raw NO2 rows to one value per day for Austin.


def _daily_max_no2(raw_frame):
    """Return the per-day maximum ``arithmetic_mean`` of the Austin rows.

    The result has two columns: ``date_local`` and ``NO2``.
    """
    austin_rows = raw_frame[raw_frame["city"] == "Austin"]
    per_day_max = austin_rows.groupby("date_local")["arithmetic_mean"].max()
    return per_day_max.to_frame().reset_index().rename(
        columns={"arithmetic_mean": "NO2"}
    )


no2_2015_data = _daily_max_no2(no2_2015_df)
no2_2016_data = _daily_max_no2(no2_2016_df)
no2_2017_data = _daily_max_no2(no2_2017_df)
no2_2018_data = _daily_max_no2(no2_2018_df)
no2_2019_data = _daily_max_no2(no2_2019_df)


In [16]:
# Write each year's daily NO2 table to its own CSV (row index omitted).

no2_exports = [
    ("../data/daily_no2_2015_austin.csv", no2_2015_data),
    ("../data/daily_no2_2016_austin.csv", no2_2016_data),
    ("../data/daily_no2_2017_austin.csv", no2_2017_data),
    ("../data/daily_no2_2018_austin.csv", no2_2018_data),
    ("../data/daily_no2_2019_austin.csv", no2_2019_data),
]

for csv_path, yearly_table in no2_exports:
    yearly_table.to_csv(csv_path, index=False)

### <font color = '#ff6600'> Collecting Daily Sulfur dioxide Data for the city of Austin (Travis County) </font>

In [72]:
# function to get daily average Sulfur Dioxide (SO2) data

def get_daily_so(email, api_key, param, bdate, edate, state, county):
    """Fetch daily summary rows for one county from the EPA AQS API.

    Sends a GET request to the ``dailyData/byCounty`` endpoint and turns the
    ``Data`` list of the JSON response into a DataFrame restricted to the
    columns this notebook uses.

    Parameters
    ----------
    email, api_key : str
        Sent as the ``email`` and ``key`` query parameters.
    param : str
        Sent as the ``param`` query parameter (the notebook passes "42401").
    bdate, edate : str
        Sent as ``bdate`` / ``edate`` (the notebook passes YYYYMMDD strings).
    state, county : str
        Sent as the ``state`` / ``county`` query parameters.

    Returns
    -------
    pandas.DataFrame or None
        One row per record in ``Data``; ``None`` when the HTTP status is not
        200 or ``Data`` is empty.
    """
    url = "https://aqs.epa.gov/data/api/dailyData/byCounty"

    payload = {
        "email": email,
        "key": api_key,
        "param": param,
        "bdate": bdate,
        "edate": edate,
        "state": state,
        "county": county
    }

    response = requests.get(url, params=payload)

    if response.status_code != 200:
        return None

    # Parse the body once instead of once per row.
    records = response.json()["Data"]
    if not records:
        return None

    columns = [
        "aqi",
        "state",
        "city",
        "county",
        "date_local",
        "local_site_name",
        "sample_duration",
        "parameter",
        "observation_count",
        "arithmetic_mean",
        "first_max_value",
        "units_of_measure",
        "latitude",
        "longitude",
    ]

    # Build the frame in a single call: DataFrame.append was removed in
    # pandas 2.0, and appending row by row copies the frame on every step.
    return pd.DataFrame(records)[columns]


In [51]:
# Collect daily average Sulfur dioxide (SO2, param "42401") data for
# 2015-2019, one request per calendar year, into so_2015_df ... so_2019_df.

# state code for Texas and county code for city of Austin (Travis county)
state_code = "48"
county = "453"

so_frames_by_year = {}
for year in range(2015, 2020):
    # Use this section's own fetcher. The previous version called
    # get_daily_no here, which left get_daily_so unused and made this cell
    # depend on the NO2 section having been run first.
    fetched = get_daily_so(
        email=email,
        api_key=api_key,
        param="42401",
        bdate=str(year) + "0101",
        edate=str(year) + "1231",
        state=state_code,
        county=county,
    )
    # get_daily_so returns None when the request fails or has no rows;
    # store an empty frame then so every year's variable is a DataFrame.
    # (DataFrame.append, used previously, was removed in pandas 2.0.)
    so_frames_by_year[year] = pd.DataFrame() if fetched is None else fetched

so_2015_df = so_frames_by_year[2015]
so_2016_df = so_frames_by_year[2016]
so_2017_df = so_frames_by_year[2017]
so_2018_df = so_frames_by_year[2018]
so_2019_df = so_frames_by_year[2019]

In [54]:
# Reduce each year's raw sulfur dioxide rows to one value per day for Austin.


def _daily_max_so(raw_frame):
    """Return the per-day maximum ``arithmetic_mean`` of the Austin rows.

    The result has two columns: ``date_local`` and ``SO``.
    """
    austin_rows = raw_frame[raw_frame["city"] == "Austin"]
    per_day_max = austin_rows.groupby("date_local")["arithmetic_mean"].max()
    return per_day_max.to_frame().reset_index().rename(
        columns={"arithmetic_mean": "SO"}
    )


so_2015_data = _daily_max_so(so_2015_df)
so_2016_data = _daily_max_so(so_2016_df)
so_2017_data = _daily_max_so(so_2017_df)
so_2018_data = _daily_max_so(so_2018_df)
so_2019_data = _daily_max_so(so_2019_df)


In [60]:
# Write each year's daily sulfur dioxide table to its own CSV (row index omitted).

so_exports = [
    ("../data/daily_so_2015_austin.csv", so_2015_data),
    ("../data/daily_so_2016_austin.csv", so_2016_data),
    ("../data/daily_so_2017_austin.csv", so_2017_data),
    ("../data/daily_so_2018_austin.csv", so_2018_data),
    ("../data/daily_so_2019_austin.csv", so_2019_data),
]

for csv_path, yearly_table in so_exports:
    yearly_table.to_csv(csv_path, index=False)

### <font color = '#ff6600'> Collecting Daily PM 10 Data for the city of Austin (Travis County) </font>

In [82]:
# function to get daily average PM10 Total 0-10um STP data

def get_daily_pm10(email, api_key, param, bdate, edate, state, county):
    """Fetch daily summary rows for one county from the EPA AQS API.

    Sends a GET request to the ``dailyData/byCounty`` endpoint and turns the
    ``Data`` list of the JSON response into a DataFrame restricted to the
    columns this notebook uses.

    Parameters
    ----------
    email, api_key : str
        Sent as the ``email`` and ``key`` query parameters.
    param : str
        Sent as the ``param`` query parameter (the notebook passes "81102").
    bdate, edate : str
        Sent as ``bdate`` / ``edate`` (the notebook passes YYYYMMDD strings).
    state, county : str
        Sent as the ``state`` / ``county`` query parameters.

    Returns
    -------
    pandas.DataFrame or None
        One row per record in ``Data``; ``None`` when the HTTP status is not
        200 or ``Data`` is empty.
    """
    url = "https://aqs.epa.gov/data/api/dailyData/byCounty"

    payload = {
        "email": email,
        "key": api_key,
        "param": param,
        "bdate": bdate,
        "edate": edate,
        "state": state,
        "county": county
    }

    response = requests.get(url, params=payload)

    if response.status_code != 200:
        return None

    # Parse the body once instead of once per row.
    records = response.json()["Data"]
    if not records:
        return None

    columns = [
        "aqi",
        "state",
        "city",
        "county",
        "date_local",
        "local_site_name",
        "sample_duration",
        "parameter",
        "observation_count",
        "arithmetic_mean",
        "first_max_value",
        "units_of_measure",
        "latitude",
        "longitude",
    ]

    # Build the frame in a single call: DataFrame.append was removed in
    # pandas 2.0, and appending row by row copies the frame on every step.
    return pd.DataFrame(records)[columns]


In [83]:
# Collect daily average PM10 data (param "81102") for 2015-2019, one request
# per calendar year, into pm10_2015_df ... pm10_2019_df.

# state code for Texas and county code for city of Austin (Travis county)
state_code = "48"
county = "453"

pm10_frames_by_year = {}
for year in range(2015, 2020):
    fetched = get_daily_pm10(
        email=email,
        api_key=api_key,
        param="81102",
        bdate=str(year) + "0101",
        edate=str(year) + "1231",
        state=state_code,
        county=county,
    )
    # get_daily_pm10 returns None when the request fails or has no rows;
    # store an empty frame then so every year's variable is a DataFrame.
    # (DataFrame.append, used previously, was removed in pandas 2.0.)
    pm10_frames_by_year[year] = pd.DataFrame() if fetched is None else fetched

pm10_2015_df = pm10_frames_by_year[2015]
pm10_2016_df = pm10_frames_by_year[2016]
pm10_2017_df = pm10_frames_by_year[2017]
pm10_2018_df = pm10_frames_by_year[2018]
pm10_2019_df = pm10_frames_by_year[2019]

In [89]:
# Reduce each year's raw PM10 rows to one value per day for Austin.


def _daily_max_pm10(raw_frame):
    """Return the per-day maximum ``arithmetic_mean`` of the Austin rows.

    The result has two columns: ``date_local`` and ``PM10``.
    """
    austin_rows = raw_frame[raw_frame["city"] == "Austin"]
    per_day_max = austin_rows.groupby("date_local")["arithmetic_mean"].max()
    return per_day_max.to_frame().reset_index().rename(
        columns={"arithmetic_mean": "PM10"}
    )


pm10_2015_data = _daily_max_pm10(pm10_2015_df)
pm10_2016_data = _daily_max_pm10(pm10_2016_df)
pm10_2017_data = _daily_max_pm10(pm10_2017_df)
pm10_2018_data = _daily_max_pm10(pm10_2018_df)
pm10_2019_data = _daily_max_pm10(pm10_2019_df)


In [91]:
# Write each year's daily PM10 table to its own CSV (row index omitted).

pm10_exports = [
    ("../data/daily_pm10_2015_austin.csv", pm10_2015_data),
    ("../data/daily_pm10_2016_austin.csv", pm10_2016_data),
    ("../data/daily_pm10_2017_austin.csv", pm10_2017_data),
    ("../data/daily_pm10_2018_austin.csv", pm10_2018_data),
    ("../data/daily_pm10_2019_austin.csv", pm10_2019_data),
]

for csv_path, yearly_table in pm10_exports:
    yearly_table.to_csv(csv_path, index=False)