# U.S. Environmental Protection Agency

In [1]:
import pandas as pd

### Data Gathering

In [2]:
# Source file: 2018 EPA Air Quality Index, one row per county.
# Expected shape is 1049 rows x 19 columns; one of those columns is the
# 'Unnamed: 0' row counter that was stored in the CSV itself.
AQI_CSV_PATH = '../Data/US_EPA_AQI_2018.csv'

aqi = pd.read_csv(AQI_CSV_PATH)
print(aqi.shape)  # quick sanity check against the expected shape
aqi.head()

(1049, 19)


Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days SO2,Days PM2.5,Days PM10
0,Alabama,Baldwin,2018,270,245,25,0,0,0,0,97,50,35,0,0,214,0,56,0
1,Alabama,Clay,2018,110,103,7,0,0,0,0,64,45,27,0,0,0,0,110,0
2,Alabama,Colbert,2018,277,251,26,0,0,0,0,93,50,35,0,0,209,0,68,0
3,Alabama,DeKalb,2018,350,316,34,0,0,0,0,84,50,35,0,0,317,0,33,0
4,Alabama,Elmore,2018,222,203,19,0,0,0,0,71,49,33,0,0,222,0,0,0


In [3]:
# Add two-letter postal abbreviations for the 50 U.S. states.
# Each state name sits next to its code in a single mapping, so a pair cannot
# drift out of alignment the way two parallel 50-element lists can.
abbrev_by_state = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA',
    'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
    'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
    'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO',
    'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
    'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
    'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
    'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
}

# Column-oriented form of the lookup ('State ID' first, then 'State'),
# in the same order as the mapping above.
state_abbrevs = {'State ID': list(abbrev_by_state.values()),
                 'State': list(abbrev_by_state)}

# Left join keeps every county row; a 'State' value that is not in the lookup
# ends up with NaN in 'State ID'.
# validate='many_to_one' makes pandas raise a MergeError if a state name ever
# appears twice in the lookup, instead of silently duplicating county rows.
aqi = aqi.merge(pd.DataFrame(state_abbrevs), on='State', how='left',
                validate='many_to_one')
aqi.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days SO2,Days PM2.5,Days PM10,State ID
0,Alabama,Baldwin,2018,270,245,25,0,0,0,0,97,50,35,0,0,214,0,56,0,AL
1,Alabama,Clay,2018,110,103,7,0,0,0,0,64,45,27,0,0,0,0,110,0,AL
2,Alabama,Colbert,2018,277,251,26,0,0,0,0,93,50,35,0,0,209,0,68,0,AL
3,Alabama,DeKalb,2018,350,316,34,0,0,0,0,84,50,35,0,0,317,0,33,0,AL
4,Alabama,Elmore,2018,222,203,19,0,0,0,0,71,49,33,0,0,222,0,0,0,AL


In [4]:
# Collapse the county rows to one row per state: for each 'State ID', take the
# median over its counties of 'Days with AQI' and 'Good Days'.
# AQI category reference: https://airnow.gov/index.cfm?action=aqibasics.aqi
# NOTE: groupby leaves out rows whose 'State ID' is NaN (pandas default), and
# astype(int) discards any fractional part of a median rather than rounding it.
aqi = (
    aqi.groupby(['State ID'])[['Days with AQI', 'Good Days']]
       .median()
       .astype(int)
)
aqi.head()

Unnamed: 0_level_0,Days with AQI,Good Days
State ID,Unnamed: 1_level_1,Unnamed: 2_level_1
AK,357,282
AL,324,251
AR,362,261
AZ,365,261
CA,365,203


In [5]:
# Summary statistics computed across the per-state rows of `aqi`
# (count, mean, std, min, quartiles and max for each column).
aqi.describe()

Unnamed: 0,Days with AQI,Good Days
count,50.0,50.0
mean,354.2,279.52
std,28.323928,37.699261
min,175.0,131.0
25%,358.25,262.25
50%,362.0,278.5
75%,364.75,306.0
max,365.0,355.0


### Data Loading (Saving the Results)

In [6]:
# Persist the per-state AQI summary (indexed by 'State ID') to a pickle file.
aqi.to_pickle("../Data/EPA.pkl")