Resource used: https://api-ninjas.com/api/airquality

In [4]:
import json
import os

import pandas as pd
import requests

## API Request to access Air Quality Information

### Information gathered for 100 cities around the DMV (Washington D.C., Maryland, and Virginia)

In [5]:
# Place names to query, in request order. Every name is listed exactly once so
# that no location is fetched twice.
# NOTE(review): only the bare name is sent to the API (no state or country), so
# ambiguous names such as 'Georgetown' or 'Springfield' may resolve to a place
# outside the region - verify the returned readings.
cities = [
    'Washington', 'Arlington', 'Alexandria', 'Fairfax', 'Rockville',
    'Bethesda', 'Silver Spring', 'Reston', 'Frederick', 'Gaithersburg',
    'Annapolis', 'Bowie', 'College Park', 'Greenbelt', 'Laurel',
    'Cumberland', 'Hagerstown', 'Salisbury', 'Westminster', 'Hyattsville',
    'Takoma Park', 'Chevy Chase', 'Kensington', 'Poolesville', 'Bladensburg',
    'Mount Rainier', 'Riverdale Park', 'Berwyn Heights', 'New Carrollton', 'Glenarden',
    'Brentwood', 'Capitol Heights', 'Colmar Manor', 'Cottage City', 'Cheverly',
    'Edmonston', 'Fairmount Heights', 'Forest Heights', 'Landover Hills', 'Morningside',
    'North Brentwood', 'Seat Pleasant', 'University Park', 'Upper Marlboro', 'Vienna',
    'Falls Church', 'Manassas', 'Leesburg', 'Herndon', 'Purcellville',
    'Warrenton', 'Culpeper', 'Winchester', 'Front Royal', 'Strasburg',
    'Woodstock', 'Luray', 'Berryville', 'Middleburg', 'Dumfries',
    'Occoquan', 'Haymarket', 'Clifton', 'Quantico', 'Hamilton',
    'Hillsboro', 'Lovettsville', 'Round Hill', 'Ashburn', 'Chantilly',
    'Sterling', 'Great Falls', 'McLean', 'Tysons', 'Burke',
    'Springfield', 'Lorton', 'Mount Vernon', 'Annandale', 'Oakton',
    'Centreville', 'Manassas Park', 'Gainesville', 'Bristow',
    'Nokesville', 'Dale City', 'Woodbridge', 'Stafford', 'Fredericksburg',
    'Potomac', 'Ellicott City', 'Germantown', 'Waldorf', 'Lanham',
    'Beltsville', 'Crofton', 'Oxon Hill', 'Sykesville', 'Olney', 'Georgetown', 'Dupont Circle'
]

In [6]:

# Endpoint of the API Ninjas air-quality service; the city is passed as a query
# parameter so that `requests` handles URL-encoding of names containing spaces.
api_url = 'https://api.api-ninjas.com/v1/airquality'

# Read the key from the environment so the secret never lives in the notebook.
# A key that was previously committed in this cell should be treated as
# compromised and rotated.
api_key = os.environ.get('API_NINJAS_KEY')
if not api_key:
    raise RuntimeError(
        "Set the API_NINJAS_KEY environment variable before running this cell."
    )

# Dictionary to store the results, keyed by city name
results = {}

# Loop through each distinct city (first-seen order preserved) and make an API request
for city in dict.fromkeys(cities):
    try:
        response = requests.get(
            api_url,
            params={'city': city},
            headers={'X-Api-Key': api_key},
            timeout=10,  # seconds; without it a stalled connection blocks the loop forever
        )
    except requests.RequestException as exc:
        # Best effort: report the transport failure and keep going with the next city.
        print("Error in city:", city, "-", exc)
        continue

    if response.status_code == requests.codes.ok:
        results[city] = response.json()
    else:
        print("Error in city:", city, "-", response.status_code, response.text)

# Save the results in a JSON file
with open('air_quality_data.json', 'w') as json_file:
    json.dump(results, json_file, indent=4)

print("Data saved in air_quality_data.json")

Data saved in air_quality_data.json


## Read Data from JSON File and Save as a Dataframe

In [7]:
# Read back the per-city API responses written by the request cell.
file_path = 'air_quality_data.json'
with open(file_path, 'r') as file:
    data = json.load(file)

# One record per city: the city name followed by each metric as returned by the
# API (a missing metric becomes None). Key order fixes the column order.
metric_keys = ('CO', 'NO2', 'O3', 'SO2', 'PM2.5', 'PM10', 'overall_aqi')
df_list = [
    {'city': city, **{key: metrics.get(key) for key in metric_keys}}
    for city, metrics in data.items()
]

df = pd.DataFrame(df_list)

df.head()


Unnamed: 0,city,CO,NO2,O3,SO2,PM2.5,PM10,overall_aqi
0,Washington,"{'concentration': 634.19, 'aqi': 7}","{'concentration': 77.46, 'aqi': 96}","{'concentration': 0.76, 'aqi': 0}","{'concentration': 4.05, 'aqi': 5}","{'concentration': 15.93, 'aqi': 51}","{'concentration': 19.89, 'aqi': 18}",96
1,Arlington,"{'concentration': 263.69, 'aqi': 2}","{'concentration': 9.17, 'aqi': 11}","{'concentration': 84.4, 'aqi': 123}","{'concentration': 3.07, 'aqi': 4}","{'concentration': 11.68, 'aqi': 37}","{'concentration': 12.86, 'aqi': 11}",123
2,Alexandria,"{'concentration': 220.3, 'aqi': 2}","{'concentration': 0.75, 'aqi': 0}","{'concentration': 85.83, 'aqi': 127}","{'concentration': 2.62, 'aqi': 3}","{'concentration': 39.78, 'aqi': 98}","{'concentration': 215.43, 'aqi': 130}",130
3,Fairfax,"{'concentration': 580.79, 'aqi': 6}","{'concentration': 75.4, 'aqi': 94}","{'concentration': 2.3, 'aqi': 1}","{'concentration': 3.55, 'aqi': 5}","{'concentration': 15.25, 'aqi': 49}","{'concentration': 19.79, 'aqi': 18}",94
4,Rockville,"{'concentration': 447.27, 'aqi': 5}","{'concentration': 56.21, 'aqi': 70}","{'concentration': 14.13, 'aqi': 11}","{'concentration': 4.41, 'aqi': 6}","{'concentration': 10.67, 'aqi': 34}","{'concentration': 14.02, 'aqi': 12}",70


## Clean the DataFrame

In [8]:
# Separate the concentration and AQI values into distinct columns.
#
# Each pollutant column holds a dict with 'concentration' and 'aqi' keys. The
# split is done on a copy and skips pollutants that were already split, so
# re-running this cell is a no-op instead of a KeyError.
pollutants = ['CO', 'NO2', 'O3', 'SO2', 'PM2.5', 'PM10']


def _reading_field(cell, field):
    """Return cell[field] when cell is a reading dict, else None (metric missing)."""
    return cell.get(field) if isinstance(cell, dict) else None


df_split = df.copy()
for pollutant in pollutants:
    if pollutant not in df_split.columns:
        continue  # already split by an earlier run of this cell
    readings = df_split.pop(pollutant)
    df_split[f'{pollutant}_concentration'] = readings.apply(
        lambda cell: _reading_field(cell, 'concentration')
    )
    df_split[f'{pollutant}_aqi'] = readings.apply(
        lambda cell: _reading_field(cell, 'aqi')
    )

# Reorder the columns for better readability
columns_order = ['city', 'CO_concentration', 'CO_aqi', 'NO2_concentration', 'NO2_aqi',
                 'O3_concentration', 'O3_aqi', 'SO2_concentration', 'SO2_aqi',
                 'PM2.5_concentration', 'PM2.5_aqi', 'PM10_concentration', 'PM10_aqi', 'overall_aqi']
df = df_split[columns_order]

df

Unnamed: 0,city,CO_concentration,CO_aqi,NO2_concentration,NO2_aqi,O3_concentration,O3_aqi,SO2_concentration,SO2_aqi,PM2.5_concentration,PM2.5_aqi,PM10_concentration,PM10_aqi,overall_aqi
0,Washington,634.19,7,77.46,96,0.76,0,4.05,5,15.93,51,19.89,18,96
1,Arlington,263.69,2,9.17,11,84.40,123,3.07,4,11.68,37,12.86,11,123
2,Alexandria,220.30,2,0.75,0,85.83,127,2.62,3,39.78,98,215.43,130,130
3,Fairfax,580.79,6,75.40,94,2.30,1,3.55,5,15.25,49,19.79,18,94
4,Rockville,447.27,5,56.21,70,14.13,11,4.41,6,10.67,34,14.02,12,70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,Oxon Hill,627.52,7,62.38,77,0.11,0,3.22,4,14.62,47,19.05,17,77
97,Sykesville,417.23,4,41.81,52,26.11,22,5.78,8,9.25,30,13.38,12,52
98,Olney,226.97,2,1.67,2,87.98,132,2.35,3,10.94,35,11.20,10,132
99,Georgetown,310.42,3,0.06,0,23.60,20,0.05,0,1.18,3,2.44,2,20


In [9]:
# Place names grouped by jurisdiction; used below to label every row of the frame.
virginia_cities = {
    'Arlington', 'Alexandria', 'Fairfax', 'Reston', 'Vienna',
    'Falls Church', 'Manassas', 'Leesburg', 'Herndon', 'Purcellville',
    'Warrenton', 'Culpeper', 'Winchester', 'Front Royal', 'Strasburg',
    'Woodstock', 'Luray', 'Berryville', 'Middleburg', 'Dumfries',
    'Occoquan', 'Haymarket', 'Clifton', 'Quantico', 'Hamilton',
    'Hillsboro', 'Lovettsville', 'Round Hill', 'Ashburn', 'Chantilly',
    'Sterling', 'Great Falls', 'McLean', 'Tysons', 'Burke',
    'Springfield', 'Lorton', 'Mount Vernon', 'Annandale', 'Oakton',
    'Centreville', 'Manassas Park', 'Gainesville', 'Bristow', 'Nokesville',
    'Dale City', 'Woodbridge', 'Stafford', 'Fredericksburg',
}

maryland_cities = {
    'Rockville', 'Bethesda', 'Silver Spring', 'Frederick', 'Gaithersburg',
    'Annapolis', 'Bowie', 'College Park', 'Greenbelt', 'Laurel',
    'Cumberland', 'Hagerstown', 'Salisbury', 'Westminster', 'Hyattsville',
    'Takoma Park', 'Chevy Chase', 'Kensington', 'Poolesville', 'Bladensburg',
    'Mount Rainier', 'Riverdale Park', 'Berwyn Heights', 'New Carrollton', 'Glenarden',
    'Brentwood', 'Capitol Heights', 'Colmar Manor', 'Cottage City', 'Cheverly',
    'Edmonston', 'Fairmount Heights', 'Forest Heights', 'Landover Hills', 'Morningside',
    'North Brentwood', 'Seat Pleasant', 'University Park', 'Upper Marlboro', 'Potomac',
    'Ellicott City', 'Germantown', 'Waldorf', 'Lanham', 'Beltsville',
    'Crofton', 'Oxon Hill', 'Sykesville', 'Olney',
}

washington_dc = {'Washington', 'Georgetown', 'Dupont Circle'}


def _area_for(city):
    """Return the area label for a place name.

    Membership is checked in the order Virginia, Maryland, Washington D.C.;
    a name found in none of the sets is labelled 'Unknown'.
    """
    if city in virginia_cities:
        return 'Virginia'
    if city in maryland_cities:
        return 'Maryland'
    if city in washington_dc:
        return 'Washington D.C.'
    return 'Unknown'


# Work on a copy so the frame without the area column stays available as `df`.
df1 = df.copy()
df1['area'] = df1['city'].apply(_area_for)

df1

Unnamed: 0,city,CO_concentration,CO_aqi,NO2_concentration,NO2_aqi,O3_concentration,O3_aqi,SO2_concentration,SO2_aqi,PM2.5_concentration,PM2.5_aqi,PM10_concentration,PM10_aqi,overall_aqi,area
0,Washington,634.19,7,77.46,96,0.76,0,4.05,5,15.93,51,19.89,18,96,Washington D.C.
1,Arlington,263.69,2,9.17,11,84.40,123,3.07,4,11.68,37,12.86,11,123,Virginia
2,Alexandria,220.30,2,0.75,0,85.83,127,2.62,3,39.78,98,215.43,130,130,Virginia
3,Fairfax,580.79,6,75.40,94,2.30,1,3.55,5,15.25,49,19.79,18,94,Virginia
4,Rockville,447.27,5,56.21,70,14.13,11,4.41,6,10.67,34,14.02,12,70,Maryland
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,Oxon Hill,627.52,7,62.38,77,0.11,0,3.22,4,14.62,47,19.05,17,77,Maryland
97,Sykesville,417.23,4,41.81,52,26.11,22,5.78,8,9.25,30,13.38,12,52,Maryland
98,Olney,226.97,2,1.67,2,87.98,132,2.35,3,10.94,35,11.20,10,132,Maryland
99,Georgetown,310.42,3,0.06,0,23.60,20,0.05,0,1.18,3,2.44,2,20,Washington D.C.


## Save Output

In [10]:
# Persist the cleaned frame that has no area classification.
# The frame's index is written too (pandas default), as the first, unnamed CSV column.
plain_csv_path = 'dmv_air-quality-data.csv'
df.to_csv(plain_csv_path)

In [11]:
# Persist the frame that carries the extra 'area' classification column.
# The frame's index is written too (pandas default), as the first, unnamed CSV column.
area_csv_path = 'dmv-area_air-quality-data.csv'
df1.to_csv(area_csv_path)