In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as pls
import config

# Loading and displaying US cities data

In [46]:
cities = pd.read_csv('../data/maps/uscities.csv', header=0)
print(cities)
print(type(cities))

              city   city_ascii state_id  state_name  county_fips   
0         New York     New York       NY    New York        36061  \
1      Los Angeles  Los Angeles       CA  California         6037   
2          Chicago      Chicago       IL    Illinois        17031   
3            Miami        Miami       FL     Florida        12086   
4           Dallas       Dallas       TX       Texas        48113   
...            ...          ...      ...         ...          ...   
28333        Gross        Gross       NE    Nebraska        31015   
28334       Lotsee       Lotsee       OK    Oklahoma        40143   
28335    The Ranch    The Ranch       MN   Minnesota        27087   
28336     Shamrock     Shamrock       OK    Oklahoma        40037   
28337       Monowi       Monowi       NE    Nebraska        31015   

       county_name      lat       lng  population  density   source  military   
0         New York  40.6943  -73.9249    18713220    10715  polygon     False  \
1      Lo

In [47]:
biggest_cities = cities.sort_values(by=['population'], ascending=False).head(75)
print("Twenty biggest cities:")
print(biggest_cities)
cities = biggest_cities

Twenty biggest cities:
                city        city_ascii state_id      state_name  county_fips   
0           New York          New York       NY        New York        36061  \
1        Los Angeles       Los Angeles       CA      California         6037   
2            Chicago           Chicago       IL        Illinois        17031   
3              Miami             Miami       FL         Florida        12086   
4             Dallas            Dallas       TX           Texas        48113   
..               ...               ...      ...             ...          ...   
70  Colorado Springs  Colorado Springs       CO        Colorado         8041   
71        Charleston        Charleston       SC  South Carolina        45019   
72       Springfield       Springfield       MA   Massachusetts        25013   
73      Grand Rapids      Grand Rapids       MI        Michigan        26081   
74     Mission Viejo     Mission Viejo       CA      California         6059   

    county_name 

# Loading and displaying Pollution data

In [48]:
import requests
import json

In [49]:
def get_air_pollution_history(lat, lon, start, end, api_key):
    api_url = f"http://api.openweathermap.org/data/2.5/air_pollution?lat={lat}&lon={lon}&start={start}&end={end}&appid={api_key}"
    response = requests.get(api_url)

    if response.status_code == 200:
        data = response.json()
        return data
    else:
        print("Error:", response.status_code)
        return None

Get air pollution history for a single city.

In [50]:
start_time = 1652299344 # May 11, 2022 8:02:24 PM
end_time = 1683835344 # May 11, 2023 8:02:24 PM
api_key = config.api_key


new_york = cities[cities['city'] == 'New York']

print(new_york)

new_york_data = get_air_pollution_history(new_york.lat.values[0], new_york.lng.values[0], start_time, end_time, api_key)

       city city_ascii state_id state_name  county_fips county_name      lat   
0  New York   New York       NY   New York        36061    New York  40.6943  \

       lng  population  density   source  military  incorporated   
0 -73.9249    18713220    10715  polygon     False          True  \

           timezone  ranking   
0  America/New_York        1  \

                                                zips          id  
0  11229 11226 11225 11224 11222 11221 11220 1138...  1840034016  


In [51]:
print(new_york_data)

{'coord': {'lon': -73.9249, 'lat': 40.6943}, 'list': [{'main': {'aqi': 1}, 'components': {'co': 427.25, 'no': 19.67, 'no2': 32.9, 'o3': 40.77, 'so2': 10.73, 'pm2_5': 5.56, 'pm10': 9.55, 'nh3': 3.17}, 'dt': 1684159131}]}


In [52]:
def merge_json(data):
    result = list()
    result.extend(json.load(data))

In [53]:
print(cities.shape)

(75, 17)


In [54]:
pollution_list = []
for index, row in cities.iterrows():
    # print(row.lat)
    data = get_air_pollution_history(row.lat, row.lng, start_time, end_time, api_key)
    # print(data)
    pollution_list.append(data)

In [55]:
# print(pollution_list)
# print()

In [56]:
file_path = '../data/pollution/pollution_data_simple.json'
with open(file_path, "w") as json_file:
    json.dump(pollution_list, json_file)