In [1]:
import pandas as pd
import folium

In [2]:
def generateBaseMap(default_location=None, default_zoom_start=12):
    """Create a new folium map with the scale control enabled.

    Parameters
    ----------
    default_location : sequence of two floats, optional
        [latitude, longitude] the map is centred on. When omitted, the map is
        centred on [40.693943, -73.985880].
    default_zoom_start : int, optional
        Initial zoom level handed to folium (default 12).

    Returns
    -------
    folium.Map
        The freshly created map; no layers or markers are added here.
    """
    # A list literal in the signature would be one object shared by every
    # call, so the default location is created per call instead.
    if default_location is None:
        default_location = [40.693943, -73.985880]
    return folium.Map(location=default_location, control_scale=True, zoom_start=default_zoom_start)

In [3]:
# Build a base map with the default centre and zoom, then show it as the cell output.
basemap = generateBaseMap()
basemap

In [4]:
# Load the raw measurements and build one identifier per monitoring site
# out of the state, county and site codes.
raw_data_copy = pd.read_csv(r'pollution_us_2000_2016.csv')
raw_data_copy['State County Site Code'] = raw_data_copy['State Code'].astype(str).str.cat(
    [raw_data_copy['County Code'].astype(str), raw_data_copy['Site Num'].astype(str)],
    sep='-',
)

# Fill missing AQI readings with the mean AQI of the same site on the same day.
for aqi_column in ('SO2 AQI', 'CO AQI'):
    site_day_mean = (
        raw_data_copy
        .groupby(['State County Site Code', 'Date Local'])[aqi_column]
        .transform('mean')
    )
    raw_data_copy[aqi_column] = raw_data_copy[aqi_column].fillna(site_day_mean)

# Drop rows that still contain a missing value, then the code columns that
# are no longer needed.
raw_data_copy = raw_data_copy.dropna().drop(
    ['Unnamed: 0', 'State Code', 'County Code', 'Site Num'], axis=1
)

# Spelling fixes applied to the place names.
county_names = {
    'Bronx': 'New York',   # Bronx and Queens are relabelled as New York county
    'Queens': 'New York',
    'Saint Louis': 'St. Louis',
    'St. Louis City': 'St. Louis',
    'Alexandria City': 'Alexandria',
    'Saint Clair': 'St. Clair',
    'Hampton City': 'Hampton',
    'Fairbanks North Star ': 'Fairbanks North Star',  # trailing space removed
}
city_names = {
    'Indianapolis (Remainder)': 'Indianapolis',
    'Lexington-Fayette (corporate name for Lexington)': 'Lexington',
    'East Saint Louis': 'East St. Louis',
    'East Hartford': 'Hartford',
    'Dentsville (Dents)': 'Dentsville',
}

# Rows that are removed entirely.
excluded_cities = [
    'Capitan',
    'Calumet City (PU RR name Calumet Park (sta.))',
    'West Los Angeles',  # no coordinates for this city
]
usable_rows = (
    (raw_data_copy['State'] != 'Country Of Mexico')  # country of mexico not in USA
    & ~raw_data_copy['City'].isin(excluded_cities)
)

raw_data_copy = raw_data_copy[usable_rows].assign(
    State=lambda frame: frame['State'].replace('District Of Columbia', 'District of Columbia'),
    County=lambda frame: frame['County'].replace(county_names),
    City=lambda frame: frame['City'].replace(city_names),
)
# Columns that are averaged, and descriptive columns that are carried along unchanged.
pollutant_columns = ['NO2 Mean', 'NO2 AQI', 'O3 Mean', 'O3 AQI', 'SO2 Mean', 'SO2 AQI', 'CO Mean', 'CO AQI']
unit_columns = ['NO2 Units', 'O3 Units', 'SO2 Units', 'CO Units']
place_columns = ['State', 'County', 'City', 'Date Local']


def _group_means_with_labels(frame, group_keys, label_columns):
    """Replace the pollutant columns by their per-group mean and re-attach labels.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source rows; must contain the pollutant columns, ``group_keys`` and
        ``label_columns``.
    group_keys : list of str
        Columns that define a group.
    label_columns : list of str
        Columns copied over from ``frame`` unchanged, in this order.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``frame`` (same index): the group means followed by
        the label columns. Rows of one group become identical only if their
        label columns agree, so callers collapse them with ``drop_duplicates``.
    """
    averaged = frame.groupby(group_keys)[pollutant_columns].transform('mean')
    for column in label_columns:
        averaged[column] = frame[column]
    return averaged


# Site level: average all readings taken at the same address on the same day.
# `raw_data_copy` is rebound to this frame (before de-duplication).
raw_data_copy = _group_means_with_labels(
    raw_data_copy,
    ['Address', 'Date Local'],
    ['Address'] + place_columns + unit_columns,
)
data_with_site = raw_data_copy.drop_duplicates()

# City level: average the sites of one city per day. A city is identified by
# state, county and name (the key the coordinate merge in the next cell uses),
# so same-named cities in different states or counties are not averaged together.
new_data = _group_means_with_labels(
    data_with_site,
    ['State', 'County', 'City', 'Date Local'],
    place_columns + unit_columns,
)
data_with_city = new_data.drop_duplicates()

In [5]:
# data_with_city[data_with_city['City'] == 'Tijuana']

In [6]:
# Count the missing values per column of the site-level frame.
raw_data_copy.isna().sum()

NO2 Mean      0
NO2 AQI       0
O3 Mean       0
O3 AQI        0
SO2 Mean      0
SO2 AQI       0
CO Mean       0
CO AQI        0
Address       0
State         0
County        0
City          0
Date Local    0
NO2 Units     0
O3 Units      0
SO2 Units     0
CO Units      0
dtype: int64

In [7]:
cols_to_include = ['city', 'county_name', 'state_name', 'lat', 'lng']
coordinates = pd.read_csv('uscities.csv')[cols_to_include]

# Add the cities that are missing from uscities.csv. They are appended rather
# than written to fixed index labels, so no existing row can be overwritten
# if the CSV has a different number of rows.
missing_cities = pd.DataFrame(
    [
        ['Rubidoux', 'Riverside', 'California', 33.9972, -117.4176],
        ['Vandenberg Air Force Base', 'Santa Barbara', 'California', 34.7420, -120.5724],
        ['Westport', 'Fairfield', 'Connecticut', 41.1415, -73.3579],
        ['Cornwall', 'Litchfield', 'Connecticut', 41.8437, -73.3293],
    ],
    columns=cols_to_include,
)
coordinates = pd.concat([coordinates, missing_cities], ignore_index=True)

# Attach latitude/longitude to every city-day row. A left merge keeps rows
# without a matching city; their lat/lng are NaN.
combined = pd.merge(
    data_with_city,
    coordinates,
    how='left',
    left_on=['City', 'County', 'State'],
    right_on=['city', 'county_name', 'state_name'],
).drop(['city', 'county_name', 'state_name'], axis=1)

In [8]:
# Shape of the merged frame before rows with missing values are dropped.
combined.shape

(377730, 18)

In [9]:
# Drop every row that has a missing value in any column.
# NOTE(review): presumably these are the rows whose city found no match in the
# coordinates table during the left merge -- confirm.
combined = combined.dropna()

In [10]:
# Shape after dropna(); compare with the earlier shape to see how many rows were removed.
combined.shape

(346831, 18)

In [23]:
# combined.to_csv('new_us_pollution.csv')

In [11]:
# combined.isna().sum()

In [12]:
from folium.plugins import HeatMap

In [63]:
# Rebind `basemap` to a new, empty base map with the default centre and zoom.
basemap = generateBaseMap()

In [73]:
# One red circle per row whose date contains '2010-01'; the circle radius is the NO2 mean.
no2_jan_2010 = combined.loc[
    combined['Date Local'].str.contains('2010-01'),
    ['lat', 'lng', 'NO2 Mean'],
].dropna()
for lat, lng, no2_mean in no2_jan_2010.values.tolist():
    folium.CircleMarker([lat, lng], radius=no2_mean, color='red').add_to(basemap)

In [74]:
# One blue circle per row whose date contains '2016-01'; the circle radius is the NO2 mean.
no2_jan_2016 = combined.loc[
    combined['Date Local'].str.contains('2016-01'),
    ['lat', 'lng', 'NO2 Mean'],
].dropna()
for lat, lng, no2_mean in no2_jan_2016.values.tolist():
    folium.CircleMarker([lat, lng], radius=no2_mean, color='blue').add_to(basemap)

In [45]:
# Heat map layer built from [lat, lng, CO mean] triples of the rows dated 2016-01-01.
# The add_to() call stays the last expression, so its result is the cell output.
co_2016_01_01 = combined.loc[combined['Date Local'] == '2016-01-01', ['lat', 'lng', 'CO Mean']].dropna()
HeatMap(data=co_2016_01_01.values.tolist()).add_to(basemap)

<folium.plugins.heat_map.HeatMap at 0x1e296664ac8>

In [75]:
basemap

In [18]:
# Coordinates and NO2 mean of every row dated 2000-01-01.
combined.loc[combined['Date Local'] == '2000-01-01', ['lat', 'lng', 'NO2 Mean']]

Unnamed: 0,lat,lng,NO2 Mean
0,33.5722,-112.0891,19.041667
348,33.6872,-111.8651,47.208333
533,32.1545,-110.8782,15.208333
880,37.9722,-122.0016,14.782609
1243,38.0288,-121.6404,9.826087
1597,37.9628,-122.3425,13.695652
1963,38.0185,-121.897,16.217391
2326,32.6849,-115.4944,9.304348
2667,34.1879,-118.3235,25.869565
3339,33.798,-118.1675,33.391304


In [19]:
# Second map, centred on [33.5722, -112.0891] (the lat/lng shown for the
# Phoenix rows of `combined`), with the default zoom.
newMap = generateBaseMap([33.5722, -112.0891])

In [20]:
combined.head()

Unnamed: 0,NO2 Mean,NO2 AQI,O3 Mean,O3 AQI,SO2 Mean,SO2 AQI,CO Mean,CO AQI,State,County,City,Date Local,NO2 Units,O3 Units,SO2 Units,CO Units,lat,lng
0,19.041667,46.0,0.0225,34.0,2.9875,13.0,1.01239,25.0,Arizona,Maricopa,Phoenix,2000-01-01,Parts per billion,Parts per million,Parts per billion,Parts per million,33.5722,-112.0891
1,22.958333,34.0,0.013375,27.0,1.947917,4.0,0.958333,26.0,Arizona,Maricopa,Phoenix,2000-01-02,Parts per billion,Parts per million,Parts per billion,Parts per million,33.5722,-112.0891
2,38.125,48.0,0.007958,14.0,5.225,16.0,1.845833,28.0,Arizona,Maricopa,Phoenix,2000-01-03,Parts per billion,Parts per million,Parts per billion,Parts per million,33.5722,-112.0891
3,40.26087,72.0,0.014167,28.0,7.066667,23.0,1.910417,34.0,Arizona,Maricopa,Phoenix,2000-01-04,Parts per billion,Parts per million,Parts per billion,Parts per million,33.5722,-112.0891
4,48.45,58.0,0.006667,10.0,8.704166,21.0,2.660417,42.0,Arizona,Maricopa,Phoenix,2000-01-05,Parts per billion,Parts per million,Parts per billion,Parts per million,33.5722,-112.0891


In [21]:
from folium.plugins import HeatMapWithTime

In [35]:
# # Make a heatmap with time by adding each coordinate pair to the list as many times as its AQI value.
# HeatMap([[[33.5722, -112.0891, 2], [33.5722, -112.0891, 2],[33.5722, -112.0891, 2],[33.5722, -112.0891, 2],[33.5722, -112.0891, 2],[33.5722, -112.0891, 2]]).add_to(newMap)# ,radius=50, gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}, min_opacity=10, max_opacity=10).add_to(newMap)

<folium.plugins.heat_map.HeatMap at 0x1e29601d470>

In [36]:
newMap

In [None]:
# map_2 = folium.Map(location=[45.372, -121.6972],
# zoom_start=12,
# tiles='Stamen Terrain')
# folium.Marker([45.3288, -121.6625]).add_to(map_2)
# folium.Marker([45.3311, -121.7113]).add_to(map_2)
# map_2

In [None]:
# cols_to_include = ['city', 'county_name', 'state_name', 'lat', 'lng']
# coordinates = pd.read_csv('uscities.csv')[cols_to_include]
# combined = pd.merge(pollution_df, coordinates, how = 'left', 
#                    left_on = ['City', 'County', 'State'], right_on = ['city', 'county_name', 'state_name']).drop(['city', 'county_name', 'state_name'], axis = 1)