In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot
import geopandas as gp
import folium
import json
from folium import plugins

In [2]:
# Read the US accidents CSV into a single in-memory DataFrame.
data = pd.read_csv(filepath_or_buffer='../data/US_Accidents_Dec19.csv')

In [3]:
# Preview the first five rows of the raw table.
data.head(n=5)

Unnamed: 0,ID,Source,TMC,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,MapQuest,201.0,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,MapQuest,201.0,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,MapQuest,201.0,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,MapQuest,201.0,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,MapQuest,201.0,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,...,False,False,False,False,True,False,Day,Day,Day,Day


In [4]:
# Show the column labels of the loaded table.
data.columns

Index(['ID', 'Source', 'TMC', 'Severity', 'Start_Time', 'End_Time',
       'Start_Lat', 'Start_Lng', 'End_Lat', 'End_Lng', 'Distance(mi)',
       'Description', 'Number', 'Street', 'Side', 'City', 'County', 'State',
       'Zipcode', 'Country', 'Timezone', 'Airport_Code', 'Weather_Timestamp',
       'Temperature(F)', 'Wind_Chill(F)', 'Humidity(%)', 'Pressure(in)',
       'Visibility(mi)', 'Wind_Direction', 'Wind_Speed(mph)',
       'Precipitation(in)', 'Weather_Condition', 'Amenity', 'Bump', 'Crossing',
       'Give_Way', 'Junction', 'No_Exit', 'Railway', 'Roundabout', 'Station',
       'Stop', 'Traffic_Calming', 'Traffic_Signal', 'Turning_Loop',
       'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
       'Astronomical_Twilight'],
      dtype='object')

In [5]:
# (number of rows, number of columns) of the loaded table.
data.shape

(2974335, 49)

In [6]:
# Parse the US state GeoJSON that is overlaid on the maps below.
with open('../data/2010_us_states.json') as f:
    usArea = json.loads(f.read())

In [8]:
# Base map of the United States with the state GeoJSON drawn on top.
usMap = folium.Map(
    location=[39.5, -98.35],
    tiles='Stamen Toner',
    zoom_start=4,
)
folium.GeoJson(usArea).add_to(usMap)
# Disabled experiment, kept for reference: one red marker for each of the
# first 100,000 accidents.
# for i, row in data[:100000].iterrows():
#     folium.CircleMarker((row.Start_Lat, row.Start_Lng), radius=1, weight=2, color='red', fill_color='red', fill_opacity=.5).add_to(usMap)
# usMap

<folium.features.GeoJson at 0x7f692073cad0>

In [9]:
# Independent copy holding only the state code and accident ID columns.
choro_df = data.loc[:, ['State', 'ID']].copy()

In [10]:
# Preview the first five (State, ID) rows.
choro_df.head(n=5)

Unnamed: 0,State,ID
0,OH,A-1
1,OH,A-2
2,OH,A-3
3,OH,A-4
4,OH,A-5


In [11]:
# Count the non-null accident IDs per state. Despite the variable name, the
# result is a one-column DataFrame ('ID') indexed by state code.
num_accidents_series = choro_df.groupby(by='State').count()

In [12]:
# Preview the per-state counts.
num_accidents_series.head(n=5)

Unnamed: 0_level_0,ID
State,Unnamed: 1_level_1
AL,36369
AR,1749
AZ,62330
CA,663204
CO,40124


In [13]:
# Start from an empty frame; the 'state' and 'accidents' columns are attached
# in the following cells.
num_accidents_state = pd.DataFrame(data=None)

In [14]:
# State codes come from the grouped index, coerced to plain Python strings.
num_accidents_state['state'] = list(map(str, num_accidents_series.index))

In [15]:
# Take the counts from the 'ID' column explicitly so a 1-D array is assigned.
# The previous `num_accidents_series.values` was a 2-D (n, 1) array that only
# fit a single column because the grouped frame happens to have one column.
num_accidents_state['accidents'] = num_accidents_series['ID'].values

In [16]:
# Two-letter postal code -> full name for every state code handled by this
# notebook (Alaska and Hawaii are not listed here).
state_full = {
    'AL': 'Alabama',
    'AR': 'Arkansas',
    'AZ': 'Arizona',
    'CA': 'California',
    'CO': 'Colorado',
    'CT': 'Connecticut',
    'DC': 'District of Columbia',
    'DE': 'Delaware',
    'FL': 'Florida',
    'GA': 'Georgia',
    'IA': 'Iowa',
    'ID': 'Idaho',
    'IL': 'Illinois',
    'IN': 'Indiana',
    'KS': 'Kansas',
    'KY': 'Kentucky',
    'LA': 'Louisiana',
    'MA': 'Massachusetts',
    'MD': 'Maryland',
    'ME': 'Maine',
    'MI': 'Michigan',
    'MN': 'Minnesota',
    'MO': 'Missouri',
    'MS': 'Mississippi',
    'MT': 'Montana',
    'NC': 'North Carolina',
    'ND': 'North Dakota',
    'NE': 'Nebraska',
    'NH': 'New Hampshire',
    'NJ': 'New Jersey',
    'NM': 'New Mexico',
    'NV': 'Nevada',
    'NY': 'New York',
    'OH': 'Ohio',
    'OK': 'Oklahoma',
    'OR': 'Oregon',
    'PA': 'Pennsylvania',
    'RI': 'Rhode Island',
    'SC': 'South Carolina',
    'SD': 'South Dakota',
    'TN': 'Tennessee',
    'TX': 'Texas',
    'UT': 'Utah',
    'VA': 'Virginia',
    'VT': 'Vermont',
    'WA': 'Washington',
    'WI': 'Wisconsin',
    'WV': 'West Virginia',
    'WY': 'Wyoming',
}

In [17]:
# Preview the assembled (state, accidents) table.
num_accidents_state.head(n=5)

Unnamed: 0,state,accidents
0,AL,36369
1,AR,1749
2,AZ,62330
3,CA,663204
4,CO,40124


In [18]:
# Rebind state_full to a fresh dict with the same abbreviation -> name pairs.
state_full = dict(state_full)

In [19]:
# Translate postal abbreviations to full state names.
# A value with no entry in state_full is kept as-is instead of becoming NaN.
# This makes the cell safe to re-run: a plain `.map(state_full)` on names that
# were already translated would wipe the whole column to NaN.
num_accidents_state['state'] = num_accidents_state['state'].map(
    lambda name: state_full.get(name, name)
)

In [20]:
# Placeholder rows (zero accidents) for three regions given no count here.
missing_states = [[region, 0] for region in ('Puerto Rico', 'Alaska', 'Hawaii')]

In [21]:
# Turn the placeholder rows into a frame with the same columns as
# num_accidents_state so the two can be concatenated.
missing_states = pd.DataFrame(data=missing_states, columns=['state', 'accidents'])

In [22]:
# Prepend the zero-count placeholder rows, but only for names not already in
# the table. The first run gives the same result as a plain concat. On a
# re-run nothing is added, where the plain concat would have prepended
# duplicate 'state' keys each time.
absent_states = missing_states[~missing_states['state'].isin(num_accidents_state['state'])]
num_accidents_state = pd.concat([absent_states, num_accidents_state], ignore_index=True)

In [34]:
# Choropleth of the accident count per state, written to choro_map.html.
usMap = folium.Map(location=[39.5, -98.35], tiles='Stamen Toner', zoom_start=5)
folium.GeoJson(usArea).add_to(usMap)
# Map.choropleth() is deprecated in folium; the Choropleth class takes the
# same arguments and is attached to the map with add_to().
folium.Choropleth(
    geo_data='../data/2010_us_states.json',
    data=num_accidents_state,
    columns=['state', 'accidents'],     # [key column, value column]
    key_on='feature.properties.NAME',   # GeoJSON property matched against the key column
    fill_color='YlGn',
    fill_opacity=1,
).add_to(usMap)
usMap.save('choro_map.html')

In [None]:
# Data-source note kept as a bare string expression. Presumably this is the
# origin of the per-state registered-car figures in the next cell. TODO confirm.
'''
Car Data:
https://www.statista.com/statistics/196010/total-number-of-registered-automobiles-in-the-us-by-state/
'''

In [24]:
# Car count per state, written as (state, cars) pairs so each figure sits next
# to its state name. The pairs are unzipped into the two parallel lists
# expected downstream: {'state': [...], 'cars': [...]}.
cars_per_state = dict(zip(('state', 'cars'), map(list, zip(*[
    ('California', 15065827),
    ('Texas', 8248322),
    ('Florida', 7966091),
    ('New York', 4712779),
    ('Ohio', 4603594),
    ('Illinois', 4477763),
    ('Pennsylvania', 4424183),
    ('Georgia', 3557469),
    ('North Carolina', 3393781),
    ('Virginia', 3267735),
    ('Michigan', 3023940),
    ('Washington', 2964939),
    ('New Jersey', 2754253),
    ('Arizona', 2391772),
    ('Tennessee', 2285329),
    ('Indiana', 2248870),
    ('Massachusetts', 2182530),
    ('Alabama', 2161212),
    ('Missouri', 2102216),
    ('Wisconsin', 2087518),
    ('Minnesota', 1976525),
    ('Maryland', 1922463),
    ('South Carolina', 1830186),
    ('Colorado', 1798177),
    ('Kentucky', 1721942),
    ('Oregon', 1488623),
    ('Louisiana', 1389249),
    ('Connecticut', 1306709),
    ('Oklahoma', 1296219),
    ('Iowa', 1242219),
    ('Nevada', 1073760),
    ('Kansas', 975171),
    ('Utah', 937421),
    ('Arkansas', 921161),
    ('Mississippi', 825338),
    ('Nebraska', 683020),
    ('New Mexico', 655766),
    ('Idaho', 598774),
    ('West Virginia', 560118),
    ('Hawaii', 509492),
    ('New Hampshire', 506959),
    ('Montana', 452845),
    ('Delaware', 433363),
    ('Rhode Island', 412255),
    ('Maine', 390506),
    ('South Dakota', 358859),
    ('North Dakota', 240048),
    ('Vermont', 218302),
    ('District of Columbia', 209723),
    ('Wyoming', 203546),
    ('Alaska', 183270),
]))))

In [25]:
# Convert the dict of parallel lists into a two-column frame (state, cars).
cars_per_state = pd.DataFrame(data=cars_per_state, columns=['state', 'cars'])

In [26]:
# Left join on state name: every row of cars_per_state is kept and gains its
# accident count where one exists.
accident_rates = cars_per_state.set_index('state').join(
    num_accidents_state.set_index('state'),
    how='left',
)

In [27]:
# Accidents in the dataset divided by the car count for each state.
accident_rates['accident_rate'] = accident_rates['accidents'] / accident_rates['cars']
# Move 'state' from the index back to a column, exactly once.
# reset_index(inplace=True) inserted an extra 'index' column on every re-run
# of this cell; the guard keeps the cell idempotent.
if 'state' not in accident_rates.columns:
    accident_rates = accident_rates.reset_index()

In [35]:
# Choropleth of accidents per registered car by state, written to choro_rates.html.
usMap = folium.Map(location=[39.5, -98.35], tiles='Stamen Toner', zoom_start=5)
folium.GeoJson(usArea).add_to(usMap)
# Map.choropleth() is deprecated in folium; the Choropleth class takes the
# same arguments and is attached to the map with add_to().
folium.Choropleth(
    geo_data='../data/2010_us_states.json',
    data=accident_rates,
    columns=['state', 'accident_rate'],  # [key column, value column]
    key_on='feature.properties.NAME',    # GeoJSON property matched against the key column
    fill_color='YlGn',
    fill_opacity=1,
).add_to(usMap)
usMap.save('choro_rates.html')

In [36]:
# Five states with the highest accident rate, highest first.
accident_rates.sort_values(by=['accident_rate'], ascending=[False]).iloc[:5]

Unnamed: 0,state,cars,accidents,accident_rate
22,South Carolina,1830186,146689,0.08015
25,Oregon,1488623,70840,0.047588
32,Utah,937421,41385,0.044148
0,California,15065827,663204,0.04402
8,North Carolina,3393781,142460,0.041977


In [53]:
# Load the South Carolina GeoJSON and draw it on a map zoomed to the state.
with open('../data/sc_area.json') as file:
    scArea = json.loads(file.read())
scMap = folium.Map(
    location=[34, -79.94],
    tiles='Stamen Toner',
    zoom_start=8,
)
folium.GeoJson(scArea).add_to(scMap)
scMap

In [48]:
# Start coordinates of every accident whose State code is 'SC'.
sc_data = data.loc[data['State'] == 'SC', ['Start_Lat', 'Start_Lng']]

In [49]:
# Preview the first five South Carolina coordinates.
sc_data.head(n=5)

Unnamed: 0,Start_Lat,Start_Lng
146283,34.318562,-82.663651
146290,34.202515,-82.134941
146296,34.293327,-81.545921
146299,34.204174,-82.195602
146300,33.457443,-81.894531


In [54]:
# Draw the first 10,000 South Carolina accidents as small red circle markers.
# itertuples() yields lightweight named tuples, avoiding the per-row Series
# that iterrows() builds (and the unused index it returned).
for point in sc_data[:10000].itertuples(index=False):
    folium.CircleMarker(
        (point.Start_Lat, point.Start_Lng),
        radius=1,
        weight=2,
        color='red',
        fill_color='red',
        fill_opacity=.5,
    ).add_to(scMap)
scMap

In [59]:
# Add a heat-map layer built from every South Carolina accident location and
# export the map to heat_sc.html.
# DataFrame.as_matrix() is deprecated and was removed in pandas 1.0; .values
# returns the same (n, 2) array of [lat, lng] rows.
scMap.add_child(plugins.HeatMap(data=sc_data[['Start_Lat', 'Start_Lng']].values, radius=5, blur=2))
scMap.save('heat_sc.html')

  """Entry point for launching an IPython kernel.


Index(['ID', 'Source', 'TMC', 'Severity', 'Start_Time', 'End_Time',
       'Start_Lat', 'Start_Lng', 'End_Lat', 'End_Lng', 'Distance(mi)',
       'Description', 'Number', 'Street', 'Side', 'City', 'County', 'State',
       'Zipcode', 'Country', 'Timezone', 'Airport_Code', 'Weather_Timestamp',
       'Temperature(F)', 'Wind_Chill(F)', 'Humidity(%)', 'Pressure(in)',
       'Visibility(mi)', 'Wind_Direction', 'Wind_Speed(mph)',
       'Precipitation(in)', 'Weather_Condition', 'Amenity', 'Bump', 'Crossing',
       'Give_Way', 'Junction', 'No_Exit', 'Railway', 'Roundabout', 'Station',
       'Stop', 'Traffic_Calming', 'Traffic_Signal', 'Turning_Loop',
       'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
       'Astronomical_Twilight'],
      dtype='object')