In [26]:
import pandas as pd
import datetime as dt

In [2]:
crash_data = pd.read_csv('../data/clean/east_nash_crashes.csv')
crash_data.describe()

Unnamed: 0.1,Unnamed: 0,accident_number,number_of_motor_vehicles,number_of_injuries,number_of_fatalities,zip,rpa,lat,long
count,20831.0,20831.0,20831.0,20831.0,20831.0,20831.0,20831.0,20746.0,20746.0
mean,89604.491335,20195520000.0,1.765734,0.428592,0.0,37155.950266,3261.957803,36.231822,-86.730996
std,51736.915153,19694350.0,0.77524,0.797124,0.0,46.707425,4837.666896,0.042429,0.024834
min,4.0,20170000000.0,0.0,0.0,0.0,37115.0,1009.0,36.1044,-86.8861
25%,45294.0,20180400000.0,2.0,0.0,0.0,37115.0,1426.0,36.1884,-86.7504
50%,88621.0,20190650000.0,2.0,0.0,0.0,37115.0,1713.0,36.2462,-86.7354
75%,134370.0,20210410000.0,2.0,1.0,0.0,37206.0,1851.0,36.2638,-86.7124
max,180275.0,20240110000.0,8.0,9.0,0.0,37216.0,20143.0,36.3096,-86.6476


In [3]:
crash_data.isna().sum(axis = 0)

Unnamed: 0                     0
accident_number                0
date_and_time                  0
number_of_motor_vehicles       0
number_of_injuries             0
number_of_fatalities           0
hit_and_run                    0
collision_type_description     0
weather_description            0
illumination_description       0
harmfuldescriptions            0
street_address                 0
city                           0
state                          0
zip                            0
rpa                            0
precinct                       0
lat                           85
long                          85
mapped_location               85
property_damage                0
dtype: int64

Location data is important for the first step of identifying "hot spots", so any nulls in these columns will need to be addressed. The records with nulls for location fields (`lat`, `long`, & `mapped_location`) do seem to be from actual events and not errors, but given that at the time of this analysis they represent 0.04% of the data, I don't believe removeing them will negatively impact the overall analysis.

In [4]:
crash_data = crash_data.dropna()

In [5]:
crash_data.isna().sum(axis = 0)

Unnamed: 0                    0
accident_number               0
date_and_time                 0
number_of_motor_vehicles      0
number_of_injuries            0
number_of_fatalities          0
hit_and_run                   0
collision_type_description    0
weather_description           0
illumination_description      0
harmfuldescriptions           0
street_address                0
city                          0
state                         0
zip                           0
rpa                           0
precinct                      0
lat                           0
long                          0
mapped_location               0
property_damage               0
dtype: int64

In [6]:
crash_data.describe()

Unnamed: 0.1,Unnamed: 0,accident_number,number_of_motor_vehicles,number_of_injuries,number_of_fatalities,zip,rpa,lat,long
count,20746.0,20746.0,20746.0,20746.0,20746.0,20746.0,20746.0,20746.0,20746.0
mean,89874.419551,20195400000.0,1.768196,0.429191,0.0,37155.823773,3269.518654,36.231822,-86.730996
std,51660.982668,19644530.0,0.773764,0.79812,0.0,46.690817,4846.094455,0.042429,0.024834
min,4.0,20170000000.0,0.0,0.0,0.0,37115.0,1009.0,36.1044,-86.8861
25%,45748.5,20180390000.0,2.0,0.0,0.0,37115.0,1431.0,36.1884,-86.7504
50%,89093.0,20190640000.0,2.0,0.0,0.0,37115.0,1713.0,36.2462,-86.7354
75%,134558.75,20210390000.0,2.0,1.0,0.0,37206.0,1851.0,36.2638,-86.7124
max,180275.0,20240110000.0,8.0,9.0,0.0,37216.0,20143.0,36.3096,-86.6476


No fatalities are reported in the entire dataset, which is surprising. However, these reports are only as accurate as the officers recording them, and they may be busy attending to those involved and quickly filling these out as soon as they arrive or or after they've left. That said, I will ignore the column for this analysis but leave it in the dataset, so it can be used in the future if numbers start showing up.

As this analysis is only looking at one particular roadway corridor, I can't figure out how to specify a radius from the street but I can at least filter out any crashes that occured on interstates.

In [7]:
crash_data_no_hwys = crash_data.drop(crash_data[(crash_data['street_address'].str.contains('I*24')) | (crash_data['street_address'].str.contains('I*40')) | (crash_data['street_address'].str.contains('I*40')) == True].index)
crash_data_no_hwys

Unnamed: 0.1,Unnamed: 0,accident_number,date_and_time,number_of_motor_vehicles,number_of_injuries,number_of_fatalities,hit_and_run,collision_type_description,weather_description,illumination_description,...,street_address,city,state,zip,rpa,precinct,lat,long,mapped_location,property_damage
0,4,20240113169,2024-02-18 12:49:00,2.0,0.0,0.0,False,FRONT TO REAR,CLEAR,DAYLIGHT,...,BRILEY PKWY E EXT RAMP & GALLATIN PKE,NASHVILLE,TN,37216,1801,EAST,36.2347,-86.7248,"{'type': 'Point', 'coordinates': [-86.7248, 36...",True
1,15,20240112411,2024-02-18 00:51:00,2.0,0.0,0.0,False,ANGLE,CLEAR,DARK - LIGHTED,...,STATE RT 45 & GALLATIN PKEN,MADISON,TN,37115,1701,MADISO,36.2631,-86.7119,"{'type': 'Point', 'coordinates': [-86.7119, 36...",True
2,19,20240112142,2024-02-17 20:49:00,1.0,0.0,0.0,False,NOT COLLISION W/MOTOR VEHICLE-TRANSPORT,CLEAR,DARK - LIGHTED,...,GALLATIN PKEN & OLD TIME VILLAGE PVTDR,MADISON,TN,37115,1701,MADISO,36.2638,-86.7117,"{'type': 'Point', 'coordinates': [-86.7117, 36...",True
3,31,20240111599,2024-02-17 18:11:00,2.0,0.0,0.0,True,FRONT TO REAR,CLEAR,DARK - LIGHTED,...,DICKERSON PKE & DUE WEST AVN,MADISON,TN,37115,2017,MADISO,36.2563,-86.7579,"{'type': 'Point', 'coordinates': [-86.7579, 36...",True
4,63,20240110010,2024-02-16 21:36:00,2.0,0.0,0.0,False,ANGLE,RAIN,DARK - NOT LIGHTED,...,N 8TH ST & RAMSEY ST,NASHVILLE,TN,37206,1999,EAST,36.1764,-86.7594,"{'type': 'Point', 'coordinates': [-86.7594, 36...",True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20826,180210,20170001270,2017-01-01 14:59:00,2.0,0.0,0.0,False,FRONT TO REAR,RAIN,DAYLIGHT,...,CLEVELAND ST & N 9TH ST,NASHVILLE,TN,37206,1925,EAST,36.1847,-86.7583,"{'type': 'Point', 'coordinates': [-86.7583, 36...",True
20827,180219,20170001226,2017-01-01 14:33:00,2.0,2.0,0.0,False,FRONT TO REAR,RAIN,DAYLIGHT,...,ANDERSON LN & MYATT DR,MADISON,TN,37115,1713,MADISO,36.2721,-86.6890,"{'type': 'Point', 'coordinates': [-86.689, 36....",True
20828,180244,20170000705,2017-01-01 07:59:00,1.0,2.0,0.0,False,NOT COLLISION W/MOTOR VEHICLE-TRANSPORT,CLEAR,DAYLIGHT,...,I65 S EXT RAMP & I 65,MADISON,TN,37115,20044,MADISO,36.2481,-86.7430,"{'type': 'Point', 'coordinates': [-86.743, 36....",True
20829,180258,20170000450,2017-01-01 03:47:00,1.0,0.0,0.0,False,NOT COLLISION W/MOTOR VEHICLE-TRANSPORT,CLEAR,DARK - LIGHTED,...,RIVERWOOD DR & COOPER LN,NASHVILLE,TN,37216,1449,EAST,36.2095,-86.7135,"{'type': 'Point', 'coordinates': [-86.7135, 36...",True


Need to filter for only crashes where `street_address` makes reference to the study corridor, which includes:<br>
- Main Street<br>
- Gallatin Pike<br>
- Gallatin Avenue<br>

In [8]:
crashes = crash_data_no_hwys[crash_data_no_hwys['street_address'].str.contains('GALLATIN|MAIN')]
crashes.head()

Unnamed: 0.1,Unnamed: 0,accident_number,date_and_time,number_of_motor_vehicles,number_of_injuries,number_of_fatalities,hit_and_run,collision_type_description,weather_description,illumination_description,...,street_address,city,state,zip,rpa,precinct,lat,long,mapped_location,property_damage
0,4,20240113169,2024-02-18 12:49:00,2.0,0.0,0.0,False,FRONT TO REAR,CLEAR,DAYLIGHT,...,BRILEY PKWY E EXT RAMP & GALLATIN PKE,NASHVILLE,TN,37216,1801,EAST,36.2347,-86.7248,"{'type': 'Point', 'coordinates': [-86.7248, 36...",True
1,15,20240112411,2024-02-18 00:51:00,2.0,0.0,0.0,False,ANGLE,CLEAR,DARK - LIGHTED,...,STATE RT 45 & GALLATIN PKEN,MADISON,TN,37115,1701,MADISO,36.2631,-86.7119,"{'type': 'Point', 'coordinates': [-86.7119, 36...",True
2,19,20240112142,2024-02-17 20:49:00,1.0,0.0,0.0,False,NOT COLLISION W/MOTOR VEHICLE-TRANSPORT,CLEAR,DARK - LIGHTED,...,GALLATIN PKEN & OLD TIME VILLAGE PVTDR,MADISON,TN,37115,1701,MADISO,36.2638,-86.7117,"{'type': 'Point', 'coordinates': [-86.7117, 36...",True
8,85,20240109234,2024-02-16 15:00:00,1.0,0.0,0.0,False,NOT COLLISION W/MOTOR VEHICLE-TRANSPORT,CLOUDY,DAYLIGHT,...,DULING AV & GALLATIN PKEN,MADISON,TN,37115,1701,MADISO,36.2659,-86.711,"{'type': 'Point', 'coordinates': [-86.711, 36....",True
12,148,20240104276,2024-02-14 18:15:00,3.0,0.0,0.0,True,FRONT TO REAR,CLEAR,NAN,...,GALLATIN PKES & GALLATIN PKE SB EXT RAMP,MADISON,TN,37115,1509,MADISO,36.2379,-86.7241,"{'type': 'Point', 'coordinates': [-86.7241, 36...",True


In [9]:
crashes.to_csv('../data/clean/crashes.csv')

In [10]:
nash_311 = pd.read_csv('../data/clean/nash_311.csv')
nash_311.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1811 entries, 0 to 1810
Data columns (total 22 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Unnamed: 0.1               1811 non-null   int64  
 1   Unnamed: 0                 1811 non-null   int64  
 2   case_number                1811 non-null   int64  
 3   status                     1811 non-null   object 
 4   case_request               1811 non-null   object 
 5   case_subrequest            1811 non-null   object 
 6   additional_subrequest      1152 non-null   object 
 7   date_time_opened           1811 non-null   object 
 8   date_time_closed           1811 non-null   object 
 9   case_origin                1811 non-null   object 
 10  state_issue                1811 non-null   bool   
 11  closed_when_created        1811 non-null   bool   
 12  incident_address           1811 non-null   object 
 13  incident_city              1811 non-null   objec

In [11]:
nash_311.isna().sum(axis = 0)

Unnamed: 0.1                    0
Unnamed: 0                      0
case_number                     0
status                          0
case_request                    0
case_subrequest                 0
additional_subrequest         659
date_time_opened                0
date_time_closed                0
case_origin                     0
state_issue                     0
closed_when_created             0
incident_address                0
incident_city                   0
incident_council_district       0
incident_zip_code               0
latitude                        0
longitude                       0
mapped_location                 0
contact_type                 1797
parent_case                  1811
preferred_language           1811
dtype: int64

In [12]:
nash_311 = nash_311.dropna(subset=['latitude', 'longitude'])

In [13]:
nash_311 = nash_311.dropna(subset=['incident_address'])
nash_311_filter = nash_311[nash_311['incident_address'].str.contains('GALLATIN|MAIN')]
nash_311_filter.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,case_number,status,case_request,case_subrequest,additional_subrequest,date_time_opened,date_time_closed,case_origin,...,incident_address,incident_city,incident_council_district,incident_zip_code,latitude,longitude,mapped_location,contact_type,parent_case,preferred_language
0,114,1183,698461,Closed,"Streets, Roads & Sidewalks",Blocking the Right of Way,,2021-06-03 13:00:00,2021-06-03 13:00:00,Phone,...,1701 GALLATIN PIKE N,MADISON,10.0,37115,36.293616,-86.701402,"{'latitude': '36.29361642', 'longitude': '-86....",,,
1,180,1984,698491,Closed,"Streets, Roads & Sidewalks",Blocking the Right of Way,,2021-06-03 13:01:00,2021-06-03 13:01:00,Phone,...,ELVIRA AVE / GALLATIN PIKE,NASHVILLE,5.0,37216,36.206055,-86.736822,"{'latitude': '36.20605503', 'longitude': '-86....",,,
2,434,4496,810882,Closed,"Streets, Roads & Sidewalks",Blocking the Right of Way,,2021-10-26 13:00:00,2021-10-26 13:00:00,Phone,...,GALLATIN PIKE N / MYATT DR,MADISON,10.0,37115,36.296923,-86.699167,"{'latitude': '36.29692346', 'longitude': '-86....",,,
3,547,5555,1405624,Closed,"Streets, Roads & Sidewalks",Blocking the Right of Way,,2023-11-15 14:00:17,2023-11-15 14:33:16,Phone,...,GALLATIN AVE / CAHAL AVE,NASHVILLE,6.0,37216,36.196671,-86.742257,"{'latitude': '36.19667141', 'longitude': '-86....",,,
4,836,8591,693888,Closed,"Streets, Roads & Sidewalks",Blocking the Right of Way,,2021-05-27 13:00:00,2021-05-27 13:00:00,Phone,...,MAIN ST / S 6TH ST,NASHVILLE,5.0,37206,36.173589,-86.763381,"{'latitude': '36.17358895', 'longitude': '-86....",,,


In [14]:
nash_311_filter.to_csv('../data/clean/nash_311.csv')

In [15]:
st_rentals = pd.read_csv('../data/clean/rentals.csv')
st_rentals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2868 entries, 0 to 2867
Data columns (total 28 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Unnamed: 0                  2868 non-null   int64  
 1   permit                      2868 non-null   object 
 2   applicant                   2866 non-null   object 
 3   contact                     2768 non-null   object 
 4   permit_subtype_description  2868 non-null   object 
 5   permit_status               2868 non-null   object 
 6   parcel                      2868 non-null   object 
 7   date_entered                2868 non-null   object 
 8   date_issued                 2562 non-null   object 
 9   expiration_date             2868 non-null   object 
 10  address                     2868 non-null   object 
 11  city                        2868 non-null   object 
 12  state                       2868 non-null   object 
 13  zip                         2868 

Now is a good time to pause and look at the all of this on a map.<br><br>
(This is better done in a separate notebook, so this will serve as a stopping point for this one. The current table will be exported to a .csv file and used in the mapping notebook. Refer to `mapping.ipynb` for the overall map(s) and next steps will follow below)

Now we'll look at the different categorical information in our datasets.

In [16]:
crashes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6916 entries, 0 to 20823
Data columns (total 21 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Unnamed: 0                  6916 non-null   int64  
 1   accident_number             6916 non-null   int64  
 2   date_and_time               6916 non-null   object 
 3   number_of_motor_vehicles    6916 non-null   float64
 4   number_of_injuries          6916 non-null   float64
 5   number_of_fatalities        6916 non-null   float64
 6   hit_and_run                 6916 non-null   bool   
 7   collision_type_description  6916 non-null   object 
 8   weather_description         6916 non-null   object 
 9   illumination_description    6916 non-null   object 
 10  harmfuldescriptions         6916 non-null   object 
 11  street_address              6916 non-null   object 
 12  city                        6916 non-null   object 
 13  state                       6916 non-

In [17]:
crashes['collision_type_description'].value_counts()

collision_type_description
ANGLE                                      2472
FRONT TO REAR                              2295
SIDESWIPE - SAME DIRECTION                  965
NOT COLLISION W/MOTOR VEHICLE-TRANSPORT     612
HEAD-ON                                     205
SIDESWIPE - OPPOSITE DIRECTION              159
OTHER                                        85
REAR TO SIDE                                 57
UNKNOWN                                      35
REAR-TO-REAR                                 31
Name: count, dtype: int64

In [18]:
crashes['weather_description'].value_counts()

weather_description
CLEAR                4733
CLOUDY               1244
RAIN                  654
NAN                   226
SNOW                   25
UNKNOWN                15
FOG                    11
OTHER (NARRATIVE)       3
SLEET, HAIL             2
SEVERE CROSSWIND        1
BLOWING SNOW            1
SMOG, SMOKE             1
Name: count, dtype: int64

As the overwhelming majority of crashes happen with clear or cloudy conditions, weather can be eliminated as a potential factor.

In [19]:
crashes['illumination_description'].value_counts()

illumination_description
DAYLIGHT                 4745
DARK - LIGHTED           1781
DUSK                      157
DARK - NOT LIGHTED        154
DAWN                       31
DARK-UNKNOWN LIGHTING      17
UNKNOWN                    12
NAN                        11
OTHER                       8
Name: count, dtype: int64

Same story for illumination, although it may be worth investigating the `DARK - NOT LIGHTED` instances to see if there is an issue with aparticular area.

In [20]:
crashes['harmfuldescriptions'].value_counts()

harmfuldescriptions
MOTOR VEHICLE IN TRANSPORT                                             5811
PARKED MOTOR VEHICLE                                                    176
PEDESTRIAN                                                              175
UTILITY POLE                                                             94
MOTOR VEHICLE IN TRANSPORT;PARKED MOTOR VEHICLE                          92
                                                                       ... 
OTHER TRAFFIC BARRIER;UNKNOWN MOST HARMFUL EVENT                          1
MOTOR VEHICLE IN TRANSPORT-OTHER ROADWAY;RAN OFF ROAD-RIGHT               1
CONCRETE TRAFFIC BARRIER;RAN OFF ROAD-LEFT                                1
MOTOR VEHICLE-IN-MOTION OUTSIDE TRAFFICW;UNKNOWN MOST HARMFUL EVENT       1
MOTOR VEHICLE IN TRANSPORT;BUILDING;DITCH;FENCE                           1
Name: count, Length: 162, dtype: int64

Could not find info specific to Tennessee but according to the state of Massachusetts, a "Collision with a motot vehicle in transport" means:<br><br>
"An event where a motor vehicle collides with another motor vehicle which is actively in motion on a roadway. This includes: motor vehicle in traffic on a highway, driverless motor vehicle in motion, motionless motor vehicle abandoned on a roadway, disabled motor vehicle on a roadway, etc."<br>(Source: https://masscrashreportmanual.com/vehicle/sequence-of-events-most-harmful-event/)<br><br>
So this count is simply telling us that the overwhelming majority of crashes involved a vehicle travelling down th eroad, as opposed to one that was not moving.<br><br>
It's worth looking at the collisions with pedestrians and parked vehicles. More on that later.

Now let's look at the different types of 311 complaints that have been made in the area.

In [21]:
nash_311.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1811 entries, 0 to 1810
Data columns (total 22 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Unnamed: 0.1               1811 non-null   int64  
 1   Unnamed: 0                 1811 non-null   int64  
 2   case_number                1811 non-null   int64  
 3   status                     1811 non-null   object 
 4   case_request               1811 non-null   object 
 5   case_subrequest            1811 non-null   object 
 6   additional_subrequest      1152 non-null   object 
 7   date_time_opened           1811 non-null   object 
 8   date_time_closed           1811 non-null   object 
 9   case_origin                1811 non-null   object 
 10  state_issue                1811 non-null   bool   
 11  closed_when_created        1811 non-null   bool   
 12  incident_address           1811 non-null   object 
 13  incident_city              1811 non-null   objec

In [22]:
nash_311['case_request'].value_counts()

case_request
Streets, Roads & Sidewalks        1739
Other Metro Services and Forms      18
Trash, Recycling & Litter           17
Transit                             11
Trees                               10
Public Safety                        6
Property Violations                  6
Electric & Water General             3
COVID-19                             1
Name: count, dtype: int64

Interestingly, there are almost 30,000 entries related to road and sidewalk conditions - over 4 times the number of crashes along the corridor... Let's dig into those.

In [23]:
nash_311[nash_311['case_request'] == 'Streets, Roads & Sidewalks']['case_subrequest'].value_counts()

case_subrequest
Blocking the Right of Way         994
Traffic Light Issue               537
Damaged Sign                       42
Traffic Light Timing               40
Remove debris in roadway           39
Potholes                           22
Roadwork Complaint                 14
Dead Animal Pickup                 13
Snow and Ice Removal               10
Sign Signal Repairs                 8
Obstruction of right of way         5
Road Closures                       4
Traffic Engineering                 4
Illegal Dumping                     2
Sidewalks                           2
Curbing and Berms                   1
Contractor Complaint                1
Shared Scooter and Bike Issues      1
Name: count, dtype: int64

While there aren't many, several subcategories related to infrastructure issues include requests for improvements. Plotting those on a map and comparing against crash locations will tell us where residents think the problem areas are.

In [24]:
impr_requests = nash_311[nash_311['case_subrequest'].isin(['Request New Sign', 'Traffic Engineering', 'Paving Request', 'Request a Speed Monitor Trailer', 'Traffic Light Timing', 'Request New Signal', 'Traffic Calming', 'Request for a New/Improved Bikeway', 'Request Warning Sign'])]
impr_requests.to_csv(('../data/clean/impr_requests.csv'))
impr_requests.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,case_number,status,case_request,case_subrequest,additional_subrequest,date_time_opened,date_time_closed,case_origin,...,incident_address,incident_city,incident_council_district,incident_zip_code,latitude,longitude,mapped_location,contact_type,parent_case,preferred_language
94,24521,253063,831604,Closed,"Streets, Roads & Sidewalks",Traffic Light Timing,Traffic Light Timing,2021-11-22 22:17:00,2021-12-09 17:35:00,hubNashville Community,...,GALLATIN AVE & W EASTLAND AVE,NASHVILLE,5.0,37206,36.183363,-86.748577,"{'latitude': '36.183363', 'longitude': '-86.74...",,,
107,25882,267385,819541,Closed,"Streets, Roads & Sidewalks",Traffic Light Timing,Traffic Light Timing,2021-11-05 15:57:00,2021-11-08 17:28:00,Mobile App,...,OLD HICKORY BLVD & GALLATIN PIKE,MADISON,9.0,37115,36.262217,-86.712215,"{'latitude': '36.26221745', 'longitude': '-86....",,,
448,61969,723845,319077,Closed,"Streets, Roads & Sidewalks",Traffic Light Timing,Traffic Light Timing,2020-03-06 01:10:00,2020-03-09 17:17:00,hubNashville Community,...,GALLATIN PIKE & OLD HICKORY BLVD,MADISON,9.0,37115,36.262217,-86.712215,"{'latitude': '36.26221745', 'longitude': '-86....",,,
754,77679,866888,159263,Closed,"Streets, Roads & Sidewalks",Traffic Light Timing,Traffic Light Timing,2019-03-25 19:10:00,2019-03-26 21:30:00,hubNashville Community,...,GALLATIN PIKE & RIVERWOOD DR,NASHVILLE,7.0,37216,36.224119,-86.725646,"{'latitude': '36.22411857', 'longitude': '-86....",,,
767,78123,871508,166948,Closed,"Streets, Roads & Sidewalks",Traffic Light Timing,Traffic Light Timing,2019-04-10 00:30:32,2019-05-02 17:15:29,hubNashville Community,...,GALLATIN AVE & EASTLAND AVE,NASHVILLE,6.0,37206,36.183834,-86.74838,"{'latitude': '36.18383396588818', 'longitude':...",,,


Ok now let's look at how some of these metrics have evolved over time.

In [25]:
plt.figure(figsize=(12,5))

sns.lineplot(data=crashes_per_day, x='date', y=, linewidth=0.5, errorbar=None)
#plt.xlim(0, 4.0)
#plt.ylabel('')
#plt.axhline(y=78, label='25% Delay', linestyle='--', color='blue', linewidth=1.5)
#plt.axhline(y=70, label='40% Delay', linestyle='--', color='#a7190c', linewidth=1.5)
#plt.legend(loc='lower center')


plt.savefig('../Plots/crashes_over_time.png', bbox_inches='tight', transparent=True, pad_inches=0.125)
plt.show()

<class 'pandas.core.frame.DataFrame'>
Index: 6916 entries, 0 to 20823
Data columns (total 21 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Unnamed: 0                  6916 non-null   int64  
 1   accident_number             6916 non-null   int64  
 2   date_and_time               6916 non-null   object 
 3   number_of_motor_vehicles    6916 non-null   float64
 4   number_of_injuries          6916 non-null   float64
 5   number_of_fatalities        6916 non-null   float64
 6   hit_and_run                 6916 non-null   bool   
 7   collision_type_description  6916 non-null   object 
 8   weather_description         6916 non-null   object 
 9   illumination_description    6916 non-null   object 
 10  harmfuldescriptions         6916 non-null   object 
 11  street_address              6916 non-null   object 
 12  city                        6916 non-null   object 
 13  state                       6916 non-

Taking this to Tableau and comparing the number of injuries and number of vehicles involved in crashes relative to their locations, a few intersections are jumping out as "hot spots". See the tableau dashboard for more information.