# Identifying the most dangerous junctions in the UK

In [1]:
#main
import pandas as pd
import numpy as np
import math

#graphic
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline

#warnings
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Lift pandas' display truncation: None removes the cap on how many
# columns and rows are rendered when a frame or series is displayed.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [3]:
# Load the accident records from a CSV in the working directory.
# NOTE(review): the file name suggests missing values were imputed upstream — confirm.
raw_df = pd.read_csv('Accident_NaNs_imputed.csv')
# Show (rows, columns) as a quick sanity check on the load.
raw_df.shape

(597840, 40)

In [4]:
# Keep a single row per accident_index (pandas keeps the first occurrence by default).
raw_df = raw_df.drop_duplicates(subset='accident_index')

In [5]:
# Frequency of each junction_detail code; dropna=False counts NaN as its own bucket.
# Code 0 means 'Not at junction or within 20 metres'.
raw_df['junction_detail'].value_counts(dropna=False)

0.0    254049
3.0    176873
6.0     56609
1.0     48961
9.0     23305
8.0     14942
5.0      8504
2.0      7762
7.0      6835
Name: junction_detail, dtype: int64

In [6]:
# Frequency of each junction_control code; dropna=False counts NaN as its own bucket.
# Code 4 is 'Give way or uncontrolled' (per the note in the final analysis cell).
raw_df['junction_control'].value_counts(dropna=False)

4.0    526550
2.0     65342
3.0      3597
1.0      1959
0.0       392
Name: junction_control, dtype: int64

In [7]:
# Frequency of the raw numeric accident_severity codes, before they are relabelled as text.
raw_df['accident_severity'].value_counts(dropna=False)

3    481638
2    108112
1      8090
Name: accident_severity, dtype: int64

In [8]:
# Relabel the numeric severity codes as text: codes 1 and 2 both become
# 'Serious', code 3 becomes 'Slight'. The column is stored with object dtype.
raw_df['accident_severity'] = (
    raw_df['accident_severity']
    .replace({1: 'Serious', 2: 'Serious', 3: 'Slight'})
    .astype(object)
)

In [9]:
# Check the relabelling: only the 'Serious' and 'Slight' labels should remain.
raw_df['accident_severity'].value_counts(dropna=False)

Slight     481638
Serious    116202
Name: accident_severity, dtype: int64

In [10]:
# Keep only 'Serious' accidents that happened at a junction
# (junction_detail 0 means 'Not at junction or within 20 metres').
# .copy() makes junction_df an independent frame: later cells assign columns
# on it, and without the copy those assignments would target a slice of
# raw_df (the SettingWithCopyWarning pattern).
junction_df = raw_df[
    (raw_df['junction_detail'] != 0.0) & (raw_df['accident_severity'] == 'Serious')
].copy()

In [11]:
# (rows, columns) remaining after the junction / severity filter.
junction_df.shape

(60683, 40)

In [12]:
# Round both coordinates to 3 decimal places, so accidents whose positions
# differ only beyond the third decimal end up with identical coordinates.
# DataFrame.round is the vectorised equivalent of applying round(x, 3)
# to each of the two columns.
junction_df[['latitude', 'longitude']] = junction_df[['latitude', 'longitude']].round(3)


In [13]:
def latlong_combine(row):
    """Return a row's coordinates as one "latitude, longitude" string.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) that can be indexed
            by 'latitude' and 'longitude'.

    Returns:
        str: ``str()`` of both values joined by ", ", latitude first.
    """
    coordinates = (row['latitude'], row['longitude'])
    return ", ".join(map(str, coordinates))

# Build the combined "lat, long" text key for every row; axis='columns'
# (the named form of axis=1) passes each row to latlong_combine.
junction_df['latlong'] = junction_df.apply(func=latlong_combine, axis='columns')

In [14]:
# Preview the first rows to check the rounded coordinates and the new latlong column.
junction_df.head()

Unnamed: 0.1,Unnamed: 0,accident_year,location_easting_osgr,location_northing_osgr,longitude,latitude,number_of_vehicles,number_of_casualties,accident_index,accident_reference,police_force,accident_severity,day_of_week,time,local_authority_district,local_authority_ons_district,local_authority_highway,first_road_class,first_road_number,road_type,speed_limit,junction_detail,junction_control,second_road_class,second_road_number,pedestrian_crossing_human_control,pedestrian_crossing_physical_facilities,light_conditions,weather_conditions,road_surface_conditions,special_conditions_at_site,carriageway_hazards,urban_or_rural_area,did_police_officer_attend_scene_of_accident,trunk_road_flag,lsoa_of_accident_location,month,hour,easting_binned,northing_binned,latlong
18,18,2016,516350.0,175710.0,-0.326,51.468,1,1,2016010000048,10000048,1,Serious,3,07:10,25.0,E09000018,E09000018,3,3004,6.0,30.0,3.0,4.0,6.0,0.0,0.0,4.0,1.0,1.0,1.0,0.0,2.0,1.0,1.0,2.0,E01002667,1,7,J,N,"51.468, -0.326"
59,59,2016,531430.0,178020.0,-0.108,51.486,2,1,2016010000116,10000116,1,Serious,4,06:25,9.0,E09000022,E09000022,3,3,3.0,30.0,3.0,2.0,3.0,3.0,0.0,5.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,2.0,E01003107,2,6,J,N,"51.486, -0.108"
63,63,2016,515430.0,180170.0,-0.338,51.509,1,2,2016010000121,10000121,1,Serious,4,09:50,27.0,E09000009,E09000009,3,4020,6.0,30.0,6.0,2.0,3.0,3002.0,0.0,4.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,2.0,E01001252,2,9,J,N,"51.509, -0.338"
70,70,2016,536210.0,182860.0,-0.038,51.528,1,1,2016010000132,10000132,1,Serious,4,08:24,5.0,E09000030,E09000030,3,1205,3.0,20.0,3.0,4.0,6.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,2.0,E01004230,2,8,J,N,"51.528, -0.038"
85,85,2016,550990.0,190170.0,0.178,51.59,2,1,2016010000163,10000163,1,Serious,3,13:27,15.0,E09000016,E09000016,3,12,3.0,50.0,3.0,4.0,6.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,2.0,E01002334,1,13,J,N,"51.59, 0.178"


In [15]:
# List the available column names before selecting the subset to keep.
junction_df.columns

Index(['Unnamed: 0', 'accident_year', 'location_easting_osgr',
       'location_northing_osgr', 'longitude', 'latitude', 'number_of_vehicles',
       'number_of_casualties', 'accident_index', 'accident_reference',
       'police_force', 'accident_severity', 'day_of_week', 'time',
       'local_authority_district', 'local_authority_ons_district',
       'local_authority_highway', 'first_road_class', 'first_road_number',
       'road_type', 'speed_limit', 'junction_detail', 'junction_control',
       'second_road_class', 'second_road_number',
       'pedestrian_crossing_human_control',
       'pedestrian_crossing_physical_facilities', 'light_conditions',
       'weather_conditions', 'road_surface_conditions',
       'special_conditions_at_site', 'carriageway_hazards',
       'urban_or_rural_area', 'did_police_officer_attend_scene_of_accident',
       'trunk_road_flag', 'lsoa_of_accident_location', 'month', 'hour',
       'easting_binned', 'northing_binned', 'latlong'],
      dtype='object')

In [16]:
# Narrow junction_df to 15 columns: the accident id, the location fields,
# the vehicle/casualty counts, severity, local authority, and the
# junction / road descriptors.
junction_df = junction_df.loc[:, [
    'accident_index',
    'latitude', 'longitude', 'latlong',
    'number_of_vehicles', 'number_of_casualties',
    'accident_severity', 'local_authority_district',
    'junction_detail', 'junction_control', 'road_type',
    'first_road_class', 'first_road_number',
    'second_road_class', 'second_road_number',
]]

In [17]:
# (rows, columns) after the column selection; the row count should be unchanged.
junction_df.shape

(60683, 15)

In [22]:
# Show the ten individual accident records with the highest number_of_casualties.
junction_df.sort_values(by = ['number_of_casualties'], ascending = False).head(10)

Unnamed: 0,accident_index,longitude,latitude,latlong,number_of_vehicles,number_of_casualties,accident_severity,local_authority_district,junction_detail,junction_control,road_type,first_road_class,first_road_number,second_road_class,second_road_number
75624,201634NE09806,-0.469,52.505,"52.505, -0.469",2,58,Serious,382.0,6.0,4.0,6.0,5,0,5.0,0.0
122918,2016551602181,-2.454,50.612,"50.612, -2.454",2,27,Serious,646.0,1.0,4.0,6.0,5,0,6.0,0.0
67153,2016300018272,-1.421,53.24,"53.24, -1.421",3,21,Serious,322.0,3.0,4.0,6.0,4,6543,6.0,0.0
458574,2019350900122,-0.043,52.359,"52.359, -0.043",2,20,Serious,393.0,6.0,4.0,6.0,4,1040,6.0,0.0
464665,2019410889448,-0.195,51.889,"51.889, -0.195",2,19,Serious,435.0,9.0,4.0,6.0,4,197,6.0,0.0
136099,201697UC70210,-4.43,55.576,"55.576, -4.43",1,19,Serious,919.0,1.0,4.0,1.0,3,76,3.0,719.0
594904,2020990939366,-2.94,56.548,"56.548, -2.94",2,19,Serious,912.0,3.0,4.0,3.0,3,928,3.0,90.0
472728,2019440129002,-1.346,50.703,"50.703, -1.346",3,19,Serious,505.0,6.0,4.0,6.0,3,3054,6.0,0.0
106496,2016460111374,0.515,51.394,"51.394, 0.515",4,15,Serious,544.0,9.0,4.0,6.0,6,0,6.0,0.0
226874,2017430254240,-0.749,52.084,"52.084, -0.749",3,14,Serious,479.0,5.0,4.0,3.0,1,1,1.0,1.0


In [29]:
# Total casualties and vehicles per rounded location, keeping the 54 groups
# with the highest casualty totals.
# NOTE(review): local_authority_district, junction_control and junction_detail
# are part of the group key, so a location recorded with different codes is
# split across several rows instead of being summed into one.
most_dangerous_junctions = (
    junction_df
    .groupby(['latlong', 'latitude', 'longitude', 'local_authority_district',
              'junction_control', 'junction_detail'])
    # String aliases instead of the builtin `sum`: same totals, without the
    # FutureWarning newer pandas emits for builtin callables passed to agg().
    .agg({'number_of_casualties': 'sum', 'number_of_vehicles': 'sum'})
    .sort_values(by=['number_of_casualties'], ascending=False)
    .head(54)
)

most_dangerous_junctions  # top 54 location groups by total casualties (2016-2020)
# NOTE(review): this line previously read "junctions with 100 and more casualties
# over a 5 year period", but the displayed totals peak well below 100 — confirm
# the intended threshold.

# Almost all of them are 'Give way or uncontrolled' (junction_control=4).
# My preventative action proposal is the installation of traffic lights at these junctions.
# In addition to that, the presence of traffic police at these junctions from time to time
# may help enforce the law and give more control over accidents in these hotspots.


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,number_of_casualties,number_of_vehicles
latlong,latitude,longitude,local_authority_district,junction_control,junction_detail,Unnamed: 6_level_1,Unnamed: 7_level_1
"52.505, -0.469",52.505,-0.469,382.0,4.0,6.0,62,6
"53.358, -0.19",53.358,-0.19,351.0,4.0,6.0,30,16
"50.612, -2.454",50.612,-2.454,646.0,4.0,1.0,27,2
"50.703, -1.346",50.703,-1.346,505.0,4.0,6.0,23,9
"52.359, -0.043",52.359,-0.043,393.0,4.0,6.0,23,6
"56.548, -2.94",56.548,-2.94,912.0,4.0,3.0,22,4
"53.24, -1.421",53.24,-1.421,322.0,4.0,3.0,21,3
"55.576, -4.43",55.576,-4.43,919.0,4.0,1.0,19,1
"51.889, -0.195",51.889,-0.195,435.0,4.0,9.0,19,2
"51.394, 0.515",51.394,0.515,544.0,4.0,9.0,15,4


In [None]:
# most_dangerous_junctions.to_csv('most_dangerous_junctions.csv')