# Vehicles

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.core.pylabtools import figsize

In [2]:
%matplotlib inline
plt.style.use('ggplot')
font = { 'family': 'DejaVu Sans', 'weight': 'bold', 'size': 22 }
plt.rc('font', **font)

In [3]:
# Default figure size (width 14, height 7) for all subsequent plots.
# `figsize` is imported together with the other dependencies in the import
# cell, so this cell only applies the setting.
figsize(14, 7)

In [4]:
# Relative path of the directory from which vehicles.csv is read below.
data_path = '../../data/'

In [5]:
# Load the vehicles table with pandas' default parsing options.
vehicles_csv = os.path.join(data_path, 'vehicles.csv')
df = pd.read_csv(vehicles_csv)

In [6]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,accident_id,Vehicle_Reference,Vehicle_Type,Towing_and_Articulation,Vehicle_Manoeuvre,Vehicle_Location-Restricted_Lane,Junction_Location,Skidding_and_Overturning,Hit_Object_in_Carriageway,Vehicle_Leaving_Carriageway,...,Was_Vehicle_Left_Hand_Drive?,Journey_Purpose_of_Driver,Sex_of_Driver,Age_of_Driver,Engine_Capacity_(CC),Propulsion_Code,Age_of_Vehicle,Driver_IMD_Decile,Driver_Home_Area_Type,Vehicle_IMD_Decile
0,430245,1,Car,No tow/articulation,Going ahead other,On main c'way - not in restricted lane,Approaching junction or waiting/parked at junc...,,,Did not leave carriageway,...,No,Other,Male,18.0,1299.0,Petrol,16.0,4.0,3.0,4.0
1,430245,2,Car,No tow/articulation,Waiting to go - held up,On main c'way - not in restricted lane,Approaching junction or waiting/parked at junc...,,,Did not leave carriageway,...,No,Journey as part of work,Female,24.0,1598.0,Heavy oil,1.0,9.0,1.0,9.0
2,387170,1,Car,No tow/articulation,Going ahead other,On main c'way - not in restricted lane,Not at or within 20 metres of junction,,,Did not leave carriageway,...,No,Journey as part of work,Male,54.0,-1.0,-1,-1.0,8.0,3.0,8.0
3,326114,1,Car,No tow/articulation,Turning right,On main c'way - not in restricted lane,Leaving main road,,,Did not leave carriageway,...,No,Not known,Female,38.0,2935.0,Petrol,22.0,2.0,1.0,2.0
4,326114,2,Motorcycle over 125cc and up to 500cc,No tow/articulation,Going ahead other,On main c'way - not in restricted lane,Approaching junction or waiting/parked at junc...,Overturned,,Did not leave carriageway,...,No,Other,Male,21.0,399.0,Petrol,25.0,5.0,1.0,5.0


In [7]:
# First row as a Series, listing the columns vertically (head() elides the
# middle columns with "...").
df.iloc[0]

accident_id                                                                    430245
Vehicle_Reference                                                                   1
Vehicle_Type                                                                      Car
Towing_and_Articulation                                           No tow/articulation
Vehicle_Manoeuvre                                                   Going ahead other
Vehicle_Location-Restricted_Lane               On main c'way - not in restricted lane
Junction_Location                   Approaching junction or waiting/parked at junc...
Skidding_and_Overturning                                                         None
Hit_Object_in_Carriageway                                                        None
Vehicle_Leaving_Carriageway                                 Did not leave carriageway
Hit_Object_off_Carriageway                                                       None
1st_Point_of_Impact                                   

In [8]:
# (number of rows, number of columns) of the loaded table.
df.shape

(455652, 22)

In [9]:
# One row per vehicle record, so the row count is the vehicle count.
n_vehicles = len(df)
'Number of vehicles: {:,}'.format(n_vehicles)

'Number of vehicles: 455,652'

In [10]:
# All column names as a plain Python list.
df.columns.tolist()

['accident_id',
 'Vehicle_Reference',
 'Vehicle_Type',
 'Towing_and_Articulation',
 'Vehicle_Manoeuvre',
 'Vehicle_Location-Restricted_Lane',
 'Junction_Location',
 'Skidding_and_Overturning',
 'Hit_Object_in_Carriageway',
 'Vehicle_Leaving_Carriageway',
 'Hit_Object_off_Carriageway',
 '1st_Point_of_Impact',
 'Was_Vehicle_Left_Hand_Drive?',
 'Journey_Purpose_of_Driver',
 'Sex_of_Driver',
 'Age_of_Driver',
 'Engine_Capacity_(CC)',
 'Propulsion_Code',
 'Age_of_Vehicle',
 'Driver_IMD_Decile',
 'Driver_Home_Area_Type',
 'Vehicle_IMD_Decile']

In [11]:
# Column dtypes as inferred by read_csv (no explicit dtype was passed).
df.dtypes

accident_id                           int64
Vehicle_Reference                     int64
Vehicle_Type                         object
Towing_and_Articulation              object
Vehicle_Manoeuvre                    object
Vehicle_Location-Restricted_Lane     object
Junction_Location                    object
Skidding_and_Overturning             object
Hit_Object_in_Carriageway            object
Vehicle_Leaving_Carriageway          object
Hit_Object_off_Carriageway           object
1st_Point_of_Impact                  object
Was_Vehicle_Left_Hand_Drive?         object
Journey_Purpose_of_Driver            object
Sex_of_Driver                        object
Age_of_Driver                       float64
Engine_Capacity_(CC)                float64
Propulsion_Code                      object
Age_of_Vehicle                      float64
Driver_IMD_Decile                   float64
Driver_Home_Area_Type               float64
Vehicle_IMD_Decile                  float64
dtype: object

In [12]:
# Fraction of NaN cells per column. isna() only detects real NaN/None values,
# so the -1 / '-1' entries that appear in the per-column counts of this
# notebook are NOT counted here.
# NOTE(review): -1 presumably is the dataset's missing-value code -- confirm
# against the data dictionary before reading 0.0 as "no missing data".
df.isna().mean()

accident_id                         0.0
Vehicle_Reference                   0.0
Vehicle_Type                        0.0
Towing_and_Articulation             0.0
Vehicle_Manoeuvre                   0.0
Vehicle_Location-Restricted_Lane    0.0
Junction_Location                   0.0
Skidding_and_Overturning            0.0
Hit_Object_in_Carriageway           0.0
Vehicle_Leaving_Carriageway         0.0
Hit_Object_off_Carriageway          0.0
1st_Point_of_Impact                 0.0
Was_Vehicle_Left_Hand_Drive?        0.0
Journey_Purpose_of_Driver           0.0
Sex_of_Driver                       0.0
Age_of_Driver                       0.0
Engine_Capacity_(CC)                0.0
Propulsion_Code                     0.0
Age_of_Vehicle                      0.0
Driver_IMD_Decile                   0.0
Driver_Home_Area_Type               0.0
Vehicle_IMD_Decile                  0.0
dtype: float64

In [13]:
# Number of distinct accident ids referenced by the vehicle rows.
n_accidents = df['accident_id'].nunique()
'Number of accidents in the accidents related to the vehicles: {:,}'.format(n_accidents)

'Number of accidents in the accidents related to the vehicles: 244,091'

In [14]:
# The 20 accident ids with the most vehicle rows (value_counts sorts descending).
df['accident_id'].value_counts().head(20)

301654    16
443919    15
490559    15
380485    14
522113    13
525478    13
428202    12
482694    12
497365    11
421211    11
569304    11
436390    11
358428    11
469205    11
357194    11
479509    11
403266    10
388578    10
436301    10
325799    10
Name: accident_id, dtype: int64

In [15]:
# Number of vehicle rows for each accident id, as a plain list.
vehicles_per_accident = df['accident_id'].value_counts().tolist()

In [16]:
# Mean and standard deviation of vehicles per accident. The standard deviation
# is the population form (NumPy's default ddof=0).
per_accident = np.asarray(vehicles_per_accident)
per_accident.mean(), per_accident.std()

(1.866730031013024, 0.7366665733848325)

In [17]:
# Frequency of each Vehicle_Reference value, most common first.
vehicle_reference_counts = df['Vehicle_Reference'].value_counts()
vehicle_reference_counts

1      246276
2      174592
3       26208
4        6030
5        1555
6         544
7         224
8          98
9          55
10         30
11         15
12          9
13          5
14          4
15          2
23          1
16          1
21          1
22          1
101         1
Name: Vehicle_Reference, dtype: int64

In [18]:
# Frequency of each vehicle type, most common first.
vehicle_type_counts = df['Vehicle_Type'].value_counts()
vehicle_type_counts

Car                                      326407
Pedal cycle                               34434
Van / Goods 3.5 tonnes mgw or under       23480
Motorcycle 125cc and under                15217
Motorcycle over 500cc                     11719
Taxi/Private hire car                      9175
Bus or coach (17 or more pass seats)       8482
Goods 7.5 tonnes mgw and over              7888
Motorcycle over 125cc and up to 500cc      3893
Motorcycle 50cc and under                  3327
Other vehicle                              2687
Goods over 3.5t. and under 7.5t            2600
Goods vehicle - unknown weight             2339
Agricultural vehicle                        997
Minibus (8 - 16 passenger seats)            843
Motorcycle - unknown cc                     692
-1                                          624
Mobility scooter                            491
Ridden horse                                198
Electric motorcycle                         115
Tram                                    

In [19]:
# Frequency of each towing / articulation category.
towing_counts = df['Towing_and_Articulation'].value_counts()
towing_counts

No tow/articulation           445714
Articulated vehicle             4734
-1                              2709
Single trailer                  1503
Other tow                        616
Caravan                          275
Double or multiple trailer       101
Name: Towing_and_Articulation, dtype: int64

In [20]:
# Frequency of each recorded vehicle manoeuvre.
manoeuvre_counts = df['Vehicle_Manoeuvre'].value_counts()
manoeuvre_counts

Going ahead other                      220175
Turning right                           43510
Slowing or stopping                     33890
Waiting to go - held up                 27397
Moving off                              20902
Parked                                  18684
Going ahead right-hand bend             15963
Turning left                            15102
Going ahead left-hand bend              13782
Overtaking moving vehicle - offside      8748
Waiting to turn right                    6759
Reversing                                6414
Overtaking static vehicle - offside      5071
Changing lane to right                   4259
Changing lane to left                    3882
U-turn                                   3450
Overtaking - nearside                    2779
-1                                       2714
Waiting to turn left                     2171
Name: Vehicle_Manoeuvre, dtype: int64

In [21]:
# Frequency of each vehicle location / restricted-lane category.
restricted_lane_counts = df['Vehicle_Location-Restricted_Lane'].value_counts()
restricted_lane_counts

On main c'way - not in restricted lane                            441911
Footway (pavement)                                                  4706
-1                                                                  2739
Bus lane                                                            1786
Cycle lane (on main carriageway)                                    1541
On lay-by or hard shoulder                                          1258
Cycleway or shared use footway (not part of  main carriageway)       621
Leaving lay-by or hard shoulder                                      496
Entering lay-by or hard shoulder                                     242
Tram/Light rail track                                                206
Busway (including guided busway)                                     146
Name: Vehicle_Location-Restricted_Lane, dtype: int64

In [22]:
# Frequency of each junction-location category.
junction_location_counts = df['Junction_Location'].value_counts()
junction_location_counts

Not at or within 20 metres of junction                         189281
Approaching junction or waiting/parked at junction approach     99969
Mid Junction - on roundabout or on main road                    89210
Cleared junction or waiting/parked at junction exit             23611
Entering main road                                              19472
Entering roundabout                                             12638
Leaving main road                                               11197
Leaving roundabout                                               6769
-1                                                               2007
Entering from slip road                                          1498
Name: Junction_Location, dtype: int64

In [23]:
# Frequency of each skidding / overturning outcome.
skidding_counts = df['Skidding_and_Overturning'].value_counts()
skidding_counts

None                         404696
Skidded                       30783
Overturned                     9078
Skidded and overturned         7976
-1                             2903
Jackknifed                      126
Jackknifed and overturned        90
Name: Skidding_and_Overturning, dtype: int64

In [24]:
# Frequency of each object type hit in the carriageway.
hit_in_carriageway_counts = df['Hit_Object_in_Carriageway'].value_counts()
hit_in_carriageway_counts

None                                432957
Parked vehicle                        7073
Kerb                                  6679
-1                                    3110
Bollard or refuge                     2066
Other object                          1304
Open door of vehicle                   733
Any animal (except ridden horse)       560
Central island of roundabout           447
Bridge (side)                          256
Previous accident                      209
Road works                             208
Bridge (roof)                           50
Name: Hit_Object_in_Carriageway, dtype: int64

In [25]:
# Frequency of each way a vehicle left (or did not leave) the carriageway.
leaving_carriageway_counts = df['Vehicle_Leaving_Carriageway'].value_counts()
leaving_carriageway_counts

Did not leave carriageway                404893
Nearside                                  25298
Offside                                   12679
Nearside and rebounded                     3133
-1                                         2933
Offside on to central reservation          1885
Straight ahead at junction                 1668
Offside and rebounded                      1561
Offside on to centrl res + rebounded       1201
Offside - crossed central reservation       401
Name: Vehicle_Leaving_Carriageway, dtype: int64

In [26]:
# Frequency of each object type hit off the carriageway.
hit_off_carriageway_counts = df['Hit_Object_off_Carriageway'].value_counts()
hit_off_carriageway_counts

None                             423575
Wall or fence                      5703
Other permanent object             5699
Tree                               4920
Road sign or traffic signal        2997
Entered ditch                      2869
Lamp post                          2353
Central crash barrier              2234
Near/Offside crash barrier         2224
-1                                 1846
Telegraph or electricity pole       990
Bus stop or bus shelter             212
Submerged in water                   30
Name: Hit_Object_off_Carriageway, dtype: int64

In [27]:
# Frequency of each first point of impact.
first_impact_counts = df['1st_Point_of_Impact'].value_counts()
first_impact_counts

Front             224273
Back               79154
Offside            64621
Nearside           58149
Did not impact     26621
-1                  2834
Name: 1st_Point_of_Impact, dtype: int64

In [28]:
# Frequency of each left-hand-drive flag value.
left_hand_drive_counts = df['Was_Vehicle_Left_Hand_Drive?'].value_counts()
left_hand_drive_counts

No     449455
-1       3762
Yes      2435
Name: Was_Vehicle_Left_Hand_Drive?, dtype: int64

In [29]:
# Frequency of each recorded driver sex value.
driver_sex_counts = df['Sex_of_Driver'].value_counts()
driver_sex_counts

Male         293729
Female       127399
Not known     34478
-1               46
Name: Sex_of_Driver, dtype: int64

In [30]:
# Frequency of each driver age value, most common first.
# NOTE(review): -1.0 is the most frequent value in the saved output;
# presumably it marks an unknown age -- confirm.
driver_age_counts = df['Age_of_Driver'].value_counts()
driver_age_counts

-1.0      51412
 30.0     12940
 25.0     10975
 26.0     10268
 24.0      9855
 28.0      9745
 27.0      9737
 23.0      9709
 29.0      9512
 22.0      9361
 21.0      9289
 35.0      9100
 20.0      8910
 31.0      8824
 32.0      8673
 34.0      8603
 33.0      8443
 37.0      8400
 36.0      8394
 40.0      8336
 45.0      8334
 19.0      8276
 50.0      8157
 18.0      8087
 46.0      7714
 47.0      7532
 48.0      7430
 38.0      7415
 44.0      7313
 49.0      7313
          ...  
 85.0       695
 13.0       694
 12.0       606
 86.0       577
 87.0       487
 11.0       448
 88.0       405
 10.0       299
 89.0       285
 90.0       246
 9.0        200
 91.0       155
 8.0        133
 92.0       112
 7.0        104
 93.0        87
 6.0         71
 94.0        60
 5.0         53
 4.0         28
 95.0        27
 96.0        19
 3.0         12
 97.0         9
 98.0         5
 1.0          4
 99.0         2
 2.0          2
 100.0        1
 101.0        1
Name: Age_of_Driver, Len

In [31]:
# Frequency of each engine capacity value, most common first.
engine_capacity_counts = df['Engine_Capacity_(CC)'].value_counts()
engine_capacity_counts

-1.0        104497
 1598.0      27107
 1968.0      12893
 998.0       12228
 1560.0      11955
 1242.0      11883
 1995.0      11774
 1997.0       9216
 1596.0       7729
 1896.0       7213
 125.0        6930
 1461.0       6442
 124.0        6401
 1796.0       6245
 1390.0       5844
 1998.0       5771
 2143.0       5695
 1229.0       5098
 1198.0       5030
 1248.0       5023
 2198.0       4729
 1398.0       4612
 1149.0       4269
 1360.0       3908
 1364.0       3890
 2993.0       3872
 999.0        3779
 1388.0       3470
 1686.0       3301
 1299.0       3207
             ...  
 1505.0          1
 5500.0          1
 3780.0          1
 751.0           1
 5999.0          1
 5993.0          1
 11950.0         1
 6162.0          1
 2330.0          1
 6400.0          1
 2361.0          1
 3217.0          1
 1608.0          1
 3202.0          1
 7000.0          1
 294.0           1
 3940.0          1
 936.0           1
 1179.0          1
 1583.0          1
 7200.0          1
 3153.0     

In [32]:
# Frequency of each propulsion (fuel) category.
propulsion_counts = df['Propulsion_Code'].value_counts()
propulsion_counts

Petrol                 196018
Heavy oil              150777
-1                     103610
Hybrid electric          4414
Electric                  318
Gas/Bi-fuel               283
Electric diesel           115
Petrol/Gas (LPG)           60
Gas                        55
New fuel technology         2
Name: Propulsion_Code, dtype: int64

In [33]:
# Frequency of each vehicle age value, most common first.
vehicle_age_counts = df['Age_of_Vehicle'].value_counts()
vehicle_age_counts

-1.0     120587
 1.0      30004
 2.0      26199
 3.0      23719
 10.0     22004
 9.0      21783
 4.0      21482
 11.0     21256
 12.0     20507
 5.0      20038
 8.0      19639
 6.0      18926
 13.0     18591
 7.0      18559
 14.0     16246
 15.0     12375
 16.0      8267
 17.0      5201
 18.0      3242
 19.0      2028
 20.0      1245
 21.0       834
 22.0       573
 23.0       448
 24.0       310
 26.0       250
 25.0       241
 27.0       193
 28.0       122
 29.0       122
          ...  
 54.0        12
 57.0        11
 48.0        11
 52.0        10
 56.0         9
 45.0         8
 44.0         8
 55.0         7
 51.0         7
 50.0         7
 43.0         7
 61.0         6
 46.0         6
 58.0         5
 60.0         5
 59.0         5
 53.0         5
 47.0         4
 70.0         3
 65.0         3
 74.0         2
 67.0         2
 64.0         2
 69.0         2
 75.0         2
 68.0         2
 62.0         1
 78.0         1
 85.0         1
 63.0         1
Name: Age_of_Vehicle, Le

In [34]:
# Frequency of each Driver_IMD_Decile value.
driver_imd_counts = df['Driver_IMD_Decile'].value_counts()
driver_imd_counts

-1.0     142964
 2.0      34879
 1.0      34109
 3.0      33772
 4.0      33567
 5.0      32504
 6.0      31750
 7.0      30262
 8.0      29022
 9.0      27939
 10.0     24884
Name: Driver_IMD_Decile, dtype: int64

In [35]:
# Frequency of each Driver_Home_Area_Type code.
driver_home_area_counts = df['Driver_Home_Area_Type'].value_counts()
driver_home_area_counts

 1.0    276409
-1.0    105644
 3.0     41860
 2.0     31739
Name: Driver_Home_Area_Type, dtype: int64

In [36]:
# Frequency of each Vehicle_IMD_Decile value.
vehicle_imd_counts = df['Vehicle_IMD_Decile'].value_counts()
vehicle_imd_counts

-1.0     142964
 2.0      34879
 1.0      34109
 3.0      33772
 4.0      33567
 5.0      32504
 6.0      31750
 7.0      30262
 8.0      29022
 9.0      27939
 10.0     24884
Name: Vehicle_IMD_Decile, dtype: int64