<h1>Project: United Kingdom Road Accident Data Analysis</h1>
<h2>Inclusive Years: 2019 - 2022</h2>
<h3>Analyst: Jiro Miko S. Viñas</h3>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
accident = pd.read_csv('dataset\\accident_data.csv')

In [3]:
accident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Index                    660679 non-null  object 
 1   Accident_Severity        660679 non-null  object 
 2   Accident Date            660679 non-null  object 
 3   Latitude                 660654 non-null  float64
 4   Light_Conditions         660679 non-null  object 
 5   District Area            660679 non-null  object 
 6   Longitude                660653 non-null  float64
 7   Number_of_Casualties     660679 non-null  int64  
 8   Number_of_Vehicles       660679 non-null  int64  
 9   Road_Surface_Conditions  659953 non-null  object 
 10  Road_Type                656159 non-null  object 
 11  Urban_or_Rural_Area      660664 non-null  object 
 12  Weather_Conditions       646551 non-null  object 
 13  Vehicle_Type             660679 non-null  object 
dtypes: f

In [4]:
accident.dtypes

Index                       object
Accident_Severity           object
Accident Date               object
Latitude                   float64
Light_Conditions            object
District Area               object
Longitude                  float64
Number_of_Casualties         int64
Number_of_Vehicles           int64
Road_Surface_Conditions     object
Road_Type                   object
Urban_or_Rural_Area         object
Weather_Conditions          object
Vehicle_Type                object
dtype: object

In [5]:
accident.describe()

Unnamed: 0,Latitude,Longitude,Number_of_Casualties,Number_of_Vehicles
count,660654.0,660653.0,660679.0,660679.0
mean,52.553866,-1.43121,1.35704,1.831255
std,1.406922,1.38333,0.824847,0.715269
min,49.91443,-7.516225,1.0,1.0
25%,51.49069,-2.332291,1.0,1.0
50%,52.315641,-1.411667,1.0,2.0
75%,53.453452,-0.232869,1.0,2.0
max,60.757544,1.76201,68.0,32.0


In [6]:
accident['Accident_Severity'] = accident['Accident_Severity'].astype('category')

In [7]:
accident['Accident_Severity'].value_counts()

Accident_Severity
Slight     563801
Serious     88217
Fatal        8661
Name: count, dtype: int64

In [8]:
accident.dtypes

Index                        object
Accident_Severity          category
Accident Date                object
Latitude                    float64
Light_Conditions             object
District Area                object
Longitude                   float64
Number_of_Casualties          int64
Number_of_Vehicles            int64
Road_Surface_Conditions      object
Road_Type                    object
Urban_or_Rural_Area          object
Weather_Conditions           object
Vehicle_Type                 object
dtype: object

<h1>Coverting Object to DateTime Data Type</h1>

In [9]:
accident['Accident Date'] = pd.to_datetime(accident['Accident Date'], dayfirst = True, errors = 'coerce')

In [10]:
accident.dtypes

Index                              object
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                          float64
Light_Conditions                   object
District Area                      object
Longitude                         float64
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions            object
Road_Type                          object
Urban_or_Rural_Area                object
Weather_Conditions                 object
Vehicle_Type                       object
dtype: object

<h1>Filling Up Null Values</h1>

In [11]:
accident.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

In [12]:
accident['Latitude'] = accident['Latitude'].fillna(accident['Latitude'].mode()[0])
accident['Longitude'] = accident['Longitude'].fillna(accident['Longitude'].mode()[0])
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].fillna('unknown surface condition')
accident['Road_Type'] = accident['Road_Type'].fillna('unknown road type')
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].fillna(accident['Urban_or_Rural_Area'].mode()[0])
accident['Weather_Conditions'] = accident['Weather_Conditions'].fillna('unknown weather condition')

In [13]:
accident.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
dtype: int64

In [14]:
accident['Accident_Severity'] = accident['Accident_Severity'].astype('category')
accident['Latitude'] = accident['Latitude'].astype('category')
accident['Light_Conditions'] = accident['Light_Conditions'].astype('category')
accident['District Area'] = accident['District Area'].astype('category')
accident['Longitude'] = accident['Longitude'].astype('category')
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].astype('category')
accident['Road_Type'] = accident['Road_Type'].astype('category')
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].astype('category')
accident['Weather_Conditions'] = accident['Weather_Conditions'].astype('category')
accident['Vehicle_Type'] = accident['Vehicle_Type'].astype('category')

In [15]:
accident.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
dtype: int64

In [16]:
accident.dtypes

Index                              object
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                         category
Light_Conditions                 category
District Area                    category
Longitude                        category
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions          category
Road_Type                        category
Urban_or_Rural_Area              category
Weather_Conditions               category
Vehicle_Type                     category
dtype: object

<h1>Extracting date information using pandas date time</h1>

In [17]:
accident['Year'] = accident['Accident Date'].dt.year
accident['Month'] = accident['Accident Date'].dt.month
accident['Day'] = accident['Accident Date'].dt.day
accident['DayOfWeek'] = accident['Accident Date'].dt.dayofweek

In [18]:
accident.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
Year                       0
Month                      0
Day                        0
DayOfWeek                  0
dtype: int64

<h1>Question No.1: What is the most common accident severity category</h1>

In [19]:
accident['Accident_Severity'].value_counts()

Accident_Severity
Slight     563801
Serious     88217
Fatal        8661
Name: count, dtype: int64

<h1>Insight No.1: The most common accident severity category is Slight Category</h1>

<h1>Question No.2: What is the most common type of road surface condition have the most accident?</h1>

In [20]:
accident['Road_Surface_Conditions'].value_counts()

Road_Surface_Conditions
Dry                          447821
Wet or damp                  186708
Frost or ice                  18517
Snow                           5890
Flood over 3cm. deep           1017
unknown surface condition       726
Name: count, dtype: int64

<h1>Insight No.2: The most common type of surface condition have the most accident is Dry

<h1>Question No.3: What is the most frequent weather condition during accidents?</h1>

In [21]:
accident['Weather_Conditions'].value_counts()

Weather_Conditions
Fine no high winds           520885
Raining no high winds         79696
Other                         17150
unknown weather condition     14128
Raining + high winds           9615
Fine + high winds              8554
Snowing no high winds          6238
Fog or mist                    3528
Snowing + high winds            885
Name: count, dtype: int64

<h1>Insight No.3: The most frequet weather condition during accidents is Fine no high winds</h1>

<h1>Question No.4: What is the most common type of vehicle have the most accident?</h1>

In [22]:
accident['Vehicle_Type'].value_counts()

Vehicle_Type
Car                                      497992
Van / Goods 3.5 tonnes mgw or under       34160
Bus or coach (17 or more pass seats)      25878
Motorcycle over 500cc                     25657
Goods 7.5 tonnes mgw and over             17307
Motorcycle 125cc and under                15269
Taxi/Private hire car                     13294
Motorcycle over 125cc and up to 500cc      7656
Motorcycle 50cc and under                  7603
Goods over 3.5t. and under 7.5t            6096
Other vehicle                              5637
Minibus (8 - 16 passenger seats)           1976
Agricultural vehicle                       1947
Pedal cycle                                 197
Data missing or out of range                  6
Ridden horse                                  4
Name: count, dtype: int64

<h1>Insight No.4: The most common type of car have the most accident is Car</h1>

<h1>Question No.5: What area have the most accident is it Urban, Rural or Unallocated</h1>

In [23]:
accident['Urban_or_Rural_Area'].value_counts()

Urban_or_Rural_Area
Urban          421678
Rural          238990
Unallocated        11
Name: count, dtype: int64

<h1>Insight No.5: The area that have a most accident is Urban</h1>

<h1>Question No.6: What Year have the most number of accident?</h1>

In [24]:
accident['Year'].value_counts()

Year
2019    182115
2020    170591
2021    163554
2022    144419
Name: count, dtype: int64

<h1>Insight No.6: The year that have the most number of accident is year 2019</h1>

<h1>Question No.7: What Month have the most number of accident?</h1>

In [25]:
accident['Month'].value_counts()

Month
11    60424
10    59580
7     57445
6     56481
9     56455
5     56352
3     54086
8     53913
1     52872
12    51836
4     51744
2     49491
Name: count, dtype: int64

<h1>Insight No.7: The month that have the most number of accident is month of November</h1>

<h1>Question No.8: What Day have the most number of accident?</h1>

In [26]:
accident['Day'].value_counts()

Day
1     22606
12    22536
11    22503
5     22409
10    22328
9     22252
17    22117
23    21997
6     21974
16    21972
19    21956
8     21914
20    21838
15    21818
13    21816
2     21815
18    21719
4     21699
21    21641
22    21634
3     21540
14    21474
7     21431
24    21301
26    21168
27    21038
28    20707
29    20138
25    19949
30    19573
31    11816
Name: count, dtype: int64

<h1>Insight No.8: The day that have the most number of accident is day 1</h1>

<h1>Question No.9: What District have the most accident happened?</h1>

In [27]:
accident['District Area'].mode()[0]

'Birmingham'

<h1>Insight No.9: The District that have the most accident happened is Birmingham</h1>

<h1>Question No.10: What Road Type have the most accident happened</h1>

In [28]:
accident['Road_Type'].mode()[0]

'Single carriageway'

<h1>Insight No.10: The Road Type that have the most accident happened is Single carriageway</h1>

<h1>Bivariate</h1>

<h1>Question No.11: Is there a correlation between the number of vehicles involved in an accident and the severity of the accident?</h1>

In [29]:
accident.groupby('Accident_Severity')['Number_of_Vehicles'].mean()

Accident_Severity
Fatal      1.786976
Serious    1.678327
Slight     1.855864
Name: Number_of_Vehicles, dtype: float64

<h1>The average number of vehicles does not show a clear, strong correlation with the severity of accidents based on this analysis, although slight accidents tend to involve a slightly higher number of vehicles.</h1>

<h1>Question No.12: How does the severity of accidents vary under different light conditions?</h1>

In [30]:
accident.groupby('Light_Conditions')['Accident_Severity'].value_counts().unstack()

Accident_Severity,Fatal,Serious,Slight
Light_Conditions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Darkness - lighting unknown,68,794,5622
Darkness - lights lit,1860,19130,108345
Darkness - lights unlit,45,360,2138
Darkness - no lighting,1612,7174,28651
Daylight,5076,60759,419045


<h1>Insight No.12: Accidents are more severe in darkness, especially with no or unlit lighting. Daylight and proper lighting reduce severity, while poor visibility increases serious or fatal accidents.</h1>

<h1>Question No.13: How does the number of casualties in an accident change depending on the type of road where the accident occurs?</h1>

In [31]:
accident.groupby('Road_Type')['Number_of_Casualties'].mean()

Road_Type
Dual carriageway      1.477279
One way street        1.192713
Roundabout            1.274891
Single carriageway    1.344666
Slip road             1.423661
unknown road type     1.248230
Name: Number_of_Casualties, dtype: float64

<h1>Insight No.13: Dual carriageways have the most casualties, likely due to higher speeds, while one-way streets have the fewest. Other road types show moderate casualties, and unknown road types have relatively low numbers, possibly due to incomplete data.</h1>

<h1>Question No.14: How do weather conditions influence the road surface conditions at the time of an accident?</h1>

In [32]:
accident.groupby(['Weather_Conditions', 'Road_Surface_Conditions']).size().unstack()

Road_Surface_Conditions,Dry,Flood over 3cm. deep,Frost or ice,Snow,Wet or damp,unknown surface condition
Weather_Conditions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fine + high winds,5023,15,183,19,3311,3
Fine no high winds,427799,113,10653,1043,81178,99
Fog or mist,471,3,709,27,2316,2
Other,2628,19,4718,413,9366,6
Raining + high winds,72,291,52,16,9184,0
Raining no high winds,547,548,404,106,78087,4
Snowing + high winds,4,3,162,549,167,0
Snowing no high winds,54,6,1411,3659,1108,0
unknown weather condition,11223,19,225,58,1991,612


<h1>Insight No.14: Weather conditions significantly affect road surface conditions, with dry surfaces dominating in fine weather, while wet, snow, and frost or ice conditions are more prevalent in specific weather conditions like rain, snow, and fog.</h1>

<h1>Question No.15: How does accident severity vary with different weather conditions?</h1>

In [33]:
accident.groupby('Weather_Conditions')['Accident_Severity'].value_counts().unstack()

Accident_Severity,Fatal,Serious,Slight
Weather_Conditions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fine + high winds,175,1245,7134
Fine no high winds,7100,72046,441739
Fog or mist,82,483,2963
Other,165,1801,15184
Raining + high winds,145,1261,8209
Raining no high winds,848,9468,69380
Snowing + high winds,3,109,773
Snowing no high winds,36,565,5637
unknown weather condition,107,1239,12782


<h1>Insight No.15: Accidents are most slight in fine weather, while raining and foggy conditions cause more serious accidents. Snowing and high winds result in fewer fatal accidents but still significant serious and slight accidents.</h1>

<h1>Question No.16: Is there a noticeable trend in the number of vehicles involved in accidents over time?</h1>

In [34]:
accident.groupby(accident['Accident Date'].dt.year)['Number_of_Vehicles'].mean()

Accident Date
2019    1.839310
2020    1.826615
2021    1.826229
2022    1.832273
Name: Number_of_Vehicles, dtype: float64

<h1>Insight No.16: There is no noticeable trend in the number of vehicles involved in accidents over time. The average number remains fairly consistent across the years, hovering around 1.83 vehicles per accident.</h1>

<h1>Question No.17: Are accidents in urban areas more severe than those in rural areas?</h1>

In [35]:
accident.groupby('Urban_or_Rural_Area')['Accident_Severity'].value_counts()

Urban_or_Rural_Area  Accident_Severity
Rural                Slight               196077
                     Serious               37312
                     Fatal                  5601
Unallocated          Slight                   10
                     Serious                   1
                     Fatal                     0
Urban                Slight               367714
                     Serious               50904
                     Fatal                  3060
Name: count, dtype: int64

<h1>Insight No.17: Urban areas have more serious accident and rural areas have a slightly higher number of fatal accidents.</h1>

<h1>Question No.18: Does the type of vehicle involved in an accident influence the number of casualties?</h1>

In [36]:
accident.groupby('Vehicle_Type')['Number_of_Casualties'].mean()

Vehicle_Type
Agricultural vehicle                     1.342065
Bus or coach (17 or more pass seats)     1.349216
Car                                      1.358841
Data missing or out of range             1.500000
Goods 7.5 tonnes mgw and over            1.351881
Goods over 3.5t. and under 7.5t          1.362861
Minibus (8 - 16 passenger seats)         1.345648
Motorcycle 125cc and under               1.332635
Motorcycle 50cc and under                1.337235
Motorcycle over 125cc and up to 500cc    1.343521
Motorcycle over 500cc                    1.359434
Other vehicle                            1.340075
Pedal cycle                              1.370558
Ridden horse                             1.250000
Taxi/Private hire car                    1.368663
Van / Goods 3.5 tonnes mgw or under      1.354537
Name: Number_of_Casualties, dtype: float64

<h1>Insight No.18: The type of vehicle does have a slight influence on the number of casualties, with pedal cycles and taxis/private hire cars seeing slightly higher casualties on average, while ridden horses tend to have the lowest.</h1>

<h1>Question No.19: How does the number of casualties in an accident vary with different weather conditions?</h1>

In [37]:
accident.groupby('Weather_Conditions')['Number_of_Casualties'].mean()

Weather_Conditions
Fine + high winds            1.386018
Fine no high winds           1.350480
Fog or mist                  1.452948
Other                        1.354869
Raining + high winds         1.416641
Raining no high winds        1.408214
Snowing + high winds         1.418079
Snowing no high winds        1.341776
unknown weather condition    1.233720
Name: Number_of_Casualties, dtype: float64

<h1>Insight No.19: Weather conditions impact the number of casualties, with fog or mist causing the most, while unknown weather conditions result in the least. Severe conditions like high winds tend to increase casualties.</h1>

<h1>Question No.20: How does the severity of accidents vary across different district areas?</h1>

In [38]:
accident.groupby('District Area')['Accident_Severity'].value_counts().unstack()

Accident_Severity,Fatal,Serious,Slight
District Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aberdeen City,12,239,1072
Aberdeenshire,66,463,1401
Adur,8,101,510
Allerdale,24,143,961
Alnwick,6,33,193
...,...,...,...
Wychavon,30,193,1138
Wycombe,20,216,1493
Wyre,15,186,1037
Wyre Forest,22,132,815


<h1>Insight No.20: The severity of accidents varies widely by district area, with certain areas experiencing higher numbers of serious and fatal accidents, while others have a larger proportion of slight accidents.</h1>

<h1>Multivariate</h1>

<h1>Question No.21: How does the fatal accident severity vary across different weather conditions and road types?</h1>>

In [39]:
accident[accident['Accident_Severity'] == 'Fatal'].groupby(['Weather_Conditions', 'Road_Type'])['Accident_Severity'].count().unstack()

Road_Type,Dual carriageway,One way street,Roundabout,Single carriageway,Slip road,unknown road type
Weather_Conditions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fine + high winds,32,5,5,133,0,0
Fine no high winds,1461,81,121,5375,37,25
Fog or mist,24,0,3,54,0,1
Other,32,1,0,128,2,2
Raining + high winds,39,1,1,103,1,0
Raining no high winds,191,5,9,634,9,0
Snowing + high winds,2,0,0,1,0,0
Snowing no high winds,9,0,1,25,0,1
unknown weather condition,25,2,2,74,0,4


<h1>Insight No.21: Fatal accidents are most frequent in fine weather with no high winds, especially on dual carriageways and single carriageways. Raining and snowing conditions lead to fewer fatalities but still show a presence, particularly on single carriageways.</h1>

<h1>Question No.22: How do accidents in urban areas with different road surface conditions and vehicle types compare in terms of accident severity?</h1>

In [40]:
accident[(accident['Urban_or_Rural_Area'] == 'Urban')].groupby(['Road_Surface_Conditions', 'Vehicle_Type'])['Accident_Severity'].value_counts().unstack()

Unnamed: 0_level_0,Accident_Severity,Fatal,Serious,Slight
Road_Surface_Conditions,Vehicle_Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Dry,Agricultural vehicle,4,120,766
Dry,Bus or coach (17 or more pass seats),78,1495,10589
Dry,Car,1616,27907,197376
Dry,Data missing or out of range,0,0,3
Dry,Goods 7.5 tonnes mgw and over,41,995,6912
...,...,...,...,...
unknown surface condition,Other vehicle,0,1,4
unknown surface condition,Pedal cycle,0,0,0
unknown surface condition,Ridden horse,0,0,0
unknown surface condition,Taxi/Private hire car,0,0,7


<h1>Insight No.22: In urban areas, cars on wet/damp or dry surfaces have the most serious accidents, while buses and goods vehicles also experience more serious accidents, and agricultural vehicles show fewer severe outcomes.</h1>

<h1>Question No.23: For serious accidents, how does the number of casualties differ under various weather conditions?</h1>

In [41]:
accident[accident['Accident_Severity'] == 'Serious'].groupby('Weather_Conditions')['Number_of_Casualties'].mean()

Weather_Conditions
Fine + high winds            1.479518
Fine no high winds           1.452322
Fog or mist                  1.681159
Other                        1.547474
Raining + high winds         1.559873
Raining no high winds        1.557034
Snowing + high winds         1.706422
Snowing no high winds        1.511504
unknown weather condition    1.303471
Name: Number_of_Casualties, dtype: float64

<h1>Insight No.23: Fog and snowing with high winds contribute to the highest number of casualties in serious accidents, while unknown weather conditions lead to fewer casualties.</h1>

<h1>Question No.24: In 2022, how do urban areas compare to rural areas in terms of accident frequency and road types?</h1>

In [42]:
accident[accident['Year'] == 2022].groupby(['Urban_or_Rural_Area', 'Road_Type']).size().unstack()

Road_Type,Dual carriageway,One way street,Roundabout,Single carriageway,Slip road,unknown road type
Urban_or_Rural_Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Rural,10694,239,3530,34784,967,211
Unallocated,0,0,0,0,0,0
Urban,10721,2687,6358,73255,569,404


<h1>Insight No.24: In 2022, urban areas experience more accidents on single carriageways, one way streets, and roundabouts compared to rural areas, while rural areas see slightly more accidents on dual carriageways.</h1>

<h1>Question No.25: How does the average number of vehicles involved in fatal accidents vary across different road surface conditions?</h1>

In [43]:
accident[(accident['Accident_Severity'] == 'Fatal')].groupby('Road_Surface_Conditions')['Number_of_Vehicles'].mean()

Road_Surface_Conditions
Dry                          1.804423
Flood over 3cm. deep         1.782609
Frost or ice                 1.756477
Snow                         1.600000
Wet or damp                  1.753053
unknown surface condition    2.000000
Name: Number_of_Vehicles, dtype: float64

<h1>Insight No.25: In fatal accidents, unknown surface conditions typically involve more vehicles, while snowy conditions tend to involve fewer vehicles.</h1>

<h1>Question No.26: How do the types of vehicles involved in serious accidents vary across different weather conditions?</h1>

In [44]:
accident[(accident['Accident_Severity'] == 'Serious')].groupby(['Vehicle_Type', 'Weather_Conditions'])['Accident_Severity'].value_counts().unstack()

Unnamed: 0_level_0,Accident_Severity,Fatal,Serious,Slight
Vehicle_Type,Weather_Conditions,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Agricultural vehicle,Fine + high winds,0,4,0
Agricultural vehicle,Fine no high winds,0,214,0
Agricultural vehicle,Fog or mist,0,2,0
Agricultural vehicle,Other,0,6,0
Agricultural vehicle,Raining + high winds,0,6,0
...,...,...,...,...
Van / Goods 3.5 tonnes mgw or under,Raining + high winds,0,64,0
Van / Goods 3.5 tonnes mgw or under,Raining no high winds,0,480,0
Van / Goods 3.5 tonnes mgw or under,Snowing + high winds,0,5,0
Van / Goods 3.5 tonnes mgw or under,Snowing no high winds,0,32,0


<h1>Insight No.26: The types of vehicles involved in serious accidents vary significantly with weather conditions, with cars and goods vehicles being the most common across multiple conditions, particularly in wet or foggy weather.</h1>

<h1>Question No.27: How do light conditions in urban areas compare to rural areas for fatal accidents?</h1>

In [45]:
accident[(accident['Accident_Severity'] == 'Fatal')].groupby(['Urban_or_Rural_Area', 'Light_Conditions'])['Accident_Severity'].value_counts().unstack()

Unnamed: 0_level_0,Accident_Severity,Fatal,Serious,Slight
Urban_or_Rural_Area,Light_Conditions,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rural,Darkness - lighting unknown,41,0,0
Rural,Darkness - lights lit,578,0,0
Rural,Darkness - lights unlit,29,0,0
Rural,Darkness - no lighting,1569,0,0
Rural,Daylight,3384,0,0
Unallocated,Darkness - lighting unknown,0,0,0
Unallocated,Darkness - lights lit,0,0,0
Unallocated,Darkness - lights unlit,0,0,0
Unallocated,Darkness - no lighting,0,0,0
Unallocated,Daylight,0,0,0


<h1>Insight No.27: In fatal accidents, rural areas experience more fatalities in darkness with no lighting, while urban areas have more fatalities under lit darkness conditions.</h1>

<h1>Question No.28: How does the number of casualties differ by road type and weather conditions?</h1>

In [46]:
accident.groupby(['Road_Type', 'Weather_Conditions'])['Number_of_Casualties'].mean()

Road_Type           Weather_Conditions       
Dual carriageway    Fine + high winds            1.442396
                    Fine no high winds           1.477963
                    Fog or mist                  1.665689
                    Other                        1.433746
                    Raining + high winds         1.475652
                    Raining no high winds        1.502453
                    Snowing + high winds         1.389189
                    Snowing no high winds        1.399020
                    unknown weather condition    1.330494
One way street      Fine + high winds            1.183544
                    Fine no high winds           1.185132
                    Fog or mist                  1.241379
                    Other                        1.233010
                    Raining + high winds         1.307190
                    Raining no high winds        1.242127
                    Snowing + high winds         1.071429
                    Snowin

<h1>Insight No.28: Accident severity, in terms of casualties, tends to be higher on dual carriageways in poor weather conditions like raining and fog. Meanwhile, one-way streets typically have fewer casualties across all weather conditions.</h1>

<h1>Question No.29: How does the type of vehicle involved in fatal accidents affect the road surface conditions?</h1>

In [47]:
accident[(accident['Accident_Severity'] == 'Fatal')].groupby(['Vehicle_Type', 'Road_Surface_Conditions'])['Accident_Severity'].value_counts().unstack()

Unnamed: 0_level_0,Accident_Severity,Fatal,Serious,Slight
Vehicle_Type,Road_Surface_Conditions,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Agricultural vehicle,Dry,13,0,0
Agricultural vehicle,Flood over 3cm. deep,1,0,0
Agricultural vehicle,Frost or ice,0,0,0
Agricultural vehicle,Snow,0,0,0
Agricultural vehicle,Wet or damp,7,0,0
...,...,...,...,...
Van / Goods 3.5 tonnes mgw or under,Flood over 3cm. deep,0,0,0
Van / Goods 3.5 tonnes mgw or under,Frost or ice,14,0,0
Van / Goods 3.5 tonnes mgw or under,Snow,3,0,0
Van / Goods 3.5 tonnes mgw or under,Wet or damp,139,0,0


<h1>Insight No.29: In fatal accidents, agricultural vehicles primarily occur on dry or wet surfaces, while vans and goods vehicles are more likely to be involved in accidents on wet, damp, or icy roads.</h1>

<h1>Question No.30: In 2019, how do light conditions affect the number of casualties in accidents?</h1>

In [48]:
accident[(accident['Year'] == 2019)].groupby(['Light_Conditions'])['Number_of_Casualties'].mean()

Light_Conditions
Darkness - lighting unknown    1.304299
Darkness - lights lit          1.374514
Darkness - lights unlit        1.342939
Darkness - no lighting         1.547224
Daylight                       1.342448
Name: Number_of_Casualties, dtype: float64

<h1>Insight No.30: In 2019, accidents occurring in darkness with no lighting had the highest average number of casualties, while daylight and accidents with lit or unlit lights had slightly lower casualty averages.</h1>

<h1>Question No.: Is there correlation between Number of vehicles and Number of Casualties?</h1>

In [49]:
accident['Number_of_Vehicles'].corr(accident['Number_of_Casualties'])

0.22888886126927627

<h1>Insight No.: There is no correlation between Number of vehicles and Number of Casualties</h1>

<h1>Question No.: </h1>

In [50]:
accident['Month'].corr(accident['Number_of_Casualties'])

0.0032184502191281465

In [51]:
accident['Month'].corr(accident['Number_of_Casualties'])

0.0032184502191281465

In [52]:
accident['Day'].corr(accident['Number_of_Vehicles'])

0.0016502945894125048

In [53]:
accident['Day'].corr(accident['Number_of_Casualties'])

0.006787175440893232

<h1>Question No.: How does the accident severity for different vehicle types differ between rural and urban areas in 2019?</h1>

In [54]:
df_2019 = accident[accident['Year'] == 2019]

In [55]:
df_2019.groupby(['Vehicle_Type', 'Urban_or_Rural_Area'])['Accident_Severity'].value_counts().unstack()

Unnamed: 0_level_0,Accident_Severity,Fatal,Serious,Slight
Vehicle_Type,Urban_or_Rural_Area,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Agricultural vehicle,Rural,6,42,184
Agricultural vehicle,Unallocated,0,0,0
Agricultural vehicle,Urban,1,61,342
Bus or coach (17 or more pass seats),Rural,105,556,3011
Bus or coach (17 or more pass seats),Unallocated,0,0,2
Bus or coach (17 or more pass seats),Urban,47,840,5922
Car,Rural,1278,7550,40153
Car,Unallocated,0,0,3
Car,Urban,670,9919,70731
Data missing or out of range,Rural,0,0,0


<h1>Insight No.: Urban areas see more slight accidents due to higher traffic density and frequent minor collisions. In contrast, rural areas report more serious or fatal accidents, especially with vehicles like motorcycles and buses, likely due to higher speeds and more hazardous roads.</h1>

<h1>Question No.: How does the accident severity for different vehicle types differ between rural and urban areas in 2022?</h1>

In [56]:
df_2022 = accident[accident['Year'] == 2022]

In [57]:
df_2022.groupby(['Vehicle_Type', 'Urban_or_Rural_Area'])['Accident_Severity'].value_counts().unstack()

Unnamed: 0_level_0,Accident_Severity,Fatal,Serious,Slight
Vehicle_Type,Urban_or_Rural_Area,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Agricultural vehicle,Rural,1,28,98
Agricultural vehicle,Unallocated,0,0,0
Agricultural vehicle,Urban,1,16,155
Bus or coach (17 or more pass seats),Rural,22,179,1046
Bus or coach (17 or more pass seats),Unallocated,0,0,0
Bus or coach (17 or more pass seats),Urban,10,409,2865
Car,Rural,827,6093,32787
Car,Unallocated,0,0,0
Car,Urban,414,8372,63275
Data missing or out of range,Rural,0,0,0


<h1>Insight No.: Urban areas report more slight accidents likely due to higher traffic density, while rural areas have more serious and fatal accidents, especially for motorcycles and cars, possibly due to higher speeds and hazardous roads.</h1>