In [1]:
pip install scipy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


<H1>UK ROAD ACCIDENT DATA ANALYSYS</H1>
<h2>INCLUSIVE YEAR 2019-2022</h2>
<h3>Analyst: Richie M. Alcantara</h3>

<h1>DATA PREPARATION</h1>

<h1>Importing Libraries</h1>

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import f_oneway

In [3]:
accident = pd.read_csv('datasets\\accident_data.csv')

In [4]:
accident.describe()

Unnamed: 0,Latitude,Longitude,Number_of_Casualties,Number_of_Vehicles
count,660654.0,660653.0,660679.0,660679.0
mean,52.553866,-1.43121,1.35704,1.831255
std,1.406922,1.38333,0.824847,0.715269
min,49.91443,-7.516225,1.0,1.0
25%,51.49069,-2.332291,1.0,1.0
50%,52.315641,-1.411667,1.0,2.0
75%,53.453452,-0.232869,1.0,2.0
max,60.757544,1.76201,68.0,32.0


In [5]:
accident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Index                    660679 non-null  object 
 1   Accident_Severity        660679 non-null  object 
 2   Accident Date            660679 non-null  object 
 3   Latitude                 660654 non-null  float64
 4   Light_Conditions         660679 non-null  object 
 5   District Area            660679 non-null  object 
 6   Longitude                660653 non-null  float64
 7   Number_of_Casualties     660679 non-null  int64  
 8   Number_of_Vehicles       660679 non-null  int64  
 9   Road_Surface_Conditions  659953 non-null  object 
 10  Road_Type                656159 non-null  object 
 11  Urban_or_Rural_Area      660664 non-null  object 
 12  Weather_Conditions       646551 non-null  object 
 13  Vehicle_Type             660679 non-null  object 
dtypes: f

In [6]:
accident['Accident Date'] = pd.to_datetime(accident['Accident Date'],dayfirst=True,errors = 'coerce')

In [7]:
accident['Accident_Severity'] = accident['Accident_Severity'].astype('category')

In [8]:
accident['Light_Conditions'] = accident['Light_Conditions'].astype('category')

In [9]:
accident['District Area'] = accident['District Area'].astype('category')

In [10]:
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].astype('category')

In [11]:
accident['Road_Type'] = accident['Road_Type'].astype('category')

In [12]:
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].astype('category')

In [13]:
accident['Weather_Conditions'] = accident['Weather_Conditions'].astype('category')

In [14]:
accident['Vehicle_Type'] = accident['Vehicle_Type'].astype('category')

In [15]:
accident.dtypes

Index                              object
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                          float64
Light_Conditions                 category
District Area                    category
Longitude                         float64
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions          category
Road_Type                        category
Urban_or_Rural_Area              category
Weather_Conditions               category
Vehicle_Type                     category
dtype: object

In [16]:
accident.isnull().sum()

Index                           0
Accident_Severity               0
Accident Date              395672
Latitude                       25
Light_Conditions                0
District Area                   0
Longitude                      26
Number_of_Casualties            0
Number_of_Vehicles              0
Road_Surface_Conditions       726
Road_Type                    4520
Urban_or_Rural_Area            15
Weather_Conditions          14128
Vehicle_Type                    0
dtype: int64

In [17]:
accident['Latitude'] = accident['Latitude'].fillna(accident['Latitude'].mean())

In [18]:
accident['Longitude'] = accident['Longitude'].fillna(accident['Longitude'].mean())

In [19]:
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].fillna(accident['Road_Surface_Conditions'].mode()[0])

In [20]:
accident['Road_Type'] = accident['Road_Type'].fillna(accident['Road_Type'].mode()[0])

In [21]:
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].fillna(accident['Urban_or_Rural_Area'].mode()[0])

In [22]:
accident.isnull().sum()

Index                           0
Accident_Severity               0
Accident Date              395672
Latitude                        0
Light_Conditions                0
District Area                   0
Longitude                       0
Number_of_Casualties            0
Number_of_Vehicles              0
Road_Surface_Conditions         0
Road_Type                       0
Urban_or_Rural_Area             0
Weather_Conditions          14128
Vehicle_Type                    0
dtype: int64

<H1>Insights</H1>

<H1>1. Accident mostly happens in Single Carriageway Road Type</H1>

In [23]:
accident['Road_Type'].value_counts()

Road_Type
Single carriageway    496663
Dual carriageway       99424
Roundabout             43992
One way street         13559
Slip road               7041
Name: count, dtype: int64

<H1>2. Most of the accidents happen in a Dry surface road condition</H1>

In [24]:
accident['Road_Surface_Conditions'].value_counts()

Road_Surface_Conditions
Dry                     448547
Wet or damp             186708
Frost or ice             18517
Snow                      5890
Flood over 3cm. deep      1017
Name: count, dtype: int64

<H1>3. The maximum number of Casualties happened in a Single Carriageway with Daylight Light condition with total number of 486942 </H1>

In [25]:
accident.groupby(['Road_Type','Light_Conditions'])['Number_of_Casualties'].sum()

  accident.groupby(['Road_Type','Light_Conditions'])['Number_of_Casualties'].sum()


Road_Type           Light_Conditions           
Dual carriageway    Darkness - lighting unknown      1261
                    Darkness - lights lit           30247
                    Darkness - lights unlit           669
                    Darkness - no lighting          11779
                    Daylight                       102921
One way street      Darkness - lighting unknown       148
                    Darkness - lights lit            4072
                    Darkness - lights unlit            68
                    Darkness - no lighting            152
                    Daylight                        11732
Roundabout          Darkness - lighting unknown       590
                    Darkness - lights lit           13986
                    Darkness - lights unlit           242
                    Darkness - no lighting            293
                    Daylight                        40974
Single carriageway  Darkness - lighting unknown      6356
                    Dark

<H1>4. Birmingham has the highest number of accidents among the Distict Area</H1>

<H1>5. Meanwhile Clackmannanshire has the lowest number of accidents among the Distict Area</H1>

In [26]:
accident['District Area'].value_counts()

District Area
Birmingham            13491
Leeds                  8898
Manchester             6720
Bradford               6212
Sheffield              5710
                      ...  
Berwick-upon-Tweed      153
Teesdale                142
Shetland Islands        133
Orkney Islands          117
Clackmannanshire         91
Name: count, Length: 422, dtype: int64

<H1>6. The highest number of Vehicle is 671229 that happens in a Single Carriageway Road Type with Daylight Light Condition </H1>

In [27]:
accident.groupby(['Road_Type','Light_Conditions'])['Number_of_Vehicles'].sum()

  accident.groupby(['Road_Type','Light_Conditions'])['Number_of_Vehicles'].sum()


Road_Type           Light_Conditions           
Dual carriageway    Darkness - lighting unknown      1776
                    Darkness - lights lit           38395
                    Darkness - lights unlit           876
                    Darkness - no lighting          14663
                    Daylight                       144109
One way street      Darkness - lighting unknown       186
                    Darkness - lights lit            5129
                    Darkness - lights unlit            90
                    Darkness - no lighting            138
                    Daylight                        15954
Roundabout          Darkness - lighting unknown       869
                    Darkness - lights lit           19082
                    Darkness - lights unlit           341
                    Darkness - no lighting            394
                    Daylight                        62957
Single carriageway  Darkness - lighting unknown      8405
                    Dark

<h1>7.Most of the accidents are classified as slight severe with total number of 381049 in Dry Road Surface Condition</h1>

In [None]:
sev_con = accident.groupby(['Road_Surface_Conditions','Accident_Severity']).sum()

In [None]:
sev_con

<h1>8. The Vehicle Type that has the highest number of casualty on either Urban or Rural area is Car followed by Van/Goods 3.5 tonnes mgw or under</h1>

In [None]:
casual = accident.groupby(['Urban_or_Rural_Area','Vehicle_Type'])['Number_of_Casualties'].size()

In [None]:
casual

In [None]:
area= accident.groupby(['Accident_Severity'])['Number_of_Casualties'].sum()

In [None]:
area

<H1>3. There is no significant correlation between Light Conditions and the Number of Casualties</H1
                                                                                                

In [None]:
f_stats,p_value = f_oneway(accident[accident['Light_Conditions'] == 'Daylight']['Number_of_Casualties'],
                    accident[accident['Light_Conditions'] == 'Darkness - lights lit']['Number_of_Casualties'],
                    accident[accident['Light_Conditions'] == 'Darkness - no lighting']['Number_of_Casualties'],
                    accident[accident['Light_Conditions'] == 'Darkness - lighting unknown']['Number_of_Casualties'],
                    accident[accident['Light_Conditions'] == 'Darkness - lights unlit']['Number_of_Casualties'])
print(p_value)

<H1>4. The Latitudinal location of the accident has no correlation to the Longitudinal location of the accident</H1>

In [None]:
lat_long = accident['Latitude'].corr(accident['Longitude'])

In [None]:
lat_long

<H1>5. There is no correlation between Latitudinal location of the accident and the Number of Casualties</H1>

In [None]:
lat_cas = accident['Latitude'].corr(accident['Number_of_Casualties'])

In [None]:
lat_cas

<H1>6. There is no correlation between Longitudinal location of the accident and the Number of Casualties</H1>

In [None]:
long_cas = accident['Longitude'].corr(accident['Number_of_Casualties'])

In [None]:
long_cas

<H1>7. The Latitudinal Location has no correlation on the Number of Vehicles have been accident</H1>

In [None]:
lat_veh = accident['Latitude'].corr(accident['Number_of_Vehicles'])

In [None]:
lat_veh

<H1>8. The Longitudinal Location has no correlation on the Number of Vehicles have been accident</H1>

In [None]:
long_veh = accident['Longitude'].corr(accident['Number_of_Vehicles'])

In [None]:
long_veh

<H1>9. The Number of Vehicles has no correlation on the Number of Casualties</H1>

In [None]:
cas_veh = accident['Number_of_Vehicles'].corr(accident['Number_of_Casualties'])

In [None]:
cas_veh