In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [3]:
UkAcc = pd.read_csv('uk_road_accident.csv')

In [4]:
UkAcc

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type
0,200701BS64157,Serious,5/6/2019,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car
1,200701BS65737,Serious,2/7/2019,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car
2,200701BS66127,Serious,26-08-2019,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,,Urban,,Taxi/Private hire car
3,200701BS66128,Serious,16-08-2019,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats)
4,200701BS66837,Slight,3/9/2019,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,,Urban,,Other vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,18-02-2022,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car
660675,201091NM01881,Slight,21-02-2022,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660676,201091NM01935,Slight,23-02-2022,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660677,201091NM01964,Serious,23-02-2022,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc


In [5]:
UkAcc.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

In [6]:
UkAcc['Latitude'] = UkAcc['Latitude'].fillna(UkAcc['Latitude'].mean())
UkAcc['Longitude'] = UkAcc['Longitude'].fillna(UkAcc['Longitude'].mean())
UkAcc['Road_Surface_Conditions'] = UkAcc['Road_Surface_Conditions'].fillna(UkAcc['Road_Surface_Conditions'].mode()[0])
UkAcc['Road_Type'] = UkAcc['Road_Type'].fillna(UkAcc['Road_Type'].mode()[0])
UkAcc['Urban_or_Rural_Area'] = UkAcc['Urban_or_Rural_Area'].fillna(UkAcc['Urban_or_Rural_Area'].mode()[0])
UkAcc['Weather_Conditions'] = UkAcc['Weather_Conditions'].fillna(UkAcc['Weather_Conditions'].mode()[0])

In [7]:
UkAcc.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
dtype: int64

<h1>Clearing any Inconsistencies with the Data</h1>

In [8]:
UkAcc['Accident Date'] = UkAcc['Accident Date'].str.strip()
UkAcc['Accident Date'] = UkAcc['Accident Date'].astype('str')
UkAcc['Accident Date'] = UkAcc['Accident Date'].str.replace('/','-')
UkAcc['Accident Date'] = pd.to_datetime(UkAcc['Accident Date'], dayfirst = True, errors = 'coerce')

In [9]:
UkAcc.dtypes

Index                              object
Accident_Severity                  object
Accident Date              datetime64[ns]
Latitude                          float64
Light_Conditions                   object
District Area                      object
Longitude                         float64
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions            object
Road_Type                          object
Urban_or_Rural_Area                object
Weather_Conditions                 object
Vehicle_Type                       object
dtype: object

In [10]:
UkAcc.info

<bound method DataFrame.info of                 Index Accident_Severity Accident Date   Latitude  \
0       200701BS64157           Serious    2019-06-05  51.506187   
1       200701BS65737           Serious    2019-07-02  51.495029   
2       200701BS66127           Serious    2019-08-26  51.517715   
3       200701BS66128           Serious    2019-08-16  51.495478   
4       200701BS66837            Slight    2019-09-03  51.488576   
...               ...               ...           ...        ...   
660674  201091NM01760            Slight    2022-02-18  57.374005   
660675  201091NM01881            Slight    2022-02-21  57.232273   
660676  201091NM01935            Slight    2022-02-23  57.585044   
660677  201091NM01964           Serious    2022-02-23  57.214898   
660678  201091NM02142           Serious    2022-02-28  57.575210   

                   Light_Conditions           District Area  Longitude  \
0             Darkness - lights lit  Kensington and Chelsea  -0.209082   
1  

In [11]:
UkAcc['Accident_Severity'] = UkAcc['Accident_Severity'].astype('category')

In [12]:
UkAcc.dtypes

Index                              object
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                          float64
Light_Conditions                   object
District Area                      object
Longitude                         float64
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions            object
Road_Type                          object
Urban_or_Rural_Area                object
Weather_Conditions                 object
Vehicle_Type                       object
dtype: object

<h1>Q1: What is the typical count of vehicles involved based on accident severity</h1>

In [16]:
UkAcc.groupby('Accident_Severity')['Number_of_Vehicles'].mean()

Accident_Severity
Fatal      1.786976
Serious    1.678327
Slight     1.855864
Name: Number_of_Vehicles, dtype: float64

<h1>Insights: Accidents that are more serious generally involve a greater number of vehicles, indicating that collisions with multiple vehicles are more hazardous</h1>
<hr>

<h1>Q2: Which Area does accidents occur? is it on urban or rural areas?</h1>

In [19]:
UkAcc['Urban_or_Rural_Area'].value_counts()

Urban_or_Rural_Area
Urban          421678
Rural          238990
Unallocated        11
Name: count, dtype: int64

<h1>Insights: The data shows that Urban Areas have a higher number of accidents</h1>
<hr>

<h1>Q3: What is the most prevalent level of accident severity</h1>

In [20]:
UkAcc['Accident_Severity'].value_counts()

Accident_Severity
Slight     563801
Serious     88217
Fatal        8661
Name: count, dtype: int64

<h1>Insights: Minor collisions are the most frequent kind, showing that the majority of roadway incidents lead to slight injuries instead of severe damage</h1>
<hr>

<h1>Q4: Which lighting conditions pose the greatest risk</h1>

In [22]:
UkAcc.groupby('Light_Conditions')['Accident_Severity'].value_counts(normalize=True)* 100

Light_Conditions             Accident_Severity
Darkness - lighting unknown  Slight               86.705737
                             Serious              12.245527
                             Fatal                 1.048735
Darkness - lights lit        Slight               83.770828
                             Serious              14.791047
                             Fatal                 1.438126
Darkness - lights unlit      Slight               84.073928
                             Serious              14.156508
                             Fatal                 1.769564
Darkness - no lighting       Slight               76.531239
                             Serious              19.162860
                             Fatal                 4.305901
Daylight                     Slight               86.422414
                             Serious              12.530729
                             Fatal                 1.046857
Name: proportion, dtype: float64

<h1>Insights: The most hazardous lighting situation is "Darkness - no lighting," which accounts for the greatest percentage of fatal accidents compared to other lighting conditions</h1>
<hr>

<h1>Q5: Which types of roads experience the most serious accidents</h1>

In [24]:
UkAcc.groupby('Road_Type')['Accident_Severity'].value_counts(normalize=True)* 100

Road_Type           Accident_Severity
Dual carriageway    Slight               86.360436
                    Serious              11.814049
                    Fatal                 1.825515
One way street      Slight               87.093443
                    Serious              12.205915
                    Fatal                 0.700642
Roundabout          Slight               91.346154
                    Serious               8.331060
                    Fatal                 0.322786
Single carriageway  Slight               84.476395
                    Serious              14.202789
                    Fatal                 1.320815
Slip road           Slight               90.626331
                    Serious               8.677745
                    Fatal                 0.695924
Name: proportion, dtype: float64

<h1>Insights: Single carriageways exhibit the highest proportion of severe accidents among various road categories, suggesting that these roads are comparatively more hazardous regarding accidents</h1>
<hr>

<h1>Q6: What is the highest number of fatalities documented in a lone incident</h1>

In [26]:
UkAcc['Number_of_Casualties'].max()

np.int64(68)

<h1>Insights: The most fatal incident noted resulted in 68 fatalities, highlighting the significant effect a single event can create</h1>
<hr>

<h1>Q7: Which types of vehicles are most frequently involved in accidents</h1>

In [27]:
UkAcc['Vehicle_Type'].value_counts()

Vehicle_Type
Car                                      497992
Van / Goods 3.5 tonnes mgw or under       34160
Bus or coach (17 or more pass seats)      25878
Motorcycle over 500cc                     25657
Goods 7.5 tonnes mgw and over             17307
Motorcycle 125cc and under                15269
Taxi/Private hire car                     13294
Motorcycle over 125cc and up to 500cc      7656
Motorcycle 50cc and under                  7603
Goods over 3.5t. and under 7.5t            6096
Other vehicle                              5637
Minibus (8 - 16 passenger seats)           1976
Agricultural vehicle                       1947
Pedal cycle                                 197
Data missing or out of range                  6
Ridden horse                                  4
Name: count, dtype: int64

<h1>Insights: Cars experience the highest rate of severe accidents in comparison to other types of vehicles, with vans coming next but having a significant gap over the rest</h1>
<hr>

<h1>Q8: Is there a connection between vehicle type and injuries</h1>

In [29]:
UkAcc.groupby('Vehicle_Type')['Number_of_Casualties'].count()

Vehicle_Type
Agricultural vehicle                       1947
Bus or coach (17 or more pass seats)      25878
Car                                      497992
Data missing or out of range                  6
Goods 7.5 tonnes mgw and over             17307
Goods over 3.5t. and under 7.5t            6096
Minibus (8 - 16 passenger seats)           1976
Motorcycle 125cc and under                15269
Motorcycle 50cc and under                  7603
Motorcycle over 125cc and up to 500cc      7656
Motorcycle over 500cc                     25657
Other vehicle                              5637
Pedal cycle                                 197
Ridden horse                                  4
Taxi/Private hire car                     13294
Van / Goods 3.5 tonnes mgw or under       34160
Name: Number_of_Casualties, dtype: int64

<h1>Insights: Cars, as the most prevalent type of vehicle, also account for the highest total of casualties. This corresponds with their prominent presence on the roads</h1>
<hr>

<h1>Q9: What is the number of accidents that happen under different weather conditions</h1>

In [31]:
UkAcc['Weather_Conditions'].value_counts()

Weather_Conditions
Fine no high winds       535013
Raining no high winds     79696
Other                     17150
Raining + high winds       9615
Fine + high winds          8554
Snowing no high winds      6238
Fog or mist                3528
Snowing + high winds        885
Name: count, dtype: int64

<h1>Insights: Most accidents occur in fine weather with no high winds, not because good weather itself is risky, but because it represents the majority of driving conditions. In contrast, rain and other adverse weather conditions show far fewer accidents, largely due to reduced travel during such times.</h1>
<hr>

<h1>Q10: How are accidents distributed based on the number of vehicles involved</h1>

In [33]:
UkAcc['Number_of_Vehicles'].value_counts()

Number_of_Vehicles
2     391995
1     200787
3      52806
4      11300
5       2464
6        758
7        296
8        147
9         57
10        27
11        12
14         9
12         7
13         6
16         4
28         1
15         1
32         1
19         1
Name: count, dtype: int64

<h1>Insight: Around 85–90% of accidents involve either a single vehicle or two vehicles, while multi-vehicle accidents are relatively uncommon. This indicates that most crashes are straightforward single-vehicle incidents or two-party collisions.</h1><hr>

<h1>Q11: Are fatal accidents more common in rural areas?</h1>

In [35]:
rural_fatal = UkAcc[(UkAcc['Urban_or_Rural_Area'] == 'Rural') & (UkAcc['Accident_Severity'] == 'Fatal')]
len(rural_fatal)

5601

In [38]:
urban_fatal = UkAcc[(UkAcc['Urban_or_Rural_Area'] == 'Urban') & (UkAcc['Accident_Severity'] == 'Fatal')]
len(urban_fatal)

3060

<h1>Insights: Yes, based on its output. Accidents are more common in Rural areas than Urban.</h1><hr>

<h1>Q12: How many accidents occur on roundabouts vs other road types?</h1>

In [40]:
UkAcc.groupby(UkAcc['Road_Type'] == 'Roundabout').size()

Road_Type
False    616687
True      43992
dtype: int64

<h1>Insights: Roundabouts record about 44,000 accidents compared to 616,000 on other road types. However, this difference mainly reflects their limited presence in the road network rather than poor safety. In fact, accidents at roundabouts generally have lower severity rates.</h1><hr>

<h1>Q13: Which weather condition has the lowest accident count?</h1>

In [42]:
UkAcc['Weather_Conditions'].value_counts().tail(1)

Weather_Conditions
Snowing + high winds    885
Name: count, dtype: int64

<h1>Insights: The fewest accidents occur during fog, mist, or snow combined with high winds, suggesting that drivers tend to avoid travel in the most hazardous weather conditions.</h1><hr>

<h1>Q14: Are there specific districts with unusual accident types or frequencies?</h1>

In [44]:
UkAcc.groupby('District Area')['Accident_Severity'].value_counts().unstack().fillna(0).astype(int)

Accident_Severity,Fatal,Serious,Slight
District Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aberdeen City,12,239,1072
Aberdeenshire,66,463,1401
Adur,8,101,510
Allerdale,24,143,961
Alnwick,6,33,193
...,...,...,...
Wychavon,30,193,1138
Wycombe,20,216,1493
Wyre,15,186,1037
Wyre Forest,22,132,815


<h1>Insights: Aberdeenshire +records a higher number of fatal accidents compared to other districts, likely reflecting factors such as rural road conditions, higher speeds, and limited access to emergency services.</h1><hr>

<h1>Q15: What is the average number of casualties per vehicle type?</h1>

In [56]:
UkAcc.groupby('Vehicle_Type')['Number_of_Casualties'].mean().sort_values(ascending=False)

Vehicle_Type
Data missing or out of range             1.500000
Pedal cycle                              1.370558
Taxi/Private hire car                    1.368663
Goods over 3.5t. and under 7.5t          1.362861
Motorcycle over 500cc                    1.359434
Car                                      1.358841
Van / Goods 3.5 tonnes mgw or under      1.354537
Goods 7.5 tonnes mgw and over            1.351881
Bus or coach (17 or more pass seats)     1.349216
Minibus (8 - 16 passenger seats)         1.345648
Motorcycle over 125cc and up to 500cc    1.343521
Agricultural vehicle                     1.342065
Other vehicle                            1.340075
Motorcycle 50cc and under                1.337235
Motorcycle 125cc and under               1.332635
Ridden horse                             1.250000
Name: Number_of_Casualties, dtype: float64

<h1>Insights: Pedal cycles and taxis/private hire cars show some of the highest average accident rates, reflecting either the greater vulnerability of these vehicles or the higher passenger exposure they carry.</h1><hr>

<h1>Q16: How does the number of casualties compare between different road surface conditions for car accidents only?</h1>

In [58]:
UkAcc[UkAcc['Vehicle_Type'] == 'Car'].groupby('Road_Surface_Conditions')['Number_of_Casualties'].mean()

Road_Surface_Conditions
Dry                     1.336710
Flood over 3cm. deep    1.486486
Frost or ice            1.350156
Snow                    1.363150
Wet or damp             1.411987
Name: Number_of_Casualties, dtype: float64

<h1>Insights: Car accidents on dry roads serve as the baseline for casualty rates, whereas icy or flooded roads show significantly higher rates (1.35+). This highlights how adverse surface conditions amplify accident severity, even when controlling for vehicle type.</h1><hr>

<h1>Q17: What is the total number of casualties by district area?</h1>

In [60]:
UkAcc.groupby('District Area')['Number_of_Casualties'].sum()

District Area
Aberdeen City    1508
Aberdeenshire    2529
Adur              817
Allerdale        1663
Alnwick           351
                 ... 
Wychavon         1943
Wycombe          2442
Wyre             1706
Wyre Forest      1340
York             2453
Name: Number_of_Casualties, Length: 422, dtype: int64

<h1>Insights: Major metropolitan districts record the highest total casualties, reflecting their higher accident frequency. These casualty counts highlight where the overall road safety burden is greatest and where resources for emergency services and safety interventions may be most needed.</h1><hr>

<h1>Q18: Which district has the highest average casualties per accident?</h1>

In [64]:
UkAcc.groupby('District Area')['Number_of_Casualties'].mean().idxmax()

'Blaeu Gwent'

<h1>Insights: This identifies the district with the most severe accidents on average.</h1><hr>

<h1>Q19: Which district area has the highest number of accidents?</h1>

In [66]:
UkAcc['District Area'].value_counts().idxmax()

'Birmingham'

<h1>Insights: This identifies the district with the most accidents which is Birmingham.</h1><hr>

<h1>Q20: How does accident severity vary across different weather conditions?</h1>

In [67]:
UkAcc.groupby(['Weather_Conditions','Accident_Severity']).size().unstack()

Accident_Severity,Fatal,Serious,Slight
Weather_Conditions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fine + high winds,175,1245,7134
Fine no high winds,7207,73285,454521
Fog or mist,82,483,2963
Other,165,1801,15184
Raining + high winds,145,1261,8209
Raining no high winds,848,9468,69380
Snowing + high winds,3,109,773
Snowing no high winds,36,565,5637


<h1>Q20 Insight1: In fine weather with no high winds, there are 441,739 slight, 72,046 serious, and 7,100 fatal accidents. This indicates that most accidents occur in good weather, not because conditions are dangerous, but because traffic volumes are higher.</h1><hr>
<h1>Q20 Insight2: Accidents are most frequent during fine weather with no high winds (over 534,000 total), showing that good conditions lead to more travel and hence more exposure, not necessarily higher risk.</h1><hr>
<h1>Q20 Insight3: Snowing with high winds has the lowest accident counts (only 885 total), indicating that drivers largely avoid traveling in extremely hazardous conditions.</h1><hr>
<h1>Q20 Insight4: Fatal accidents are highest in fine no high winds (7,207), but as a proportion of total accidents, fatalities are more significant in fog or mist and snowing with high winds, where limited visibility and road hazards increase severity.</h1><hr>
<h1>Q20 Insight5: Rainy conditions without high winds account for nearly 80,000 accidents, much higher than rain + high winds (around 9,600), suggesting that rainfall alone already contributes to a notable increase in accidents, even without wind.</h1><hr>
<h1>Q20 Insight6: “Other” weather conditions (unclassified) show a surprisingly high number of accidents (over 17,000), meaning atypical conditions may also pose safety challenges, or that reporting inconsistencies affect accident recording.</h1><hr>