<h1>UK ACCIDENT REPORT DATA ANALYSIS</h1>

<h3>Analyst: Abdul Barry A. Adam</h3>

<h1>IMPORTING LIBRARIES NEEDED</h1>

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway

<h1>IMPORTING DATASETS, LOCALLY, TO DATAFRAME</h1>

In [3]:
accident = pd.read_csv("./datasets/accident_data.csv")

In [4]:
accident

Unnamed: 0,Index,Accident_Severity,Accident_Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type
0,200701BS64157,Serious,5/6/2019,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car
1,200701BS65737,Serious,2/7/2019,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car
2,200701BS66127,Serious,26-08-2019,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,,Urban,,Taxi/Private hire car
3,200701BS66128,Serious,16-08-2019,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats)
4,200701BS66837,Slight,3/9/2019,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,,Urban,,Other vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,18-02-2022,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car
660675,201091NM01881,Slight,21-02-2022,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660676,201091NM01935,Slight,23-02-2022,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660677,201091NM01964,Serious,23-02-2022,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc


<h1>INFORMATION</h1>

In [5]:
accident.describe()

Unnamed: 0,Latitude,Longitude,Number_of_Casualties,Number_of_Vehicles
count,660654.0,660653.0,660679.0,660679.0
mean,52.553866,-1.43121,1.35704,1.831255
std,1.406922,1.38333,0.824847,0.715269
min,49.91443,-7.516225,1.0,1.0
25%,51.49069,-2.332291,1.0,1.0
50%,52.315641,-1.411667,1.0,2.0
75%,53.453452,-0.232869,1.0,2.0
max,60.757544,1.76201,68.0,32.0


In [6]:
accident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Index                    660679 non-null  object 
 1   Accident_Severity        660679 non-null  object 
 2   Accident_Date            660679 non-null  object 
 3   Latitude                 660654 non-null  float64
 4   Light_Conditions         660679 non-null  object 
 5   District Area            660679 non-null  object 
 6   Longitude                660653 non-null  float64
 7   Number_of_Casualties     660679 non-null  int64  
 8   Number_of_Vehicles       660679 non-null  int64  
 9   Road_Surface_Conditions  659953 non-null  object 
 10  Road_Type                656159 non-null  object 
 11  Urban_or_Rural_Area      660664 non-null  object 
 12  Weather_Conditions       646551 non-null  object 
 13  Vehicle_Type             660679 non-null  object 
dtypes: f

In [7]:
accident.dtypes

Index                       object
Accident_Severity           object
Accident_Date               object
Latitude                   float64
Light_Conditions            object
District Area               object
Longitude                  float64
Number_of_Casualties         int64
Number_of_Vehicles           int64
Road_Surface_Conditions     object
Road_Type                   object
Urban_or_Rural_Area         object
Weather_Conditions          object
Vehicle_Type                object
dtype: object

<h1>CONVERT COLUMNS TO ITS APPROPRIATE DATA TYPE</h1>

In [8]:
accident['Index'] = accident['Index'].astype('category')
accident['Accident_Severity'] = accident['Accident_Severity'].astype('category')
accident['Accident_Date'] = pd.to_datetime(accident['Accident_Date'], dayfirst = True, errors = 'coerce') 
accident['Light_Conditions'] = accident['Light_Conditions'].astype('category')
accident['District Area'] = accident['District Area'].astype('category')
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].astype('category')
accident['Road_Type'] = accident['Road_Type'].astype('category')
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].astype('category')
accident['Weather_Conditions'] = accident['Weather_Conditions'].astype('category')
accident['Vehicle_Type'] = accident['Vehicle_Type'].astype('category')

<h1>DETERMINING NULL VALUES AND FILLNG THEM</h1>

<h4>DETERMING NULLS</h4>

In [9]:
accident.isnull().sum()

Index                           0
Accident_Severity               0
Accident_Date              395672
Latitude                       25
Light_Conditions                0
District Area                   0
Longitude                      26
Number_of_Casualties            0
Number_of_Vehicles              0
Road_Surface_Conditions       726
Road_Type                    4520
Urban_or_Rural_Area            15
Weather_Conditions          14128
Vehicle_Type                    0
dtype: int64

In [10]:
FreqDate = accident['Accident_Date'].mode()[0]
AveLat = accident['Latitude'].mean()
AveLot = accident['Longitude'].mean()
FreqSurConditions = accident['Road_Surface_Conditions'].mode()[0]
FreqgRoadType = accident['Road_Type'].mode()[0]
FreqArea = accident['Urban_or_Rural_Area'].mode()[0]


<h4>FILLING NULLS</h4>

In [11]:
accident['Accident_Date'] = accident['Accident_Date'].fillna(FreqDate)
accident['Latitude'] = accident['Latitude'].fillna(AveLat)
accident['Longitude'] = accident['Longitude'].fillna(AveLot)
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].fillna(FreqSurConditions)
accident['Road_Type'] = accident['Road_Type'].fillna(FreqgRoadType)
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].fillna(FreqArea)


<h4>LAST CHECKUP</h4>

In [12]:
accident.isnull().sum()

Index                          0
Accident_Severity              0
Accident_Date                  0
Latitude                       0
Light_Conditions               0
District Area                  0
Longitude                      0
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions        0
Road_Type                      0
Urban_or_Rural_Area            0
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

<h1>DATA ANALYTICS</h1>
<h2>UniVariate</h2>
<h3>Vehicle's type that is frequently on accident</h3>
<h4>Vehicle Type</h4>
<hr>

In [15]:
types = accident['Vehicle_Type'].value_counts()
types

Vehicle_Type
Car                                      497992
Van / Goods 3.5 tonnes mgw or under       34160
Bus or coach (17 or more pass seats)      25878
Motorcycle over 500cc                     25657
Goods 7.5 tonnes mgw and over             17307
Motorcycle 125cc and under                15269
Taxi/Private hire car                     13294
Motorcycle over 125cc and up to 500cc      7656
Motorcycle 50cc and under                  7603
Goods over 3.5t. and under 7.5t            6096
Other vehicle                              5637
Minibus (8 - 16 passenger seats)           1976
Agricultural vehicle                       1947
Pedal cycle                                 197
Data missing or out of range                  6
Ridden horse                                  4
Name: count, dtype: int64

<h1>INSIGHT #1</h1>
<h2>Car is the most frequent type of vehicle involved in an accidents</h2>