# Analysis 2021 LA county Accidents data (Daytime)

Credit to https://smoosavi.org/datasets/us_accidents

In [1]:
import pandas as pd
import altair as alt

In [2]:
df = pd.read_csv("2021_LA_Day_accidents.csv")

In [3]:
df.head()

Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),Description,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-225056,2,08/09/2021 17:37,08/09/2021 20:05,33.916425,-118.269578,33.917889,-118.265243,0.268,Stationary traffic from S San Pedro St to E 12...,...,False,False,False,False,True,False,Day,Day,Day,Day
1,A-225279,2,09/12/2021 11:57,09/12/2021 13:22,34.112956,-118.482256,34.100885,-118.477917,0.87,Incident on I-405 SB near BEL AIR CREST RD Dri...,...,False,False,False,False,False,False,Day,Day,Day,Day
2,A-226122,2,19/10/2021 08:38,19/10/2021 10:50,34.003725,-118.286382,34.003764,-118.282105,0.245,Slow traffic from S Vermont Ave (W Vernon Ave)...,...,False,False,False,False,False,False,Day,Day,Day,Day
3,A-226314,2,05/11/2021 13:20,05/11/2021 13:48,33.928495,-118.289676,33.928275,-118.286206,0.2,Stationary traffic from exit [7B] to I-105 E d...,...,False,False,False,False,False,False,Day,Day,Day,Day
4,A-226718,2,03/11/2021 08:07,03/11/2021 08:30,34.04571,-118.205286,34.048458,-118.211488,0.403,Slow traffic from N Evergreen Ave (E Cesar E C...,...,False,False,False,False,True,False,Day,Day,Day,Day


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3224 entries, 0 to 3223
Data columns (total 47 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   ID                     3224 non-null   object 
 1   Severity               3224 non-null   int64  
 2   Start_Time             3224 non-null   object 
 3   End_Time               3224 non-null   object 
 4   Start_Lat              3224 non-null   float64
 5   Start_Lng              3224 non-null   float64
 6   End_Lat                3224 non-null   float64
 7   End_Lng                3224 non-null   float64
 8   Distance(mi)           3224 non-null   float64
 9   Description            3224 non-null   object 
 10  Number                 1641 non-null   float64
 11  Street                 3224 non-null   object 
 12  Side                   3224 non-null   object 
 13  City                   3224 non-null   object 
 14  County                 3224 non-null   object 
 15  Stat

In [5]:
df.duplicated().any()

False

In [6]:
df[df.duplicated()]

Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),Description,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight


In [7]:
df['clean_Start_Time'] = pd.to_datetime(df['Start_Time'], format = '%d/%m/%Y %H:%M', errors = 'coerce')

In [8]:
df['clean_End_Time'] = pd.to_datetime(df['End_Time'], format = '%d/%m/%Y %H:%M', errors = 'coerce')

In [9]:
df['Year'] = df['clean_Start_Time'].dt.year

In [10]:
df['Month'] = df['clean_Start_Time'].dt.month

In [11]:
df_aircondition = df[["Temperature(F)", "Pressure(in)", "Visibility(mi)", "Wind_Speed(mph)", "Humidity(%)"]].dropna(how='any')

### 1. How many car accidents happend in daytime in LA (2021)

In [12]:
len(df)

3224

In [13]:
df.Severity.value_counts() # different severity type (1-4)

2    3222
4       2
Name: Severity, dtype: int64

In [14]:
df.Stop.value_counts() # whether a stop sign near where the accident happend

False    3106
True      118
Name: Stop, dtype: int64

In [15]:
df.Side.value_counts()

R    2485
L     739
Name: Side, dtype: int64

### 2. Driving conditions when the car accident happend

In [16]:
round(df["Distance(mi)"].mean(),2) # The avarage distance when the car accident happened

0.77

In [17]:
round(df["Distance(mi)"].max(),2)

12.99

In [18]:
df_aircondition.agg(['min', 'max', 'mean']).round().astype(int).reset_index() 
# 4 weather conditions which may affect traffic accidents

Unnamed: 0,index,Temperature(F),Pressure(in),Visibility(mi),Wind_Speed(mph),Humidity(%)
0,min,42,29,0,0,6
1,max,92,30,10,26,100
2,mean,68,30,9,4,60


### 3. The duration time caused by car accidents

In [19]:
df['duration_time_hour'] = (df['clean_End_Time'] - df['clean_Start_Time']).dt.seconds/60/60

In [20]:
df['duration_time_hour'] = round(df['duration_time_hour']).astype(int)

In [21]:
df['duration_time_hour']

0       2
1       1
2       2
3       0
4       0
       ..
3219    2
3220    0
3221    2
3222    5
3223    2
Name: duration_time_hour, Length: 3224, dtype: int32

In [22]:
df['Description'].astype(str)

0       Stationary traffic from S San Pedro St to E 12...
1       Incident on I-405 SB near BEL AIR CREST RD Dri...
2       Slow traffic from S Vermont Ave (W Vernon Ave)...
3       Stationary traffic from exit [7B] to I-105 E d...
4       Slow traffic from N Evergreen Ave (E Cesar E C...
                              ...                        
3219    Slow traffic from S Wilton Pl to W 111th St du...
3220    Slow traffic from Crenshaw Blvd (Stocker St) t...
3221    Accident on La Brea Ave (I-10) from Crenshaw B...
3222    Stationary traffic from Bergreen Pl to I-405 N...
3223    Incident on WESTCHESTER PL near W 12TH ST Driv...
Name: Description, Length: 3224, dtype: object

In [23]:
i10W_counts = df[df['Description'].str.contains('I-10 W')] # filter dataframe to only contain 'I-10 W' in Description column

In [24]:
i10W_counts.head()

Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),Description,...,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight,clean_Start_Time,clean_End_Time,Year,Month,duration_time_hour
18,A-244651,2,13/10/2021 07:15,13/10/2021 10:50,34.055571,-118.1981,34.05519,-118.211842,0.787,Stationary traffic from exit [19C] to I-10 W d...,...,False,Day,Day,Day,Day,2021-10-13 07:15:00,2021-10-13 10:50:00,2021,10,4
24,A-252155,2,23/10/2021 14:38,23/10/2021 16:52,34.05923,-118.184949,34.059368,-118.185151,0.015,Accident from I-10 W to Medford St.,...,False,Day,Day,Day,Day,2021-10-23 14:38:00,2021-10-23 16:52:00,2021,10,2
38,A-256229,2,30/10/2021 13:12,30/10/2021 17:02,34.03437,-118.350153,34.032301,-118.388671,2.21,Slow traffic on I-10 W - Santa Monica Fwy W fr...,...,False,Day,Day,Day,Day,2021-10-30 13:12:00,2021-10-30 17:02:00,2021,10,4
41,A-258324,2,25/04/2021 17:40,25/04/2021 19:28,34.032376,-118.222395,34.023922,-118.244706,1.405,Incident on I-10 WB near S ALAMEDA ST Drive wi...,...,False,Day,Day,Day,Day,2021-04-25 17:40:00,2021-04-25 19:28:00,2021,4,2
42,A-258412,2,02/07/2021 11:33,02/07/2021 14:27,34.055504,-118.198421,34.054847,-118.210943,0.718,Stationary traffic from exit [19C] to I-10 W d...,...,False,Day,Day,Day,Day,2021-07-02 11:33:00,2021-07-02 14:27:00,2021,7,3


In [25]:
len(i10W_counts)

264

In [27]:
i10W_counts.groupby('Month')

Month
3     14
4     26
5     18
6     62
7     86
8     54
9     42
10    58
11    86
12    82
Name: Severity, dtype: int64

In [28]:
df.groupby('Month')

Month
2        8
3      146
4      292
5      278
6      614
7      656
8      668
9      720
10     744
11     992
12    1334
Name: Severity, dtype: int64