In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

path = "toronto-traffic-collisions.geojson"
gdf = gpd.read_file(path)

The json file is extracted to a GeoDataFrame.
Now pre-process and clean the dataset:

1. get the data types and non-null info of gdf

2. process missing values in column "FATALITIES"
- checking the toronto police service, there is around 50 fatalities caused by motor vehical collisions per year
- save to assume that all other data entries does not have any fatalities
- turn the column from float to int

3. Check INJURY_COLLISIONS, FTR_COLLISIONS, PD_COLLISIONS
- remove the "None" Records
- turn "YES" to 1, "NO" to 0


In [2]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 722921 entries, 0 to 722920
Data columns (total 21 columns):
 #   Column             Non-Null Count   Dtype   
---  ------             --------------   -----   
 0   _id                722921 non-null  int32   
 1   OCC_DATE           722921 non-null  object  
 2   OCC_MONTH          722921 non-null  object  
 3   OCC_DOW            722921 non-null  object  
 4   OCC_YEAR           722921 non-null  object  
 5   OCC_HOUR           722921 non-null  object  
 6   DIVISION           722921 non-null  object  
 7   FATALITIES         619 non-null     float64 
 8   INJURY_COLLISIONS  722921 non-null  object  
 9   FTR_COLLISIONS     722921 non-null  object  
 10  PD_COLLISIONS      722921 non-null  object  
 11  HOOD_158           722921 non-null  object  
 12  NEIGHBOURHOOD_158  722921 non-null  object  
 13  LONG_WGS84         722921 non-null  object  
 14  LAT_WGS84          722921 non-null  object  
 15  AUTOMOBILE         722921 

In [3]:
# all columns, except FATALITIES, does not have null data entries
# investigate the null/non-null entries in FATALITIES
gdf.value_counts("FATALITIES")


FATALITIES
1.0    611
2.0      5
3.0      2
4.0      1
dtype: int64

In [4]:
gdf["FATALITIES"] = gdf["FATALITIES"].fillna(0)
gdf.FATALITIES = gdf.FATALITIES.astype(int)
gdf.value_counts("FATALITIES")


FATALITIES
0    722302
1       611
2         5
3         2
4         1
dtype: int64

In [5]:
injury_collisions_check = gdf.value_counts("INJURY_COLLISIONS")
ftr_collisions_check = gdf.value_counts("FTR_COLLISIONS")
pd_collisions_check = gdf.value_counts("PD_COLLISIONS")
print(injury_collisions_check)
print(ftr_collisions_check)
print(pd_collisions_check)


INJURY_COLLISIONS
NO      625470
YES      97447
None         4
dtype: int64
FTR_COLLISIONS
NO      601454
YES     121463
None         4
dtype: int64
PD_COLLISIONS
YES     520211
NO      202706
None         4
dtype: int64


In [6]:
gdf.loc[gdf["INJURY_COLLISIONS"] == 'None']

Unnamed: 0,_id,OCC_DATE,OCC_MONTH,OCC_DOW,OCC_YEAR,OCC_HOUR,DIVISION,FATALITIES,INJURY_COLLISIONS,FTR_COLLISIONS,PD_COLLISIONS,HOOD_158,NEIGHBOURHOOD_158,LONG_WGS84,LAT_WGS84,AUTOMOBILE,MOTORCYCLE,PASSENGER,BICYCLE,PEDESTRIAN,geometry
164782,164783,1467435600000,July,Saturday,2016,18,D51,1,,,,166,St Lawrence-East Bayfront-The Islands (166),-79.36432245533886,43.64557128467487,,,,,,MULTIPOINT (-79.36432 43.64557)
222183,222184,1493182800000,April,Wednesday,2017,18,D33,1,,,,47,Don Valley Village (47),-79.36614662064541,43.78196340520432,,,,,,MULTIPOINT (-79.36615 43.78196)
512586,512587,1633237200000,October,Sunday,2021,12,D22,1,,,,160,Mimico-Queensway (160),-79.51200263297962,43.61823424253411,,,,,,MULTIPOINT (-79.512 43.61823)
568659,568660,1664427600000,September,Thursday,2022,19,D23,1,,,,1,West Humber-Clairville (1),-79.58679481542516,43.68788885277521,,,,,,MULTIPOINT (-79.58679 43.68789)


In [7]:
# drop the "None" in the INJURY_CONLLISIONS, FTR_COLLISIONS, and PD_COLLISIONS
# turn yes - 1 and no - 0 for the above three rows

gdf = gdf[gdf["INJURY_COLLISIONS"] != 'None']
gdf['INJURY_COLLISIONS'] = gdf['INJURY_COLLISIONS'].replace({'YES': 1, 'NO': 0})
gdf['FTR_COLLISIONS'] = gdf['FTR_COLLISIONS'].replace({'YES': 1, 'NO': 0})
gdf['PD_COLLISIONS'] = gdf['PD_COLLISIONS'].replace({'YES': 1, 'NO': 0})

#gdf.INJURY_COLLISIONS = gdf.INJURY_COLLISIONS.astype(bool)
#gdf.FTR_COLLISIONS = gdf.FTR_COLLISIONS.astype(bool)
#gdf.PD_COLLISIONS = gdf.PD_COLLISIONS.astype(bool)

#check
injury_collisions_check = gdf.value_counts("INJURY_COLLISIONS")
ftr_collisions_check = gdf.value_counts("FTR_COLLISIONS")
pd_collisions_check = gdf.value_counts("PD_COLLISIONS")
print(injury_collisions_check)
print(ftr_collisions_check)
print(pd_collisions_check)

gdf.head(5)

INJURY_COLLISIONS
0    625470
1     97447
dtype: int64
FTR_COLLISIONS
0    601454
1    121463
dtype: int64
PD_COLLISIONS
1    520211
0    202706
dtype: int64


Unnamed: 0,_id,OCC_DATE,OCC_MONTH,OCC_DOW,OCC_YEAR,OCC_HOUR,DIVISION,FATALITIES,INJURY_COLLISIONS,FTR_COLLISIONS,PD_COLLISIONS,HOOD_158,NEIGHBOURHOOD_158,LONG_WGS84,LAT_WGS84,AUTOMOBILE,MOTORCYCLE,PASSENGER,BICYCLE,PEDESTRIAN,geometry
0,1,1388552400000,January,Wednesday,2014,2,D23,0,0,0,1,007,Willowridge-Martingrove-Richview (7),-79.56313850270357,43.67441063320029,YES,NO,NO,NO,NO,MULTIPOINT (-79.56314 43.67441)
1,2,1388552400000,January,Wednesday,2014,14,D32,0,0,0,1,105,Lawrence Park North (105),-79.39758914167055,43.72609117778053,YES,NO,NO,NO,NO,MULTIPOINT (-79.39759 43.72609)
2,3,1388552400000,January,Wednesday,2014,2,NSA,0,1,0,0,NSA,NSA,0.0,0.0,YES,NO,NO,NO,NO,MULTIPOINT (0 0)
3,4,1388552400000,January,Wednesday,2014,3,NSA,0,0,0,1,NSA,NSA,0.0,0.0,YES,NO,NO,NO,NO,MULTIPOINT (0 0)
4,5,1388552400000,January,Wednesday,2014,5,NSA,0,1,0,0,NSA,NSA,0.0,0.0,YES,NO,NO,NO,NO,MULTIPOINT (0 0)


4. Investigate No Specified Address Records
- Note: No Specified Address (NSA) includes any occurrences reported outside the City of Toronto limits or those that have no verified location.
- a new column, has_valid_location, is created to label the NSA records. yes - 1, no - 0

In [8]:
# check neighbourhood and long_wgs84, lat_wgs84
hood_158_check = gdf.value_counts("HOOD_158")
print(hood_158_check)

HOOD_158
NSA    118704
119     17562
001     14609
027     11948
166     10205
126      9971
159      9834
042      9667
130      9377
031      9330
170      9108
095      8975
142      8802
070      8638
128      8429
085      8279
165      7687
164      7472
156      7471
120      6880
021      6783
040      6771
078      6593
090      6518
073      6343
144      6343
098      6282
168      6062
047      5822
141      5614
118      5457
129      5403
034      5145
039      5127
016      5068
052      5049
058      4891
136      4883
025      4802
154      4782
056      4694
050      4669
146      4552
062      4518
155      4488
036      4436
002      4172
048      4127
087      4087
063      4070
111      3876
092      3869
138      3856
024      3839
044      3835
081      3792
055      3766
158      3736
145      3735
102      3731
096      3698
171      3651
172      3644
122      3633
163      3589
147      3515
160      3451
030      3373
086      3358
099      3325
088      33

In [9]:
# add 'has_valid_location' tag
gdf["has_valid_location"] = 1
gdf.loc[gdf["HOOD_158"] == 'NSA', 'has_valid_location'] = 0

# change HOOD_158, NEIGHBOURHOOD_158, DIVISION, LONG_WGS84, LAT_WGS84 to nan
# hood code
gdf.loc[gdf["HOOD_158"] == 'NSA', 'HOOD_158'] = np.nan
# neighbourhood
gdf.loc[gdf["NEIGHBOURHOOD_158"] == 'NSA', "NEIGHBOURHOOD_158"] = np.nan
# division
gdf.loc[gdf["DIVISION"] == 'NSA', "DIVISION"] = np.nan
# gps
gdf["LONG_WGS84"] = gdf["LONG_WGS84"].astype(float)
gdf["LAT_WGS84"] = gdf["LAT_WGS84"].astype(float)
gdf.loc[gdf["has_valid_location"] == 0, ["LONG_WGS84", "LAT_WGS84"]] = np.nan


# check
hood_158_check = gdf.value_counts("HOOD_158")
neighbourhood_check = gdf.value_counts("NEIGHBOURHOOD_158")
division_check = gdf.value_counts("DIVISION")
long_check = gdf.value_counts("LONG_WGS84")
lat_check = gdf.value_counts("LAT_WGS84")

print(hood_158_check)
print(neighbourhood_check)
print(division_check)
print(long_check)
print(lat_check)

gdf.head(10)


HOOD_158
119    17562
001    14609
027    11948
166    10205
126     9971
159     9834
042     9667
130     9377
031     9330
170     9108
095     8975
142     8802
070     8638
128     8429
085     8279
165     7687
164     7472
156     7471
120     6880
021     6783
040     6771
078     6593
090     6518
073     6343
144     6343
098     6282
168     6062
047     5822
141     5614
118     5457
129     5403
034     5145
039     5127
016     5068
052     5049
058     4891
136     4883
025     4802
154     4782
056     4694
050     4669
146     4552
062     4518
155     4488
036     4436
002     4172
048     4127
087     4087
063     4070
111     3876
092     3869
138     3856
024     3839
044     3835
081     3792
055     3766
158     3736
145     3735
102     3731
096     3698
171     3651
172     3644
122     3633
163     3589
147     3515
160     3451
030     3373
086     3358
099     3325
088     3324
105     3301
116     3269
006     3197
033     3197
148     3054
079     3017
007

Unnamed: 0,_id,OCC_DATE,OCC_MONTH,OCC_DOW,OCC_YEAR,OCC_HOUR,DIVISION,FATALITIES,INJURY_COLLISIONS,FTR_COLLISIONS,PD_COLLISIONS,HOOD_158,NEIGHBOURHOOD_158,LONG_WGS84,LAT_WGS84,AUTOMOBILE,MOTORCYCLE,PASSENGER,BICYCLE,PEDESTRIAN,geometry,has_valid_location
0,1,1388552400000,January,Wednesday,2014,2,D23,0,0,0,1,7.0,Willowridge-Martingrove-Richview (7),-79.563139,43.674411,YES,NO,NO,NO,NO,MULTIPOINT (-79.56314 43.67441),1
1,2,1388552400000,January,Wednesday,2014,14,D32,0,0,0,1,105.0,Lawrence Park North (105),-79.397589,43.726091,YES,NO,NO,NO,NO,MULTIPOINT (-79.39759 43.72609),1
2,3,1388552400000,January,Wednesday,2014,2,,0,1,0,0,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0
3,4,1388552400000,January,Wednesday,2014,3,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0
4,5,1388552400000,January,Wednesday,2014,5,,0,1,0,0,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0
5,6,1388552400000,January,Wednesday,2014,5,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0
6,7,1388552400000,January,Wednesday,2014,8,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0
7,8,1388552400000,January,Wednesday,2014,8,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0
8,9,1388552400000,January,Wednesday,2014,9,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0
9,10,1388552400000,January,Wednesday,2014,9,,0,1,0,0,,,,,YES,NO,YES,NO,NO,MULTIPOINT (0 0),0


5. Check month, date, year and hour
- make sure that date is valid (from 2014 to 2025)
- add column 'OCC_DOM', the day of the month of that the vehicle collision incident occurs

In [10]:
# find max and min of OCC_DATE
occ_date_max = gdf["OCC_DATE"].max()
occ_date_min = gdf["OCC_DATE"].min()
print(occ_date_max, occ_date_min)
# the earliest date is 2014, the lastest date in 2025
# add OCC_DAY 
gdf["OCC_DOM"] = pd.to_datetime(gdf['OCC_DATE'], unit='ms').dt.day
occ_date_check = gdf.value_counts("OCC_DOM")
print(occ_date_check)
gdf.head(n = 10)

1735621200000 1388552400000
OCC_DOM
13    24359
21    24311
24    24158
20    24155
27    24134
19    24086
28    24082
11    24067
18    23969
10    23934
12    23928
4     23922
23    23894
16    23854
22    23787
14    23732
15    23724
17    23603
3     23522
6     23509
5     23428
26    23347
8     23337
9     23267
7     23261
2     23248
1     23244
25    22620
29    22354
30    21817
31    14264
dtype: int64


Unnamed: 0,_id,OCC_DATE,OCC_MONTH,OCC_DOW,OCC_YEAR,OCC_HOUR,DIVISION,FATALITIES,INJURY_COLLISIONS,FTR_COLLISIONS,PD_COLLISIONS,HOOD_158,NEIGHBOURHOOD_158,LONG_WGS84,LAT_WGS84,AUTOMOBILE,MOTORCYCLE,PASSENGER,BICYCLE,PEDESTRIAN,geometry,has_valid_location,OCC_DOM
0,1,1388552400000,January,Wednesday,2014,2,D23,0,0,0,1,7.0,Willowridge-Martingrove-Richview (7),-79.563139,43.674411,YES,NO,NO,NO,NO,MULTIPOINT (-79.56314 43.67441),1,1
1,2,1388552400000,January,Wednesday,2014,14,D32,0,0,0,1,105.0,Lawrence Park North (105),-79.397589,43.726091,YES,NO,NO,NO,NO,MULTIPOINT (-79.39759 43.72609),1,1
2,3,1388552400000,January,Wednesday,2014,2,,0,1,0,0,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0,1
3,4,1388552400000,January,Wednesday,2014,3,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0,1
4,5,1388552400000,January,Wednesday,2014,5,,0,1,0,0,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0,1
5,6,1388552400000,January,Wednesday,2014,5,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0,1
6,7,1388552400000,January,Wednesday,2014,8,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0,1
7,8,1388552400000,January,Wednesday,2014,8,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0,1
8,9,1388552400000,January,Wednesday,2014,9,,0,0,0,1,,,,,YES,NO,NO,NO,NO,MULTIPOINT (0 0),0,1
9,10,1388552400000,January,Wednesday,2014,9,,0,1,0,0,,,,,YES,NO,YES,NO,NO,MULTIPOINT (0 0),0,1


In [11]:
# check month, date of week, year, hour
occ_month_check = gdf.value_counts("OCC_MONTH")
occ_dow_check = gdf.value_counts("OCC_DOW")
occ_year_check = gdf.value_counts("OCC_YEAR")
occ_hour_check = gdf.value_counts("OCC_HOUR")
print(occ_month_check)
print(occ_dow_check)
print(occ_year_check)
print(occ_hour_check)


OCC_MONTH
November     65204
October      64878
December     63893
September    62607
June         61460
January      61395
July         59679
February     59344
August       59213
May          58910
March        55693
April        50641
dtype: int64
OCC_DOW
Friday       120310
Thursday     114844
Wednesday    111432
Tuesday      110918
Monday        97519
Saturday      94522
Sunday        73372
dtype: int64
OCC_YEAR
2019    82831
2018    79271
2017    74193
2024    69914
2016    69667
2023    67529
2015    67265
2014    64596
2022    59172
2020    44738
2021    43741
dtype: int64
OCC_HOUR
17    60166
15    59694
16    57709
14    51015
18    50217
12    47530
13    47024
11    42805
8     41936
9     38457
10    36528
19    34981
20    24760
7     23799
21    20188
22    16164
6     15614
23    12525
0      8470
1      7389
2      6946
3      6851
5      6834
4      5315
dtype: int64


6. Check AUTOMOBILE, MOTORCYCLE, PASSENGER, BICYCLE, and PEDESTRIAN
- around 1% (6293 out of 713136) of collision record does not have record on the vehicle/person involved in the collision
- due lack of significant to future analysis of collision records, these incomplete records will be removed from the dataset

- there are 133 records where the collision does not involve a vehnicle nor a motorcycle but contains a passenger, these records are insignificant (133 out of around 700k records) and are removed from the dataset

In [12]:
# check AUTOMOBILE, MOTORCYCLE, PASSENGER, BICYCLE, PEDESTRIAN
automobile_check = gdf.value_counts("AUTOMOBILE")
motorcycle_check = gdf.value_counts("MOTORCYCLE")
passenger_check = gdf.value_counts("PASSENGER")
bicycle_check = gdf.value_counts("BICYCLE")
pedestrian_check = gdf.value_counts("PEDESTRIAN")
print(automobile_check)
print(motorcycle_check)
print(passenger_check)
print(bicycle_check)
print(pedestrian_check)

AUTOMOBILE
YES    713136
N/R      6293
NO       3488
dtype: int64
MOTORCYCLE
NO     712294
N/R      6293
YES      4330
dtype: int64
PASSENGER
NO     661886
YES     54738
N/R      6293
dtype: int64
BICYCLE
NO     705184
YES     11440
N/R      6293
dtype: int64
PEDESTRIAN
NO     698519
YES     18105
N/R      6293
dtype: int64


In [13]:
gdf_nr_check = gdf.loc[gdf["AUTOMOBILE"] == 'N/R']
gdf_nr_check.head(n = 20)

Unnamed: 0,_id,OCC_DATE,OCC_MONTH,OCC_DOW,OCC_YEAR,OCC_HOUR,DIVISION,FATALITIES,INJURY_COLLISIONS,FTR_COLLISIONS,PD_COLLISIONS,HOOD_158,NEIGHBOURHOOD_158,LONG_WGS84,LAT_WGS84,AUTOMOBILE,MOTORCYCLE,PASSENGER,BICYCLE,PEDESTRIAN,geometry,has_valid_location,OCC_DOM
43,44,1388552400000,January,Wednesday,2014,13,D42,0,0,0,1,148.0,East L'Amoreaux (148),-79.295476,43.799898,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (-79.29548 43.7999),1,1
47,48,1388552400000,January,Wednesday,2014,13,D41,0,0,0,1,126.0,Dorset Park (126),-79.283825,43.765164,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (-79.28383 43.76516),1,1
48,49,1388552400000,January,Wednesday,2014,14,D42,0,0,0,1,118.0,Tam O'Shanter-Sullivan (118),-79.293463,43.78212,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (-79.29346 43.78212),1,1
50,51,1388552400000,January,Wednesday,2014,9,D14,0,0,1,0,85.0,South Parkdale (85),-79.428841,43.638652,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (-79.42884 43.63865),1,1
128,129,1388638800000,January,Thursday,2014,22,,0,0,0,1,,,,,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (0 0),0,2
131,132,1388638800000,January,Thursday,2014,3,,0,0,1,0,,,,,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (0 0),0,2
303,304,1388725200000,January,Friday,2014,0,,0,0,0,1,,,,,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (0 0),0,3
329,330,1388725200000,January,Friday,2014,10,,0,0,0,1,,,,,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (0 0),0,3
434,435,1388725200000,January,Friday,2014,14,D53,0,0,0,1,173.0,North Toronto (173),-79.394125,43.707598,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (-79.39413 43.7076),1,3
665,666,1388811600000,January,Saturday,2014,6,D55,0,0,0,1,59.0,Danforth East York (59),-79.341685,43.69116,N/R,N/R,N/R,N/R,N/R,MULTIPOINT (-79.34169 43.69116),1,4


In [16]:
# drop the records with N/R
gdf = gdf[gdf["AUTOMOBILE"] != 'N/R'].copy()
# switch the records to YES - 1, NO - 0
gdf['AUTOMOBILE'] = gdf['AUTOMOBILE'].replace({'YES': 1, 'NO': 0})
gdf['MOTORCYCLE'] = gdf['MOTORCYCLE'].replace({'YES': 1, 'NO': 0})
gdf['PASSENGER'] = gdf['PASSENGER'].replace({'YES': 1, 'NO': 0})
gdf['BICYCLE'] = gdf['BICYCLE'].replace({'YES': 1, 'NO': 0})
gdf['PEDESTRIAN'] = gdf['PEDESTRIAN'].replace({'YES': 1, 'NO': 0})
# check again
automobile_check = gdf.value_counts("AUTOMOBILE")
motorcycle_check = gdf.value_counts("MOTORCYCLE")
passenger_check = gdf.value_counts("PASSENGER")
bicycle_check = gdf.value_counts("BICYCLE")
pedestrian_check = gdf.value_counts("PEDESTRIAN")
print(automobile_check)
print(motorcycle_check)
print(passenger_check)
print(bicycle_check)
print(pedestrian_check)


AUTOMOBILE
1    713136
0      3488
dtype: int64
MOTORCYCLE
0    712294
1      4330
dtype: int64
PASSENGER
0    661886
1     54738
dtype: int64
BICYCLE
0    705184
1     11440
dtype: int64
PEDESTRIAN
0    698519
1     18105
dtype: int64


In [26]:
# check if there is a record that neither have AUTOMOBILE nor MOTORCYCLE but have passenger
gdf_passanger_check = gdf.loc[(gdf['AUTOMOBILE'] == 0) & (gdf['MOTORCYCLE'] == 0) & (gdf['BICYCLE'] == 0) & (gdf['PASSENGER'] == 1)]
len(gdf_passanger_check.index)

114

In [27]:
#remove the record that only involves a passenger but not a vehicle
gdf = gdf[~((gdf['AUTOMOBILE'] == 0) & (gdf['MOTORCYCLE'] == 0) & (gdf['BICYCLE'] == 0) & (gdf['PASSENGER'] == 1))]
# check again
automobile_check = gdf.value_counts("AUTOMOBILE")
motorcycle_check = gdf.value_counts("MOTORCYCLE")
passenger_check = gdf.value_counts("PASSENGER")
bicycle_check = gdf.value_counts("BICYCLE")
pedestrian_check = gdf.value_counts("PEDESTRIAN")
print(automobile_check)
print(motorcycle_check)
print(passenger_check)
print(bicycle_check)
print(pedestrian_check)


AUTOMOBILE
1    713136
0      3374
dtype: int64
MOTORCYCLE
0    712180
1      4330
dtype: int64
PASSENGER
0    661886
1     54624
dtype: int64
BICYCLE
0    705070
1     11440
dtype: int64
PEDESTRIAN
0    698409
1     18101
dtype: int64
