In [1]:
import pandas as pd
import numpy as np
import os
import util

In [2]:
pd.set_option('max_columns', None)

In [3]:
parent = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

In [4]:
ped_collapse = pd.read_csv(parent + '\Data\Created datasets\KSI_Pedestrian_Count_truncate3.csv', index_col = 0)
ksi_df = pd.read_csv(parent + '\Data\KSI.csv')

In [5]:
# including PEDTYPE too - but it is probably VERY highly correlated with PEDACT
# investigate this if you want to reduce dimensionality

cols = ['PEDTYPE', 'PEDACT', 'PEDCOND', 'TRAFFCTL', 'LIGHT', 'VISIBILITY', 'RDSFCOND']

In [6]:
ksi_df['LATITUDE'] = ksi_df['LATITUDE'].apply(util.truncate, args = (3, ))
ksi_df['LONGITUDE'] = ksi_df['LONGITUDE'].apply(util.truncate, args = (3, ))

In [7]:
# conditions important

ksi_df = ksi_df[(ksi_df['IMPACTYPE'] == 'Pedestrian Collisions') & (ksi_df['INVTYPE'] == 'Pedestrian')]

In [8]:
ksi_df['COMBINED'] = ksi_df[cols].apply(tuple, axis = 1)

In [9]:
time_df = ksi_df.groupby(['LATITUDE', 'LONGITUDE'])['TIME'].apply(lambda x: x.mean()).reset_index()

In [10]:
# level_2 indicates # of modes returned
# important because multiple drivers may have been at fault

# mode taken ACROSS columns

feature_collapse = ksi_df.groupby(['LATITUDE', 'LONGITUDE'])['COMBINED'].apply(lambda x: x.mode()).reset_index()

feature_collapse

Unnamed: 0,LATITUDE,LONGITUDE,level_2,COMBINED
0,43.594,-79.533,0,(Vehicle is going straight thru inter.while pe...
1,43.594,-79.533,1,(Vehicle turns left while ped crosses with ROW...
2,43.595,-79.528,0,"(<Null>, Other, Other, No Control, Dark, artif..."
3,43.597,-79.522,0,"(Vehicle is reversing and hits pedestrian, Oth..."
4,43.598,-79.516,0,(Vehicle is going straight thru inter.while pe...
...,...,...,...,...
2543,43.836,-79.251,0,(Vehicle is going straight thru inter.while pe...
2544,43.836,-79.251,1,(Vehicle turns left while ped crosses with ROW...
2545,43.836,-79.231,0,(Pedestrian involved in a collision with trans...
2546,43.837,-79.249,0,"(Pedestrian hit at mid-block, Running onto Roa..."


In [11]:
feature_collapse = feature_collapse.join(feature_collapse['COMBINED'].apply(pd.Series, index = cols))
feature_collapse = feature_collapse.merge(time_df, on = ['LATITUDE', 'LONGITUDE'], how = 'inner')
feature_collapse.drop(['level_2', 'COMBINED'], axis = 1, inplace = True)

In [12]:
feature_collapse

Unnamed: 0,LATITUDE,LONGITUDE,PEDTYPE,PEDACT,PEDCOND,TRAFFCTL,LIGHT,VISIBILITY,RDSFCOND,TIME
0,43.594,-79.533,Vehicle is going straight thru inter.while ped...,Crossing marked crosswalk without ROW,Inattentive,Traffic Signal,Daylight,Clear,Dry,1367.000000
1,43.594,-79.533,Vehicle turns left while ped crosses with ROW ...,Crossing with right of way,Normal,Traffic Signal,Dusk,Clear,Dry,1367.000000
2,43.595,-79.528,<Null>,Other,Other,No Control,"Dark, artificial",Clear,Dry,1949.000000
3,43.597,-79.522,Vehicle is reversing and hits pedestrian,Other,Normal,No Control,Daylight,Clear,Dry,600.000000
4,43.598,-79.516,Vehicle is going straight thru inter.while ped...,Running onto Roadway,Normal,Traffic Signal,Dark,Rain,Wet,1956.333333
...,...,...,...,...,...,...,...,...,...,...
2543,43.836,-79.251,Vehicle is going straight thru inter.while ped...,Crossing with right of way,Normal,Traffic Signal,"Dusk, artificial",Rain,Wet,1305.500000
2544,43.836,-79.251,Vehicle turns left while ped crosses with ROW ...,Crossing with right of way,Normal,Traffic Signal,"Dark, artificial",Rain,Wet,1305.500000
2545,43.836,-79.231,Pedestrian involved in a collision with transi...,Running onto Roadway,Inattentive,No Control,Daylight,Clear,Dry,725.000000
2546,43.837,-79.249,Pedestrian hit at mid-block,Running onto Roadway,Inattentive,No Control,Dark,Clear,Dry,2054.000000


In [36]:
ksi_df[(ksi_df['LONGITUDE'] == -79.533) & (ksi_df['LATITUDE'] == 43.594)]	

Unnamed: 0,X,Y,INDEX_,ACCNUM,YEAR,DATE,TIME,HOUR,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,DIVISION,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,POLICE_DIVISION,HOOD_ID,NEIGHBOURHOOD,ObjectId,COMBINED
2288,-8853683.0,5402879.0,4275178,1008571,2007,2007/11/15 05:00:00+00,724,7,LAKE SHORE BLVD W,LONG BRANCH AVE,<Null>,Major Arterial,Etobicoke York,3,22,43.594,-79.533,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Pedestrian Collisions,Pedestrian,20 to 24,Major,<Null>,North,Other,<Null>,<Null>,<Null>,Vehicle is going straight thru inter.while ped...,Crossing marked crosswalk without ROW,Inattentive,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,D22,19,Long Branch (19),2289,"(Crossing marked crosswalk without ROW, Inatte..."
3095,-8853683.0,5402879.0,5370894,1043273,2008,2008/06/11 04:00:00+00,2010,20,LAKE SHORE BLVD W,LONG BRANCH AVE,<Null>,Major Arterial,Etobicoke York,3,22,43.594,-79.533,Intersection,At Intersection,Traffic Signal,Clear,Dusk,Dry,Fatal,Pedestrian Collisions,Pedestrian,60 to 64,Fatal,26,South,Other,<Null>,<Null>,<Null>,Vehicle turns left while ped crosses with ROW ...,Crossing with right of way,Normal,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,D22,19,Long Branch (19),3096,"(Crossing with right of way, Normal, 2010, Tra..."


In [10]:
# PEDTYPE <Null> and PEDACT, PEDCOND is Other
# weird

ksi_df[(ksi_df['LONGITUDE'] == -79.528) & (ksi_df['LATITUDE'] == 43.595)]	

Unnamed: 0,X,Y,INDEX_,ACCNUM,YEAR,DATE,TIME,HOUR,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,DIVISION,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,POLICE_DIVISION,HOOD_ID,NEIGHBOURHOOD,ObjectId,COMBINED
16000,-8853047.0,5403083.0,81505308,342262,2020,2020/02/17 05:00:00+00,1949,19,LAKE SHORE BV W,THIRTY NINTH ST,15 m South of,Major Arterial,Etobicoke York,3,22,43.595,-79.528,Mid-Block,Non Intersection,No Control,Clear,"Dark, artificial",Dry,Non-Fatal Injury,Pedestrian Collisions,Pedestrian,30 to 34,Major,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Other,Other,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,D22,19,Long Branch (19),16001,"(<Null>, Other, Other, 1949, No Control, Dark,..."


In [17]:
feature_collapse.merge(ped_collapse, on = ['LATITUDE', 'LONGITUDE'], how = 'inner')

Unnamed: 0,LATITUDE,LONGITUDE,PEDTYPE,PEDACT,PEDCOND,TRAFFCTL,LIGHT,VISIBILITY,RDSFCOND,TIME,COUNT
0,43.594,-79.533,Vehicle is going straight thru inter.while ped...,Crossing marked crosswalk without ROW,Inattentive,Traffic Signal,Daylight,Clear,Dry,1367.000000,2
1,43.594,-79.533,Vehicle turns left while ped crosses with ROW ...,Crossing with right of way,Normal,Traffic Signal,Dusk,Clear,Dry,1367.000000,2
2,43.595,-79.528,<Null>,Other,Other,No Control,"Dark, artificial",Clear,Dry,1949.000000,1
3,43.597,-79.522,Vehicle is reversing and hits pedestrian,Other,Normal,No Control,Daylight,Clear,Dry,600.000000,1
4,43.598,-79.516,Vehicle is going straight thru inter.while ped...,Running onto Roadway,Normal,Traffic Signal,Dark,Rain,Wet,1956.333333,2
...,...,...,...,...,...,...,...,...,...,...,...
2543,43.836,-79.251,Vehicle is going straight thru inter.while ped...,Crossing with right of way,Normal,Traffic Signal,"Dusk, artificial",Rain,Wet,1305.500000,2
2544,43.836,-79.251,Vehicle turns left while ped crosses with ROW ...,Crossing with right of way,Normal,Traffic Signal,"Dark, artificial",Rain,Wet,1305.500000,2
2545,43.836,-79.231,Pedestrian involved in a collision with transi...,Running onto Roadway,Inattentive,No Control,Daylight,Clear,Dry,725.000000,1
2546,43.837,-79.249,Pedestrian hit at mid-block,Running onto Roadway,Inattentive,No Control,Dark,Clear,Dry,2054.000000,1


In [18]:
feature_collapse.to_csv('Pedestrian_Feature_Count.csv')