In [42]:
import pandas as pd
import util
import numpy as np

In [43]:
# Load the four input tables used by the merges below.
# The two KSI tables are read with their first CSV column as the index.
hazardous_df = pd.read_csv(
    'Data/Hazardous_Driving_Areas_Toronto.csv'
)
road = pd.read_csv(
    'Data/Road_Impediments_Rounded3.csv'
)
ksi_count = pd.read_csv(
    'Data/Created Datasets/KSI_Collapse_Count_truncate3.csv',
    index_col=0,
)
ksi_feature = pd.read_csv(
    'Data/Created Datasets/KSI_Feature_Collapse.csv',
    index_col=0,
)

In [44]:
# Only the hazardous-driving table needs aggregating;
# road is already in aggregated form.
#
# Steps:
#   1. keep the coordinate columns plus the two measures we aggregate,
#   2. pass each coordinate through util.truncate(value, 3)
#      (presumably truncation to 3 decimal places, matching the
#      "truncate3"/"Rounded3" dataset names -- confirm against util),
#   3. collapse rows sharing a (Latitude, Longitude) pair:
#      IncidentsTotal is summed, SeverityScore is averaged.
#
# .assign() builds a new frame instead of writing into a column-subset of the
# original, which avoids SettingWithCopyWarning. The aggregators are given as
# the strings 'sum'/'mean' because passing the builtin `sum` / `np.mean`
# callables is deprecated in recent pandas (they resolve to the same
# group-wise sum/mean, so the result is unchanged).
hazardous_df = (
    hazardous_df[['Latitude', 'Longitude', 'IncidentsTotal', 'SeverityScore']]
    .assign(
        Longitude=lambda df: df['Longitude'].apply(util.truncate, args=(3,)),
        Latitude=lambda df: df['Latitude'].apply(util.truncate, args=(3,)),
    )
    .pivot_table(
        values=['IncidentsTotal', 'SeverityScore'],
        index=['Latitude', 'Longitude'],
        aggfunc={'IncidentsTotal': 'sum', 'SeverityScore': 'mean'},
    )
    .reset_index()
)

In [45]:
# Merge the aggregated hazardous-driving table with the road-impediment table.
#
# A right merge is required: every row of `road` is kept, because a location
# with road impediments does not necessarily have harsh-braking incidents.
# Those locations get NaN in the hazardous columns.
#
# For posterity: this used to be an inner merge.
merge = pd.merge(
    hazardous_df,
    road,
    how='right',
    on=['Latitude', 'Longitude'],
)

In [46]:
# Inspect the right-merged frame: NaN in IncidentsTotal / SeverityScore marks
# a road location that had no matching row in hazardous_df.
merge

Unnamed: 0,Latitude,Longitude,IncidentsTotal,SeverityScore,PercentOfVehicles,AvgAcceleration,PercentCar,PercentMPV,PercentLDT,PercentMDT,PercentHDT,PercentOther
0,37.801,-95.952,,,0.008,0.268,0.000,0.030,0.653,0.065,0.201,0.050
1,43.645,-79.393,,,0.094,0.732,0.023,0.261,0.298,0.094,0.179,0.145
2,43.609,-79.555,687.0,0.0585,0.002,0.562,0.022,0.125,0.279,0.067,0.363,0.143
3,43.685,-79.283,,,0.092,0.467,0.004,0.236,0.407,0.107,0.052,0.195
4,43.669,-79.426,,,0.045,0.520,0.011,0.306,0.229,0.107,0.127,0.221
...,...,...,...,...,...,...,...,...,...,...,...,...
49995,43.690,-79.359,,,0.000,0.000,0.030,0.121,0.291,0.016,0.161,0.382
49996,43.690,-79.400,,,0.087,0.436,0.017,0.475,0.203,0.067,0.043,0.195
49997,43.690,-79.358,,,0.023,0.343,0.000,0.527,0.344,0.076,0.008,0.046
49998,43.690,-79.369,,,0.003,0.462,0.127,0.215,0.424,0.112,0.048,0.074


In [47]:
# Semi-final merge: attach the KSI counts.
# ksi_count names its key columns in upper case, so the keys are given per
# side. The inner join keeps only locations present in both frames, and both
# pairs of key columns remain in the result.
merge = pd.merge(
    merge,
    ksi_count,
    how='inner',
    left_on=['Latitude', 'Longitude'],
    right_on=['LATITUDE', 'LONGITUDE'],
)

In [48]:
# Inspect the result of the inner merge with ksi_count (adds LATITUDE,
# LONGITUDE and COUNT next to the original Latitude / Longitude columns).
merge

Unnamed: 0,Latitude,Longitude,IncidentsTotal,SeverityScore,PercentOfVehicles,AvgAcceleration,PercentCar,PercentMPV,PercentLDT,PercentMDT,PercentHDT,PercentOther,LATITUDE,LONGITUDE,COUNT
0,43.676,-79.397,,,0.012,0.320,0.059,0.220,0.333,0.081,0.116,0.191,43.676,-79.397,1
1,43.651,-79.445,,,0.035,0.680,0.025,0.172,0.345,0.094,0.197,0.167,43.651,-79.445,2
2,43.808,-79.321,,,0.059,0.124,0.000,0.647,0.353,0.000,0.000,0.000,43.808,-79.321,1
3,43.685,-79.481,,,0.005,0.274,0.022,0.118,0.353,0.073,0.237,0.197,43.685,-79.481,1
4,43.682,-79.445,,,0.023,0.403,0.028,0.193,0.363,0.069,0.150,0.198,43.682,-79.445,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3344,43.699,-79.519,,,0.061,0.603,0.031,0.212,0.512,0.060,0.081,0.103,43.699,-79.519,1
3345,43.699,-79.587,7.0,0.009,0.047,0.425,0.004,0.069,0.199,0.085,0.488,0.155,43.699,-79.587,1
3346,43.699,-79.318,,,0.074,0.590,0.047,0.245,0.401,0.074,0.123,0.109,43.699,-79.318,1
3347,43.690,-79.292,,,0.006,0.462,0.056,0.249,0.346,0.080,0.152,0.118,43.690,-79.292,2


In [49]:
# Final merge: attach the KSI feature columns, joining on the upper-case
# coordinate columns brought in from ksi_count. Inner join, so only locations
# that also appear in ksi_feature survive.
final_merge = pd.merge(
    merge,
    ksi_feature,
    how='inner',
    on=['LATITUDE', 'LONGITUDE'],
)

In [50]:
# Spot check: look up a single location (43.808, -79.321) in ksi_feature to
# compare against the corresponding row of the merged frame.
ksi_feature.loc[
    (ksi_feature['LATITUDE'] == 43.808)
    & (ksi_feature['LONGITUDE'] == -79.321)
]

Unnamed: 0,LATITUDE,LONGITUDE,INVAGE,WARDNUM,RDSFCOND,VISIBILITY,LIGHT,TRAFFCTL,VEHTYPE,DRIVACT,DRIVCOND,SPEEDING,AG_DRIV,ALCOHOL,DISABILITY,REDLIGHT,MANOEUVER
3759,43.808,-79.321,25 to 29,22,Dry,Clear,Daylight,No Control,"Automobile, Station Wagon",Driving Properly,Normal,<Null>,<Null>,<Null>,<Null>,<Null>,Going Ahead


In [51]:
# Inspect the fully merged frame (hazardous + road + KSI count + KSI features).
final_merge

Unnamed: 0,Latitude,Longitude,IncidentsTotal,SeverityScore,PercentOfVehicles,AvgAcceleration,PercentCar,PercentMPV,PercentLDT,PercentMDT,...,TRAFFCTL,VEHTYPE,DRIVACT,DRIVCOND,SPEEDING,AG_DRIV,ALCOHOL,DISABILITY,REDLIGHT,MANOEUVER
0,43.676,-79.397,,,0.012,0.320,0.059,0.220,0.333,0.081,...,No Control,"Automobile, Station Wagon",Driving Properly,Inattentive,<Null>,Yes,<Null>,<Null>,<Null>,Going Ahead
1,43.651,-79.445,,,0.035,0.680,0.025,0.172,0.345,0.094,...,No Control,"Automobile, Station Wagon",Failed to Yield Right of Way,Normal,<Null>,<Null>,<Null>,<Null>,<Null>,Going Ahead
2,43.808,-79.321,,,0.059,0.124,0.000,0.647,0.353,0.000,...,No Control,"Automobile, Station Wagon",Driving Properly,Normal,<Null>,<Null>,<Null>,<Null>,<Null>,Going Ahead
3,43.685,-79.481,,,0.005,0.274,0.022,0.118,0.353,0.073,...,Traffic Signal,"Automobile, Station Wagon",Driving Properly,Normal,Yes,Yes,<Null>,<Null>,<Null>,Going Ahead
4,43.682,-79.445,,,0.023,0.403,0.028,0.193,0.363,0.069,...,No Control,"Automobile, Station Wagon",Lost control,Normal,<Null>,Yes,<Null>,<Null>,<Null>,Going Ahead
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3318,43.699,-79.519,,,0.061,0.603,0.031,0.212,0.512,0.060,...,Traffic Signal,"Automobile, Station Wagon",Failed to Yield Right of Way,Inattentive,<Null>,Yes,<Null>,<Null>,<Null>,Turning Right
3319,43.699,-79.587,7.0,0.009,0.047,0.425,0.004,0.069,0.199,0.085,...,Traffic Signal,"Automobile, Station Wagon",Driving Properly,Normal,<Null>,<Null>,<Null>,<Null>,<Null>,Stopped
3320,43.699,-79.318,,,0.074,0.590,0.047,0.245,0.401,0.074,...,No Control,"Automobile, Station Wagon",Driving Properly,"Ability Impaired, Alcohol Over .08",Yes,Yes,Yes,<Null>,<Null>,Going Ahead
3321,43.690,-79.292,,,0.006,0.462,0.056,0.249,0.346,0.080,...,Stop Sign,"Automobile, Station Wagon",<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Turning Right


In [52]:
# Drop the mixed-case coordinate columns; the upper-case LATITUDE / LONGITUDE
# columns carry the same join keys and are the ones kept.
# Rebinding (instead of inplace=True) leaves previously displayed frames
# untouched, and errors='ignore' makes the cell safe to re-run after the
# columns have already been removed.
final_merge = final_merge.drop(columns=['Latitude', 'Longitude'], errors='ignore')

In [53]:
# Persist the merged dataset. The DataFrame index is written as the first
# (unnamed) column, so read it back with index_col=0.
# NOTE(review): this is written to the current working directory, whereas the
# other derived inputs live under 'Data/Created Datasets/' -- confirm the
# intended location.
final_merge.to_csv('ALL_merge_truncate3.csv')