# Collisions 

# TODO
'case_id' #OK
'collision_date': str, #OK,
'collision_severity': #OK
'collision_time': str, #OK, 
'county_city_location': str, #OK 
'hit_and_run': #OK
'jurisdiction': #OK
'lighting': #OK
'location_type': #OK
'officer_id': #OK
'pcf_violation': #OK
'pcf_violation_category': #OK,
'pcf_violation_subsection': #OK,
'population': #OK
'primary_collision_factor': #OK,
'process_date': str, #OK,
'ramp_intersection': #OK
'road_condition_1': #OK,
'road_condition_2': #OK,
'road_surface': #OK,
'tow_away': #OK,
'type_of_collision': #OK,
'weather_1': #OK,
'weather_2': #OK,

In [None]:
import pandas as pd
import numpy as np
import datetime
import time

In [None]:
# Column name -> dtype used when reading the collisions CSV. The keys of this
# mapping are also what the loader passes as `usecols`.
dtype_collisions = {
    # int64 tops out at 2**63-1 (~9.22e18), but the biggest case id is
    # 9.870011e+18, so an unsigned 64-bit integer (0 to 2**64-1) is needed.
    "case_id": np.uint64,
    "collision_date": str,  # read as text; intended type is datetime.date
    "collision_severity": str,  # => converted to an ID during cleaning
    "collision_time": str,  # read as text; intended type is a time of day
    "county_city_location": str,
    "hit_and_run": str,  # => converted to an ID during cleaning
    "jurisdiction": pd.Int16Dtype(),
    "lighting": str,
    "location_type": str,
    "officer_id": str,
    "pcf_violation": pd.Int64Dtype(),
    "pcf_violation_category": str,
    "pcf_violation_subsection": str,
    "population": pd.Int64Dtype(),
    "primary_collision_factor": str,
    "process_date": str,  # read as text; intended type is datetime.date
    "ramp_intersection": pd.Int64Dtype(),
    "road_condition_1": str,
    "road_condition_2": str,
    "road_surface": str,
    "tow_away": "boolean",
    "type_of_collision": str,
    "weather_1": str,
    "weather_2": str,
}

In [None]:
# Load only the columns declared in dtype_collisions, each with its declared dtype.
collisions_data = pd.read_csv(
    "CSV-2018/collisions2018.csv",
    usecols=dtype_collisions.keys(),
    dtype=dtype_collisions,
)

In [None]:
# Bare expression: in a notebook this renders the loaded DataFrame as the cell output.
collisions_data

# Data Cleaning

In [None]:
# Work on an independent (deep) copy so cleaning never modifies the raw frame.
clean_data = collisions_data.copy(deep=True)

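Translation tables: each dictionary maps a categorical text label from the CSV to a compact code (an integer or a single letter).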

In [None]:
# collision_severity label -> integer code.
translation_collision_severity = {
    "fatal": 1,
    "severe injury": 2,
    "other injury": 3,
    "pain": 4,
    "property damage only": 0,
}

# hit_and_run label -> single-letter code.
translation_hit_and_run = {
    "felony": "F",
    "misdemeanor": "M",
    "not hit and run": "N",
}

# lighting label -> single-letter code.
translation_lighting = {
    "daylight": "A",
    "dusk or dawn": "B",
    "dark with street lights": "C",
    "dark with no street lights": "D",
    "dark with street lights not functioning": "E",
}

# location_type label -> single-letter code.
translation_location_type = {
    "highway": "H",
    "intersection": "I",
    "ramp": "R",
}

# pcf_violation_category label -> integer code.
# The codes are not contiguous: nothing maps to 20 or 21.
# NOTE(review): the SWITRS codebook appears to list "unsafe starting or
# backing" as 21 rather than 19 - confirm which value is intended here.
translation_pcf_violation_category = {
    "unknown": 0,
    "dui": 1,
    "impeding traffic": 2,
    "speeding": 3,
    "following too closely": 4,
    "wrong side of road": 5,
    "improper passing": 6,
    "unsafe lane change": 7,
    "improper turning": 8,
    "automobile right of way": 9,
    "pedestrian right of way": 10,
    "pedestrian violation": 11,
    "traffic signals and signs": 12,
    "hazardous parking": 13,
    "lights": 14,
    "brakes": 15,
    "other equipment": 16,
    "other hazardous violation": 17,
    "other than driver (or pedestrian)": 18,
    "unsafe starting or backing": 19,
    "other improper driving": 22,
    "pedestrian dui": 23,
    "fell asleep": 24,
}

# primary_collision_factor label -> single-letter code.
translation_primary_collision_factor = {
    "vehicle code violation": "A",
    "other improper driving": "B",
    "other than driver": "C",
    "unknown": "D",
    "fell asleep": "E",
}

# Road condition label -> single-letter code (shared by road_condition_1 and
# road_condition_2).
translation_road_condition = {
    "holes": "A",
    "loose material": "B",
    "obstruction": "C",
    "construction": "D",
    "reduced width": "E",
    "flooded": "F",
    "other": "G",
    "normal": "H",
}

# road_surface label -> single-letter code.
translation_road_surface = {
    "dry": "A",
    "wet": "B",
    "snowy": "C",
    "slippery": "D",
}

# type_of_collision label -> single-letter code.
translation_type_of_collision = {
    "head-on": "A",
    "sideswipe": "B",
    "rear end": "C",
    "broadside": "D",
    "hit object": "E",
    "overturned": "F",
    "pedestrian": "G",
    "other": "H",
}

# Weather label -> single-letter code (shared by weather_1 and weather_2).
translation_weather = {
    "clear": "A",
    "cloudy": "B",
    "raining": "C",
    "snowing": "D",
    "fog": "E",
    "other": "F",
    "wind": "G",
}

Apply the translation tables to the categorical columns and parse the date and time columns.

In [None]:
# Categorical columns whose labels become single-letter codes.
# `dict.get` returns None for any label missing from the table (NaN included),
# so unmapped values end up missing instead of raising.
letter_code_tables = {
    "hit_and_run": translation_hit_and_run,
    "lighting": translation_lighting,
    "location_type": translation_location_type,
    "primary_collision_factor": translation_primary_collision_factor,
    "road_condition_1": translation_road_condition,
    "road_condition_2": translation_road_condition,
    "road_surface": translation_road_surface,
    "type_of_collision": translation_type_of_collision,
    "weather_1": translation_weather,
    "weather_2": translation_weather,
}
for column, table in letter_code_tables.items():
    clean_data[column] = collisions_data[column].map(table.get)

# Categorical columns whose labels become integer codes. They are cast to the
# nullable Int16 dtype so that missing values survive the integer conversion.
# collision_severity used to index its table directly, which raised KeyError
# on a missing or unlisted label; it now degrades to a missing value exactly
# like every other translated column.
integer_code_tables = {
    "collision_severity": translation_collision_severity,
    "pcf_violation_category": translation_pcf_violation_category,
}
for column, table in integer_code_tables.items():
    translated = collisions_data[column].map(table.get)
    clean_data[column] = translated.astype(pd.Int16Dtype())

# Date columns were read as text; convert them to datetime64 values.
for column in ("collision_date", "process_date"):
    clean_data[column] = collisions_data[column].astype("datetime64[ns]")

# collision_time is parsed as a full timestamp first, then reduced to its
# time-of-day component.
parsed_collision_time = collisions_data["collision_time"].astype("datetime64[ns]")
clean_data["collision_time"] = parsed_collision_time.dt.time

In [None]:
# Bare expression: in a notebook this renders the cleaned DataFrame as the cell output.
clean_data

                                ------------------------------------------