# Slice the large CSV into a data cube with figures aggregated

## Load data

In [1]:
# import libraries
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

# read data
df = pd.read_csv('../../../data/crashes.csv')

## Loading checking

In [52]:
print(df.head())
df.shape

                                     CRASH_RECORD_ID     RD_NO  \
0  79c7a2ce89f446262efd86df3d72d18b04ba487024b7c4...  JC199149   
1  792b539deaaad65ee5b4a9691d927a34d298eb33d42af0...  JB422857   
2  0115ade9a755e835255508463f7e9c4a9a0b47e9304238...  JF318029   
3  017040c61958d2fa977c956b2bd2d6759ef7754496dc96...  JF324552   
4  78eee027ec3dcc85d36c9e3fdae4729dcc56440105d65b...  JB291672   

  CRASH_DATE_EST_I  POSTED_SPEED_LIMIT TRAFFIC_CONTROL_DEVICE  \
0              NaN                  30         TRAFFIC SIGNAL   
1              NaN                  30            NO CONTROLS   
2              NaN                  30                UNKNOWN   
3              NaN                  30         TRAFFIC SIGNAL   
4              NaN                  30            NO CONTROLS   

       DEVICE_CONDITION WEATHER_CONDITION      LIGHTING_CONDITION  \
0  FUNCTIONING PROPERLY             CLEAR                DAYLIGHT   
1           NO CONTROLS             CLEAR                DAYLIGHT   
2    

(541604, 53)

## People involved and injured

In [6]:
# calculate the total of row count, INJURIES_TOTAL, INJURIES_FATAL, INJURIES_INCAPACITATING, INJURIES_NON_INCAPACITATING, INJURIES_REPORTED_NOT_EVIDENT, INJURIES_NO_INDICATION, INJURIES_UNKNOWN
injuries = df.groupby(['CRASH_YEAR','SIDE']).agg({'INJURIES_TOTAL':'sum','INJURIES_FATAL':'sum','INJURIES_INCAPACITATING':'sum','INJURIES_NON_INCAPACITATING':'sum','INJURIES_REPORTED_NOT_EVIDENT':'sum','INJURIES_NO_INDICATION':'sum'}).reset_index()

# convert columns other than 'CRASH_YEAR', 'SIDE' to int
injuries.iloc[:,2:] = injuries.iloc[:,2:].fillna(0).astype(int)

# select columns to be used
crash_ppl_involved = injuries[['CRASH_YEAR','SIDE','INJURIES_TOTAL','INJURIES_NO_INDICATION']]
crash_ppl_injured = injuries[['CRASH_YEAR','SIDE','INJURIES_FATAL','INJURIES_INCAPACITATING','INJURIES_NON_INCAPACITATING','INJURIES_REPORTED_NOT_EVIDENT']]

# rename columns
crash_ppl_involved.columns = ['CRASH_YEAR','SIDE','Injured','No indication of injury']
crash_ppl_injured.columns = ['CRASH_YEAR','SIDE','Fatal','Incapacitating','Non-incapacitating','Reported but not evident']

# output to JSON
crash_ppl_involved.to_json('crash_ppl_involved.json',orient='records',indent=2)
crash_ppl_injured.to_json('crash_ppl_injured.json',orient='records',indent=2)

## Time

In [8]:
#pivot CRASH_DAY_OF_WEEK and CRASH_HOUR to create a new column for each month and day
crash_day_time = df.pivot_table(index=['CRASH_YEAR','SIDE'], columns=['CRASH_DAY_OF_WEEK','CRASH_HOUR'], values='CRASH_RECORD_ID', aggfunc='count').reset_index()

#convert columns other than 'CRASH_YEAR', 'SIDE' to int
crash_day_time.iloc[:,2:] = crash_day_time.iloc[:,2:].fillna(0).astype(int)

#flatten the column to 1 layer and rename the columns of CRASH_DAY_OF_WEEK and CRASH_HOUR into a comma seperated string
crash_day_time.columns = ['CRASH_YEAR','SIDE', \
                          'Sun,0','Sun,1','Sun,2','Sun,3','Sun,4','Sun,5','Sun,6','Sun,7','Sun,8','Sun,9','Sun,10','Sun,11','Sun,12','Sun,13','Sun,14','Sun,15','Sun,16','Sun,17','Sun,18','Sun,19','Sun,20','Sun,21','Sun,22','Sun,23', \
                          'Mon,0','Mon,1','Mon,2','Mon,3','Mon,4','Mon,5','Mon,6','Mon,7','Mon,8','Mon,9','Mon,10','Mon,11','Mon,12','Mon,13','Mon,14','Mon,15','Mon,16','Mon,17','Mon,18','Mon,19','Mon,20','Mon,21','Mon,22','Mon,23', \
                          'Tue,0','Tue,1','Tue,2','Tue,3','Tue,4','Tue,5','Tue,6','Tue,7','Tue,8','Tue,9','Tue,10','Tue,11','Tue,12','Tue,13','Tue,14','Tue,15','Tue,16','Tue,17','Tue,18','Tue,19','Tue,20','Tue,21','Tue,22','Tue,23', \
                          'Wed,0','Wed,1','Wed,2','Wed,3','Wed,4','Wed,5','Wed,6','Wed,7','Wed,8','Wed,9','Wed,10','Wed,11','Wed,12','Wed,13','Wed,14','Wed,15','Wed,16','Wed,17','Wed,18','Wed,19','Wed,20','Wed,21','Wed,22','Wed,23', \
                          'Thu,0','Thu,1','Thu,2','Thu,3','Thu,4','Thu,5','Thu,6','Thu,7','Thu,8','Thu,9','Thu,10','Thu,11','Thu,12','Thu,13','Thu,14','Thu,15','Thu,16','Thu,17','Thu,18','Thu,19','Thu,20','Thu,21','Thu,22','Thu,23', \
                          'Fri,0','Fri,1','Fri,2','Fri,3','Fri,4','Fri,5','Fri,6','Fri,7','Fri,8','Fri,9','Fri,10','Fri,11','Fri,12','Fri,13','Fri,14','Fri,15','Fri,16','Fri,17','Fri,18','Fri,19','Fri,20','Fri,21','Fri,22','Fri,23', \
                          'Sat,0','Sat,1','Sat,2','Sat,3','Sat,4','Sat,5','Sat,6','Sat,7','Sat,8','Sat,9','Sat,10','Sat,11','Sat,12','Sat,13','Sat,14','Sat,15','Sat,16','Sat,17','Sat,18','Sat,19','Sat,20','Sat,21','Sat,22','Sat,23']
crash_day_time.head()

#output to JSON
crash_day_time.to_json('crash_day_time.json', orient='records',indent=2)

## Causes

In [2]:
causes = df.pivot_table(index=['CRASH_YEAR','SIDE'],columns='PRIM_CONTRIBUTORY_CAUSE',values='CRASH_RECORD_ID',aggfunc='count').reset_index()

# convert columns other than 'CRASH_YEAR', 'SIDE' to int
causes.iloc[:,2:] = causes.iloc[:,2:].fillna(0).astype(int)

# ensure the names are sorted in alphabetical order
cols = causes.columns.tolist()
# remove CRASH_YEAR and SIDE from the list
cols.remove('CRASH_YEAR')
cols.remove('SIDE')
# reorder the columns
cols.sort()
cols = ['CRASH_YEAR', 'SIDE'] + cols
causes = causes[cols]

# rename the columns
causes.columns = ['CRASH_YEAR','SIDE', \
                'Animal', \
                'Bicycle advancing legally on red light', \
                'Cell phone use other than texting', \
                'Disregarding other traffic signs', \
                'Disregarding road markings', \
                'Disregarding stop sign', \
                'Disregarding traffic signals', \
                'Disregarding yield sign', \
                'Distraction - from inside vehicle', \
                'Distraction - from outside vehicle', \
                'Distraction - other electronic device (navigation device, DVD player, etc.)', \
                'Driving on wrong side/wrong way', \
                'Driving skills/knowledge/experience', \
                'Vehicle condition', \
                'Evasive action due to animal, object, nonmotorist', \
                'Exceeding authorized speed limit', \
                'Exceeding safe speed for conditions', \
                'Failing to reduce speed to avoid crash', \
                'Failing to yield right-of-way', \
                'Following too closely', \
                'Had been drinking (not arrested)', \
                'Improper backing', \
                'Improper lane usage', \
                'Improper overtaking/passing', \
                'Improper turning/no signal', \
                'Motorcycle advancing legally on red light', \
                'Not applicable', \
                'Obstructed crosswalks', \
                'Operating vehicle in erratic, reckless, careless, negligent or aggressive manner', \
                'Passing stopped school bus', \
                'Physical condition of driver', \
                'Related to bus stop', \
                'Road construction/maintenance', \
                'Road engineering/surface/marking defects', \
                'Texting', \
                'Turning right on red', \
                'Unable to determine', \
                'Under the influence of alcohol/drugs (arrested)', \
                'Vision obscured (signs, tree limbs, buildings, etc.)', \
                'Weather']

# output to JSON
causes.to_json('crash_cause.json', orient='records',indent=2)

## Environment (Sankey)

In [28]:
# extract the environmental conditions
env = df[['CRASH_YEAR','SIDE','CRASH_RECORD_ID','LIGHTING_CONDITION','WEATHER_CONDITION','ROADWAY_SURFACE_COND']]

lighting_dict = {
  'DARKNESS': 'Darkness', 
  'DARKNESS, LIGHTED ROAD': 'Darkness', 
  'DAWN': 'Dawn', 
  'DAYLIGHT': 'Daylight', 
  'DUSK': 'Dusk', 
  'UNKNOWN': 'Unknown lighting'
}

weather_dict = {
  'BLOWING SAND, SOIL, DIRT': 'Other/Unknown weather', 
  'BLOWING SNOW': 'Rain/Snow', 
  'CLEAR': 'Clear', 
  'CLOUDY/OVERCAST': 'Cloudy', 
  'FOG/SMOKE/HAZE': 'Other/Unknown weather', 
  'FREEZING RAIN/DRIZZLE': 'Rain/Snow', 
  'OTHER': 'Other/Unknown weather', 
  'RAIN': 'Rain/Snow', 
  'SEVERE CROSS WIND GATE': 'Other/Unknown weather', 
  'SLEET/HAIL': 'Rain/Snow', 
  'SNOW': 'Rain/Snow', 
  'UNKNOWN': 'Other/Unknown weather'
}

roadway_dict = {
  'DRY': 'Dry', 
  'ICE': 'Snow/Ice', 
  'OTHER': 'Other/Unknown surface', 
  'SAND, MUD, DIRT': 'Other/Unknown surface', 
  'SNOW OR SLUSH': 'Snow/Ice', 
  'UNKNOWN': 'Other/Unknown surface', 
  'WET': 'Wet'
}

env['LIGHTING_GRP'] = env['LIGHTING_CONDITION'].map(lighting_dict)
env['WEATHER_GRP'] = env['WEATHER_CONDITION'].map(weather_dict)
env['ROADWAY_GRP'] = env['ROADWAY_SURFACE_COND'].map(roadway_dict)

# pivot table for LIGHTING_GRP and WEATHER_GRP
env_lighting_weather = env.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR', 'SIDE'], columns=['LIGHTING_GRP', 'WEATHER_GRP'], aggfunc='count', fill_value=0)

# pivot table for WEATHER_GRP and ROADWAY_GRP
env_weather_roadway = env.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR', 'SIDE'], columns=['WEATHER_GRP', 'ROADWAY_GRP'], aggfunc='count', fill_value=0)

# merge the two pivot tables
env_lighting_weather_roadway = pd.merge(env_lighting_weather, env_weather_roadway, on=['CRASH_YEAR', 'SIDE'])
env_lighting_weather_roadway.reset_index(inplace=True)
env_lighting_weather_roadway.columns = ['CRASH_YEAR','SIDE', \
                                        'Darkness,Clear', 'Darkness,Cloudy', 'Darkness,Other/Unknown weather', 'Darkness,Rain/Snow', \
                                        'Dawn,Clear', 'Dawn,Cloudy', 'Dawn,Other/Unknown weather', 'Dawn,Rain/Snow', \
                                        'Daylight,Clear', 'Daylight,Cloudy', 'Daylight,Other/Unknown weather', 'Daylight,Rain/Snow', \
                                        'Dusk,Clear', 'Dusk,Cloudy', 'Dusk,Other/Unknown weather', 'Dusk,Rain/Snow', \
                                        'Unknown lighting,Clear', 'Unknown lighting,Cloudy', 'Unknown lighting,Other/Unknown weather', 'Unknown lighting,Rain/Snow', \
                                        'Clear,Dry', 'Clear,Snow/Ice', 'Clear,Other/Unknown surface', 'Clear,Wet', \
                                        'Cloudy,Dry', 'Cloudy,Snow/Ice', 'Cloudy,Other/Unknown surface', 'Cloudy,Wet', \
                                        'Other/Unknown weather,Dry', 'Other/Unknown weather,Snow/Ice', 'Other/Unknown weather,Other/Unknown surface', 'Other/Unknown weather,Wet', \
                                        'Rain/Snow,Dry', 'Rain/Snow,Snow/Ice', 'Rain/Snow,Other/Unknown surface', 'Rain/Snow,Wet']
env_lighting_weather_roadway.head()

# output to JSON
env_lighting_weather_roadway.to_json('crash_env.json', orient='records',indent=2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  env['LIGHTING_GRP'] = env['LIGHTING_CONDITION'].map(lighting_dict)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  env['WEATHER_GRP'] = env['WEATHER_CONDITION'].map(weather_dict)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  env['ROADWAY_GRP'] = env['ROADWAY_SURFACE_COND'].map(roadway_dict)


## Enviornment (Severity)

In [29]:
import numpy as np

env = df[['CRASH_YEAR','SIDE','LIGHTING_CONDITION','WEATHER_CONDITION','ROADWAY_SURFACE_COND','MOST_SEVERE_INJURY']]

lighting_dict = {
  'DARKNESS': 'Darkness', 
  'DARKNESS, LIGHTED ROAD': 'Darkness', 
  'DAWN': 'Dawn', 
  'DAYLIGHT': 'Daylight', 
  'DUSK': 'Dusk', 
  'UNKNOWN': 'Unknown lighting'
}

weather_dict = {
  'BLOWING SAND, SOIL, DIRT': 'Other/Unknown weather', 
  'BLOWING SNOW': 'Rain/Snow', 
  'CLEAR': 'Clear', 
  'CLOUDY/OVERCAST': 'Cloudy', 
  'FOG/SMOKE/HAZE': 'Other/Unknown weather', 
  'FREEZING RAIN/DRIZZLE': 'Rain/Snow', 
  'OTHER': 'Other/Unknown weather', 
  'RAIN': 'Rain/Snow', 
  'SEVERE CROSS WIND GATE': 'Other/Unknown weather', 
  'SLEET/HAIL': 'Rain/Snow', 
  'SNOW': 'Rain/Snow', 
  'UNKNOWN': 'Other/Unknown weather'
}

roadway_dict = {
  'DRY': 'Dry', 
  'ICE': 'Snow/Ice', 
  'OTHER': 'Other/Unknown surface', 
  'SAND, MUD, DIRT': 'Other/Unknown surface', 
  'SNOW OR SLUSH': 'Snow/Ice', 
  'UNKNOWN': 'Other/Unknown surface', 
  'WET': 'Wet'
}

injury_dict = {
  'FATAL': 'Severe injury',
  'INCAPACITATING INJURY': 'Severe injury',
  'NO INDICATION OF INJURY': 'No injury',
  'NONINCAPACITATING INJURY': 'Moderate injury',
  'REPORTED, NOT EVIDENT': 'No injury'
}

env['LIGHTING_GRP'] = env['LIGHTING_CONDITION'].map(lighting_dict)
env['WEATHER_GRP'] = env['WEATHER_CONDITION'].map(weather_dict)
env['ROADWAY_GRP'] = env['ROADWAY_SURFACE_COND'].map(roadway_dict)
env['INJURY_GRP'] = env['MOST_SEVERE_INJURY'].map(injury_dict)

# drop original columns
env.drop(['LIGHTING_CONDITION','WEATHER_CONDITION','ROADWAY_SURFACE_COND','MOST_SEVERE_INJURY'], axis=1, inplace=True)

# injury indicators
for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
    env['{}'.format(INJURY_GRP)] = np.where(env['INJURY_GRP'] == INJURY_GRP, 1, 0)

for LIGHTING_GRP in ('Daylight', 'Dusk', 'Dawn', 'Darkness', 'Unknown lighting'):
    for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
        env['{},{}'.format(LIGHTING_GRP, INJURY_GRP)] = np.where((env['LIGHTING_GRP'] == LIGHTING_GRP) & (env['INJURY_GRP'] == INJURY_GRP), 1, 0)

for WEATHER_GRP in ('Clear', 'Cloudy', 'Rain/Snow', 'Other/Unknown weather'):
    for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
        env['{},{}'.format(WEATHER_GRP, INJURY_GRP)] = np.where((env['WEATHER_GRP'] == WEATHER_GRP) & (env['INJURY_GRP'] == INJURY_GRP), 1, 0)

for ROADWAY_GRP in ('Dry', 'Wet', 'Snow/Ice', 'Other/Unknown surface'):
    for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
        env['{},{}'.format(ROADWAY_GRP, INJURY_GRP)] = np.where((env['ROADWAY_GRP'] == ROADWAY_GRP) & (env['INJURY_GRP'] == INJURY_GRP), 1, 0)

# group by CRASH_YEAR and SIDE
env = env.groupby(['CRASH_YEAR','SIDE']).sum()
env.reset_index(inplace=True)
print(env.head())

# ouput as JSON
env.to_json('crash_env.json', orient='records',indent=2)

   CRASH_YEAR                SIDE  Severe injury  Moderate injury  No injury  \
0        2018             Central            198              828      11479   
1        2018      Far North Side            248             1033      13801   
2        2018  Far Southeast Side            173              774       7952   
3        2018  Far Southwest Side            139              582       6577   
4        2018          North Side            186              811       9982   

   Daylight,Severe injury  Daylight,Moderate injury  Daylight,No injury  \
0                     110                       497                7616   
1                     158                       690                9493   
2                      98                       471                5080   
3                      87                       342                4381   
4                     111                       503                6428   

   Dusk,Severe injury  Dusk,Moderate injury  ...  Dry,No injury  \
0

#### One-factor lift (for evaluation only, should re-calculate on website)

In [31]:
# for each condition, calculate the probability
for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
    env['{} %'.format(INJURY_GRP)] = env['{}'.format(INJURY_GRP)] / (env['Severe injury'] + env['Moderate injury'] + env['No injury'])

for LIGHTING_GRP in ('Daylight', 'Dawn', 'Darkness', 'Dusk', 'Unknown lighting'):
    for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
        env['{},{} %'.format(LIGHTING_GRP, INJURY_GRP)] = \
          env['{},{}'.format(LIGHTING_GRP, INJURY_GRP)] \
            / (env['{},{}'.format(LIGHTING_GRP, 'Severe injury')] + env['{},{}'.format(LIGHTING_GRP, 'Moderate injury')] + env['{},{}'.format(LIGHTING_GRP, 'No injury')])
        
for WEATHER_GRP in ('Cloudy', 'Clear', 'Rain/Snow', 'Other/Unknown weather'):
    for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
        env['{},{} %'.format(WEATHER_GRP, INJURY_GRP)] = \
          env['{},{}'.format(WEATHER_GRP, INJURY_GRP)] \
            / (env['{},{}'.format(WEATHER_GRP, 'Severe injury')] + env['{},{}'.format(WEATHER_GRP, 'Moderate injury')] + env['{},{}'.format(WEATHER_GRP, 'No injury')])
        
for ROADWAY_GRP in ('Snow/Ice', 'Wet', 'Dry', 'Other/Unknown surface'):
    for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
        env['{},{} %'.format(ROADWAY_GRP, INJURY_GRP)] = \
          env['{},{}'.format(ROADWAY_GRP, INJURY_GRP)] \
            / (env['{},{}'.format(ROADWAY_GRP, 'Severe injury')] + env['{},{}'.format(ROADWAY_GRP, 'Moderate injury')] + env['{},{}'.format(ROADWAY_GRP, 'No injury')])

# for each condition, calculate the lift
for LIGHTING_GRP in ('Daylight', 'Dawn', 'Darkness', 'Dusk', 'Unknown lighting'):
    for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
        env['{},{} lift'.format(LIGHTING_GRP, INJURY_GRP)] = \
          env['{},{} %'.format(LIGHTING_GRP, INJURY_GRP)] / env['{} %'.format(INJURY_GRP)]
        
for WEATHER_GRP in ('Cloudy', 'Clear', 'Rain/Snow', 'Other/Unknown weather'):
    for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
        env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
          env['{},{} %'.format(WEATHER_GRP, INJURY_GRP)] / env['{} %'.format(INJURY_GRP)]
        
for ROADWAY_GRP in ('Snow/Ice', 'Wet', 'Dry', 'Other/Unknown surface'):
    for INJURY_GRP in ('Severe injury', 'Moderate injury', 'No injury'):
        env['{},{} lift'.format(ROADWAY_GRP, INJURY_GRP)] = \
          env['{},{} %'.format(ROADWAY_GRP, INJURY_GRP)] / env['{} %'.format(INJURY_GRP)]
        
#preview the columns with lift
env[['CRASH_YEAR','SIDE','Darkness,Moderate injury lift','Darkness,No injury lift','Darkness,Severe injury lift','Daylight,Moderate injury lift','Daylight,No injury lift','Daylight,Severe injury lift','Dusk,Moderate injury lift','Dusk,No injury lift','Dusk,Severe injury lift','Dawn,Moderate injury lift','Dawn,No injury lift','Dawn,Severe injury lift','Cloudy,Moderate injury lift','Cloudy,No injury lift','Cloudy,Severe injury lift','Rain/Snow,Moderate injury lift','Rain/Snow,No injury lift','Rain/Snow,Severe injury lift','Other/Unknown weather,Moderate injury lift','Other/Unknown weather,No injury lift','Other/Unknown weather,Severe injury lift','Dry,Moderate injury lift','Dry,No injury lift','Dry,Severe injury lift','Wet,Moderate injury lift','Wet,No injury lift','Wet,Severe injury lift','Snow/Ice,Moderate injury lift','Snow/Ice,No injury lift','Snow/Ice,Severe injury lift','Other/Unknown surface,Moderate injury lift','Other/Unknown surface,No injury lift','Other/Unknown surface,Severe injury lift']].head()

  env['{},{} lift'.format(LIGHTING_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(WEATHER_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(ROADWAY_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(ROADWAY_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(ROADWAY_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(ROADWAY_GRP, INJURY_GRP)] = \
  env['{},{} lift'.format(ROADWAY_GRP, INJURY_G

Unnamed: 0,CRASH_YEAR,SIDE,"Darkness,Moderate injury lift","Darkness,No injury lift","Darkness,Severe injury lift","Daylight,Moderate injury lift","Daylight,No injury lift","Daylight,Severe injury lift","Dusk,Moderate injury lift","Dusk,No injury lift",...,"Dry,Severe injury lift","Wet,Moderate injury lift","Wet,No injury lift","Wet,Severe injury lift","Snow/Ice,Moderate injury lift","Snow/Ice,No injury lift","Snow/Ice,Severe injury lift","Other/Unknown surface,Moderate injury lift","Other/Unknown surface,No injury lift","Other/Unknown surface,Severe injury lift"
0,2018,Central,1.258555,0.975273,1.352311,0.912808,1.008965,0.844853,1.125664,0.984502,...,0.908447,1.363146,0.962276,1.668418,1.115537,0.987251,1.255954,0.551009,1.044216,0.314212
1,2018,Far North Side,1.205443,0.978616,1.334268,0.974193,1.003204,0.929184,1.114519,0.97881,...,0.99805,1.216135,0.976206,1.423843,0.80893,1.023411,0.493091,0.430535,1.051116,0.527446
2,2018,Far Southeast Side,1.174065,0.974764,1.381202,0.958627,1.006368,0.89238,1.265088,0.985089,...,1.075085,1.191534,0.974551,1.312836,0.812451,1.037614,0.110148,0.590621,1.05777,0.176162
3,2018,Far Southwest Side,1.299259,0.96989,1.171706,0.891583,1.010658,0.949649,0.980456,0.995466,...,1.084825,1.290315,0.972608,1.080523,0.961497,1.024541,0.0,0.589105,1.047565,0.469831
4,2018,North Side,1.149249,0.98151,1.34152,0.966972,1.00398,0.930415,1.016337,1.004094,...,1.01706,1.210893,0.978143,1.253467,0.693463,1.021492,1.183167,0.612505,1.041991,0.436025


#### Two-factor combination

In [37]:
import numpy as np

env2 = df[['CRASH_YEAR','SIDE','LIGHTING_CONDITION','WEATHER_CONDITION','ROADWAY_SURFACE_COND','MOST_SEVERE_INJURY']]

lighting_dict = {
  'DARKNESS': 'Darkness', 
  'DARKNESS, LIGHTED ROAD': 'Darkness', 
  'DAWN': 'Dawn', 
  'DAYLIGHT': 'Daylight', 
  'DUSK': 'Dusk', 
  'UNKNOWN': 'Unknown lighting'
}

weather_dict = {
  'BLOWING SAND, SOIL, DIRT': 'Other/Unknown weather', 
  'BLOWING SNOW': 'Rain/Snow', 
  'CLEAR': 'Clear', 
  'CLOUDY/OVERCAST': 'Cloudy', 
  'FOG/SMOKE/HAZE': 'Other/Unknown weather', 
  'FREEZING RAIN/DRIZZLE': 'Rain/Snow', 
  'OTHER': 'Other/Unknown weather', 
  'RAIN': 'Rain/Snow', 
  'SEVERE CROSS WIND GATE': 'Other/Unknown weather', 
  'SLEET/HAIL': 'Rain/Snow', 
  'SNOW': 'Rain/Snow', 
  'UNKNOWN': 'Other/Unknown weather'
}

roadway_dict = {
  'DRY': 'Dry', 
  'ICE': 'Snow/Ice', 
  'OTHER': 'Other/Unknown surface', 
  'SAND, MUD, DIRT': 'Other/Unknown surface', 
  'SNOW OR SLUSH': 'Snow/Ice', 
  'UNKNOWN': 'Other/Unknown surface', 
  'WET': 'Wet'
}

injury_dict2 = {
  'FATAL': 'Injured',
  'INCAPACITATING INJURY': 'Injured',
  'NO INDICATION OF INJURY': 'No injury',
  'NONINCAPACITATING INJURY': 'Injured',
  'REPORTED, NOT EVIDENT': 'No injury'
}

env2['LIGHTING_GRP'] = env2['LIGHTING_CONDITION'].map(lighting_dict)
env2['WEATHER_GRP'] = env2['WEATHER_CONDITION'].map(weather_dict)
env2['ROADWAY_GRP'] = env2['ROADWAY_SURFACE_COND'].map(roadway_dict)
env2['INJURY_GRP_2'] = env2['MOST_SEVERE_INJURY'].map(injury_dict2)

# drop original columns
env2.drop(['LIGHTING_CONDITION','WEATHER_CONDITION','ROADWAY_SURFACE_COND','MOST_SEVERE_INJURY'], axis=1, inplace=True)

# injury indicators
for INJURY_GRP_2 in ('Injured', 'No injury'):
    env2['{}'.format(INJURY_GRP_2)] = np.where(env2['INJURY_GRP_2'] == INJURY_GRP_2, 1, 0)

for LIGHTING_GRP in ('Daylight', 'Dusk', 'Dawn', 'Darkness', 'Unknown lighting'):
    for WEATHER_GRP in ('Clear', 'Cloudy', 'Rain/Snow', 'Other/Unknown weather'):
        for INJURY_GRP_2 in ('Injured', 'No injury'):
            env2['{},{},{}'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = np.where((env2['LIGHTING_GRP'] == LIGHTING_GRP) & (env2['WEATHER_GRP'] == WEATHER_GRP) & (env2['INJURY_GRP_2'] == INJURY_GRP_2), 1, 0)

for LIGHTING_GRP in ('Daylight', 'Dusk', 'Dawn', 'Darkness', 'Unknown lighting'):
    for ROADWAY_GRP in ('Dry', 'Wet', 'Snow/Ice', 'Other/Unknown surface'):
        for INJURY_GRP_2 in ('Injured', 'No injury'):
            env2['{},{},{}'.format(LIGHTING_GRP, ROADWAY_GRP, INJURY_GRP_2)] = np.where((env2['LIGHTING_GRP'] == LIGHTING_GRP) & (env2['ROADWAY_GRP'] == ROADWAY_GRP) & (env2['INJURY_GRP_2'] == INJURY_GRP_2), 1, 0)

for WEATHER_GRP in ('Clear', 'Cloudy', 'Rain/Snow', 'Other/Unknown weather'):
    for ROADWAY_GRP in ('Dry', 'Wet', 'Snow/Ice', 'Other/Unknown surface'):
        for INJURY_GRP_2 in ('Injured', 'No injury'):
            env2['{},{},{}'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] = np.where((env2['WEATHER_GRP'] == WEATHER_GRP) & (env2['ROADWAY_GRP'] == ROADWAY_GRP) & (env2['INJURY_GRP_2'] == INJURY_GRP_2), 1, 0)

# group by CRASH_YEAR and SIDE
env2 = env2.groupby(['CRASH_YEAR','SIDE']).sum()
env2.reset_index(inplace=True)
env2.head()

# output as JSON
env2.to_json('crash_env2.json', orient='records', indent=2)

  env2['{},{},{}'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] = np.where((env2['WEATHER_GRP'] == WEATHER_GRP) & (env2['ROADWAY_GRP'] == ROADWAY_GRP) & (env2['INJURY_GRP_2'] == INJURY_GRP_2), 1, 0)
  env2['{},{},{}'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] = np.where((env2['WEATHER_GRP'] == WEATHER_GRP) & (env2['ROADWAY_GRP'] == ROADWAY_GRP) & (env2['INJURY_GRP_2'] == INJURY_GRP_2), 1, 0)
  env2['{},{},{}'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] = np.where((env2['WEATHER_GRP'] == WEATHER_GRP) & (env2['ROADWAY_GRP'] == ROADWAY_GRP) & (env2['INJURY_GRP_2'] == INJURY_GRP_2), 1, 0)
  env2['{},{},{}'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] = np.where((env2['WEATHER_GRP'] == WEATHER_GRP) & (env2['ROADWAY_GRP'] == ROADWAY_GRP) & (env2['INJURY_GRP_2'] == INJURY_GRP_2), 1, 0)
  env2['{},{},{}'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] = np.where((env2['WEATHER_GRP'] == WEATHER_GRP) & (env2['ROADWAY_GRP'] == ROADWAY_GRP) & (env2['INJURY_GRP_2'] == INJURY_GRP_2), 1

#### Two-factor lift (for evaluation only, should re-calculate on website)

In [38]:
# for each condition, calculate the probability
for INJURY_GRP_2 in ('Injured', 'No injury'):
    env2['{} %'.format(INJURY_GRP_2)] = env2['{}'.format(INJURY_GRP_2)] / (env2['Injured'] + env2['No injury'])

for LIGHTING_GRP in ('Daylight', 'Dawn', 'Darkness', 'Dusk'):
    for WEATHER_GRP in ('Cloudy', 'Clear', 'Rain/Snow'):
        for INJURY_GRP_2 in ('Injured', 'No injury'):
            env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
              env2['{},{},{}'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] \
                / (env2['{},{},{}'.format(LIGHTING_GRP, WEATHER_GRP, 'Injured')] + env2['{},{},{}'.format(LIGHTING_GRP, WEATHER_GRP, 'No injury')])

for LIGHTING_GRP in ('Daylight', 'Dawn', 'Darkness', 'Dusk'):
    for ROADWAY_GRP in ('Snow/Ice', 'Wet', 'Dry'):
        for INJURY_GRP_2 in ('Injured', 'No injury'):
            env2['{},{},{} %'.format(LIGHTING_GRP, ROADWAY_GRP, INJURY_GRP_2)] = \
              env2['{},{},{}'.format(LIGHTING_GRP, ROADWAY_GRP, INJURY_GRP_2)] \
                / (env2['{},{},{}'.format(LIGHTING_GRP, ROADWAY_GRP, 'Injured')] + env2['{},{},{}'.format(LIGHTING_GRP, ROADWAY_GRP, 'No injury')])

for WEATHER_GRP in ('Cloudy', 'Clear', 'Rain/Snow'):
    for ROADWAY_GRP in ('Snow/Ice', 'Wet', 'Dry'):
        for INJURY_GRP_2 in ('Injured', 'No injury'):
            env2['{},{},{} %'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] = \
              env2['{},{},{}'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] \
                / (env2['{},{},{}'.format(WEATHER_GRP, ROADWAY_GRP, 'Injured')] + env2['{},{},{}'.format(WEATHER_GRP, ROADWAY_GRP, 'No injury')])

# for each condition, calculate the lift        
for LIGHTING_GRP in ('Daylight', 'Dawn', 'Darkness', 'Dusk'):
    for WEATHER_GRP in ('Cloudy', 'Clear', 'Rain/Snow'):
        for INJURY_GRP_2 in ('Injured', 'No injury'):
            env2['{},{},{} lift'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
              env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] / env2['{} %'.format(INJURY_GRP_2)]
            
for LIGHTING_GRP in ('Daylight', 'Dawn', 'Darkness', 'Dusk'):
    for ROADWAY_GRP in ('Snow/Ice', 'Wet', 'Dry'):
        for INJURY_GRP_2 in ('Injured', 'No injury'):
            env2['{},{},{} lift'.format(LIGHTING_GRP, ROADWAY_GRP, INJURY_GRP_2)] = \
              env2['{},{},{} %'.format(LIGHTING_GRP, ROADWAY_GRP, INJURY_GRP_2)] / env2['{} %'.format(INJURY_GRP_2)]
            
for WEATHER_GRP in ('Cloudy', 'Clear', 'Rain/Snow'):
    for ROADWAY_GRP in ('Snow/Ice', 'Wet', 'Dry'):
        for INJURY_GRP_2 in ('Injured', 'No injury'):
            env2['{},{},{} lift'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] = \
              env2['{},{},{} %'.format(WEATHER_GRP, ROADWAY_GRP, INJURY_GRP_2)] / env2['{} %'.format(INJURY_GRP_2)]

  env2['{} %'.format(INJURY_GRP_2)] = env2['{}'.format(INJURY_GRP_2)] / (env2['Injured'] + env2['No injury'])
  env2['{} %'.format(INJURY_GRP_2)] = env2['{}'.format(INJURY_GRP_2)] / (env2['Injured'] + env2['No injury'])
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_GRP, INJURY_GRP_2)] = \
  env2['{},{},{} %'.format(LIGHTING_GRP, WEATHER_G

## Type

In [4]:
crash_type = df.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR', 'SIDE'], columns=['FIRST_CRASH_TYPE'], aggfunc='count', fill_value=0)
crash_type.head()

Unnamed: 0_level_0,FIRST_CRASH_TYPE,ANGLE,ANIMAL,FIXED OBJECT,HEAD ON,OTHER NONCOLLISION,OTHER OBJECT,OVERTURNED,PARKED MOTOR VEHICLE,PEDALCYCLIST,PEDESTRIAN,REAR END,REAR TO FRONT,REAR TO REAR,REAR TO SIDE,SIDESWIPE OPPOSITE DIRECTION,SIDESWIPE SAME DIRECTION,TRAIN,TURNING
CRASH_YEAR,SIDE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2018,Central,838,2,528,60,46,91,2,1638,264,413,2854,0,0,0,142,3633,0,2010
2018,Far North Side,1608,29,582,122,47,132,4,3512,276,375,3886,0,0,0,263,2279,0,1994
2018,Far Southeast Side,1117,12,635,85,53,126,15,2151,43,209,1988,0,0,0,131,1134,1,1224
2018,Far Southwest Side,841,3,360,57,14,73,7,1693,48,171,1814,0,0,0,88,1006,0,1137
2018,North Side,968,4,398,97,41,91,1,2448,365,342,2775,0,0,0,197,1753,0,1512
