## Slice the large CSV into a data cube with figures aggregated

In [19]:
# import libraries
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

# read data
df = pd.read_csv('../../../data/crashes.csv')

## Loading checking

In [52]:
print(df.head())
df.shape

                                     CRASH_RECORD_ID     RD_NO  \
0  79c7a2ce89f446262efd86df3d72d18b04ba487024b7c4...  JC199149   
1  792b539deaaad65ee5b4a9691d927a34d298eb33d42af0...  JB422857   
2  0115ade9a755e835255508463f7e9c4a9a0b47e9304238...  JF318029   
3  017040c61958d2fa977c956b2bd2d6759ef7754496dc96...  JF324552   
4  78eee027ec3dcc85d36c9e3fdae4729dcc56440105d65b...  JB291672   

  CRASH_DATE_EST_I  POSTED_SPEED_LIMIT TRAFFIC_CONTROL_DEVICE  \
0              NaN                  30         TRAFFIC SIGNAL   
1              NaN                  30            NO CONTROLS   
2              NaN                  30                UNKNOWN   
3              NaN                  30         TRAFFIC SIGNAL   
4              NaN                  30            NO CONTROLS   

       DEVICE_CONDITION WEATHER_CONDITION      LIGHTING_CONDITION  \
0  FUNCTIONING PROPERLY             CLEAR                DAYLIGHT   
1           NO CONTROLS             CLEAR                DAYLIGHT   
2    

(541604, 53)

## People involved and injured

In [6]:
# calculate the total of row count, INJURIES_TOTAL, INJURIES_FATAL, INJURIES_INCAPACITATING, INJURIES_NON_INCAPACITATING, INJURIES_REPORTED_NOT_EVIDENT, INJURIES_NO_INDICATION, INJURIES_UNKNOWN
injuries = df.groupby(['CRASH_YEAR','SIDE']).agg({'INJURIES_TOTAL':'sum','INJURIES_FATAL':'sum','INJURIES_INCAPACITATING':'sum','INJURIES_NON_INCAPACITATING':'sum','INJURIES_REPORTED_NOT_EVIDENT':'sum','INJURIES_NO_INDICATION':'sum'}).reset_index()

# convert columns other than 'CRASH_YEAR', 'SIDE' to int
injuries.iloc[:,2:] = injuries.iloc[:,2:].fillna(0).astype(int)

# select columns to be used
crash_ppl_involved = injuries[['CRASH_YEAR','SIDE','INJURIES_TOTAL','INJURIES_NO_INDICATION']]
crash_ppl_injured = injuries[['CRASH_YEAR','SIDE','INJURIES_FATAL','INJURIES_INCAPACITATING','INJURIES_NON_INCAPACITATING','INJURIES_REPORTED_NOT_EVIDENT']]

# rename columns
crash_ppl_involved.columns = ['CRASH_YEAR','SIDE','Injured','No indication of injury']
crash_ppl_injured.columns = ['CRASH_YEAR','SIDE','Fatal','Incapacitating','Non-incapacitating','Reported but not evident']

# output to JSON
crash_ppl_involved.to_json('crash_ppl_involved.json',orient='records',indent=2)
crash_ppl_injured.to_json('crash_ppl_injured.json',orient='records',indent=2)

## Time

In [8]:
#pivot CRASH_DAY_OF_WEEK and CRASH_HOUR to create a new column for each month and day
crash_day_time = df.pivot_table(index=['CRASH_YEAR','SIDE'], columns=['CRASH_DAY_OF_WEEK','CRASH_HOUR'], values='CRASH_RECORD_ID', aggfunc='count').reset_index()

#convert columns other than 'CRASH_YEAR', 'SIDE' to int
crash_day_time.iloc[:,2:] = crash_day_time.iloc[:,2:].fillna(0).astype(int)

#flatten the column to 1 layer and rename the columns of CRASH_DAY_OF_WEEK and CRASH_HOUR into a comma seperated string
crash_day_time.columns = ['CRASH_YEAR','SIDE', \
                          'Sun,0','Sun,1','Sun,2','Sun,3','Sun,4','Sun,5','Sun,6','Sun,7','Sun,8','Sun,9','Sun,10','Sun,11','Sun,12','Sun,13','Sun,14','Sun,15','Sun,16','Sun,17','Sun,18','Sun,19','Sun,20','Sun,21','Sun,22','Sun,23', \
                          'Mon,0','Mon,1','Mon,2','Mon,3','Mon,4','Mon,5','Mon,6','Mon,7','Mon,8','Mon,9','Mon,10','Mon,11','Mon,12','Mon,13','Mon,14','Mon,15','Mon,16','Mon,17','Mon,18','Mon,19','Mon,20','Mon,21','Mon,22','Mon,23', \
                          'Tue,0','Tue,1','Tue,2','Tue,3','Tue,4','Tue,5','Tue,6','Tue,7','Tue,8','Tue,9','Tue,10','Tue,11','Tue,12','Tue,13','Tue,14','Tue,15','Tue,16','Tue,17','Tue,18','Tue,19','Tue,20','Tue,21','Tue,22','Tue,23', \
                          'Wed,0','Wed,1','Wed,2','Wed,3','Wed,4','Wed,5','Wed,6','Wed,7','Wed,8','Wed,9','Wed,10','Wed,11','Wed,12','Wed,13','Wed,14','Wed,15','Wed,16','Wed,17','Wed,18','Wed,19','Wed,20','Wed,21','Wed,22','Wed,23', \
                          'Thu,0','Thu,1','Thu,2','Thu,3','Thu,4','Thu,5','Thu,6','Thu,7','Thu,8','Thu,9','Thu,10','Thu,11','Thu,12','Thu,13','Thu,14','Thu,15','Thu,16','Thu,17','Thu,18','Thu,19','Thu,20','Thu,21','Thu,22','Thu,23', \
                          'Fri,0','Fri,1','Fri,2','Fri,3','Fri,4','Fri,5','Fri,6','Fri,7','Fri,8','Fri,9','Fri,10','Fri,11','Fri,12','Fri,13','Fri,14','Fri,15','Fri,16','Fri,17','Fri,18','Fri,19','Fri,20','Fri,21','Fri,22','Fri,23', \
                          'Sat,0','Sat,1','Sat,2','Sat,3','Sat,4','Sat,5','Sat,6','Sat,7','Sat,8','Sat,9','Sat,10','Sat,11','Sat,12','Sat,13','Sat,14','Sat,15','Sat,16','Sat,17','Sat,18','Sat,19','Sat,20','Sat,21','Sat,22','Sat,23']
crash_day_time.head()

#output to JSON
crash_day_time.to_json('crash_day_time.json', orient='records',indent=2)

## Causes

In [2]:
causes = df.pivot_table(index=['CRASH_YEAR','SIDE'],columns='PRIM_CONTRIBUTORY_CAUSE',values='CRASH_RECORD_ID',aggfunc='count').reset_index()

# convert columns other than 'CRASH_YEAR', 'SIDE' to int
causes.iloc[:,2:] = causes.iloc[:,2:].fillna(0).astype(int)

# ensure the names are sorted in alphabetical order
cols = causes.columns.tolist()
# remove CRASH_YEAR and SIDE from the list
cols.remove('CRASH_YEAR')
cols.remove('SIDE')
# reorder the columns
cols.sort()
cols = ['CRASH_YEAR', 'SIDE'] + cols
causes = causes[cols]

# rename the columns
causes.columns = ['CRASH_YEAR','SIDE', \
                'Animal', \
                'Bicycle advancing legally on red light', \
                'Cell phone use other than texting', \
                'Disregarding other traffic signs', \
                'Disregarding road markings', \
                'Disregarding stop sign', \
                'Disregarding traffic signals', \
                'Disregarding yield sign', \
                'Distraction - from inside vehicle', \
                'Distraction - from outside vehicle', \
                'Distraction - other electronic device (navigation device, DVD player, etc.)', \
                'Driving on wrong side/wrong way', \
                'Driving skills/knowledge/experience', \
                'Vehicle condition', \
                'Evasive action due to animal, object, nonmotorist', \
                'Exceeding authorized speed limit', \
                'Exceeding safe speed for conditions', \
                'Failing to reduce speed to avoid crash', \
                'Failing to yield right-of-way', \
                'Following too closely', \
                'Had been drinking (not arrested)', \
                'Improper backing', \
                'Improper lane usage', \
                'Improper overtaking/passing', \
                'Improper turning/no signal', \
                'Motorcycle advancing legally on red light', \
                'Not applicable', \
                'Obstructed crosswalks', \
                'Operating vehicle in erratic, reckless, careless, negligent or aggressive manner', \
                'Passing stopped school bus', \
                'Physical condition of driver', \
                'Related to bus stop', \
                'Road construction/maintenance', \
                'Road engineering/surface/marking defects', \
                'Texting', \
                'Turning right on red', \
                'Unable to determine', \
                'Under the influence of alcohol/drugs (arrested)', \
                'Vision obscured (signs, tree limbs, buildings, etc.)', \
                'Weather']

# output to JSON
causes.to_json('crash_cause.json', orient='records',indent=2)

## Environment (Sankey)

In [28]:
# extract the environmental conditions
env = df[['CRASH_YEAR','SIDE','CRASH_RECORD_ID','LIGHTING_CONDITION','WEATHER_CONDITION','ROADWAY_SURFACE_COND']]

lighting_dict = {
  'DARKNESS': 'Darkness', 
  'DARKNESS, LIGHTED ROAD': 'Darkness', 
  'DAWN': 'Dawn', 
  'DAYLIGHT': 'Daylight', 
  'DUSK': 'Dusk', 
  'UNKNOWN': 'Unknown lighting'
}

weather_dict = {
  'BLOWING SAND, SOIL, DIRT': 'Other/Unknown weather', 
  'BLOWING SNOW': 'Rain/Snow', 
  'CLEAR': 'Clear', 
  'CLOUDY/OVERCAST': 'Cloudy', 
  'FOG/SMOKE/HAZE': 'Other/Unknown weather', 
  'FREEZING RAIN/DRIZZLE': 'Rain/Snow', 
  'OTHER': 'Other/Unknown weather', 
  'RAIN': 'Rain/Snow', 
  'SEVERE CROSS WIND GATE': 'Other/Unknown weather', 
  'SLEET/HAIL': 'Rain/Snow', 
  'SNOW': 'Rain/Snow', 
  'UNKNOWN': 'Other/Unknown weather'
}

roadway_dict = {
  'DRY': 'Dry', 
  'ICE': 'Snow/Ice', 
  'OTHER': 'Other/Unknown surface', 
  'SAND, MUD, DIRT': 'Other/Unknown surface', 
  'SNOW OR SLUSH': 'Snow/Ice', 
  'UNKNOWN': 'Other/Unknown surface', 
  'WET': 'Wet'
}

env['LIGHTING_GRP'] = env['LIGHTING_CONDITION'].map(lighting_dict)
env['WEATHER_GRP'] = env['WEATHER_CONDITION'].map(weather_dict)
env['ROADWAY_GRP'] = env['ROADWAY_SURFACE_COND'].map(roadway_dict)

# pivot table for LIGHTING_GRP and WEATHER_GRP
env_lighting_weather = env.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR', 'SIDE'], columns=['LIGHTING_GRP', 'WEATHER_GRP'], aggfunc='count', fill_value=0)

# pivot table for WEATHER_GRP and ROADWAY_GRP
env_weather_roadway = env.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR', 'SIDE'], columns=['WEATHER_GRP', 'ROADWAY_GRP'], aggfunc='count', fill_value=0)

# merge the two pivot tables
env_lighting_weather_roadway = pd.merge(env_lighting_weather, env_weather_roadway, on=['CRASH_YEAR', 'SIDE'])
env_lighting_weather_roadway.reset_index(inplace=True)
env_lighting_weather_roadway.columns = ['CRASH_YEAR','SIDE', \
                                        'Darkness,Clear', 'Darkness,Cloudy', 'Darkness,Other/Unknown weather', 'Darkness,Rain/Snow', \
                                        'Dawn,Clear', 'Dawn,Cloudy', 'Dawn,Other/Unknown weather', 'Dawn,Rain/Snow', \
                                        'Daylight,Clear', 'Daylight,Cloudy', 'Daylight,Other/Unknown weather', 'Daylight,Rain/Snow', \
                                        'Dusk,Clear', 'Dusk,Cloudy', 'Dusk,Other/Unknown weather', 'Dusk,Rain/Snow', \
                                        'Unknown lighting,Clear', 'Unknown lighting,Cloudy', 'Unknown lighting,Other/Unknown weather', 'Unknown lighting,Rain/Snow', \
                                        'Clear,Dry', 'Clear,Snow/Ice', 'Clear,Other/Unknown surface', 'Clear,Wet', \
                                        'Cloudy,Dry', 'Cloudy,Snow/Ice', 'Cloudy,Other/Unknown surface', 'Cloudy,Wet', \
                                        'Other/Unknown weather,Dry', 'Other/Unknown weather,Snow/Ice', 'Other/Unknown weather,Other/Unknown surface', 'Other/Unknown weather,Wet', \
                                        'Rain/Snow,Dry', 'Rain/Snow,Snow/Ice', 'Rain/Snow,Other/Unknown surface', 'Rain/Snow,Wet']
env_lighting_weather_roadway.head()

# output to JSON
env_lighting_weather_roadway.to_json('crash_env.json', orient='records',indent=2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  env['LIGHTING_GRP'] = env['LIGHTING_CONDITION'].map(lighting_dict)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  env['WEATHER_GRP'] = env['WEATHER_CONDITION'].map(weather_dict)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  env['ROADWAY_GRP'] = env['ROADWAY_SURFACE_COND'].map(roadway_dict)


## Enviornment (Severity)

In [38]:
weather = df[['CRASH_YEAR','SIDE','CRASH_RECORD_ID','WEATHER_CONDITION','MOST_SEVERE_INJURY']]

weather_dict = {
  'BLOWING SAND, SOIL, DIRT': 'Other/Unknown weather', 
  'BLOWING SNOW': 'Rain/Snow', 
  'CLEAR': 'Clear', 
  'CLOUDY/OVERCAST': 'Cloudy', 
  'FOG/SMOKE/HAZE': 'Other/Unknown weather', 
  'FREEZING RAIN/DRIZZLE': 'Rain/Snow', 
  'OTHER': 'Other/Unknown weather', 
  'RAIN': 'Rain/Snow', 
  'SEVERE CROSS WIND GATE': 'Other/Unknown weather', 
  'SLEET/HAIL': 'Rain/Snow', 
  'SNOW': 'Rain/Snow', 
  'UNKNOWN': 'Other/Unknown weather'
}

injury_dict = {
  'FATAL': 'Severe injury',
  'INCAPACITATING INJURY': 'Severe injury',
  'NO INDICATION OF INJURY': 'No injury',
  'NONINCAPACITATING INJURY': 'Moderate injury',
  'REPORTED, NOT EVIDENT': 'No injury'
}

weather['WEATHER_GRP'] = weather['WEATHER_CONDITION'].map(weather_dict)
weather['INJURY_GRP'] = weather['MOST_SEVERE_INJURY'].map(injury_dict)

weather_inju = weather.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR','SIDE'], columns=['WEATHER_GRP','INJURY_GRP'], aggfunc='count', fill_value=0)
weather_inju.reset_index(inplace=True)
weather_inju.columns = ['CRASH_YEAR','SIDE', \
                        'Clear,Moderate injury', 'Clear,No injury', 'Clear,Severe injury', \
                        'Cloudy,Moderate injury', 'Cloudy,No injury', 'Cloudy,Severe injury', \
                        'Other/Unknown weather,Moderate injury', 'Other/Unknown weather,No injury', 'Other/Unknown weather,Severe injury', \
                        'Rain/Snow,Moderate injury', 'Rain/Snow,No injury', 'Rain/Snow,Severe injury']
weather_inju.head()

Unnamed: 0,CRASH_YEAR,SIDE,"Clear,Moderate injury","Clear,No injury","Clear,Severe injury","Cloudy,Moderate injury","Cloudy,No injury","Cloudy,Severe injury","Other/Unknown weather,Moderate injury","Other/Unknown weather,No injury","Other/Unknown weather,Severe injury","Rain/Snow,Moderate injury","Rain/Snow,No injury","Rain/Snow,Severe injury"
0,2018,Central,651,9429,149,25,278,8,17,424,1,135,1348,40
1,2018,Far North Side,805,10725,193,45,417,7,28,802,3,155,1857,45
2,2018,Far Southeast Side,599,6212,142,35,267,5,20,428,0,120,1045,26
3,2018,Far Southwest Side,457,5107,117,19,280,6,17,335,2,89,855,14
4,2018,North Side,649,7828,145,25,321,2,22,602,8,115,1231,31


In [39]:
# extract the environmental conditions
lighting = df[['CRASH_YEAR','SIDE','CRASH_RECORD_ID','LIGHTING_CONDITION','MOST_SEVERE_INJURY']]

lighting_dict = {
  'DARKNESS': 'Darkness', 
  'DARKNESS, LIGHTED ROAD': 'Darkness', 
  'DAWN': 'Dawn', 
  'DAYLIGHT': 'Daylight', 
  'DUSK': 'Dusk', 
  'UNKNOWN': 'Unknown lighting'
}

injury_dict = {
  'FATAL': 'Severe injury',
  'INCAPACITATING INJURY': 'Severe injury',
  'NO INDICATION OF INJURY': 'No injury',
  'NONINCAPACITATING INJURY': 'Moderate injury',
  'REPORTED, NOT EVIDENT': 'No injury'
}

lighting['LIGHTING_GRP'] = lighting['LIGHTING_CONDITION'].map(lighting_dict)
lighting['INJURY_GRP'] = lighting['MOST_SEVERE_INJURY'].map(injury_dict)

lighting_inju = lighting.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR','SIDE'], columns=['LIGHTING_GRP','INJURY_GRP'], aggfunc='count', fill_value=0)
lighting_inju.reset_index(inplace=True)
lighting_inju.columns = ['CRASH_YEAR','SIDE', \
                         'Darkness,Moderate injury', 'Darkness,No injury', 'Darkness,Severe injury', \
                          'Dawn,Moderate injury', 'Dawn,No injury', 'Dawn,Severe injury', \
                          'Daylight,Moderate injury', 'Daylight,No injury', 'Daylight,Severe injury', \
                          'Dusk,Moderate injury', 'Dusk,No injury', 'Dusk,Severe injury', \
                          'Unknown lighting,Moderate injury', 'Unknown lighting,No injury', 'Unknown lighting,Severe injury']
lighting_inju.head()

Unnamed: 0,CRASH_YEAR,SIDE,"Darkness,Moderate injury","Darkness,No injury","Darkness,Severe injury","Dawn,Moderate injury","Dawn,No injury","Dawn,Severe injury","Daylight,Moderate injury","Daylight,No injury","Daylight,Severe injury","Dusk,Moderate injury","Dusk,No injury","Dusk,Severe injury","Unknown lighting,Moderate injury","Unknown lighting,No injury","Unknown lighting,Severe injury"
0,2018,Central,288,3094,74,18,206,5,497,7616,110,24,291,7,1,272,2
1,2018,Far North Side,286,3102,76,21,240,3,690,9493,158,30,352,11,6,614,0
2,2018,Far Southeast Side,251,2141,66,14,166,6,471,5080,98,34,272,3,4,293,0
3,2018,Far Southwest Side,195,1645,42,17,112,4,342,4381,87,19,218,6,9,221,0
4,2018,North Side,254,2670,68,20,126,1,503,6428,111,25,304,4,9,454,2


In [40]:
# extract the environmental conditions
roadway = df[['CRASH_YEAR','SIDE','CRASH_RECORD_ID','ROADWAY_SURFACE_COND','MOST_SEVERE_INJURY']]

roadway_dict = {
  'DRY': 'Dry', 
  'ICE': 'Snow/Ice', 
  'OTHER': 'Other/Unknown surface', 
  'SAND, MUD, DIRT': 'Other/Unknown surface', 
  'SNOW OR SLUSH': 'Snow/Ice', 
  'UNKNOWN': 'Other/Unknown surface', 
  'WET': 'Wet'
}

injury_dict = {
  'FATAL': 'Severe injury',
  'INCAPACITATING INJURY': 'Severe injury',
  'NO INDICATION OF INJURY': 'No injury',
  'NONINCAPACITATING INJURY': 'Moderate injury',
  'REPORTED, NOT EVIDENT': 'No injury'
}

roadway['ROADWAY_GRP'] = roadway['ROADWAY_SURFACE_COND'].map(roadway_dict)
roadway['INJURY_GRP'] = roadway['MOST_SEVERE_INJURY'].map(injury_dict)

roadway_inju = roadway.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR','SIDE'], columns=['ROADWAY_GRP','INJURY_GRP'], aggfunc='count', fill_value=0)
roadway_inju.reset_index(inplace=True)
roadway_inju.columns = ['CRASH_YEAR','SIDE', \
                        'Dry,Moderate injury', 'Dry,No injury', 'Dry,Severe injury', \
                        'Other/Unknown surface,Moderate injury', 'Other/Unknown surface,No injury', 'Other/Unknown surface,Severe injury', \
                        'Snow/Ice,Moderate injury', 'Snow/Ice,No injury', 'Snow/Ice,Severe injury', \
                        'Wet,Moderate injury', 'Wet,No injury', 'Wet,Severe injury']
roadway_inju.head()

Unnamed: 0,CRASH_YEAR,SIDE,"Dry,Moderate injury","Dry,No injury","Dry,Severe injury","Other/Unknown surface,Moderate injury","Other/Unknown surface,No injury","Other/Unknown surface,Severe injury","Snow/Ice,Moderate injury","Snow/Ice,No injury","Snow/Ice,Severe injury","Wet,Moderate injury","Wet,No injury","Wet,Severe injury"
0,2018,Central,616,8977,140,22,578,3,26,319,7,164,1605,48
1,2018,Far North Side,773,10015,180,34,1109,10,41,693,6,185,1984,52
2,2018,Far Southeast Side,577,5841,137,30,552,2,33,433,1,134,1126,33
3,2018,Far Southwest Side,427,4929,113,21,422,4,24,289,0,110,937,22
4,2018,North Side,600,7157,136,49,1026,8,23,417,9,139,1382,33


In [49]:
total = df[['CRASH_YEAR','SIDE','CRASH_RECORD_ID','MOST_SEVERE_INJURY']]

injury_dict = {
  'FATAL': 'Severe injury',
  'INCAPACITATING INJURY': 'Severe injury',
  'NO INDICATION OF INJURY': 'No injury',
  'NONINCAPACITATING INJURY': 'Moderate injury',
  'REPORTED, NOT EVIDENT': 'No injury'
}

total['INJURY_GRP'] = total['MOST_SEVERE_INJURY'].map(injury_dict)

total_inju = total.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR','SIDE'], columns=['INJURY_GRP'], aggfunc='count', fill_value=0)
total_inju.reset_index(inplace=True)
total_inju.columns = ['CRASH_YEAR','SIDE', 'Moderate injury', 'No injury', 'Severe injury']
total_inju.head()

Unnamed: 0,CRASH_YEAR,SIDE,Moderate injury,No injury,Severe injury
0,2018,Central,828,11479,198
1,2018,Far North Side,1033,13801,248
2,2018,Far Southeast Side,774,7952,173
3,2018,Far Southwest Side,582,6577,139
4,2018,North Side,811,9982,186


In [59]:
# merge lighting_inju, weather_inju and roadway_inju
env = pd.merge(lighting_inju, weather_inju, on=['CRASH_YEAR', 'SIDE'])
env = pd.merge(env, roadway_inju, on=['CRASH_YEAR', 'SIDE'])

# # ouput as JSON
env.to_json('crash_env_inju.json', orient='records',indent=2)

#### Lift (for evaluation only, should re-calculate on website)

In [58]:
env = pd.merge(env, total_inju, on=['CRASH_YEAR', 'SIDE'])

# for each condition, calculate the probability
env['Darkness,Moderate injury %'] = env['Darkness,Moderate injury'] / (env['Darkness,Moderate injury'] + env['Darkness,No injury'] + env['Darkness,Severe injury'])
env['Darkness,No injury %'] = env['Darkness,No injury'] / (env['Darkness,Moderate injury'] + env['Darkness,No injury'] + env['Darkness,Severe injury'])
env['Darkness,Severe injury %'] = env['Darkness,Severe injury'] / (env['Darkness,Moderate injury'] + env['Darkness,No injury'] + env['Darkness,Severe injury'])
env['Dawn,Moderate injury %'] = env['Dawn,Moderate injury'] / (env['Dawn,Moderate injury'] + env['Dawn,No injury'] + env['Dawn,Severe injury'])
env['Dawn,No injury %'] = env['Dawn,No injury'] / (env['Dawn,Moderate injury'] + env['Dawn,No injury'] + env['Dawn,Severe injury'])
env['Dawn,Severe injury %'] = env['Dawn,Severe injury'] / (env['Dawn,Moderate injury'] + env['Dawn,No injury'] + env['Dawn,Severe injury'])
env['Daylight,Moderate injury %'] = env['Daylight,Moderate injury'] / (env['Daylight,Moderate injury'] + env['Daylight,No injury'] + env['Daylight,Severe injury'])
env['Daylight,No injury %'] = env['Daylight,No injury'] / (env['Daylight,Moderate injury'] + env['Daylight,No injury'] + env['Daylight,Severe injury'])
env['Daylight,Severe injury %'] = env['Daylight,Severe injury'] / (env['Daylight,Moderate injury'] + env['Daylight,No injury'] + env['Daylight,Severe injury'])
env['Dusk,Moderate injury %'] = env['Dusk,Moderate injury'] / (env['Dusk,Moderate injury'] + env['Dusk,No injury'] + env['Dusk,Severe injury'])
env['Dusk,No injury %'] = env['Dusk,No injury'] / (env['Dusk,Moderate injury'] + env['Dusk,No injury'] + env['Dusk,Severe injury'])
env['Dusk,Severe injury %'] = env['Dusk,Severe injury'] / (env['Dusk,Moderate injury'] + env['Dusk,No injury'] + env['Dusk,Severe injury'])
env['Unknown lighting,Moderate injury %'] = env['Unknown lighting,Moderate injury'] / (env['Unknown lighting,Moderate injury'] + env['Unknown lighting,No injury'] + env['Unknown lighting,Severe injury'])
env['Unknown lighting,No injury %'] = env['Unknown lighting,No injury'] / (env['Unknown lighting,Moderate injury'] + env['Unknown lighting,No injury'] + env['Unknown lighting,Severe injury'])
env['Unknown lighting,Severe injury %'] = env['Unknown lighting,Severe injury'] / (env['Unknown lighting,Moderate injury'] + env['Unknown lighting,No injury'] + env['Unknown lighting,Severe injury'])
env['Clear,Moderate injury %'] = env['Clear,Moderate injury'] / (env['Clear,Moderate injury'] + env['Clear,No injury'] + env['Clear,Severe injury'])
env['Clear,No injury %'] = env['Clear,No injury'] / (env['Clear,Moderate injury'] + env['Clear,No injury'] + env['Clear,Severe injury'])
env['Clear,Severe injury %'] = env['Clear,Severe injury'] / (env['Clear,Moderate injury'] + env['Clear,No injury'] + env['Clear,Severe injury'])
env['Cloudy,Moderate injury %'] = env['Cloudy,Moderate injury'] / (env['Cloudy,Moderate injury'] + env['Cloudy,No injury'] + env['Cloudy,Severe injury'])
env['Cloudy,No injury %'] = env['Cloudy,No injury'] / (env['Cloudy,Moderate injury'] + env['Cloudy,No injury'] + env['Cloudy,Severe injury'])
env['Cloudy,Severe injury %'] = env['Cloudy,Severe injury'] / (env['Cloudy,Moderate injury'] + env['Cloudy,No injury'] + env['Cloudy,Severe injury'])
env['Rain/Snow,Moderate injury %'] = env['Rain/Snow,Moderate injury'] / (env['Rain/Snow,Moderate injury'] + env['Rain/Snow,No injury'] + env['Rain/Snow,Severe injury'])
env['Rain/Snow,No injury %'] = env['Rain/Snow,No injury'] / (env['Rain/Snow,Moderate injury'] + env['Rain/Snow,No injury'] + env['Rain/Snow,Severe injury'])
env['Rain/Snow,Severe injury %'] = env['Rain/Snow,Severe injury'] / (env['Rain/Snow,Moderate injury'] + env['Rain/Snow,No injury'] + env['Rain/Snow,Severe injury'])
env['Other/Unknown weather,Moderate injury %'] = env['Other/Unknown weather,Moderate injury'] / (env['Other/Unknown weather,Moderate injury'] + env['Other/Unknown weather,No injury'] + env['Other/Unknown weather,Severe injury'])
env['Other/Unknown weather,No injury %'] = env['Other/Unknown weather,No injury'] / (env['Other/Unknown weather,Moderate injury'] + env['Other/Unknown weather,No injury'] + env['Other/Unknown weather,Severe injury'])
env['Other/Unknown weather,Severe injury %'] = env['Other/Unknown weather,Severe injury'] / (env['Other/Unknown weather,Moderate injury'] + env['Other/Unknown weather,No injury'] + env['Other/Unknown weather,Severe injury'])
env['Dry,Moderate injury %'] = env['Dry,Moderate injury'] / (env['Dry,Moderate injury'] + env['Dry,No injury'] + env['Dry,Severe injury'])
env['Dry,No injury %'] = env['Dry,No injury'] / (env['Dry,Moderate injury'] + env['Dry,No injury'] + env['Dry,Severe injury'])
env['Dry,Severe injury %'] = env['Dry,Severe injury'] / (env['Dry,Moderate injury'] + env['Dry,No injury'] + env['Dry,Severe injury'])
env['Wet,Moderate injury %'] = env['Wet,Moderate injury'] / (env['Wet,Moderate injury'] + env['Wet,No injury'] + env['Wet,Severe injury'])
env['Wet,No injury %'] = env['Wet,No injury'] / (env['Wet,Moderate injury'] + env['Wet,No injury'] + env['Wet,Severe injury'])
env['Wet,Severe injury %'] = env['Wet,Severe injury'] / (env['Wet,Moderate injury'] + env['Wet,No injury'] + env['Wet,Severe injury'])
env['Snow/Ice,Moderate injury %'] = env['Snow/Ice,Moderate injury'] / (env['Snow/Ice,Moderate injury'] + env['Snow/Ice,No injury'] + env['Snow/Ice,Severe injury'])
env['Snow/Ice,No injury %'] = env['Snow/Ice,No injury'] / (env['Snow/Ice,Moderate injury'] + env['Snow/Ice,No injury'] + env['Snow/Ice,Severe injury'])
env['Snow/Ice,Severe injury %'] = env['Snow/Ice,Severe injury'] / (env['Snow/Ice,Moderate injury'] + env['Snow/Ice,No injury'] + env['Snow/Ice,Severe injury'])
env['Other/Unknown surface,Moderate injury %'] = env['Other/Unknown surface,Moderate injury'] / (env['Other/Unknown surface,Moderate injury'] + env['Other/Unknown surface,No injury'] + env['Other/Unknown surface,Severe injury'])
env['Other/Unknown surface,No injury %'] = env['Other/Unknown surface,No injury'] / (env['Other/Unknown surface,Moderate injury'] + env['Other/Unknown surface,No injury'] + env['Other/Unknown surface,Severe injury'])
env['Other/Unknown surface,Severe injury %'] = env['Other/Unknown surface,Severe injury'] / (env['Other/Unknown surface,Moderate injury'] + env['Other/Unknown surface,No injury'] + env['Other/Unknown surface,Severe injury'])
env['Moderate injury %'] = env['Moderate injury'] / (env['Moderate injury'] + env['No injury'] + env['Severe injury'])
env['No injury %'] = env['No injury'] / (env['Moderate injury'] + env['No injury'] + env['Severe injury'])
env['Severe injury %'] = env['Severe injury'] / (env['Moderate injury'] + env['No injury'] + env['Severe injury'])

# for each condition, calculate the lift
env['Darkness,Moderate injury lift'] = env['Darkness,Moderate injury %'] / env['Moderate injury %']
env['Darkness,No injury lift'] = env['Darkness,No injury %'] / env['No injury %']
env['Darkness,Severe injury lift'] = env['Darkness,Severe injury %'] / env['Severe injury %']
env['Daylight,Moderate injury lift'] = env['Daylight,Moderate injury %'] / env['Moderate injury %']
env['Daylight,No injury lift'] = env['Daylight,No injury %'] / env['No injury %']
env['Daylight,Severe injury lift'] = env['Daylight,Severe injury %'] / env['Severe injury %']
env['Dusk,Moderate injury lift'] = env['Dusk,Moderate injury %'] / env['Moderate injury %']
env['Dusk,No injury lift'] = env['Dusk,No injury %'] / env['No injury %']
env['Dusk,Severe injury lift'] = env['Dusk,Severe injury %'] / env['Severe injury %']
env['Dawn,Moderate injury lift'] = env['Dawn,Moderate injury %'] / env['Moderate injury %']
env['Dawn,No injury lift'] = env['Dawn,No injury %'] / env['No injury %']
env['Dawn,Severe injury lift'] = env['Dawn,Severe injury %'] / env['Severe injury %']
env['Cloudy,Moderate injury lift'] = env['Cloudy,Moderate injury %'] / env['Moderate injury %']
env['Cloudy,No injury lift'] = env['Cloudy,No injury %'] / env['No injury %']
env['Cloudy,Severe injury lift'] = env['Cloudy,Severe injury %'] / env['Severe injury %']
env['Rain/Snow,Moderate injury lift'] = env['Rain/Snow,Moderate injury %'] / env['Moderate injury %']
env['Rain/Snow,No injury lift'] = env['Rain/Snow,No injury %'] / env['No injury %']
env['Rain/Snow,Severe injury lift'] = env['Rain/Snow,Severe injury %'] / env['Severe injury %']
env['Other/Unknown weather,Moderate injury lift'] = env['Other/Unknown weather,Moderate injury %'] / env['Moderate injury %']
env['Other/Unknown weather,No injury lift'] = env['Other/Unknown weather,No injury %'] / env['No injury %']
env['Other/Unknown weather,Severe injury lift'] = env['Other/Unknown weather,Severe injury %'] / env['Severe injury %']
env['Dry,Moderate injury lift'] = env['Dry,Moderate injury %'] / env['Moderate injury %']
env['Dry,No injury lift'] = env['Dry,No injury %'] / env['No injury %']
env['Dry,Severe injury lift'] = env['Dry,Severe injury %'] / env['Severe injury %']
env['Wet,Moderate injury lift'] = env['Wet,Moderate injury %'] / env['Moderate injury %']
env['Wet,No injury lift'] = env['Wet,No injury %'] / env['No injury %']
env['Wet,Severe injury lift'] = env['Wet,Severe injury %'] / env['Severe injury %']
env['Snow/Ice,Moderate injury lift'] = env['Snow/Ice,Moderate injury %'] / env['Moderate injury %']
env['Snow/Ice,No injury lift'] = env['Snow/Ice,No injury %'] / env['No injury %']
env['Snow/Ice,Severe injury lift'] = env['Snow/Ice,Severe injury %'] / env['Severe injury %']
env['Other/Unknown surface,Moderate injury lift'] = env['Other/Unknown surface,Moderate injury %'] / env['Moderate injury %']
env['Other/Unknown surface,No injury lift'] = env['Other/Unknown surface,No injury %'] / env['No injury %']
env['Other/Unknown surface,Severe injury lift'] = env['Other/Unknown surface,Severe injury %'] / env['Severe injury %']

#preview the columns with lift
env[['CRASH_YEAR','SIDE','Darkness,Moderate injury lift','Darkness,No injury lift','Darkness,Severe injury lift','Daylight,Moderate injury lift','Daylight,No injury lift','Daylight,Severe injury lift','Dusk,Moderate injury lift','Dusk,No injury lift','Dusk,Severe injury lift','Dawn,Moderate injury lift','Dawn,No injury lift','Dawn,Severe injury lift','Cloudy,Moderate injury lift','Cloudy,No injury lift','Cloudy,Severe injury lift','Rain/Snow,Moderate injury lift','Rain/Snow,No injury lift','Rain/Snow,Severe injury lift','Other/Unknown weather,Moderate injury lift','Other/Unknown weather,No injury lift','Other/Unknown weather,Severe injury lift','Dry,Moderate injury lift','Dry,No injury lift','Dry,Severe injury lift','Wet,Moderate injury lift','Wet,No injury lift','Wet,Severe injury lift','Snow/Ice,Moderate injury lift','Snow/Ice,No injury lift','Snow/Ice,Severe injury lift','Other/Unknown surface,Moderate injury lift','Other/Unknown surface,No injury lift','Other/Unknown surface,Severe injury lift']].head()

Unnamed: 0,CRASH_YEAR,SIDE,"Darkness,Moderate injury lift","Darkness,No injury lift","Darkness,Severe injury lift","Daylight,Moderate injury lift","Daylight,No injury lift","Daylight,Severe injury lift","Dusk,Moderate injury lift","Dusk,No injury lift",...,"Dry,Severe injury lift","Wet,Moderate injury lift","Wet,No injury lift","Wet,Severe injury lift","Snow/Ice,Moderate injury lift","Snow/Ice,No injury lift","Snow/Ice,Severe injury lift","Other/Unknown surface,Moderate injury lift","Other/Unknown surface,No injury lift","Other/Unknown surface,Severe injury lift"
0,2018,Central,1.258555,0.975273,1.352311,0.912808,1.008965,0.844853,1.125664,0.984502,...,0.908447,1.363146,0.962276,1.668418,1.115537,0.987251,1.255954,0.551009,1.044216,0.314212
1,2018,Far North Side,1.205443,0.978616,1.334268,0.974193,1.003204,0.929184,1.114519,0.97881,...,0.99805,1.216135,0.976206,1.423843,0.80893,1.023411,0.493091,0.430535,1.051116,0.527446
2,2018,Far Southeast Side,1.174065,0.974764,1.381202,0.958627,1.006368,0.89238,1.265088,0.985089,...,1.075085,1.191534,0.974551,1.312836,0.812451,1.037614,0.110148,0.590621,1.05777,0.176162
3,2018,Far Southwest Side,1.299259,0.96989,1.171706,0.891583,1.010658,0.949649,0.980456,0.995466,...,1.084825,1.290315,0.972608,1.080523,0.961497,1.024541,0.0,0.589105,1.047565,0.469831
4,2018,North Side,1.149249,0.98151,1.34152,0.966972,1.00398,0.930415,1.016337,1.004094,...,1.01706,1.210893,0.978143,1.253467,0.693463,1.021492,1.183167,0.612505,1.041991,0.436025


## Type

In [4]:
crash_type = df.pivot_table(values='CRASH_RECORD_ID', index=['CRASH_YEAR', 'SIDE'], columns=['FIRST_CRASH_TYPE'], aggfunc='count', fill_value=0)
crash_type.head()

Unnamed: 0_level_0,FIRST_CRASH_TYPE,ANGLE,ANIMAL,FIXED OBJECT,HEAD ON,OTHER NONCOLLISION,OTHER OBJECT,OVERTURNED,PARKED MOTOR VEHICLE,PEDALCYCLIST,PEDESTRIAN,REAR END,REAR TO FRONT,REAR TO REAR,REAR TO SIDE,SIDESWIPE OPPOSITE DIRECTION,SIDESWIPE SAME DIRECTION,TRAIN,TURNING
CRASH_YEAR,SIDE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2018,Central,838,2,528,60,46,91,2,1638,264,413,2854,0,0,0,142,3633,0,2010
2018,Far North Side,1608,29,582,122,47,132,4,3512,276,375,3886,0,0,0,263,2279,0,1994
2018,Far Southeast Side,1117,12,635,85,53,126,15,2151,43,209,1988,0,0,0,131,1134,1,1224
2018,Far Southwest Side,841,3,360,57,14,73,7,1693,48,171,1814,0,0,0,88,1006,0,1137
2018,North Side,968,4,398,97,41,91,1,2448,365,342,2775,0,0,0,197,1753,0,1512
