# One Hot Encoding

#### Import libraries and load the cleaned data

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
# Read the crash records from the cleaned CSV into a DataFrame.
crashes_path = '../data/crashes_cleaned.csv'
df = pd.read_csv(crashes_path)

In [2]:
# Preview the first two rows to sanity-check the load.
df.head(n=2)

Unnamed: 0,posted_speed_limit,weather_condition,lighting_condition,alignment,roadway_surface_cond,num_units,crash_hour,crash_day_of_week,crash_month,sex,age,serious_accident
0,35,CLEAR,DAYLIGHT,STRAIGHT AND LEVEL,DRY,2,15,1,5,1,44.0,0
1,25,CLEAR,DAYLIGHT,CURVE ON GRADE,DRY,2,7,6,6,1,71.0,0


In [3]:
# List the column labels of the loaded frame.
df.columns

Index(['posted_speed_limit', 'weather_condition', 'lighting_condition',
       'alignment', 'roadway_surface_cond', 'num_units', 'crash_hour',
       'crash_day_of_week', 'crash_month', 'sex', 'age', 'serious_accident'],
      dtype='object')

#### Remove the target column from the DataFrame (it is joined back after encoding)

In [4]:
# Keep the label as a one-column DataFrame so it can be joined back on the
# row index once the features have been encoded.
target = df.loc[:, ['serious_accident']]

In [5]:
# Remove the label column from the feature frame; `target` holds it until it
# is joined back.
# Rebinding `df` (instead of inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: a second run is a no-op rather than a KeyError.
df = df.drop(columns=['serious_accident'], errors='ignore')

#### One-hot encode the categorical (text) columns with `get_dummies`

In [6]:
# One-hot encode every object- or category-typed column (get_dummies' default
# column selection); numeric columns pass through unchanged.
# dtype is pinned to uint8 so the indicator columns are 0/1 on every pandas
# version; pandas >= 2.0 would otherwise emit True/False booleans.
df = pd.get_dummies(df, dtype='uint8')

In [7]:
# Normalise column labels to lower case; the dummy column names inherit the
# upper-case category values (e.g. CLEAR, DAYLIGHT) from the data.
df.columns = df.columns.str.lower()

In [8]:
# Inspect the first five rows after encoding the text columns.
df.head(n=5)

Unnamed: 0,posted_speed_limit,num_units,crash_hour,crash_day_of_week,crash_month,sex,age,weather_condition_blowing snow,weather_condition_clear,weather_condition_cloudy/overcast,...,"alignment_curve, level",alignment_straight and level,alignment_straight on grade,alignment_straight on hillcrest,roadway_surface_cond_dry,roadway_surface_cond_ice,"roadway_surface_cond_sand, mud, dirt",roadway_surface_cond_snow or slush,roadway_surface_cond_unknown,roadway_surface_cond_wet
0,35,2,15,1,5,1,44.0,0,1,0,...,0,1,0,0,1,0,0,0,0,0
1,25,2,7,6,6,1,71.0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
2,15,2,16,3,9,1,60.0,0,1,0,...,0,1,0,0,1,0,0,0,0,0
3,30,2,9,6,3,1,38.0,0,1,0,...,0,1,0,0,0,0,0,1,0,0
4,30,2,5,4,3,1,52.0,0,0,0,...,0,1,0,0,0,0,0,0,0,1


#### Get dummies for date and time columns

In [9]:
# These three columns hold integers, so get_dummies leaves them alone unless
# they are named explicitly; each hour / weekday / month becomes its own
# indicator column.
# dtype is pinned to uint8 so the indicators are 0/1 on every pandas version;
# pandas >= 2.0 would otherwise emit True/False booleans.
df = pd.get_dummies(
    df,
    columns=['crash_hour', 'crash_day_of_week', 'crash_month'],
    dtype='uint8',
)
print(df.columns)
df.head()

Index(['posted_speed_limit', 'num_units', 'sex', 'age',
       'weather_condition_blowing snow', 'weather_condition_clear',
       'weather_condition_cloudy/overcast', 'weather_condition_fog/smoke/haze',
       'weather_condition_freezing rain/drizzle', 'weather_condition_rain',
       'weather_condition_severe cross wind gate',
       'weather_condition_sleet/hail', 'weather_condition_snow',
       'weather_condition_unknown', 'lighting_condition_darkness',
       'lighting_condition_darkness, lighted road', 'lighting_condition_dawn',
       'lighting_condition_daylight', 'lighting_condition_dusk',
       'lighting_condition_unknown', 'alignment_curve on grade',
       'alignment_curve on hillcrest', 'alignment_curve, level',
       'alignment_straight and level', 'alignment_straight on grade',
       'alignment_straight on hillcrest', 'roadway_surface_cond_dry',
       'roadway_surface_cond_ice', 'roadway_surface_cond_sand, mud, dirt',
       'roadway_surface_cond_snow or slush', 'ro

Unnamed: 0,posted_speed_limit,num_units,sex,age,weather_condition_blowing snow,weather_condition_clear,weather_condition_cloudy/overcast,weather_condition_fog/smoke/haze,weather_condition_freezing rain/drizzle,weather_condition_rain,...,crash_month_3,crash_month_4,crash_month_5,crash_month_6,crash_month_7,crash_month_8,crash_month_9,crash_month_10,crash_month_11,crash_month_12
0,35,2,1,44.0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,25,2,1,71.0,0,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,15,2,1,60.0,0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,30,2,1,38.0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,30,2,1,52.0,0,0,0,0,0,1,...,1,0,0,0,0,0,0,0,0,0


#### Add back the target and export

In [10]:
# Re-attach the label column; the join aligns `target` to `df` on the row index.
df = df.join(other=target, how='left')
df.columns

Index(['posted_speed_limit', 'num_units', 'sex', 'age',
       'weather_condition_blowing snow', 'weather_condition_clear',
       'weather_condition_cloudy/overcast', 'weather_condition_fog/smoke/haze',
       'weather_condition_freezing rain/drizzle', 'weather_condition_rain',
       'weather_condition_severe cross wind gate',
       'weather_condition_sleet/hail', 'weather_condition_snow',
       'weather_condition_unknown', 'lighting_condition_darkness',
       'lighting_condition_darkness, lighted road', 'lighting_condition_dawn',
       'lighting_condition_daylight', 'lighting_condition_dusk',
       'lighting_condition_unknown', 'alignment_curve on grade',
       'alignment_curve on hillcrest', 'alignment_curve, level',
       'alignment_straight and level', 'alignment_straight on grade',
       'alignment_straight on hillcrest', 'roadway_surface_cond_dry',
       'roadway_surface_cond_ice', 'roadway_surface_cond_sand, mud, dirt',
       'roadway_surface_cond_snow or slush', 'ro

In [11]:
# Write the encoded dataset to disk, leaving the row index out of the file.
ohe_path = r'../data/crashes_cleaned_ohe.csv'
df.to_csv(ohe_path, index=False)

In [12]:
# Dimensions of the exported frame as (rows, columns).
df.shape

(55718, 76)