In [1]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score

%matplotlib inline

pd.options.display.float_format = '{:20,.2f}'.format
pd.options.display.max_columns = 1500
pd.options.display.max_rows = 1500

In [2]:
def speeding(x):
    if x == 9:
        x = np.nan
    elif not x:
        x = x
    else:
        x = 1
    return x

def five_more(x):
    if x == 99:
        x = np.nan
    elif x >= 5:
        x = 5
    return x

def binary(x):
    if not x:
        x = x
    else:
        x = 1
    return x

def car_year(x):
    if x == 9999 or x == 9998:
        x = np.nan
    elif x > 2010:
        x = 1
    elif x > 2005:
        x = 2
    elif x > 2000:
        x = 3
    elif x > 1995:
        x = 4
    else:
        x = 5
    return x

def make_nan(x, unknown):
    if x in unknown:
        x = np.nan
    else:
        x = x
    return x

def age(x):
    if x < 16:
        x = 15
    elif x > 85:
        x = 75
    else:
        x = x
    return x

In [3]:
cnx = create_engine('postgresql://aliphelan:****@34.198.205.31:5432/aliphelan')

In [4]:
df = pd.read_sql_query('''SELECT * FROM speed_data_drivers;''',cnx)

In [5]:
df.shape

(48613, 105)

In [6]:
names = ['speedrel', 'st_case', 'veh_no', 'state', 've_total', 've_forms', 'pvh_invl', 'peds', 'pernotmvit', 'permvit', 'persons', 'fatals', 'numoccs', 'mod_year', 'deaths', 'age', 'drunk_dr', 'drinking', 'month', 'day_week', 'hour', 'nhs', 'rur_urb', 'func_sys', 'route', 'sp_jur', 'harm_ev', 'man_coll', 'reljct1', 'reljct2', 'typ_int', 'wrk_zone', 'rel_road', 'lgt_cond', 'weather1', 'weather2', 'weather', 'sch_bus', 'cf1', 'cf2', 'cf3', 'hit_run', 'reg_stat', 'owner', 'make', 'body_typ', 'tow_veh', 'j_knife', 'gvwr', 'v_config', 'cargo_bt', 'haz_inv', 'bus_use', 'spec_use', 'emer_use', 'underide', 'rollover', 'rolinloc', 'impact1', 'deformed', 'towed', 'm_harm', 'veh_sc1', 'fire_exp', 'l_state', 'l_status', 'l_type', 'cdl_stat', 'l_restri', 'prev_acc', 'prev_sus', 'prev_dwi', 'prev_spd', 'prev_oth', 'dr_sf1', 'dr_sf2', 'dr_sf3', 'dr_sf4', 'vtrafway', 'vnum_lan', 'vspd_lim', 'valign', 'vprofile', 'vsurcond', 'vtrafcon', 'vtcont_f', 'p_crash1', 'p_crash2', 'pcrash4', 'pcrash5', 'acc_type', 'dr_drink', 'sex', 'inj_sev', 'rest_use', 'rest_mis', 'air_bag', 'ejection', 'ej_path', 'extricat', 'drugs', 'work_inj', 'hispanic', 'race']
df = df[names]

In [7]:
df.head()

Unnamed: 0,speedrel,st_case,veh_no,state,ve_total,ve_forms,pvh_invl,peds,pernotmvit,permvit,persons,fatals,numoccs,mod_year,deaths,age,drunk_dr,drinking,month,day_week,hour,nhs,rur_urb,func_sys,route,sp_jur,harm_ev,man_coll,reljct1,reljct2,typ_int,wrk_zone,rel_road,lgt_cond,weather1,weather2,weather,sch_bus,cf1,cf2,cf3,hit_run,reg_stat,owner,make,body_typ,tow_veh,j_knife,gvwr,v_config,cargo_bt,haz_inv,bus_use,spec_use,emer_use,underide,rollover,rolinloc,impact1,deformed,towed,m_harm,veh_sc1,fire_exp,l_state,l_status,l_type,cdl_stat,l_restri,prev_acc,prev_sus,prev_dwi,prev_spd,prev_oth,dr_sf1,dr_sf2,dr_sf3,dr_sf4,vtrafway,vnum_lan,vspd_lim,valign,vprofile,vsurcond,vtrafcon,vtcont_f,p_crash1,p_crash2,pcrash4,pcrash5,acc_type,dr_drink,sex,inj_sev,rest_use,rest_mis,air_bag,ejection,ej_path,extricat,drugs,work_inj,hispanic,race
0,0,10012,3,1,3,3,0,0,0,6,6,1,1,2010,1,27,0,9,1,6,6,0,2,3,2,0,12,6,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,20,4,0,0,0,0,0,1,0,0,0,0,0,0,12,6,2,12,0,0,1,6,1,0,0,1,0,0,0,0,0,0,0,0,1,4,50,1,1,1,0,0,1,62,1,2,98,0,2,4,7,0,1,0,0,0,9,0,1,1
1,3,10013,1,1,1,1,0,0,0,1,1,1,1,2007,1,55,1,0,1,7,10,0,1,5,4,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,35,4,0,0,0,0,0,1,0,0,0,0,1,1,0,6,2,1,0,0,1,6,1,0,0,0,1,1,0,0,89,58,0,0,1,2,45,3,5,1,0,0,14,13,4,6,98,1,1,4,7,0,8,1,9,0,0,0,7,1
2,0,10034,2,1,6,6,0,0,0,10,10,1,3,2008,1,50,1,0,1,5,14,0,2,4,2,0,12,1,0,3,2,0,1,1,10,0,10,0,0,0,0,0,1,2,20,20,0,0,0,0,0,1,0,0,0,0,0,0,6,6,2,12,0,0,1,6,1,0,1,0,0,0,0,0,89,0,0,0,5,5,45,1,1,2,3,3,5,53,1,1,21,0,2,3,3,0,8,0,0,0,0,8,0,0
3,0,10054,1,1,2,2,0,0,0,2,2,1,1,9999,1,57,0,9,2,3,17,0,1,3,2,0,12,1,0,1,1,0,1,5,1,0,1,0,0,0,0,0,92,0,98,88,0,0,0,0,0,1,0,0,0,0,0,0,6,6,2,12,0,0,1,6,1,0,0,0,0,0,0,1,0,0,0,0,2,2,65,1,1,1,0,0,1,53,1,1,25,0,1,4,17,0,0,8,0,0,9,0,7,2
4,0,10067,2,1,4,4,0,0,0,8,8,1,2,1998,1,39,0,0,2,4,11,1,2,3,3,0,12,2,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,2,12,31,0,0,0,0,0,1,0,0,0,0,0,0,12,6,2,12,0,0,1,6,1,0,0,1,0,0,0,0,0,0,0,0,1,4,55,3,5,1,0,0,14,62,1,1,51,0,2,3,3,0,1,0,0,0,0,8,0,0


In [8]:
# Speeding: 0 = No, 9 = Unknown, All others = Yes
df.speedrel.value_counts()

0    37681
4     3986
3     3461
9     2154
5     1249
2       82
Name: speedrel, dtype: int64

In [9]:
df['speedrel'] = df['speedrel'].apply(speeding)
df.speedrel.value_counts()

0.00    37681
1.00     8778
Name: speedrel, dtype: int64

In [10]:
df = df.dropna()
df.reset_index(inplace=True, drop=True)

In [11]:
df.head()

Unnamed: 0,speedrel,st_case,veh_no,state,ve_total,ve_forms,pvh_invl,peds,pernotmvit,permvit,persons,fatals,numoccs,mod_year,deaths,age,drunk_dr,drinking,month,day_week,hour,nhs,rur_urb,func_sys,route,sp_jur,harm_ev,man_coll,reljct1,reljct2,typ_int,wrk_zone,rel_road,lgt_cond,weather1,weather2,weather,sch_bus,cf1,cf2,cf3,hit_run,reg_stat,owner,make,body_typ,tow_veh,j_knife,gvwr,v_config,cargo_bt,haz_inv,bus_use,spec_use,emer_use,underide,rollover,rolinloc,impact1,deformed,towed,m_harm,veh_sc1,fire_exp,l_state,l_status,l_type,cdl_stat,l_restri,prev_acc,prev_sus,prev_dwi,prev_spd,prev_oth,dr_sf1,dr_sf2,dr_sf3,dr_sf4,vtrafway,vnum_lan,vspd_lim,valign,vprofile,vsurcond,vtrafcon,vtcont_f,p_crash1,p_crash2,pcrash4,pcrash5,acc_type,dr_drink,sex,inj_sev,rest_use,rest_mis,air_bag,ejection,ej_path,extricat,drugs,work_inj,hispanic,race
0,0.0,10012,3,1,3,3,0,0,0,6,6,1,1,2010,1,27,0,9,1,6,6,0,2,3,2,0,12,6,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,20,4,0,0,0,0,0,1,0,0,0,0,0,0,12,6,2,12,0,0,1,6,1,0,0,1,0,0,0,0,0,0,0,0,1,4,50,1,1,1,0,0,1,62,1,2,98,0,2,4,7,0,1,0,0,0,9,0,1,1
1,1.0,10013,1,1,1,1,0,0,0,1,1,1,1,2007,1,55,1,0,1,7,10,0,1,5,4,0,1,0,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,35,4,0,0,0,0,0,1,0,0,0,0,1,1,0,6,2,1,0,0,1,6,1,0,0,0,1,1,0,0,89,58,0,0,1,2,45,3,5,1,0,0,14,13,4,6,98,1,1,4,7,0,8,1,9,0,0,0,7,1
2,0.0,10034,2,1,6,6,0,0,0,10,10,1,3,2008,1,50,1,0,1,5,14,0,2,4,2,0,12,1,0,3,2,0,1,1,10,0,10,0,0,0,0,0,1,2,20,20,0,0,0,0,0,1,0,0,0,0,0,0,6,6,2,12,0,0,1,6,1,0,1,0,0,0,0,0,89,0,0,0,5,5,45,1,1,2,3,3,5,53,1,1,21,0,2,3,3,0,8,0,0,0,0,8,0,0
3,0.0,10054,1,1,2,2,0,0,0,2,2,1,1,9999,1,57,0,9,2,3,17,0,1,3,2,0,12,1,0,1,1,0,1,5,1,0,1,0,0,0,0,0,92,0,98,88,0,0,0,0,0,1,0,0,0,0,0,0,6,6,2,12,0,0,1,6,1,0,0,0,0,0,0,1,0,0,0,0,2,2,65,1,1,1,0,0,1,53,1,1,25,0,1,4,17,0,0,8,0,0,9,0,7,2
4,0.0,10067,2,1,4,4,0,0,0,8,8,1,2,1998,1,39,0,0,2,4,11,1,2,3,3,0,12,2,0,1,1,0,1,1,1,0,1,0,0,0,0,0,1,2,12,31,0,0,0,0,0,1,0,0,0,0,0,0,12,6,2,12,0,0,1,6,1,0,0,1,0,0,0,0,0,0,0,0,1,4,55,3,5,1,0,0,14,62,1,1,51,0,2,3,3,0,1,0,0,0,0,8,0,0


In [12]:
df.shape

(46459, 104)

In [13]:
# Drop columns which insurance would not have access too
df.drop(['st_case', 'veh_no', 'state', 've_total', 've_forms', 'pvh_invl', 'peds', 'pernotmvit', 'permvit', 'persons', 'fatals', 'numoccs', 'deaths', 'drunk_dr', 'drinking', 'month', 'day_week', 'hour', 'nhs', 'func_sys', 'route', 'sp_jur', 'harm_ev', 'man_coll', 'reljct1', 'reljct2', 'typ_int', 'wrk_zone', 'rel_road', 'lgt_cond', 'weather1', 'weather2', 'weather', 'sch_bus', 'cf1', 'cf2', 'cf3', 'hit_run', 'j_knife', 'underide', 'rollover', 'rolinloc', 'impact1', 'deformed', 'towed', 'm_harm', 'veh_sc1', 'fire_exp', 'dr_sf1', 'dr_sf2', 'dr_sf3', 'dr_sf4', 'vtrafway', 'vnum_lan', 'vspd_lim', 'valign', 'vprofile', 'vsurcond', 'vtrafcon', 'vtcont_f', 'p_crash1', 'p_crash2', 'pcrash4', 'pcrash5', 'acc_type', 'dr_drink', 'inj_sev', 'rest_use', 'rest_mis', 'air_bag', 'ejection', 'ej_path', 'extricat', 'drugs', 'work_inj'], axis=1, inplace=True)
df.head()

Unnamed: 0,speedrel,mod_year,age,rur_urb,reg_stat,owner,make,body_typ,tow_veh,gvwr,v_config,cargo_bt,haz_inv,bus_use,spec_use,emer_use,l_state,l_status,l_type,cdl_stat,l_restri,prev_acc,prev_sus,prev_dwi,prev_spd,prev_oth,sex,hispanic,race
0,0.0,2010,27,2,1,1,20,4,0,0,0,0,1,0,0,0,1,6,1,0,0,1,0,0,0,0,2,1,1
1,1.0,2007,55,1,1,1,35,4,0,0,0,0,1,0,0,0,1,6,1,0,0,0,1,1,0,0,1,7,1
2,0.0,2008,50,2,1,2,20,20,0,0,0,0,1,0,0,0,1,6,1,0,1,0,0,0,0,0,2,0,0
3,0.0,9999,57,1,92,0,98,88,0,0,0,0,1,0,0,0,1,6,1,0,0,0,0,0,0,1,1,7,2
4,0.0,1998,39,2,1,2,12,31,0,0,0,0,1,0,0,0,1,6,1,0,0,1,0,0,0,0,2,0,0


In [14]:
df.mod_year.value_counts()

2007    2983
2005    2934
2006    2876
2003    2660
2004    2641
2014    2439
2002    2410
2008    2340
2001    2234
2013    2229
2000    2085
2012    2019
2015    1931
2011    1781
1999    1734
2009    1581
2010    1507
1998    1397
1997    1210
1996     841
1995     809
9999     648
1994     638
1993     431
1992     344
1991     281
2016     231
1990     230
1988     161
1989     154
1987     102
1986     101
1985      83
1984      61
1983      45
1982      33
1979      26
1981      26
1980      25
1978      24
1977      21
1976      15
1969      13
1973      13
1971      11
1972      11
1970      10
1975      10
1967       9
1968       8
1974       7
1966       6
1965       5
1932       3
1940       3
1958       2
1957       2
1937       2
1942       2
9998       2
1950       2
1962       2
1948       2
1945       1
1951       1
1947       1
1923       1
1955       1
1929       1
1961       1
1941       1
1963       1
1931       1
1959       1
1960       1
1952       1
Name: mod_ye

In [15]:
# group car years together
df['mod_year'] = df['mod_year'].apply(car_year)
df.mod_year.value_counts()

3.00    12879
2.00    11287
1.00    10630
4.00     7267
5.00     3746
Name: mod_year, dtype: int64

In [16]:
df.age.value_counts()

22     1232
23     1198
24     1174
25     1157
21     1142
20     1087
26     1077
19     1058
27     1040
29      962
31      929
18      925
30      914
28      909
32      864
35      851
51      849
34      825
33      815
37      804
53      797
52      789
54      785
46      762
55      761
36      756
45      751
56      748
50      744
44      742
38      740
49      727
57      711
43      710
48      706
39      706
42      705
41      700
47      689
58      687
40      687
17      634
59      633
60      614
62      544
61      540
63      514
68      468
64      464
66      455
999     451
65      436
67      397
69      389
16      380
72      352
70      321
71      318
73      253
74      239
76      238
75      230
77      202
78      200
80      193
81      177
79      175
83      163
84      153
82      149
85      136
86      127
87      106
88       98
15       85
89       80
998      59
90       57
91       45
92       34
93       27
94       24
14       17
95  

In [17]:
# drivers age 
df['age'] = df['age'].apply(lambda x: make_nan(x, [998, 999]))
df['age'] = df['age'].apply(age)
df.age.value_counts()

22.00    1232
23.00    1198
24.00    1174
25.00    1157
21.00    1142
20.00    1087
26.00    1077
19.00    1058
27.00    1040
29.00     962
31.00     929
18.00     925
30.00     914
28.00     909
32.00     864
75.00     855
35.00     851
51.00     849
34.00     825
33.00     815
37.00     804
53.00     797
52.00     789
54.00     785
46.00     762
55.00     761
36.00     756
45.00     751
56.00     748
50.00     744
44.00     742
38.00     740
49.00     727
57.00     711
43.00     710
39.00     706
48.00     706
42.00     705
41.00     700
47.00     689
58.00     687
40.00     687
17.00     634
59.00     633
60.00     614
62.00     544
61.00     540
63.00     514
68.00     468
64.00     464
66.00     455
65.00     436
67.00     397
69.00     389
16.00     380
72.00     352
70.00     321
71.00     318
73.00     253
74.00     239
76.00     238
77.00     202
78.00     200
80.00     193
81.00     177
79.00     175
83.00     163
84.00     153
82.00     149
15.00     142
85.00     136
Name: 

In [18]:
# code all unknowns and not reported to nan
df['rur_urb'] = df['rur_urb'].apply(lambda x: make_nan(x, [8, 9]))
df['reg_stat'] = df['reg_stat'].apply(lambda x: make_nan(x, [91, 99]))
df['owner'] = df['owner'].apply(lambda x: make_nan(x, [9]))
df['make'] = df['make'].apply(lambda x: make_nan(x, [97, 99]))
df['body_typ'] = df['body_typ'].apply(lambda x: make_nan(x, [99, 98]))
df['tow_veh'] = df['tow_veh'].apply(lambda x: make_nan(x, [9]))
df['gvwr'] = df['gvwr'].apply(lambda x: make_nan(x, [9]))
df['v_config'] = df['v_config'].apply(lambda x: make_nan(x, [99]))
df['cargo_bt'] = df['cargo_bt'].apply(lambda x: make_nan(x, [98, 99]))
df['bus_use'] = df['bus_use'].apply(lambda x: make_nan(x, [98,99]))
df['spec_use'] = df['spec_use'].apply(lambda x: make_nan(x, [98, 99]))
df['emer_use'] = df['emer_use'].apply(lambda x: make_nan(x, [8, 9]))
df['emer_use'] = df['emer_use'].apply(binary)
df['l_state'] = df['l_state'].apply(lambda x: make_nan(x, [98, 99]))
df['l_status'] = df['l_status'].apply(lambda x: make_nan(x, [9]))
df['l_type'] = df['l_type'].apply(lambda x: make_nan(x, [9]))
df['cdl_stat'] = df['cdl_stat'].apply(lambda x: make_nan(x, [99]))
df['l_restri'] = df['l_restri'].apply(lambda x: make_nan(x, [9]))
df['prev_acc'] = df['prev_acc'].apply(lambda x: make_nan(x, [98, 99]))
df['prev_acc'] = df['prev_acc'].apply(binary)
df['prev_sus'] = df['prev_sus'].apply(lambda x: make_nan(x, [99]))
df['prev_sus'] = df['prev_sus'].apply(binary)
df['prev_dwi'] = df['prev_dwi'].apply(lambda x: make_nan(x, [99]))
df['prev_dwi'] = df['prev_dwi'].apply(binary)
df['prev_spd'] = df['prev_spd'].apply(lambda x: make_nan(x, [99]))
df['prev_spd'] = df['prev_spd'].apply(binary)
df['prev_oth'] = df['prev_oth'].apply(lambda x: make_nan(x, [99]))
df['prev_oth'] = df['prev_oth'].apply(binary)
df['sex'] = df['sex'].apply(lambda x: make_nan(x, [8, 9]))
df['hispanic'] = df['hispanic'].apply(lambda x: make_nan(x, [99]))
df['race'] = df['race'].apply(lambda x: make_nan(x, [99]))

In [19]:
df = df.dropna()
df.reset_index(inplace=True, drop=True)

In [20]:
df.shape

(38395, 29)

In [21]:
df.head()

Unnamed: 0,speedrel,mod_year,age,rur_urb,reg_stat,owner,make,body_typ,tow_veh,gvwr,v_config,cargo_bt,haz_inv,bus_use,spec_use,emer_use,l_state,l_status,l_type,cdl_stat,l_restri,prev_acc,prev_sus,prev_dwi,prev_spd,prev_oth,sex,hispanic,race
0,0.0,2.0,27.0,2.0,1.0,1.0,20.0,4.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0
1,1.0,2.0,55.0,1.0,1.0,1.0,35.0,4.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,7.0,1.0
2,0.0,2.0,50.0,2.0,1.0,2.0,20.0,20.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
3,0.0,4.0,39.0,2.0,1.0,2.0,12.0,31.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,4.0,21.0,2.0,1.0,2.0,20.0,31.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0


In [33]:
df.spec_use.value_counts()

 0.00    38062
 3.00      121
 2.00       89
 5.00       60
 1.00       26
 6.00       24
 7.00       10
13.00        1
 8.00        1
 4.00        1
Name: spec_use, dtype: int64

In [22]:
for_dummies = df.ix[:, 1:]
for_dummies.head()

Unnamed: 0,mod_year,age,rur_urb,reg_stat,owner,make,body_typ,tow_veh,gvwr,v_config,cargo_bt,haz_inv,bus_use,spec_use,emer_use,l_state,l_status,l_type,cdl_stat,l_restri,prev_acc,prev_sus,prev_dwi,prev_spd,prev_oth,sex,hispanic,race
0,2.0,27.0,2.0,1.0,1.0,20.0,4.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0
1,2.0,55.0,1.0,1.0,1.0,35.0,4.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,7.0,1.0
2,2.0,50.0,2.0,1.0,2.0,20.0,20.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
3,4.0,39.0,2.0,1.0,2.0,12.0,31.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,4.0,21.0,2.0,1.0,2.0,20.0,31.0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,6.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0


In [23]:
# Create dummy variables
speeding = pd.DataFrame(df['speedrel'])

for col in for_dummies:
    dum = pd.get_dummies(for_dummies[col], prefix = col)
    speeding = speeding.join(dum)


In [24]:
speeding.head()

Unnamed: 0,speedrel,mod_year_1.0,mod_year_2.0,mod_year_3.0,mod_year_4.0,mod_year_5.0,age_15.0,age_16.0,age_17.0,age_18.0,age_19.0,age_20.0,age_21.0,age_22.0,age_23.0,age_24.0,age_25.0,age_26.0,age_27.0,age_28.0,age_29.0,age_30.0,age_31.0,age_32.0,age_33.0,age_34.0,age_35.0,age_36.0,age_37.0,age_38.0,age_39.0,age_40.0,age_41.0,age_42.0,age_43.0,age_44.0,age_45.0,age_46.0,age_47.0,age_48.0,age_49.0,age_50.0,age_51.0,age_52.0,age_53.0,age_54.0,age_55.0,age_56.0,age_57.0,age_58.0,age_59.0,age_60.0,age_61.0,age_62.0,age_63.0,age_64.0,age_65.0,age_66.0,age_67.0,age_68.0,age_69.0,age_70.0,age_71.0,age_72.0,age_73.0,age_74.0,age_75.0,age_76.0,age_77.0,age_78.0,age_79.0,age_80.0,age_81.0,age_82.0,age_83.0,age_84.0,age_85.0,rur_urb_1.0,rur_urb_2.0,rur_urb_6.0,reg_stat_0.0,reg_stat_1.0,reg_stat_2.0,reg_stat_3.0,reg_stat_4.0,reg_stat_5.0,reg_stat_6.0,reg_stat_8.0,reg_stat_9.0,reg_stat_10.0,reg_stat_11.0,reg_stat_12.0,reg_stat_13.0,reg_stat_15.0,reg_stat_16.0,reg_stat_17.0,reg_stat_18.0,reg_stat_19.0,reg_stat_20.0,reg_stat_21.0,reg_stat_22.0,reg_stat_23.0,reg_stat_24.0,reg_stat_25.0,reg_stat_26.0,reg_stat_27.0,reg_stat_28.0,reg_stat_29.0,reg_stat_30.0,reg_stat_31.0,reg_stat_32.0,reg_stat_33.0,reg_stat_34.0,reg_stat_35.0,reg_stat_36.0,reg_stat_37.0,reg_stat_38.0,reg_stat_39.0,reg_stat_40.0,reg_stat_41.0,reg_stat_42.0,reg_stat_44.0,reg_stat_45.0,reg_stat_46.0,reg_stat_47.0,reg_stat_48.0,reg_stat_49.0,reg_stat_50.0,reg_stat_51.0,reg_stat_53.0,reg_stat_54.0,reg_stat_55.0,reg_stat_56.0,reg_stat_92.0,reg_stat_93.0,reg_stat_94.0,reg_stat_95.0,reg_stat_96.0,reg_stat_97.0,reg_stat_98.0,owner_0.0,owner_1.0,owner_2.0,owner_3.0,owner_4.0,owner_5.0,make_1.0,make_2.0,make_3.0,make_6.0,make_7.0,make_9.0,make_10.0,make_12.0,make_13.0,make_14.0,make_18.0,make_19.0,make_20.0,make_21.0,make_22.0,make_23.0,make_24.0,make_25.0,make_29.0,make_30.0,make_32.0,make_34.0,make_35.0,make_36.0,make_37.0,make_38.0,make_39.0,make_41.0,make_42.0,make_45.0,make_47.0,make_48.0,make_49.0,make_50.0,make_51.0,make_52.0,make_53.0,make_54.0,make_55.0,make_58.0,make_59.0,make_62.0,make_63.0,make_64.0,make_65.0,make_67.0,make_69.0,make_71.0,make_72.0,make_73.0,make_74.0,make_76.0,make_77.0,make_82.0,make_84.0,make_85.0,make_86.0,make_87.0,make_89.0,make_90.0,make_92.0,make_93.0,make_94.0,make_98.0,body_typ_1.0,body_typ_2.0,body_typ_3.0,body_typ_4.0,body_typ_5.0,body_typ_6.0,body_typ_7.0,body_typ_8.0,body_typ_9.0,body_typ_10.0,body_typ_11.0,body_typ_12.0,body_typ_14.0,body_typ_15.0,body_typ_16.0,body_typ_17.0,body_typ_19.0,body_typ_20.0,body_typ_21.0,body_typ_22.0,body_typ_28.0,body_typ_29.0,body_typ_30.0,body_typ_31.0,body_typ_32.0,body_typ_39.0,body_typ_40.0,body_typ_42.0,body_typ_45.0,body_typ_48.0,body_typ_49.0,body_typ_50.0,body_typ_51.0,body_typ_52.0,body_typ_55.0,body_typ_58.0,body_typ_60.0,body_typ_61.0,body_typ_62.0,body_typ_63.0,body_typ_64.0,body_typ_65.0,body_typ_66.0,body_typ_67.0,body_typ_71.0,body_typ_72.0,body_typ_73.0,body_typ_78.0,body_typ_79.0,body_typ_80.0,body_typ_81.0,body_typ_82.0,body_typ_83.0,body_typ_88.0,body_typ_89.0,body_typ_90.0,body_typ_91.0,body_typ_92.0,body_typ_93.0,body_typ_95.0,body_typ_97.0,tow_veh_0.0,tow_veh_1.0,tow_veh_2.0,tow_veh_3.0,tow_veh_4.0,tow_veh_5.0,tow_veh_6.0,gvwr_0.0,gvwr_1.0,gvwr_2.0,gvwr_3.0,gvwr_8.0,v_config_0.0,v_config_1.0,v_config_2.0,v_config_4.0,v_config_5.0,v_config_6.0,v_config_7.0,v_config_8.0,v_config_10.0,v_config_19.0,v_config_20.0,v_config_21.0,cargo_bt_0.0,cargo_bt_1.0,cargo_bt_2.0,cargo_bt_3.0,cargo_bt_4.0,cargo_bt_5.0,cargo_bt_6.0,cargo_bt_7.0,cargo_bt_8.0,cargo_bt_9.0,cargo_bt_10.0,cargo_bt_11.0,cargo_bt_12.0,cargo_bt_22.0,cargo_bt_96.0,cargo_bt_97.0,haz_inv_1,haz_inv_2,bus_use_0.0,bus_use_1.0,bus_use_4.0,bus_use_5.0,bus_use_6.0,bus_use_7.0,bus_use_8.0,spec_use_0.0,spec_use_1.0,spec_use_2.0,spec_use_3.0,spec_use_4.0,spec_use_5.0,spec_use_6.0,spec_use_7.0,spec_use_8.0,spec_use_13.0,emer_use_0.0,emer_use_1.0,l_state_1.0,l_state_2.0,l_state_4.0,l_state_5.0,l_state_6.0,l_state_8.0,l_state_9.0,l_state_10.0,l_state_11.0,l_state_12.0,l_state_13.0,l_state_15.0,l_state_16.0,l_state_17.0,l_state_18.0,l_state_19.0,l_state_20.0,l_state_21.0,l_state_22.0,l_state_23.0,l_state_24.0,l_state_25.0,l_state_26.0,l_state_27.0,l_state_28.0,l_state_29.0,l_state_30.0,l_state_31.0,l_state_32.0,l_state_33.0,l_state_34.0,l_state_35.0,l_state_36.0,l_state_37.0,l_state_38.0,l_state_39.0,l_state_40.0,l_state_41.0,l_state_42.0,l_state_43.0,l_state_44.0,l_state_45.0,l_state_46.0,l_state_47.0,l_state_48.0,l_state_49.0,l_state_50.0,l_state_51.0,l_state_53.0,l_state_54.0,l_state_55.0,l_state_56.0,l_state_93.0,l_state_95.0,l_state_96.0,l_state_97.0,l_status_0.0,l_status_1.0,l_status_2.0,l_status_3.0,l_status_4.0,l_status_6.0,l_type_0.0,l_type_1.0,l_type_2.0,l_type_7.0,l_type_8.0,cdl_stat_0.0,cdl_stat_1.0,cdl_stat_2.0,cdl_stat_3.0,cdl_stat_4.0,cdl_stat_5.0,cdl_stat_6.0,cdl_stat_7.0,cdl_stat_8.0,l_restri_0.0,l_restri_1.0,l_restri_2.0,l_restri_3.0,prev_acc_0.0,prev_acc_1.0,prev_sus_0.0,prev_sus_1.0,prev_dwi_0.0,prev_dwi_1.0,prev_spd_0.0,prev_spd_1.0,prev_oth_0.0,prev_oth_1.0,sex_1.0,sex_2.0,hispanic_0.0,hispanic_1.0,hispanic_2.0,hispanic_3.0,hispanic_4.0,hispanic_5.0,hispanic_6.0,hispanic_7.0,race_0.0,race_1.0,race_2.0,race_3.0,race_4.0,race_5.0,race_6.0,race_7.0,race_18.0,race_19.0,race_28.0,race_38.0,race_48.0,race_58.0,race_68.0,race_78.0,race_97.0,race_98.0
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
X = speeding.ix[:, 1:]
y = speeding['speedrel']

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1, stratify=y)

In [27]:
dtree = DecisionTreeClassifier().fit(X_train, y_train)
rtree = RandomForestClassifier().fit(X_train, y_train)

y_test_pred_d = dtree.predict(X_test)
y_test_pred_r = rtree.predict(X_test)

In [28]:
accuracy_score(y_test, y_test_pred_d)

0.75312500000000004

In [29]:
accuracy_score(y_test, y_test_pred_r)

0.81770833333333337

In [30]:
features = zip(X.columns, rtree.feature_importances_)
sorted(features, key=lambda x: x[1])

[(u'reg_stat_96.0', 0.0),
 (u'make_1.0', 0.0),
 (u'make_25.0', 0.0),
 (u'make_29.0', 0.0),
 (u'make_89.0', 0.0),
 (u'body_typ_42.0', 0.0),
 (u'body_typ_45.0', 0.0),
 (u'body_typ_51.0', 0.0),
 (u'body_typ_58.0', 0.0),
 (u'body_typ_64.0', 0.0),
 (u'body_typ_71.0', 0.0),
 (u'body_typ_72.0', 0.0),
 (u'body_typ_78.0', 0.0),
 (u'body_typ_79.0', 0.0),
 (u'body_typ_93.0', 0.0),
 (u'body_typ_95.0', 0.0),
 (u'tow_veh_3.0', 0.0),
 (u'v_config_8.0', 0.0),
 (u'v_config_10.0', 0.0),
 (u'v_config_20.0', 0.0),
 (u'bus_use_7.0', 0.0),
 (u'bus_use_8.0', 0.0),
 (u'spec_use_4.0', 0.0),
 (u'spec_use_13.0', 0.0),
 (u'make_93.0', 1.9382095473980796e-08),
 (u'bus_use_4.0', 1.5036707334493629e-07),
 (u'spec_use_7.0', 3.3275330450489223e-07),
 (u'make_64.0', 5.4138294109434472e-07),
 (u'make_92.0', 6.8029201952990236e-07),
 (u'tow_veh_4.0', 7.0943945137310255e-07),
 (u'make_74.0', 8.5035763846303046e-07),
 (u'body_typ_60.0', 1.3171920397232568e-06),
 (u'reg_stat_94.0', 2.3533750075231723e-06),
 (u'v_config_19.0

In [31]:
sorted(features, key=lambda x: x[1], reverse=True)

[(u'mod_year_3.0', 0.016784218347818188),
 (u'race_1.0', 0.015494069297020002),
 (u'mod_year_2.0', 0.015171430808600266),
 (u'rur_urb_1.0', 0.014724485019779904),
 (u'rur_urb_2.0', 0.014341132921081783),
 (u'race_0.0', 0.01398247994846955),
 (u'mod_year_4.0', 0.012710269872341987),
 (u'make_20.0', 0.012370426153722967),
 (u'prev_acc_0.0', 0.012267565065183925),
 (u'owner_1.0', 0.012081508652625321),
 (u'owner_2.0', 0.01197995807947853),
 (u'mod_year_1.0', 0.011895056038703193),
 (u'hispanic_0.0', 0.011778376115590867),
 (u'l_restri_0.0', 0.011705183595423533),
 (u'body_typ_4.0', 0.011394975132113145),
 (u'prev_acc_1.0', 0.011264190335677344),
 (u'make_12.0', 0.010994885480803631),
 (u'prev_oth_1.0', 0.010326221310575426),
 (u'l_restri_3.0', 0.010064478978355733),
 (u'body_typ_31.0', 0.0098047522288005912),
 (u'prev_oth_0.0', 0.0095651022933375771),
 (u'body_typ_80.0', 0.0089025605707228114),
 (u'make_37.0', 0.0086311190018649279),
 (u'make_49.0', 0.008490453125792153),
 (u'prev_spd_0.0

In [32]:
'''names = []
accs = []
for algorithm in (LogisticRegression, 
                  KNeighborsClassifier,
                  GaussianNB,
                  SVC,
                  DecisionTreeClassifier,
                  RandomForestClassifier):

    accuracy = np.mean(cross_val_score(algorithm(), X, y, cv=10))
    print '%-30s %.4f' % (algorithm.__name__, accuracy)
    names.append(algorithm.__name__)
    accs.append(accuracy)

SyntaxError: EOF while scanning triple-quoted string literal (<ipython-input-32-c6384bc5300b>, line 13)

In [None]:
df.make.value_counts()