In [1]:
%%latex
\tableofcontents

<IPython.core.display.Latex object>

# Ambulance_Dispatch_2024_Reduce_Dimensionality

## Goals

- The goal of this notebook is to reduce the dimensionality of each dataset by identifying and dropping features that are well predicted by the remaining features (multicollinearity).

- In the next notebook we will build models on three groups of features:
    - Easy:  Features that the emergency dispatcher already has or can, without really precise maps, determine from the location
    - Medium:  Additionally, features that the emergency dispatcher can determine from precise maps and information from the cell service provider about the primary user of the phone
    - Hard:  Additionally, features about the vehicle that can be learned only by correlating information about the identity of the likely driver with vehicle registration and/or insurance records.  Not easily available in real time without lots of preparation, raises privacy concerns, and not likely to be very accurate

- The output of this notebook is the three sets of features, expressed as a dummy variable for each binned value of the feature, after dimensionality reduction.



## Methods

- For each of [Easy, Medium, Hard]:
    - For each feature:
        - Create a linear model (LinearRegression from sklearn) mapping the other features onto this feature.
        - Find the $R^2$ score from fitting the model:  ``r2 = LinearRegression().fit(X, y).score(X, y)`` where ``y`` is this feature and ``X`` is all of the other features.  
        - If $R^2=1$, then this feature is perfectly predicted by the other features.  
        - If this $R^2$ score is high, like greater than 0.9, then this feature is well predicted by other features, and we should consider dropping it.
        - The reason to not drop all features with high $R^2$ scores in the same step is that two (or more) features could be highly collinear, and we may only want to drop one of them.  If we drop one of them and recalculate, we may see that the other(s) now have low $R^2$ scores.

    - While the $\max(R^2) > 0.90$, run the above method and drop the feature with the highest $R^2$ score.
        - This choice of threshold of 0.90 is somewhat arbitrary, perhaps arbitrarily high, and testing the results of different choices is an opportunity for future research.

    - Transform the features, which have 2-10 values, into 1-9 dummy features.  We drop the dummy for the first value because otherwise the dummy features of an original feature would always sum to one, making each of them perfectly collinear with the others.

    - Repeat the "While $\max(R^2) > 0.90$, ..." process on the dummy features to reduce the number of features.  
    
    - Write the reduced dummy-variable features to file to use in the next notebook, where we will build models predicting whether each crash person needs an ambulance.
    
- We considered using Principal Component Analysis, but decided on $R^2$ instead.


# Setup

## Import Libraries

In [2]:
print ('Install Packages')

import sys, copy, math, time, os, csv

print ('Python version: {}'.format(sys.version))

import numpy as np
print ('NumPy version: {}'.format(np.__version__))
np.set_printoptions(suppress=True)

import pandas as pd
print ('Pandas version:  {}'.format(pd.__version__))
pd.set_option('display.max_rows', 500)

import sklearn
print ('sklearn version: {}'.format(sklearn.__version__))
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression

# Set Randomness.  
import random
random_seed = 0
print ('random_seed = ', random_seed)
np.random.seed(random_seed) # NumPy
random.seed(random_seed) # Python
#tf.random.set_seed(42) # Tensorflow

import warnings
warnings.filterwarnings('ignore')

print ('Finished Installing Packages')

Install Packages
Python version: 3.10.14 | packaged by conda-forge | (main, Mar 20 2024, 12:51:49) [Clang 16.0.6 ]
NumPy version: 1.26.4
Pandas version:  2.2.2
sklearn version: 1.5.0
random_seed =  0
Finished Installing Packages


## Get Data

This function pulls in the saved output from Ambulance_Dispatch_2024_03_Impute_Missing_Data.

In [3]:
def Get_Data(target):
    """Load the imputed dataset for the current run and put ``target`` first.

    The input file is the output of the imputation notebook; its name is
    built from the first six characters of the module-level ``Run`` code.
    Columns are returned in alphabetical order, except that ``target`` is
    moved to position 0.

    Earlier versions of this notebook read fixed files instead:
      - '../../Big_Files/CRSS_03_Imputed_by_MF_Data.csv'        (MissForest)
      - '../../Big_Files/CRSS_03_Imputed_by_IVEware_seed_0.csv' (IVEware)

    Parameters
    ----------
    target : str
        Name of the column to move to the front; must exist in the file.

    Returns
    -------
    pandas.DataFrame
    """
    print('Get_Data')

    # Only the leading part of the run code identifies the imputed file.
    source_path = '../../Big_Files/CRSS_03' + Run[:6] + '.csv'
    print(source_path)
    print()
    raw = pd.read_csv(source_path, index_col=None)

    print('data.shape = ', raw.shape)

    # Alphabetical columns, with the target pulled out and placed first.
    alphabetical = raw.reindex(sorted(raw.columns), axis=1)
    target_values = alphabetical.pop(target)
    alphabetical.insert(0, target, target_values)

    return alphabetical

#data =  Get_Data('HOSPITAL')

In [4]:
def Thin_to_Hard_Features(data):
    """Keep only the columns used by the "Hard" feature level.

    The Hard level is the widest of the three levels in this notebook: it
    keeps accident, vehicle, person and engineered features, plus the
    HOSPITAL target.  Every column of ``data`` that is not kept is printed
    under the heading 'Removed Features'.  Merge keys such as CASENUM,
    VEH_NO and PER_NO are not in the kept list, so they are dropped if
    present.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        The kept columns that exist in ``data``, in alphabetical order.
    """
    print ('Thin_to_Hard_Features()')

    # Commented-out names are candidate features deliberately left out.
    Accident = [
        'DAY_WEEK',
        'HOUR',
        'INT_HWY',
        'LGT_COND',
        'MONTH',
#        'PEDS',
        'PERMVIT',
#        'PERNOTMVIT', # Pedestrians, which we have taken out
        'PJ',
        'PSU',
        'PVH_INVL',
        'REGION',
        'REL_ROAD',
        'RELJCT1',
        'RELJCT2',
#        'SCH_BUS',
        'TYP_INT',
        'URBANICITY',
        'VE_FORMS',
        'VE_TOTAL',
        'WEATHER',
        'WRK_ZONE',
#        'YEAR',
    ]
    
    Vehicle = [
        'BODY_TYP',
#        'BUS_USE',
#        'EMER_USE',
        'MAKE',
#        'MOD_YEAR',
        'MODEL',
        'NUMOCCS',
        'VALIGN',
#        'VNUM_LAN',
        'VPROFILE',
        'VSPD_LIM',
#        'VSURCOND',
        'VTRAFCON',
        'VTRAFWAY',
    ]
    
    Person = [
        'AGE',
#        'LOCATION', # Pedestrian location; taken out
        'PER_TYP',
        'SEX',
        'HOSPITAL',    
    ]

    Engineered = [
        'VEH_AGE',
    ]
    
    # Put features in alphabetical order
    Features = sorted(Accident + Vehicle + Person + Engineered)

    print ('Removed Features')
    for feature in data:
        if feature not in Features:
            print (feature)
    print ()
    
    # filter() keeps only the listed columns that actually exist in data.
    data = data.filter(Features, axis=1)
    
    print ('data.shape: ', data.shape)
    
    print ('End Thin_to_Hard_Features()')
    print ()
        
    return data

def Test_Thin_to_Hard_Features():
    """Manual check: load the data, thin to Hard features, show value counts.

    Get_Data() requires the name of the target column; 'HOSPITAL' is the
    target used throughout this notebook.
    """
    data = Get_Data('HOSPITAL')
    data = Thin_to_Hard_Features(data)
    for feature in data:
        display(data[feature].value_counts())
        
#Test_Thin_to_Hard_Features()

In [5]:
def Thin_to_Medium_Features(data):
    """Restrict ``data`` to the "Medium" feature level.

    Prints every column of ``data`` that is not part of the Medium level,
    then returns a new frame holding only the Medium columns that exist in
    ``data``, in alphabetical order.  The HOSPITAL target is kept.

    Candidate features deliberately excluded at this level:
      Accident:   LGT_COND, PEDS, PERMVIT, PERNOTMVIT, PVH_INVL, RELJCT2,
                  SCH_BUS, VE_FORMS, VE_TOTAL, WRK_ZONE, YEAR
      Vehicle:    BODY_TYP, BUS_USE, EMER_USE, MAKE, MOD_YEAR, MODEL,
                  NUMOCCS, VNUM_LAN, VSURCOND
      Person:     LOCATION, PER_TYP
      Engineered: VEH_AGE
    """
    print('Thin_to_Medium_Features()')

    kept_by_group = {
        'Accident': [
            'DAY_WEEK', 'HOUR', 'INT_HWY', 'MONTH', 'PJ', 'PSU', 'REGION',
            'REL_ROAD', 'RELJCT1', 'TYP_INT', 'URBANICITY', 'WEATHER',
        ],
        'Vehicle': [
            'VALIGN', 'VPROFILE', 'VSPD_LIM', 'VTRAFCON', 'VTRAFWAY',
        ],
        'Person': [
            'AGE', 'SEX', 'HOSPITAL',
        ],
        'Engineered': [],
    }

    # Flatten the groups and put the names in alphabetical order.
    Features = sorted(
        name for group in kept_by_group.values() for name in group
    )

    print('Removed Features')
    discarded = [column for column in data if column not in Features]
    for column in discarded:
        print(column)
    print()

    thinned = data.filter(Features, axis=1)

    print('data.shape: ', thinned.shape)

    print('End Thin_to_Medium_Features()')
    print()

    return thinned

def Test_Thin_to_Medium_Features():
    """Manual check: load the data, thin to Medium features, show value counts.

    Get_Data() requires the name of the target column; 'HOSPITAL' is the
    target used throughout this notebook.
    """
    data = Get_Data('HOSPITAL')
    data = Thin_to_Medium_Features(data)
    for feature in data:
        display(data[feature].value_counts())
        
#Test_Thin_to_Medium_Features()

In [6]:
def Thin_to_Easy_Features(data):
    """Restrict ``data`` to the "Easy" feature level.

    Prints every column of ``data`` that is not part of the Easy level,
    then returns a new frame holding only the Easy columns that exist in
    ``data``, in alphabetical order.  The HOSPITAL target is kept.

    Candidate features deliberately excluded at this level:
      Accident:   INT_HWY, LGT_COND, PEDS, PERMVIT, PERNOTMVIT, PVH_INVL,
                  REL_ROAD, RELJCT1, RELJCT2, SCH_BUS, TYP_INT, VE_FORMS,
                  VE_TOTAL, WRK_ZONE, YEAR
      Vehicle:    BODY_TYP, BUS_USE, EMER_USE, MAKE, MOD_YEAR, MODEL,
                  NUMOCCS, VALIGN, VNUM_LAN, VPROFILE, VSPD_LIM, VSURCOND,
                  VTRAFCON, VTRAFWAY
      Person:     AGE, LOCATION, PER_TYP, SEX
      Engineered: VEH_AGE, AGE_x_SEX, AGE_x_SCH_BUS
    """
    print('Thin_to_Easy_Features()')

    accident_columns = (
        'DAY_WEEK', 'HOUR', 'MONTH', 'PJ', 'PSU', 'REGION',
        'URBANICITY', 'WEATHER',
    )
    vehicle_columns = ()
    person_columns = ('HOSPITAL',)
    engineered_columns = ()

    # Alphabetical list of every column kept at this level.
    Features = sorted(
        accident_columns + vehicle_columns + person_columns + engineered_columns
    )

    print('Removed Features')
    for column in data.columns:
        if column in Features:
            continue
        print(column)
    print()

    thinned = data.filter(items=Features, axis=1)

    print('data.shape: ', thinned.shape)

    print('End Thin_to_Easy_Features()')
    print()

    return thinned

def Test_Thin_to_Easy_Features():
    """Manual check: load the data, thin to Easy features, show value counts.

    Get_Data() requires the name of the target column; 'HOSPITAL' is the
    target used throughout this notebook.
    """
    data = Get_Data('HOSPITAL')
    data = Thin_to_Easy_Features(data)
    for feature in data:
        display(data[feature].value_counts())
        
#Test_Thin_to_Easy_Features()

In [7]:
def Get_Dummies(data, target):
    """One-hot encode every column except ``target``.

    All columns are treated as categorical.  Each feature column is
    expanded into dummy columns named ``<feature>_<value>``, with the
    dummy for the first category dropped.  The target column is appended
    (as a categorical column) after the dummies.  The shapes before and
    after encoding are printed.

    Note: if the dataset still used a code such as "99" for
    Missing/Unknown (i.e. missing values had not been imputed), the
    alternative would be to encode with ``drop_first=False`` and then drop
    every dummy column whose name contains '99'.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it is not modified.
    target : str
        Name of the column to leave un-encoded.

    Returns
    -------
    pandas.DataFrame
    """
    print('Get_Dummies')
    print(data.shape)

    as_categories = data.astype('category')
    target_column = as_categories.pop(target)

    encoded = pd.get_dummies(
        as_categories,
        prefix=as_categories.columns,
        drop_first=True,
    )
    encoded_with_target = encoded.join(target_column)

    print(encoded_with_target.shape)
    print()

    return encoded_with_target

#data = Get_Dummies(data, 'HOSPITAL')

In [8]:
def Principal_Component_Analysis(data, n_components=300):
    """Project ``data`` onto its leading principal components.

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric feature frame to transform.
    n_components : int, default 300
        Number of components requested.  It is capped at the smaller
        dimension of ``data`` so that narrow frames do not make PCA raise.

    Returns
    -------
    pandas.DataFrame
        One row per input row and one integer-labelled column per
        component; the index is a fresh default index, not ``data.index``.
    """
    print ('Principal_Component_Analysis()')
    # PCA cannot extract more components than min(n_samples, n_features).
    n_components = min(n_components, *data.shape)
    pca = PCA(n_components)
    print ('pca.fit()')
    pca.fit(data)
    
    data_pca = pca.transform(data)
    print (data_pca.shape)
    print (data_pca[:10])
    print ()
    data_pca = np.ascontiguousarray(data_pca)
    print (data_pca.shape)
    print (data_pca[:10])
    print ()
    data_pca = pd.DataFrame(data_pca) #, columns=['PCA%i' % i for i in range(n_components)], index=data.index)
    print (data_pca.head())
    print (data_pca.shape)
    print ()
    

    return data_pca

In [9]:
# Adapted from https://towardsdatascience.com/statistics-in-python-collinearity-and-multicollinearity-4cc4dcd82b3f
def calculate_vif(df, features):
    """Measure how well each feature is predicted by the other features.

    For every name in ``features`` a LinearRegression is fitted with that
    column as the response and all other listed columns as predictors.
    From the in-sample R^2 of that fit:

        Tolerance = 1 - R^2
        VIF       = 1 / Tolerance   (10000 is used when Tolerance is exactly 0)

    Returns
    -------
    (pandas.DataFrame, dict, dict)
        A summary frame indexed by feature with columns 'r2', 'Tolerance'
        and 'VIF'; the tolerance per feature; and the R^2 per feature.
    """
    print('calculate_vif()')

    r_squared_of = {}
    tolerance_of = {}
    vif_of = {}

    for response_name in features:
        # Regress this feature on every other listed feature.
        predictor_names = [name for name in features if name != response_name]
        predictors = df[predictor_names]
        response = df[response_name]

        model = LinearRegression().fit(predictors, response)
        r_squared = model.score(predictors, response)
        unexplained = 1 - r_squared

        r_squared_of[response_name] = r_squared
        tolerance_of[response_name] = unexplained
        # A tolerance of exactly zero would divide by zero; use a sentinel.
        vif_of[response_name] = 1 / unexplained if unexplained != 0 else 10000

    summary = pd.DataFrame(
        {'r2': r_squared_of, 'Tolerance': tolerance_of, 'VIF': vif_of}
    )
    return summary, tolerance_of, r_squared_of

In [10]:
# Iteratively remove the feature with the largest VIF ('Variance Inflation Factor')
# until the largest VIF is 10, or smallest Tolerance is 0.1, or largest R^2 is 0.9

# This process could be much more efficient if, whenever we got R^2==1.0,
    # we dropped the feature immediately and started the calculate_vif() over.
def Reduce_Dimensionality(data, target, threshold=0.9):
    """Iteratively drop the feature best predicted by the other features.

    On each pass calculate_vif() is run on the remaining features, the
    result table is displayed, and the feature with the highest R^2 is
    printed.  If that R^2 exceeds ``threshold`` the feature is dropped and
    the pass is repeated; otherwise the loop stops.  Only one feature is
    dropped per pass because dropping one member of a collinear group can
    lower the R^2 of the others.

    Parameters
    ----------
    data : pandas.DataFrame
        Features plus the target column.  It is not modified.
    target : str
        Name of the target column; it is excluded from the regressions and
        re-attached as the last column of the result.
    threshold : float, default 0.9
        A feature is dropped only while the largest R^2 is above this value.

    Returns
    -------
    (pandas.DataFrame, list)
        The reduced frame and the names of the dropped features in the
        order they were dropped.
    """
    # Split off the target without touching the caller's frame.
    Target = data[target]
    data = data.drop(columns=[target])

    Dropped_Features = []

    # Each regression needs at least one predictor, so stop at one feature.
    while data.shape[1] > 1:
        Features = [feature for feature in data]
        VIF, _, r2_Dict = calculate_vif(data, Features)
        Max_r2_Feature = VIF['r2'].idxmax()
        display(VIF)
        print (Max_r2_Feature)

        # Written as "not >" so that a NaN score stops the loop.
        if not r2_Dict[Max_r2_Feature] > threshold:
            print ()
            break

        data = data.drop(columns=[Max_r2_Feature])
        print ('Drop ', Max_r2_Feature)
        Dropped_Features.append(Max_r2_Feature)
        print ()

    data = data.join(Target)
    
    return data, Dropped_Features
        
    

In [11]:
%%time
# About two hours
def Main():
    """Build and save the Easy, Medium and Hard datasets for the current run.

    Reads the module-level ``Run`` code.  For each level, in the order
    Easy, Medium, Hard:

      1. Load the imputed data with Get_Data() and cast it to int64.
      2. Thin it to the level's features.
      3. If ``Run[7] == '1'``, run Reduce_Dimensionality() on the thinned
         features and write the dropped feature names, one per line, to
         './Analysis/CRSS_04_Dropped_Features<Run><level>.csv'.
      4. One-hot encode with Get_Dummies().
      5. Write the result to '../../Big_Files/CRSS_04<Run><level>.csv'.
    """
    target = 'HOSPITAL'

    # (file-name suffix, thinning function, banner printed before thinning)
    Levels = [
        ('_Easy', Thin_to_Easy_Features, 'Thin_to_Easy_Features'),
        ('_Medium', Thin_to_Medium_Features, 'Thin_to_Medium_Features'),
        ('_Hard', Thin_to_Hard_Features, 'Thin_to_Hard_Features()'),
    ]

    for level, thin_features, banner in Levels:
        data = Get_Data(target)
        data = data.astype('int64')
        print (banner)
        data = thin_features(data)

        if Run[7]=='1': 
            data, Dropped_Features = Reduce_Dimensionality(data, target)
            # Write Dropped_Features to csv
            filename = './Analysis/CRSS_04_Dropped_Features' + Run + level + '.csv'
            print (filename)
            print ()
            # The with-block closes the file so the list is flushed to disk.
            with open(filename, 'w') as f:
                for feature in Dropped_Features:
                    f.write('%s\n' % (feature))

        for feature in data:
            data[feature] = pd.to_numeric(data[feature])
        print (data.shape)

        data_dummies = Get_Dummies(data, target)
        print (data_dummies.shape)
        print ()

        # Disabled: second reduction pass on the dummy features.  It appends
        # to the same dropped-features file.  Run[7] is a character, so it
        # must be compared with the string '1' if this is re-enabled.
        #
        # if Run[7]=='1':
        #     data_dummies, Dropped_Features = Reduce_Dimensionality(data_dummies, target)
        #     filename = './Analysis/CRSS_04_Dropped_Features' + Run + level + '.csv'
        #     print (filename)
        #     print ()
        #     with open(filename, 'a') as f:
        #         for feature in Dropped_Features:
        #             f.write('%s\n' % (feature))

        print (data_dummies.shape)
        print ()

        filename = '../../Big_Files/CRSS_04' + Run + level + '.csv'
        data_dummies.to_csv(filename, index=False)
        print ()
        print (filename)
        print ()
    
    print ()
    print ('Finished!')
    
    
#Main()

CPU times: user 5 μs, sys: 1 μs, total: 6 μs
Wall time: 9.78 μs


In [12]:
# Driver cell: call Main() once for every run code listed in Runs.
#
# Run is a module-level string read by other cells:
#   - Run[1] is converted to int below and used as the random seed;
#   - Get_Data() uses Run[:6] to build the name of the input file;
#   - Main() runs Reduce_Dimensionality() only when Run[7] == '1', and
#     uses the whole code in the names of its output files.
Run = ''
Runs = [
#    '_0_0_0_0',
#    '_0_0_0_1',
    '_0_0_1_0',
    '_0_0_1_1',
    '_0_1_0_0',
    '_0_1_0_1',
    '_0_1_1_0',
    '_0_1_1_1',
    '_1_0_0_0',
    '_1_0_0_1',
    '_1_0_1_0',
    '_1_0_1_1',
    '_1_1_0_0',
    '_1_1_0_1',
    '_1_1_1_0',
    '_1_1_1_1',
]

for run in Runs:
    # Publish the current code through the global that the functions read.
    Run = run
    print ()
    print ('------------------------------------')
    print ('Run = ', Run)

    # Re-seed both random number generators before each run.
    random_seed = int(Run[1])
    print ('random_seed = ', random_seed)
    random.seed(random_seed) # Python
    np.random.seed(random_seed) # NumPy
    #tf.random.set_seed(42) # Tensorflow
    
    Main()
    print ()



------------------------------------
Run =  _0_0_1_0
random_seed =  0
Get_Data
../../Big_Files/CRSS_03_0_0_1.csv

data.shape =  (802700, 68)
Thin_to_Easy_Features
Thin_to_Easy_Features()
Removed Features
ACC_TYPE
AGE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
INT_HWY
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT1
RELJCT2
REL_ROAD
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SEX
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
TYP_INT
Unnamed: 0
VALIGN
VEH_AGE
VE_FORMS
VE_TOTAL
VPROFILE
VSPD_LIM
VSURCOND
VTCONT_F
VTRAFCON
VTRAFWAY
WRK_ZONE

data.shape:  (802700, 9)
End Thin_to_Easy_Features()

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

(802700, 40)


../../Big_Files/CRSS_04_0_0_1_0_Easy.csv

Get_Data
../../Big_Files/CRSS_03_0_0_1.csv

data.shape =  (802700, 68)
Thin_to_Medium_Features
Thin_to_Medium_Features()
Removed F

Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK,0.001919,0.998081,1.001923
HOUR,0.003341,0.996659,1.003352
MONTH,0.001533,0.998467,1.001535
PJ,0.544922,0.455078,2.197425
PSU,0.558646,0.441354,2.265757
REGION,0.054948,0.945052,1.058143
URBANICITY,0.021225,0.978775,1.021686
WEATHER,0.007372,0.992628,1.007427


PSU

./Analysis/CRSS_04_Dropped_Features_0_0_1_1_Easy.csv

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK_1,0.265204,0.734796,1.360922
DAY_WEEK_2,0.274802,0.725198,1.378934
DAY_WEEK_3,0.36885,0.63115,1.58441
DAY_WEEK_4,0.292632,0.707368,1.413691
HOUR_1,0.507626,0.492374,2.030978
HOUR_2,0.520898,0.479102,2.087237
HOUR_3,0.72653,0.27347,3.656703
HOUR_4,0.870921,0.129079,7.74719
HOUR_5,0.873106,0.126894,7.880572
HOUR_6,0.790258,0.209742,4.767765


PSU_3

./Analysis/CRSS_04_Dropped_Features_0_0_1_1_Easy.csv

(802700, 40)


../../Big_Files/CRSS_04_0_0_1_1_Easy.csv

Get_Data
../../Big_Files/CRSS_03_0_0_1.csv

data.shape =  (802700, 68)
Thin_to_Medium_Features
Thin_to_Medium_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
Unnamed: 0
VEH_AGE
VE_FORMS
VE_TOTAL
VSURCOND
VTCONT_F
WRK_ZONE

data.shape:  (802700, 20)
End Thin_to_Medium_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.004459,0.995541,1.004479
DAY_WEEK,0.006147,0.993853,1.006185
HOUR,0.024705,0.975295,1.025331
INT_HWY,0.439333,0.560667,1.78359
MONTH,0.002359,0.997641,1.002365
PJ,0.56762,0.43238,2.312782
PSU,0.564426,0.435574,2.295822
REGION,0.077074,0.922926,1.083511
RELJCT1,0.099627,0.900373,1.110651
REL_ROAD,0.153378,0.846622,1.181165


PJ

./Analysis/CRSS_04_Dropped_Features_0_0_1_1_Medium.csv

(802700, 20)
Get_Dummies
(802700, 20)
(802700, 80)

(802700, 80)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597318,0.402682,2.483352
AGE_2,0.536943,0.463057,2.159562
AGE_3,0.600812,0.399188,2.505083
AGE_4,0.512805,0.487195,2.052564
AGE_5,0.668914,0.331086,3.020364
AGE_6,0.910135,0.089865,11.127861
AGE_7,0.846159,0.153841,6.500209
AGE_8,0.68275,0.31725,3.152093
AGE_9,0.647935,0.352065,2.840381
DAY_WEEK_1,0.266834,0.733166,1.363948


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597318,0.402682,2.483352
AGE_2,0.536943,0.463057,2.159562
AGE_3,0.600812,0.399188,2.505083
AGE_4,0.512805,0.487195,2.052564
AGE_5,0.668914,0.331086,3.020364
AGE_6,0.910135,0.089865,11.12786
AGE_7,0.846159,0.153841,6.500209
AGE_8,0.68275,0.31725,3.152093
AGE_9,0.647935,0.352065,2.840381
DAY_WEEK_1,0.266834,0.733166,1.363948


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597318,0.402682,2.483352
AGE_2,0.536941,0.463059,2.159552
AGE_3,0.60081,0.39919,2.505073
AGE_4,0.512804,0.487196,2.052563
AGE_5,0.668914,0.331086,3.020363
AGE_6,0.910135,0.089865,11.12786
AGE_7,0.846159,0.153841,6.500209
AGE_8,0.68275,0.31725,3.152092
AGE_9,0.647934,0.352066,2.84038
DAY_WEEK_1,0.266831,0.733169,1.363943


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597318,0.402682,2.483352
AGE_2,0.536941,0.463059,2.15955
AGE_3,0.60081,0.39919,2.505073
AGE_4,0.512804,0.487196,2.052563
AGE_5,0.668914,0.331086,3.020363
AGE_6,0.910135,0.089865,11.127848
AGE_7,0.846159,0.153841,6.500199
AGE_8,0.68275,0.31725,3.152084
AGE_9,0.647934,0.352066,2.84038
DAY_WEEK_1,0.266831,0.733169,1.363942


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.031437,0.968563,1.032457
AGE_2,0.024766,0.975234,1.025395
AGE_3,0.032265,0.967735,1.033341
AGE_4,0.021025,0.978975,1.021476
AGE_5,0.035985,0.964015,1.037328
AGE_7,0.078098,0.921902,1.084715
AGE_8,0.043404,0.956596,1.045373
AGE_9,0.046198,0.953802,1.048436
DAY_WEEK_1,0.266815,0.733185,1.363912
DAY_WEEK_2,0.27697,0.72303,1.383069


PSU_3

./Analysis/CRSS_04_Dropped_Features_0_0_1_1_Medium.csv

(802700, 76)


../../Big_Files/CRSS_04_0_0_1_1_Medium.csv

Get_Data
../../Big_Files/CRSS_03_0_0_1.csv

data.shape =  (802700, 68)
Thin_to_Hard_Features()
Thin_to_Hard_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
M_HARM
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
P_CRASH1
P_CRASH2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
Unnamed: 0
VSURCOND
VTCONT_F

data.shape:  (802700, 33)
End Thin_to_Hard_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.231482,0.768518,1.301206
BODY_TYP,0.724571,0.275429,3.630705
DAY_WEEK,0.019266,0.980734,1.019644
HOUR,0.035253,0.964747,1.036541
INT_HWY,0.445899,0.554101,1.804726
LGT_COND,0.064477,0.935523,1.068921
MAKE,0.204663,0.795337,1.257328
MODEL,0.730183,0.269817,3.706221
MONTH,0.009515,0.990485,1.009606
NUMOCCS,0.438369,0.561631,1.780529


VE_FORMS
Drop  VE_FORMS

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.231479,0.768521,1.301201
BODY_TYP,0.724571,0.275429,3.630705
DAY_WEEK,0.019256,0.980744,1.019634
HOUR,0.034986,0.965014,1.036254
INT_HWY,0.445893,0.554107,1.804707
LGT_COND,0.064344,0.935656,1.068768
MAKE,0.204659,0.795341,1.257322
MODEL,0.730183,0.269817,3.706218
MONTH,0.009513,0.990487,1.009605
NUMOCCS,0.438369,0.561631,1.780529


MODEL

./Analysis/CRSS_04_Dropped_Features_0_0_1_1_Hard.csv

(802700, 32)
Get_Dummies
(802700, 32)
(802700, 139)

(802700, 139)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597998,0.402002,2.487548
AGE_2,0.537839,0.462161,2.163746
AGE_3,0.619067,0.380933,2.625134
AGE_4,0.538079,0.461921,2.164874
AGE_5,0.693589,0.306411,3.263593
AGE_6,0.920152,0.079848,12.523823
AGE_7,0.86374,0.13626,7.338918
AGE_8,0.711458,0.288542,3.465705
AGE_9,0.676572,0.323428,3.091877
BODY_TYP_1,0.999672,0.000328,3046.469225


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597998,0.402002,2.487548
AGE_2,0.537839,0.462161,2.163746
AGE_3,0.619067,0.380933,2.625134
AGE_4,0.538079,0.461921,2.164874
AGE_5,0.693589,0.306411,3.263593
AGE_6,0.920152,0.079848,12.523823
AGE_7,0.86374,0.13626,7.338918
AGE_8,0.711458,0.288542,3.465704
AGE_9,0.676572,0.323428,3.091876
BODY_TYP_1,0.999672,0.000328,3046.469225


BODY_TYP_2
Drop  BODY_TYP_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597995,0.402005,2.487528
AGE_2,0.537836,0.462164,2.163736
AGE_3,0.619067,0.380933,2.625134
AGE_4,0.538079,0.461921,2.164874
AGE_5,0.693589,0.306411,3.263592
AGE_6,0.920152,0.079848,12.523822
AGE_7,0.86374,0.13626,7.338918
AGE_8,0.711458,0.288542,3.465701
AGE_9,0.676571,0.323429,3.091868
BODY_TYP_1,0.099861,0.900139,1.11094


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597995,0.402005,2.487528
AGE_2,0.537834,0.462166,2.163725
AGE_3,0.619065,0.380935,2.625123
AGE_4,0.538079,0.461921,2.164872
AGE_5,0.693589,0.306411,3.263591
AGE_6,0.920152,0.079848,12.523821
AGE_7,0.86374,0.13626,7.338917
AGE_8,0.711458,0.288542,3.465701
AGE_9,0.676571,0.323429,3.091868
BODY_TYP_1,0.099859,0.900141,1.110937


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597995,0.402005,2.487528
AGE_2,0.537834,0.462166,2.163724
AGE_3,0.619065,0.380935,2.625123
AGE_4,0.538079,0.461921,2.164871
AGE_5,0.693589,0.306411,3.263591
AGE_6,0.920152,0.079848,12.523818
AGE_7,0.86374,0.13626,7.338916
AGE_8,0.711458,0.288542,3.465698
AGE_9,0.676571,0.323429,3.091868
BODY_TYP_1,0.099859,0.900141,1.110937


MAKE_6
Drop  MAKE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487527
AGE_2,0.537834,0.462166,2.163723
AGE_3,0.619064,0.380936,2.625113
AGE_4,0.538076,0.461924,2.164856
AGE_5,0.693587,0.306413,3.263567
AGE_6,0.920152,0.079848,12.523742
AGE_7,0.86374,0.13626,7.338914
AGE_8,0.711458,0.288542,3.465695
AGE_9,0.676569,0.323431,3.091848
BODY_TYP_1,0.099844,0.900156,1.110919


MODEL_9
Drop  MODEL_9

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537833,0.462167,2.163722
AGE_3,0.619061,0.380939,2.625095
AGE_4,0.538073,0.461927,2.164843
AGE_5,0.693586,0.306414,3.263554
AGE_6,0.920152,0.079848,12.523737
AGE_7,0.86374,0.13626,7.338903
AGE_8,0.711456,0.288544,3.465677
AGE_9,0.676569,0.323431,3.091847
BODY_TYP_1,0.099113,0.900887,1.110017


TYP_INT_2
Drop  TYP_INT_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537833,0.462167,2.163721
AGE_3,0.619061,0.380939,2.62509
AGE_4,0.538072,0.461928,2.164838
AGE_5,0.693586,0.306414,3.263554
AGE_6,0.920152,0.079848,12.523736
AGE_7,0.86374,0.13626,7.338903
AGE_8,0.711456,0.288544,3.465675
AGE_9,0.676568,0.323432,3.091843
BODY_TYP_1,0.099109,0.900891,1.110012


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169751,0.830249,1.204458
AGE_2,0.115079,0.884921,1.130044
AGE_3,0.046023,0.953977,1.048243
AGE_4,0.028404,0.971596,1.029235
AGE_5,0.045075,0.954925,1.047202
AGE_7,0.091902,0.908098,1.101202
AGE_8,0.049985,0.950015,1.052615
AGE_9,0.055438,0.944562,1.058692
BODY_TYP_1,0.099035,0.900965,1.109921
BODY_TYP_3,0.1567,0.8433,1.185818


PERMVIT_3
Drop  PERMVIT_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169445,0.830555,1.204015
AGE_2,0.115014,0.884986,1.129962
AGE_3,0.045867,0.954133,1.048072
AGE_4,0.028363,0.971637,1.029191
AGE_5,0.045029,0.954971,1.047153
AGE_7,0.09187,0.90813,1.101164
AGE_8,0.049948,0.950052,1.052574
AGE_9,0.055401,0.944599,1.05865
BODY_TYP_1,0.099014,0.900986,1.109895
BODY_TYP_3,0.156656,0.843344,1.185756


HOUR_4
Drop  HOUR_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169263,0.830737,1.203751
AGE_2,0.114942,0.885058,1.12987
AGE_3,0.045867,0.954133,1.048071
AGE_4,0.028325,0.971675,1.02915
AGE_5,0.044993,0.955007,1.047112
AGE_7,0.091264,0.908736,1.100429
AGE_8,0.049075,0.950925,1.051608
AGE_9,0.054042,0.945958,1.057129
BODY_TYP_1,0.09897,0.90103,1.109841
BODY_TYP_3,0.156656,0.843344,1.185755


PSU_3

./Analysis/CRSS_04_Dropped_Features_0_0_1_1_Hard.csv

(802700, 129)


../../Big_Files/CRSS_04_0_0_1_1_Hard.csv


Finished!


------------------------------------
Run =  _0_1_0_0
random_seed =  0
Get_Data
../../Big_Files/CRSS_03_0_1_0.csv

data.shape =  (802700, 64)
Thin_to_Easy_Features
Thin_to_Easy_Features()
Removed Features
ACC_TYPE
AGE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
INT_HWY
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT1
RELJCT2
REL_ROAD
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SEX
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
TYP_INT
VALIGN
VEH_AGE
VE_TOTAL
VPROFILE
VSPD_LIM
VSURCOND
VTRAFCON
VTRAFWAY
WRK_ZONE

data.shape:  (802700, 9)
End Thin_to_Easy_Features()

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

(802700, 40)


../../Big_Files/CRSS_04_0_1_0_0_Easy.csv

Get_Data
../../Big_Files/CRSS_03_0_1_

Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK,0.001941,0.998059,1.001945
HOUR,0.003367,0.996633,1.003378
MONTH,0.001541,0.998459,1.001544
PJ,0.544921,0.455079,2.197419
PSU,0.558642,0.441358,2.265735
REGION,0.054884,0.945116,1.058071
URBANICITY,0.021228,0.978772,1.021688
WEATHER,0.007293,0.992707,1.007346


PSU

./Analysis/CRSS_04_Dropped_Features_0_1_0_1_Easy.csv

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK_1,0.265281,0.734719,1.361065
DAY_WEEK_2,0.27491,0.72509,1.37914
DAY_WEEK_3,0.368987,0.631013,1.584753
DAY_WEEK_4,0.29271,0.70729,1.413848
HOUR_1,0.508426,0.491574,2.034284
HOUR_2,0.520803,0.479197,2.086823
HOUR_3,0.726447,0.273553,3.655598
HOUR_4,0.87102,0.12898,7.753113
HOUR_5,0.872805,0.127195,7.86192
HOUR_6,0.790779,0.209221,4.779631


PSU_3

./Analysis/CRSS_04_Dropped_Features_0_1_0_1_Easy.csv

(802700, 40)


../../Big_Files/CRSS_04_0_1_0_1_Easy.csv

Get_Data
../../Big_Files/CRSS_03_0_1_0.csv

data.shape =  (802700, 64)
Thin_to_Medium_Features
Thin_to_Medium_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
VEH_AGE
VE_TOTAL
VSURCOND
WRK_ZONE

data.shape:  (802700, 20)
End Thin_to_Medium_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.004674,0.995326,1.004696
DAY_WEEK,0.0062,0.9938,1.006238
HOUR,0.025111,0.974889,1.025758
INT_HWY,0.426355,0.573645,1.743239
MONTH,0.002407,0.997593,1.002413
PJ,0.567769,0.432231,2.313579
PSU,0.565145,0.434855,2.299619
REGION,0.079031,0.920969,1.085813
RELJCT1,0.103647,0.896353,1.115632
REL_ROAD,0.153531,0.846469,1.181379


PJ

./Analysis/CRSS_04_Dropped_Features_0_1_0_1_Medium.csv

(802700, 20)
Get_Dummies
(802700, 20)
(802700, 80)

(802700, 80)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.607442,0.392558,2.547396
AGE_2,0.542916,0.457084,2.187781
AGE_3,0.60211,0.39789,2.513257
AGE_4,0.514028,0.485972,2.057733
AGE_5,0.669989,0.330011,3.030201
AGE_6,0.910617,0.089383,11.187791
AGE_7,0.846873,0.153127,6.530546
AGE_8,0.683791,0.316209,3.16247
AGE_9,0.649071,0.350929,2.849576
DAY_WEEK_1,0.266957,0.733043,1.364177


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.607442,0.392558,2.547396
AGE_2,0.542916,0.457084,2.187781
AGE_3,0.602109,0.397891,2.513254
AGE_4,0.514028,0.485972,2.057733
AGE_5,0.669989,0.330011,3.030201
AGE_6,0.910617,0.089383,11.187791
AGE_7,0.846873,0.153127,6.530546
AGE_8,0.683794,0.316206,3.162491
AGE_9,0.649071,0.350929,2.849576
DAY_WEEK_1,0.266961,0.733039,1.364185


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.607442,0.392558,2.547396
AGE_2,0.542916,0.457084,2.187781
AGE_3,0.60211,0.39789,2.513257
AGE_4,0.514028,0.485972,2.057733
AGE_5,0.669989,0.330011,3.030201
AGE_6,0.910617,0.089383,11.187791
AGE_7,0.846873,0.153127,6.530546
AGE_8,0.683794,0.316206,3.162491
AGE_9,0.649071,0.350929,2.849576
DAY_WEEK_1,0.266962,0.733038,1.364185


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.607442,0.392558,2.547396
AGE_2,0.542914,0.457086,2.18777
AGE_3,0.602108,0.397892,2.513245
AGE_4,0.514028,0.485972,2.057731
AGE_5,0.669989,0.330011,3.0302
AGE_6,0.910617,0.089383,11.187787
AGE_7,0.846873,0.153127,6.530545
AGE_8,0.683793,0.316207,3.16249
AGE_9,0.649071,0.350929,2.849576
DAY_WEEK_1,0.266959,0.733041,1.36418


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.032708,0.967292,1.033814
AGE_2,0.0256,0.9744,1.026273
AGE_3,0.032797,0.967203,1.033909
AGE_4,0.021275,0.978725,1.021737
AGE_5,0.036409,0.963591,1.037785
AGE_7,0.078783,0.921217,1.085521
AGE_8,0.043794,0.956206,1.0458
AGE_9,0.046673,0.953327,1.048958
DAY_WEEK_1,0.266943,0.733057,1.364151
DAY_WEEK_2,0.277121,0.722879,1.383357


PSU_3

./Analysis/CRSS_04_Dropped_Features_0_1_0_1_Medium.csv

(802700, 76)


../../Big_Files/CRSS_04_0_1_0_1_Medium.csv

Get_Data
../../Big_Files/CRSS_03_0_1_0.csv

data.shape =  (802700, 64)
Thin_to_Hard_Features()
Thin_to_Hard_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
MAK_MOD
MAN_COLL
MAX_SEV
M_HARM
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
P_CRASH1
P_CRASH2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
VSURCOND

data.shape:  (802700, 32)
End Thin_to_Hard_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.237798,0.762202,1.311988
BODY_TYP,0.72159,0.27841,3.591819
DAY_WEEK,0.019389,0.980611,1.019772
HOUR,0.035016,0.964984,1.036287
INT_HWY,0.433665,0.566335,1.765741
LGT_COND,0.06593,0.93407,1.070584
MAKE,0.208102,0.791898,1.262789
MODEL,0.727826,0.272174,3.674126
MONTH,0.009527,0.990473,1.009618
NUMOCCS,0.43964,0.56036,1.784568


MODEL

./Analysis/CRSS_04_Dropped_Features_0_1_0_1_Hard.csv

(802700, 32)
Get_Dummies
(802700, 32)
(802700, 139)

(802700, 139)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608137,0.391863,2.551912
AGE_2,0.5438,0.4562,2.192019
AGE_3,0.620482,0.379518,2.634924
AGE_4,0.539533,0.460467,2.171708
AGE_5,0.694872,0.305128,3.277313
AGE_6,0.920724,0.079276,12.614217
AGE_7,0.864555,0.135445,7.383067
AGE_8,0.712727,0.287273,3.481013
AGE_9,0.677931,0.322069,3.104926
BODY_TYP_1,0.999863,0.000137,7307.457818


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608137,0.391863,2.551912
AGE_2,0.5438,0.4562,2.192019
AGE_3,0.620482,0.379518,2.634922
AGE_4,0.539533,0.460467,2.171708
AGE_5,0.694872,0.305128,3.277313
AGE_6,0.920724,0.079276,12.614216
AGE_7,0.864555,0.135445,7.383067
AGE_8,0.712727,0.287273,3.481013
AGE_9,0.677931,0.322069,3.104926
BODY_TYP_1,0.999863,0.000137,7307.457818


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608137,0.391863,2.551912
AGE_2,0.5438,0.4562,2.192019
AGE_3,0.620482,0.379518,2.634924
AGE_4,0.539533,0.460467,2.171708
AGE_5,0.694872,0.305128,3.277313
AGE_6,0.920724,0.079276,12.614217
AGE_7,0.864555,0.135445,7.383067
AGE_8,0.712727,0.287273,3.481013
AGE_9,0.677931,0.322069,3.104926
BODY_TYP_1,0.999863,0.000137,7307.457818


BODY_TYP_2
Drop  BODY_TYP_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608137,0.391863,2.551912
AGE_2,0.543798,0.456202,2.192011
AGE_3,0.620482,0.379518,2.634924
AGE_4,0.539533,0.460467,2.171707
AGE_5,0.694871,0.305129,3.277308
AGE_6,0.920724,0.079276,12.614214
AGE_7,0.864555,0.135445,7.383067
AGE_8,0.712727,0.287273,3.481013
AGE_9,0.677931,0.322069,3.104925
BODY_TYP_1,0.099261,0.900739,1.1102


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608137,0.391863,2.551912
AGE_2,0.543796,0.456204,2.192001
AGE_3,0.620481,0.379519,2.634912
AGE_4,0.539532,0.460468,2.171705
AGE_5,0.694871,0.305129,3.277306
AGE_6,0.920724,0.079276,12.614211
AGE_7,0.864555,0.135445,7.383066
AGE_8,0.712727,0.287273,3.481012
AGE_9,0.677931,0.322069,3.104925
BODY_TYP_1,0.09926,0.90074,1.110199


MAKE_6
Drop  MAKE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608136,0.391864,2.551908
AGE_2,0.543796,0.456204,2.192
AGE_3,0.620479,0.379521,2.634903
AGE_4,0.539529,0.460471,2.171691
AGE_5,0.694869,0.305131,3.277283
AGE_6,0.920724,0.079276,12.614143
AGE_7,0.864555,0.135445,7.383065
AGE_8,0.712727,0.287273,3.48101
AGE_9,0.677929,0.322071,3.104908
BODY_TYP_1,0.099247,0.900753,1.110182


MODEL_9
Drop  MODEL_9

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608136,0.391864,2.551908
AGE_2,0.543795,0.456205,2.191999
AGE_3,0.620476,0.379524,2.634883
AGE_4,0.539526,0.460474,2.171676
AGE_5,0.694868,0.305132,3.277269
AGE_6,0.920724,0.079276,12.614142
AGE_7,0.864555,0.135445,7.383055
AGE_8,0.712726,0.287274,3.480992
AGE_9,0.677929,0.322071,3.104906
BODY_TYP_1,0.098512,0.901488,1.109278


RELJCT2_1
Drop  RELJCT2_1

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608135,0.391865,2.551896
AGE_2,0.543795,0.456205,2.191999
AGE_3,0.620476,0.379524,2.634883
AGE_4,0.539525,0.460475,2.171672
AGE_5,0.694868,0.305132,3.277267
AGE_6,0.920724,0.079276,12.614115
AGE_7,0.864554,0.135446,7.383038
AGE_8,0.712726,0.287274,3.480991
AGE_9,0.677927,0.322073,3.104886
BODY_TYP_1,0.098509,0.901491,1.109273


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.178154,0.821846,1.216774
AGE_2,0.119748,0.880252,1.136039
AGE_3,0.046721,0.953279,1.04901
AGE_4,0.028572,0.971428,1.029412
AGE_5,0.045259,0.954741,1.047404
AGE_7,0.092843,0.907157,1.102345
AGE_8,0.050474,0.949526,1.053157
AGE_9,0.055846,0.944154,1.059149
BODY_TYP_1,0.098442,0.901558,1.109191
BODY_TYP_3,0.152994,0.847006,1.180629


PERMVIT_3
Drop  PERMVIT_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.177852,0.822148,1.216326
AGE_2,0.119678,0.880322,1.135948
AGE_3,0.046562,0.953438,1.048836
AGE_4,0.02853,0.97147,1.029368
AGE_5,0.045213,0.954787,1.047354
AGE_7,0.092814,0.907186,1.10231
AGE_8,0.05044,0.94956,1.053119
AGE_9,0.055812,0.944188,1.059111
BODY_TYP_1,0.098422,0.901578,1.109166
BODY_TYP_3,0.152955,0.847045,1.180575


HOUR_4
Drop  HOUR_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.177668,0.822332,1.216054
AGE_2,0.119601,0.880399,1.135849
AGE_3,0.046561,0.953439,1.048835
AGE_4,0.028492,0.971508,1.029327
AGE_5,0.045175,0.954825,1.047312
AGE_7,0.092202,0.907798,1.101566
AGE_8,0.049562,0.950438,1.052146
AGE_9,0.054447,0.945553,1.057582
BODY_TYP_1,0.098375,0.901625,1.109108
BODY_TYP_3,0.152955,0.847045,1.180574


PSU_3

./Analysis/CRSS_04_Dropped_Features_0_1_0_1_Hard.csv

(802700, 129)


../../Big_Files/CRSS_04_0_1_0_1_Hard.csv


Finished!


------------------------------------
Run =  _0_1_1_0
random_seed =  0
Get_Data
../../Big_Files/CRSS_03_0_1_1.csv

data.shape =  (802700, 65)
Thin_to_Easy_Features
Thin_to_Easy_Features()
Removed Features
ACC_TYPE
AGE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
INT_HWY
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT1
RELJCT2
REL_ROAD
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SEX
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
TYP_INT
Unnamed: 0
VALIGN
VEH_AGE
VE_TOTAL
VPROFILE
VSPD_LIM
VSURCOND
VTRAFCON
VTRAFWAY
WRK_ZONE

data.shape:  (802700, 9)
End Thin_to_Easy_Features()

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

(802700, 40)


../../Big_Files/CRSS_04_0_1_1_0_Easy.csv

Get_Data
../../Big_Files/C

Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK,0.001919,0.998081,1.001923
HOUR,0.003341,0.996659,1.003352
MONTH,0.001533,0.998467,1.001535
PJ,0.544922,0.455078,2.197425
PSU,0.558646,0.441354,2.265757
REGION,0.054948,0.945052,1.058143
URBANICITY,0.021225,0.978775,1.021686
WEATHER,0.007372,0.992628,1.007427


PSU

./Analysis/CRSS_04_Dropped_Features_0_1_1_1_Easy.csv

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK_1,0.265204,0.734796,1.360922
DAY_WEEK_2,0.274802,0.725198,1.378934
DAY_WEEK_3,0.36885,0.63115,1.58441
DAY_WEEK_4,0.292632,0.707368,1.413691
HOUR_1,0.507626,0.492374,2.030978
HOUR_2,0.520898,0.479102,2.087237
HOUR_3,0.72653,0.27347,3.656703
HOUR_4,0.870921,0.129079,7.74719
HOUR_5,0.873106,0.126894,7.880572
HOUR_6,0.790258,0.209742,4.767765


PSU_3

./Analysis/CRSS_04_Dropped_Features_0_1_1_1_Easy.csv

(802700, 40)


../../Big_Files/CRSS_04_0_1_1_1_Easy.csv

Get_Data
../../Big_Files/CRSS_03_0_1_1.csv

data.shape =  (802700, 65)
Thin_to_Medium_Features
Thin_to_Medium_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
Unnamed: 0
VEH_AGE
VE_TOTAL
VSURCOND
WRK_ZONE

data.shape:  (802700, 20)
End Thin_to_Medium_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.004459,0.995541,1.004479
DAY_WEEK,0.006134,0.993866,1.006172
HOUR,0.02471,0.97529,1.025336
INT_HWY,0.439369,0.560631,1.783703
MONTH,0.002339,0.997661,1.002345
PJ,0.567507,0.432493,2.312176
PSU,0.564422,0.435578,2.295801
REGION,0.076997,0.923003,1.08342
RELJCT1,0.099826,0.900174,1.110897
REL_ROAD,0.153483,0.846517,1.181312


PJ

./Analysis/CRSS_04_Dropped_Features_0_1_1_1_Medium.csv

(802700, 20)
Get_Dummies
(802700, 20)
(802700, 80)

(802700, 80)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597319,0.402681,2.483353
AGE_2,0.536947,0.463053,2.15958
AGE_3,0.600819,0.399181,2.50513
AGE_4,0.512807,0.487193,2.052575
AGE_5,0.668915,0.331085,3.020377
AGE_6,0.910135,0.089865,11.127859
AGE_7,0.846159,0.153841,6.500205
AGE_8,0.682749,0.317251,3.15208
AGE_9,0.647935,0.352065,2.840385
DAY_WEEK_1,0.266835,0.733165,1.36395


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597319,0.402681,2.483353
AGE_2,0.536947,0.463053,2.15958
AGE_3,0.600819,0.399181,2.50513
AGE_4,0.512807,0.487193,2.052575
AGE_5,0.668915,0.331085,3.020377
AGE_6,0.910135,0.089865,11.127859
AGE_7,0.846159,0.153841,6.500205
AGE_8,0.682749,0.317251,3.15208
AGE_9,0.647935,0.352065,2.840385
DAY_WEEK_1,0.266835,0.733165,1.36395


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597319,0.402681,2.483353
AGE_2,0.536945,0.463055,2.159569
AGE_3,0.600818,0.399182,2.505121
AGE_4,0.512807,0.487193,2.052574
AGE_5,0.668915,0.331085,3.020375
AGE_6,0.910135,0.089865,11.127859
AGE_7,0.846159,0.153841,6.500205
AGE_8,0.682749,0.317251,3.15208
AGE_9,0.647935,0.352065,2.840385
DAY_WEEK_1,0.266832,0.733168,1.363945


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597319,0.402681,2.483353
AGE_2,0.536944,0.463056,2.159568
AGE_3,0.600818,0.399182,2.505121
AGE_4,0.512807,0.487193,2.052573
AGE_5,0.668915,0.331085,3.020375
AGE_6,0.910135,0.089865,11.127847
AGE_7,0.846158,0.153842,6.500196
AGE_8,0.682748,0.317252,3.152072
AGE_9,0.647935,0.352065,2.840384
DAY_WEEK_1,0.266832,0.733168,1.363944


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.031443,0.968557,1.032463
AGE_2,0.02479,0.97521,1.02542
AGE_3,0.032314,0.967686,1.033393
AGE_4,0.021035,0.978965,1.021487
AGE_5,0.036,0.964,1.037344
AGE_7,0.078099,0.921901,1.084715
AGE_8,0.043399,0.956601,1.045368
AGE_9,0.046206,0.953794,1.048445
DAY_WEEK_1,0.266816,0.733184,1.363914
DAY_WEEK_2,0.276964,0.723036,1.383057


PSU_3

./Analysis/CRSS_04_Dropped_Features_0_1_1_1_Medium.csv

(802700, 76)


../../Big_Files/CRSS_04_0_1_1_1_Medium.csv

Get_Data
../../Big_Files/CRSS_03_0_1_1.csv

data.shape =  (802700, 65)
Thin_to_Hard_Features()
Thin_to_Hard_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
MAK_MOD
MAN_COLL
MAX_SEV
M_HARM
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
P_CRASH1
P_CRASH2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
Unnamed: 0
VSURCOND

data.shape:  (802700, 32)
End Thin_to_Hard_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.231434,0.768566,1.301124
BODY_TYP,0.724573,0.275427,3.630723
DAY_WEEK,0.019241,0.980759,1.019618
HOUR,0.034986,0.965014,1.036254
INT_HWY,0.445915,0.554085,1.804777
LGT_COND,0.064321,0.935679,1.068743
MAKE,0.204671,0.795329,1.257342
MODEL,0.730188,0.269812,3.706279
MONTH,0.009489,0.990511,1.00958
NUMOCCS,0.438204,0.561796,1.780007


MODEL

./Analysis/CRSS_04_Dropped_Features_0_1_1_1_Hard.csv

(802700, 32)
Get_Dummies
(802700, 32)
(802700, 139)

(802700, 139)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597997,0.402003,2.487546
AGE_2,0.537843,0.462157,2.163767
AGE_3,0.619073,0.380927,2.625175
AGE_4,0.538077,0.461923,2.164861
AGE_5,0.693589,0.306411,3.263595
AGE_6,0.92015,0.07985,12.523541
AGE_7,0.863738,0.136262,7.338793
AGE_8,0.711454,0.288546,3.465652
AGE_9,0.676572,0.323428,3.091876
BODY_TYP_1,0.999672,0.000328,3046.488884


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597997,0.402003,2.487546
AGE_2,0.537843,0.462157,2.163767
AGE_3,0.619073,0.380927,2.625175
AGE_4,0.538077,0.461923,2.164861
AGE_5,0.693589,0.306411,3.263595
AGE_6,0.92015,0.07985,12.523541
AGE_7,0.863738,0.136262,7.338792
AGE_8,0.711454,0.288546,3.465651
AGE_9,0.676572,0.323428,3.091875
BODY_TYP_1,0.999672,0.000328,3046.488884


BODY_TYP_2
Drop  BODY_TYP_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537841,0.462159,2.163757
AGE_3,0.619073,0.380927,2.625175
AGE_4,0.538077,0.461923,2.164861
AGE_5,0.693589,0.306411,3.263593
AGE_6,0.92015,0.07985,12.52354
AGE_7,0.863738,0.136262,7.338792
AGE_8,0.711454,0.288546,3.465648
AGE_9,0.676571,0.323429,3.091867
BODY_TYP_1,0.099858,0.900142,1.110936


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537838,0.462162,2.163746
AGE_3,0.619071,0.380929,2.625163
AGE_4,0.538076,0.461924,2.164859
AGE_5,0.693589,0.306411,3.263592
AGE_6,0.92015,0.07985,12.523539
AGE_7,0.863738,0.136262,7.338791
AGE_8,0.711454,0.288546,3.465648
AGE_9,0.676571,0.323429,3.091867
BODY_TYP_1,0.099856,0.900144,1.110934


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537838,0.462162,2.163745
AGE_3,0.619071,0.380929,2.625163
AGE_4,0.538076,0.461924,2.164858
AGE_5,0.693589,0.306411,3.263592
AGE_6,0.92015,0.07985,12.523536
AGE_7,0.863738,0.136262,7.33879
AGE_8,0.711453,0.288547,3.465644
AGE_9,0.676571,0.323429,3.091867
BODY_TYP_1,0.099856,0.900144,1.110933


MAKE_6
Drop  MAKE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487524
AGE_2,0.537838,0.462162,2.163744
AGE_3,0.61907,0.38093,2.625153
AGE_4,0.538073,0.461927,2.164844
AGE_5,0.693587,0.306413,3.263568
AGE_6,0.92015,0.07985,12.52346
AGE_7,0.863738,0.136262,7.338788
AGE_8,0.711453,0.288547,3.465641
AGE_9,0.676569,0.323431,3.091847
BODY_TYP_1,0.099842,0.900158,1.110916


MODEL_9
Drop  MODEL_9

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487524
AGE_2,0.537838,0.462162,2.163743
AGE_3,0.619067,0.380933,2.625136
AGE_4,0.53807,0.46193,2.16483
AGE_5,0.693586,0.306414,3.263556
AGE_6,0.92015,0.07985,12.523455
AGE_7,0.863738,0.136262,7.338777
AGE_8,0.711452,0.288548,3.465624
AGE_9,0.676569,0.323431,3.091846
BODY_TYP_1,0.09911,0.90089,1.110013


TYP_INT_2
Drop  TYP_INT_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487524
AGE_2,0.537838,0.462162,2.163742
AGE_3,0.619066,0.380934,2.625129
AGE_4,0.538069,0.461931,2.164827
AGE_5,0.693586,0.306414,3.263555
AGE_6,0.92015,0.07985,12.523453
AGE_7,0.863738,0.136262,7.338776
AGE_8,0.711451,0.288549,3.465621
AGE_9,0.676568,0.323432,3.091844
BODY_TYP_1,0.099106,0.900894,1.110009


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169725,0.830275,1.20442
AGE_2,0.115079,0.884921,1.130045
AGE_3,0.046054,0.953946,1.048277
AGE_4,0.028413,0.971587,1.029244
AGE_5,0.045087,0.954913,1.047216
AGE_7,0.091906,0.908094,1.101208
AGE_8,0.049986,0.950014,1.052616
AGE_9,0.055467,0.944533,1.058724
BODY_TYP_1,0.099033,0.900967,1.109918
BODY_TYP_3,0.156685,0.843315,1.185797


PERMVIT_3
Drop  PERMVIT_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169417,0.830583,1.203974
AGE_2,0.115014,0.884986,1.129962
AGE_3,0.045897,0.954103,1.048105
AGE_4,0.028371,0.971629,1.0292
AGE_5,0.045042,0.954958,1.047166
AGE_7,0.091875,0.908125,1.10117
AGE_8,0.049949,0.950051,1.052575
AGE_9,0.055429,0.944571,1.058682
BODY_TYP_1,0.099011,0.900989,1.109892
BODY_TYP_3,0.156641,0.843359,1.185735


HOUR_4
Drop  HOUR_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169236,0.830764,1.203711
AGE_2,0.114942,0.885058,1.12987
AGE_3,0.045896,0.954104,1.048104
AGE_4,0.028333,0.971667,1.02916
AGE_5,0.045005,0.954995,1.047126
AGE_7,0.091268,0.908732,1.100435
AGE_8,0.049076,0.950924,1.051608
AGE_9,0.05407,0.94593,1.05716
BODY_TYP_1,0.098968,0.901032,1.109839
BODY_TYP_3,0.15664,0.84336,1.185734


PSU_3

./Analysis/CRSS_04_Dropped_Features_0_1_1_1_Hard.csv

(802700, 129)


../../Big_Files/CRSS_04_0_1_1_1_Hard.csv


Finished!


------------------------------------
Run =  _1_0_0_0
random_seed =  1
Get_Data
../../Big_Files/CRSS_03_1_0_0.csv

data.shape =  (802700, 67)
Thin_to_Easy_Features
Thin_to_Easy_Features()
Removed Features
ACC_TYPE
AGE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
INT_HWY
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT1
RELJCT2
REL_ROAD
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SEX
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
TYP_INT
VALIGN
VEH_AGE
VE_FORMS
VE_TOTAL
VPROFILE
VSPD_LIM
VSURCOND
VTCONT_F
VTRAFCON
VTRAFWAY
WRK_ZONE

data.shape:  (802700, 9)
End Thin_to_Easy_Features()

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

(802700, 40)


../../Big_Files/CRSS_04_1_0_0_0_Easy.csv

Get_Data
.

Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK,0.001938,0.998062,1.001942
HOUR,0.003358,0.996642,1.003369
MONTH,0.001539,0.998461,1.001541
PJ,0.54492,0.45508,2.197414
PSU,0.558642,0.441358,2.265737
REGION,0.05489,0.94511,1.058078
URBANICITY,0.021226,0.978774,1.021686
WEATHER,0.007298,0.992702,1.007351


PSU

./Analysis/CRSS_04_Dropped_Features_1_0_0_1_Easy.csv

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK_1,0.265291,0.734709,1.361083
DAY_WEEK_2,0.274908,0.725092,1.379136
DAY_WEEK_3,0.368983,0.631017,1.584743
DAY_WEEK_4,0.292707,0.707293,1.413841
HOUR_1,0.508384,0.491616,2.034107
HOUR_2,0.52085,0.47915,2.087029
HOUR_3,0.726463,0.273537,3.65581
HOUR_4,0.871028,0.128972,7.753608
HOUR_5,0.872817,0.127183,7.862668
HOUR_6,0.790796,0.209204,4.780019


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_0_0_1_Easy.csv

(802700, 40)


../../Big_Files/CRSS_04_1_0_0_1_Easy.csv

Get_Data
../../Big_Files/CRSS_03_1_0_0.csv

data.shape =  (802700, 67)
Thin_to_Medium_Features
Thin_to_Medium_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
VEH_AGE
VE_FORMS
VE_TOTAL
VSURCOND
VTCONT_F
WRK_ZONE

data.shape:  (802700, 20)
End Thin_to_Medium_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.004693,0.995307,1.004715
DAY_WEEK,0.00618,0.99382,1.006219
HOUR,0.025061,0.974939,1.025706
INT_HWY,0.425172,0.574828,1.73965
MONTH,0.002401,0.997599,1.002407
PJ,0.567771,0.432229,2.313589
PSU,0.565196,0.434804,2.299888
REGION,0.079646,0.920354,1.086538
RELJCT1,0.103989,0.896011,1.116057
REL_ROAD,0.153296,0.846704,1.18105


PJ

./Analysis/CRSS_04_Dropped_Features_1_0_0_1_Medium.csv

(802700, 20)
Get_Dummies
(802700, 20)
(802700, 80)

(802700, 80)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.60776,0.39224,2.549459
AGE_2,0.542785,0.457215,2.187154
AGE_3,0.601919,0.398081,2.512053
AGE_4,0.513817,0.486183,2.056838
AGE_5,0.669828,0.330172,3.028726
AGE_6,0.91056,0.08944,11.180734
AGE_7,0.846797,0.153203,6.527305
AGE_8,0.683626,0.316374,3.160819
AGE_9,0.648939,0.351061,2.848509
DAY_WEEK_1,0.266957,0.733043,1.364176


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.60776,0.39224,2.549459
AGE_2,0.542785,0.457215,2.187154
AGE_3,0.601914,0.398086,2.512021
AGE_4,0.513817,0.486183,2.056838
AGE_5,0.669828,0.330172,3.028726
AGE_6,0.91056,0.08944,11.180734
AGE_7,0.846797,0.153203,6.527305
AGE_8,0.683627,0.316373,3.160824
AGE_9,0.648939,0.351061,2.848509
DAY_WEEK_1,0.266957,0.733043,1.364177


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.60776,0.39224,2.549459
AGE_2,0.542785,0.457215,2.187154
AGE_3,0.601919,0.398081,2.512053
AGE_4,0.513817,0.486183,2.056838
AGE_5,0.669828,0.330172,3.028726
AGE_6,0.91056,0.08944,11.180734
AGE_7,0.846797,0.153203,6.527305
AGE_8,0.683627,0.316373,3.160824
AGE_9,0.648939,0.351061,2.848509
DAY_WEEK_1,0.266961,0.733039,1.364183


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.60776,0.39224,2.549459
AGE_2,0.542783,0.457217,2.187144
AGE_3,0.601917,0.398083,2.512041
AGE_4,0.513817,0.486183,2.056837
AGE_5,0.669828,0.330172,3.028725
AGE_6,0.91056,0.08944,11.180731
AGE_7,0.846797,0.153203,6.527304
AGE_8,0.683627,0.316373,3.160823
AGE_9,0.648939,0.351061,2.848509
DAY_WEEK_1,0.266958,0.733042,1.364178


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.032725,0.967275,1.033832
AGE_2,0.025652,0.974348,1.026327
AGE_3,0.032787,0.967213,1.033899
AGE_4,0.021281,0.978719,1.021744
AGE_5,0.03642,0.96358,1.037797
AGE_7,0.07885,0.92115,1.0856
AGE_8,0.04382,0.95618,1.045828
AGE_9,0.046703,0.953297,1.048991
DAY_WEEK_1,0.266941,0.733059,1.364147
DAY_WEEK_2,0.277107,0.722893,1.38333


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_0_0_1_Medium.csv

(802700, 76)


../../Big_Files/CRSS_04_1_0_0_1_Medium.csv

Get_Data
../../Big_Files/CRSS_03_1_0_0.csv

data.shape =  (802700, 67)
Thin_to_Hard_Features()
Thin_to_Hard_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
M_HARM
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
P_CRASH1
P_CRASH2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
VSURCOND
VTCONT_F

data.shape:  (802700, 33)
End Thin_to_Hard_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.238128,0.761872,1.312556
BODY_TYP,0.721619,0.278381,3.592195
DAY_WEEK,0.019376,0.980624,1.019758
HOUR,0.035271,0.964729,1.036561
INT_HWY,0.432495,0.567505,1.762098
LGT_COND,0.066015,0.933985,1.070681
MAKE,0.208377,0.791623,1.263227
MODEL,0.727943,0.272057,3.675694
MONTH,0.0095,0.9905,1.009591
NUMOCCS,0.439575,0.560425,1.784359


VE_FORMS
Drop  VE_FORMS

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.238124,0.761876,1.31255
BODY_TYP,0.721618,0.278382,3.592192
DAY_WEEK,0.019366,0.980634,1.019748
HOUR,0.035,0.965,1.03627
INT_HWY,0.43249,0.56751,1.762085
LGT_COND,0.065877,0.934123,1.070523
MAKE,0.208373,0.791627,1.263221
MODEL,0.727942,0.272058,3.675693
MONTH,0.009498,0.990502,1.009589
NUMOCCS,0.439574,0.560426,1.784358


MODEL

./Analysis/CRSS_04_Dropped_Features_1_0_0_1_Hard.csv

(802700, 32)
Get_Dummies
(802700, 32)
(802700, 139)

(802700, 139)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608488,0.391512,2.554202
AGE_2,0.54366,0.45634,2.191349
AGE_3,0.6203,0.3797,2.633661
AGE_4,0.539333,0.460667,2.170763
AGE_5,0.69472,0.30528,3.275686
AGE_6,0.920682,0.079318,12.607531
AGE_7,0.864495,0.135505,7.379786
AGE_8,0.712588,0.287412,3.479328
AGE_9,0.677815,0.322185,3.103809
BODY_TYP_1,0.999863,0.000137,7308.533458


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608488,0.391512,2.554202
AGE_2,0.54366,0.45634,2.191348
AGE_3,0.620301,0.379699,2.633663
AGE_4,0.539333,0.460667,2.170763
AGE_5,0.69472,0.30528,3.275686
AGE_6,0.920682,0.079318,12.607531
AGE_7,0.864495,0.135505,7.379785
AGE_8,0.712588,0.287412,3.479328
AGE_9,0.677815,0.322185,3.103809
BODY_TYP_1,0.999863,0.000137,7308.533456


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608488,0.391512,2.554202
AGE_2,0.54366,0.45634,2.191349
AGE_3,0.620301,0.379699,2.633663
AGE_4,0.539333,0.460667,2.170763
AGE_5,0.69472,0.30528,3.275686
AGE_6,0.920682,0.079318,12.607531
AGE_7,0.864495,0.135505,7.379786
AGE_8,0.712588,0.287412,3.479328
AGE_9,0.677815,0.322185,3.103809
BODY_TYP_1,0.999863,0.000137,7308.533458


BODY_TYP_2
Drop  BODY_TYP_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608488,0.391512,2.554202
AGE_2,0.543658,0.456342,2.191341
AGE_3,0.620301,0.379699,2.633663
AGE_4,0.539333,0.460667,2.170763
AGE_5,0.69472,0.30528,3.27568
AGE_6,0.920682,0.079318,12.607529
AGE_7,0.864495,0.135505,7.379786
AGE_8,0.712588,0.287412,3.479328
AGE_9,0.677815,0.322185,3.103808
BODY_TYP_1,0.099337,0.900663,1.110293


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608488,0.391512,2.554202
AGE_2,0.543656,0.456344,2.19133
AGE_3,0.620299,0.379701,2.633651
AGE_4,0.539332,0.460668,2.170762
AGE_5,0.69472,0.30528,3.275679
AGE_6,0.920682,0.079318,12.607526
AGE_7,0.864495,0.135505,7.379785
AGE_8,0.712588,0.287412,3.479327
AGE_9,0.677815,0.322185,3.103808
BODY_TYP_1,0.099336,0.900664,1.110292


MAKE_6
Drop  MAKE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608488,0.391512,2.5542
AGE_2,0.543656,0.456344,2.19133
AGE_3,0.620297,0.379703,2.633641
AGE_4,0.539329,0.460671,2.170747
AGE_5,0.694717,0.305283,3.275654
AGE_6,0.920682,0.079318,12.607453
AGE_7,0.864495,0.135505,7.379783
AGE_8,0.712588,0.287412,3.479324
AGE_9,0.677813,0.322187,3.103789
BODY_TYP_1,0.099322,0.900678,1.110275


MODEL_9
Drop  MODEL_9

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608488,0.391512,2.554199
AGE_2,0.543656,0.456344,2.191329
AGE_3,0.620295,0.379705,2.633621
AGE_4,0.539326,0.460674,2.170732
AGE_5,0.694716,0.305284,3.275641
AGE_6,0.920682,0.079318,12.607451
AGE_7,0.864494,0.135506,7.379772
AGE_8,0.712586,0.287414,3.479306
AGE_9,0.677813,0.322187,3.103787
BODY_TYP_1,0.098587,0.901413,1.109369


RELJCT2_1
Drop  RELJCT2_1

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608486,0.391514,2.554186
AGE_2,0.543656,0.456344,2.191328
AGE_3,0.620295,0.379705,2.633621
AGE_4,0.539325,0.460675,2.170727
AGE_5,0.694716,0.305284,3.27564
AGE_6,0.920682,0.079318,12.607435
AGE_7,0.864494,0.135506,7.37976
AGE_8,0.712586,0.287414,3.479304
AGE_9,0.67781,0.32219,3.103762
BODY_TYP_1,0.098584,0.901416,1.109366


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.178738,0.821262,1.217638
AGE_2,0.119811,0.880189,1.136119
AGE_3,0.04671,0.95329,1.048999
AGE_4,0.02858,0.97142,1.02942
AGE_5,0.045262,0.954738,1.047408
AGE_7,0.092905,0.907095,1.10242
AGE_8,0.050518,0.949482,1.053206
AGE_9,0.055886,0.944114,1.059194
BODY_TYP_1,0.098518,0.901482,1.109285
BODY_TYP_3,0.153083,0.846917,1.180753


PERMVIT_3
Drop  PERMVIT_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.178427,0.821573,1.217177
AGE_2,0.119742,0.880258,1.13603
AGE_3,0.046551,0.953449,1.048824
AGE_4,0.028537,0.971463,1.029375
AGE_5,0.045215,0.954785,1.047356
AGE_7,0.092878,0.907122,1.102387
AGE_8,0.050485,0.949515,1.053169
AGE_9,0.055852,0.944148,1.059156
BODY_TYP_1,0.098498,0.901502,1.10926
BODY_TYP_3,0.153043,0.846957,1.180698


HOUR_4
Drop  HOUR_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.178244,0.821756,1.216907
AGE_2,0.119668,0.880332,1.135935
AGE_3,0.04655,0.95345,1.048823
AGE_4,0.028499,0.971501,1.029335
AGE_5,0.045178,0.954822,1.047315
AGE_7,0.092264,0.907736,1.101642
AGE_8,0.049606,0.950394,1.052196
AGE_9,0.054488,0.945512,1.057628
BODY_TYP_1,0.098451,0.901549,1.109202
BODY_TYP_3,0.153043,0.846957,1.180697


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_0_0_1_Hard.csv

(802700, 129)


../../Big_Files/CRSS_04_1_0_0_1_Hard.csv


Finished!


------------------------------------
Run =  _1_0_1_0
random_seed =  1
Get_Data
../../Big_Files/CRSS_03_1_0_1.csv

data.shape =  (802700, 68)
Thin_to_Easy_Features
Thin_to_Easy_Features()
Removed Features
ACC_TYPE
AGE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
INT_HWY
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT1
RELJCT2
REL_ROAD
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SEX
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
TYP_INT
Unnamed: 0
VALIGN
VEH_AGE
VE_FORMS
VE_TOTAL
VPROFILE
VSPD_LIM
VSURCOND
VTCONT_F
VTRAFCON
VTRAFWAY
WRK_ZONE

data.shape:  (802700, 9)
End Thin_to_Easy_Features()

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

(802700, 40)


../../Big_Files/CRSS_04_1_0_1_0_Easy.csv


Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK,0.001919,0.998081,1.001923
HOUR,0.003341,0.996659,1.003352
MONTH,0.001533,0.998467,1.001535
PJ,0.544922,0.455078,2.197425
PSU,0.558646,0.441354,2.265757
REGION,0.054948,0.945052,1.058143
URBANICITY,0.021225,0.978775,1.021686
WEATHER,0.007372,0.992628,1.007427


PSU

./Analysis/CRSS_04_Dropped_Features_1_0_1_1_Easy.csv

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK_1,0.265204,0.734796,1.360922
DAY_WEEK_2,0.274802,0.725198,1.378934
DAY_WEEK_3,0.36885,0.63115,1.58441
DAY_WEEK_4,0.292632,0.707368,1.413691
HOUR_1,0.507626,0.492374,2.030978
HOUR_2,0.520898,0.479102,2.087237
HOUR_3,0.72653,0.27347,3.656703
HOUR_4,0.870921,0.129079,7.74719
HOUR_5,0.873106,0.126894,7.880572
HOUR_6,0.790258,0.209742,4.767765


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_0_1_1_Easy.csv

(802700, 40)


../../Big_Files/CRSS_04_1_0_1_1_Easy.csv

Get_Data
../../Big_Files/CRSS_03_1_0_1.csv

data.shape =  (802700, 68)
Thin_to_Medium_Features
Thin_to_Medium_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
Unnamed: 0
VEH_AGE
VE_FORMS
VE_TOTAL
VSURCOND
VTCONT_F
WRK_ZONE

data.shape:  (802700, 20)
End Thin_to_Medium_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.004459,0.995541,1.004479
DAY_WEEK,0.006147,0.993853,1.006185
HOUR,0.024705,0.975295,1.025331
INT_HWY,0.439333,0.560667,1.78359
MONTH,0.002359,0.997641,1.002365
PJ,0.56762,0.43238,2.312782
PSU,0.564426,0.435574,2.295822
REGION,0.077074,0.922926,1.083511
RELJCT1,0.099627,0.900373,1.110651
REL_ROAD,0.153378,0.846622,1.181165


PJ

./Analysis/CRSS_04_Dropped_Features_1_0_1_1_Medium.csv

(802700, 20)
Get_Dummies
(802700, 20)
(802700, 80)

(802700, 80)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597318,0.402682,2.483352
AGE_2,0.536943,0.463057,2.159562
AGE_3,0.600812,0.399188,2.505083
AGE_4,0.512805,0.487195,2.052564
AGE_5,0.668914,0.331086,3.020364
AGE_6,0.910135,0.089865,11.127861
AGE_7,0.846159,0.153841,6.500209
AGE_8,0.68275,0.31725,3.152093
AGE_9,0.647935,0.352065,2.840381
DAY_WEEK_1,0.266834,0.733166,1.363948


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597318,0.402682,2.483352
AGE_2,0.536943,0.463057,2.159562
AGE_3,0.600812,0.399188,2.505083
AGE_4,0.512805,0.487195,2.052564
AGE_5,0.668914,0.331086,3.020364
AGE_6,0.910135,0.089865,11.12786
AGE_7,0.846159,0.153841,6.500209
AGE_8,0.68275,0.31725,3.152093
AGE_9,0.647935,0.352065,2.840381
DAY_WEEK_1,0.266834,0.733166,1.363948


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597318,0.402682,2.483352
AGE_2,0.536941,0.463059,2.159552
AGE_3,0.60081,0.39919,2.505073
AGE_4,0.512804,0.487196,2.052563
AGE_5,0.668914,0.331086,3.020363
AGE_6,0.910135,0.089865,11.12786
AGE_7,0.846159,0.153841,6.500209
AGE_8,0.68275,0.31725,3.152092
AGE_9,0.647934,0.352066,2.84038
DAY_WEEK_1,0.266831,0.733169,1.363943


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597318,0.402682,2.483352
AGE_2,0.536941,0.463059,2.15955
AGE_3,0.60081,0.39919,2.505073
AGE_4,0.512804,0.487196,2.052563
AGE_5,0.668914,0.331086,3.020363
AGE_6,0.910135,0.089865,11.127848
AGE_7,0.846159,0.153841,6.500199
AGE_8,0.68275,0.31725,3.152084
AGE_9,0.647934,0.352066,2.84038
DAY_WEEK_1,0.266831,0.733169,1.363942


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.031437,0.968563,1.032457
AGE_2,0.024766,0.975234,1.025395
AGE_3,0.032265,0.967735,1.033341
AGE_4,0.021025,0.978975,1.021476
AGE_5,0.035985,0.964015,1.037328
AGE_7,0.078098,0.921902,1.084715
AGE_8,0.043404,0.956596,1.045373
AGE_9,0.046198,0.953802,1.048436
DAY_WEEK_1,0.266815,0.733185,1.363912
DAY_WEEK_2,0.27697,0.72303,1.383069


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_0_1_1_Medium.csv

(802700, 76)


../../Big_Files/CRSS_04_1_0_1_1_Medium.csv

Get_Data
../../Big_Files/CRSS_03_1_0_1.csv

data.shape =  (802700, 68)
Thin_to_Hard_Features()
Thin_to_Hard_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
M_HARM
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
P_CRASH1
P_CRASH2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
Unnamed: 0
VSURCOND
VTCONT_F

data.shape:  (802700, 33)
End Thin_to_Hard_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.231482,0.768518,1.301206
BODY_TYP,0.724571,0.275429,3.630705
DAY_WEEK,0.019266,0.980734,1.019644
HOUR,0.035253,0.964747,1.036541
INT_HWY,0.445899,0.554101,1.804726
LGT_COND,0.064477,0.935523,1.068921
MAKE,0.204663,0.795337,1.257328
MODEL,0.730183,0.269817,3.706221
MONTH,0.009515,0.990485,1.009606
NUMOCCS,0.438369,0.561631,1.780529


VE_FORMS
Drop  VE_FORMS

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.231479,0.768521,1.301201
BODY_TYP,0.724571,0.275429,3.630705
DAY_WEEK,0.019256,0.980744,1.019634
HOUR,0.034986,0.965014,1.036254
INT_HWY,0.445893,0.554107,1.804707
LGT_COND,0.064344,0.935656,1.068768
MAKE,0.204659,0.795341,1.257322
MODEL,0.730183,0.269817,3.706218
MONTH,0.009513,0.990487,1.009605
NUMOCCS,0.438369,0.561631,1.780529


MODEL

./Analysis/CRSS_04_Dropped_Features_1_0_1_1_Hard.csv

(802700, 32)
Get_Dummies
(802700, 32)
(802700, 139)

(802700, 139)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597998,0.402002,2.487548
AGE_2,0.537839,0.462161,2.163746
AGE_3,0.619067,0.380933,2.625134
AGE_4,0.538079,0.461921,2.164874
AGE_5,0.693589,0.306411,3.263593
AGE_6,0.920152,0.079848,12.523823
AGE_7,0.86374,0.13626,7.338918
AGE_8,0.711458,0.288542,3.465705
AGE_9,0.676572,0.323428,3.091877
BODY_TYP_1,0.999672,0.000328,3046.469225


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597998,0.402002,2.487548
AGE_2,0.537839,0.462161,2.163746
AGE_3,0.619067,0.380933,2.625134
AGE_4,0.538079,0.461921,2.164874
AGE_5,0.693589,0.306411,3.263593
AGE_6,0.920152,0.079848,12.523823
AGE_7,0.86374,0.13626,7.338918
AGE_8,0.711458,0.288542,3.465704
AGE_9,0.676572,0.323428,3.091876
BODY_TYP_1,0.999672,0.000328,3046.469225


BODY_TYP_2
Drop  BODY_TYP_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597995,0.402005,2.487528
AGE_2,0.537836,0.462164,2.163736
AGE_3,0.619067,0.380933,2.625134
AGE_4,0.538079,0.461921,2.164874
AGE_5,0.693589,0.306411,3.263592
AGE_6,0.920152,0.079848,12.523822
AGE_7,0.86374,0.13626,7.338918
AGE_8,0.711458,0.288542,3.465701
AGE_9,0.676571,0.323429,3.091868
BODY_TYP_1,0.099861,0.900139,1.11094


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597995,0.402005,2.487528
AGE_2,0.537834,0.462166,2.163725
AGE_3,0.619065,0.380935,2.625123
AGE_4,0.538079,0.461921,2.164872
AGE_5,0.693589,0.306411,3.263591
AGE_6,0.920152,0.079848,12.523821
AGE_7,0.86374,0.13626,7.338917
AGE_8,0.711458,0.288542,3.465701
AGE_9,0.676571,0.323429,3.091868
BODY_TYP_1,0.099859,0.900141,1.110937


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597995,0.402005,2.487528
AGE_2,0.537834,0.462166,2.163724
AGE_3,0.619065,0.380935,2.625123
AGE_4,0.538079,0.461921,2.164871
AGE_5,0.693589,0.306411,3.263591
AGE_6,0.920152,0.079848,12.523818
AGE_7,0.86374,0.13626,7.338916
AGE_8,0.711458,0.288542,3.465698
AGE_9,0.676571,0.323429,3.091868
BODY_TYP_1,0.099859,0.900141,1.110937


MAKE_6
Drop  MAKE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487527
AGE_2,0.537834,0.462166,2.163723
AGE_3,0.619064,0.380936,2.625113
AGE_4,0.538076,0.461924,2.164856
AGE_5,0.693587,0.306413,3.263567
AGE_6,0.920152,0.079848,12.523742
AGE_7,0.86374,0.13626,7.338914
AGE_8,0.711458,0.288542,3.465695
AGE_9,0.676569,0.323431,3.091848
BODY_TYP_1,0.099844,0.900156,1.110919


MODEL_9
Drop  MODEL_9

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537833,0.462167,2.163722
AGE_3,0.619061,0.380939,2.625095
AGE_4,0.538073,0.461927,2.164843
AGE_5,0.693586,0.306414,3.263554
AGE_6,0.920152,0.079848,12.523737
AGE_7,0.86374,0.13626,7.338903
AGE_8,0.711456,0.288544,3.465677
AGE_9,0.676569,0.323431,3.091847
BODY_TYP_1,0.099113,0.900887,1.110017


TYP_INT_2
Drop  TYP_INT_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537833,0.462167,2.163721
AGE_3,0.619061,0.380939,2.62509
AGE_4,0.538072,0.461928,2.164838
AGE_5,0.693586,0.306414,3.263554
AGE_6,0.920152,0.079848,12.523736
AGE_7,0.86374,0.13626,7.338903
AGE_8,0.711456,0.288544,3.465675
AGE_9,0.676568,0.323432,3.091843
BODY_TYP_1,0.099109,0.900891,1.110012


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169751,0.830249,1.204458
AGE_2,0.115079,0.884921,1.130044
AGE_3,0.046023,0.953977,1.048243
AGE_4,0.028404,0.971596,1.029235
AGE_5,0.045075,0.954925,1.047202
AGE_7,0.091902,0.908098,1.101202
AGE_8,0.049985,0.950015,1.052615
AGE_9,0.055438,0.944562,1.058692
BODY_TYP_1,0.099035,0.900965,1.109921
BODY_TYP_3,0.1567,0.8433,1.185818


PERMVIT_3
Drop  PERMVIT_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169445,0.830555,1.204015
AGE_2,0.115014,0.884986,1.129962
AGE_3,0.045867,0.954133,1.048072
AGE_4,0.028363,0.971637,1.029191
AGE_5,0.045029,0.954971,1.047153
AGE_7,0.09187,0.90813,1.101164
AGE_8,0.049948,0.950052,1.052574
AGE_9,0.055401,0.944599,1.05865
BODY_TYP_1,0.099014,0.900986,1.109895
BODY_TYP_3,0.156656,0.843344,1.185756


HOUR_4
Drop  HOUR_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169263,0.830737,1.203751
AGE_2,0.114942,0.885058,1.12987
AGE_3,0.045867,0.954133,1.048071
AGE_4,0.028325,0.971675,1.02915
AGE_5,0.044993,0.955007,1.047112
AGE_7,0.091264,0.908736,1.100429
AGE_8,0.049075,0.950925,1.051608
AGE_9,0.054042,0.945958,1.057129
BODY_TYP_1,0.09897,0.90103,1.109841
BODY_TYP_3,0.156656,0.843344,1.185755


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_0_1_1_Hard.csv

(802700, 129)


../../Big_Files/CRSS_04_1_0_1_1_Hard.csv


Finished!


------------------------------------
Run =  _1_1_0_0
random_seed =  1
Get_Data
../../Big_Files/CRSS_03_1_1_0.csv

data.shape =  (802700, 64)
Thin_to_Easy_Features
Thin_to_Easy_Features()
Removed Features
ACC_TYPE
AGE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
INT_HWY
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT1
RELJCT2
REL_ROAD
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SEX
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
TYP_INT
VALIGN
VEH_AGE
VE_TOTAL
VPROFILE
VSPD_LIM
VSURCOND
VTRAFCON
VTRAFWAY
WRK_ZONE

data.shape:  (802700, 9)
End Thin_to_Easy_Features()

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

(802700, 40)


../../Big_Files/CRSS_04_1_1_0_0_Easy.csv

Get_Data
../../Big_Files/CRSS_03_1_1_

Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK,0.00194,0.99806,1.001943
HOUR,0.003365,0.996635,1.003377
MONTH,0.001543,0.998457,1.001546
PJ,0.544919,0.455081,2.197412
PSU,0.558642,0.441358,2.265733
REGION,0.054882,0.945118,1.058068
URBANICITY,0.021225,0.978775,1.021685
WEATHER,0.007288,0.992712,1.007341


PSU

./Analysis/CRSS_04_Dropped_Features_1_1_0_1_Easy.csv

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK_1,0.26528,0.73472,1.361062
DAY_WEEK_2,0.274914,0.725086,1.379146
DAY_WEEK_3,0.368981,0.631019,1.584739
DAY_WEEK_4,0.292711,0.707289,1.41385
HOUR_1,0.508527,0.491473,2.034699
HOUR_2,0.520912,0.479088,2.0873
HOUR_3,0.72652,0.27348,3.656568
HOUR_4,0.871061,0.128939,7.755596
HOUR_5,0.872847,0.127153,7.864516
HOUR_6,0.790847,0.209153,4.781188


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_1_0_1_Easy.csv

(802700, 40)


../../Big_Files/CRSS_04_1_1_0_1_Easy.csv

Get_Data
../../Big_Files/CRSS_03_1_1_0.csv

data.shape =  (802700, 64)
Thin_to_Medium_Features
Thin_to_Medium_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
VEH_AGE
VE_TOTAL
VSURCOND
WRK_ZONE

data.shape:  (802700, 20)
End Thin_to_Medium_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.004695,0.995305,1.004717
DAY_WEEK,0.006207,0.993793,1.006246
HOUR,0.025049,0.974951,1.025692
INT_HWY,0.425809,0.574191,1.741582
MONTH,0.002412,0.997588,1.002417
PJ,0.567827,0.432173,2.313891
PSU,0.565116,0.434884,2.299462
REGION,0.078887,0.921113,1.085643
RELJCT1,0.10402,0.89598,1.116096
REL_ROAD,0.153585,0.846415,1.181454


PJ

./Analysis/CRSS_04_Dropped_Features_1_1_0_1_Medium.csv

(802700, 20)
Get_Dummies
(802700, 20)
(802700, 80)

(802700, 80)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.607887,0.392113,2.550282
AGE_2,0.542668,0.457332,2.186594
AGE_3,0.601899,0.398101,2.511927
AGE_4,0.513779,0.486221,2.056679
AGE_5,0.669794,0.330206,3.028416
AGE_6,0.910546,0.089454,11.1789
AGE_7,0.846765,0.153235,6.525936
AGE_8,0.683593,0.316407,3.160482
AGE_9,0.648886,0.351114,2.848074
DAY_WEEK_1,0.266942,0.733058,1.364148


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.607887,0.392113,2.550282
AGE_2,0.542668,0.457332,2.186594
AGE_3,0.601896,0.398104,2.511908
AGE_4,0.513779,0.486221,2.056679
AGE_5,0.669794,0.330206,3.028416
AGE_6,0.910546,0.089454,11.1789
AGE_7,0.846765,0.153235,6.525936
AGE_8,0.683593,0.316407,3.160486
AGE_9,0.648886,0.351114,2.848074
DAY_WEEK_1,0.26694,0.73306,1.364145


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.607887,0.392113,2.550282
AGE_2,0.542668,0.457332,2.186594
AGE_3,0.601899,0.398101,2.511927
AGE_4,0.513779,0.486221,2.056679
AGE_5,0.669794,0.330206,3.028416
AGE_6,0.910546,0.089454,11.1789
AGE_7,0.846765,0.153235,6.525936
AGE_8,0.683593,0.316407,3.160486
AGE_9,0.648886,0.351114,2.848074
DAY_WEEK_1,0.266944,0.733056,1.364152


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.607886,0.392114,2.550282
AGE_2,0.542665,0.457335,2.186583
AGE_3,0.601898,0.398102,2.511916
AGE_4,0.513779,0.486221,2.056678
AGE_5,0.669794,0.330206,3.028415
AGE_6,0.910546,0.089454,11.178897
AGE_7,0.846765,0.153235,6.525935
AGE_8,0.683593,0.316407,3.160485
AGE_9,0.648886,0.351114,2.848074
DAY_WEEK_1,0.266941,0.733059,1.364147


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.032786,0.967214,1.033898
AGE_2,0.025592,0.974408,1.026264
AGE_3,0.032815,0.967185,1.033929
AGE_4,0.021282,0.978718,1.021745
AGE_5,0.036423,0.963577,1.037799
AGE_7,0.078844,0.921156,1.085593
AGE_8,0.043809,0.956191,1.045816
AGE_9,0.046708,0.953292,1.048997
DAY_WEEK_1,0.266924,0.733076,1.364114
DAY_WEEK_2,0.277122,0.722878,1.383359


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_1_0_1_Medium.csv

(802700, 76)


../../Big_Files/CRSS_04_1_1_0_1_Medium.csv

Get_Data
../../Big_Files/CRSS_03_1_1_0.csv

data.shape =  (802700, 64)
Thin_to_Hard_Features()
Thin_to_Hard_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
MAK_MOD
MAN_COLL
MAX_SEV
M_HARM
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
P_CRASH1
P_CRASH2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
VSURCOND

data.shape:  (802700, 32)
End Thin_to_Hard_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.238216,0.761784,1.312709
BODY_TYP,0.721665,0.278335,3.592789
DAY_WEEK,0.019395,0.980605,1.019778
HOUR,0.034981,0.965019,1.03625
INT_HWY,0.433117,0.566883,1.764032
LGT_COND,0.065959,0.934041,1.070617
MAKE,0.208136,0.791864,1.262843
MODEL,0.727848,0.272152,3.674423
MONTH,0.009507,0.990493,1.009599
NUMOCCS,0.439639,0.560361,1.784565


MODEL

./Analysis/CRSS_04_Dropped_Features_1_1_0_1_Hard.csv

(802700, 32)
Get_Dummies
(802700, 32)
(802700, 139)

(802700, 139)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.6086,0.3914,2.554934
AGE_2,0.543557,0.456443,2.190853
AGE_3,0.620289,0.379711,2.633584
AGE_4,0.539306,0.460694,2.170639
AGE_5,0.694703,0.305297,3.275498
AGE_6,0.920675,0.079325,12.606347
AGE_7,0.864474,0.135526,7.378643
AGE_8,0.712563,0.287437,3.47902
AGE_9,0.677772,0.322228,3.103388
BODY_TYP_1,0.999863,0.000137,7307.818609


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.6086,0.3914,2.554934
AGE_2,0.543557,0.456443,2.190855
AGE_3,0.620289,0.379711,2.633584
AGE_4,0.539306,0.460694,2.170639
AGE_5,0.694703,0.305297,3.275497
AGE_6,0.920675,0.079325,12.606345
AGE_7,0.864474,0.135526,7.378641
AGE_8,0.712563,0.287437,3.47902
AGE_9,0.677772,0.322228,3.103388
BODY_TYP_1,0.999863,0.000137,7307.818609


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.6086,0.3914,2.554934
AGE_2,0.543557,0.456443,2.190855
AGE_3,0.620289,0.379711,2.633585
AGE_4,0.539306,0.460694,2.170639
AGE_5,0.694703,0.305297,3.275498
AGE_6,0.920675,0.079325,12.606347
AGE_7,0.864474,0.135526,7.378643
AGE_8,0.712563,0.287437,3.47902
AGE_9,0.677772,0.322228,3.103388
BODY_TYP_1,0.999863,0.000137,7307.818609


BODY_TYP_2
Drop  BODY_TYP_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.6086,0.3914,2.554934
AGE_2,0.543556,0.456444,2.190848
AGE_3,0.620289,0.379711,2.633584
AGE_4,0.539306,0.460694,2.170639
AGE_5,0.694702,0.305298,3.275492
AGE_6,0.920675,0.079325,12.606344
AGE_7,0.864474,0.135526,7.378643
AGE_8,0.712563,0.287437,3.47902
AGE_9,0.677771,0.322229,3.103387
BODY_TYP_1,0.099347,0.900653,1.110305


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.6086,0.3914,2.554934
AGE_2,0.543553,0.456447,2.190837
AGE_3,0.620288,0.379712,2.633573
AGE_4,0.539306,0.460694,2.170637
AGE_5,0.694702,0.305298,3.275491
AGE_6,0.920675,0.079325,12.606341
AGE_7,0.864474,0.135526,7.378642
AGE_8,0.712563,0.287437,3.479019
AGE_9,0.677771,0.322229,3.103387
BODY_TYP_1,0.099346,0.900654,1.110304


MAKE_6
Drop  MAKE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.6086,0.3914,2.554931
AGE_2,0.543553,0.456447,2.190836
AGE_3,0.620286,0.379714,2.633562
AGE_4,0.539303,0.460697,2.170623
AGE_5,0.6947,0.3053,3.275467
AGE_6,0.920674,0.079326,12.606269
AGE_7,0.864474,0.135526,7.378641
AGE_8,0.712562,0.287438,3.479016
AGE_9,0.677769,0.322231,3.103369
BODY_TYP_1,0.099332,0.900668,1.110288


MODEL_9
Drop  MODEL_9

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.6086,0.3914,2.55493
AGE_2,0.543553,0.456447,2.190835
AGE_3,0.620283,0.379717,2.633542
AGE_4,0.539299,0.460701,2.170607
AGE_5,0.694699,0.305301,3.275452
AGE_6,0.920674,0.079326,12.606268
AGE_7,0.864474,0.135526,7.378632
AGE_8,0.712561,0.287439,3.479
AGE_9,0.677769,0.322231,3.103367
BODY_TYP_1,0.098596,0.901404,1.10938


RELJCT2_1
Drop  RELJCT2_1

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.608598,0.391402,2.554921
AGE_2,0.543553,0.456447,2.190833
AGE_3,0.620283,0.379717,2.633542
AGE_4,0.539299,0.460701,2.170603
AGE_5,0.694698,0.305302,3.275451
AGE_6,0.920674,0.079326,12.606247
AGE_7,0.864473,0.135527,7.378616
AGE_8,0.712561,0.287439,3.478999
AGE_9,0.677767,0.322233,3.103344
BODY_TYP_1,0.098593,0.901407,1.109377


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.178971,0.821029,1.217984
AGE_2,0.119859,0.880141,1.136182
AGE_3,0.046741,0.953259,1.049033
AGE_4,0.028577,0.971423,1.029418
AGE_5,0.04525,0.95475,1.047395
AGE_7,0.092878,0.907122,1.102387
AGE_8,0.050502,0.949498,1.053188
AGE_9,0.055875,0.944125,1.059182
BODY_TYP_1,0.098528,0.901472,1.109297
BODY_TYP_3,0.152973,0.847027,1.180601


PERMVIT_3
Drop  PERMVIT_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.178645,0.821355,1.217501
AGE_2,0.119787,0.880213,1.136088
AGE_3,0.04658,0.95342,1.048856
AGE_4,0.028536,0.971464,1.029374
AGE_5,0.045204,0.954796,1.047344
AGE_7,0.09285,0.90715,1.102354
AGE_8,0.050468,0.949532,1.053151
AGE_9,0.055841,0.944159,1.059143
BODY_TYP_1,0.098508,0.901492,1.109272
BODY_TYP_3,0.152937,0.847063,1.180549


HOUR_4
Drop  HOUR_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.17846,0.82154,1.217225
AGE_2,0.11971,0.88029,1.135989
AGE_3,0.046579,0.953421,1.048855
AGE_4,0.028497,0.971503,1.029333
AGE_5,0.045166,0.954834,1.047302
AGE_7,0.092237,0.907763,1.101609
AGE_8,0.049589,0.950411,1.052176
AGE_9,0.054473,0.945527,1.057612
BODY_TYP_1,0.098461,0.901539,1.109214
BODY_TYP_3,0.152936,0.847064,1.180549


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_1_0_1_Hard.csv

(802700, 129)


../../Big_Files/CRSS_04_1_1_0_1_Hard.csv


Finished!


------------------------------------
Run =  _1_1_1_0
random_seed =  1
Get_Data
../../Big_Files/CRSS_03_1_1_1.csv

data.shape =  (802700, 65)
Thin_to_Easy_Features
Thin_to_Easy_Features()
Removed Features
ACC_TYPE
AGE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
INT_HWY
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT1
RELJCT2
REL_ROAD
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SEX
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
TYP_INT
Unnamed: 0
VALIGN
VEH_AGE
VE_TOTAL
VPROFILE
VSPD_LIM
VSURCOND
VTRAFCON
VTRAFWAY
WRK_ZONE

data.shape:  (802700, 9)
End Thin_to_Easy_Features()

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

(802700, 40)


../../Big_Files/CRSS_04_1_1_1_0_Easy.csv

Get_Data
../../Big_Files/C

Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK,0.001919,0.998081,1.001923
HOUR,0.003341,0.996659,1.003352
MONTH,0.001533,0.998467,1.001535
PJ,0.544922,0.455078,2.197425
PSU,0.558646,0.441354,2.265757
REGION,0.054948,0.945052,1.058143
URBANICITY,0.021225,0.978775,1.021686
WEATHER,0.007372,0.992628,1.007427


PSU

./Analysis/CRSS_04_Dropped_Features_1_1_1_1_Easy.csv

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
DAY_WEEK_1,0.265204,0.734796,1.360922
DAY_WEEK_2,0.274802,0.725198,1.378934
DAY_WEEK_3,0.36885,0.63115,1.58441
DAY_WEEK_4,0.292632,0.707368,1.413691
HOUR_1,0.507626,0.492374,2.030978
HOUR_2,0.520898,0.479102,2.087237
HOUR_3,0.72653,0.27347,3.656703
HOUR_4,0.870921,0.129079,7.74719
HOUR_5,0.873106,0.126894,7.880572
HOUR_6,0.790258,0.209742,4.767765


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_1_1_1_Easy.csv

(802700, 40)


../../Big_Files/CRSS_04_1_1_1_1_Easy.csv

Get_Data
../../Big_Files/CRSS_03_1_1_1.csv

data.shape =  (802700, 65)
Thin_to_Medium_Features
Thin_to_Medium_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
Unnamed: 0
VEH_AGE
VE_TOTAL
VSURCOND
WRK_ZONE

data.shape:  (802700, 20)
End Thin_to_Medium_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.004459,0.995541,1.004479
DAY_WEEK,0.006134,0.993866,1.006172
HOUR,0.02471,0.97529,1.025336
INT_HWY,0.439369,0.560631,1.783703
MONTH,0.002339,0.997661,1.002345
PJ,0.567507,0.432493,2.312176
PSU,0.564422,0.435578,2.295801
REGION,0.076997,0.923003,1.08342
RELJCT1,0.099826,0.900174,1.110897
REL_ROAD,0.153483,0.846517,1.181312


PJ

./Analysis/CRSS_04_Dropped_Features_1_1_1_1_Medium.csv

(802700, 20)
Get_Dummies
(802700, 20)
(802700, 80)

(802700, 80)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597319,0.402681,2.483353
AGE_2,0.536947,0.463053,2.15958
AGE_3,0.600819,0.399181,2.50513
AGE_4,0.512807,0.487193,2.052575
AGE_5,0.668915,0.331085,3.020377
AGE_6,0.910135,0.089865,11.127859
AGE_7,0.846159,0.153841,6.500205
AGE_8,0.682749,0.317251,3.15208
AGE_9,0.647935,0.352065,2.840385
DAY_WEEK_1,0.266835,0.733165,1.36395


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597319,0.402681,2.483353
AGE_2,0.536947,0.463053,2.15958
AGE_3,0.600819,0.399181,2.50513
AGE_4,0.512807,0.487193,2.052575
AGE_5,0.668915,0.331085,3.020377
AGE_6,0.910135,0.089865,11.127859
AGE_7,0.846159,0.153841,6.500205
AGE_8,0.682749,0.317251,3.15208
AGE_9,0.647935,0.352065,2.840385
DAY_WEEK_1,0.266835,0.733165,1.36395


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597319,0.402681,2.483353
AGE_2,0.536945,0.463055,2.159569
AGE_3,0.600818,0.399182,2.505121
AGE_4,0.512807,0.487193,2.052574
AGE_5,0.668915,0.331085,3.020375
AGE_6,0.910135,0.089865,11.127859
AGE_7,0.846159,0.153841,6.500205
AGE_8,0.682749,0.317251,3.15208
AGE_9,0.647935,0.352065,2.840385
DAY_WEEK_1,0.266832,0.733168,1.363945


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597319,0.402681,2.483353
AGE_2,0.536944,0.463056,2.159568
AGE_3,0.600818,0.399182,2.505121
AGE_4,0.512807,0.487193,2.052573
AGE_5,0.668915,0.331085,3.020375
AGE_6,0.910135,0.089865,11.127847
AGE_7,0.846158,0.153842,6.500196
AGE_8,0.682748,0.317252,3.152072
AGE_9,0.647935,0.352065,2.840384
DAY_WEEK_1,0.266832,0.733168,1.363944


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.031443,0.968557,1.032463
AGE_2,0.02479,0.97521,1.02542
AGE_3,0.032314,0.967686,1.033393
AGE_4,0.021035,0.978965,1.021487
AGE_5,0.036,0.964,1.037344
AGE_7,0.078099,0.921901,1.084715
AGE_8,0.043399,0.956601,1.045368
AGE_9,0.046206,0.953794,1.048445
DAY_WEEK_1,0.266816,0.733184,1.363914
DAY_WEEK_2,0.276964,0.723036,1.383057


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_1_1_1_Medium.csv

(802700, 76)


../../Big_Files/CRSS_04_1_1_1_1_Medium.csv

Get_Data
../../Big_Files/CRSS_03_1_1_1.csv

data.shape =  (802700, 65)
Thin_to_Hard_Features()
Thin_to_Hard_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
J_KNIFE
MAK_MOD
MAN_COLL
MAX_SEV
M_HARM
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
P_CRASH1
P_CRASH2
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
Unnamed: 0
VSURCOND

data.shape:  (802700, 32)
End Thin_to_Hard_Features()

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE,0.231434,0.768566,1.301124
BODY_TYP,0.724573,0.275427,3.630723
DAY_WEEK,0.019241,0.980759,1.019618
HOUR,0.034986,0.965014,1.036254
INT_HWY,0.445915,0.554085,1.804777
LGT_COND,0.064321,0.935679,1.068743
MAKE,0.204671,0.795329,1.257342
MODEL,0.730188,0.269812,3.706279
MONTH,0.009489,0.990511,1.00958
NUMOCCS,0.438204,0.561796,1.780007


MODEL

./Analysis/CRSS_04_Dropped_Features_1_1_1_1_Hard.csv

(802700, 32)
Get_Dummies
(802700, 32)
(802700, 139)

(802700, 139)

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597997,0.402003,2.487546
AGE_2,0.537843,0.462157,2.163767
AGE_3,0.619073,0.380927,2.625175
AGE_4,0.538077,0.461923,2.164861
AGE_5,0.693589,0.306411,3.263595
AGE_6,0.92015,0.07985,12.523541
AGE_7,0.863738,0.136262,7.338793
AGE_8,0.711454,0.288546,3.465652
AGE_9,0.676572,0.323428,3.091876
BODY_TYP_1,0.999672,0.000328,3046.488884


VALIGN_3
Drop  VALIGN_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597997,0.402003,2.487546
AGE_2,0.537843,0.462157,2.163767
AGE_3,0.619073,0.380927,2.625175
AGE_4,0.538077,0.461923,2.164861
AGE_5,0.693589,0.306411,3.263595
AGE_6,0.92015,0.07985,12.523541
AGE_7,0.863738,0.136262,7.338792
AGE_8,0.711454,0.288546,3.465651
AGE_9,0.676572,0.323428,3.091875
BODY_TYP_1,0.999672,0.000328,3046.488884


BODY_TYP_2
Drop  BODY_TYP_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537841,0.462159,2.163757
AGE_3,0.619073,0.380927,2.625175
AGE_4,0.538077,0.461923,2.164861
AGE_5,0.693589,0.306411,3.263593
AGE_6,0.92015,0.07985,12.52354
AGE_7,0.863738,0.136262,7.338792
AGE_8,0.711454,0.288546,3.465648
AGE_9,0.676571,0.323429,3.091867
BODY_TYP_1,0.099858,0.900142,1.110936


VSPD_LIM_3
Drop  VSPD_LIM_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537838,0.462162,2.163746
AGE_3,0.619071,0.380929,2.625163
AGE_4,0.538076,0.461924,2.164859
AGE_5,0.693589,0.306411,3.263592
AGE_6,0.92015,0.07985,12.523539
AGE_7,0.863738,0.136262,7.338791
AGE_8,0.711454,0.288546,3.465648
AGE_9,0.676571,0.323429,3.091867
BODY_TYP_1,0.099856,0.900144,1.110934


VPROFILE_4
Drop  VPROFILE_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487526
AGE_2,0.537838,0.462162,2.163745
AGE_3,0.619071,0.380929,2.625163
AGE_4,0.538076,0.461924,2.164858
AGE_5,0.693589,0.306411,3.263592
AGE_6,0.92015,0.07985,12.523536
AGE_7,0.863738,0.136262,7.33879
AGE_8,0.711453,0.288547,3.465644
AGE_9,0.676571,0.323429,3.091867
BODY_TYP_1,0.099856,0.900144,1.110933


MAKE_6
Drop  MAKE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487524
AGE_2,0.537838,0.462162,2.163744
AGE_3,0.61907,0.38093,2.625153
AGE_4,0.538073,0.461927,2.164844
AGE_5,0.693587,0.306413,3.263568
AGE_6,0.92015,0.07985,12.52346
AGE_7,0.863738,0.136262,7.338788
AGE_8,0.711453,0.288547,3.465641
AGE_9,0.676569,0.323431,3.091847
BODY_TYP_1,0.099842,0.900158,1.110916


MODEL_9
Drop  MODEL_9

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487524
AGE_2,0.537838,0.462162,2.163743
AGE_3,0.619067,0.380933,2.625136
AGE_4,0.53807,0.46193,2.16483
AGE_5,0.693586,0.306414,3.263556
AGE_6,0.92015,0.07985,12.523455
AGE_7,0.863738,0.136262,7.338777
AGE_8,0.711452,0.288548,3.465624
AGE_9,0.676569,0.323431,3.091846
BODY_TYP_1,0.09911,0.90089,1.110013


TYP_INT_2
Drop  TYP_INT_2

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.597994,0.402006,2.487524
AGE_2,0.537838,0.462162,2.163742
AGE_3,0.619066,0.380934,2.625129
AGE_4,0.538069,0.461931,2.164827
AGE_5,0.693586,0.306414,3.263555
AGE_6,0.92015,0.07985,12.523453
AGE_7,0.863738,0.136262,7.338776
AGE_8,0.711451,0.288549,3.465621
AGE_9,0.676568,0.323432,3.091844
BODY_TYP_1,0.099106,0.900894,1.110009


AGE_6
Drop  AGE_6

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169725,0.830275,1.20442
AGE_2,0.115079,0.884921,1.130045
AGE_3,0.046054,0.953946,1.048277
AGE_4,0.028413,0.971587,1.029244
AGE_5,0.045087,0.954913,1.047216
AGE_7,0.091906,0.908094,1.101208
AGE_8,0.049986,0.950014,1.052616
AGE_9,0.055467,0.944533,1.058724
BODY_TYP_1,0.099033,0.900967,1.109918
BODY_TYP_3,0.156685,0.843315,1.185797


PERMVIT_3
Drop  PERMVIT_3

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169417,0.830583,1.203974
AGE_2,0.115014,0.884986,1.129962
AGE_3,0.045897,0.954103,1.048105
AGE_4,0.028371,0.971629,1.0292
AGE_5,0.045042,0.954958,1.047166
AGE_7,0.091875,0.908125,1.10117
AGE_8,0.049949,0.950051,1.052575
AGE_9,0.055429,0.944571,1.058682
BODY_TYP_1,0.099011,0.900989,1.109892
BODY_TYP_3,0.156641,0.843359,1.185735


HOUR_4
Drop  HOUR_4

calculate_vif()


Unnamed: 0,r2,Tolerance,VIF
AGE_1,0.169236,0.830764,1.203711
AGE_2,0.114942,0.885058,1.12987
AGE_3,0.045896,0.954104,1.048104
AGE_4,0.028333,0.971667,1.02916
AGE_5,0.045005,0.954995,1.047126
AGE_7,0.091268,0.908732,1.100435
AGE_8,0.049076,0.950924,1.051608
AGE_9,0.05407,0.94593,1.05716
BODY_TYP_1,0.098968,0.901032,1.109839
BODY_TYP_3,0.15664,0.84336,1.185734


PSU_3

./Analysis/CRSS_04_Dropped_Features_1_1_1_1_Hard.csv

(802700, 129)


../../Big_Files/CRSS_04_1_1_1_1_Hard.csv


Finished!



In [15]:
%%time
run = '_0_0_1_0_1'
Run = run
random_seed = int(run[1])
Main()

Get_Data
../../Big_Files/CRSS_03_0_0_1.csv

data.shape =  (802700, 68)
Thin_to_Easy_Features
Thin_to_Easy_Features()
Removed Features
ACC_TYPE
AGE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP
EJECTION
HARM_EV
HIT_RUN
IMPACT1
INJ_SEV
INT_HWY
J_KNIFE
LGT_COND
MAKE
MAK_MOD
MAN_COLL
MAX_SEV
MAX_VSEV
MODEL
M_HARM
NUMOCCS
NUM_INJ
NUM_INJV
PCRASH4
PCRASH5
PERMVIT
PER_TYP
PVH_INVL
P_CRASH1
P_CRASH2
RELJCT1
RELJCT2
REL_ROAD
REST_MIS
REST_USE
ROLINLOC
ROLLOVER
SEAT_POS
SEX
SPEC_USE
SPEEDREL
TOWED
TOW_VEH
TYP_INT
Unnamed: 0
VALIGN
VEH_AGE
VE_FORMS
VE_TOTAL
VPROFILE
VSPD_LIM
VSURCOND
VTCONT_F
VTRAFCON
VTRAFWAY
WRK_ZONE

data.shape:  (802700, 9)
End Thin_to_Easy_Features()

(802700, 9)
Get_Dummies
(802700, 9)
(802700, 40)

(802700, 40)

(802700, 40)


../../Big_Files/CRSS_04_0_0_1_0_1_Easy.csv

Get_Data
../../Big_Files/CRSS_03_0_0_1.csv

data.shape =  (802700, 68)
Thin_to_Medium_Features
Thin_to_Medium_Features()
Removed Features
ACC_TYPE
AIR_BAG
ALC_STATUS
BODY_TYP
CARGO_BT
DEFORMED
DR_ZIP