- CRSS where each sample corresponds to one person, not to one accident.  
- This one corresponds better to evaluating per phone.

In [1]:
%%latex
\tableofcontents

<IPython.core.display.Latex object>

# Setup
## Import Libraries

In [2]:
import sys, copy, math, time

print ('Python version: {}'.format(sys.version))

from IPython.display import display, HTML

from collections import Counter

import numpy as np
print ('NumPy version: {}'.format(np.__version__))
np.set_printoptions(suppress=True)

import pandas as pd
print ('Pandas version:  {}'.format(pd.__version__))
pd.set_option('display.max_rows', 500)



Python version: 3.9.7 (default, Oct 22 2021, 13:24:00) 
[Clang 13.0.0 (clang-1300.0.29.3)]
NumPy version: 1.21.3
Pandas version:  1.2.4


## Import Data

### Vehicle.csv from CRSS

In [3]:
def Import_Data_Vehicle():
    print ('Import_Data_Vehicle()')

    df = pd.DataFrame([])
    for year in ['2016','2017','2018']:
        filename = '../../Big_Files/CRSS_2020_Update/CRSS' + year + 'CSV/VEHICLE.CSV'
#        filename = '../../CRSS/CRSS' + year + 'CSV/VEHICLE.CSV'
        temp = pd.read_csv(filename, index_col=None, low_memory=False)
        print (year, len(temp))
        df = df.append(temp)

    for year in ['2019','2020']:
        filename = '../../Big_Files/CRSS_2020_Update/CRSS' + year + 'CSV/vehicle.csv'
#        filename = '../../CRSS/CRSS' + year + 'CSV/vehicle.csv'
        temp = pd.read_csv(filename, index_col=None, encoding='latin1', low_memory=False)
        print (year, len(temp))
        df = df.append(temp)

    print (df.shape)
    print ()
    return df

### person.csv from CRSS

In [4]:
def Import_Data_Person():
    print ('Import_Data_Person()')

    df = pd.DataFrame([])
    for year in ['2016','2017','2018']:
        filename = '../../Big_Files/CRSS_2020_Update/CRSS' + year + 'CSV/PERSON.CSV'
#        filename = '../../CRSS/CRSS' + year + 'CSV/PERSON.CSV'
        temp = pd.read_csv(filename, index_col=None)
        print (year, len(temp))
        df = df.append(temp)

    for year in ['2019','2020']:
        filename = '../../Big_Files/CRSS_2020_Update/CRSS' + year + 'CSV/person.csv'
#        filename = '../../CRSS/CRSS' + year + 'CSV/person.csv'
        temp = pd.read_csv(filename, index_col=None, encoding='latin1')
        print (year, len(temp))
        df = df.append(temp)

#    for feature in df:
#        print (feature)
    

#    for feature in df:
#        if 'NAME' in feature:
#            df.drop(columns=[feature], inplace=True)

    print (df.shape)
    print ()
    return df

### All Data

In [5]:
def Import_Data():
    print ('Import_Data()')
    df_Vehicle = Import_Data_Vehicle()
    df_Person = Import_Data_Person()
    
    print ()
    return df_Vehicle, df_Person

# Build Database

In [6]:
def Build_Vehicle_Dataset(df_Vehicle):
    print ('Build_Vehicle_Dataset()')
    data = pd.DataFrame()

    F = [
        'ACC_TYPE',
        'BDYTYP_IM',
        'BODY_TYP',
        'BUS_USE',
        'CARGO_BT',
        'DEFORMED',
        'DR_PRES',
        'EMER_USE',
        'FIRE_EXP',
        'HAZ_CNO',
        'HAZ_INV',
        'HAZ_PLAC',
        'HAZ_REL',
        'HIT_RUN',
#        'HITRUN_IM',
        'IMPACT1',
        'IMPACT1_IM',
        'J_KNIFE',
        'M_HARM',
        'MAK_MOD',
        'MAKE',
        'MAX_VSEV',
        'MDLYR_IM',
        'MOD_YEAR',
        'MODEL',
        'MXVSEV_IM',
        'NUM_INJV',
        'NUMINJ_IM',
        'NUMOCCS',
        'P_CRASH1',
        'P_CRASH2',
        'P_CRASH3',
        'PCRASH1_IM',
        'PCRASH4',
        'PCRASH5',
        'ROLINLOC',
        'ROLLOVER',
        'SPEC_USE',
        'SPEEDREL',
        'TOW_VEH',
        'TOWED',
        'TRAV_SP',
        'V_ALCH_IM',
        'VALIGN',
        'VEH_ALCH',
        'VEVENT_IM',
        'VNUM_LAN',
        'VPROFILE',
        'VSPD_LIM',
        'VSURCOND',
        'VTCONT_F',
        'VTRAFCON',
        'VTRAFWAY',
        'CASENUM',
        'VEH_NO',
    ]

    for f in F:
        data[f] = df_Vehicle[f]
    
    
    print ()
    return data

## Build_Person_Dataset

In [7]:
def Build_Person_Dataset(df_Person):
    print ('Build_Person_Dataset')
    data = pd.DataFrame()
    
    F = [
        'CASENUM',
        'VEH_NO',
#        'AGE_IM',
#        'SEX_IM',
#        'MAKE',
#        'BODY_TYP',
#        'PER_TYP',
#        'MOD_YEAR',
#        'MAK_MOD',
    ]

    for f in F:
        data[f] = df_Person[f]
        
    data['HOSPITAL'] = df_Person['HOSPITAL'].apply(lambda x:1 if x in [1,2,3,4,5] else 0)

    data = data[data.VEH_NO != 0]

    data = data.reindex(sorted(data.columns), axis=1)    
    
    print ()
    return data

## Merge Datasets

In [8]:
def Build_Dataset(df_Vehicle, df_Person):
    print ('Build_Dataset()')
    print ()
    data_Vehicle = Build_Vehicle_Dataset(df_Vehicle)
    data_Person = Build_Person_Dataset(df_Person)
    
    
    
    U = data_Vehicle.CASENUM.unique()
    V = data_Person.CASENUM.unique()
    U = set(U)
    V = set(V)
    print ('Do all of the cases in data_Person appear in data_Vehicle?')
    print (V.issubset(U))
    print ()
    
    
    
#    data = pd.merge(
#        data_Vehicle, data_Person, 
#        on=['CASENUM', 'VEH_NO'],
#        how="outer", sort=False
#    )
    
    data = pd.merge(
        data_Person, data_Vehicle, 
        left_on=['CASENUM', 'VEH_NO'], right_on = ['CASENUM', 'VEH_NO'], 
        how="inner", sort=False
    )
    
    print ('data_Vehicle.shape')
    print (data_Vehicle.shape)
    print ('data_Person.shape')
    print (data_Person.shape)
    print ('data.shape')
    print (data.shape)
    

#    data.drop(columns=['CASENUM'], inplace=True)
#    data.astype(bool)

    print (data.head())

    return data

## Feature Names

In [9]:
def Feature_Names(data, Named_Features):
    print ('Feature_Names')
    D = {}
    for f in Named_Features:
        g = f + 'NAME'
        A = pd.concat([data[f],data[g]], axis=1)
        A.drop_duplicates(inplace=True)
        A.dropna(inplace=True)
#        print (f)
#        print (len(A))
#        print (A.head())
#        print ()
        B = dict(zip(A[f],A[g]))
        D[f] = B
#        print (B)
#        print ()
#    print (D)
    print ()
    return D
        

In [10]:
def Remove_Unknowns_in_Feature(data, feature):
    
    Unknowns = {
        'HOSPITAL': [],
        'ACC_TYPE': [99],
        'BDYTYP_IM': [],
        'BODY_TYP': [98, 99, 49, 79],
        'BUS_USE': [98, 99],
        'CARGO_BT': [98, 99],
        'DEFORMED': [8, 9],
        'DR_PRES': [9],
        'EMER_USE': [8, 9],
        'FIRE_EXP': [],
        'HAZ_CNO': [88],
        'HAZ_INV': [],
        'HAZ_PLAC': [8],
        'HAZ_REL': [8],
        'HIT_RUN': [9],
        'IMPACT1': [98, 99],
        'IMPACT1_IM': [],
        'J_KNIFE': [],
        'M_HARM': [98, 99],
        'MAK_MOD': [],
        'MAKE': [99],
        'MAX_VSEV': [9],
        'MDLYR_IM': [],
        'MOD_YEAR': [9998, 9999],
        'MODEL': [],
        'MXVSEV_IM': [],
        'NUM_INJV': [99],
        'NUMINJ_IM': [],
        'NUMOCCS': [99],
        'P_CRASH1': [99],
        'P_CRASH2': [99],
        'P_CRASH3': [99],
        'PCRASH1_IM': [],
        'PCRASH4': [9],
        'PCRASH5': [9],
        'ROLINLOC': [9],
        'ROLLOVER': [],
        'SPEC_USE': [98, 99],
        'SPEEDREL': [9],
        'TOW_VEH': [9],
        'TOWED': [8, 9],
        'TRAV_SP': [998, 999],
        'V_ALCH_IM': [],
        'VALIGN': [8, 9],
        'VEH_ALCH': [9],
        'VEVENT_IM': [],
        'VNUM_LAN': [8, 9],
        'VPROFILE': [8, 9],
        'VSPD_LIM': [98, 99],
        'VSURCOND': [98, 99],
        'VTCONT_F': [8, 9],
        'VTRAFCON': [97, 99],
        'VTRAFWAY': [8, 9],
        'CASENUM': [],
        'VEH_NO': [],
    }
    
#    print ('Remove_Unknowns_in_Feature ', feature, Unknowns[feature], len(data))
    data_temp = data[~data[feature].isin(Unknowns[feature])]
#    print ('Remove_Unknowns_in_Feature ', feature, Unknowns[feature], len(data_temp))
#    print ()
    
    return data_temp, Unknowns[feature]

## Correlation

In [11]:
def Correlation(data, target, feature, value, name):
    TN = 0
    FP = 0
    FN = 0
    TP = 0
    contingency_matrix = pd.crosstab(data[target], data[feature])
    cm = contingency_matrix.values.tolist()
    if len(cm)==2 and len(cm[0])==2:
        corr = cm[1][1] / (cm[0][1] + cm[1][1])
        per = (cm[0][1] + cm[1][1])/(cm[0][0] + cm[0][1] + cm[1][0] + cm[1][1])
    else:
        corr = 0
        per = 0
    per = round(per*100,4)
    corr = round(corr*100,4)
#    print ("    - ", feature)
#    print ("    - ", value)
#    print ("    - ", name)
#    print (contingency_matrix)
#    print ('        - per = ', per)
#    print ("        - corr = ", corr)
#    print ()
    return (per, corr)

def Correlation_by_Value(data, target, feature, Feature_Names_Dict, Unknowns):
# I decided against the np.unique because it treats each nan as a separate entry.
#    V = np.unique(data[feature].values) 
    V = data[feature].unique()
#    print (V)
    B = []

    for value in V:
        A = pd.DataFrame()
        A[feature] = data[feature].apply(lambda x: 1 if x==value else 0)
        A[target] = data[target]
        if feature in Feature_Names_Dict:
            if value in Feature_Names_Dict[feature]:
                name = Feature_Names_Dict[feature][value]
            else:
                name=str(value)
        else:
            name = str(value)
#        if len(name)>30:
#            name = name[:30]
        per, corr = Correlation(A, target, feature, value, name)
        B.append([feature, value, name, per, corr])
#    print (feature)
    B = sorted(B, key=lambda x:x[4], reverse=True)
    for b in B:
        c = b[1]
        try:
            c = int(c)
        except:
            c=c
        else:
            c = int(c)
#        print (c, end=',')
#    print ()
#    print ()

    # Print grouped into 100/p blocks of same size
    print ("    feature = '%s'" % feature)
    print ('    A = [')
    p = 20
    s = 0.0
    s2 = 0.0
    n=0
    print ("        ['%d', [" % n , end='')
    for b in B:
        t = s + b[3]
        if b[3]<10:
            s2 = s2 + b[3]
        q = int(s/p)
        r = int((t-0.001)/p)
        if r>q or b[3]>10:
            print ("]], # ", round(s2,4), '%')
            s2 = 0.0
            n += 1
            print ("        ['%d', [" % n , end='')
        s = t
        
        c = b[1]
        try:
            c = int(c)
        except:
            c=c
        else:
            c = int(c)
        print (c, end=',')
        if b[3]>10:
            print ("]], # ", round(b[3],4), '%')
            s2=0.0
            n += 1
            print ("        ['%d', [" % n , end='')
    print ("]], # ", round(s2,4), '%')
    print ("        ['Unknowns', [", end='')
    for u in Unknowns:
        print (u, end=', ')
    print ("]]" )
    print ('    ]')
    print ('    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)')
    print ()
    
    C = pd.DataFrame(B)
    C.columns = ['Feature', 'Code', 'Name', 'Per', 'Corr']
#    C.drop(C[C['Per'] < 0.1].index, inplace=True)
#    print (C)
    display(C)

    TeX = open('../Correlation/Correlation_' + feature + '.tex', 'w')
    E = [c for c in B if c[3]>=0.0]
    
        
    
    for c in E:
        a = c[0]
        b = c[1]
        d = c[2]
        e = "{:.4f}".format(c[3])
        f = "{:.4f}".format(c[4])
        TeX.write('\t & \\verb|%s| & %s & %s & %s & %s \\cr\n' % (a,b,d,e,f))
    

    TeX = open('../Correlation/Correlation_Ordered_' + feature + '.tex', 'w')
    E = sorted(B, key=lambda x:x[1], reverse=False)

    
    for c in E:
        a = c[0]
        b = c[1]
        d = c[2]
        e = "{:.4f}".format(c[3])
        f = "{:.4f}".format(c[4])
        TeX.write('\t & \\verb|%s| & %s & %s & %s & %s \\cr\n' % (a,b,d,e,f))
    

    print ()
    return B

def Correlation_All(data, target, Feature_Names_Dict):
    print ('Correlation_All')
    
    C = []
    for feature in data:
        data_temp, Unknowns = Remove_Unknowns_in_Feature(data, feature)
        U = data_temp[feature].unique()
#        print (feature, len(U))
        if len(U)<10000:
            B = Correlation_by_Value(
                data_temp, target, feature, Feature_Names_Dict, Unknowns
            )
            for b in B:
                C.append(b)
#            print ()
#        print ()
#    for c in C:
#        print (c)
#    print ()
    C = sorted(C, key=lambda x:x[4], reverse=True)
    D = pd.DataFrame(C)
    D.columns = ['Feature', 'Code', 'Name', 'Per', 'Corr']
    print (D)
    print ()
    
    D.drop(D[D['Per'] < 0.5].index, inplace=True)
    print (D)
    print ()
    
    TeX = open('../Correlation/Correlation.tex', 'w')
    E = [c for c in C if c[3]>=0.5]
    
    for c in E:
        a = c[0]
        b = c[1]
        d = c[2]
        e = "{:.4f}".format(c[3])
        f = "{:.4f}".format(c[4])
        TeX.write('\\verb|%s| & %s & %s & %s & %s \\cr\n' % (a,b,d,e,f))
    
    return 0

    

# Main()

In [12]:
def Main():
    target = 'HOSPITAL'
    df_Vehicle, df_Person = Import_Data()
    
    """
    print ('Features in df_Vehicle, with Number of Unique Values and Number of Blank Values')
    for feature in df_Vehicle:
        U = df_Vehicle[feature].unique()
        s = df_Vehicle[feature].isna().sum()
        print (feature, len(U), s)
    print ()

    print ('Features in df_Person, with Number of Unique Values and Number of Blank Values')
    for feature in df_Person:
        U = df_Person[feature].unique()
        s = df_Person[feature].isna().sum()
        print (feature, len(U), s)

    print ()
    """
    
    data = Build_Dataset(df_Vehicle, df_Person)
    
    print ('Features in data, with Number of Unique Values and Number of Blank Values')
    for feature in data:
        U = data[feature].unique()
        s = data[feature].isna().sum()
        print (feature, len(U), s)
    print ()
    
    """
    print ('Features in df_Vehicle with Names')
    F = []
    for feature in df_Vehicle:
        if 'NAME' in feature:
            f = feature[:-4]
            if f in data:
                F.append(f)
    F = sorted(F)
    for f in F:
        print ("        '%s'," % f)
    print ()
    """
       
    Named_Features_df_Vehicle = [
        'ACC_TYPE',
        'BDYTYP_IM',
        'BUS_USE',
        'CARGO_BT',
        'DEFORMED',
        'DR_PRES',
        'EMER_USE',
        'FIRE_EXP',
        'HAZ_CNO',
        'HAZ_INV',
        'HAZ_PLAC',
        'HAZ_REL',
#        'HITRUN_IM',
        'HIT_RUN',
        'IMPACT1',
        'IMPACT1_IM',
        'J_KNIFE',
        'MAK_MOD',
        'MAX_VSEV',
        'MDLYR_IM',
        'MOD_YEAR',
        'MXVSEV_IM',
        'M_HARM',
        'NUMINJ_IM',
        'NUMOCCS',
        'NUM_INJV',
        'PCRASH1_IM',
        'PCRASH4',
        'PCRASH5',
        'P_CRASH1',
        'P_CRASH2',
        'P_CRASH3',
        'ROLINLOC',
        'ROLLOVER',
        'SPEC_USE',
        'SPEEDREL',
        'TOWED',
        'TOW_VEH',
        'TRAV_SP',
        'VALIGN',
        'VEH_ALCH',
        'VEVENT_IM',
        'VNUM_LAN',
        'VPROFILE',
        'VSPD_LIM',
        'VSURCOND',
        'VTCONT_F',
        'VTRAFCON',
        'VTRAFWAY',
        'V_ALCH_IM',
    ]

    Feature_Names_Dict = Feature_Names(df_Vehicle, Named_Features_df_Vehicle)

    Named_Features_df_Person = [
        'HOSPITAL',
#        'MAK_MOD',
#        'SEX_IM',
#        'MAKE',
#        'BODY_TYP',
#        'PER_TYP'
#        'MOD_YEAR',
    ]

    Feature_Names_Dict.update(Feature_Names(df_Person, Named_Features_df_Person))

#    print (Feature_Names_Dict)

    Correlation_All(data, target, Feature_Names_Dict)


Main()

Import_Data()
Import_Data_Vehicle()
2016 82149
2017 97625
2018 86105
2019 96717
2020 94718
(457314, 184)

Import_Data_Person()
2016 117759
2017 138913
2018 120230
2019 135410
2020 131962
(644274, 117)


Build_Dataset()

Build_Vehicle_Dataset()

Build_Person_Dataset

Do all of the cases in data_Person appear in data_Vehicle?
True

data_Vehicle.shape
(457314, 54)
data_Person.shape
(621222, 3)
data.shape
(619027, 55)
        CASENUM  HOSPITAL  VEH_NO  ACC_TYPE  BDYTYP_IM  BODY_TYP  BUS_USE  \
0  201600014311         0       1        46          4         4        0   
1  201600014311         0       2        45          5         5        0   
2  201600014315         0       1        68         31        31        0   
3  201600014315         0       2        69         30        30        0   
4  201600014315         0       2        69         30        30        0   

   CARGO_BT  DEFORMED  DR_PRES  ...  VALIGN  VEH_ALCH  VEVENT_IM  VNUM_LAN  \
0         0         4        1  ...      

Unnamed: 0,Feature,Code,Name,Per,Corr
0,HOSPITAL,0,Not Transported for Treatment,14.4076,100.0
1,HOSPITAL,1,EMS Air,14.4076,100.0



    feature = 'VEH_NO'
    A = [
        ['0', [13,7,8,]], #  0.0275 %
        ['1', [1,]], #  54.403 %
        ['2', [9,]], #  0.0044 %
        ['3', [2,]], #  40.0519 %
        ['4', [6,10,3,5,4,11,12,14,15,]], #  5.5133 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VEH_NO,13,13,0.001,66.6667
1,VEH_NO,7,7,0.0173,17.757
2,VEH_NO,8,8,0.0092,15.7895
3,VEH_NO,1,1,54.403,15.0418
4,VEH_NO,9,9,0.0044,14.8148
5,VEH_NO,2,2,40.0519,14.1232
6,VEH_NO,6,6,0.0483,13.3779
7,VEH_NO,10,10,0.0015,11.1111
8,VEH_NO,3,3,4.4416,10.3582
9,VEH_NO,5,5,0.1788,9.8464



    feature = 'ACC_TYPE'
    A = [
        ['0', [61,60,51,50,53,59,52,55,58,6,54,1,10,14,16,5,2,7,8,4,0,62,3,9,89,69,41,64,66,87,90,91,]], #  21.7211 %
        ['1', [83,34,35,88,68,65,86,30,82,38,73,39,]], #  20.5914 %
        ['2', [98,25,22,11,31,77,12,40,85,24,26,32,71,81,79,29,27,43,]], #  22.3576 %
        ['3', [21,33,42,48,75,72,80,15,78,28,76,44,45,84,49,]], #  22.3708 %
        ['4', [20,67,23,74,47,70,46,93,13,92,63,36,37,]], #  12.9592 %
        ['Unknowns', [99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,ACC_TYPE,61,"H61-Same Trafficway, Opposite Direction-Forwar...",0.0023,92.8571
1,ACC_TYPE,60,"H60-Same Trafficway, Opposite Direction-Forwar...",0.0021,69.2308
2,ACC_TYPE,51,"G51-Same Trafficway, Opposite Direction-Head-O...",0.6003,53.5734
3,ACC_TYPE,50,"G50-Same Trafficway, Opposite Direction-Head-O...",0.4971,50.9317
4,ACC_TYPE,53,"G53-Same Trafficway, Opposite Direction-Head-O...",0.0275,45.5621
5,ACC_TYPE,59,"H59-Same Trafficway, Opposite Direction-Forwar...",0.0137,45.2381
6,ACC_TYPE,52,"G52-Same Trafficway, Opposite Direction-Head-O...",0.1791,44.4646
7,ACC_TYPE,55,"H55-Same Trafficway, Opposite Direction-Forwar...",0.0011,42.8571
8,ACC_TYPE,58,"H58-Same Trafficway, Opposite Direction-Forwar...",0.0122,41.3333
9,ACC_TYPE,6,B6-Single Driver-Left Roadside Departure-Drive...,1.9688,38.7238



    feature = 'BDYTYP_IM'
    A = [
        ['0', [86,87,82,89,81,83,84,80,88,85,90,95,11,97,96,58,12,45,32,91,10,2,59,3,30,]], #  7.1472 %
        ['1', [4,]], #  36.3414 %
        ['2', [1,19,42,5,8,16,6,52,]], #  10.832 %
        ['3', [14,]], #  15.7266 %
        ['4', [9,20,22,40,]], #  16.7815 %
        ['5', [34,31,15,29,39,55,92,17,21,50,93,48,7,28,51,61,67,63,62,66,65,78,64,72,60,71,73,94,41,13,]], #  13.1715 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,BDYTYP_IM,86,86,0.0002,100.0
1,BDYTYP_IM,87,Unknown Three Wheel Motorcycle Type,0.001,83.3333
2,BDYTYP_IM,82,Three-wheel Motorcycle (2 Rear Wheels),0.0103,70.3125
3,BDYTYP_IM,89,Unknown motored cycle type,0.1119,64.0693
4,BDYTYP_IM,81,Moped or motorized bicycle,0.0861,63.4146
5,BDYTYP_IM,83,Off-road Motorcycle,0.036,62.3318
6,BDYTYP_IM,84,Motor Scooter,0.1439,61.3917
7,BDYTYP_IM,80,Two Wheel Motorcycle (excluding motor scooters),2.2472,60.1898
8,BDYTYP_IM,88,"Other motored cycle type (mini-bikes, pocket m...",0.0292,57.4586
9,BDYTYP_IM,85,Unenclosed Three Wheel Motorcycle / Unenclosed...,0.0079,55.102



    feature = 'BODY_TYP'
    A = [
        ['0', [86,87,82,89,81,83,80,84,88,85,90,95,11,97,96,58,45,12,32,91,10,2,3,59,1,30,]], #  7.8638 %
        ['1', [4,]], #  36.434 %
        ['2', [19,42,5,8,16,6,]], #  9.943 %
        ['3', [14,]], #  15.5655 %
        ['4', [52,9,20,22,40,]], #  16.9082 %
        ['5', [34,31,15,29,39,55,92,17,21,50,93,48,28,7,51,61,67,63,62,66,65,78,64,72,60,71,73,94,41,13,]], #  13.2856 %
        ['Unknowns', [98, 99, 49, 79, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,BODY_TYP,86,86,0.0002,100.0
1,BODY_TYP,87,87,0.001,83.3333
2,BODY_TYP,82,82,0.0107,70.3125
3,BODY_TYP,89,89,0.114,65.1982
4,BODY_TYP,81,81,0.0876,64.6272
5,BODY_TYP,83,83,0.0368,63.1818
6,BODY_TYP,80,80,2.2794,61.4972
7,BODY_TYP,84,84,0.149,61.4607
8,BODY_TYP,88,88,0.0296,58.7571
9,BODY_TYP,85,85,0.0082,55.102



    feature = 'BUS_USE'
    A = [
        ['0', [5,]], #  0.0207 %
        ['1', [0,]], #  99.5385 %
        ['2', [6,7,8,1,4,]], #  0.4407 %
        ['Unknowns', [98, 99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,BUS_USE,5,Charter/Tour,0.0207,29.9213
1,BUS_USE,0,Not a Bus,99.5385,14.5594
2,BUS_USE,6,Transit/ Commuter,0.1855,13.8204
3,BUS_USE,7,Shuttle,0.0188,13.0435
4,BUS_USE,8,Modified for Personal/Private Use,0.0078,8.3333
5,BUS_USE,1,School,0.2208,8.2101
6,BUS_USE,4,Intercity,0.0078,6.25



    feature = 'CARGO_BT'
    A = [
        ['0', []], #  0.0 %
        ['1', [0,]], #  96.4126 %
        ['2', [22,10,5,2,4,12,8,1,97,3,96,11,7,6,9,]], #  3.5873 %
        ['Unknowns', [98, 99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,CARGO_BT,0,Not Applicable (N/A),96.4126,14.9844
1,CARGO_BT,22,Bus,0.5367,11.6186
2,CARGO_BT,10,Log,0.0183,9.9099
3,CARGO_BT,5,Concrete Mixer,0.0235,9.0909
4,CARGO_BT,2,Cargo Tank,0.117,6.0478
5,CARGO_BT,4,Dump,0.2108,5.6987
6,CARGO_BT,12,Vehicle Towing Another Motor Vehicle,0.0207,5.5556
7,CARGO_BT,8,Grain/ Chips/ Gravel,0.0319,4.6392
8,CARGO_BT,1,Van/Enclosed Box,1.1624,4.4728
9,CARGO_BT,97,Other,0.7837,4.304



    feature = 'DEFORMED'
    A = [
        ['0', []], #  0.0 %
        ['1', [6,]], #  39.2768 %
        ['2', []], #  0.0 %
        ['3', [4,]], #  25.1503 %
        ['4', []], #  0.0 %
        ['5', [2,]], #  31.9667 %
        ['6', [0,]], #  3.6062 %
        ['Unknowns', [8, 9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,DEFORMED,6,Disabling Damage,39.2768,31.6576
1,DEFORMED,4,Functional Damage,25.1503,7.2221
2,DEFORMED,2,Minor Damage,31.9667,3.572
3,DEFORMED,0,No Damage,3.6062,2.6344



    feature = 'DR_PRES'
    A = [
        ['0', [0,]], #  0.0216 %
        ['1', [1,]], #  99.9784 %
        ['2', []], #  0.0 %
        ['Unknowns', [9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,DR_PRES,0,No Driver Present/Not Applicable,0.0216,24.6269
1,DR_PRES,1,Yes,99.9784,14.4055



    feature = 'EMER_USE'
    A = [
        ['0', [6,5,]], #  0.1932 %
        ['1', [0,]], #  99.6845 %
        ['2', [4,3,2,]], #  0.1222 %
        ['Unknowns', [8, 9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,EMER_USE,6,"Emergency Operation, Emergency Warning Equipme...",0.0702,20.4176
1,EMER_USE,5,"Emergency Operation, Emergency Warning Equipme...",0.123,16.9536
2,EMER_USE,0,Not Applicable,99.6845,14.5125
3,EMER_USE,4,"Emergency Operation, Emergency Warning Equipme...",0.0202,12.9032
4,EMER_USE,3,Non-Emergency Transport,0.0114,11.4286
5,EMER_USE,2,"Non-Emergency, Non-Transport",0.0906,10.6115



    feature = 'FIRE_EXP'
    A = [
        ['0', [1,]], #  0.2076 %
        ['1', [0,]], #  99.7924 %
        ['2', []], #  0.0 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,FIRE_EXP,1,Yes,0.2076,40.5447
1,FIRE_EXP,0,No or Not Reported,99.7924,14.3532



    feature = 'HAZ_CNO'
    A = [
        ['0', [9,]], #  0.0011 %
        ['1', [0,]], #  99.9774 %
        ['2', [1,2,8,3,4,6,5,]], #  0.0215 %
        ['Unknowns', [88, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,HAZ_CNO,9,09,0.0011,28.5714
1,HAZ_CNO,0,Not Applicable,99.9774,14.4107
2,HAZ_CNO,1,01,0.0015,11.1111
3,HAZ_CNO,2,02,0.0037,8.6957
4,HAZ_CNO,8,08,0.0024,6.6667
5,HAZ_CNO,3,03,0.0129,5.0
6,HAZ_CNO,4,04,0.0006,0.0
7,HAZ_CNO,6,06,0.0002,0.0
8,HAZ_CNO,5,05,0.0002,0.0



    feature = 'HAZ_INV'
    A = [
        ['0', []], #  0.0 %
        ['1', [1,]], #  99.9604 %
        ['2', [2,]], #  0.0396 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,HAZ_INV,1,No,99.9604,14.4107
1,HAZ_INV,2,Yes,0.0396,6.5306



    feature = 'HAZ_PLAC'
    A = [
        ['0', []], #  0.0 %
        ['1', [0,]], #  99.9654 %
        ['2', [2,1,]], #  0.0345 %
        ['Unknowns', [8, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,HAZ_PLAC,0,Not Applicable,99.9654,14.4107
1,HAZ_PLAC,2,Yes,0.0305,5.291
2,HAZ_PLAC,1,No,0.004,0.0



    feature = 'HAZ_REL'
    A = [
        ['0', [2,]], #  0.0066 %
        ['1', [0,]], #  99.9682 %
        ['2', [1,]], #  0.0252 %
        ['Unknowns', [8, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,HAZ_REL,2,Yes,0.0066,21.9512
1,HAZ_REL,0,Not Applicable,99.9682,14.4107
2,HAZ_REL,1,No,0.0252,1.9231



    feature = 'HIT_RUN'
    A = [
        ['0', []], #  0.0 %
        ['1', [0,]], #  94.8247 %
        ['2', [1,]], #  5.1753 %
        ['Unknowns', [9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,HIT_RUN,0,No,94.8247,15.0601
1,HIT_RUN,1,Yes,5.1753,2.4504



    feature = 'IMPACT1'
    A = [
        ['0', [0,14,61,9,81,3,]], #  9.5695 %
        ['1', [12,]], #  42.3692 %
        ['2', [62,11,]], #  8.9371 %
        ['3', [10,1,82,2,8,4,63,19,20,83,]], #  12.8232 %
        ['4', [6,]], #  22.3991 %
        ['5', [7,5,13,18,]], #  3.9018 %
        ['Unknowns', [98, 99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,IMPACT1,0,Non-Collision,1.7103,47.4035
1,IMPACT1,14,Undercarriage,0.2627,19.9874
2,IMPACT1,61,Left,1.0508,19.4658
3,IMPACT1,9,9 Clock Point,2.7188,18.5804
4,IMPACT1,81,Right,1.0533,17.5235
5,IMPACT1,3,3 Clock Point,2.7736,16.5526
6,IMPACT1,12,12 Clock Point,42.3692,16.2231
7,IMPACT1,62,Left-Front Side,2.1626,14.3893
8,IMPACT1,11,11 Clock Point,5.171,14.3313
9,IMPACT1,10,10 Clock Point,1.6035,14.0327



    feature = 'IMPACT1_IM'
    A = [
        ['0', [0,14,61,9,81,3,]], #  9.5846 %
        ['1', [12,]], #  42.5618 %
        ['2', [62,11,]], #  8.9862 %
        ['3', [10,1,82,2,4,8,63,20,83,]], #  12.9432 %
        ['4', [6,]], #  22.0047 %
        ['5', [19,7,13,5,18,]], #  3.9193 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,IMPACT1_IM,0,Non-Collision,1.6736,47.4035
1,IMPACT1_IM,14,Undercarriage,0.2733,20.1537
2,IMPACT1_IM,61,Left,1.0568,19.3825
3,IMPACT1_IM,9,9 Clock Point,2.7338,18.5192
4,IMPACT1_IM,81,Right,1.0652,17.5159
5,IMPACT1_IM,3,3 Clock Point,2.7819,16.5612
6,IMPACT1_IM,12,12 Clock Point,42.5618,16.3165
7,IMPACT1_IM,62,Left-Front Side,2.1747,14.4184
8,IMPACT1_IM,11,11 Clock Point,5.2012,14.2808
9,IMPACT1_IM,10,10 Clock Point,1.6103,13.9647



    feature = 'J_KNIFE'
    A = [
        ['0', [2,]], #  0.0483 %
        ['1', [0,]], #  97.4523 %
        ['2', [3,1,]], #  2.4994 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,J_KNIFE,2,Yes - First Event,0.0483,15.3846
1,J_KNIFE,0,Not an Articulated Vehicle,97.4523,14.6576
2,J_KNIFE,3,Yes - Subsequent Event,0.0546,14.497
3,J_KNIFE,1,No,2.4448,4.4205



    feature = 'M_HARM'
    A = [
        ['0', [74,10,1,5,21,42,32,35,19,46,39,30,6,93,20,45,23,3,58,52,34,2,25,26,24,33,31,44,17,38,41,43,7,91,40,48,57,59,53,]], #  10.0639 %
        ['1', [12,]], #  81.0472 %
        ['2', [14,16,55,49,18,50,72,11,73,54,51,8,15,9,]], #  8.8889 %
        ['Unknowns', [98, 99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,M_HARM,74,Road Vehicle on Rails,0.0008,80.0
1,M_HARM,10,Railway Vehicle,0.0105,58.4615
2,M_HARM,1,Rollover/Overturn,2.66,53.633
3,M_HARM,5,Fell/Jumped from Vehicle,0.1075,52.782
4,M_HARM,21,Bridge Pier or Support,0.0291,49.4444
5,M_HARM,42,Tree (Standing Only),1.3646,46.8854
6,M_HARM,32,Culvert,0.1496,42.4406
7,M_HARM,35,Embankment,0.3237,37.5936
8,M_HARM,19,Building,0.1416,37.4429
9,M_HARM,46,Traffic Signal Support,0.0545,36.2018



    feature = 'MAK_MOD'
    A = [
        ['0', [6010,76733,72704,71705,22001,43399,50031,2401,74706,30441,53702,71709,34705,99705,7017,20013,50709,99707,37733,37704,76709,20988,37702,53709,3884,12012,37709,73709,76703,98709,72709,98703,53705,98701,73704,76705,53706,58034,37706,98705,72706,73706,37703,76706,34709,50706,53734,73734,98706,73705,53704,76704,98704,37705,73703,53401,98702,50799,76701,99709,34706,77706,76702,71706,37739,37701,50705,76734,74705,53703,41401,42040,98733,99703,55032,98707,7470,9499,42053,69039,32054,38399,9037,99701,72705,93989,64031,73702,19006,76998,69038,6017,49055,7011,98734,20017,7004,55046,76739,22499,30032,35398,20038,20019,35053,74709,12008,94999,50399,12882,36398,31399,19027,12989,98907,42033,77709,12018,32047,21020,36399,41047,12998,13403,18402,14499,12015,52035,24008,69042,24002,54032,53999,59043,49441,22009,22023,19017,37734,24441,92989,12403,21017,98739,18003,22398,53036,2431,6398,21005,9019,10041,19019,23988,69398,69399,84998,45044,6444,42406,5204

Unnamed: 0,Feature,Code,Name,Per,Corr
0,MAK_MOD,6010,6010,0.0003,100.0
1,MAK_MOD,76733,76733,0.0003,100.0
2,MAK_MOD,72704,72704,0.0002,100.0
3,MAK_MOD,71705,Ducati 450-749cc,0.0011,100.0
4,MAK_MOD,22001,Pontiac Lemans/ Tempest (thru 1970),0.0003,100.0
...,...,...,...,...,...
1195,MAK_MOD,84421,International Harvester/Navistar Scout,0.0002,0.0
1196,MAK_MOD,62427,Land Rover Defender,0.0003,0.0
1197,MAK_MOD,48498,Subaru Other (light truck),0.0005,0.0
1198,MAK_MOD,72707,Harley-Davidson Electric Motorcycle,0.0002,0.0



    feature = 'MAKE'
    A = [
        ['0', [74,76,71,72,50,73,77,43,53,98,64,65,21,9,52,22,14,18,92,24,37,39,63,]], #  28.1315 %
        ['1', [35,6,36,55,67,]], #  6.162 %
        ['2', [20,]], #  12.6429 %
        ['3', [13,69,34,]], #  2.0354 %
        ['4', [49,]], #  11.7578 %
        ['5', [30,]], #  1.5107 %
        ['6', [12,]], #  13.4181 %
        ['7', [19,2,]], #  5.6145 %
        ['8', [41,58,7,42,54,47,93,23,59,25,48,38,62,3,32,29,51,31,45,90,10,94,86,89,84,85,82,87,97,33,46,1,]], #  18.727 %
        ['Unknowns', [99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,MAKE,74,74,0.0026,68.75
1,MAKE,76,76,0.3487,63.1629
2,MAKE,71,71,0.0248,62.6667
3,MAKE,72,72,0.8679,61.8794
4,MAKE,50,50,0.0294,60.6742
5,MAKE,73,73,0.277,60.6079
6,MAKE,77,77,0.021,51.1811
7,MAKE,43,43,0.0003,50.0
8,MAKE,53,53,0.4677,45.1465
9,MAKE,98,98,0.6615,30.7712



    feature = 'MAX_VSEV'
    A = [
        ['0', [3,5,6,4,2,]], #  17.1638 %
        ['1', [1,]], #  17.3546 %
        ['2', []], #  0.0 %
        ['3', [0,]], #  65.4815 %
        ['4', []], #  0.0 %
        ['Unknowns', [9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,MAX_VSEV,3,Suspected Serious Injury (A),6.3571,78.5422
1,MAX_VSEV,5,"Injured, Severity Unknown",0.2877,58.6667
2,MAX_VSEV,6,Died Prior to Crash*,0.0027,56.25
3,MAX_VSEV,4,Fatal Injury (K),1.049,52.727
4,MAX_VSEV,2,Suspected Minor Injury (B),9.4673,46.8744
5,MAX_VSEV,1,Possible Injury (C),17.3546,27.0138
6,MAX_VSEV,0,No Apparent Injury (O),65.4815,0.0069



    feature = 'MDLYR_IM'
    A = [
        ['0', [1929,1947,1962,1968,1951,1956,1974,1982,1978,1955,1953,1960,1959,1950,1970,1986,1975,1985,1981,1966,1965,1987,1983,1973,1931,1977,1979,1984,1964,1976,1991,1980,1971,1988,1993,1992,1990,1994,1998,1997,1995,1989,1996,2002,1999,2001,2000,2005,]], #  22.8558 %
        ['1', [2003,2020,2004,2019,2006,]], #  17.6131 %
        ['2', [2007,1969,1940,2016,2017,2018,]], #  21.4618 %
        ['3', [2009,2008,2015,]], #  18.3831 %
        ['4', [2013,2014,2021,2012,1967,2011,2010,1957,1972,1948,1952,1928,1932,1933,1963,1954,1958,1934,1961,]], #  19.6865 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,MDLYR_IM,1929,1929.0,0.0002,100.0
1,MDLYR_IM,1947,1947.0,0.0003,100.0
2,MDLYR_IM,1962,1962.0,0.0005,66.6667
3,MDLYR_IM,1968,1968.0,0.0048,50.0
4,MDLYR_IM,1951,1951.0,0.001,50.0
5,MDLYR_IM,1956,1956.0,0.0008,40.0
6,MDLYR_IM,1974,1974.0,0.0055,38.2353
7,MDLYR_IM,1982,1982.0,0.0171,37.7358
8,MDLYR_IM,1978,1978.0,0.0121,34.6667
9,MDLYR_IM,1955,1955.0,0.001,33.3333



    feature = 'MOD_YEAR'
    A = [
        ['0', [1929,1947,1962,1968,1951,1956,1974,1982,1978,1955,1953,1960,1959,1950,1970,1986,1975,1985,1981,1966,1965,1987,1983,1973,1931,1977,1979,1984,1964,1976,1991,1980,1971,1988,1993,1992,1990,1994,1998,1995,1997,1996,1989,2002,2001,1999,2000,2005,]], #  22.8303 %
        ['1', [2003,2004,2020,2006,2019,]], #  17.5677 %
        ['2', [2007,2009,1969,1940,2016,2008,]], #  22.0677 %
        ['3', [2017,2018,2015,]], #  18.0075 %
        ['4', [2013,2014,2021,2012,2011,2010,1967,1957,1972,1948,1952,1928,1932,1933,1963,1954,1958,1934,1961,]], #  19.5268 %
        ['Unknowns', [9998, 9999, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,MOD_YEAR,1929,1929,0.0002,100.0
1,MOD_YEAR,1947,1947,0.0003,100.0
2,MOD_YEAR,1962,1962,0.0005,66.6667
3,MOD_YEAR,1968,1968,0.005,50.0
4,MOD_YEAR,1951,1951,0.001,50.0
5,MOD_YEAR,1956,1956,0.0008,40.0
6,MOD_YEAR,1974,1974,0.0057,38.2353
7,MOD_YEAR,1982,1982,0.0177,37.7358
8,MOD_YEAR,1978,1978,0.0125,34.6667
9,MOD_YEAR,1955,1955,0.001,33.3333



    feature = 'MODEL'
    A = [
        ['0', [709,703,701,706,704,705,702,707,799,733,734,907,739,12,56,11,16,4,19,471,424,9,22,29,50,6,3,20,2,21,59,37,18,7,43,13,]], #  20.8933 %
        ['1', [399,52,17,444,25,36,1,38,998,406,408,15,39,47,31,35,32,405,48,]], #  19.3475 %
        ['2', [431,33,27,989,23,445,34,40,404,44,425,446,988,26,51,401,407,28,]], #  22.7449 %
        ['3', [402,42,49,46,443,442,24,499,473,57,54,483,403,441,472,423,41,5,55,480,398,498,]], #  21.1888 %
        ['4', [481,421,45,422,53,470,14,482,463,461,10,8,999,983,982,870,462,981,883,809,882,880,58,881,806,808,804,884,898,850,997,890,732,908,466,805,598,902,731,599,426,904,474,60,427,]], #  15.8254 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,MODEL,709,709,0.5147,64.5009
1,MODEL,703,703,0.1724,61.4808
2,MODEL,701,701,0.0969,61.3333
3,MODEL,706,706,1.3576,61.2208
4,MODEL,704,704,0.0291,61.1111
5,MODEL,705,705,0.4229,60.6952
6,MODEL,702,702,0.0192,58.8235
7,MODEL,707,707,0.0065,57.5
8,MODEL,799,799,0.0015,55.5556
9,MODEL,733,733,0.0024,53.3333



    feature = 'MXVSEV_IM'
    A = [
        ['0', [3,5,4,2,6,]], #  17.1044 %
        ['1', [1,]], #  17.4622 %
        ['2', []], #  0.0 %
        ['3', [0,]], #  65.4333 %
        ['4', []], #  0.0 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,MXVSEV_IM,3,Suspected Serious Injury (A),6.3412,76.5349
1,MXVSEV_IM,5,"Injured, Severity Unknown",0.299,54.9433
2,MXVSEV_IM,4,Fatal Injury (K),1.0423,51.5189
3,MXVSEV_IM,2,Suspected Minor Injury (B),9.4183,45.7514
4,MXVSEV_IM,6,Died Prior to Crash*,0.0036,45.4545
5,MXVSEV_IM,1,Possible Injury (C),17.4622,26.013
6,MXVSEV_IM,0,No Apparent Injury (O),65.4333,0.0



    feature = 'NUM_INJV'
    A = [
        ['0', [26,8,11,7,5,6,4,9,3,2,]], #  10.333 %
        ['1', [1,]], #  24.1729 %
        ['2', [10,14,12,]], #  0.0098 %
        ['3', [0,]], #  65.4842 %
        ['4', []], #  0.0 %
        ['Unknowns', [99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,NUM_INJV,26,26,0.0043,100.0
1,NUM_INJV,8,8,0.0249,73.1544
2,NUM_INJV,11,11,0.0077,69.5652
3,NUM_INJV,7,7,0.0545,63.6086
4,NUM_INJV,5,5,0.3416,62.0117
5,NUM_INJV,6,6,0.1448,60.9447
6,NUM_INJV,4,4,0.9077,57.7545
7,NUM_INJV,9,9,0.0083,54.0
8,NUM_INJV,3,3,2.018,53.1906
9,NUM_INJV,2,2,6.8212,49.6112



    feature = 'NUMINJ_IM'
    A = [
        ['0', [26,8,11,7,6,5,4,9,3,2,]], #  10.2074 %
        ['1', [1,]], #  24.3463 %
        ['2', [10,14,12,]], #  0.0095 %
        ['3', [0,]], #  65.4369 %
        ['4', []], #  0.0 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,NUMINJ_IM,26,26,0.0042,100.0
1,NUMINJ_IM,8,8,0.0241,73.1544
2,NUMINJ_IM,11,11,0.0074,69.5652
3,NUMINJ_IM,7,7,0.054,62.2754
4,NUMINJ_IM,6,6,0.1412,60.8696
5,NUMINJ_IM,5,5,0.3449,60.7494
6,NUMINJ_IM,4,4,0.899,56.8194
7,NUMINJ_IM,9,9,0.0081,54.0
8,NUMINJ_IM,3,3,2.0043,52.3898
9,NUMINJ_IM,2,2,6.7202,48.9615



    feature = 'NUMOCCS'
    A = [
        ['0', [59,26,35,31,37,10,14,33,13,8,20,27,11,]], #  0.2623 %
        ['1', [2,]], #  24.6466 %
        ['2', [6,7,21,]], #  1.1684 %
        ['3', [1,]], #  54.7647 %
        ['4', [12,9,]], #  0.0809 %
        ['5', [3,]], #  10.4213 %
        ['6', [38,5,17,4,19,16,34,25,28,24,43,49,23,15,29,22,18,40,32,55,53,50,44,51,30,39,41,75,47,95,52,54,62,60,56,58,46,65,57,48,36,45,77,]], #  8.6562 %
        ['Unknowns', [99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,NUMOCCS,59,59,0.0012,85.7143
1,NUMOCCS,26,26,0.0054,81.25
2,NUMOCCS,35,35,0.0025,46.6667
3,NUMOCCS,31,31,0.0022,30.7692
4,NUMOCCS,37,37,0.0017,30.0
5,NUMOCCS,10,10,0.0256,22.8758
6,NUMOCCS,14,14,0.0136,20.9877
7,NUMOCCS,33,33,0.0017,20.0
8,NUMOCCS,13,13,0.0124,18.9189
9,NUMOCCS,8,8,0.1587,18.4794



    feature = 'P_CRASH1'
    A = [
        ['0', [14,0,7,3,17,6,]], #  6.8626 %
        ['1', [1,]], #  50.4172 %
        ['2', []], #  0.0 %
        ['3', [11,]], #  10.3122 %
        ['4', [12,98,2,16,]], #  6.6395 %
        ['5', [5,]], #  15.4783 %
        ['6', [15,4,10,8,9,13,]], #  10.2902 %
        ['Unknowns', [99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,P_CRASH1,14,Negotiating a Curve,5.5884,28.4249
1,P_CRASH1,0,No Driver Present / Unknown if Driver Present,0.0253,22.7273
2,P_CRASH1,7,"Disabled or ""Parked"" in Travel lane",0.069,22.619
3,P_CRASH1,3,Accelerating in Road,0.1754,20.0375
4,P_CRASH1,17,Successful Avoidance Maneuver to a Previous Cr...,0.1192,18.3196
5,P_CRASH1,6,Passing or Overtaking Another Vehicle,0.8853,17.8783
6,P_CRASH1,1,Going Straight,50.4172,16.9901
7,P_CRASH1,11,Turning Left,10.3122,13.7164
8,P_CRASH1,12,Making a U-turn,0.5783,12.1522
9,P_CRASH1,98,Other(specify:),0.1619,10.2434



    feature = 'P_CRASH2'
    A = [
        ['0', [8,9,12,13,14,54,1,4,6,5,62,2,66,3,55,]], #  25.2772 %
        ['1', [17,67,72,63,68,91,64,78,15,]], #  14.9584 %
        ['2', [10,71,98,19,18,92,21,65,90,51,70,59,]], #  7.7214 %
        ['3', [53,]], #  19.6503 %
        ['4', [73,60,74,87,61,89,52,]], #  15.3052 %
        ['5', [11,88,16,50,56,20,80,82,81,84,85,83,]], #  17.0873 %
        ['Unknowns', [99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,P_CRASH2,8,Other cause of control loss (specify:),0.297,55.0137
1,P_CRASH2,9,Unknown cause of control loss,0.3388,41.3064
2,P_CRASH2,12,Off the edge of the road on the left side,2.0702,37.8007
3,P_CRASH2,13,Off the edge of the road on the right side,3.3889,35.0139
4,P_CRASH2,14,End departure,0.1801,34.7787
5,P_CRASH2,54,Traveling in opposite direction,0.3858,34.2893
6,P_CRASH2,1,Blow out/flat tire,0.193,34.2327
7,P_CRASH2,4,"Non-disabling vehicle problem (e.g., hood flew...",0.0586,34.1667
8,P_CRASH2,6,Traveling too fast for conditions or Road Conf...,2.1167,31.6344
9,P_CRASH2,5,Suddenly Encountered Poor road conditions (pud...,0.2678,28.9793



    feature = 'P_CRASH3'
    A = [
        ['0', [15,98,6,7,0,11,10,8,9,12,5,]], #  15.3542 %
        ['1', [16,]], #  13.4695 %
        ['2', []], #  0.0 %
        ['3', [1,]], #  71.1762 %
        ['4', []], #  0.0 %
        ['Unknowns', [99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,P_CRASH3,15,Braking and Unknown Steering Direction,0.3913,25.5747
1,P_CRASH3,98,Other actions (specify:),1.7734,24.572
2,P_CRASH3,6,Steering left,5.1387,23.9742
3,P_CRASH3,7,Steering right,5.6279,23.6187
4,P_CRASH3,0,No Driver Present / Unknown if Driver Present,0.0866,22.7273
5,P_CRASH3,11,Accelerating and steering left,0.0742,18.9394
6,P_CRASH3,10,Accelerating,0.2643,17.6596
7,P_CRASH3,8,Braking and steering left,0.93,17.4123
8,P_CRASH3,9,Braking and steering right,0.9896,16.0795
9,P_CRASH3,12,Accelerating and steering right,0.0506,15.5556



    feature = 'PCRASH1_IM'
    A = [
        ['0', [14,0,7,3,17,6,]], #  6.8303 %
        ['1', [1,]], #  50.6172 %
        ['2', []], #  0.0 %
        ['3', [11,]], #  10.2666 %
        ['4', [12,98,2,16,]], #  6.6712 %
        ['5', [5,]], #  15.2315 %
        ['6', [15,4,10,8,9,13,]], #  10.3832 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,PCRASH1_IM,14,Negotiating a Curve,5.5561,28.3334
1,PCRASH1_IM,0,No Driver Present / Unknown if Driver Present,0.0249,22.7273
2,PCRASH1_IM,7,"Disabled or ""Parked"" in Travel lane",0.0678,22.619
3,PCRASH1_IM,3,Accelerating in Road,0.1754,19.9816
4,PCRASH1_IM,17,Successful Avoidance Maneuver to a Previous Cr...,0.1186,18.3924
5,PCRASH1_IM,6,Passing or Overtaking Another Vehicle,0.8875,17.692
6,PCRASH1_IM,1,Going Straight,50.6172,16.8686
7,PCRASH1_IM,11,Turning Left,10.2666,13.6736
8,PCRASH1_IM,12,Making a U-turn,0.5754,12.128
9,PCRASH1_IM,98,Other(specify:),0.1666,10.1843



    feature = 'PCRASH4'
    A = [
        ['0', [7,3,4,5,2,0,]], #  3.8805 %
        ['1', [1,]], #  96.1194 %
        ['2', []], #  0.0 %
        ['Unknowns', [9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,PCRASH4,7,Other vehicle loss-of-control (specify:),0.0136,37.5
1,PCRASH4,3,Skidding laterally clockwise rotation,0.5746,34.7518
2,PCRASH4,4,Skidding laterally counterclockwise rotation,0.6956,32.3407
3,PCRASH4,5,"Skidding Laterally, Rotation Direction Unknown",0.3932,30.7858
4,PCRASH4,2,Skidding longitudinally rotation less than 30...,2.1774,25.9825
5,PCRASH4,0,No Driver Present / Unknown if Driver Present,0.0261,22.7273
6,PCRASH4,1,Tracking,96.1194,13.2478



    feature = 'PCRASH5'
    A = [
        ['0', [6,]], #  0.1972 %
        ['1', [4,]], #  10.8203 %
        ['2', [0,3,5,]], #  10.7314 %
        ['3', [2,]], #  0.0 %
        ['4', [1,]], #  76.6633 %
        ['5', [7,]], #  1.5877 %
        ['Unknowns', [9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,PCRASH5,6,Returned to roadway,0.1972,33.7449
1,PCRASH5,4,Departed roadway,10.8203,31.1067
2,PCRASH5,0,No Driver Present / Unknown if Driver Present,0.025,22.7273
3,PCRASH5,3,"Stayed on roadway, not known if left original ...",2.0638,14.6127
4,PCRASH5,5,Remained off roadway,0.219,12.8984
5,PCRASH5,2,"Stayed on roadway, but left original travel lane",8.4236,12.8117
6,PCRASH5,1,Stayed in original travel lane,76.6633,12.3185
7,PCRASH5,7,Entered roadway,1.5877,8.8837



    feature = 'ROLINLOC'
    A = [
        ['0', [7,3,6,1,5,4,2,]], #  2.9048 %
        ['1', [0,]], #  97.0952 %
        ['2', []], #  0.0 %
        ['Unknowns', [9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,ROLINLOC,7,In Parking Lane/Zone,0.0105,60.0
1,ROLINLOC,3,On Median/Separator,0.2237,58.3815
2,ROLINLOC,6,Outside of Trafficway,0.0886,58.2117
3,ROLINLOC,1,On Roadway,0.6817,52.11
4,ROLINLOC,5,On Roadside,1.8139,51.9202
5,ROLINLOC,4,In Gore,0.0268,50.6024
6,ROLINLOC,2,On Shoulder,0.0596,47.1545
7,ROLINLOC,0,No Rollover,97.0952,13.2464



    feature = 'ROLLOVER'
    A = [
        ['0', [1,9,2,]], #  2.951 %
        ['1', [0,]], #  97.0489 %
        ['2', []], #  0.0 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,ROLLOVER,1,"Rollover, Tripped by Object/Vehicle",1.4343,54.7697
1,ROLLOVER,9,"Rollover, Unknown Type",1.3169,51.5456
2,ROLLOVER,2,"Rollover, Untripped",0.1998,43.8965
3,ROLLOVER,0,No Rollover,97.0489,13.2464



    feature = 'SPEC_USE'
    A = [
        ['0', [19,4,10,1,5,]], #  0.5028 %
        ['1', [0,]], #  98.7952 %
        ['2', [3,20,8,6,21,13,2,22,7,23,12,11,]], #  0.7021 %
        ['Unknowns', [98, 99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,SPEC_USE,19,Motor Vehicle Used for Vehicle Sharing Mobility,0.002,41.6667
1,SPEC_USE,4,Military,0.0026,31.25
2,SPEC_USE,10,Safety Service Patrols  Incident Response,0.0007,25.0
3,SPEC_USE,1,Taxi,0.2092,18.5039
4,SPEC_USE,5,Police,0.2883,18.2857
5,SPEC_USE,0,No Special Use,98.7952,14.6132
6,SPEC_USE,3,Vehicle Used as Other Bus,0.2517,14.3325
7,SPEC_USE,20,Motor Vehicle Used for Electronic Ride-hailing,0.0239,13.7931
8,SPEC_USE,8,Non-Transport Emergency Services Vehicle,0.0071,11.6279
9,SPEC_USE,6,Ambulance,0.0955,11.3793



    feature = 'SPEEDREL'
    A = [
        ['0', [3,2,5,8,4,]], #  6.1165 %
        ['1', [0,]], #  93.8834 %
        ['2', []], #  0.0 %
        ['Unknowns', [9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,SPEEDREL,3,"Yes, Exceeded Speed Limit",0.7271,43.5816
1,SPEEDREL,2,"Yes, Racing",0.0245,41.6107
2,SPEEDREL,5,"Yes, Specifics Unknown",0.8623,29.7824
3,SPEEDREL,8,No Driver Present/Unknown if Driver Present,0.0254,22.7273
4,SPEEDREL,4,"Yes, Too Fast for Conditions",4.4772,21.2781
5,SPEEDREL,0,No,93.8834,13.8132



    feature = 'TOW_VEH'
    A = [
        ['0', [3,]], #  0.0021 %
        ['1', [0,]], #  97.4427 %
        ['2', [6,5,1,2,4,]], #  2.5551 %
        ['Unknowns', [9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,TOW_VEH,3,Three or More Trailing Units,0.0021,30.7692
1,TOW_VEH,0,No Trailing Units,97.4427,14.6828
2,TOW_VEH,6,Vehicle Towing Another Motor Vehicle - Non-Fix...,0.0066,12.1951
3,TOW_VEH,5,Vehicle Towing Another Motor Vehicle - Fixed L...,0.0286,6.2147
4,TOW_VEH,1,One Trailing Unit,2.4601,4.7951
5,TOW_VEH,2,Two Trailing Units,0.0553,4.386
6,TOW_VEH,4,"Yes, Number of Trailing Units Unknown",0.0045,0.0



    feature = 'TOWED'
    A = [
        ['0', []], #  0.0 %
        ['1', [2,]], #  33.0112 %
        ['2', [7,]], #  8.3598 %
        ['3', [3,]], #  0.0 %
        ['4', [5,]], #  58.629 %
        ['5', []], #  0.0 %
        ['Unknowns', [8, 9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,TOWED,2,Towed Due to Disabling Damage,33.0112,31.9881
1,TOWED,7,"Towed, Unknown Reason",3.2976,30.796
2,TOWED,3,Towed But Not Due to Disabling Damage,5.0622,22.4962
3,TOWED,5,Not Towed,58.629,3.7129



    feature = 'TRAV_SP'
    A = [
        ['0', [103,86,121,122,111,130,110,120,97,112,82,108,76,91,100,83,90,102,84,118,105,115,85,74,80,95,92,93,96,71,89,52,87,145,114,75,42,44,58,81,29,59,63,48,77,60,51,70,50,66,43,73,55,997,78,68,57,53,65,45,46,67,62,33,54,88,24,37,79,38,72,69,]], #  23.8671 %
        ['1', [40,39,27,64,56,47,35,11,32,61,30,19,22,31,12,]], #  16.8789 %
        ['2', [25,23,28,26,49,16,34,98,13,18,20,17,15,9,]], #  11.0565 %
        ['3', [0,]], #  32.6952 %
        ['4', [10,8,99,41,7,1,5,14,2,3,6,36,4,21,101,150,139,104,125,]], #  15.5016 %
        ['Unknowns', [998, 999, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,TRAV_SP,103,103 MPH,0.0013,100.0
1,TRAV_SP,86,086 MPH,0.0013,100.0
2,TRAV_SP,121,121,0.0003,100.0
3,TRAV_SP,122,122 MPH,0.0003,100.0
4,TRAV_SP,111,111 MPH,0.0017,100.0
5,TRAV_SP,130,130 MPH,0.0024,85.7143
6,TRAV_SP,110,110 MPH,0.0061,72.2222
7,TRAV_SP,120,120 MPH,0.0054,68.75
8,TRAV_SP,97,097 MPH,0.001,66.6667
9,TRAV_SP,112,112 MPH,0.001,66.6667



    feature = 'V_ALCH_IM'
    A = [
        ['0', [1,]], #  3.044 %
        ['1', [2,]], #  96.956 %
        ['2', []], #  0.0 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,V_ALCH_IM,1,Alcohol Involved,3.044,35.9338
1,V_ALCH_IM,2,No Alcohol Involved,96.956,13.7318



    feature = 'VALIGN'
    A = [
        ['0', [3,2,4,]], #  8.3849 %
        ['1', [1,]], #  88.9094 %
        ['2', [0,]], #  2.7058 %
        ['Unknowns', [8, 9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VALIGN,3,Curve - Left,3.3062,28.7461
1,VALIGN,2,Curve - Right,3.6496,21.163
2,VALIGN,4,Curve - Unknown Direction,1.4291,17.3507
3,VALIGN,1,Straight,88.9094,13.8858
4,VALIGN,0,Non-Trafficway or Driveway Access,2.7058,7.5867



    feature = 'VEH_ALCH'
    A = [
        ['0', [1,8,]], #  3.0238 %
        ['1', [2,]], #  96.9762 %
        ['2', []], #  0.0 %
        ['Unknowns', [9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VEH_ALCH,1,Alcohol Involved,2.9937,39.0308
1,VEH_ALCH,8,No Driver Present,0.0301,22.7273
2,VEH_ALCH,2,No Alcohol Involved,96.9762,13.6707



    feature = 'VEVENT_IM'
    A = [
        ['0', [74,10,1,5,21,42,32,19,35,46,39,30,4,93,20,3,45,23,58,52,2,34,6,25,26,24,33,31,44,38,17,41,43,7,91,40,48,57,59,53,]], #  10.0888 %
        ['1', [12,]], #  81.0178 %
        ['2', [49,16,14,55,18,50,72,11,73,54,51,8,15,9,]], #  8.8932 %
        ['Unknowns', []]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VEVENT_IM,74,Road Vehicle on Rails,0.0008,80.0
1,VEVENT_IM,10,Railway Vehicle,0.012,55.4054
2,VEVENT_IM,1,Rollover/Overturn,2.659,53.633
3,VEVENT_IM,5,Fell/Jumped from Vehicle,0.1081,52.6158
4,VEVENT_IM,21,Bridge Pier or Support,0.0291,49.4444
5,VEVENT_IM,42,Tree (Standing Only),1.3644,46.8979
6,VEVENT_IM,32,Culvert,0.1509,42.2912
7,VEVENT_IM,19,Building,0.1423,37.4574
8,VEVENT_IM,35,Embankment,0.3266,37.3887
9,VEVENT_IM,46,Traffic Signal Support,0.0546,36.0947



    feature = 'VNUM_LAN'
    A = [
        ['0', []], #  0.0 %
        ['1', [2,]], #  44.9531 %
        ['2', []], #  0.0 %
        ['3', [4,]], #  14.8614 %
        ['4', []], #  0.0 %
        ['5', [3,]], #  18.2764 %
        ['6', []], #  0.0 %
        ['7', [5,]], #  10.9793 %
        ['8', [7,1,6,0,]], #  10.9298 %
        ['Unknowns', [8, 9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VNUM_LAN,2,Two lanes,44.9531,18.295
1,VNUM_LAN,4,Four lanes,14.8614,13.6266
2,VNUM_LAN,3,Three lanes,18.2764,13.4413
3,VNUM_LAN,5,Five lanes,10.9793,13.3228
4,VNUM_LAN,7,Seven or more lanes,1.3722,11.7972
5,VNUM_LAN,1,One lane,2.3721,11.2615
6,VNUM_LAN,6,Six lanes,3.7086,11.075
7,VNUM_LAN,0,Non-Trafficway or Driveway Access,3.4769,7.5867



    feature = 'VPROFILE'
    A = [
        ['0', [6,5,4,3,]], #  5.9771 %
        ['1', [1,]], #  83.0272 %
        ['2', [2,0,]], #  10.9956 %
        ['Unknowns', [8, 9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VPROFILE,6,Downhill,2.5991,22.7381
1,VPROFILE,5,Uphill,1.9644,21.2952
2,VPROFILE,4,Sag (Bottom),0.1796,20.7292
3,VPROFILE,3,Hillcrest,1.234,19.7999
4,VPROFILE,1,Level,83.0272,14.3376
5,VPROFILE,2,"Grade, Unknown Slope",8.0784,14.2725
6,VPROFILE,0,Non-Trafficway or Driveway Access,2.9172,7.5867



    feature = 'VSPD_LIM'
    A = [
        ['0', [90,]], #  0.0002 %
        ['1', [55,]], #  10.9312 %
        ['2', [80,75,70,50,]], #  14.7318 %
        ['3', [65,]], #  0.0 %
        ['4', [45,]], #  21.5203 %
        ['5', [60,]], #  1.9729 %
        ['6', [40,]], #  11.2788 %
        ['7', []], #  0.0 %
        ['8', [35,]], #  19.1915 %
        ['9', []], #  9.1917 %
        ['10', [25,30,20,15,0,10,5,]], #  11.1816 %
        ['Unknowns', [98, 99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VSPD_LIM,90,90 MPH,0.0002,100.0
1,VSPD_LIM,55,55 MPH,10.9312,21.4367
2,VSPD_LIM,80,80 MPH,0.1461,19.7957
3,VSPD_LIM,75,75 MPH,0.3286,18.6258
4,VSPD_LIM,70,70 MPH,5.0208,17.8145
5,VSPD_LIM,50,50 MPH,3.5611,17.7435
6,VSPD_LIM,65,65 MPH,5.6752,16.1712
7,VSPD_LIM,45,45 MPH,21.5203,14.8403
8,VSPD_LIM,60,60 MPH,1.9729,14.51
9,VSPD_LIM,40,40 MPH,11.2788,13.4878



    feature = 'VSURCOND'
    A = [
        ['0', [5,11,7,8,6,]], #  0.2729 %
        ['1', [1,]], #  81.6571 %
        ['2', []], #  0.0 %
        ['3', [2,]], #  13.4651 %
        ['4', [10,4,3,0,]], #  4.6048 %
        ['Unknowns', [98, 99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VSURCOND,5,Sand,0.0088,37.7358
1,VSURCOND,11,"Mud, Dirt or Gravel",0.1017,27.6423
2,VSURCOND,7,Oil,0.0043,26.9231
3,VSURCOND,8,Other,0.0339,19.5122
4,VSURCOND,6,Water (Standing or Moving),0.1242,15.5792
5,VSURCOND,1,Dry,81.6571,14.6778
6,VSURCOND,2,Wet,13.4651,14.514
7,VSURCOND,10,Slush,0.1816,13.2969
8,VSURCOND,4,Ice/Frost,0.8036,12.9477
9,VSURCOND,3,Snow,1.0404,10.9062



    feature = 'VTCONT_F'
    A = [
        ['0', [1,]], #  0.1033 %
        ['1', [0,]], #  62.2729 %
        ['2', [4,]], #  0.006 %
        ['3', [3,]], #  37.5464 %
        ['4', [2,]], #  0.0713 %
        ['Unknowns', [8, 9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VTCONT_F,1,Device Not Functioning,0.1033,17.2241
1,VTCONT_F,0,No Controls,62.2729,15.3471
2,VTCONT_F,4,Device Not Functioning or Device Functioning I...,0.006,14.2857
3,VTCONT_F,3,Device Functioning Properly,37.5464,12.9294
4,VTCONT_F,2,Device Functioning-Functioning Improperly,0.0713,12.3487



    feature = 'VTRAFCON'
    A = [
        ['0', [29,40,28,9,4,98,65,]], #  2.0442 %
        ['1', [0,]], #  62.2529 %
        ['2', []], #  0.0 %
        ['3', [3,]], #  24.9677 %
        ['4', [1,20,7,8,23,50,2,21,]], #  10.7352 %
        ['Unknowns', [97, 99, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VTRAFCON,29,Unknown Regulatory Sign,0.0047,25.9259
1,VTRAFCON,40,Warning Sign,0.9904,21.4921
2,VTRAFCON,28,Other Regulatory Sign,0.1326,19.7917
3,VTRAFCON,9,Unknown Highway Traffic Signal,0.0086,18.0
4,VTRAFCON,4,Flashing Traffic Control Signal,0.3094,17.6339
5,VTRAFCON,98,Other,0.4713,17.1795
6,VTRAFCON,65,Railway Crossing Device,0.1272,15.8752
7,VTRAFCON,0,No Controls,62.2529,15.3471
8,VTRAFCON,3,Traffic control signal(on colors) not known wh...,24.9677,12.9974
9,VTRAFCON,1,Traffic control signal (on colors) without Ped...,0.5316,12.9912



    feature = 'VTRAFWAY'
    A = [
        ['0', []], #  0.0 %
        ['1', [1,]], #  45.1904 %
        ['2', []], #  0.0 %
        ['3', [2,]], #  17.174 %
        ['4', [5,]], #  5.757 %
        ['5', [3,]], #  23.3378 %
        ['6', [6,4,0,]], #  8.541 %
        ['Unknowns', [8, 9, ]]
    ]
    data = Build_Individual_Feature_with_Dict(df_Veh, data, feature, A)



Unnamed: 0,Feature,Code,Name,Per,Corr
0,VTRAFWAY,1,"Two-Way, Not Divided",45.1904,16.875
1,VTRAFWAY,2,"Two-Way, Divided, Unprotected Median",17.174,14.4629
2,VTRAFWAY,5,"Two-Way, Not Divided With a Continuous Left-Tu...",5.757,14.3864
3,VTRAFWAY,3,"Two-Way, Divided, Positive Median Barrier",23.3378,14.0477
4,VTRAFWAY,6,Entrance/Exit Ramp,2.6355,11.1775
5,VTRAFWAY,4,One-Way Trafficway,2.835,10.1202
6,VTRAFWAY,0,Non-Trafficway or Driveway Access,3.0705,7.5867



        Feature  Code                           Name      Per   Corr
0      HOSPITAL     0  Not Transported for Treatment  14.4076  100.0
1      HOSPITAL     1                        EMS Air  14.4076  100.0
2     BDYTYP_IM    86                             86   0.0002  100.0
3      BODY_TYP    86                             86   0.0002  100.0
4       MAK_MOD  6010                           6010   0.0003  100.0
...         ...   ...                            ...      ...    ...
2509    TRAV_SP   101                            101   0.0003    0.0
2510    TRAV_SP   150                        150 MPH   0.0010    0.0
2511    TRAV_SP   139                            139   0.0003    0.0
2512    TRAV_SP   104                        104 MPH   0.0003    0.0
2513    TRAV_SP   125                        125 MPH   0.0010    0.0

[2514 rows x 5 columns]

        Feature  Code                           Name      Per      Corr
0      HOSPITAL     0  Not Transported for Treatment  14.4076  100.0000
1