In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
pd.set_option('display.max_columns', None)

In [2]:
# Read that data into Python

In [3]:
numericals = pd.read_csv('numerical7_02.csv', index_col=0)
categoricals = pd.read_csv('categorical7_02.csv', index_col=0)
y = pd.read_csv('target7_02.csv', index_col=0)

In [4]:
display(numericals.shape)
display(categoricals.shape)

(95412, 319)

(95412, 12)

In [5]:
# Check the datatypes of all the columns in the data.

In [6]:
categoricals.dtypes

STATE        object
CLUSTER       int64
HOMEOWNR     object
GENDER       object
DATASRCE      int64
SOLIH       float64
VETERANS     object
RFA_2R       object
RFA_2A       object
GEOCODE2     object
DOMAIN_A     object
DOMAIN_B      int64
dtype: object

In [7]:
numericals.dtypes

ODATEDW       int64
TCODE         int64
DOB           int64
AGE         float64
INCOME      float64
             ...   
AVGGIFT     float64
CONTROLN      int64
HPHONE_D      int64
RFA_2F        int64
CLUSTER2    float64
Length: 319, dtype: object

In [8]:
# For some reason there is a unnamed column

In [9]:
display(numericals.shape)
display(categoricals.shape)

(95412, 319)

(95412, 12)

In [10]:
# Check for null values in the dataframe.

In [11]:
categoricals.isnull().sum()/len(categoricals)

STATE       0.000000
CLUSTER     0.000000
HOMEOWNR    0.000000
GENDER      0.000000
DATASRCE    0.000000
SOLIH       0.935019
VETERANS    0.890727
RFA_2R      0.000000
RFA_2A      0.000000
GEOCODE2    0.000000
DOMAIN_A    0.000000
DOMAIN_B    0.000000
dtype: float64

In [12]:
# This information will not change how the model will turn out so I will be removing it

categoricals['SOLIH'].value_counts(dropna=False)

NaN     89212
12.0     5693
0.0       296
1.0        94
2.0        75
3.0        19
4.0        16
6.0         7
Name: SOLIH, dtype: int64

In [13]:
categoricals = categoricals.drop(['SOLIH'], axis=1)

In [14]:
# Will change NaN to N

categoricals['VETERANS'].value_counts(dropna=False)

NaN    84986
Y      10426
Name: VETERANS, dtype: int64

In [15]:
categoricals['VETERANS'] = categoricals['VETERANS'].fillna('N')

In [16]:
# NEXTDATE doesn't give enough valuable information to keep in my dataset according to the documentation


df = pd.DataFrame(numericals.isna().sum()).reset_index()
df.columns = ['column_name', 'nulls']
df[df['nulls']>0]

Unnamed: 0,column_name,nulls
312,NEXTDATE,9973


In [17]:
# After checking my previous lab I know this is the only nan in the numerical dataset

numericals = numericals.drop(['NEXTDATE'], axis=1)
numericals

Unnamed: 0,ODATEDW,TCODE,DOB,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
0,8901,0,3712,60.000000,5.0,5.0,0,0,39,34,18,10,2,1,5.0,992,264,332,0,35,65,47,53,92,1,0,0,11,0,0,0,0,0,0,0,11,0,0,0,39,48,51,40,50,54,25,31,42,27,11,14,18,17,13,11,15,12,11,34,25,18,26,10,23,18,33,49,28,12,4,61,7,12,19,198,276,97,95,2,2,0,0,7,7,0,479,635,3,2,86,14,96,4,7,38,80,70,32,84,16,6,2,5,9,15,3,17,50,25,0,0,0,2,7,13,27,47,0,1,61,58,61,15,4,2,0,0,14,1,0,0,2,5,17,73,307,318,349,378,12883,13,23,23,23,15,1,0,0,1,4,25,24,26,17,2,0,0,2,28,4,51,1,46,54,3,88,8,0,0,0,0,0,0,4,1,13,14,16,2,45,56,64,50,64,44,62,53,99,0,0,9,3,8,13,9,0,3,9,3,15,19,5,4,3,0,3,41,1,0,7,13,6,5,0,4,9,4,1,3,10,2,1,7,78,2,0,120,16,10,39,21,8,4,3,5,20,3,19,4,0,0,0,18,39,0,34,23,18,16,1,4,0,23,0,0,5,1,0,0,0,0,0,2,0,3,74,88,8,0,4,96,77,19,13,31,5,14,14,31,54,46,0,0,90,0,10,0,0,0,33,65,40,99,99,6,2,10,7,27,74,6,14,240.0,31,14,5.0,9208,12.0,9402,10.0,9512,8911,4.0,7.741935,95515,0,4,39.0
1,9401,1,5202,46.000000,6.0,9.0,16,0,15,55,11,6,2,1,9.0,3611,940,998,99,0,0,50,50,67,0,0,31,6,4,2,6,4,14,0,0,2,0,1,4,34,41,43,32,42,45,32,33,46,21,13,14,33,23,10,4,2,11,16,36,22,15,12,1,5,4,21,75,55,23,9,69,4,3,24,317,360,99,99,0,0,0,0,0,0,0,5468,5218,12,10,96,4,97,3,9,59,94,88,55,95,5,4,1,3,5,4,2,18,44,5,0,0,0,97,98,98,98,99,94,0,83,76,73,21,5,0,0,0,4,0,0,0,91,91,91,94,1088,1096,1026,1037,36175,2,6,2,5,15,14,13,10,33,2,5,2,5,15,14,14,10,32,6,2,66,3,56,44,9,80,14,0,0,0,0,0,0,6,0,2,24,32,12,71,70,83,58,81,57,64,57,99,99,0,22,24,4,21,13,2,1,6,0,4,1,0,3,1,0,6,13,1,2,8,18,11,4,3,4,10,7,11,1,6,2,1,16,69,5,2,160,5,5,12,21,7,30,20,14,24,4,24,10,0,0,0,8,15,0,55,10,11,0,0,2,0,3,1,1,2,3,1,1,0,3,0,0,0,42,39,50,7,27,16,99,92,53,5,10,2,26,56,97,99,0,0,0,96,0,4,0,0,0,99,0,99,99,99,20,4,6,5,12,32,6,13,47.0,3,1,10.0,9310,25.0,9512,25.0,9512,9310,18.0,15.666667,148535,0,2,1.0
2,9001,1,0,61.611649,3.0,1.0,2,0,20,29,33,6,8,1,1.0,7001,2040,2669,0,2,98,49,51,96,2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,35,43,46,37,45,49,23,35,40,25,13,20,19,16,13,10,8,15,14,30,22,19,25,10,23,21,35,44,22,6,2,63,9,9,19,183,254,69,69,1,6,5,3,3,3,0,497,546,2,1,78,22,93,7,18,36,76,65,30,86,14,7,2,5,11,17,3,17,60,18,0,1,0,0,1,6,18,50,0,4,36,49,51,14,5,4,2,24,11,2,3,6,0,2,9,44,251,292,292,340,11576,32,18,20,15,12,2,0,0,1,20,19,24,18,16,2,0,0,1,28,8,31,11,38,62,8,74,22,0,0,0,0,0,2,2,1,21,19,24,6,61,65,73,59,70,56,78,62,82,99,4,10,5,2,6,12,0,1,9,5,18,20,5,7,6,0,11,33,4,3,2,12,3,3,2,0,7,8,3,3,6,7,1,8,74,3,1,120,22,20,28,16,6,5,3,1,23,1,16,6,0,0,0,10,21,0,28,23,32,8,1,14,1,5,0,0,7,0,0,0,0,0,1,0,0,2,84,96,3,0,0,92,65,29,9,22,3,12,23,50,69,31,0,0,0,6,35,44,0,15,22,77,17,97,92,9,2,6,5,26,63,6,14,202.0,27,14,2.0,9111,16.0,9207,5.0,9512,9001,12.0,7.481481,15078,1,4,60.0
3,8701,0,2801,70.000000,1.0,4.0,2,0,23,14,31,3,0,3,0.0,640,160,219,0,8,92,54,46,61,0,0,11,32,6,2,0,0,0,0,0,31,0,0,1,32,40,44,34,43,47,25,45,35,20,15,25,17,17,12,7,7,20,17,30,14,19,25,11,23,23,27,50,30,15,8,63,9,6,23,199,283,85,83,3,4,1,0,2,0,2,1000,1263,2,1,48,52,93,7,6,36,73,61,30,84,16,6,3,3,21,12,4,13,36,13,0,0,0,10,25,50,69,92,10,15,42,55,50,15,5,4,0,9,42,4,0,5,1,8,17,34,386,388,396,423,15130,27,12,4,26,22,5,0,0,4,35,5,6,12,30,6,0,0,5,22,14,26,20,46,54,3,58,36,0,0,0,0,0,6,0,0,17,13,15,0,43,69,81,53,68,45,33,31,0,99,23,17,3,0,6,6,0,0,13,42,12,0,0,0,42,0,6,3,0,0,0,23,3,3,6,0,3,3,3,3,3,0,3,6,87,0,0,120,28,12,14,27,10,3,5,0,19,1,17,0,0,0,0,13,23,0,14,40,31,16,0,1,0,13,0,0,4,0,0,0,3,0,0,0,0,29,67,56,41,3,0,94,43,27,4,38,0,10,19,39,45,55,0,0,45,22,17,0,0,16,23,77,22,93,89,16,2,6,6,27,66,6,14,109.0,16,7,2.0,8711,11.0,9411,10.0,9512,8702,9.0,6.812500,172556,1,4,41.0
4,8601,0,2001,78.000000,3.0,2.0,60,1,28,9,53,26,3,2,5.0,2520,627,761,99,0,0,46,54,2,98,0,0,1,0,0,0,0,0,0,0,0,0,0,0,33,45,50,36,46,50,27,34,43,23,14,21,13,15,20,12,5,13,15,34,19,19,31,7,27,16,26,57,36,24,14,42,17,9,33,235,323,99,98,0,0,0,0,0,0,0,576,594,4,3,90,10,97,3,0,42,82,49,22,92,8,20,3,17,9,23,1,1,1,0,21,58,19,0,1,2,16,67,0,2,45,52,53,16,6,0,0,0,9,0,0,0,25,58,74,83,240,250,293,321,9836,24,29,23,13,4,4,0,0,2,21,30,22,16,4,5,0,0,3,35,8,11,14,20,80,4,73,22,1,1,0,0,0,3,1,2,1,24,27,3,76,61,73,51,65,49,80,31,81,99,10,17,8,2,6,15,3,7,22,2,9,0,7,2,2,0,6,1,5,2,2,12,2,7,6,4,15,29,4,3,26,3,2,7,49,12,1,120,16,20,30,13,3,12,5,2,26,1,20,7,1,1,1,15,28,4,9,16,53,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,65,99,0,0,0,90,45,18,25,34,0,1,3,6,33,67,0,0,9,14,72,3,0,0,99,1,21,99,96,6,2,7,11,43,113,10,25,254.0,37,8,3.0,9310,15.0,9601,15.0,9601,7903,14.0,6.864865,7112,1,2,26.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,9601,1,0,61.611649,5.0,5.0,0,14,36,47,11,7,8,13,5.0,27380,7252,10037,99,0,0,50,50,78,10,6,4,5,0,0,0,1,1,0,0,3,1,0,2,28,35,38,29,38,41,30,45,37,18,16,31,25,15,8,3,1,20,18,31,18,13,7,3,5,20,32,48,28,10,4,58,15,3,24,195,271,54,38,8,32,24,14,0,0,0,988,1025,6,6,56,44,89,11,3,44,72,56,32,83,17,12,3,10,16,15,8,19,55,5,3,6,0,2,10,49,73,92,0,4,40,52,53,15,4,24,8,13,14,15,12,3,69,84,92,97,433,481,499,535,18807,11,13,13,21,22,13,4,2,2,9,11,11,21,24,16,4,2,2,9,6,70,6,63,37,27,76,15,2,2,0,0,0,5,2,1,2,18,20,2,69,81,89,73,83,69,69,57,61,94,7,15,16,5,10,21,0,3,11,1,11,2,3,3,1,4,6,4,7,3,3,17,7,5,3,1,9,8,7,14,7,8,13,6,59,7,0,136,2,7,28,33,8,15,8,3,26,2,19,8,8,15,2,20,35,5,48,15,11,25,1,5,1,9,0,0,4,1,1,1,0,0,1,1,0,4,26,92,3,2,4,95,60,19,3,14,0,7,32,78,91,9,6,5,86,1,12,0,0,1,93,7,98,99,98,16,4,4,3,6,14,5,12,25.0,1,0,25.0,9602,25.0,9602,25.0,9602,9602,8.0,25.000000,184568,0,1,12.0
95408,9601,1,5001,48.000000,7.0,9.0,1,0,31,43,19,4,1,0,5.0,1254,322,361,96,0,4,51,49,91,3,0,2,6,1,0,1,0,0,0,0,5,0,0,1,30,40,40,28,41,43,39,33,42,25,9,19,43,17,7,4,2,10,16,35,23,16,9,2,7,10,20,70,52,25,6,73,4,2,20,307,346,89,88,1,1,0,0,0,0,0,1679,1723,3,3,88,12,97,3,0,63,89,85,60,96,4,2,1,1,7,5,1,28,58,5,2,2,0,18,71,88,91,97,5,1,77,82,75,20,4,1,0,10,7,1,0,5,16,26,44,79,806,836,802,849,26538,8,9,7,6,11,29,13,2,15,10,0,8,2,13,35,16,3,13,8,5,61,7,83,17,36,80,4,4,4,0,0,0,6,5,3,3,25,32,10,61,73,88,56,87,52,48,43,99,0,0,18,31,0,13,17,0,1,2,4,6,0,3,5,1,8,8,9,3,7,9,13,9,6,0,0,4,7,13,3,4,1,0,4,78,12,0,160,1,6,12,24,7,36,14,9,35,5,32,7,0,0,0,21,31,8,43,5,19,15,1,12,1,14,0,0,4,0,0,1,0,0,0,1,0,2,51,94,3,0,2,99,84,29,4,7,2,55,90,94,94,6,0,0,82,2,16,0,0,0,69,31,67,99,97,18,5,3,2,4,10,3,8,20.0,1,0,20.0,9603,20.0,9603,20.0,9603,9603,8.0,20.000000,122706,1,1,2.0
95409,9501,1,3801,60.000000,5.0,5.0,0,0,18,46,20,7,23,0,5.0,552,131,205,99,0,0,53,47,82,14,0,1,9,0,0,0,0,0,0,0,9,0,0,0,28,35,37,30,41,44,32,46,38,17,13,34,21,9,9,9,4,21,17,32,20,10,18,7,17,27,29,44,31,14,5,45,19,5,31,179,268,96,95,1,2,1,0,0,0,0,376,377,4,3,66,34,95,5,10,37,64,43,21,80,20,16,2,14,21,20,9,20,49,12,7,7,1,0,0,0,1,9,0,2,45,51,54,14,5,2,0,0,31,2,0,0,3,34,78,91,263,264,319,345,12178,21,26,20,18,12,0,3,0,0,26,18,17,11,21,0,6,0,0,10,13,26,26,43,57,3,83,17,0,0,0,0,0,0,0,0,25,17,17,0,69,69,70,69,70,69,77,24,62,0,25,5,13,9,5,22,0,2,14,0,13,9,5,2,0,0,4,14,3,11,0,10,5,2,0,5,6,19,3,19,7,23,0,0,52,18,0,120,5,3,51,23,7,11,0,6,32,4,27,7,0,0,0,9,18,0,46,0,20,20,2,8,0,14,0,0,0,1,0,0,0,0,1,0,0,6,82,92,5,3,0,93,42,12,6,51,0,0,0,0,0,99,0,0,97,0,0,0,0,4,99,0,99,99,99,5,2,3,11,14,33,7,17,58.0,7,4,3.0,9603,10.0,9501,10.0,9610,9410,3.0,8.285714,189641,1,3,34.0
95410,8601,0,4005,58.000000,7.0,5.0,0,0,28,35,20,9,1,1,7.0,1746,432,508,99,0,0,47,53,92,1,1,5,8,0,1,2,0,1,0,0,5,0,0,3,34,42,45,36,45,49,25,38,40,22,12,21,21,18,12,7,9,13,16,34,20,17,20,4,16,9,26,65,41,17,6,56,9,8,27,262,324,99,99,0,0,0,0,5,4,1,2421,2459,11,10,88,12,99,1,0,44,85,71,36,84,16,8,2,6,9,12,6,19,56,16,0,0,0,89,96,99,99,99,9,0,90,65,68,18,5,0,0,0,12,0,0,0,88,88,90,91,552,544,568,556,15948,7,4,11,18,38,15,5,3,0,4,6,15,19,38,13,4,3,0,25,2,46,3,43,57,9,80,11,0,0,0,0,1,2,6,0,24,18,28,11,52,73,88,60,85,57,70,54,99,99,0,14,16,6,16,17,0,2,12,1,11,2,0,2,1,0,2,22,4,6,4,19,4,7,2,4,6,7,9,4,9,1,1,7,72,8,2,140,7,6,20,35,12,15,5,6,29,4,21,10,0,0,0,13,28,1,35,18,20,8,0,3,1,9,0,0,2,6,1,2,0,0,0,0,0,14,50,83,8,4,5,99,85,43,9,25,0,0,6,17,99,1,0,0,99,0,1,0,0,0,99,0,99,99,99,12,3,6,3,36,127,9,31,498.0,41,18,5.0,9011,21.0,9608,18.0,9701,8612,4.0,12.146341,4693,1,4,11.0


In [18]:
# Split the data into numerical and catagorical. Decide if any columns need their dtype changed.

In [19]:
categoricals

Unnamed: 0,STATE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,VETERANS,RFA_2R,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B
0,IL,36,U,F,3,N,L,E,C,T,2
1,CA,14,H,M,3,N,L,G,A,S,1
2,NC,43,U,M,3,N,L,E,C,R,2
3,CA,44,U,F,3,N,L,E,C,R,2
4,FL,16,H,F,3,N,L,F,A,S,2
...,...,...,...,...,...,...,...,...,...,...,...
95407,other,27,U,M,3,N,L,G,C,C,2
95408,TX,24,H,M,3,N,L,F,A,C,1
95409,MI,30,U,M,3,N,L,E,B,C,3
95410,CA,24,H,F,2,N,L,F,A,C,1


In [20]:
numericals

Unnamed: 0,ODATEDW,TCODE,DOB,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
0,8901,0,3712,60.000000,5.0,5.0,0,0,39,34,18,10,2,1,5.0,992,264,332,0,35,65,47,53,92,1,0,0,11,0,0,0,0,0,0,0,11,0,0,0,39,48,51,40,50,54,25,31,42,27,11,14,18,17,13,11,15,12,11,34,25,18,26,10,23,18,33,49,28,12,4,61,7,12,19,198,276,97,95,2,2,0,0,7,7,0,479,635,3,2,86,14,96,4,7,38,80,70,32,84,16,6,2,5,9,15,3,17,50,25,0,0,0,2,7,13,27,47,0,1,61,58,61,15,4,2,0,0,14,1,0,0,2,5,17,73,307,318,349,378,12883,13,23,23,23,15,1,0,0,1,4,25,24,26,17,2,0,0,2,28,4,51,1,46,54,3,88,8,0,0,0,0,0,0,4,1,13,14,16,2,45,56,64,50,64,44,62,53,99,0,0,9,3,8,13,9,0,3,9,3,15,19,5,4,3,0,3,41,1,0,7,13,6,5,0,4,9,4,1,3,10,2,1,7,78,2,0,120,16,10,39,21,8,4,3,5,20,3,19,4,0,0,0,18,39,0,34,23,18,16,1,4,0,23,0,0,5,1,0,0,0,0,0,2,0,3,74,88,8,0,4,96,77,19,13,31,5,14,14,31,54,46,0,0,90,0,10,0,0,0,33,65,40,99,99,6,2,10,7,27,74,6,14,240.0,31,14,5.0,9208,12.0,9402,10.0,9512,8911,4.0,7.741935,95515,0,4,39.0
1,9401,1,5202,46.000000,6.0,9.0,16,0,15,55,11,6,2,1,9.0,3611,940,998,99,0,0,50,50,67,0,0,31,6,4,2,6,4,14,0,0,2,0,1,4,34,41,43,32,42,45,32,33,46,21,13,14,33,23,10,4,2,11,16,36,22,15,12,1,5,4,21,75,55,23,9,69,4,3,24,317,360,99,99,0,0,0,0,0,0,0,5468,5218,12,10,96,4,97,3,9,59,94,88,55,95,5,4,1,3,5,4,2,18,44,5,0,0,0,97,98,98,98,99,94,0,83,76,73,21,5,0,0,0,4,0,0,0,91,91,91,94,1088,1096,1026,1037,36175,2,6,2,5,15,14,13,10,33,2,5,2,5,15,14,14,10,32,6,2,66,3,56,44,9,80,14,0,0,0,0,0,0,6,0,2,24,32,12,71,70,83,58,81,57,64,57,99,99,0,22,24,4,21,13,2,1,6,0,4,1,0,3,1,0,6,13,1,2,8,18,11,4,3,4,10,7,11,1,6,2,1,16,69,5,2,160,5,5,12,21,7,30,20,14,24,4,24,10,0,0,0,8,15,0,55,10,11,0,0,2,0,3,1,1,2,3,1,1,0,3,0,0,0,42,39,50,7,27,16,99,92,53,5,10,2,26,56,97,99,0,0,0,96,0,4,0,0,0,99,0,99,99,99,20,4,6,5,12,32,6,13,47.0,3,1,10.0,9310,25.0,9512,25.0,9512,9310,18.0,15.666667,148535,0,2,1.0
2,9001,1,0,61.611649,3.0,1.0,2,0,20,29,33,6,8,1,1.0,7001,2040,2669,0,2,98,49,51,96,2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,35,43,46,37,45,49,23,35,40,25,13,20,19,16,13,10,8,15,14,30,22,19,25,10,23,21,35,44,22,6,2,63,9,9,19,183,254,69,69,1,6,5,3,3,3,0,497,546,2,1,78,22,93,7,18,36,76,65,30,86,14,7,2,5,11,17,3,17,60,18,0,1,0,0,1,6,18,50,0,4,36,49,51,14,5,4,2,24,11,2,3,6,0,2,9,44,251,292,292,340,11576,32,18,20,15,12,2,0,0,1,20,19,24,18,16,2,0,0,1,28,8,31,11,38,62,8,74,22,0,0,0,0,0,2,2,1,21,19,24,6,61,65,73,59,70,56,78,62,82,99,4,10,5,2,6,12,0,1,9,5,18,20,5,7,6,0,11,33,4,3,2,12,3,3,2,0,7,8,3,3,6,7,1,8,74,3,1,120,22,20,28,16,6,5,3,1,23,1,16,6,0,0,0,10,21,0,28,23,32,8,1,14,1,5,0,0,7,0,0,0,0,0,1,0,0,2,84,96,3,0,0,92,65,29,9,22,3,12,23,50,69,31,0,0,0,6,35,44,0,15,22,77,17,97,92,9,2,6,5,26,63,6,14,202.0,27,14,2.0,9111,16.0,9207,5.0,9512,9001,12.0,7.481481,15078,1,4,60.0
3,8701,0,2801,70.000000,1.0,4.0,2,0,23,14,31,3,0,3,0.0,640,160,219,0,8,92,54,46,61,0,0,11,32,6,2,0,0,0,0,0,31,0,0,1,32,40,44,34,43,47,25,45,35,20,15,25,17,17,12,7,7,20,17,30,14,19,25,11,23,23,27,50,30,15,8,63,9,6,23,199,283,85,83,3,4,1,0,2,0,2,1000,1263,2,1,48,52,93,7,6,36,73,61,30,84,16,6,3,3,21,12,4,13,36,13,0,0,0,10,25,50,69,92,10,15,42,55,50,15,5,4,0,9,42,4,0,5,1,8,17,34,386,388,396,423,15130,27,12,4,26,22,5,0,0,4,35,5,6,12,30,6,0,0,5,22,14,26,20,46,54,3,58,36,0,0,0,0,0,6,0,0,17,13,15,0,43,69,81,53,68,45,33,31,0,99,23,17,3,0,6,6,0,0,13,42,12,0,0,0,42,0,6,3,0,0,0,23,3,3,6,0,3,3,3,3,3,0,3,6,87,0,0,120,28,12,14,27,10,3,5,0,19,1,17,0,0,0,0,13,23,0,14,40,31,16,0,1,0,13,0,0,4,0,0,0,3,0,0,0,0,29,67,56,41,3,0,94,43,27,4,38,0,10,19,39,45,55,0,0,45,22,17,0,0,16,23,77,22,93,89,16,2,6,6,27,66,6,14,109.0,16,7,2.0,8711,11.0,9411,10.0,9512,8702,9.0,6.812500,172556,1,4,41.0
4,8601,0,2001,78.000000,3.0,2.0,60,1,28,9,53,26,3,2,5.0,2520,627,761,99,0,0,46,54,2,98,0,0,1,0,0,0,0,0,0,0,0,0,0,0,33,45,50,36,46,50,27,34,43,23,14,21,13,15,20,12,5,13,15,34,19,19,31,7,27,16,26,57,36,24,14,42,17,9,33,235,323,99,98,0,0,0,0,0,0,0,576,594,4,3,90,10,97,3,0,42,82,49,22,92,8,20,3,17,9,23,1,1,1,0,21,58,19,0,1,2,16,67,0,2,45,52,53,16,6,0,0,0,9,0,0,0,25,58,74,83,240,250,293,321,9836,24,29,23,13,4,4,0,0,2,21,30,22,16,4,5,0,0,3,35,8,11,14,20,80,4,73,22,1,1,0,0,0,3,1,2,1,24,27,3,76,61,73,51,65,49,80,31,81,99,10,17,8,2,6,15,3,7,22,2,9,0,7,2,2,0,6,1,5,2,2,12,2,7,6,4,15,29,4,3,26,3,2,7,49,12,1,120,16,20,30,13,3,12,5,2,26,1,20,7,1,1,1,15,28,4,9,16,53,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,65,99,0,0,0,90,45,18,25,34,0,1,3,6,33,67,0,0,9,14,72,3,0,0,99,1,21,99,96,6,2,7,11,43,113,10,25,254.0,37,8,3.0,9310,15.0,9601,15.0,9601,7903,14.0,6.864865,7112,1,2,26.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,9601,1,0,61.611649,5.0,5.0,0,14,36,47,11,7,8,13,5.0,27380,7252,10037,99,0,0,50,50,78,10,6,4,5,0,0,0,1,1,0,0,3,1,0,2,28,35,38,29,38,41,30,45,37,18,16,31,25,15,8,3,1,20,18,31,18,13,7,3,5,20,32,48,28,10,4,58,15,3,24,195,271,54,38,8,32,24,14,0,0,0,988,1025,6,6,56,44,89,11,3,44,72,56,32,83,17,12,3,10,16,15,8,19,55,5,3,6,0,2,10,49,73,92,0,4,40,52,53,15,4,24,8,13,14,15,12,3,69,84,92,97,433,481,499,535,18807,11,13,13,21,22,13,4,2,2,9,11,11,21,24,16,4,2,2,9,6,70,6,63,37,27,76,15,2,2,0,0,0,5,2,1,2,18,20,2,69,81,89,73,83,69,69,57,61,94,7,15,16,5,10,21,0,3,11,1,11,2,3,3,1,4,6,4,7,3,3,17,7,5,3,1,9,8,7,14,7,8,13,6,59,7,0,136,2,7,28,33,8,15,8,3,26,2,19,8,8,15,2,20,35,5,48,15,11,25,1,5,1,9,0,0,4,1,1,1,0,0,1,1,0,4,26,92,3,2,4,95,60,19,3,14,0,7,32,78,91,9,6,5,86,1,12,0,0,1,93,7,98,99,98,16,4,4,3,6,14,5,12,25.0,1,0,25.0,9602,25.0,9602,25.0,9602,9602,8.0,25.000000,184568,0,1,12.0
95408,9601,1,5001,48.000000,7.0,9.0,1,0,31,43,19,4,1,0,5.0,1254,322,361,96,0,4,51,49,91,3,0,2,6,1,0,1,0,0,0,0,5,0,0,1,30,40,40,28,41,43,39,33,42,25,9,19,43,17,7,4,2,10,16,35,23,16,9,2,7,10,20,70,52,25,6,73,4,2,20,307,346,89,88,1,1,0,0,0,0,0,1679,1723,3,3,88,12,97,3,0,63,89,85,60,96,4,2,1,1,7,5,1,28,58,5,2,2,0,18,71,88,91,97,5,1,77,82,75,20,4,1,0,10,7,1,0,5,16,26,44,79,806,836,802,849,26538,8,9,7,6,11,29,13,2,15,10,0,8,2,13,35,16,3,13,8,5,61,7,83,17,36,80,4,4,4,0,0,0,6,5,3,3,25,32,10,61,73,88,56,87,52,48,43,99,0,0,18,31,0,13,17,0,1,2,4,6,0,3,5,1,8,8,9,3,7,9,13,9,6,0,0,4,7,13,3,4,1,0,4,78,12,0,160,1,6,12,24,7,36,14,9,35,5,32,7,0,0,0,21,31,8,43,5,19,15,1,12,1,14,0,0,4,0,0,1,0,0,0,1,0,2,51,94,3,0,2,99,84,29,4,7,2,55,90,94,94,6,0,0,82,2,16,0,0,0,69,31,67,99,97,18,5,3,2,4,10,3,8,20.0,1,0,20.0,9603,20.0,9603,20.0,9603,9603,8.0,20.000000,122706,1,1,2.0
95409,9501,1,3801,60.000000,5.0,5.0,0,0,18,46,20,7,23,0,5.0,552,131,205,99,0,0,53,47,82,14,0,1,9,0,0,0,0,0,0,0,9,0,0,0,28,35,37,30,41,44,32,46,38,17,13,34,21,9,9,9,4,21,17,32,20,10,18,7,17,27,29,44,31,14,5,45,19,5,31,179,268,96,95,1,2,1,0,0,0,0,376,377,4,3,66,34,95,5,10,37,64,43,21,80,20,16,2,14,21,20,9,20,49,12,7,7,1,0,0,0,1,9,0,2,45,51,54,14,5,2,0,0,31,2,0,0,3,34,78,91,263,264,319,345,12178,21,26,20,18,12,0,3,0,0,26,18,17,11,21,0,6,0,0,10,13,26,26,43,57,3,83,17,0,0,0,0,0,0,0,0,25,17,17,0,69,69,70,69,70,69,77,24,62,0,25,5,13,9,5,22,0,2,14,0,13,9,5,2,0,0,4,14,3,11,0,10,5,2,0,5,6,19,3,19,7,23,0,0,52,18,0,120,5,3,51,23,7,11,0,6,32,4,27,7,0,0,0,9,18,0,46,0,20,20,2,8,0,14,0,0,0,1,0,0,0,0,1,0,0,6,82,92,5,3,0,93,42,12,6,51,0,0,0,0,0,99,0,0,97,0,0,0,0,4,99,0,99,99,99,5,2,3,11,14,33,7,17,58.0,7,4,3.0,9603,10.0,9501,10.0,9610,9410,3.0,8.285714,189641,1,3,34.0
95410,8601,0,4005,58.000000,7.0,5.0,0,0,28,35,20,9,1,1,7.0,1746,432,508,99,0,0,47,53,92,1,1,5,8,0,1,2,0,1,0,0,5,0,0,3,34,42,45,36,45,49,25,38,40,22,12,21,21,18,12,7,9,13,16,34,20,17,20,4,16,9,26,65,41,17,6,56,9,8,27,262,324,99,99,0,0,0,0,5,4,1,2421,2459,11,10,88,12,99,1,0,44,85,71,36,84,16,8,2,6,9,12,6,19,56,16,0,0,0,89,96,99,99,99,9,0,90,65,68,18,5,0,0,0,12,0,0,0,88,88,90,91,552,544,568,556,15948,7,4,11,18,38,15,5,3,0,4,6,15,19,38,13,4,3,0,25,2,46,3,43,57,9,80,11,0,0,0,0,1,2,6,0,24,18,28,11,52,73,88,60,85,57,70,54,99,99,0,14,16,6,16,17,0,2,12,1,11,2,0,2,1,0,2,22,4,6,4,19,4,7,2,4,6,7,9,4,9,1,1,7,72,8,2,140,7,6,20,35,12,15,5,6,29,4,21,10,0,0,0,13,28,1,35,18,20,8,0,3,1,9,0,0,2,6,1,2,0,0,0,0,0,14,50,83,8,4,5,99,85,43,9,25,0,0,6,17,99,1,0,0,99,0,1,0,0,0,99,0,99,99,99,12,3,6,3,36,127,9,31,498.0,41,18,5.0,9011,21.0,9608,18.0,9701,8612,4.0,12.146341,4693,1,4,11.0


In [21]:
# numericals = numericals[''].value_counts(dropna=False)

In [22]:
categoricals.nunique()

STATE       12
CLUSTER     53
HOMEOWNR     2
GENDER       3
DATASRCE     3
VETERANS     2
RFA_2R       1
RFA_2A       4
GEOCODE2     4
DOMAIN_A     5
DOMAIN_B     4
dtype: int64

In [23]:
categoricals['CLUSTER'].value_counts(dropna=False) # This should be part of numericals

40    6295
35    3624
36    3617
27    3553
24    3539
49    3315
12    3000
18    2897
13    2706
30    2607
39    2601
45    2515
43    2385
11    2378
51    2322
14    2246
41    2176
44    1953
16    1947
2     1942
21    1891
8     1828
10    1804
46    1800
28    1630
17    1621
20    1568
53    1441
3     1428
34    1358
42    1358
23    1324
31    1285
22    1282
25    1275
38    1200
15    1173
1     1140
7     1076
5     1014
37     972
26     947
47     868
29     859
32     795
48     787
50     776
9      770
6      603
33     593
4      561
19     497
52     270
Name: CLUSTER, dtype: int64

In [24]:
categoricals['CLUSTER'] = categoricals['CLUSTER'].astype('category')

In [25]:
categoricals.dtypes

STATE         object
CLUSTER     category
HOMEOWNR      object
GENDER        object
DATASRCE       int64
VETERANS      object
RFA_2R        object
RFA_2A        object
GEOCODE2      object
DOMAIN_A      object
DOMAIN_B       int64
dtype: object

In [26]:
categoricals['DOMAIN_A'].value_counts(dropna=False)

R    22106
S    21924
C    19689
T    19527
U    12166
Name: DOMAIN_A, dtype: int64

In [27]:
categoricals['DOMAIN_B'].value_counts(dropna=False)

2    48356
1    28498
3    16754
4     1804
Name: DOMAIN_B, dtype: int64

In [28]:
# RFA_2R doesn't have any unique values, just a constant

categoricals = categoricals.drop(['RFA_2R'], axis=1)

In [29]:
categoricals

Unnamed: 0,STATE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,VETERANS,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B
0,IL,36,U,F,3,N,E,C,T,2
1,CA,14,H,M,3,N,G,A,S,1
2,NC,43,U,M,3,N,E,C,R,2
3,CA,44,U,F,3,N,E,C,R,2
4,FL,16,H,F,3,N,F,A,S,2
...,...,...,...,...,...,...,...,...,...,...
95407,other,27,U,M,3,N,G,C,C,2
95408,TX,24,H,M,3,N,F,A,C,1
95409,MI,30,U,M,3,N,E,B,C,3
95410,CA,24,H,F,2,N,F,A,C,1


In [30]:
# Split the data into a training set and a test set.

In [31]:
X = pd.concat([categoricals, numericals], axis=1)
y = y.drop(['TARGET_D'],axis=1)

In [32]:
X.isnull().sum()/len(X)

STATE       0.0
CLUSTER     0.0
HOMEOWNR    0.0
GENDER      0.0
DATASRCE    0.0
           ... 
AVGGIFT     0.0
CONTROLN    0.0
HPHONE_D    0.0
RFA_2F      0.0
CLUSTER2    0.0
Length: 328, dtype: float64

In [33]:
#Scale the features either by using normalizer or a standard scaler. 

In [34]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

print('X_train shape: ', X_train.shape)
print('X_test shape: ', X_test.shape)
print('y_train shape: ', y_train.shape)
print('y_test shape: ', y_test.shape)

X_train shape:  (71559, 328)
X_test shape:  (23853, 328)
y_train shape:  (71559, 1)
y_test shape:  (23853, 1)


In [35]:
y

Unnamed: 0,TARGET_B
0,0
1,0
2,0
3,0
4,0
...,...
95407,0
95408,0
95409,0
95410,1


In [36]:
X_train_num = X_train.select_dtypes(include = np.number)
X_test_num  = X_test.select_dtypes(include = np.number)
X_train_cat = X_train.select_dtypes(include = object)
X_test_cat  = X_test.select_dtypes(include = object)

In [37]:
print('X_train_num shape: ', X_train_num.shape)
print('X_test_num shape: ', X_test_num.shape)

X_train_num shape:  (71559, 320)
X_test_num shape:  (23853, 320)


In [38]:
# Scale the features either by using normalizer or a standard scaler.

In [39]:
from sklearn.preprocessing import MinMaxScaler
transformer = MinMaxScaler().fit(X_train_num)

In [40]:
X_train_norm = transformer.transform(X_train_num)
X_train_scale = pd.DataFrame(X_train_norm,columns=X_train_num.columns)
X_train_scale.head()

Unnamed: 0,DATASRCE,DOMAIN_B,ODATEDW,TCODE,DOB,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
0,1.0,0.0,0.21147,2.6e-05,0.473841,0.525773,0.833333,0.777778,0.0,0.0,0.525253,0.30303,0.323232,0.111111,0.0,0.045977,0.888889,0.01385,0.015905,0.012937,1.0,0.0,0.0,0.494949,0.515152,0.888889,0.030303,0.010101,0.063158,0.090909,0.0,0.013889,0.0,0.029851,0.0,0.021277,0.0,0.061856,0.0,0.0,0.034884,0.409639,0.506024,0.542169,0.432099,0.555556,0.592593,0.346667,0.424242,0.393939,0.191919,0.10101,0.222222,0.212121,0.151515,0.171717,0.121212,0.044444,0.171717,0.181818,0.292929,0.222222,0.141414,0.242424,0.050505,0.212121,0.141414,0.282828,0.585859,0.373737,0.131313,0.040404,0.626263,0.10101,0.09589,0.20202,0.364615,0.425714,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3695,0.369833,0.692308,0.692308,0.848485,0.161616,0.989899,0.020202,0.0,0.414141,0.838384,0.69697,0.333333,0.919192,0.090909,0.080808,0.04,0.060606,0.080808,0.151515,0.030303,0.253333,0.525253,0.171717,0.023256,0.020202,0.0,0.737374,0.959596,0.989899,1.0,1.0,0.030303,0.010101,0.515152,0.623529,0.611111,0.262295,0.125,0.0,0.0,0.0,0.161616,0.0,0.0,0.0,0.929293,0.969697,1.0,1.0,0.332667,0.369333,0.351333,0.392,0.093907,0.121212,0.070707,0.090909,0.222222,0.333333,0.2,0.06,0.016393,0.020202,0.010101,0.080808,0.090909,0.242424,0.383838,0.131313,0.08,0.010101,0.020202,0.292929,0.040404,0.393939,0.010101,0.30303,0.707071,0.040404,0.808081,0.10101,0.02439,0.02439,0.0,0.0,0.04,0.030303,0.050505,0.0,0.191919,0.255556,0.368421,0.040404,0.787879,0.676768,0.808081,0.575758,0.777778,0.545455,0.59596,0.393939,0.909091,1.0,0.010101,0.121212,0.191919,0.050505,0.111111,0.212121,0.0,0.054545,0.040404,0.0,0.161616,0.050505,0.030303,0.020202,0.020202,0.0,0.060606,0.343434,0.050505,0.03125,0.090909,0.111111,0.050505,0.060606,0.010101,0.029851,0.060606,0.070707,0.030303,0.035088,0.111111,0.0,0.0625,0.10101,0.737374,0.030303,0.0,0.747059,0.020202,0.131313,0.313131,0.252525,0.257143,0.151515,0.050505,0.020619,0.282828,0.090909,0.25,0.111111,0.0,0.0,0.0,0.222222,0.525253,0.033333,0.30303,0.232323,0.323232,0.10101,0.024096,0.060606,0.033333,0.076087,0.0,0.0,0.040404,0.018182,0.0,0.013514,0.0,0.0,0.02,0.0,0.0,0.1,0.515152,0.868687,0.050505,0.094595,0.020833,0.959596,0.707071,0.333333,0.451613,0.692308,0.0,0.0,0.0,0.0,0.090909,0.919192,0.0,0.0,0.979798,0.0,0.020202,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.52381,0.4,0.111111,0.166667,0.55,0.418848,0.315789,0.181818,0.077914,0.118644,0.305556,0.003,0.636612,0.009009,0.82208,0.025,0.502513,0.896387,0.003676,0.024643,0.763736,1.0,0.666667,0.131148
1,0.0,0.333333,0.426523,0.000462,0.227188,0.773196,0.166667,0.555556,0.012448,0.0,0.535354,0.121212,0.666667,0.050505,0.030303,0.022989,0.555556,0.03155,0.041698,0.03768,0.111111,0.0,0.89899,0.505051,0.505051,1.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.578313,0.686747,0.722892,0.555556,0.654321,0.691358,0.24,0.434343,0.383838,0.191919,0.080808,0.151515,0.121212,0.10101,0.181818,0.252525,0.133333,0.181818,0.171717,0.343434,0.151515,0.161616,0.474747,0.131313,0.434343,0.212121,0.515152,0.282828,0.161616,0.070707,0.020202,0.69697,0.10101,0.123288,0.131313,0.241538,0.331429,0.40404,0.393939,0.010101,0.020202,0.010101,0.010101,0.0,0.0,0.0,0.087333,0.098167,0.230769,0.230769,0.878788,0.131313,0.767677,0.242424,0.373737,0.212121,0.747475,0.666667,0.171717,0.848485,0.161616,0.040404,0.02,0.030303,0.141414,0.161616,0.050505,0.2,0.454545,0.383838,0.0,0.0,0.0,0.0,0.020202,0.070707,0.20202,0.555556,0.0,0.030303,0.222222,0.517647,0.533333,0.213115,0.1,0.020202,0.0,0.575758,0.050505,0.010101,0.010101,0.060606,0.10101,0.242424,0.636364,0.868687,0.147333,0.174,0.178667,0.194667,0.06928,0.262626,0.30303,0.232323,0.111111,0.070707,0.0,0.02,0.016393,0.010101,0.191919,0.262626,0.30303,0.141414,0.080808,0.0,0.02,0.010101,0.0,0.494949,0.050505,0.454545,0.070707,0.525253,0.484848,0.242424,0.868687,0.10101,0.0,0.0,0.0,0.0,0.04,0.0,0.030303,0.0,0.151515,0.222222,0.328947,0.050505,0.69697,0.454545,0.525253,0.373737,0.505051,0.373737,0.686869,0.686869,0.353535,1.0,0.070707,0.080808,0.090909,0.050505,0.10101,0.141414,0.0,0.109091,0.161616,0.030303,0.131313,0.020202,0.070707,0.060606,0.030303,0.030769,0.181818,0.030303,0.030303,0.078125,0.040404,0.20202,0.040404,0.050505,0.050505,0.029851,0.080808,0.030303,0.070707,0.157895,0.080808,0.060606,0.046875,0.060606,0.686869,0.10101,0.0,0.705882,0.080808,0.171717,0.454545,0.131313,0.257143,0.020202,0.050505,0.010309,0.151515,0.045455,0.166667,0.040404,0.0,0.0,0.0,0.242424,0.454545,0.1,0.171717,0.20202,0.59596,0.040404,0.012048,0.10101,0.033333,0.141304,0.0,0.071429,0.060606,0.018182,0.0,0.027027,0.023256,0.0,0.02,0.0,0.0,0.022222,0.242424,0.979798,0.010101,0.0,0.03125,0.989899,0.414141,0.111111,0.16129,0.230769,0.040404,0.191919,0.484848,0.818182,0.919192,0.090909,0.0,0.0,0.010101,0.10101,0.878788,0.0,0.0,0.0,0.414141,0.565657,0.282828,1.0,0.969697,0.238095,0.4,0.080808,0.185185,0.433333,0.356021,0.315789,0.194805,0.015625,0.072034,0.138889,0.003,0.638889,0.002002,0.863595,0.015,0.497487,0.926898,0.007353,0.007669,0.231316,0.0,1.0,0.95082
2,0.0,0.666667,0.283154,5.1e-05,0.052626,0.938144,0.166667,0.222222,0.0,0.0,0.262626,0.252525,0.20202,0.111111,0.020202,0.011494,0.555556,0.013911,0.016705,0.012937,1.0,0.0,0.0,0.474747,0.535354,0.707071,0.090909,0.0,0.0,0.30303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.278351,0.0,0.0,0.034884,0.301205,0.445783,0.481928,0.345679,0.518519,0.54321,0.533333,0.444444,0.40404,0.161616,0.121212,0.313131,0.20202,0.131313,0.10101,0.090909,0.044444,0.141414,0.20202,0.393939,0.181818,0.090909,0.171717,0.050505,0.161616,0.121212,0.30303,0.585859,0.363636,0.141414,0.030303,0.626263,0.131313,0.082192,0.191919,0.361538,0.427143,0.69697,0.686869,0.090909,0.212121,0.131313,0.060606,0.0,0.0,0.0,0.061,0.063333,0.230769,0.153846,0.626263,0.383838,0.939394,0.070707,0.0,0.59596,0.878788,0.575758,0.333333,0.949495,0.060606,0.262626,0.04,0.242424,0.060606,0.111111,0.010101,0.266667,0.393939,0.111111,0.116279,0.040404,0.0,0.0,0.0,0.0,0.0,0.151515,0.0,0.020202,0.343434,0.564706,0.566667,0.262295,0.15,0.181818,0.030303,0.080808,0.161616,0.131313,0.060606,0.010101,0.040404,0.10101,0.252525,0.414141,0.153333,0.162,0.150667,0.154,0.042533,0.343434,0.191919,0.272727,0.181818,0.010101,0.018182,0.0,0.0,0.0,0.343434,0.171717,0.272727,0.191919,0.010101,0.010101,0.0,0.0,0.0,0.151515,0.141414,0.232323,0.262626,0.606061,0.40404,0.020202,0.858586,0.080808,0.0,0.0,0.0,0.0,0.0,0.030303,0.040404,0.010101,0.090909,0.177778,0.236842,0.020202,0.555556,0.676768,0.787879,0.606061,0.69697,0.494949,0.505051,0.444444,0.40404,0.0,0.262626,0.090909,0.070707,0.040404,0.111111,0.252525,0.0,0.0,0.131313,0.020202,0.161616,0.030303,0.060606,0.020202,0.010101,0.138462,0.060606,0.121212,0.060606,0.046875,0.060606,0.212121,0.040404,0.080808,0.020202,0.014925,0.030303,0.090909,0.090909,0.0,0.111111,0.020202,0.015625,0.111111,0.676768,0.080808,0.0,0.705882,0.111111,0.262626,0.272727,0.252525,0.085714,0.050505,0.030303,0.020619,0.323232,0.045455,0.430556,0.020202,0.0,0.0,0.0,0.111111,0.262626,0.033333,0.252525,0.323232,0.20202,0.111111,0.0,0.030303,0.033333,0.054348,0.0,0.0,0.050505,0.018182,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.033333,0.707071,0.777778,0.232323,0.0,0.010417,0.848485,0.515152,0.171717,0.16129,0.519231,0.0,0.0,0.040404,0.272727,0.444444,0.565657,0.0,0.0,0.949495,0.0,0.060606,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.828283,0.238095,0.4,0.060606,0.092593,0.483333,0.350785,0.315789,0.168831,0.009079,0.042373,0.166667,0.002,0.545082,0.003003,0.913321,0.02,0.045226,0.906279,0.011949,0.007724,0.670821,0.0,0.333333,0.540984
3,1.0,0.333333,0.856631,5.1e-05,0.206076,0.793814,0.5,0.777778,0.082988,0.0,0.363636,0.454545,0.333333,0.020202,0.010101,0.022989,0.555556,0.010638,0.011782,0.00966,0.0,0.0,1.0,0.494949,0.515152,0.959596,0.010101,0.010101,0.031579,0.080808,0.0,0.0,0.013333,0.014925,0.0,0.0,0.0,0.082474,0.0,0.0,0.0,0.313253,0.445783,0.481928,0.358025,0.493827,0.54321,0.48,0.434343,0.383838,0.191919,0.151515,0.292929,0.232323,0.141414,0.080808,0.070707,0.044444,0.181818,0.171717,0.323232,0.181818,0.151515,0.151515,0.040404,0.141414,0.151515,0.30303,0.555556,0.383838,0.191919,0.060606,0.636364,0.131313,0.054795,0.20202,0.353846,0.437143,0.474747,0.474747,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095833,0.101,0.230769,0.230769,0.646465,0.363636,0.979798,0.030303,0.090909,0.535354,0.828283,0.656566,0.424242,0.919192,0.090909,0.121212,0.04,0.10101,0.111111,0.121212,0.030303,0.373333,0.585859,0.090909,0.0,0.0,0.0,0.0,0.0,0.030303,0.131313,0.79798,0.0,0.030303,0.414141,0.6,0.588889,0.262295,0.15,0.0,0.0,0.525253,0.10101,0.0,0.0,0.262626,0.0,0.090909,0.353535,0.848485,0.195333,0.218667,0.203333,0.224667,0.056904,0.282828,0.10101,0.272727,0.20202,0.111111,0.036364,0.02,0.0,0.0,0.161616,0.161616,0.282828,0.232323,0.131313,0.020202,0.02,0.0,0.0,0.181818,0.080808,0.292929,0.151515,0.626263,0.383838,0.222222,0.848485,0.141414,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.040404,0.222222,0.188889,0.25,0.0,0.656566,0.737374,0.828283,0.636364,0.818182,0.575758,0.676768,0.474747,0.919192,1.0,0.010101,0.070707,0.10101,0.050505,0.10101,0.141414,0.0,0.0,0.090909,0.010101,0.121212,0.121212,0.10101,0.10101,0.020202,0.0,0.060606,0.181818,0.080808,0.03125,0.080808,0.191919,0.020202,0.131313,0.020202,0.0,0.070707,0.060606,0.050505,0.035088,0.020202,0.010101,0.03125,0.080808,0.848485,0.030303,0.0,0.705882,0.060606,0.191919,0.414141,0.151515,0.257143,0.080808,0.030303,0.010309,0.313131,0.136364,0.361111,0.030303,0.0,0.0,0.0,0.191919,0.363636,0.033333,0.454545,0.0,0.333333,0.080808,0.0,0.050505,0.0,0.141304,0.021277,0.0,0.020202,0.036364,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.545455,0.989899,0.020202,0.0,0.0,0.989899,0.676768,0.30303,0.129032,0.365385,0.0,0.030303,0.131313,0.656566,0.848485,0.161616,0.0,0.0,0.171717,0.020202,0.484848,0.111111,0.0,0.20202,0.474747,0.515152,0.292929,1.0,0.909091,0.238095,0.4,0.020202,0.12963,0.166667,0.125654,0.315789,0.155844,0.007179,0.016949,0.055556,0.01,0.909381,0.003203,0.912865,0.021,0.040201,0.989482,0.000919,0.014933,0.696058,1.0,0.666667,0.622951
4,0.5,0.0,0.784946,0.000718,0.0,0.624862,0.666667,0.555556,0.0,0.0,0.222222,0.525253,0.141414,0.070707,0.030303,0.011494,0.555556,0.016585,0.020702,0.015451,0.656566,0.0,0.353535,0.505051,0.505051,0.989899,0.0,0.0,0.010526,0.010101,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.010309,0.0,0.0,0.0,0.385542,0.493976,0.53012,0.407407,0.530864,0.567901,0.386667,0.424242,0.393939,0.191919,0.121212,0.242424,0.212121,0.191919,0.151515,0.070707,0.022222,0.161616,0.171717,0.353535,0.171717,0.151515,0.131313,0.020202,0.121212,0.080808,0.383838,0.545455,0.343434,0.141414,0.040404,0.747475,0.040404,0.041096,0.20202,0.338462,0.427143,0.909091,0.686869,0.040404,0.090909,0.050505,0.0,0.0,0.0,0.0,0.130833,0.139,0.307692,0.307692,0.929293,0.080808,0.979798,0.030303,0.242424,0.424242,0.909091,0.828283,0.373737,0.959596,0.050505,0.050505,0.02,0.040404,0.060606,0.070707,0.020202,0.32,0.636364,0.121212,0.0,0.0,0.0,0.010101,0.020202,0.181818,0.555556,0.959596,0.0,0.0,0.606061,0.670588,0.677778,0.262295,0.125,0.090909,0.0,0.0,0.040404,0.040404,0.0,0.0,0.212121,0.767677,0.939394,0.939394,0.278,0.308667,0.318,0.329333,0.099912,0.050505,0.121212,0.131313,0.343434,0.252525,0.127273,0.08,0.0,0.010101,0.040404,0.090909,0.131313,0.323232,0.30303,0.090909,0.04,0.0,0.010101,0.121212,0.010101,0.555556,0.050505,0.343434,0.666667,0.010101,0.888889,0.070707,0.0,0.0,0.0,0.0,0.0,0.010101,0.030303,0.0,0.69697,0.222222,0.25,0.0,0.707071,0.808081,0.878788,0.727273,0.878788,0.717172,0.787879,0.69697,0.727273,0.808081,0.040404,0.070707,0.141414,0.030303,0.151515,0.141414,0.023256,0.018182,0.070707,0.040404,0.141414,0.060606,0.10101,0.040404,0.040404,0.0,0.050505,0.30303,0.070707,0.03125,0.070707,0.191919,0.050505,0.030303,0.020202,0.014925,0.060606,0.070707,0.030303,0.017544,0.080808,0.010101,0.0,0.050505,0.808081,0.060606,0.0,0.705882,0.030303,0.10101,0.484848,0.161616,0.342857,0.070707,0.040404,0.061856,0.212121,0.227273,0.277778,0.020202,0.0,0.010101,0.0,0.191919,0.343434,0.033333,0.282828,0.191919,0.353535,0.050505,0.433735,0.010101,0.033333,0.076087,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.02,0.037037,0.0,0.033333,0.929293,0.979798,0.010101,0.013514,0.010417,1.0,0.79798,0.272727,0.258065,0.365385,0.060606,0.20202,0.333333,0.535354,0.737374,0.272727,0.211111,0.0,0.828283,0.050505,0.0,0.080808,0.0,0.050505,0.787879,0.222222,0.505051,1.0,1.0,0.333333,0.4,0.111111,0.111111,0.25,0.188482,0.315789,0.181818,0.006546,0.016949,0.055556,0.01,0.909381,0.003003,0.909672,0.02,0.502513,0.979069,0.011949,0.013732,0.384712,1.0,0.666667,0.344262


In [41]:
X_test_norm = transformer.transform(X_test_num)
X_test_scale = pd.DataFrame(X_test_norm,columns=X_test_num.columns)
X_test_scale.head()

Unnamed: 0,DATASRCE,DOMAIN_B,ODATEDW,TCODE,DOB,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
0,0.5,0.333333,0.21147,5.1e-05,0.360556,0.639175,0.666667,0.555556,0.0,0.0,0.313131,0.414141,0.171717,0.171717,0.080808,0.103448,0.555556,0.016656,0.017083,0.015027,1.0,0.0,0.0,0.464646,0.545455,0.010101,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.385542,0.493976,0.542169,0.419753,0.530864,0.580247,0.36,0.353535,0.373737,0.282828,0.171717,0.181818,0.222222,0.191919,0.131313,0.070707,0.044444,0.141414,0.151515,0.262626,0.252525,0.20202,0.212121,0.050505,0.151515,0.181818,0.272727,0.555556,0.333333,0.191919,0.090909,0.424242,0.151515,0.09589,0.363636,0.343077,0.431429,0.848485,0.828283,0.010101,0.161616,0.141414,0.141414,0.0,0.0,0.0,0.114167,0.130167,0.230769,0.230769,0.787879,0.222222,0.929293,0.080808,0.020202,0.434343,0.767677,0.484848,0.252525,0.888889,0.121212,0.171717,0.04,0.151515,0.131313,0.222222,0.050505,0.0,0.0,0.010101,0.488372,0.666667,0.111111,0.030303,0.070707,0.131313,0.40404,0.787879,0.010101,0.010101,0.676768,0.694118,0.666667,0.262295,0.125,0.020202,0.141414,0.0,0.111111,0.0,0.111111,0.0,0.181818,0.323232,0.585859,0.939394,0.213333,0.226667,0.252,0.257333,0.069315,0.232323,0.090909,0.252525,0.171717,0.171717,0.127273,0.0,0.0,0.020202,0.191919,0.070707,0.272727,0.191919,0.191919,0.090909,0.0,0.0,0.0,0.212121,0.181818,0.161616,0.111111,0.353535,0.656566,0.040404,0.707071,0.121212,0.207317,0.207317,0.014085,0.0,0.0,0.010101,0.0,0.010101,0.232323,0.255556,0.394737,0.10101,0.787879,0.686869,0.747475,0.636364,0.676768,0.616162,0.666667,0.282828,0.89899,0.686869,0.060606,0.131313,0.131313,0.020202,0.050505,0.212121,0.069767,0.054545,0.242424,0.0,0.020202,0.050505,0.040404,0.050505,0.0,0.0,0.020202,0.090909,0.040404,0.015625,0.030303,0.161616,0.060606,0.070707,0.040404,0.044776,0.10101,0.121212,0.050505,0.298246,0.171717,0.080808,0.140625,0.070707,0.585859,0.020202,0.0,0.705882,0.080808,0.121212,0.313131,0.212121,0.114286,0.080808,0.161616,0.082474,0.232323,0.045455,0.277778,0.10101,0.0,0.0,0.0,0.141414,0.313131,0.0,0.414141,0.191919,0.171717,0.080808,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.818182,0.989899,0.010101,0.0,0.010417,0.888889,0.505051,0.323232,0.419355,0.673077,0.0,0.0,0.010101,0.151515,0.343434,0.666667,0.0,0.0,0.878788,0.010101,0.121212,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.949495,0.333333,0.6,0.050505,0.111111,0.5,0.340314,0.315789,0.155844,0.007496,0.076271,0.388889,0.0015,0.589709,0.001001,0.913321,0.01,0.045226,0.896595,0.008272,0.003139,0.147218,0.0,0.333333,0.409836
1,0.5,0.0,0.856631,2.6e-05,0.0,0.624862,0.666667,0.555556,0.0,0.0,0.272727,0.272727,0.333333,0.010101,0.020202,0.011494,0.555556,0.019473,0.022259,0.022314,1.0,0.0,0.0,0.454545,0.555556,0.989899,0.0,0.0,0.010526,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.011628,0.457831,0.542169,0.566265,0.518519,0.617284,0.641975,0.266667,0.393939,0.424242,0.191919,0.070707,0.181818,0.242424,0.151515,0.080808,0.10101,0.211111,0.171717,0.141414,0.353535,0.191919,0.151515,0.30303,0.141414,0.292929,0.272727,0.414141,0.313131,0.181818,0.060606,0.020202,0.606061,0.111111,0.178082,0.161616,0.238462,0.328571,0.707071,0.525253,0.0,0.272727,0.272727,0.262626,0.050505,0.050505,0.0,0.1895,0.2015,0.692308,0.615385,0.757576,0.252525,0.808081,0.20202,0.171717,0.272727,0.676768,0.59596,0.232323,0.787879,0.222222,0.040404,0.02,0.030303,0.131313,0.242424,0.060606,0.226667,0.555556,0.262626,0.0,0.0,0.0,0.040404,0.131313,0.686869,1.0,1.0,0.0,0.010101,0.393939,0.6,0.588889,0.213115,0.075,0.030303,0.242424,0.0,0.131313,0.0,0.111111,0.0,0.777778,0.787879,0.787879,0.787879,0.300667,0.326667,0.335333,0.376,0.14617,0.121212,0.111111,0.121212,0.262626,0.242424,0.145455,0.02,0.032787,0.040404,0.090909,0.060606,0.121212,0.262626,0.292929,0.111111,0.02,0.040404,0.030303,0.262626,0.010101,0.666667,0.040404,0.787879,0.222222,0.40404,0.828283,0.080808,0.012195,0.0,0.0,0.033333,0.0,0.040404,0.060606,0.030303,0.010101,0.266667,0.368421,0.020202,0.656566,0.676768,0.818182,0.565657,0.818182,0.545455,0.767677,0.767677,1.0,0.0,0.0,0.141414,0.222222,0.010101,0.252525,0.212121,0.0,0.018182,0.070707,0.0,0.040404,0.0,0.020202,0.030303,0.010101,0.030769,0.010101,0.10101,0.070707,0.046875,0.080808,0.222222,0.151515,0.070707,0.030303,0.014925,0.070707,0.050505,0.080808,0.0,0.010101,0.020202,0.015625,0.10101,0.818182,0.060606,0.0,0.876471,0.020202,0.030303,0.212121,0.292929,0.085714,0.323232,0.10101,0.072165,0.171717,0.045455,0.194444,0.080808,0.0,0.0,0.0,0.121212,0.272727,0.066667,0.272727,0.151515,0.333333,0.050505,0.024096,0.080808,0.066667,0.076087,0.0,0.0,0.070707,0.090909,0.0,0.013514,0.0,0.057692,0.06,0.037037,0.0625,0.055556,0.080808,0.909091,0.030303,0.0,0.072917,0.959596,0.585859,0.121212,0.096774,0.096154,0.040404,0.686869,0.848485,1.0,1.0,0.0,0.1,0.145161,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.52381,0.4,0.050505,0.055556,0.083333,0.057592,0.157895,0.077922,0.000739,0.0,0.027778,0.02,0.910747,0.003003,0.910584,0.02,0.015075,0.989899,0.007353,0.018738,0.712432,0.0,0.0,0.04918
2,0.5,0.666667,0.426523,0.0,0.0,0.624862,0.166667,0.555556,0.0,0.0,0.141414,0.262626,0.343434,0.060606,0.040404,0.022989,0.222222,0.028845,0.027897,0.02528,1.0,0.0,0.0,0.525253,0.484848,0.494949,0.050505,0.010101,0.073684,0.606061,0.045455,0.0,0.0,0.029851,0.026316,0.021277,0.0,0.515464,0.017544,0.0,0.093023,0.301205,0.385542,0.433735,0.333333,0.444444,0.506173,0.44,0.515152,0.343434,0.151515,0.222222,0.353535,0.191919,0.10101,0.060606,0.050505,0.022222,0.242424,0.212121,0.30303,0.151515,0.10101,0.131313,0.050505,0.10101,0.191919,0.252525,0.565657,0.383838,0.20202,0.111111,0.464646,0.151515,0.054795,0.353535,0.358462,0.444286,0.343434,0.30303,0.020202,0.656566,0.646465,0.565657,0.0,0.0,0.0,0.276,0.276167,0.461538,0.461538,0.141414,0.868687,0.959596,0.050505,0.0,0.515152,0.747475,0.454545,0.313131,0.848485,0.161616,0.20202,0.12,0.141414,0.20202,0.141414,0.070707,0.173333,0.313131,0.050505,0.046512,0.030303,0.0,0.141414,0.656566,0.939394,0.989899,1.0,0.010101,0.353535,0.10101,0.4,0.366667,0.262295,0.225,0.171717,0.494949,0.0,0.212121,0.090909,0.565657,0.0,0.747475,0.909091,0.959596,0.969697,0.190667,0.184,0.217333,0.217333,0.059826,0.222222,0.242424,0.141414,0.242424,0.111111,0.018182,0.02,0.032787,0.010101,0.181818,0.272727,0.171717,0.222222,0.111111,0.010101,0.02,0.020202,0.0,0.111111,0.111111,0.161616,0.161616,0.616162,0.393939,0.131313,0.707071,0.181818,0.060976,0.060976,0.0,0.0,0.0,0.080808,0.0,0.0,0.131313,0.211111,0.328947,0.070707,0.585859,0.757576,0.919192,0.59596,0.828283,0.555556,0.535354,0.343434,0.565657,0.808081,0.171717,0.10101,0.070707,0.010101,0.080808,0.141414,0.023256,0.036364,0.222222,0.010101,0.131313,0.10101,0.070707,0.040404,0.010101,0.0,0.050505,0.212121,0.060606,0.046875,0.040404,0.212121,0.060606,0.060606,0.040404,0.014925,0.10101,0.040404,0.070707,0.035088,0.060606,0.040404,0.03125,0.060606,0.757576,0.060606,0.020408,0.705882,0.252525,0.222222,0.222222,0.151515,0.114286,0.080808,0.040404,0.020619,0.282828,0.045455,0.347222,0.050505,0.0,0.0,0.0,0.080808,0.141414,0.033333,0.262626,0.151515,0.343434,0.121212,0.0,0.020202,0.033333,0.021739,0.021277,0.0,0.030303,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.388889,0.454545,0.424242,0.545455,0.027027,0.020833,0.868687,0.424242,0.090909,0.096774,0.576923,0.0,0.050505,0.050505,0.131313,0.131313,0.888889,0.0,0.032258,0.777778,0.0,0.232323,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.89899,0.428571,0.2,0.070707,0.055556,0.416667,0.298429,0.315789,0.155844,0.009502,0.050847,0.166667,0.003,0.637067,0.001401,0.910128,0.012,0.030151,0.927002,0.002757,0.006646,0.761911,0.0,0.333333,0.491803
3,1.0,0.666667,0.426523,0.0,0.535633,0.463918,0.166667,0.222222,0.004149,0.0,0.363636,0.323232,0.161616,0.060606,0.030303,0.0,0.111111,0.03618,0.040646,0.035647,0.0,0.0,1.0,0.515152,0.494949,0.989899,0.0,0.010101,0.010526,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.0,0.349398,0.433735,0.457831,0.37037,0.506173,0.530864,0.44,0.434343,0.40404,0.171717,0.10101,0.323232,0.272727,0.111111,0.090909,0.080808,0.044444,0.171717,0.181818,0.373737,0.191919,0.090909,0.161616,0.050505,0.141414,0.171717,0.323232,0.515152,0.333333,0.111111,0.030303,0.656566,0.111111,0.041096,0.212121,0.315385,0.404286,0.848485,0.838384,0.010101,0.020202,0.020202,0.010101,0.0,0.0,0.0,0.094667,0.11,0.230769,0.230769,0.848485,0.161616,0.69697,0.313131,0.828283,0.474747,0.777778,0.646465,0.383838,0.878788,0.131313,0.090909,0.04,0.070707,0.151515,0.121212,0.060606,0.373333,0.59596,0.111111,0.0,0.0,0.0,0.010101,0.030303,0.10101,0.232323,0.656566,0.0,0.121212,0.393939,0.6,0.555556,0.245902,0.1,0.020202,0.0,0.131313,0.111111,0.010101,0.010101,0.030303,0.040404,0.181818,0.555556,0.838384,0.178667,0.192,0.204,0.219333,0.06649,0.222222,0.222222,0.242424,0.191919,0.10101,0.036364,0.0,0.0,0.010101,0.151515,0.252525,0.252525,0.20202,0.10101,0.020202,0.0,0.0,0.020202,0.181818,0.10101,0.323232,0.111111,0.555556,0.454545,0.080808,0.858586,0.080808,0.012195,0.012195,0.0,0.0,0.0,0.040404,0.030303,0.0,0.050505,0.233333,0.276316,0.010101,0.79798,0.747475,0.818182,0.686869,0.767677,0.616162,0.717172,0.575758,0.757576,1.0,0.050505,0.121212,0.10101,0.060606,0.111111,0.111111,0.0,0.018182,0.151515,0.010101,0.171717,0.050505,0.060606,0.050505,0.010101,0.030769,0.121212,0.151515,0.020202,0.03125,0.040404,0.242424,0.030303,0.060606,0.040404,0.014925,0.090909,0.070707,0.040404,0.052632,0.040404,0.010101,0.03125,0.070707,0.727273,0.131313,0.0,0.705882,0.030303,0.141414,0.383838,0.222222,0.228571,0.111111,0.040404,0.020619,0.232323,0.090909,0.277778,0.030303,0.0,0.0,0.0,0.181818,0.343434,0.1,0.40404,0.121212,0.30303,0.10101,0.012048,0.060606,0.033333,0.163043,0.0,0.0,0.020202,0.0,0.014706,0.027027,0.0,0.0,0.0,0.037037,0.03125,0.022222,0.818182,0.939394,0.020202,0.0,0.052083,0.949495,0.636364,0.141414,0.16129,0.288462,0.040404,0.20202,0.424242,0.787879,0.848485,0.161616,0.0,0.0,0.40404,0.272727,0.040404,0.161616,0.0,0.131313,0.020202,0.989899,0.020202,1.0,0.989899,0.47619,0.4,0.010101,0.111111,0.316667,0.256545,0.263158,0.12987,0.006757,0.038136,0.111111,0.002,0.730419,0.001602,0.866332,0.009,0.502513,0.928043,0.020221,0.006423,0.38588,1.0,0.666667,0.688525
4,1.0,0.0,0.784946,2.6e-05,0.0,0.624862,0.666667,0.555556,0.0,0.0,0.232323,0.373737,0.181818,0.070707,0.020202,0.011494,0.555556,0.013789,0.016158,0.012598,1.0,0.0,0.0,0.505051,0.505051,0.959596,0.020202,0.0,0.031579,0.010101,0.0,0.0,0.013333,0.014925,0.026316,0.0,0.0,0.010309,0.0,0.0,0.0,0.361446,0.445783,0.481928,0.382716,0.493827,0.530864,0.4,0.434343,0.393939,0.191919,0.131313,0.272727,0.272727,0.161616,0.090909,0.050505,0.022222,0.151515,0.191919,0.333333,0.20202,0.131313,0.121212,0.020202,0.090909,0.111111,0.30303,0.59596,0.383838,0.131313,0.030303,0.717172,0.070707,0.054795,0.181818,0.376923,0.434286,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.149833,0.162,0.461538,0.461538,0.818182,0.191919,0.989899,0.020202,0.10101,0.505051,0.868687,0.787879,0.464646,0.939394,0.070707,0.040404,0.02,0.020202,0.070707,0.111111,0.030303,0.333333,0.626263,0.080808,0.0,0.010101,0.0,0.010101,0.040404,0.292929,0.828283,0.989899,0.0,0.0,0.787879,0.752941,0.733333,0.278689,0.1,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.69697,0.909091,1.0,1.0,0.264,0.278,0.319333,0.334,0.091524,0.070707,0.090909,0.151515,0.252525,0.252525,0.309091,0.04,0.0,0.0,0.040404,0.10101,0.131313,0.30303,0.222222,0.191919,0.04,0.0,0.0,0.161616,0.0,0.505051,0.060606,0.545455,0.464646,0.242424,0.848485,0.10101,0.012195,0.012195,0.0,0.0,0.0,0.040404,0.010101,0.040404,0.444444,0.322222,0.460526,0.141414,0.69697,0.767677,0.919192,0.606061,0.89899,0.565657,0.686869,0.555556,1.0,0.0,0.0,0.141414,0.212121,0.010101,0.121212,0.141414,0.0,0.018182,0.111111,0.0,0.131313,0.060606,0.040404,0.020202,0.0,0.0,0.070707,0.161616,0.040404,0.015625,0.050505,0.252525,0.070707,0.080808,0.010101,0.059701,0.040404,0.070707,0.050505,0.087719,0.070707,0.020202,0.015625,0.040404,0.808081,0.050505,0.0,0.758824,0.030303,0.10101,0.313131,0.262626,0.257143,0.131313,0.080808,0.030928,0.212121,0.136364,0.263889,0.020202,0.0,0.0,0.0,0.121212,0.232323,0.033333,0.373737,0.212121,0.181818,0.212121,0.012048,0.070707,0.0,0.097826,0.0,0.0,0.070707,0.018182,0.0,0.027027,0.0,0.0,0.02,0.0,0.0,0.066667,0.444444,0.919192,0.020202,0.054054,0.03125,0.979798,0.858586,0.313131,0.129032,0.288462,0.0,0.292929,0.343434,0.787879,0.919192,0.090909,0.0,0.0,0.828283,0.0,0.131313,0.0,0.0,0.050505,1.0,0.0,0.838384,1.0,0.989899,0.428571,0.4,0.030303,0.092593,0.233333,0.172775,0.315789,0.194805,0.004117,0.012712,0.0,0.005,0.822404,0.002402,0.913321,0.017,0.045226,0.969697,0.004596,0.011729,0.640145,0.0,0.0,0.311475


In [42]:
# Afterwards, i am going to remove these columns

X_train_scale = X_train_scale.drop(['DATASRCE','DOMAIN_B','TCODE','DOB'],axis=1)
X_test_scale = X_test_scale.drop(['DATASRCE','DOMAIN_B','TCODE','DOB'],axis=1)

In [43]:
print('X_train_scale shape: ', X_train_scale.shape)
print('X_test_scale shape: ', X_test_scale.shape)      

X_train_scale shape:  (71559, 316)
X_test_scale shape:  (23853, 316)


In [44]:
# Encode the categorical features using One-Hot Encoding or Ordinal Encoding

In [45]:
X_train_cat.dtypes

STATE       object
HOMEOWNR    object
GENDER      object
VETERANS    object
RFA_2A      object
GEOCODE2    object
DOMAIN_A    object
dtype: object

In [46]:
X_train_cat.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71559 entries, 2129 to 73193
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   STATE     71559 non-null  object
 1   HOMEOWNR  71559 non-null  object
 2   GENDER    71559 non-null  object
 3   VETERANS  71559 non-null  object
 4   RFA_2A    71559 non-null  object
 5   GEOCODE2  71559 non-null  object
 6   DOMAIN_A  71559 non-null  object
dtypes: object(7)
memory usage: 4.4+ MB


In [47]:
X_train_cat = X_train_cat.astype(str)
X_test_cat = X_test_cat.astype(str)

In [48]:
# Encode the categorical features using One-Hot Encoding or Ordinal Encoding. (train_cat, test_cat)

In [49]:
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(drop='first',handle_unknown = 'ignore').fit(X_train_cat)


cols = encoder.get_feature_names_out(input_features=X_train_cat.columns)
X_train_cat_encode = pd.DataFrame(encoder.transform(X_train_cat).toarray(),columns=cols)
X_train_cat_encode.head(10)

Unnamed: 0,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,HOMEOWNR_U,GENDER_M,GENDER_OTHER,VETERANS_Y,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
8,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [50]:

cols = encoder.get_feature_names_out(input_features=X_test_cat.columns)
X_test_cat_encode = pd.DataFrame(encoder.transform(X_test_cat).toarray(),columns=cols)
X_test_cat_encode.head(10)

Unnamed: 0,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,HOMEOWNR_U,GENDER_M,GENDER_OTHER,VETERANS_Y,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [51]:
print('X_train_cat_encode: ', X_train_cat_encode.shape)
print('X_train_scale: ', X_train_scale.shape)
print('X_test_cat_encode: ', X_test_cat_encode.shape)
print('X_test_scale: ', X_test_scale.shape)

X_train_cat_encode:  (71559, 25)
X_train_scale:  (71559, 316)
X_test_cat_encode:  (23853, 25)
X_test_scale:  (23853, 316)


In [52]:
X_train_transformed = pd.concat([X_train_cat_encode, X_train_scale], axis=1)
X_test_transformed = pd.concat([X_test_cat_encode, X_test_scale], axis=1)

In [53]:
print('X_train_transformed shape: ', X_train_transformed.shape)
print('X_test_transformed shape: ', X_test_transformed.shape)
print('y_train shape: ', y_train.shape)
print('y_test shape: ', y_test.shape)

X_train_transformed shape:  (71559, 341)
X_test_transformed shape:  (23853, 341)
y_train shape:  (71559, 1)
y_test shape:  (23853, 1)


In [54]:
X_train_transformed.isnull().sum()/len(X_train_transformed)

nulls_percent_df = pd.DataFrame(X_train_transformed.isna().sum()/len(X_train_transformed)).reset_index()
nulls_percent_df.columns = ['column_name', 'nulls_percentage']
nulls_percent_df[nulls_percent_df['nulls_percentage']!=0]

Unnamed: 0,column_name,nulls_percentage


In [55]:
# Fit a logistic regression model on the training data.

In [56]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

classification = LogisticRegression(random_state=42, solver='lbfgs')
classification.fit(X_train_transformed, y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [57]:
predictions = classification.predict(X_test_transformed)

In [58]:
# Check the accuracy on the test data.

In [59]:
classification.score(X_test_transformed, y_test)

0.9486437764641764

In [60]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [61]:
# Check for the imbalance.

In [62]:
print(X_train_transformed.shape)
print(y_train.shape)

(71559, 341)
(71559, 1)


In [63]:
X_train_transformed = X_train_transformed.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)

In [64]:
all_data = pd.concat([X_train_transformed, y_train], axis=1)

In [65]:
all_data.shape

(71559, 342)

In [66]:
all_data['TARGET_B'].value_counts()

0    67941
1     3618
Name: TARGET_B, dtype: int64

In [67]:
all_data.shape

(71559, 342)

In [68]:
from sklearn.utils import resample

category_0 = all_data[all_data['TARGET_B'] == 0]
category_1 = all_data[all_data['TARGET_B'] == 1]

# Undersample

In [69]:
category_0_undersampled = resample(category_0, 
                                   replace=False, 
                                   n_samples = len(category_1))

In [70]:
category_0_undersampled.shape

(3618, 342)

In [71]:
data_downsampled = pd.concat([category_0_undersampled, category_1], axis=0)

In [72]:
data_downsampled['TARGET_B'].value_counts()

0    3618
1    3618
Name: TARGET_B, dtype: int64

In [73]:
X_down = data_downsampled.drop(['TARGET_B'],axis=1)
y_down = data_downsampled[['TARGET_B']]

In [74]:
X_down

Unnamed: 0,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,HOMEOWNR_U,GENDER_M,GENDER_OTHER,VETERANS_Y,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U,ODATEDW,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
42780,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.498208,0.391753,0.666667,0.555556,0.000000,0.000000,0.292929,0.393939,0.191919,0.070707,0.010101,0.011494,0.666667,0.158175,0.172642,0.153885,0.070707,0.707071,0.242424,0.494949,0.515152,0.979798,0.010101,0.010101,0.010526,0.010101,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.349398,0.445783,0.481928,0.395062,0.506173,0.543210,0.386667,0.414141,0.393939,0.202020,0.151515,0.282828,0.222222,0.151515,0.090909,0.060606,0.055556,0.161616,0.181818,0.333333,0.202020,0.131313,0.151515,0.060606,0.141414,0.181818,0.313131,0.505051,0.313131,0.121212,0.030303,0.595960,0.090909,0.068493,0.272727,0.310769,0.400000,0.707071,0.646465,0.030303,0.222222,0.191919,0.151515,0.020202,0.020202,0.000000,0.147000,0.158000,0.307692,0.307692,0.757576,0.252525,0.969697,0.040404,0.070707,0.424242,0.757576,0.636364,0.343434,0.858586,0.151515,0.080808,0.04,0.060606,0.141414,0.151515,0.060606,0.306667,0.636364,0.111111,0.000000,0.010101,0.000000,0.010101,0.060606,0.282828,0.757576,0.989899,0.000000,0.040404,0.545455,0.705882,0.644444,0.245902,0.125,0.090909,0.121212,0.070707,0.050505,0.050505,0.141414,0.000000,0.242424,0.626263,0.797980,0.878788,0.257333,0.283333,0.282667,0.313333,0.087530,0.131313,0.141414,0.171717,0.262626,0.202020,0.109091,0.04,0.016393,0.010101,0.080808,0.111111,0.171717,0.303030,0.222222,0.070707,0.04,0.010101,0.020202,0.191919,0.040404,0.454545,0.050505,0.464646,0.545455,0.070707,0.808081,0.141414,0.000000,0.000000,0.000000,0.000000,0.0,0.020202,0.040404,0.010101,0.525253,0.211111,0.289474,0.020202,0.616162,0.777778,0.858586,0.696970,0.808081,0.676768,0.787879,0.676768,0.727273,0.969697,0.030303,0.101010,0.121212,0.040404,0.121212,0.171717,0.000000,0.018182,0.111111,0.020202,0.131313,0.101010,0.040404,0.060606,0.020202,0.000000,0.070707,0.262626,0.040404,0.031250,0.060606,0.161616,0.060606,0.050505,0.020202,0.029851,0.080808,0.050505,0.050505,0.052632,0.070707,0.010101,0.015625,0.070707,0.787879,0.050505,0.000000,0.705882,0.080808,0.101010,0.424242,0.181818,0.228571,0.101010,0.030303,0.051546,0.212121,0.090909,0.263889,0.050505,0.000000,0.000000,0.0,0.151515,0.292929,0.033333,0.393939,0.171717,0.191919,0.141414,0.012048,0.010101,0.033333,0.336957,0.000000,0.0,0.030303,0.000000,0.044118,0.013514,0.0,0.0,0.00,0.037037,0.0,0.011111,0.808081,0.979798,0.010101,0.000000,0.020833,0.959596,0.707071,0.252525,0.193548,0.365385,0.030303,0.141414,0.242424,0.555556,0.707071,0.303030,0.022222,0.000000,0.757576,0.080808,0.090909,0.050505,0.000000,0.020202,0.797980,0.212121,0.797980,1.000000,1.000000,0.428571,0.6,0.050505,0.074074,0.366667,0.277487,0.315789,0.155844,0.009185,0.016949,0.083333,0.010,0.682149,0.004004,0.867701,0.025,0.492462,0.937624,0.018382,0.018738,0.435068,0.0,0.000000,0.262295
37036,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.354839,0.556701,0.333333,0.888889,0.004149,0.000000,0.404040,0.686869,0.000000,0.010101,0.000000,0.011494,0.777778,0.020486,0.023773,0.018021,1.000000,0.000000,0.000000,0.494949,0.515152,0.989899,0.010101,0.000000,0.000000,0.010101,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.361446,0.445783,0.469880,0.358025,0.481481,0.518519,0.453333,0.414141,0.404040,0.191919,0.111111,0.313131,0.272727,0.202020,0.070707,0.030303,0.022222,0.181818,0.151515,0.333333,0.212121,0.131313,0.080808,0.020202,0.060606,0.080808,0.262626,0.666667,0.404040,0.141414,0.030303,0.707071,0.080808,0.041096,0.202020,0.400000,0.451429,0.969697,0.898990,0.030303,0.030303,0.000000,0.000000,0.000000,0.000000,0.000000,0.118167,0.138167,0.384615,0.384615,0.898990,0.111111,0.989899,0.020202,0.000000,0.575758,0.898990,0.777778,0.494949,0.949495,0.060606,0.080808,0.04,0.060606,0.060606,0.090909,0.030303,0.373333,0.656566,0.050505,0.000000,0.000000,0.000000,0.000000,0.020202,0.313131,0.373737,0.989899,0.000000,0.000000,0.565657,0.635294,0.655556,0.295082,0.125,0.030303,0.000000,0.000000,0.090909,0.020202,0.000000,0.000000,0.444444,0.858586,0.939394,0.949495,0.312667,0.329333,0.325333,0.335333,0.086040,0.030303,0.080808,0.171717,0.262626,0.333333,0.218182,0.00,0.000000,0.000000,0.020202,0.060606,0.202020,0.232323,0.363636,0.131313,0.02,0.000000,0.000000,0.111111,0.030303,0.565657,0.020202,0.484848,0.525253,0.131313,0.828283,0.131313,0.012195,0.000000,0.000000,0.033333,0.0,0.020202,0.020202,0.030303,0.585859,0.255556,0.328947,0.010101,0.727273,0.818182,0.969697,0.686869,0.939394,0.676768,0.636364,0.464646,0.484848,1.000000,0.040404,0.161616,0.030303,0.060606,0.141414,0.171717,0.000000,0.054545,0.111111,0.010101,0.151515,0.070707,0.040404,0.040404,0.010101,0.000000,0.080808,0.333333,0.101010,0.015625,0.040404,0.202020,0.050505,0.040404,0.020202,0.014925,0.050505,0.010101,0.050505,0.017544,0.010101,0.000000,0.015625,0.030303,0.909091,0.040404,0.000000,0.705882,0.030303,0.090909,0.393939,0.262626,0.142857,0.141414,0.050505,0.082474,0.272727,0.090909,0.333333,0.090909,0.000000,0.000000,0.0,0.202020,0.404040,0.033333,0.686869,0.121212,0.000000,0.000000,0.000000,0.050505,0.033333,0.206522,0.000000,0.0,0.030303,0.054545,0.000000,0.013514,0.0,0.0,0.04,0.000000,0.0,0.000000,0.727273,1.000000,0.000000,0.000000,0.010417,1.000000,0.868687,0.303030,0.161290,0.326923,0.040404,0.393939,0.434343,0.616162,1.000000,0.000000,0.000000,0.000000,0.959596,0.000000,0.050505,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,0.380952,0.4,0.030303,0.037037,0.450000,0.303665,0.315789,0.155844,0.011613,0.088983,0.333333,0.003,0.681694,0.001001,0.911496,0.010,0.025126,0.917318,0.013787,0.004311,0.526002,1.0,1.000000,0.065574
47629,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.498208,0.649485,0.666667,0.888889,0.049793,0.000000,0.131313,0.303030,0.303030,0.040404,0.000000,0.034483,0.888889,0.009645,0.011950,0.010027,1.000000,0.000000,0.000000,0.525253,0.484848,0.979798,0.000000,0.000000,0.031579,0.020202,0.045455,0.013889,0.000000,0.000000,0.000000,0.021277,0.0,0.010309,0.000000,0.0,0.000000,0.433735,0.506024,0.530120,0.469136,0.580247,0.604938,0.293333,0.484848,0.333333,0.191919,0.070707,0.252525,0.222222,0.141414,0.121212,0.121212,0.100000,0.242424,0.191919,0.252525,0.181818,0.141414,0.303030,0.080808,0.262626,0.171717,0.373737,0.464646,0.262626,0.090909,0.020202,0.696970,0.050505,0.082192,0.191919,0.292308,0.381429,0.929293,0.919192,0.030303,0.080808,0.050505,0.040404,0.000000,0.000000,0.000000,0.240833,0.241667,0.384615,0.384615,0.909091,0.101010,0.969697,0.040404,0.000000,0.343434,0.808081,0.727273,0.313131,0.919192,0.090909,0.030303,0.02,0.020202,0.131313,0.121212,0.030303,0.253333,0.575758,0.212121,0.000000,0.000000,0.000000,0.050505,0.434343,0.919192,0.989899,1.000000,0.000000,0.000000,0.747475,0.729412,0.711111,0.229508,0.100,0.070707,0.020202,0.000000,0.060606,0.040404,0.000000,0.000000,0.464646,0.838384,0.898990,0.979798,0.374000,0.377333,0.409333,0.423333,0.124356,0.000000,0.050505,0.101010,0.242424,0.343434,0.381818,0.10,0.000000,0.020202,0.000000,0.030303,0.060606,0.252525,0.383838,0.212121,0.10,0.000000,0.030303,0.212121,0.000000,0.727273,0.000000,0.393939,0.616162,0.080808,0.797980,0.050505,0.170732,0.000000,0.197183,0.000000,0.0,0.000000,0.030303,0.000000,0.111111,0.255556,0.407895,0.111111,0.696970,0.797980,0.868687,0.717172,0.858586,0.696970,0.686869,0.606061,1.000000,0.000000,0.000000,0.202020,0.232323,0.060606,0.121212,0.151515,0.000000,0.036364,0.030303,0.000000,0.131313,0.010101,0.000000,0.050505,0.000000,0.000000,0.121212,0.161616,0.060606,0.031250,0.060606,0.181818,0.171717,0.030303,0.000000,0.000000,0.060606,0.030303,0.070707,0.070175,0.040404,0.000000,0.046875,0.050505,0.848485,0.030303,0.000000,0.864706,0.060606,0.020202,0.151515,0.212121,0.342857,0.252525,0.181818,0.072165,0.232323,0.227273,0.291667,0.040404,0.000000,0.000000,0.0,0.070707,0.131313,0.000000,0.303030,0.232323,0.303030,0.000000,0.012048,0.010101,0.000000,0.217391,0.021277,0.0,0.030303,0.054545,0.000000,0.067568,0.0,0.0,0.00,0.074074,0.0,0.077778,0.797980,0.858586,0.000000,0.027027,0.135417,0.959596,0.818182,0.252525,0.225806,0.788462,0.000000,0.000000,0.000000,0.030303,0.101010,0.909091,0.000000,0.000000,0.949495,0.000000,0.020202,0.020202,0.000000,0.020202,1.000000,0.000000,0.989899,1.000000,1.000000,0.428571,0.6,0.030303,0.092593,0.350000,0.287958,0.315789,0.142857,0.008235,0.038136,0.166667,0.003,0.684882,0.001001,0.727190,0.010,0.035176,0.938248,0.005515,0.007824,0.462016,0.0,0.666667,0.049180
56474,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.569892,0.360825,0.500000,0.222222,0.016598,0.000000,0.272727,0.161616,0.282828,0.040404,0.030303,0.000000,0.555556,0.181092,0.206093,0.181849,0.818182,0.000000,0.191919,0.505051,0.505051,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.373494,0.457831,0.493976,0.395062,0.518519,0.555556,0.400000,0.393939,0.414141,0.202020,0.131313,0.262626,0.272727,0.141414,0.101010,0.070707,0.044444,0.151515,0.161616,0.353535,0.202020,0.141414,0.151515,0.060606,0.141414,0.181818,0.323232,0.505051,0.313131,0.101010,0.020202,0.656566,0.080808,0.054795,0.232323,0.307692,0.392857,0.696970,0.666667,0.080808,0.212121,0.131313,0.101010,0.010101,0.010101,0.000000,0.109667,0.120667,0.307692,0.230769,0.747475,0.262626,0.969697,0.040404,0.161616,0.444444,0.767677,0.666667,0.373737,0.878788,0.131313,0.070707,0.02,0.060606,0.121212,0.141414,0.050505,0.333333,0.636364,0.111111,0.000000,0.000000,0.000000,0.010101,0.030303,0.111111,0.323232,0.858586,0.000000,0.030303,0.515152,0.682353,0.633333,0.245902,0.125,0.161616,0.050505,0.090909,0.060606,0.090909,0.101010,0.000000,0.080808,0.262626,0.656566,0.888889,0.224667,0.252000,0.244000,0.272667,0.076718,0.181818,0.151515,0.191919,0.252525,0.171717,0.072727,0.02,0.000000,0.000000,0.101010,0.131313,0.202020,0.292929,0.212121,0.050505,0.02,0.000000,0.010101,0.181818,0.030303,0.525253,0.050505,0.454545,0.555556,0.090909,0.848485,0.111111,0.000000,0.000000,0.000000,0.000000,0.0,0.030303,0.020202,0.030303,0.040404,0.188889,0.236842,0.010101,0.616162,0.787879,0.858586,0.717172,0.828283,0.696970,0.808081,0.666667,0.939394,0.959596,0.010101,0.171717,0.121212,0.030303,0.151515,0.141414,0.000000,0.018182,0.111111,0.010101,0.101010,0.060606,0.050505,0.040404,0.020202,0.000000,0.040404,0.202020,0.050505,0.031250,0.070707,0.222222,0.050505,0.030303,0.020202,0.014925,0.111111,0.090909,0.050505,0.035088,0.070707,0.030303,0.031250,0.050505,0.727273,0.111111,0.000000,0.764706,0.050505,0.080808,0.323232,0.181818,0.314286,0.181818,0.070707,0.041237,0.252525,0.090909,0.277778,0.070707,0.000000,0.000000,0.0,0.151515,0.303030,0.033333,0.414141,0.171717,0.202020,0.121212,0.012048,0.010101,0.033333,0.217391,0.000000,0.0,0.030303,0.000000,0.161765,0.013514,0.0,0.0,0.00,0.000000,0.0,0.011111,0.747475,0.979798,0.020202,0.000000,0.020833,0.969697,0.686869,0.212121,0.161290,0.326923,0.030303,0.101010,0.222222,0.646465,0.797980,0.212121,0.000000,0.016129,0.525253,0.060606,0.191919,0.191919,0.000000,0.040404,0.747475,0.262626,0.727273,1.000000,1.000000,0.476190,0.8,0.060606,0.074074,0.283333,0.235602,0.315789,0.142857,0.003906,0.021186,0.083333,0.005,0.772313,0.002002,0.912865,0.015,0.040201,0.947933,0.010110,0.007057,0.423356,0.0,0.000000,0.245902
38121,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.498208,0.762887,0.666667,0.111111,0.004149,0.000000,0.252525,0.494949,0.171717,0.030303,0.020202,0.045977,0.111111,0.008896,0.008794,0.010225,1.000000,0.000000,0.000000,0.515152,0.494949,0.848485,0.030303,0.030303,0.073684,0.050505,0.000000,0.000000,0.013333,0.000000,0.026316,0.042553,0.0,0.030928,0.017544,0.0,0.023256,0.349398,0.421687,0.445783,0.370370,0.481481,0.506173,0.360000,0.535354,0.303030,0.171717,0.141414,0.333333,0.303030,0.080808,0.060606,0.050505,0.044444,0.252525,0.212121,0.262626,0.151515,0.131313,0.121212,0.050505,0.121212,0.303030,0.313131,0.404040,0.191919,0.080808,0.020202,0.404040,0.212121,0.054795,0.353535,0.256923,0.345714,0.575758,0.545455,0.161616,0.434343,0.272727,0.161616,0.000000,0.000000,0.000000,0.071333,0.072833,0.230769,0.230769,0.373737,0.636364,0.969697,0.040404,0.000000,0.363636,0.585859,0.363636,0.202020,0.737374,0.272727,0.151515,0.06,0.131313,0.262626,0.232323,0.131313,0.253333,0.585859,0.080808,0.023256,0.020202,0.000000,0.000000,0.000000,0.000000,0.000000,0.313131,0.000000,0.080808,0.282828,0.541176,0.522222,0.213115,0.125,0.333333,0.101010,0.000000,0.232323,0.242424,0.161616,0.000000,0.030303,0.151515,0.717172,0.989899,0.147333,0.148667,0.171333,0.180000,0.063224,0.191919,0.434343,0.171717,0.141414,0.040404,0.054545,0.00,0.000000,0.000000,0.131313,0.414141,0.171717,0.202020,0.080808,0.000000,0.00,0.000000,0.000000,0.151515,0.070707,0.353535,0.171717,0.606061,0.404040,0.181818,0.575758,0.191919,0.207317,0.207317,0.000000,0.000000,0.0,0.070707,0.000000,0.020202,0.161616,0.211111,0.276316,0.020202,0.727273,0.747475,0.868687,0.626263,0.868687,0.606061,0.717172,0.454545,0.444444,1.000000,0.181818,0.141414,0.181818,0.020202,0.121212,0.151515,0.000000,0.054545,0.080808,0.000000,0.080808,0.060606,0.080808,0.070707,0.000000,0.000000,0.010101,0.050505,0.080808,0.015625,0.060606,0.323232,0.060606,0.121212,0.070707,0.074627,0.090909,0.020202,0.020202,0.070175,0.030303,0.020202,0.062500,0.030303,0.868687,0.030303,0.000000,0.752941,0.050505,0.060606,0.343434,0.282828,0.171429,0.121212,0.101010,0.051546,0.141414,0.000000,0.083333,0.131313,0.000000,0.000000,0.0,0.141414,0.252525,0.100000,0.494949,0.000000,0.171717,0.232323,0.000000,0.040404,0.033333,0.065217,0.021277,0.0,0.030303,0.036364,0.044118,0.013514,0.0,0.0,0.04,0.148148,0.0,0.111111,0.505051,0.909091,0.010101,0.040541,0.062500,0.848485,0.414141,0.080808,0.129032,0.980769,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.282828,0.020202,0.363636,0.232323,0.000000,0.070707,1.000000,0.000000,1.000000,1.000000,0.929293,0.238095,0.4,0.030303,0.055556,0.383333,0.308901,0.315789,0.155844,0.019109,0.050847,0.222222,0.010,0.865209,0.003003,0.683850,0.015,0.492462,0.938144,0.004596,0.013655,0.914182,1.0,0.000000,0.721311
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.856631,0.463918,0.500000,0.666667,0.020747,0.020202,0.292929,0.121212,0.454545,0.010101,0.111111,0.000000,0.555556,0.013272,0.015947,0.013728,0.000000,0.000000,1.000000,0.494949,0.515152,0.898990,0.111111,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.373494,0.481928,0.518072,0.419753,0.518519,0.567901,0.346667,0.373737,0.414141,0.212121,0.161616,0.222222,0.222222,0.161616,0.131313,0.080808,0.044444,0.161616,0.141414,0.343434,0.212121,0.151515,0.161616,0.050505,0.141414,0.171717,0.353535,0.474747,0.292929,0.090909,0.010101,0.636364,0.111111,0.082192,0.202020,0.295385,0.384286,0.545455,0.535354,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.091333,0.101333,0.153846,0.153846,0.828283,0.181818,0.898990,0.111111,0.545455,0.414141,0.787879,0.646465,0.323232,0.898990,0.111111,0.090909,0.06,0.060606,0.141414,0.141414,0.050505,0.240000,0.595960,0.121212,0.093023,0.070707,0.010101,0.020202,0.040404,0.080808,0.212121,0.606061,0.000000,0.010101,0.353535,0.564706,0.566667,0.245902,0.125,0.000000,0.000000,0.464646,0.090909,0.000000,0.000000,0.090909,0.000000,0.020202,0.121212,0.515152,0.192667,0.218000,0.226667,0.258667,0.068375,0.252525,0.181818,0.191919,0.151515,0.161616,0.109091,0.00,0.000000,0.000000,0.131313,0.202020,0.242424,0.151515,0.212121,0.080808,0.00,0.000000,0.000000,0.292929,0.060606,0.333333,0.090909,0.414141,0.595960,0.050505,0.888889,0.101010,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.020202,0.070707,0.404040,0.200000,0.276316,0.010101,0.666667,0.646465,0.707071,0.595960,0.696970,0.535354,0.606061,0.626263,1.000000,1.000000,0.040404,0.111111,0.060606,0.101010,0.030303,0.080808,0.000000,0.036364,0.080808,0.010101,0.242424,0.202020,0.020202,0.040404,0.010101,0.000000,0.070707,0.444444,0.000000,0.171875,0.000000,0.131313,0.020202,0.060606,0.000000,0.000000,0.040404,0.111111,0.010101,0.000000,0.010101,0.111111,0.000000,0.040404,0.828283,0.020202,0.000000,0.705882,0.161616,0.171717,0.303030,0.141414,0.257143,0.121212,0.020202,0.030928,0.212121,0.045455,0.277778,0.040404,0.010309,0.020202,0.0,0.141414,0.292929,0.000000,0.121212,0.292929,0.454545,0.191919,0.024096,0.070707,0.166667,0.076087,0.000000,0.0,0.181818,0.000000,0.000000,0.000000,0.0,0.0,0.00,0.000000,0.0,0.000000,0.727273,1.000000,0.000000,0.000000,0.000000,0.939394,0.636364,0.212121,0.258065,0.288462,0.050505,0.242424,0.383838,0.646465,0.818182,0.191919,0.000000,0.000000,0.030303,0.181818,0.555556,0.131313,0.000000,0.111111,1.000000,0.010101,0.131313,1.000000,0.949495,0.285714,0.4,0.070707,0.074074,0.150000,0.104712,0.315789,0.155844,0.002323,0.008475,0.027778,0.005,0.908925,0.003003,0.912865,0.010,0.045226,0.989482,0.008272,0.010394,0.129833,1.0,0.333333,0.704918
71429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.856631,0.670103,0.166667,0.111111,0.145228,0.060606,0.414141,0.242424,0.303030,0.080808,0.050505,0.068966,0.555556,0.012259,0.013885,0.012259,1.000000,0.000000,0.000000,0.484848,0.525253,0.676768,0.111111,0.000000,0.063158,0.383838,0.000000,0.013889,0.013333,0.029851,0.026316,0.021277,0.0,0.329897,0.017544,0.0,0.046512,0.361446,0.457831,0.518072,0.407407,0.530864,0.580247,0.373333,0.515152,0.323232,0.171717,0.151515,0.272727,0.181818,0.111111,0.111111,0.141414,0.066667,0.232323,0.202020,0.303030,0.121212,0.151515,0.262626,0.070707,0.242424,0.181818,0.333333,0.494949,0.282828,0.121212,0.060606,0.535354,0.161616,0.068493,0.262626,0.303077,0.397143,0.515152,0.515152,0.000000,0.484848,0.484848,0.484848,0.000000,0.000000,0.000000,0.243833,0.239000,0.461538,0.461538,0.464646,0.545455,0.989899,0.020202,0.090909,0.404040,0.767677,0.525253,0.212121,0.848485,0.161616,0.181818,0.08,0.151515,0.131313,0.171717,0.060606,0.173333,0.383838,0.161616,0.093023,0.060606,0.000000,0.070707,0.474747,0.848485,0.949495,0.989899,0.010101,0.070707,0.313131,0.541176,0.522222,0.245902,0.150,0.040404,0.444444,0.000000,0.060606,0.000000,0.474747,0.000000,0.767677,0.878788,0.929293,0.949495,0.152000,0.164667,0.228667,0.234000,0.065321,0.313131,0.222222,0.090909,0.111111,0.222222,0.036364,0.00,0.032787,0.020202,0.282828,0.232323,0.171717,0.050505,0.202020,0.020202,0.00,0.020202,0.020202,0.292929,0.141414,0.252525,0.232323,0.646465,0.363636,0.151515,0.818182,0.101010,0.036585,0.036585,0.000000,0.000000,0.0,0.040404,0.030303,0.000000,0.040404,0.188889,0.276316,0.010101,0.646465,0.616162,0.767677,0.494949,0.696970,0.434343,0.454545,0.454545,0.444444,0.828283,0.222222,0.050505,0.161616,0.010101,0.141414,0.222222,0.000000,0.127273,0.101010,0.010101,0.151515,0.030303,0.010101,0.050505,0.000000,0.000000,0.050505,0.171717,0.050505,0.109375,0.010101,0.212121,0.060606,0.040404,0.020202,0.014925,0.090909,0.080808,0.070707,0.157895,0.080808,0.050505,0.093750,0.050505,0.747475,0.030303,0.000000,0.705882,0.121212,0.151515,0.333333,0.202020,0.171429,0.080808,0.060606,0.010309,0.303030,0.000000,0.319444,0.080808,0.030928,0.060606,0.0,0.181818,0.414141,0.000000,0.242424,0.282828,0.303030,0.414141,0.000000,0.050505,0.000000,0.032609,0.063830,0.0,0.040404,0.000000,0.000000,0.000000,0.0,0.0,0.02,0.000000,0.0,0.166667,0.494949,0.595960,0.303030,0.094595,0.041667,0.868687,0.444444,0.121212,0.096774,0.538462,0.000000,0.000000,0.000000,0.000000,0.202020,0.808081,0.000000,0.000000,0.636364,0.000000,0.373737,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,0.979798,0.380952,0.4,0.050505,0.074074,0.066667,0.052356,0.157895,0.103896,0.000211,0.000000,0.000000,0.015,0.911202,0.002002,0.911040,0.015,0.020101,0.990003,0.007353,0.013732,0.790220,1.0,0.000000,0.442623
71434,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.498208,0.793814,1.000000,0.555556,0.020747,0.000000,0.353535,0.272727,0.414141,0.040404,0.040404,0.011494,0.555556,0.018460,0.022511,0.018897,0.000000,0.020202,0.989899,0.515152,0.494949,0.939394,0.040404,0.000000,0.000000,0.050505,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.051546,0.000000,0.0,0.000000,0.397590,0.506024,0.542169,0.432099,0.555556,0.592593,0.360000,0.404040,0.373737,0.232323,0.121212,0.212121,0.212121,0.161616,0.131313,0.101010,0.077778,0.131313,0.181818,0.323232,0.212121,0.161616,0.242424,0.070707,0.222222,0.181818,0.363636,0.464646,0.282828,0.101010,0.030303,0.696970,0.070707,0.068493,0.191919,0.290769,0.388571,0.777778,0.767677,0.010101,0.010101,0.000000,0.000000,0.000000,0.000000,0.000000,0.139667,0.156000,0.230769,0.153846,0.838384,0.171717,0.868687,0.141414,0.343434,0.393939,0.808081,0.717172,0.333333,0.919192,0.090909,0.060606,0.04,0.040404,0.111111,0.121212,0.020202,0.253333,0.585859,0.161616,0.023256,0.020202,0.000000,0.040404,0.101010,0.353535,0.585859,0.848485,0.010101,0.030303,0.535354,0.658824,0.633333,0.229508,0.100,0.010101,0.000000,0.222222,0.121212,0.010101,0.000000,0.050505,0.111111,0.131313,0.373737,0.676768,0.224667,0.244000,0.294000,0.330667,0.104725,0.212121,0.181818,0.121212,0.141414,0.202020,0.090909,0.08,0.000000,0.060606,0.111111,0.181818,0.151515,0.161616,0.232323,0.060606,0.06,0.000000,0.070707,0.303030,0.050505,0.525253,0.080808,0.313131,0.696970,0.000000,0.868687,0.121212,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.020202,0.010101,0.121212,0.144444,0.223684,0.030303,0.373737,0.676768,0.767677,0.585859,0.767677,0.575758,0.747475,0.737374,1.000000,0.000000,0.000000,0.141414,0.121212,0.040404,0.151515,0.101010,0.000000,0.000000,0.090909,0.070707,0.131313,0.010101,0.040404,0.101010,0.070707,0.030769,0.050505,0.161616,0.040404,0.062500,0.030303,0.232323,0.070707,0.030303,0.030303,0.014925,0.050505,0.101010,0.070707,0.017544,0.040404,0.040404,0.015625,0.070707,0.767677,0.080808,0.020408,0.705882,0.131313,0.141414,0.282828,0.181818,0.142857,0.181818,0.050505,0.030928,0.222222,0.045455,0.277778,0.030303,0.000000,0.000000,0.0,0.181818,0.353535,0.000000,0.272727,0.060606,0.414141,0.080808,0.012048,0.020202,0.033333,0.597826,0.021277,0.0,0.020202,0.000000,0.000000,0.013514,0.0,0.0,0.00,0.037037,0.0,0.000000,0.888889,0.969697,0.000000,0.000000,0.041667,0.949495,0.828283,0.262626,0.322581,0.288462,0.000000,0.090909,0.393939,0.767677,0.898990,0.111111,0.000000,0.000000,0.010101,0.454545,0.535354,0.010101,0.000000,0.000000,0.191919,0.818182,0.191919,0.959596,0.959596,0.333333,0.6,0.040404,0.148148,0.383333,0.319372,0.315789,0.155844,0.015519,0.072034,0.250000,0.005,0.730874,0.003003,0.912409,0.020,0.035176,0.937415,0.001838,0.007613,0.648639,0.0,0.000000,0.590164
71485,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.641577,0.624862,0.500000,0.777778,0.000000,0.010101,0.212121,0.424242,0.141414,0.050505,0.030303,0.022989,0.555556,0.031682,0.033493,0.035731,1.000000,0.000000,0.000000,0.494949,0.515152,0.868687,0.010101,0.000000,0.084211,0.121212,0.000000,0.027778,0.013333,0.029851,0.052632,0.021277,0.0,0.082474,0.000000,0.0,0.034884,0.349398,0.397590,0.421687,0.358025,0.444444,0.481481,0.320000,0.484848,0.353535,0.161616,0.131313,0.414141,0.252525,0.111111,0.060606,0.020202,0.011111,0.212121,0.202020,0.343434,0.131313,0.121212,0.060606,0.020202,0.050505,0.272727,0.323232,0.404040,0.222222,0.070707,0.020202,0.535354,0.151515,0.041096,0.303030,0.261538,0.351429,0.616162,0.373737,0.010101,0.373737,0.363636,0.202020,0.000000,0.000000,0.000000,0.337000,0.317500,0.692308,0.692308,0.767677,0.242424,0.959596,0.050505,0.010101,0.353535,0.636364,0.505051,0.272727,0.787879,0.222222,0.080808,0.02,0.070707,0.222222,0.202020,0.101010,0.240000,0.646465,0.040404,0.000000,0.010101,0.000000,0.525253,0.727273,0.949495,1.000000,1.000000,0.000000,0.121212,0.232323,0.517647,0.488889,0.229508,0.125,0.363636,0.020202,0.000000,0.121212,0.050505,0.060606,0.000000,0.989899,1.000000,1.000000,1.000000,0.294000,0.356000,0.330667,0.379333,0.122047,0.040404,0.171717,0.161616,0.222222,0.252525,0.236364,0.02,0.000000,0.020202,0.010101,0.151515,0.070707,0.212121,0.343434,0.181818,0.02,0.000000,0.030303,0.040404,0.000000,0.373737,0.020202,0.696970,0.313131,0.161616,0.838384,0.151515,0.000000,0.000000,0.000000,0.000000,0.0,0.010101,0.010101,0.000000,0.111111,0.266667,0.394737,0.090909,0.676768,0.888889,0.939394,0.848485,0.939394,0.828283,0.777778,0.585859,0.616162,1.000000,0.060606,0.131313,0.121212,0.060606,0.141414,0.212121,0.000000,0.054545,0.080808,0.010101,0.131313,0.030303,0.030303,0.030303,0.010101,0.000000,0.050505,0.232323,0.030303,0.031250,0.040404,0.222222,0.070707,0.050505,0.060606,0.000000,0.050505,0.060606,0.090909,0.052632,0.050505,0.030303,0.031250,0.060606,0.808081,0.040404,0.000000,0.823529,0.000000,0.080808,0.171717,0.323232,0.342857,0.202020,0.090909,0.051546,0.252525,0.181818,0.236111,0.090909,0.000000,0.010101,0.0,0.111111,0.212121,0.033333,0.424242,0.111111,0.141414,0.363636,0.000000,0.050505,0.066667,0.076087,0.000000,0.0,0.050505,0.072727,0.029412,0.027027,0.0,0.0,0.02,0.037037,0.0,0.155556,0.444444,0.848485,0.101010,0.054054,0.031250,1.000000,0.626263,0.141414,0.096774,0.173077,0.000000,0.363636,0.646465,0.979798,1.000000,0.000000,0.477778,0.225806,0.515152,0.000000,0.494949,0.000000,0.033333,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,0.571429,0.4,0.030303,0.055556,0.266667,0.219895,0.315789,0.142857,0.003167,0.033898,0.111111,0.003,0.913479,0.000400,0.820255,0.005,0.492462,0.958242,0.013787,0.003497,0.823592,0.0,1.000000,0.180328


In [75]:
y_down

Unnamed: 0,TARGET_B
42780,0
37036,0
47629,0
56474,0
38121,0
...,...
71396,1
71429,1
71434,1
71485,1


In [88]:
LR_down = LogisticRegression(random_state=0, solver='lbfgs')
LR_down.fit(X_down, y_down)


pred = LR_down.predict(X_test_transformed)

#print("score: ",LR_down.score(X_test, y_test))
print("precision: ",precision_score(y_test,pred))
print("recall: ",recall_score(y_test,pred))
print("f1: ",f1_score(y_test,pred))

  y = column_or_1d(y, warn=True)


precision:  0.06684168012924072
recall:  0.5404081632653062
f1:  0.11896846077814718


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Oversample

In [78]:
category_1_oversampled = resample(category_1, 
                                  replace=True, 
                                  n_samples = len(category_0))

In [79]:
category_1_oversampled.shape

(67941, 342)

In [80]:
data_upsampled = pd.concat([category_0, category_1_oversampled], axis=0)

In [81]:
data_upsampled['TARGET_B'].value_counts()

0    67941
1    67941
Name: TARGET_B, dtype: int64

In [82]:
X_up = data_upsampled.drop(['TARGET_B'],axis=1)
y_up = data_upsampled[['TARGET_B']]

In [90]:
LR_over = LogisticRegression(random_state=0, solver='lbfgs')
LR_over.fit(X_up, y_up)

pred = LR_over.predict(X_test_transformed)

#print("score: ",LR.score(X_test, y_test)
      
print("precision: ",precision_score(y_test,pred))
print("recall: ",recall_score(y_test,pred))
print("f1: ",f1_score(y_test,pred))


  y = column_or_1d(y, warn=True)


precision:  0.06797272432081394
recall:  0.5126530612244898
f1:  0.12003058103975535


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
