In [1]:
#libraries
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.feature_selection import VarianceThreshold 


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

pd.set_option('display.max_columns', None)

In [2]:
#data sets
numerical = pd.read_csv('numerical.csv')
print("numerical: ", numerical.shape)

categorical = pd.read_csv('categorical.csv')
print("categorical: ", categorical.shape)

targets = pd.read_csv('target.csv')
print("targets: ", targets.shape)

numerical:  (95412, 315)
categorical:  (95412, 22)
targets:  (95412, 2)


In [3]:
# correcting the types for categorical variables

int_col= categorical.select_dtypes('int64')
col_names = int_col.columns.tolist()

for col in col_names:
    categorical[col]=categorical[col].astype('object')
    
# concatenating al our data together:
data=pd.concat([targets, numerical, categorical], axis = 1)

# subsetting the donors (TARGET_B==1)
donors = data[data["TARGET_B"]==1]
print("number of donors: ", data[data["TARGET_B"]==1].shape[0])
print("number of donors: ", donors.shape)

number of donors:  4843
number of donors:  (4843, 339)


In [4]:
#Train-test split | 
# setting our target as TARGET_D and our features as the other columns minus TARGET_B AND TARGET_D
y = donors["TARGET_D"]
X = donors.drop(["TARGET_B", "TARGET_D"], axis=1)

# separating the data using a train-test-split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# and resetting the index
X_train.reset_index(inplace=True,drop=True)
X_test.reset_index(inplace=True,drop=True)
y_train.reset_index(inplace=True,drop=True)
y_test.reset_index(inplace=True,drop=True)

# separate X_train and X_test into numerical and categorical
def split_num_cat(df):
    df_num = df.select_dtypes(include=np.number)
    df_cat = df.select_dtypes(include=object)
    return df_num, df_cat

X_train_num, X_train_cat = split_num_cat(X_train)
X_test_num, X_test_cat = split_num_cat(X_test)

print(X_train_num.shape)
print(X_train_cat.shape)

(3874, 316)
(3874, 21)


In [5]:
#Treating numericals | 
# Scaling/Normalizing the data
transformer = MinMaxScaler().fit(X_train_num) 
X_train_scaled_arr = transformer.transform(X_train_num)
X_train_scaled = pd.DataFrame(X_train_scaled_arr, columns=X_train_num.columns)
display(X_train_scaled.describe().T)
    #data has been successfully scaled between 0 and 1

# applying the transformer to our test set
X_test_scaled_arr = transformer.transform(X_test_num)
X_test_scaled = pd.DataFrame(X_test_scaled_arr, columns=X_test_num.columns)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
TCODE,3874.0,0.002178,0.033160,0.0,0.000000,0.000026,0.000051,1.0
AGE,3874.0,0.618665,0.148508,0.0,0.531915,0.612890,0.723404,1.0
INCOME,3874.0,0.541086,0.277484,0.0,0.333333,0.666667,0.666667,1.0
WEALTH1,3874.0,0.787644,0.295165,0.0,0.666667,1.000000,1.000000,1.0
HIT,3874.0,0.015276,0.042395,0.0,0.000000,0.000000,0.016667,1.0
...,...,...,...,...,...,...,...,...
CONTROLN,3874.0,0.516304,0.302582,0.0,0.245671,0.525614,0.790987,1.0
HPHONE_D,3874.0,0.494063,0.500029,0.0,0.000000,0.000000,1.000000,1.0
RFA_2F,3874.0,0.414214,0.385046,0.0,0.000000,0.333333,0.666667,1.0
CLUSTER2,3874.0,0.477750,0.311180,0.0,0.196721,0.442623,0.770492,1.0


In [26]:
#Selection of features
# setting a variance threshold
var_threshold = 0.03
select = VarianceThreshold(threshold=(var_threshold))

# filtering based on the threshold
select = select.fit(X_train_scaled)
selected = select.transform(X_train_scaled)
X_train_selected = pd.DataFrame(selected)
print(X_train_selected.shape)
print((X_train_scaled.shape[1] - X_train_selected.shape[1]), " columns were removed")
var_list = list(select.get_support())
col_to_drop = [col[0] for col in zip(X_train_scaled.columns, var_list) if col[1] == False]
#col_to_drop.remove("AVGGIFT")

# dropping the columns with low variability from X_train_scaled and X_test scaled
X_train_scaled.drop(col_to_drop, axis=1, inplace=True)
X_test_scaled.drop(col_to_drop, axis=1, inplace=True)
X_train_scaled

(3874, 40)
1  columns were removed


Unnamed: 0,INCOME,WEALTH1,WWIIVETS,WEALTH2,POP90C1,POP90C2,POP90C3,ETH1,DW1,DW4,HV3,HU1,HU2,HU5,HVP1,HUR2,MSA,ADI,PEC2,TPE13,LFC8,LFC9,EC7,VC3,POBC2,VOC2,HC2,HC4,HC6,HC11,HC13,HC17,HC18,HC19,CARDPROM,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2,FIRSTDATE_MM
0,0.166667,1.0,0.000000,1.000000,0.141414,0.0,0.868687,0.797980,0.898990,0.000000,0.384615,0.888889,0.121212,0.020202,0.000000,0.434343,0.068522,0.314729,0.065217,0.878788,0.959596,1.000000,0.375000,0.000000,0.545455,0.737374,0.076923,0.838384,0.949495,0.717172,0.111111,0.929293,0.080808,0.767677,0.081633,0.663868,0.0,0.000000,0.311475,0.090909
1,0.666667,1.0,0.050505,1.000000,0.000000,0.0,1.000000,0.959596,0.717172,0.000000,0.384615,0.848485,0.161616,0.090909,0.151515,0.505051,0.740899,0.103876,0.500000,0.797980,0.333333,1.000000,0.107143,0.050505,0.606061,0.878788,0.307692,0.151515,0.696970,0.070707,0.191919,0.090909,0.898990,0.101010,0.163265,0.891718,0.0,0.000000,0.655738,1.000000
2,0.833333,1.0,0.323232,1.000000,1.000000,0.0,0.000000,0.929293,0.949495,0.060606,0.692308,0.939394,0.070707,0.020202,0.050505,0.939394,0.359743,0.311628,0.065217,0.575758,1.000000,1.000000,0.678571,0.323232,0.282828,0.949495,0.346154,0.010101,0.747475,0.909091,0.101010,1.000000,0.000000,1.000000,0.510204,0.645883,1.0,0.333333,0.016393,0.818182
3,0.666667,1.0,0.000000,1.000000,1.000000,0.0,0.010101,0.838384,0.555556,0.373737,0.307692,0.515152,0.494949,0.020202,0.010101,0.373737,0.055675,0.305426,0.521739,0.727273,0.696970,0.878788,0.125000,0.252525,0.676768,0.565657,0.403846,0.070707,0.373737,0.595960,0.393939,1.000000,0.000000,0.919192,0.142857,0.143685,0.0,0.000000,0.540984,0.909091
4,0.000000,1.0,0.303030,1.000000,1.000000,0.0,0.000000,0.939394,1.000000,0.000000,0.692308,0.989899,0.020202,0.303030,0.040404,0.909091,0.286938,0.196899,0.152174,0.767677,0.000000,1.000000,0.392857,0.303030,0.262626,0.888889,0.557692,0.000000,0.080808,0.242424,0.696970,0.989899,0.020202,0.989899,0.306122,0.203471,1.0,0.333333,0.098361,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3869,0.666667,1.0,0.252525,1.000000,0.000000,0.0,1.000000,1.000000,0.686869,0.020202,0.230769,0.727273,0.282828,0.525253,0.010101,0.343434,0.000000,0.733333,0.119565,0.424242,0.707071,0.919192,0.285714,0.252525,0.535354,0.757576,0.365385,0.090909,0.616162,0.181818,0.090909,0.565657,0.353535,0.383838,0.081633,0.689608,0.0,0.000000,0.901639,0.818182
3870,1.000000,1.0,0.121212,1.000000,1.000000,0.0,0.000000,0.868687,0.727273,0.272727,0.384615,0.747475,0.262626,0.080808,0.353535,0.666667,0.813704,0.162791,0.032609,0.868687,1.000000,1.000000,0.553571,0.121212,0.505051,0.818182,0.153846,0.444444,0.767677,0.575758,0.373737,1.000000,0.000000,0.808081,0.632653,0.934728,1.0,0.000000,0.065574,0.909091
3871,0.666667,1.0,0.323232,0.777778,0.000000,0.0,1.000000,0.969697,0.919192,0.000000,0.230769,0.868687,0.141414,0.000000,0.040404,0.535354,0.407923,0.317829,0.054348,0.616162,1.000000,0.000000,0.196429,0.323232,0.858586,0.727273,0.346154,0.151515,0.636364,0.424242,0.484848,0.969697,0.040404,0.535354,0.224490,0.626755,0.0,0.000000,0.590164,0.909091
3872,0.166667,1.0,0.313131,0.222222,0.000000,0.0,1.000000,1.000000,0.656566,0.313131,0.153846,0.646465,0.363636,0.040404,0.000000,0.464646,0.000000,0.165891,0.271739,0.272727,0.383838,1.000000,0.178571,0.313131,0.808081,0.434343,0.384615,0.111111,0.313131,0.767677,0.040404,0.868687,0.141414,0.868687,0.714286,0.964775,0.0,1.000000,0.950820,0.727273


In [27]:
#Selecting numerical columns based on the fact that they are not colinear with each other
corr_matrix=X_train_scaled.select_dtypes(include=[np.number]).corr()  # default
corr_matrix

Unnamed: 0,INCOME,WEALTH1,WWIIVETS,WEALTH2,POP90C1,POP90C2,POP90C3,ETH1,DW1,DW4,HV3,HU1,HU2,HU5,HVP1,HUR2,MSA,ADI,PEC2,TPE13,LFC8,LFC9,EC7,VC3,POBC2,VOC2,HC2,HC4,HC6,HC11,HC13,HC17,HC18,HC19,CARDPROM,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2,FIRSTDATE_MM
INCOME,1.0,0.357232,-0.080157,0.203326,0.130027,-0.102224,-0.077447,0.021218,0.13192,-0.05565,0.292013,0.120766,-0.132951,-0.031535,0.269554,0.245073,0.124,-0.103903,0.031535,0.112396,0.033486,-0.054082,0.300036,-0.079527,-0.115124,0.246029,-0.133173,0.124456,0.120605,0.063052,0.001392,0.038868,-0.044609,0.032945,-0.074615,0.081171,-0.106018,-0.094747,-0.336402,-0.054896
WEALTH1,0.357232,1.0,-0.121006,0.261853,0.111708,-0.114047,-0.049651,0.084053,0.087559,-0.033975,0.232489,0.119535,-0.139795,-0.016393,0.192997,0.220217,0.066667,-0.045993,0.059977,0.107469,0.034259,-0.015815,0.307711,-0.105484,-0.094756,0.225135,-0.148349,0.121545,0.141047,0.032514,0.023731,0.02833,-0.039166,0.03055,-0.056317,0.035168,-0.370478,-0.044046,-0.307383,-0.006976
WWIIVETS,-0.080157,-0.121006,1.0,-0.06217,-0.028809,0.072686,-0.003831,0.115839,-0.037117,0.049194,0.003215,0.089867,-0.057294,0.238802,0.058169,-0.106113,-0.021484,0.019995,-0.096443,-0.122722,-0.058758,-0.120057,-0.030529,0.81092,-0.109541,-0.239358,0.191957,-0.166077,-0.159901,-0.018169,0.057477,0.065694,-0.043164,0.062508,0.015187,-0.030631,0.044571,-0.027351,0.057017,0.014122
WEALTH2,0.203326,0.261853,-0.06217,1.0,0.229172,-0.114208,-0.188898,0.063596,0.080664,0.020274,0.278312,0.089788,-0.101674,-0.106479,0.203065,0.238697,0.14232,-0.08816,0.019114,0.138362,0.054662,-0.074502,0.328638,-0.048417,-0.134569,0.16945,-0.118881,0.118265,0.090325,0.112526,0.034003,0.129215,-0.130155,0.14676,-0.456725,0.036143,0.005625,0.007321,-0.392306,-0.067877
POP90C1,0.130027,0.111708,-0.028809,0.229172,1.0,-0.543835,-0.745331,-0.12362,-0.049372,0.333351,0.515481,-0.137671,0.190269,-0.308805,0.307103,0.09402,0.492705,-0.306802,-0.120551,0.342532,0.081688,-0.104629,0.397663,-0.02478,-0.28755,-0.051587,0.073823,-0.0438,-0.122063,0.414288,-0.000117,0.530742,-0.500679,0.621964,-0.066075,0.082967,0.017264,-0.0461,-0.604474,-0.026099
POP90C2,-0.102224,-0.114047,0.072686,-0.114208,-0.543835,1.0,-0.123466,0.082348,-0.001277,-0.025631,-0.233642,-0.043394,0.061074,-0.020215,-0.180477,-0.085426,-0.303089,0.187415,-0.045757,-0.44655,-0.000119,-0.005339,-0.155689,0.09631,0.117293,-0.113707,0.048877,-0.039459,-0.023353,0.018897,-0.008817,0.107275,-0.094231,0.083075,0.044732,-0.006428,0.043142,0.020931,0.308721,0.040334
POP90C3,-0.077447,-0.049651,-0.003831,-0.188898,-0.745331,-0.123466,1.0,0.168571,0.113398,-0.363198,-0.399161,0.265166,-0.25148,0.397886,-0.215241,-0.005702,-0.346633,0.223985,0.198384,0.007758,-0.052631,0.157127,-0.323365,-0.008429,0.302422,0.216954,-0.092213,0.100114,0.194932,-0.48237,0.028731,-0.66035,0.683409,-0.769397,0.048316,-0.102735,-0.040764,0.036548,0.483198,-0.000672
ETH1,0.021218,0.084053,0.115839,0.063596,-0.12362,0.082348,0.168571,1.0,0.226425,-0.153966,0.049649,0.398871,-0.245513,0.22621,-0.064668,0.27511,-0.028694,0.101896,0.120264,0.0049,0.120484,0.01109,0.155159,0.179068,0.091625,0.356192,-0.05461,0.102955,0.21371,-0.066043,0.112788,-0.055093,0.178071,-0.102598,0.072534,-0.079478,0.085723,0.047769,-0.055866,-0.01484
DW1,0.13192,0.087559,-0.037117,0.080664,-0.049372,-0.001277,0.113398,0.226425,1.0,-0.766104,0.213397,0.7454,-0.667225,-0.034593,0.037393,0.76098,-0.012337,0.022607,0.121624,0.116618,0.08536,-0.02859,0.110692,-0.03501,0.269697,0.687176,0.117429,0.089396,0.010639,0.192887,-0.224746,-0.039782,0.118712,-0.086431,0.038722,-0.003278,0.040948,-0.023562,-0.196797,0.007588
DW4,-0.05565,-0.033975,0.049194,0.020274,0.333351,-0.025631,-0.363198,-0.153966,-0.766104,1.0,0.021815,-0.78082,0.841084,-0.121453,0.091255,-0.56125,0.100134,-0.148076,-0.140665,-0.013643,0.027141,-0.000857,0.159109,0.060001,-0.222389,-0.590957,0.07453,-0.129864,-0.138921,0.041309,0.214091,0.341915,-0.32464,0.434256,-0.05109,0.023055,-0.000636,-0.012721,-0.051575,-0.004771


In [28]:
# Extract values and row, column names
arr = corr_matrix.values
index_names = corr_matrix.index
col_names = corr_matrix.columns

#  Get indices where the correlation threshold is crossed; avoiding the diagonal elements
R,C = np.where(np.triu(arr,1)>0.85)

# Arrange everything in a dataframe
out_arr = np.column_stack((index_names[R],col_names[C],arr[R,C]))
df_out = pd.DataFrame(out_arr,columns=[['row_name','col_name','value']])
df_out

Unnamed: 0,row_name,col_name,value


In [29]:
#creating and checking the list of multicolinear column to drop
col_to_drop2 = pd.DataFrame(df_out['col_name'].value_counts(dropna=False)).reset_index()
col_to_drop2.columns = ['col_name', 'value_count']
list_col_to_drop = list(col_to_drop2['col_name'])

#dropping the columns from X_train_selected and X_test_selected
X_train_scaled.drop(list_col_to_drop, axis = 1, inplace = True)
X_test_scaled.drop(list_col_to_drop, axis = 1, inplace = True)

# checking the shape of the resulting dfs
print(X_train_scaled.shape)
print(X_test_scaled.shape)

(3874, 40)
(969, 40)


In [30]:
#Treating categoricals | 
encoder = OneHotEncoder(drop='first', handle_unknown ='ignore').fit(X_train_cat)
encoded_cat = encoder.transform(X_train_cat).toarray()
cols = encoder.get_feature_names_out(input_features=X_train_cat.columns)
onehot_encoded = pd.DataFrame(encoded_cat, columns=cols)

#applying encoder on the test set:
encoded_test_cat = encoder.transform(X_test_cat).toarray()
onehot_encoded_test = pd.DataFrame(encoded_test_cat, columns=cols)

# Concatenating the numericals and categoricals data:
X_train_treated = pd.concat([X_train_scaled, onehot_encoded], axis=1)
display(X_train_treated)
X_test_treated = pd.concat([X_test_scaled, onehot_encoded_test], axis=1)

Unnamed: 0,INCOME,WEALTH1,WWIIVETS,WEALTH2,POP90C1,POP90C2,POP90C3,ETH1,DW1,DW4,HV3,HU1,HU2,HU5,HVP1,HUR2,MSA,ADI,PEC2,TPE13,LFC8,LFC9,EC7,VC3,POBC2,VOC2,HC2,HC4,HC6,HC11,HC13,HC17,HC18,HC19,CARDPROM,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2,FIRSTDATE_MM,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,CLUSTER_2,CLUSTER_3,CLUSTER_4,CLUSTER_5,CLUSTER_6,CLUSTER_7,CLUSTER_8,CLUSTER_9,CLUSTER_10,CLUSTER_11,CLUSTER_12,CLUSTER_13,CLUSTER_14,CLUSTER_15,CLUSTER_16,CLUSTER_17,CLUSTER_18,CLUSTER_19,CLUSTER_20,CLUSTER_21,CLUSTER_22,CLUSTER_23,CLUSTER_24,CLUSTER_25,CLUSTER_26,CLUSTER_27,CLUSTER_28,CLUSTER_29,CLUSTER_30,CLUSTER_31,CLUSTER_32,CLUSTER_33,CLUSTER_34,CLUSTER_35,CLUSTER_36,CLUSTER_37,CLUSTER_38,CLUSTER_39,CLUSTER_40,CLUSTER_41,CLUSTER_42,CLUSTER_43,CLUSTER_44,CLUSTER_45,CLUSTER_46,CLUSTER_47,CLUSTER_48,CLUSTER_49,CLUSTER_50,CLUSTER_51,CLUSTER_52,CLUSTER_53,HOMEOWNR_U,GENDER_M,GENDER_other,DATASRCE_2,DATASRCE_3,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U,DOMAIN_B_2,DOMAIN_B_3,DOMAIN_B_4,ODATEW_YR_87,ODATEW_YR_88,ODATEW_YR_89,ODATEW_YR_90,ODATEW_YR_91,ODATEW_YR_92,ODATEW_YR_93,ODATEW_YR_94,ODATEW_YR_95,ODATEW_YR_96,ODATEW_YR_97,ODATEW_MM_4,ODATEW_MM_8,ODATEW_MM_9,ODATEW_MM_10,ODATEW_MM_11,DOB_YR_1,DOB_YR_10,DOB_YR_11,DOB_YR_12,DOB_YR_13,DOB_YR_14,DOB_YR_15,DOB_YR_16,DOB_YR_17,DOB_YR_18,DOB_YR_19,DOB_YR_20,DOB_YR_21,DOB_YR_22,DOB_YR_23,DOB_YR_24,DOB_YR_25,DOB_YR_26,DOB_YR_27,DOB_YR_28,DOB_YR_29,DOB_YR_30,DOB_YR_31,DOB_YR_32,DOB_YR_33,DOB_YR_34,DOB_YR_35,DOB_YR_36,DOB_YR_37,DOB_YR_38,DOB_YR_39,DOB_YR_40,DOB_YR_41,DOB_YR_42,DOB_YR_43,DOB_YR_44,DOB_YR_45,DOB_YR_46,DOB_YR_47,DOB_YR_48,DOB_YR_49,DOB_YR_50,DOB_YR_51,DOB_YR_52,DOB_YR_53,DOB_YR_54,DOB_YR_55,DOB_YR_56,DOB_YR_57,DOB_YR_58,DOB_YR_59,DOB_YR_60,DOB_YR_61,DOB_YR_62,DOB_YR_63,DOB_YR_64,DOB_YR_65,DOB_YR_66,DOB_YR_67,DOB_YR_68,DOB_YR_69,DOB_YR_70,DOB_YR_71,DOB_YR_72,DOB_YR_73,DOB_YR_74,DOB_YR_75,DOB_YR_76,DOB_YR_77,DOB_YR_78,DOB_YR_80,DOB_YR_81,DOB_YR_82,DOB_YR_83,DOB_YR_90,DOB_YR_91,DOB_YR_93,DOB_MM_2,DOB_MM_3,DOB_MM_4,DOB_MM_5,DOB_MM_6,DOB_MM_7,DOB_MM_8,DOB_MM_9,DOB_MM_10,DOB_MM_11,DOB_MM_12,MINRDATE_YR_87,MINRDATE_YR_88,MINRDATE_YR_89,MINRDATE_YR_90,MINRDATE_YR_91,MINRDATE_YR_92,MINRDATE_YR_93,MINRDATE_YR_94,MINRDATE_YR_95,MINRDATE_YR_96,MINRDATE_YR_97,MINRDATE_MM_2,MINRDATE_MM_3,MINRDATE_MM_4,MINRDATE_MM_5,MINRDATE_MM_6,MINRDATE_MM_7,MINRDATE_MM_8,MINRDATE_MM_9,MINRDATE_MM_10,MINRDATE_MM_11,MINRDATE_MM_12,MAXRDATE_YR_85,MAXRDATE_YR_86,MAXRDATE_YR_87,MAXRDATE_YR_88,MAXRDATE_YR_89,MAXRDATE_YR_90,MAXRDATE_YR_91,MAXRDATE_YR_92,MAXRDATE_YR_93,MAXRDATE_YR_94,MAXRDATE_YR_95,MAXRDATE_YR_96,MAXRDATE_YR_97,MAXRDATE_MM_2,MAXRDATE_MM_3,MAXRDATE_MM_4,MAXRDATE_MM_5,MAXRDATE_MM_6,MAXRDATE_MM_7,MAXRDATE_MM_8,MAXRDATE_MM_9,MAXRDATE_MM_10,MAXRDATE_MM_11,MAXRDATE_MM_12,LASTDATE_YR_96,LASTDATE_YR_97,LASTDATE_MM_2,LASTDATE_MM_3,LASTDATE_MM_4,LASTDATE_MM_5,LASTDATE_MM_6,LASTDATE_MM_7,LASTDATE_MM_8,LASTDATE_MM_9,LASTDATE_MM_10,LASTDATE_MM_11,LASTDATE_MM_12,FIRSTDATE_YR_78,FIRSTDATE_YR_81,FIRSTDATE_YR_83,FIRSTDATE_YR_84,FIRSTDATE_YR_85,FIRSTDATE_YR_86,FIRSTDATE_YR_87,FIRSTDATE_YR_88,FIRSTDATE_YR_89,FIRSTDATE_YR_90,FIRSTDATE_YR_91,FIRSTDATE_YR_92,FIRSTDATE_YR_93,FIRSTDATE_YR_94,FIRSTDATE_YR_95,FIRSTDATE_YR_96
0,0.166667,1.0,0.000000,1.000000,0.141414,0.0,0.868687,0.797980,0.898990,0.000000,0.384615,0.888889,0.121212,0.020202,0.000000,0.434343,0.068522,0.314729,0.065217,0.878788,0.959596,1.000000,0.375000,0.000000,0.545455,0.737374,0.076923,0.838384,0.949495,0.717172,0.111111,0.929293,0.080808,0.767677,0.081633,0.663868,0.0,0.000000,0.311475,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.666667,1.0,0.050505,1.000000,0.000000,0.0,1.000000,0.959596,0.717172,0.000000,0.384615,0.848485,0.161616,0.090909,0.151515,0.505051,0.740899,0.103876,0.500000,0.797980,0.333333,1.000000,0.107143,0.050505,0.606061,0.878788,0.307692,0.151515,0.696970,0.070707,0.191919,0.090909,0.898990,0.101010,0.163265,0.891718,0.0,0.000000,0.655738,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.833333,1.0,0.323232,1.000000,1.000000,0.0,0.000000,0.929293,0.949495,0.060606,0.692308,0.939394,0.070707,0.020202,0.050505,0.939394,0.359743,0.311628,0.065217,0.575758,1.000000,1.000000,0.678571,0.323232,0.282828,0.949495,0.346154,0.010101,0.747475,0.909091,0.101010,1.000000,0.000000,1.000000,0.510204,0.645883,1.0,0.333333,0.016393,0.818182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.666667,1.0,0.000000,1.000000,1.000000,0.0,0.010101,0.838384,0.555556,0.373737,0.307692,0.515152,0.494949,0.020202,0.010101,0.373737,0.055675,0.305426,0.521739,0.727273,0.696970,0.878788,0.125000,0.252525,0.676768,0.565657,0.403846,0.070707,0.373737,0.595960,0.393939,1.000000,0.000000,0.919192,0.142857,0.143685,0.0,0.000000,0.540984,0.909091,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.000000,1.0,0.303030,1.000000,1.000000,0.0,0.000000,0.939394,1.000000,0.000000,0.692308,0.989899,0.020202,0.303030,0.040404,0.909091,0.286938,0.196899,0.152174,0.767677,0.000000,1.000000,0.392857,0.303030,0.262626,0.888889,0.557692,0.000000,0.080808,0.242424,0.696970,0.989899,0.020202,0.989899,0.306122,0.203471,1.0,0.333333,0.098361,0.000000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3869,0.666667,1.0,0.252525,1.000000,0.000000,0.0,1.000000,1.000000,0.686869,0.020202,0.230769,0.727273,0.282828,0.525253,0.010101,0.343434,0.000000,0.733333,0.119565,0.424242,0.707071,0.919192,0.285714,0.252525,0.535354,0.757576,0.365385,0.090909,0.616162,0.181818,0.090909,0.565657,0.353535,0.383838,0.081633,0.689608,0.0,0.000000,0.901639,0.818182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3870,1.000000,1.0,0.121212,1.000000,1.000000,0.0,0.000000,0.868687,0.727273,0.272727,0.384615,0.747475,0.262626,0.080808,0.353535,0.666667,0.813704,0.162791,0.032609,0.868687,1.000000,1.000000,0.553571,0.121212,0.505051,0.818182,0.153846,0.444444,0.767677,0.575758,0.373737,1.000000,0.000000,0.808081,0.632653,0.934728,1.0,0.000000,0.065574,0.909091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3871,0.666667,1.0,0.323232,0.777778,0.000000,0.0,1.000000,0.969697,0.919192,0.000000,0.230769,0.868687,0.141414,0.000000,0.040404,0.535354,0.407923,0.317829,0.054348,0.616162,1.000000,0.000000,0.196429,0.323232,0.858586,0.727273,0.346154,0.151515,0.636364,0.424242,0.484848,0.969697,0.040404,0.535354,0.224490,0.626755,0.0,0.000000,0.590164,0.909091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3872,0.166667,1.0,0.313131,0.222222,0.000000,0.0,1.000000,1.000000,0.656566,0.313131,0.153846,0.646465,0.363636,0.040404,0.000000,0.464646,0.000000,0.165891,0.271739,0.272727,0.383838,1.000000,0.178571,0.313131,0.808081,0.434343,0.384615,0.111111,0.313131,0.767677,0.040404,0.868687,0.141414,0.868687,0.714286,0.964775,0.0,1.000000,0.950820,0.727273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
# creating a function to test multiple models
def train_test_model(list):
    for values in list:
        m=values
        m.fit(X_train_treated, y_train)
        pred_train = m.predict(X_train_treated)
        score_train = r2_score(y_train, pred_train)
        pred_test = m.predict(X_test_treated)
        score_test = r2_score(y_test, pred_test)
        mae = round(mean_absolute_error(y_test, pred_test),2)
        print(values)
        print(f"train score is: {score_train}, test score is: {score_test} and mean absolute error is {mae}")
    return

In [32]:
models=[LinearRegression(), 
        KNeighborsRegressor(), 
        DecisionTreeRegressor(), 
        MLPRegressor()]
train_test_model(models)

LinearRegression()
train score is: 0.4209298005945815, test score is: 0.33309911478390875 and mean absolute error is 5.49
KNeighborsRegressor()
train score is: 0.41039883213305484, test score is: 0.06895641536816632 and mean absolute error is 6.6
DecisionTreeRegressor()
train score is: 1.0, test score is: -0.48962367653364036 and mean absolute error is 7.29
MLPRegressor()
train score is: 0.9225242915698368, test score is: 0.07344179098538373 and mean absolute error is 7.44




In [33]:
lm = LinearRegression()
lm.fit(X_train_treated,y_train)

In [34]:
predictions = lm.predict(X_test_treated)
score_test = r2_score(y_test, predictions)
mae = round(mean_absolute_error(y_test, predictions),2)

print("R2 test score:", score_test, "and mean absolute error: ", mae )

R2 test score: 0.33309911478390875 and mean absolute error:  5.49


In [35]:
forecast = pd.read_csv("forecast.csv")
forecast

Unnamed: 0,TCODE,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,MSA,ADI,DMA,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MAXRAMNT,LASTGIFT,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2,STATE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,RFA_2R,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B,ODATEW_YR,ODATEW_MM,DOB_YR,DOB_MM,MINRDATE_YR,MINRDATE_MM,MAXRDATE_YR,MAXRDATE_MM,LASTDATE_YR,LASTDATE_MM,FIRSTDATE_YR,FIRSTDATE_MM,TARGET_D,TARGET_B
0,0,60.000000,5,9,0,0,39,34,18,10,2,1,5,992,264,332,0,35,65,47,53,92,1,0,0,11,0,0,0,0,0,0,0,11,0,0,0,39,48,51,40,50,54,25,31,42,27,11,14,18,17,13,11,15,12,11,34,25,18,26,10,23,18,33,49,28,12,4,61,7,12,19,198,276,97,95,2,2,0,0,7,7,0,479,635,3,2,86,14,96,4,7,38,80,70,32,84,16,6,2,5,9,15,3,17,50,25,0,0,0,2,7,13,27,47,0,1,61,58,61,15,4,2,0,0,14,1,0,0,2,5,17,73,0.0,177.0,682.0,307,318,349,378,12883,13,23,23,23,15,1,0,0,1,4,25,24,26,17,2,0,0,2,28,4,51,1,46,54,3,88,8,0,0,0,0,0,0,4,1,13,14,16,2,45,56,64,50,64,44,62,53,99,0,0,9,3,8,13,9,0,3,9,3,15,19,5,4,3,0,3,41,1,0,7,13,6,5,0,4,9,4,1,3,10,2,1,7,78,2,0,120,16,10,39,21,8,4,3,5,20,3,19,4,0,0,0,18,39,0,34,23,18,16,1,4,0,23,0,0,5,1,0,0,0,0,0,2,0,3,74,88,8,0,4,96,77,19,13,31,5,14,14,31,54,46,0,0,90,0,10,0,0,0,33,65,40,99,99,6,2,10,7,27,74,6,14,240.0,31,14,5.0,12.0,10.0,4,7.741935,95515,0,4,39,IL,36,H,F,3,L,E,C,T,2,89,1,37,12,92,8,94,2,95,12,89,11.0,0.0,1
1,1,46.000000,6,9,16,0,15,55,11,6,2,1,9,3611,940,998,99,0,0,50,50,67,0,0,31,6,4,2,6,4,14,0,0,2,0,1,4,34,41,43,32,42,45,32,33,46,21,13,14,33,23,10,4,2,11,16,36,22,15,12,1,5,4,21,75,55,23,9,69,4,3,24,317,360,99,99,0,0,0,0,0,0,0,5468,5218,12,10,96,4,97,3,9,59,94,88,55,95,5,4,1,3,5,4,2,18,44,5,0,0,0,97,98,98,98,99,94,0,83,76,73,21,5,0,0,0,4,0,0,0,91,91,91,94,4480.0,13.0,803.0,1088,1096,1026,1037,36175,2,6,2,5,15,14,13,10,33,2,5,2,5,15,14,14,10,32,6,2,66,3,56,44,9,80,14,0,0,0,0,0,0,6,0,2,24,32,12,71,70,83,58,81,57,64,57,99,99,0,22,24,4,21,13,2,1,6,0,4,1,0,3,1,0,6,13,1,2,8,18,11,4,3,4,10,7,11,1,6,2,1,16,69,5,2,160,5,5,12,21,7,30,20,14,24,4,24,10,0,0,0,8,15,0,55,10,11,0,0,2,0,3,1,1,2,3,1,1,0,3,0,0,0,42,39,50,7,27,16,99,92,53,5,10,2,26,56,97,99,0,0,0,96,0,4,0,0,0,99,0,99,99,99,20,4,6,5,12,32,6,13,47.0,3,1,10.0,25.0,25.0,18,15.666667,148535,0,2,1,CA,14,H,M,3,L,G,A,S,1,94,1,52,2,93,10,95,12,95,12,93,10.0,0.0,0
2,1,61.611649,3,1,2,0,20,29,33,6,8,1,1,7001,2040,2669,0,2,98,49,51,96,2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,35,43,46,37,45,49,23,35,40,25,13,20,19,16,13,10,8,15,14,30,22,19,25,10,23,21,35,44,22,6,2,63,9,9,19,183,254,69,69,1,6,5,3,3,3,0,497,546,2,1,78,22,93,7,18,36,76,65,30,86,14,7,2,5,11,17,3,17,60,18,0,1,0,0,1,6,18,50,0,4,36,49,51,14,5,4,2,24,11,2,3,6,0,2,9,44,0.0,281.0,518.0,251,292,292,340,11576,32,18,20,15,12,2,0,0,1,20,19,24,18,16,2,0,0,1,28,8,31,11,38,62,8,74,22,0,0,0,0,0,2,2,1,21,19,24,6,61,65,73,59,70,56,78,62,82,99,4,10,5,2,6,12,0,1,9,5,18,20,5,7,6,0,11,33,4,3,2,12,3,3,2,0,7,8,3,3,6,7,1,8,74,3,1,120,22,20,28,16,6,5,3,1,23,1,16,6,0,0,0,10,21,0,28,23,32,8,1,14,1,5,0,0,7,0,0,0,0,0,1,0,0,2,84,96,3,0,0,92,65,29,9,22,3,12,23,50,69,31,0,0,0,6,35,44,0,15,22,77,17,97,92,9,2,6,5,26,63,6,14,202.0,27,14,2.0,16.0,5.0,12,7.481481,15078,1,4,60,NC,43,U,M,3,L,E,C,R,2,90,1,0,2,91,11,92,7,95,12,90,1.0,0.0,1
3,0,70.000000,1,4,2,0,23,14,31,3,0,3,0,640,160,219,0,8,92,54,46,61,0,0,11,32,6,2,0,0,0,0,0,31,0,0,1,32,40,44,34,43,47,25,45,35,20,15,25,17,17,12,7,7,20,17,30,14,19,25,11,23,23,27,50,30,15,8,63,9,6,23,199,283,85,83,3,4,1,0,2,0,2,1000,1263,2,1,48,52,93,7,6,36,73,61,30,84,16,6,3,3,21,12,4,13,36,13,0,0,0,10,25,50,69,92,10,15,42,55,50,15,5,4,0,9,42,4,0,5,1,8,17,34,9340.0,67.0,862.0,386,388,396,423,15130,27,12,4,26,22,5,0,0,4,35,5,6,12,30,6,0,0,5,22,14,26,20,46,54,3,58,36,0,0,0,0,0,6,0,0,17,13,15,0,43,69,81,53,68,45,33,31,0,99,23,17,3,0,6,6,0,0,13,42,12,0,0,0,42,0,6,3,0,0,0,23,3,3,6,0,3,3,3,3,3,0,3,6,87,0,0,120,28,12,14,27,10,3,5,0,19,1,17,0,0,0,0,13,23,0,14,40,31,16,0,1,0,13,0,0,4,0,0,0,3,0,0,0,0,29,67,56,41,3,0,94,43,27,4,38,0,10,19,39,45,55,0,0,45,22,17,0,0,16,23,77,22,93,89,16,2,6,6,27,66,6,14,109.0,16,7,2.0,11.0,10.0,9,6.812500,172556,1,4,41,CA,44,U,F,3,L,E,C,R,2,87,1,28,1,87,11,94,11,95,12,87,2.0,0.0,1
4,0,78.000000,3,2,60,1,28,9,53,26,3,2,9,2520,627,761,99,0,0,46,54,2,98,0,0,1,0,0,0,0,0,0,0,0,0,0,0,33,45,50,36,46,50,27,34,43,23,14,21,13,15,20,12,5,13,15,34,19,19,31,7,27,16,26,57,36,24,14,42,17,9,33,235,323,99,98,0,0,0,0,0,0,0,576,594,4,3,90,10,97,3,0,42,82,49,22,92,8,20,3,17,9,23,1,1,1,0,21,58,19,0,1,2,16,67,0,2,45,52,53,16,6,0,0,0,9,0,0,0,25,58,74,83,5000.0,127.0,528.0,240,250,293,321,9836,24,29,23,13,4,4,0,0,2,21,30,22,16,4,5,0,0,3,35,8,11,14,20,80,4,73,22,1,1,0,0,0,3,1,2,1,24,27,3,76,61,73,51,65,49,80,31,81,99,10,17,8,2,6,15,3,7,22,2,9,0,7,2,2,0,6,1,5,2,2,12,2,7,6,4,15,29,4,3,26,3,2,7,49,12,1,120,16,20,30,13,3,12,5,2,26,1,20,7,1,1,1,15,28,4,9,16,53,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,65,99,0,0,0,90,45,18,25,34,0,1,3,6,33,67,0,0,9,14,72,3,0,0,99,1,21,99,96,6,2,7,11,43,113,10,25,254.0,37,8,3.0,15.0,15.0,14,6.864865,7112,1,2,26,FL,16,H,F,3,L,F,A,S,2,86,1,20,1,93,10,96,1,96,1,79,3.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,1,61.611649,5,9,0,14,36,47,11,7,8,13,9,27380,7252,10037,99,0,0,50,50,78,10,6,4,5,0,0,0,1,1,0,0,3,1,0,2,28,35,38,29,38,41,30,45,37,18,16,31,25,15,8,3,1,20,18,31,18,13,7,3,5,20,32,48,28,10,4,58,15,3,24,195,271,54,38,8,32,24,14,0,0,0,988,1025,6,6,56,44,89,11,3,44,72,56,32,83,17,12,3,10,16,15,8,19,55,5,3,6,0,2,10,49,73,92,0,4,40,52,53,15,4,24,8,13,14,15,12,3,69,84,92,97,380.0,0.0,743.0,433,481,499,535,18807,11,13,13,21,22,13,4,2,2,9,11,11,21,24,16,4,2,2,9,6,70,6,63,37,27,76,15,2,2,0,0,0,5,2,1,2,18,20,2,69,81,89,73,83,69,69,57,61,94,7,15,16,5,10,21,0,3,11,1,11,2,3,3,1,4,6,4,7,3,3,17,7,5,3,1,9,8,7,14,7,8,13,6,59,7,0,136,2,7,28,33,8,15,8,3,26,2,19,8,8,15,2,20,35,5,48,15,11,25,1,5,1,9,0,0,4,1,1,1,0,0,1,1,0,4,26,92,3,2,4,95,60,19,3,14,0,7,32,78,91,9,6,5,86,1,12,0,0,1,93,7,98,99,98,16,4,4,3,6,14,5,12,25.0,1,0,25.0,25.0,25.0,9,25.000000,184568,0,1,12,other,27,H,M,3,L,G,C,C,2,96,1,0,2,96,2,96,2,96,2,96,2.0,0.0,0
95408,1,48.000000,7,9,1,0,31,43,19,4,1,0,9,1254,322,361,96,0,4,51,49,91,3,0,2,6,1,0,1,0,0,0,0,5,0,0,1,30,40,40,28,41,43,39,33,42,25,9,19,43,17,7,4,2,10,16,35,23,16,9,2,7,10,20,70,52,25,6,73,4,2,20,307,346,89,88,1,1,0,0,0,0,0,1679,1723,3,3,88,12,97,3,0,63,89,85,60,96,4,2,1,1,7,5,1,28,58,5,2,2,0,18,71,88,91,97,5,1,77,82,75,20,4,1,0,10,7,1,0,5,16,26,44,79,3360.0,201.0,618.0,806,836,802,849,26538,8,9,7,6,11,29,13,2,15,10,0,8,2,13,35,16,3,13,8,5,61,7,83,17,36,80,4,4,4,0,0,0,6,5,3,3,25,32,10,61,73,88,56,87,52,48,43,99,0,0,18,31,0,13,17,0,1,2,4,6,0,3,5,1,8,8,9,3,7,9,13,9,6,0,0,4,7,13,3,4,1,0,4,78,12,0,160,1,6,12,24,7,36,14,9,35,5,32,7,0,0,0,21,31,8,43,5,19,15,1,12,1,14,0,0,4,0,0,1,0,0,0,1,0,2,51,94,3,0,2,99,84,29,4,7,2,55,90,94,94,6,0,0,82,2,16,0,0,0,69,31,67,99,97,18,5,3,2,4,10,3,8,20.0,1,0,20.0,20.0,20.0,9,20.000000,122706,1,1,2,TX,24,H,M,3,L,F,A,C,1,96,1,50,1,96,3,96,3,96,3,96,3.0,0.0,0
95409,1,60.000000,5,9,0,0,18,46,20,7,23,0,9,552,131,205,99,0,0,53,47,82,14,0,1,9,0,0,0,0,0,0,0,9,0,0,0,28,35,37,30,41,44,32,46,38,17,13,34,21,9,9,9,4,21,17,32,20,10,18,7,17,27,29,44,31,14,5,45,19,5,31,179,268,96,95,1,2,1,0,0,0,0,376,377,4,3,66,34,95,5,10,37,64,43,21,80,20,16,2,14,21,20,9,20,49,12,7,7,1,0,0,0,1,9,0,2,45,51,54,14,5,2,0,0,31,2,0,0,3,34,78,91,4040.0,61.0,551.0,263,264,319,345,12178,21,26,20,18,12,0,3,0,0,26,18,17,11,21,0,6,0,0,10,13,26,26,43,57,3,83,17,0,0,0,0,0,0,0,0,25,17,17,0,69,69,70,69,70,69,77,24,62,0,25,5,13,9,5,22,0,2,14,0,13,9,5,2,0,0,4,14,3,11,0,10,5,2,0,5,6,19,3,19,7,23,0,0,52,18,0,120,5,3,51,23,7,11,0,6,32,4,27,7,0,0,0,9,18,0,46,0,20,20,2,8,0,14,0,0,0,1,0,0,0,0,1,0,0,6,82,92,5,3,0,93,42,12,6,51,0,0,0,0,0,99,0,0,97,0,0,0,0,4,99,0,99,99,99,5,2,3,11,14,33,7,17,58.0,7,4,3.0,10.0,10.0,3,8.285714,189641,1,3,34,MI,30,H,M,3,L,E,B,C,3,95,1,38,1,96,3,95,1,96,10,94,10.0,0.0,1
95410,0,58.000000,7,9,0,0,28,35,20,9,1,1,7,1746,432,508,99,0,0,47,53,92,1,1,5,8,0,1,2,0,1,0,0,5,0,0,3,34,42,45,36,45,49,25,38,40,22,12,21,21,18,12,7,9,13,16,34,20,17,20,4,16,9,26,65,41,17,6,56,9,8,27,262,324,99,99,0,0,0,0,5,4,1,2421,2459,11,10,88,12,99,1,0,44,85,71,36,84,16,8,2,6,9,12,6,19,56,16,0,0,0,89,96,99,99,99,9,0,90,65,68,18,5,0,0,0,12,0,0,0,88,88,90,91,8735.0,13.0,803.0,552,544,568,556,15948,7,4,11,18,38,15,5,3,0,4,6,15,19,38,13,4,3,0,25,2,46,3,43,57,9,80,11,0,0,0,0,1,2,6,0,24,18,28,11,52,73,88,60,85,57,70,54,99,99,0,14,16,6,16,17,0,2,12,1,11,2,0,2,1,0,2,22,4,6,4,19,4,7,2,4,6,7,9,4,9,1,1,7,72,8,2,140,7,6,20,35,12,15,5,6,29,4,21,10,0,0,0,13,28,1,35,18,20,8,0,3,1,9,0,0,2,6,1,2,0,0,0,0,0,14,50,83,8,4,5,99,85,43,9,25,0,0,6,17,99,1,0,0,99,0,1,0,0,0,99,0,99,99,99,12,3,6,3,36,127,9,31,498.0,41,18,5.0,21.0,18.0,4,12.146341,4693,1,4,11,CA,24,H,F,2,L,F,A,C,1,86,1,40,5,90,11,96,8,97,1,86,12.0,18.0,1


In [36]:
forecasted_donors = forecast[forecast["TARGET_B"]==1]
forecasted_donors.shape

(37184, 339)

In [37]:
# dropping TARGET_D and separating numerical/categorical
X = forecasted_donors.drop(["TARGET_D", "TARGET_B"], axis=1)

#defining the same categorical and numerical classification of the features
cat_col = ['STATE', 'CLUSTER', 'HOMEOWNR', 'GENDER', 'DATASRCE', 'RFA_2R',
       'RFA_2A', 'GEOCODE2', 'DOMAIN_A', 'DOMAIN_B', 'ODATEW_YR', 'ODATEW_MM',
       'DOB_YR', 'DOB_MM', 'MINRDATE_YR', 'MINRDATE_MM', 'MAXRDATE_YR',
       'MAXRDATE_MM', 'LASTDATE_YR', 'LASTDATE_MM', 'FIRSTDATE_YR',
       'FIRSTDATE_MM']

In [38]:
X_num = X.drop(cat_col, axis=1)
X_cat = X.drop(X_num.columns, axis=1)

# converting all categorical variables as type object
for col in cat_col:
    X_cat[col] = X_cat[col].astype('object')

# checking the shape of the resulting df
print(X_num.shape)
print(X_cat.shape)

(37184, 315)
(37184, 22)


In [39]:
# applying the transformer to scale our data
X_num_arr = transformer.transform(X_num)
X_scaled = pd.DataFrame(X_num_arr, columns=X_num.columns)

ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- FIRSTDATE_MM


In [None]:
# ValueError: The feature names should match those that were passed during fit. Feature names seen at fit time, yet now missing:
#- FIRSTDATE_MM

# why ???

In [19]:
# dropping the columns with low variability
X_scaled.drop(col_to_drop, axis=1, inplace=True)
X_scaled.shape

NameError: name 'X_scaled' is not defined

In [22]:
predictions = lm.predict(forecast)
print("R2 train score is:",r2_score(y_train, forecast))

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- AC1
- AC2
- AFC1
- AFC2
- AFC3
- ...
Feature names seen at fit time, yet now missing:
- CLUSTER_10
- CLUSTER_11
- CLUSTER_12
- CLUSTER_13
- CLUSTER_14
- ...
