### Importing Vyper

In [1]:
from vyper.user import Model
import pandas as pd
from sklearn.utils import shuffle

# Render every column (no truncation) and up to 200 rows when a DataFrame is displayed.
pd.options.display.max_columns = None
pd.options.display.max_rows = 200

### Reading in Data

In [2]:
# Rows held back from model building; `df_oob` is later passed as `oot_data`
# to the playbook and to `score_oot_data`.
HOLDOUT_ROWS = 5000
# Fixed seed so the shuffle (and therefore the hold-out split) is reproducible.
SHUFFLE_SEED = 26

df = pd.read_csv("../source_datasets/Profile America Response Model Data.csv")

# Guard against a silently empty training frame when the file is too small.
if len(df) <= HOLDOUT_ROWS:
    raise ValueError(
        f"Dataset has {len(df)} rows; need more than {HOLDOUT_ROWS} to carve out the hold-out sample"
    )

df = shuffle(df, random_state=SHUFFLE_SEED)
df_oob = df.iloc[-HOLDOUT_ROWS:]   # last HOLDOUT_ROWS shuffled rows -> hold-out sample
df = df.iloc[:-HOLDOUT_ROWS]       # everything else -> modelling data
print(df_oob.shape, df.shape)

(5000, 104) (59081, 104)


### Creating Model

In [None]:
# Interactive help (IPython `?`): shows the signature and docstring of Model.
Model?

In [3]:
# Build the Vyper model object on the modelling frame (hold-out rows already removed).
# - dependent_variable: the target column, 'y'
# - model_type: logistic regression
# - training_percentage / na_drop_threshold: presumably the train/test split fraction and
#   the missing-value share above which a column is dropped -- TODO confirm via `Model?`
m = Model(data=df, 
          dependent_variable='y',
          training_percentage=0.75, 
          na_drop_threshold=0.5,
          model_type='logistic')

In [4]:
%%time
# Let Vyper assign a type (continuous / categorical / binary / exclude) to every column.
# string_threshold: a string-typed column with more distinct values than this threshold
#   is excluded from modelling.
# categorical_threshold: presumably the analogous distinct-value cutoff for treating a
#   column as categorical -- TODO confirm against the method's docstring.
# overwrite=True: presumably replaces any previously assigned types -- TODO confirm.
m.auto_define_variables(categorical_threshold = 100,
                        string_threshold = 100,
                        overwrite=True)

Wall time: 1.32 s


### Model Variable Functions

In [5]:
# Display the column -> assigned-type mapping produced by auto_define_variables.
m.variables.show_types()

{'ID': 'continuous',
 'FIRST_NAME': 'exclude',
 'LAST_NAME': 'exclude',
 'ADDRESS': 'exclude',
 'CITY': 'exclude',
 'STATE': 'categorical',
 'ZIP': 'continuous',
 'CREDIT_CARD_REVOLVERS': 'exclude',
 'CREDIT_SCORE_INDEX': 'categorical',
 'INCOME': 'continuous',
 'HOMEOWNER': 'binary',
 'LOR': 'continuous',
 'MOBILITY_TRIGGER': 'binary',
 'PRESENCE_OF_CHILDREN': 'binary',
 'PRESENCE_OF_CREDIT_CARD': 'exclude',
 'PRESENCE_OF_FINANCE_LOAN': 'binary',
 'PROFITABILITY_INDEX': 'continuous',
 'DWELLING_TYPE': 'continuous',
 'MARITAL_STATUS': 'continuous',
 'AGE_RANGES': 'categorical',
 'NUMBER_OF_CHILDREN': 'exclude',
 'AUTO_INDEX': 'continuous',
 'LIFE_EVENT_INDEX': 'continuous',
 'HOME_VALUE': 'categorical',
 'Childrens Age 0-2': 'exclude',
 'Childrens Age 11-15': 'exclude',
 'Childrens Age 16-17': 'exclude',
 'Childrens Age 3-5': 'exclude',
 'Childrens Age 6-10': 'exclude',
 'Devotional': 'exclude',
 'Reading': 'exclude',
 'Childrens Books': 'exclude',
 'Cooking': 'exclude',
 'Fashion': 'e

In [6]:
# Set of variables still in play for modelling; in the recorded output these appear to be
# the columns whose type is not 'exclude' (ID, ZIP and STATE are still included at this point).
m.variables.get_active_variables()

{'AGE_RANGES',
 'AIRLINE_UPGRADERS',
 'AUTO_INDEX',
 'BARGAIN_SHOPPERS',
 'CREDIT_SCORE_INDEX',
 'DWELLING_TYPE',
 'HOMEOWNER',
 'HOME_VALUE',
 'ID',
 'INCOME',
 'LIFE_EVENT_INDEX',
 'LOR',
 'LOYALTY_MEMBERS',
 'MARITAL_STATUS',
 'MOBILITY_TRIGGER',
 'Net Assets',
 'Online Insurance Purchasers',
 'Online Movie Interest',
 'Online Music Interest',
 'Online Savings',
 'Online Shopper',
 'Online Transactor',
 'PERSONAL_TRAVELER',
 'PRESENCE_OF_CHILDREN',
 'PRESENCE_OF_FINANCE_LOAN',
 'PROFITABILITY_INDEX',
 'STATE',
 'Subscribes to Magazines Online',
 'VACATION_SPENDERS',
 'ZIP',
 'orderspercustomer',
 'spendpercustomers',
 'spendperorder'}

In [7]:
# Numeric variables; in the recorded output these coincide with the columns typed 'continuous'.
# Note that the identifier-like columns ID and ZIP are picked up as numeric here.
m.variables.get_numeric_variables()

{'AUTO_INDEX',
 'DWELLING_TYPE',
 'ID',
 'INCOME',
 'LIFE_EVENT_INDEX',
 'LOR',
 'MARITAL_STATUS',
 'PROFITABILITY_INDEX',
 'ZIP',
 'orderspercustomer',
 'spendpercustomers',
 'spendperorder'}

In [8]:
# Variables currently typed 'categorical'.
m.variables.get_categorical_variables()

{'AGE_RANGES', 'CREDIT_SCORE_INDEX', 'HOME_VALUE', 'Net Assets', 'STATE'}

In [9]:
# Variables currently typed 'binary'.
m.variables.get_binary_variables()

{'AIRLINE_UPGRADERS',
 'BARGAIN_SHOPPERS',
 'HOMEOWNER',
 'LOYALTY_MEMBERS',
 'MOBILITY_TRIGGER',
 'Online Insurance Purchasers',
 'Online Movie Interest',
 'Online Music Interest',
 'Online Savings',
 'Online Shopper',
 'Online Transactor',
 'PERSONAL_TRAVELER',
 'PRESENCE_OF_CHILDREN',
 'PRESENCE_OF_FINANCE_LOAN',
 'Subscribes to Magazines Online',
 'VACATION_SPENDERS'}

### Custom Set Variables

In [10]:
# Manually override the auto-detected types:
# - ID, ZIP and STATE (identifier / geography columns) and AUTO_INDEX are removed from modelling
# - 'y' is explicitly flagged as the target variable
# Then re-display the full type mapping to verify the overrides took effect.
m.set_variable_types({
    'ID': 'exclude', 
    'ZIP': 'exclude', 
    'STATE': 'exclude',
    'AUTO_INDEX': 'exclude',
    'y': 'target_variable'})
m.variables.show_types()

{'ID': 'exclude',
 'FIRST_NAME': 'exclude',
 'LAST_NAME': 'exclude',
 'ADDRESS': 'exclude',
 'CITY': 'exclude',
 'STATE': 'exclude',
 'ZIP': 'exclude',
 'CREDIT_CARD_REVOLVERS': 'exclude',
 'CREDIT_SCORE_INDEX': 'categorical',
 'INCOME': 'continuous',
 'HOMEOWNER': 'binary',
 'LOR': 'continuous',
 'MOBILITY_TRIGGER': 'binary',
 'PRESENCE_OF_CHILDREN': 'binary',
 'PRESENCE_OF_CREDIT_CARD': 'exclude',
 'PRESENCE_OF_FINANCE_LOAN': 'binary',
 'PROFITABILITY_INDEX': 'continuous',
 'DWELLING_TYPE': 'continuous',
 'MARITAL_STATUS': 'continuous',
 'AGE_RANGES': 'categorical',
 'NUMBER_OF_CHILDREN': 'exclude',
 'AUTO_INDEX': 'exclude',
 'LIFE_EVENT_INDEX': 'continuous',
 'HOME_VALUE': 'categorical',
 'Childrens Age 0-2': 'exclude',
 'Childrens Age 11-15': 'exclude',
 'Childrens Age 16-17': 'exclude',
 'Childrens Age 3-5': 'exclude',
 'Childrens Age 6-10': 'exclude',
 'Devotional': 'exclude',
 'Reading': 'exclude',
 'Childrens Books': 'exclude',
 'Cooking': 'exclude',
 'Fashion': 'exclude',
 'In

### Variable Profiling

In [11]:
from vyper.user.explorer import DataProfiler
from openpyxl import Workbook

# Profile only the variables that are still active in the model, plus the target 'y'.
profile_columns = [*m.variables.get_active_variables(), 'y']
var_prof_df = DataProfiler(df[profile_columns], dependent_variable='y')

# Write the profiling sheet into a fresh workbook and save it in the working directory.
wb = Workbook()
var_prof_df.create_var_profiling_ws(wb=wb, sheet_name='profile', sort_by_variance = 'desc')
wb.save(r"variable profiles logistic.xlsx")

### TNI

In [None]:
# Interactive help (IPython `?`): shows the signature and docstring of m.tni_smart.
m.tni_smart?

In [12]:
%%time
# Request the 'Log' and 'Pw2' transformations; variables derived from them show up later in
# m.var_selected with `_TnI_log` / `_TnI_pw2` suffixes.
m.tni_smart(transformations=('Log', 'Pw2'))

Wall time: 3.46 s


In [None]:
# Interactive help (IPython `?`): shows the signature and docstring of m.tni_transform.
m.tni_transform?

In [13]:
%%time
# Run the TNI transform step with default arguments; presumably this applies the
# transformations/imputations planned by tni_smart to the model data -- TODO confirm.
m.tni_transform()

Wall time: 2.04 s


### Variable Reduction

In [None]:
# Interactive help (IPython `?`): shows the signature and docstring of m.variable_reduction.
?m.variable_reduction

In [14]:
%%time
# Reduce the candidate variables using hierarchical clustering ('hclust') and four scoring
# methods, each given equal weight (1): correlation with the dependent variable, univariate
# regression, information value, and cluster membership.
# NOTE(review): selection=100 is requested, but the recorded run ends with 80 selected
# variables -- presumably fewer than 100 candidates qualified; confirm.
m.variable_reduction(cluster_type='hclust',
                           wt_corr_dv=1,
                           wt_univ_reg=1,
                           wt_inf_val=1,
                           wt_clust=1,
                           selection = 100)

Wall time: 3.89 s


In [15]:
# Number of variables kept by variable_reduction.
len(m.var_selected)

80

In [16]:
# Names of the variables kept by variable_reduction (transformed / binned / dummy columns).
m.var_selected

['DWELLING_TYPE_TnI_processed',
 'DWELLING_TYPE_bi_(-inf_1.0]',
 'AGE_RANGES_bi_(5.0_6.0]',
 'AIRLINE_UPGRADERS_N',
 'INCOME_TnI_log',
 'orderspercustomer_bi_(11.4708249496982_12.7638888888889]',
 'MARITAL_STATUS_bi_(-inf_1.0]',
 'MARITAL_STATUS_TnI_pw2',
 'spendpercustomers_TnI_log',
 'AGE_RANGES_bi_(7.0_inf]',
 'PROFITABILITY_INDEX_TnI_processed',
 'LIFE_EVENT_INDEX_TnI_pw2',
 'BARGAIN_SHOPPERS_N',
 'AGE_RANGES_bi_(1.0_2.0]',
 'Online Movie Interest_N',
 'PROFITABILITY_INDEX_bi_(-inf_1.0]',
 'INCOME_TnI_processed',
 'Net Assets_bi_(6.0_inf]',
 'spendperorder_TnI_log',
 'orderspercustomer_TnI_processed',
 'spendpercustomers_bi_(873.209509803922_1575.24369863014]',
 'PRESENCE_OF_CHILDREN_N',
 'orderspercustomer_TnI_pw2',
 'HOME_VALUE_bi_(13.0_inf]',
 'LOR_TnI_pw2',
 'DWELLING_TYPE_bi_(1.0_5.0]',
 'LIFE_EVENT_INDEX_TnI_processed',
 'MOBILITY_TRIGGER_N',
 'PROFITABILITY_INDEX_TnI_pw2',
 'spendpercustomers_TnI_pw2',
 'AGE_RANGES_bi_(6.0_7.0]',
 'DWELLING_TYPE_TnI_log',
 'PROFITABILITY_IND

In [17]:
# Combine the per-method reduction metrics with the per-variable scores, joining on
# 'variable'. Columns present in both frames (corr_dv, univ_reg, inf_val) get pandas'
# default `_x` / `_y` suffixes. The merge is computed once and reused for both the CSV
# export and the on-screen table.
variable_reduction_report = m.variable_reduction_method_values.merge(
    m.var_scores, on=['variable'], how='left'
)
variable_reduction_report.to_csv('logistic_variable_reduction_output.csv')
variable_reduction_report.sort_values(['variable'])

Unnamed: 0,corr_dv_x,variable,corr_direction,univ_reg_x,inf_val_x,cluster,cluster_correlation,corr_dv_y,univ_reg_y,inf_val_y,clust,selected
68,0.000956,AGE_RANGES_bi_(-inf_1.0],0,0.521409,0.024814,24,0.939841,1,1,1,1,1
13,0.001001,AGE_RANGES_bi_(1.0_2.0],0,0.526341,0.024398,26,0.707107,1,1,1,1,1
75,0.000315,AGE_RANGES_bi_(3.0_4.0],0,0.51479,0.007133,20,1.064611,1,1,1,1,1
2,0.000341,AGE_RANGES_bi_(5.0_6.0],1,0.482199,0.006846,36,0.707107,1,1,1,1,1
30,0.000474,AGE_RANGES_bi_(6.0_7.0],1,0.481457,0.009265,33,0.707107,1,1,1,1,1
9,0.001635,AGE_RANGES_bi_(7.0_inf],1,0.471928,0.028766,16,0.0,1,1,1,1,1
52,0.003902,AGE_RANGES_tni_assign,1,0.417736,0.08844,54,0.0,1,1,1,1,1
3,0.002014,AIRLINE_UPGRADERS_N,0,0.551324,0.042322,74,0.0,1,1,1,1,1
12,0.002628,BARGAIN_SHOPPERS_N,1,0.46054,0.077511,65,0.0,1,1,1,1,1
79,0.000405,CREDIT_SCORE_INDEX_bi_(5.0_6.0],0,0.516432,0.009318,34,1.101489,1,1,1,1,1


In [18]:
# Recursive Feature Elimination: narrow m.var_selected down to 50 variables
# (from 80 after variable_reduction in the recorded run).
m.rfe_selection(num_features = 50)

In [19]:
# Number of variables remaining after RFE.
len(m.var_selected)

50

In [20]:
# Names of the variables remaining after RFE.
m.var_selected

['DWELLING_TYPE_TnI_processed',
 'DWELLING_TYPE_bi_(-inf_1.0]',
 'AIRLINE_UPGRADERS_N',
 'INCOME_TnI_log',
 'MARITAL_STATUS_bi_(-inf_1.0]',
 'MARITAL_STATUS_TnI_pw2',
 'spendpercustomers_TnI_log',
 'PROFITABILITY_INDEX_TnI_processed',
 'LIFE_EVENT_INDEX_TnI_pw2',
 'BARGAIN_SHOPPERS_N',
 'Online Movie Interest_N',
 'INCOME_TnI_processed',
 'spendperorder_TnI_log',
 'orderspercustomer_TnI_processed',
 'spendpercustomers_bi_(873.209509803922_1575.24369863014]',
 'PRESENCE_OF_CHILDREN_N',
 'orderspercustomer_TnI_pw2',
 'LOR_TnI_pw2',
 'LIFE_EVENT_INDEX_TnI_processed',
 'MOBILITY_TRIGGER_N',
 'PROFITABILITY_INDEX_TnI_pw2',
 'spendpercustomers_TnI_pw2',
 'PROFITABILITY_INDEX_TnI_log',
 'orderspercustomer_bi_(6.427184466019419_10.264285714285698]',
 'Online Music Interest_N',
 'PRESENCE_OF_FINANCE_LOAN_N',
 'Subscribes to Magazines Online_N',
 'Online Savings_N',
 'LOR_bi_(3.0_11.0]',
 'spendpercustomers_TnI_processed',
 'Online Transactor_N',
 'LOYALTY_MEMBERS_N',
 'spendperorder_TnI_pw2',
 

In [21]:
# Lasso-based variable selection; `alpha` is the penalty parameter
# (presumably a larger alpha shrinks more coefficients to zero -- TODO confirm for this API).
m.lasso_selection(alpha = 2)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.17420863570799353
            Iterations: 722
            Function evaluations: 722
            Gradient evaluations: 722


In [22]:
# Names of the variables remaining after the lasso step.
m.var_selected

['DWELLING_TYPE_TnI_processed',
 'DWELLING_TYPE_bi_(-inf_1.0]',
 'AIRLINE_UPGRADERS_N',
 'INCOME_TnI_log',
 'MARITAL_STATUS_bi_(-inf_1.0]',
 'MARITAL_STATUS_TnI_pw2',
 'spendpercustomers_TnI_log',
 'PROFITABILITY_INDEX_TnI_processed',
 'LIFE_EVENT_INDEX_TnI_pw2',
 'BARGAIN_SHOPPERS_N',
 'Online Movie Interest_N',
 'INCOME_TnI_processed',
 'spendperorder_TnI_log',
 'orderspercustomer_TnI_processed',
 'spendpercustomers_bi_(873.209509803922_1575.24369863014]',
 'PRESENCE_OF_CHILDREN_N',
 'orderspercustomer_TnI_pw2',
 'LOR_TnI_pw2',
 'LIFE_EVENT_INDEX_TnI_processed',
 'MOBILITY_TRIGGER_N',
 'PROFITABILITY_INDEX_TnI_pw2',
 'spendpercustomers_TnI_pw2',
 'PROFITABILITY_INDEX_TnI_log',
 'orderspercustomer_bi_(6.427184466019419_10.264285714285698]',
 'Online Music Interest_N',
 'PRESENCE_OF_FINANCE_LOAN_N',
 'Subscribes to Magazines Online_N',
 'Online Savings_N',
 'LOR_bi_(3.0_11.0]',
 'spendpercustomers_TnI_processed',
 'Online Transactor_N',
 'LOYALTY_MEMBERS_N',
 'spendperorder_TnI_pw2',
 

### Model Fit

In [None]:
# Interactive help (IPython `?`): shows the signature and docstring of m.fit.
?m.fit

In [23]:
%%time
# Fit the logistic model on the automatically selected variables and show the coefficient
# table, most important variables first.
# NOTE(review): the recorded output shows very large VIFs (thousands and up) because several
# transforms of the same base variable (e.g. spendperorder processed/log/pw2) are in the model
# together -- strong multicollinearity, which motivates the manual selection that follows.
final_results = m.fit()
final_results['model_descriptor'].sort_values(['importance'], ascending = False)

Train mean response:  0.050078988941548185
Test mean response:  0.04935346286642746
Deviation in response: -1.45%
Wall time: 7.08 s


Unnamed: 0,variable,estimate,standard_error,t_z_val,p_val,vif,standardized_coeff,importance,corr_dv_direction,corr_model,evaluation
45,spendperorder_TnI_processed,-0.9278482,0.2883254,-3.218059,0.001290611,29907.62,-18.107092,0.242994,0.0,0,0
13,spendperorder_TnI_log,67.19698,21.68109,3.099336,0.001939546,7820.833,9.150705,0.122801,0.0,1,0
33,spendperorder_TnI_pw2,0.00148636,0.0004820015,3.083724,0.002044271,7495.784,8.552909,0.114778,0.0,1,0
12,INCOME_TnI_processed,4.710271,6.786387,0.694076,0.4876343,252794.3,7.397987,0.09928,1.0,1,0
4,INCOME_TnI_log,-21.21538,29.81616,-0.71154,0.4767498,60846.01,-3.729081,0.050044,1.0,0,0
50,INCOME_TnI_pw2,-0.1228532,0.1904838,-0.644953,0.5189574,66247.57,-3.512957,0.047143,1.0,0,0
14,orderspercustomer_TnI_processed,0.785462,0.6462854,1.215349,0.2242332,4410.932,2.688398,0.036078,1.0,1,0
7,spendpercustomers_TnI_log,5.769748,3.318935,1.738434,0.08213443,1320.506,2.481751,0.033305,1.0,1,0
38,orderspercustomer_TnI_log,-6.074629,3.95784,-1.534834,0.1248245,1353.587,-2.200414,0.029529,1.0,0,0
30,spendpercustomers_TnI_processed,-0.003695111,0.002925028,-1.263274,0.2064908,2891.977,-2.189781,0.029386,1.0,0,0


### Manual Variable Selection

In [24]:
# Override the automated selection with eight hand-picked variables (one transform per base
# variable), replacing whatever the reduction / RFE / lasso steps left in m.var_selected.
m.var_selected = [
    'Online Transactor_N',
    'Net Assets_tni_assign',
    'HOME_VALUE_tni_assign',
    'PRESENCE_OF_FINANCE_LOAN_N',
    'Subscribes to Magazines Online_N',
    'AGE_RANGES_tni_assign',
    'Online Music Interest_N',
    'AIRLINE_UPGRADERS_N'
]

### Manual Model Fit

In [25]:
%%time
# Refit the model using the manually chosen variables now stored in m.var_selected.
final_results = m.fit()

Train mean response:  0.050078988941548185
Test mean response:  0.04935346286642746
Deviation in response: -1.45%
Wall time: 821 ms


In [26]:
# Coefficient table of the refitted model (estimate, p-value, VIF, importance, ...),
# sorted so the most important variables come first.
final_results['model_descriptor'].sort_values(['importance'], ascending = False)

Unnamed: 0,variable,estimate,standard_error,t_z_val,p_val,vif,standardized_coeff,importance,corr_dv_direction,corr_model,evaluation
3,HOME_VALUE_tni_assign,0.201948,0.011375,17.753104,1.630859e-70,1.146846,0.679757,0.357242,1.0,1,1
6,AGE_RANGES_tni_assign,0.117125,0.011605,10.092982,5.933907e-24,1.328918,0.26175,0.137561,1.0,1,1
4,PRESENCE_OF_FINANCE_LOAN_N,0.708451,0.090988,7.786172,6.907006e-15,1.083452,0.255181,0.134109,1.0,1,1
2,Net Assets_tni_assign,0.145221,0.013899,10.447976,1.496853e-25,1.143759,0.233277,0.122597,1.0,1,1
5,Subscribes to Magazines Online_N,-0.398872,0.047488,-8.399342,4.4898840000000005e-17,1.191622,-0.195541,0.102766,0.0,0,1
1,Online Transactor_N,-0.309963,0.04998,-6.201718,5.584992e-10,1.07044,-0.140949,0.074075,0.0,0,1
8,AIRLINE_UPGRADERS_N,-0.22202,0.048097,-4.616144,3.909351e-06,1.131213,-0.110757,0.058208,0.0,0,1
7,Online Music Interest_N,-0.051158,0.049255,-1.038631,0.2989762,1.211275,-0.025578,0.013442,0.0,0,0
0,intercept,-6.767639,0.173812,-38.936658,0.0,0.0,0.0,0.0,,0,0


In [27]:
# Fit metrics of the refitted model: a dict with 'train_auc' and 'test_auc'.
final_results['model_metric']

{'train_auc': 0.7094311709808687, 'test_auc': 0.7136123962035118}

### Workbook Creation

In [28]:
%%time
# Export the model playbook to an Excel workbook; the hold-out sample `df_oob` is supplied
# as the `oot_data` argument.
# NOTE(review): the first positional argument ("Vyper Demo") looks like a title/name for the
# playbook -- confirm against the method's docstring.
m.create_model_playbook("Vyper Demo", "vyper_demo_logisticregression_fulldata.xlsx", oot_data = df_oob, 
                       sort_by_variance = 'desc')

Wall time: 4.29 s


### Scoring Out-of-Time Data

In [29]:
# Score the hold-out sample. score_oot_data returns an indexable result; element 0 is the
# scored DataFrame, which in the recorded output carries the added `y_pred` and `decile` columns.
# NOTE(review): the rendered preview includes FIRST_NAME / LAST_NAME / ADDRESS columns --
# confirm the data is synthetic or clear this output before sharing the notebook.
df_scored_selected = m.score_oot_data(df_oob)[0]
df_scored_selected.head(20)

Unnamed: 0,ID,FIRST_NAME,LAST_NAME,ADDRESS,CITY,STATE,ZIP,CREDIT_CARD_REVOLVERS,CREDIT_SCORE_INDEX,INCOME,HOMEOWNER,LOR,MOBILITY_TRIGGER,PRESENCE_OF_CHILDREN,PRESENCE_OF_CREDIT_CARD,PROFITABILITY_INDEX,DWELLING_TYPE,MARITAL_STATUS,NUMBER_OF_CHILDREN,AUTO_INDEX,LIFE_EVENT_INDEX,Childrens Age 0-2,Childrens Age 11-15,Childrens Age 16-17,Childrens Age 3-5,Childrens Age 6-10,Devotional,Reading,Childrens Books,Cooking,Fashion,Interior Decorating,Romance,Baking,Career Advancement Courses,Cigar Smoking,Sweepstakes,Crafts,Cultural/Arts Events,Gardening,Gourmet Foods,DIY,Home Study Courses,Motorcycle Riding,Photography,Quilting,Scrapbooking,Self-Improvement Courses,Sewing/Knitting,Wine,Woodworking,Christian or Gospel,Classical,Country,Jazz,R&B,Rock N Roll,Own a Cat,Own a Dog,Boating/Sailing,Camping/Hiking,Cycling,Fishing,Fitness/Exercise,Golf,Hunting/Shooting,NASCAR,Running,Skiing,Walking for Health,Yoga/Pilates,Casino Gambling,Cruise Ship Vacation,International Travel,Leisure Travel,RV Vacations,Timeshare,Travel within US,Traveler,Interest in Investing,Interest in Mutual Funds,LOYALTY_MEMBERS,PERSONAL_TRAVELER,VACATION_SPENDERS,BARGAIN_SHOPPERS,interest to purchase Medicare Supplement Insurance,interest to purchase Medicare Advantage Plan,Online Shopper,Online Household,Online Insurance Purchasers,Online Savings,Online Movie 
Interest,spendperorder,orderspercustomer,spendpercustomers,y,PRESENCE_OF_FINANCE_LOAN_N,PRESENCE_OF_FINANCE_LOAN_Y,AGE_RANGES_tni_assign,AGE_RANGES_A,AGE_RANGES_B,AGE_RANGES_C,AGE_RANGES_D,AGE_RANGES_F,AGE_RANGES_G,AGE_RANGES_H,AGE_RANGES_TnImissing,AGE_RANGES_bi_(-inf_1.0],AGE_RANGES_bi_(3.0_4.0],AGE_RANGES_bi_(7.0_inf],AGE_RANGES_bi_(5.0_6.0],AGE_RANGES_bi_(1.0_2.0],AGE_RANGES_bi_(6.0_7.0],HOME_VALUE_tni_assign,HOME_VALUE_A,HOME_VALUE_B,HOME_VALUE_C,HOME_VALUE_D,HOME_VALUE_E,HOME_VALUE_F,HOME_VALUE_G,HOME_VALUE_H,HOME_VALUE_I,HOME_VALUE_J,HOME_VALUE_K,HOME_VALUE_M,HOME_VALUE_O,HOME_VALUE_TnImissing,HOME_VALUE_bi_(7.0_8.0],HOME_VALUE_bi_(10.0_11.0],HOME_VALUE_bi_(13.0_inf],HOME_VALUE_bi_(12.0_13.0],Net Assets_tni_assign,Net Assets_A,Net Assets_B,Net Assets_C,Net Assets_D,Net Assets_E,Net Assets_F,Net Assets_G,Net Assets_TnImissing,Net Assets_bi_(2.0_3.0],Net Assets_bi_(6.0_inf],AIRLINE_UPGRADERS_N,AIRLINE_UPGRADERS_Y,Subscribes to Magazines Online_N,Subscribes to Magazines Online_Y,Online Transactor_N,Online Transactor_Y,Online Music Interest_N,Online Music Interest_Y,y_pred,decile
44933,22881,MICHAEL,DEMSIA,186 WAYNE ST APT 123D,JERSEY CITY,NJ,7302,N,G,8,Y,3.0,Y,N,,1.0,2.0,2.0,,6.0,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,Y,,N,N,N,119.476804,11.094025,1325.478648,0,1,0,8,0,0,0,0,0,0,0,1,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,5,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0.033451,6
58369,217527,SANDI,WERFEL,331 ANCHORAGE DR,WOODBURY,NY,11797,N,G,11,Y,3.0,N,N,,1.0,1.0,1.0,,9.0,9.0,,,,,,,Y,,,,,,,,,,,,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,Y,Y,N,N,N,N,Y,N,N,Y,144.579826,12.763889,1845.400833,1,1,0,6,0,0,1,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,7,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0.107798,1
27537,135412,JENNIFER,SALEMI,49 DAFFODIL CT,STATEN ISLAND,NY,10312,N,F,9,Y,11.0,N,N,,4.0,1.0,1.0,,7.0,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,N,Y,Y,Y,Y,135.656909,7.373529,1000.270206,0,1,0,7,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0.007343,10
23143,149276,ERICA,BUDHRAM,10732 110TH ST,SOUTH RICHMOND HILL,NY,11419,N,F,7,Y,11.0,N,N,,5.0,1.0,1.0,,3.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,N,,N,N,N,114.603266,4.304348,493.292319,0,1,0,4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0.022147,8
3205,170035,JASON,SPAIN,9 BERNARDS AVE,BERNARDSVILLE,NJ,7924,N,H,8,Y,6.0,N,N,,2.0,1.0,2.0,,7.0,7.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,N,Y,Y,Y,N,213.918871,9.358491,2001.957736,0,1,0,7,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,4,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0.006804,10
19631,84793,SUZANNE,ZAHN,PO BOX 353,MANHASSET,NY,11030,N,G,11,Y,20.0,N,N,,4.0,0.0,1.0,,5.0,9.0,,,Y,,,,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,,Y,,,,Y,Y,N,N,N,N,Y,Y,N,N,N,169.487995,16.478346,2792.881909,0,0,1,4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,5,0,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0.023829,7
17317,77876,JERI,HARRISON,155 W 20TH ST APT 3J,NEW YORK,NY,10011,N,H,9,N,6.0,N,N,Y,2.0,2.0,2.0,0.0,3.0,9.0,,,,,,,Y,,,,,,,,,,Y,,,Y,,,,,,,,,,,,,,,,,Y,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,N,Y,N,N,Y,141.705326,11.779804,1669.260994,0,1,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,5,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0,0.072781,3
42176,89169,JAMES,RENTON,202 W 98TH ST APT 4B,NEW YORK,NY,10025,N,F,7,N,3.0,Y,N,,5.0,2.0,2.0,,4.0,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,N,,N,Y,Y,136.758138,13.848286,1893.865779,0,1,0,7,0,1,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,4,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0.056617,4
34431,66617,ROBERT,RHOADES,11 TREETOP LN,PLEASANTVILLE,NY,10570,N,H,11,Y,20.0,N,N,,1.0,5.0,1.0,,9.0,,,,,,,,Y,,,,,,,,,,,,,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,Y,Y,N,N,N,Y,Y,N,N,N,168.26213,14.996815,2523.396083,0,1,0,4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,6,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0.037043,6
42046,184714,EUGENIA,PIFLAKS,18 DELAWARE ST,HUNTINGTON,NY,11743,N,H,11,N,6.0,N,N,,5.0,1.0,1.0,,5.0,6.0,,,,,,,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,,,N,N,N,N,N,N,N,Y,Y,N,Y,187.689296,7.875,1478.053208,0,1,0,6,0,0,1,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,4,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0.050676,5


### Generate Python Code

In [30]:
# Interactive help (IPython `?`): shows the signature and docstring of m.generate_python_code.
m.generate_python_code?

In [31]:
# Print standalone pandas scoring code generated from the model: it reads from a frame named
# `df` and writes to `df_out` (names set by in_df_name / out_df_name).
# y_name is presumably the name of the prediction column -- TODO confirm.
print(m.generate_python_code(in_df_name = 'df', out_df_name = 'df_out', y_name = 'y_pred'))

df_out = df.copy()

df_out['AGE_RANGES'] = df_out['AGE_RANGES'].fillna('TnImissing')
df_out['AGE_RANGES_tni_assign'] = df_out['AGE_RANGES'].replace({'G': 1, 'F': 2, 'A': 3, 'H': 4, 'D': 5, 'C': 6, 'B': 7, 'TnImissing': 8})
df_out = pd.get_dummies(df_out, columns=['AGE_RANGES'])
for col in [8, 10]:
    if col in df_out.columns:
        df_out.drop(columns=col, inplace=True)
og_tni_dummies = ['AGE_RANGES_bi_(-inf_1.0]', 'AGE_RANGES_bi_(3.0_4.0]', 'AGE_RANGES_bi_(7.0_inf]', 'AGE_RANGES_bi_(5.0_6.0]', 'AGE_RANGES_bi_(1.0_2.0]', 'AGE_RANGES_bi_(6.0_7.0]']
for og in og_tni_dummies:
    if og not in df_out.columns:
        df_out[og] = 0

df_out['AIRLINE_UPGRADERS'] = df_out['AIRLINE_UPGRADERS'].fillna('TnImissing')
df_out = pd.get_dummies(df_out, columns=['AIRLINE_UPGRADERS'])
og_tni_dummies = ['AIRLINE_UPGRADERS_N', 'AIRLINE_UPGRADERS_Y']
for og in og_tni_dummies:
    if og not in df_out.columns:
        df_out[og] = 0

df_out['Online Music Interest'] = df_out['Online Music Interest'].filln

### Pickle

In [32]:
# Called right before pickling; presumably drops the data held by the model object so the
# saved file stays small and contains no records -- TODO confirm against Vyper's docs.
m.clear_data()

In [33]:
# Persist the fitted model object to disk with joblib.
import joblib
filename = 'final_log_model.pkl'
# Use a context manager so the file handle is flushed and closed deterministically
# (the bare open(...) passed inline was never closed).
with open(filename, 'wb') as model_file:
    joblib.dump(m, model_file)

In [34]:
# Reload the saved model object and check that it can still score the hold-out sample.
# SECURITY: joblib.load unpickles arbitrary objects -- only load files you created or trust.
# The context manager closes the file handle once loading is done
# (the bare open(...) passed inline was never closed).
with open(filename, 'rb') as model_file:
    loaded_model = joblib.load(model_file)
df_scored_selected = loaded_model.score_oot_data(df_oob)[0]
df_scored_selected.head(5)

Unnamed: 0,ID,FIRST_NAME,LAST_NAME,ADDRESS,CITY,STATE,ZIP,CREDIT_CARD_REVOLVERS,CREDIT_SCORE_INDEX,INCOME,HOMEOWNER,LOR,MOBILITY_TRIGGER,PRESENCE_OF_CHILDREN,PRESENCE_OF_CREDIT_CARD,PROFITABILITY_INDEX,DWELLING_TYPE,MARITAL_STATUS,NUMBER_OF_CHILDREN,AUTO_INDEX,LIFE_EVENT_INDEX,Childrens Age 0-2,Childrens Age 11-15,Childrens Age 16-17,Childrens Age 3-5,Childrens Age 6-10,Devotional,Reading,Childrens Books,Cooking,Fashion,Interior Decorating,Romance,Baking,Career Advancement Courses,Cigar Smoking,Sweepstakes,Crafts,Cultural/Arts Events,Gardening,Gourmet Foods,DIY,Home Study Courses,Motorcycle Riding,Photography,Quilting,Scrapbooking,Self-Improvement Courses,Sewing/Knitting,Wine,Woodworking,Christian or Gospel,Classical,Country,Jazz,R&B,Rock N Roll,Own a Cat,Own a Dog,Boating/Sailing,Camping/Hiking,Cycling,Fishing,Fitness/Exercise,Golf,Hunting/Shooting,NASCAR,Running,Skiing,Walking for Health,Yoga/Pilates,Casino Gambling,Cruise Ship Vacation,International Travel,Leisure Travel,RV Vacations,Timeshare,Travel within US,Traveler,Interest in Investing,Interest in Mutual Funds,LOYALTY_MEMBERS,PERSONAL_TRAVELER,VACATION_SPENDERS,BARGAIN_SHOPPERS,interest to purchase Medicare Supplement Insurance,interest to purchase Medicare Advantage Plan,Online Shopper,Online Household,Online Insurance Purchasers,Online Savings,Online Movie 
Interest,spendperorder,orderspercustomer,spendpercustomers,y,PRESENCE_OF_FINANCE_LOAN_N,PRESENCE_OF_FINANCE_LOAN_Y,AGE_RANGES_tni_assign,AGE_RANGES_A,AGE_RANGES_B,AGE_RANGES_C,AGE_RANGES_D,AGE_RANGES_F,AGE_RANGES_G,AGE_RANGES_H,AGE_RANGES_TnImissing,AGE_RANGES_bi_(-inf_1.0],AGE_RANGES_bi_(3.0_4.0],AGE_RANGES_bi_(7.0_inf],AGE_RANGES_bi_(5.0_6.0],AGE_RANGES_bi_(1.0_2.0],AGE_RANGES_bi_(6.0_7.0],HOME_VALUE_tni_assign,HOME_VALUE_A,HOME_VALUE_B,HOME_VALUE_C,HOME_VALUE_D,HOME_VALUE_E,HOME_VALUE_F,HOME_VALUE_G,HOME_VALUE_H,HOME_VALUE_I,HOME_VALUE_J,HOME_VALUE_K,HOME_VALUE_M,HOME_VALUE_O,HOME_VALUE_TnImissing,HOME_VALUE_bi_(7.0_8.0],HOME_VALUE_bi_(10.0_11.0],HOME_VALUE_bi_(13.0_inf],HOME_VALUE_bi_(12.0_13.0],Net Assets_tni_assign,Net Assets_A,Net Assets_B,Net Assets_C,Net Assets_D,Net Assets_E,Net Assets_F,Net Assets_G,Net Assets_TnImissing,Net Assets_bi_(2.0_3.0],Net Assets_bi_(6.0_inf],AIRLINE_UPGRADERS_N,AIRLINE_UPGRADERS_Y,Subscribes to Magazines Online_N,Subscribes to Magazines Online_Y,Online Transactor_N,Online Transactor_Y,Online Music Interest_N,Online Music Interest_Y,y_pred,decile
44933,22881,MICHAEL,DEMSIA,186 WAYNE ST APT 123D,JERSEY CITY,NJ,7302,N,G,8,Y,3.0,Y,N,,1.0,2.0,2.0,,6.0,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,Y,,N,N,N,119.476804,11.094025,1325.478648,0,1,0,8,0,0,0,0,0,0,0,1,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,5,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0.033451,6
58369,217527,SANDI,WERFEL,331 ANCHORAGE DR,WOODBURY,NY,11797,N,G,11,Y,3.0,N,N,,1.0,1.0,1.0,,9.0,9.0,,,,,,,Y,,,,,,,,,,,,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,Y,Y,N,N,N,N,Y,N,N,Y,144.579826,12.763889,1845.400833,1,1,0,6,0,0,1,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,7,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,1,0.107798,1
27537,135412,JENNIFER,SALEMI,49 DAFFODIL CT,STATEN ISLAND,NY,10312,N,F,9,Y,11.0,N,N,,4.0,1.0,1.0,,7.0,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,N,Y,Y,Y,Y,135.656909,7.373529,1000.270206,0,1,0,7,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0.007343,10
23143,149276,ERICA,BUDHRAM,10732 110TH ST,SOUTH RICHMOND HILL,NY,11419,N,F,7,Y,11.0,N,N,,5.0,1.0,1.0,,3.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,N,,N,N,N,114.603266,4.304348,493.292319,0,1,0,4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0.022147,8
3205,170035,JASON,SPAIN,9 BERNARDS AVE,BERNARDSVILLE,NJ,7924,N,H,8,Y,6.0,N,N,,2.0,1.0,2.0,,7.0,7.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,N,N,N,N,N,N,Y,Y,Y,N,213.918871,9.358491,2001.957736,0,1,0,7,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,4,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0.006804,10
