In [1]:
import pandas as pd
import numpy as np
import ppscore as pps
import statsmodels as sm
import time
from sklearn.model_selection import KFold
from hpsklearn import HyperoptEstimator, any_classifier, any_preprocessing, random_forest, gaussian_nb, gradient_boosting, multinomial_nb
from hyperopt import tpe
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier

WARN: OMP_NUM_THREADS=None =>
... If you are using openblas if you are using openblas set OMP_NUM_THREADS=1 or risk subprocess calls hanging indefinitely


In [2]:
#Load data
pd.set_option('display.max_columns', None)
df_td = pd.read_csv('tic_2000_train_data.csv')
eval_df = pd.read_csv('tic_2000_eval_data.csv')#CARAVAN is renamed 'Target' in this set

#renaming the training data to match the test data.
df_td.rename(columns={'MOSTYPE': 'subtype_L0', 'MAANTHUI':'Num_houses', 'MGEMOMV' : 'Avg_hh_size',
                   'MGEMLEEF':'age_L1', 'MOSHOOFD': 'maintype_L2', 'MGODRK': 'romcath_L3',
                   'MGODPR': 'Protestant','MGODOV' : 'O_religion', 'MGODGE': 'N_religion','MRELGE' :'Married',
                   'MRELSA' : 'Living_together','MRELOV' : 'O_relation','MFALLEEN' : 'Singles','MFGEKIND' : 'hh_wo_child',
                   'MFWEKIND' : 'hh_w_child','MOPLHOOG' : 'H_lvl_edu','MOPLMIDD' : 'M_lvl_edu',
                   'MOPLLAAG' : 'L_lvl_edu','MBERHOOG' : 'H_status','MBERZELF' : 'Entrepreneur','MBERBOER' : 'Farmer',
                   'MBERMIDD' : 'Mid_management','MBERARBG' : 'Skld_labor','MBERARBO' : 'Unskld_labor',
                   'MSKA' : 'Soc_cls_A','MSKB1' : 'Soc_cls_B1','MSKB2' : 'Soc_cls_B2','MSKC' : 'Soc_cls_C',
                   'MSKD' : 'Soc_cls_D','MHHUUR' : 'R_house','MHKOOP' : 'O_house','MAUT1' : '1_car','MAUT2' : '2_cars',
                   'MAUT0' : 'N_car','MZFONDS' : 'Nat_Hlth_Serv','MZPART' : 'Prv_Hlth_Insur','MINKM30' : 'Inc_u_30k',
                   'MINK3045' : 'Inc_btw_30_45k','MINK4575' : 'Inc_btw_45_75k','MINK7512' : 'Inc_75_122k','MINK123M' : 'Inc_ovr_123k',
                   'MINKGEM' : 'Avg_inc','MKOOPKLA' : 'PP_cls','PWAPART' : 'Contri_prv_3p_insur','PWABEDR' : 'Firm_Contri_3p_ insur',
                   'PWALAND' : 'Ag_Contri_3p_insur','PPERSAUT' : 'Contri_car_pol','PBESAUT' : 'Contri_deliv_van_pol',
                   'PMOTSCO' : 'Contri_motorcycle/scooter_pol','PVRAAUT' : 'Contri_lorry_pol','PAANHANG' : 'Contri_trailer_pols',
                   'PTRACTOR' : 'Contri_tractor_pol','PWERKT' : 'Contri_ag_machine_pol','PBROM' : 'Contri_moped_pol',
                   'PLEVEN' : 'Contri_life_insur','PPERSONG' : 'Contri_prv_accid_insur_pol',
                   'PGEZONG' : 'Contri_fam_accid_insur_pol','PWAOREG' : 'Contri_disabl_insur_pol','PBRAND' : 'Contri_fire_pol',
                   'PZEILPL' : 'Contri_surfb_pol','PPLEZIER' : 'Contri_boat_pol','PFIETS' : 'Contri_bike_pol',
                   'PINBOED' : 'Contri_prop_insur_pol','PBYSTAND' : 'Contri_ss_insur_polo','AWAPART' : 'Num_prv_3p_insur',
                   'AWABEDR' : 'Num_firm_3p_insur','AWALAND' : 'Num_ag_3p_insur','APERSAUT' : 'Num_car_pol',
                   'ABESAUT' : 'Num_deliv_van_pol','AMOTSCO' : 'Num_motorcycle/scooter_pol', 'AVRAAUT' : 'Num_lorry_pol','AAANHANG': 'Num_trailer_pol',
                   'ATRACTOR' : 'Num_tractor_pol','AWERKT' : 'Num_ag_machines_pol','ABROM' : 'Num_moped_pol',
                   'ALEVEN' : 'Num_life_insur_pol', 'APERSONG' : 'Num_prv_accid_insur_pol','AGEZONG' : 'Num_fam_ccid_insur_pol',
                   'AWAOREG' : 'Num_disabl_insur_pol','ABRAND' :'Num_fire_pol','AZEILPL' :'Num_surfb_pol','APLEZIER' :'Num_boat_pol',
                   'AFIETS' :'Num_bike_pol','AINBOED' :'Num_prop_insur_pol','ABYSTAND' :'num_ss_insur_pol', 'CARAVAN' : 'Target'},
          inplace=True)
eval_df.rename(columns={'MOSTYPE': 'subtype_L0', 'MAANTHUI':'Num_houses', 'MGEMOMV' : 'Avg_hh_size',
                   'MGEMLEEF':'age_L1', 'MOSHOOFD': 'maintype_L2', 'MGODRK': 'romcath_L3',
                   'MGODPR': 'Protestant','MGODOV' : 'O_religion', 'MGODGE': 'N_religion','MRELGE' :'Married',
                   'MRELSA' : 'Living_together','MRELOV' : 'O_relation','MFALLEEN' : 'Singles','MFGEKIND' : 'hh_wo_child',
                   'MFWEKIND' : 'hh_w_child','MOPLHOOG' : 'H_lvl_edu','MOPLMIDD' : 'M_lvl_edu',
                   'MOPLLAAG' : 'L_lvl_edu','MBERHOOG' : 'H_status','MBERZELF' : 'Entrepreneur','MBERBOER' : 'Farmer',
                   'MBERMIDD' : 'Mid_management','MBERARBG' : 'Skld_labor','MBERARBO' : 'Unskld_labor',
                   'MSKA' : 'Soc_cls_A','MSKB1' : 'Soc_cls_B1','MSKB2' : 'Soc_cls_B2','MSKC' : 'Soc_cls_C',
                   'MSKD' : 'Soc_cls_D','MHHUUR' : 'R_house','MHKOOP' : 'O_house','MAUT1' : '1_car','MAUT2' : '2_cars',
                   'MAUT0' : 'N_car','MZFONDS' : 'Nat_Hlth_Serv','MZPART' : 'Prv_Hlth_Insur','MINKM30' : 'Inc_u_30k',
                   'MINK3045' : 'Inc_btw_30_45k','MINK4575' : 'Inc_btw_45_75k','MINK7512' : 'Inc_75_122k','MINK123M' : 'Inc_ovr_123k',
                   'MINKGEM' : 'Avg_inc','MKOOPKLA' : 'PP_cls','PWAPART' : 'Contri_prv_3p_insur','PWABEDR' : 'Firm_Contri_3p_ insur',
                   'PWALAND' : 'Ag_Contri_3p_insur','PPERSAUT' : 'Contri_car_pol','PBESAUT' : 'Contri_deliv_van_pol',
                   'PMOTSCO' : 'Contri_motorcycle/scooter_pol','PVRAAUT' : 'Contri_lorry_pol','PAANHANG' : 'Contri_trailer_pols',
                   'PTRACTOR' : 'Contri_tractor_pol','PWERKT' : 'Contri_ag_machine_pol','PBROM' : 'Contri_moped_pol',
                   'PLEVEN' : 'Contri_life_insur','PPERSONG' : 'Contri_prv_accid_insur_pol',
                   'PGEZONG' : 'Contri_fam_accid_insur_pol','PWAOREG' : 'Contri_disabl_insur_pol','PBRAND' : 'Contri_fire_pol',
                   'PZEILPL' : 'Contri_surfb_pol','PPLEZIER' : 'Contri_boat_pol','PFIETS' : 'Contri_bike_pol',
                   'PINBOED' : 'Contri_prop_insur_pol','PBYSTAND' : 'Contri_ss_insur_polo','AWAPART' : 'Num_prv_3p_insur',
                   'AWABEDR' : 'Num_firm_3p_insur','AWALAND' : 'Num_ag_3p_insur','APERSAUT' : 'Num_car_pol',
                   'ABESAUT' : 'Num_deliv_van_pol','AMOTSCO' : 'Num_motorcycle/scooter_pol', 'AVRAAUT' : 'Num_lorry_pol','AAANHANG': 'Num_trailer_pol',
                   'ATRACTOR' : 'Num_tractor_pol','AWERKT' : 'Num_ag_machines_pol','ABROM' : 'Num_moped_pol',
                   'ALEVEN' : 'Num_life_insur_pol', 'APERSONG' : 'Num_prv_accid_insur_pol','AGEZONG' : 'Num_fam_ccid_insur_pol',
                   'AWAOREG' : 'Num_disabl_insur_pol','ABRAND' :'Num_fire_pol','AZEILPL' :'Num_surfb_pol','APLEZIER' :'Num_boat_pol',
                   'AFIETS' :'Num_bike_pol','AINBOED' :'Num_prop_insur_pol','ABYSTAND' :'num_ss_insur_pol'},
          inplace=True)

df_list = [df_td, eval_df]
df = pd.concat(df_list)

df.describe()

Unnamed: 0,subtype_L0,Num_houses,Avg_hh_size,age_L1,maintype_L2,romcath_L3,Protestant,O_religion,N_religion,Married,Living_together,O_relation,Singles,hh_wo_child,hh_w_child,H_lvl_edu,M_lvl_edu,L_lvl_edu,H_status,Entrepreneur,Farmer,Mid_management,Skld_labor,Unskld_labor,Soc_cls_A,Soc_cls_B1,Soc_cls_B2,Soc_cls_C,Soc_cls_D,R_house,O_house,1_car,2_cars,N_car,Nat_Hlth_Serv,Prv_Hlth_Insur,Inc_u_30k,Inc_btw_30_45k,Inc_btw_45_75k,Inc_75_122k,Inc_ovr_123k,Avg_inc,PP_cls,Contri_prv_3p_insur,Firm_Contri_3p_ insur,Ag_Contri_3p_insur,Contri_car_pol,Contri_deliv_van_pol,Contri_motorcycle/scooter_pol,Contri_lorry_pol,Contri_trailer_pols,Contri_tractor_pol,Contri_ag_machine_pol,Contri_moped_pol,Contri_life_insur,Contri_prv_accid_insur_pol,Contri_fam_accid_insur_pol,Contri_disabl_insur_pol,Contri_fire_pol,Contri_surfb_pol,Contri_boat_pol,Contri_bike_pol,Contri_prop_insur_pol,Contri_ss_insur_polo,Num_prv_3p_insur,Num_firm_3p_insur,Num_ag_3p_insur,Num_car_pol,Num_deliv_van_pol,Num_motorcycle/scooter_pol,Num_lorry_pol,Num_trailer_pol,Num_tractor_pol,Num_ag_machines_pol,Num_moped_pol,Num_life_insur_pol,Num_prv_accid_insur_pol,Num_fam_ccid_insur_pol,Num_disabl_insur_pol,Num_fire_pol,Num_surfb_pol,Num_boat_pol,Num_bike_pol,Num_prop_insur_pol,num_ss_insur_pol,Target
count,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0
mean,24.253207,1.108735,2.677561,2.996437,5.779067,0.700672,4.63765,1.050092,3.262981,6.188964,0.873142,2.286602,1.887294,3.237324,4.302891,1.484525,3.307269,4.592038,1.898799,0.403278,0.545714,2.877113,2.226532,2.291183,1.650682,1.595093,2.204744,3.742211,1.068214,4.187742,4.819487,6.022501,1.33598,1.95673,6.254327,2.750662,2.577072,3.505498,2.739462,0.808491,0.208002,3.80452,4.260334,0.764915,0.038892,0.073712,2.956424,0.054877,0.170841,0.008858,0.019344,0.093565,0.011505,0.215027,0.202301,0.011505,0.018733,0.023315,1.84942,0.001629,0.015272,0.025351,0.016697,0.045408,0.40002,0.01405,0.021279,0.557218,0.011098,0.040216,0.00224,0.011403,0.034413,0.005192,0.071065,0.079821,0.004582,0.007941,0.004276,0.574018,0.000916,0.005091,0.03146,0.00845,0.013846,0.059662
std,12.918058,0.412101,0.780701,0.80466,2.874148,1.015107,1.721212,1.011156,1.606287,1.89607,0.961955,1.710674,1.779238,1.609139,1.984152,1.645968,1.723377,2.279839,1.814406,0.786792,1.106349,1.846703,1.748025,1.684008,1.74241,1.321487,1.534163,1.9449,1.298229,3.093127,3.093541,1.54398,1.213627,1.596842,2.000374,2.00296,2.073125,1.871365,1.950625,1.173771,0.561832,1.33093,1.998913,0.956555,0.356924,0.507818,2.921736,0.566108,0.888518,0.237556,0.200885,0.60435,0.215408,0.810899,0.910574,0.188699,0.213712,0.37535,1.881271,0.057058,0.24421,0.157198,0.211487,0.396983,0.492001,0.126058,0.144319,0.608575,0.129928,0.223622,0.068402,0.116251,0.249706,0.109954,0.267432,0.384431,0.067535,0.088764,0.071224,0.561255,0.030258,0.077996,0.20907,0.092647,0.117728,0.236872
min,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,10.0,1.0,2.0,2.0,3.0,0.0,4.0,0.0,2.0,5.0,0.0,1.0,0.0,2.0,3.0,0.0,2.0,3.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,1.0,1.0,2.0,0.0,2.0,2.0,5.0,0.0,0.0,5.0,1.0,1.0,2.0,1.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,30.0,1.0,3.0,3.0,7.0,0.0,5.0,1.0,3.0,6.0,1.0,2.0,2.0,3.0,4.0,1.0,3.0,5.0,2.0,0.0,0.0,3.0,2.0,2.0,1.0,2.0,2.0,4.0,1.0,4.0,5.0,6.0,1.0,2.0,7.0,2.0,2.0,4.0,3.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,35.0,1.0,3.0,3.0,8.0,1.0,6.0,2.0,4.0,7.0,1.0,3.0,3.0,4.0,6.0,2.0,4.0,6.0,3.0,1.0,1.0,4.0,3.0,3.0,2.0,2.0,3.0,5.0,2.0,7.0,7.0,7.0,2.0,3.0,8.0,4.0,4.0,5.0,4.0,1.0,0.0,4.0,6.0,2.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
max,41.0,10.0,6.0,6.0,10.0,9.0,9.0,5.0,9.0,9.0,7.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,5.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,8.0,3.0,6.0,4.0,9.0,7.0,7.0,9.0,5.0,7.0,6.0,6.0,9.0,6.0,3.0,7.0,8.0,3.0,6.0,1.0,6.0,5.0,2.0,5.0,1.0,12.0,5.0,8.0,4.0,3.0,6.0,6.0,3.0,8.0,1.0,1.0,2.0,7.0,1.0,2.0,4.0,2.0,2.0,1.0


In [3]:
#Creating dummy tables and leaving our Target variable out for the models.
pre_enc= df.drop('Target', axis=1)
post_enc_df = pd.get_dummies(df, prefix_sep="_", columns=pre_enc.columns)

new_df = post_enc_df[['Contri_boat_pol_0', 'Avg_inc_0', 'maintype_L2_10', 'Contri_fire_pol_2',
       'Contri_ss_insur_polo_0', 'maintype_L2_5', 'maintype_L2_4', 'R_house_2',
       'L_lvl_edu_7', 'H_lvl_edu_3', 'H_lvl_edu_0', 'L_lvl_edu_6',
       'maintype_L2_3', '1_car_4', '1_car_2', 'L_lvl_edu_9', 'Avg_inc_2',
       'Contri_fire_pol_1', 'Avg_inc_3', 'Contri_fire_pol_6', 'R_house_5',
       'PP_cls_3', 'PP_cls_5', 'Contri_prv_3p_insur_1', 'Num_fire_pol_2',
       'maintype_L2_1', 'maintype_L2_2', 'Contri_disabl_insur_pol_6',
       'H_lvl_edu_5', 'Num_fire_pol_1', '1_car_5', '1_car_6', 'PP_cls_4',
       'H_lvl_edu_4', 'H_lvl_edu_6', 'Contri_fire_pol_5', 'Avg_inc_5',
       'maintype_L2_8', 'R_house_1', '1_car_9', 'L_lvl_edu_8', 'R_house_6',
       'L_lvl_edu_2', 'maintype_L2_9', 'Contri_fire_pol_3', 'Avg_inc_4',
       'Avg_inc_7', 'L_lvl_edu_1', 'PP_cls_8', 'R_house_0', 'L_lvl_edu_0',
       '1_car_7', 'Contri_prv_3p_insur_2', 'H_lvl_edu_7',
       'Contri_ss_insur_polo_4', 'Contri_fire_pol_4', 'PP_cls_7',
       'Contri_car_pol_6', 'Contri_fam_accid_insur_pol_3','Target']]

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = new_df.drop('Target', axis=1)
y = new_df.Target


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)


# Instantiate a HyperoptEstimator with the search space and number of evaluations

estim = HyperoptEstimator(classifier=multinomial_nb('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim.fit(X_train, y_train)

# Show the results

print(estim.score(X_test, y_test))
# 1.0

print( estim.best_model() )

  0%|          | 0/1 [00:00<?, ?trial/s, best loss=?]

job exception: Negative values in data passed to MultinomialNB (input X)



  0%|          | 0/1 [00:01<?, ?trial/s, best loss=?]


ValueError: Negative values in data passed to MultinomialNB (input X)

In [None]:
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix, plot_roc_curve, classification_report, balanced_accuracy_score, coverage_error
import matplotlib.pyplot as plt

classifier = GradientBoostingClassifier(learning_rate=0.030509817593472984,
                           loss='exponential', max_features=0.36684107529168053,
                           min_samples_leaf=8, n_estimators=287, presort='auto',
                           random_state=3)

classifier.fit(X_train,y_train)

predictions = classifier.predict(X_test)

print("accuracy score" '\n', accuracy_score(y_test, predictions))
print("model confusion matrix" '\n', confusion_matrix(y_test, predictions, normalize='all'))
print("classification_report" '\n', classification_report(y_test, predictions),'\n')
ax = plt.gca()
plt.rcParams['figure.figsize'] = (10, 10)
disp = plot_roc_curve(classifier, X_test, y_test, ax=ax, alpha=0.9)

In [6]:
estim_rf = HyperoptEstimator(classifier=random_forest('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_rf.fit(X_train, y_train)

# Show the results

print(estim_rf.score(X_test, y_test))
# 1.0

print( estim_rf.best_model() )

100%|██████████| 1/1 [00:39<00:00, 39.76s/trial, best loss: 0.0693384223918575]
100%|██████████| 2/2 [00:01<00:00,  1.26trial/s, best loss: 0.058524173027989845]
100%|██████████| 3/3 [00:03<00:00,  1.21s/trial, best loss: 0.058524173027989845]
100%|██████████| 4/4 [00:21<00:00,  5.36s/trial, best loss: 0.058524173027989845]
100%|██████████| 5/5 [00:16<00:00,  3.26s/trial, best loss: 0.058524173027989845]
100%|██████████| 6/6 [00:02<00:00,  2.99trial/s, best loss: 0.058524173027989845]
100%|██████████| 7/7 [00:01<00:00,  4.70trial/s, best loss: 0.058524173027989845]
100%|██████████| 8/8 [00:03<00:00,  2.27trial/s, best loss: 0.058524173027989845]
100%|██████████| 9/9 [00:02<00:00,  3.29trial/s, best loss: 0.058524173027989845]
100%|██████████| 10/10 [00:23<00:00,  2.34s/trial, best loss: 0.058524173027989845]
100%|██████████| 11/11 [00:05<00:00,  2.09trial/s, best loss: 0.058524173027989845]
100%|██████████| 12/12 [00:29<00:00,  2.44s/trial, best loss: 0.058524173027989845]
100%|███████

In [None]:
estim_gbt = HyperoptEstimator(classifier=gradient_boosting('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_gbt.fit(X_train, y_train)

# Show the results

print(estim_gbt.score(X_test, y_test))
# 1.0

print( estim_gbt.best_model() )

In [5]:
estim_guas = HyperoptEstimator(classifier=gaussian_nb('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_guas.fit(X_train, y_train)

# Show the results

print(estim_guas.score(X_test, y_test))
# 1.0

print( estim_guas.best_model() )

100%|██████████| 1/1 [00:01<00:00,  1.38s/trial, best loss: 0.07697201017811706]
100%|██████████| 2/2 [00:01<00:00,  1.55trial/s, best loss: 0.07697201017811706]
100%|██████████| 3/3 [00:01<00:00,  2.38trial/s, best loss: 0.07697201017811706]
100%|██████████| 4/4 [00:01<00:00,  2.99trial/s, best loss: 0.07697201017811706]
100%|██████████| 5/5 [00:01<00:00,  3.92trial/s, best loss: 0.07697201017811706]
100%|██████████| 6/6 [00:01<00:00,  4.62trial/s, best loss: 0.07697201017811706]
100%|██████████| 7/7 [00:01<00:00,  5.49trial/s, best loss: 0.07697201017811706]
100%|██████████| 8/8 [00:01<00:00,  6.04trial/s, best loss: 0.07697201017811706]
100%|██████████| 9/9 [00:01<00:00,  6.59trial/s, best loss: 0.07697201017811706]
100%|██████████| 10/10 [00:01<00:00,  7.87trial/s, best loss: 0.07697201017811706]
100%|██████████| 11/11 [00:01<00:00,  8.65trial/s, best loss: 0.07697201017811706]
100%|██████████| 12/12 [00:01<00:00,  9.23trial/s, best loss: 0.07697201017811706]
100%|██████████| 13/13

In [None]:
estim_find = HyperoptEstimator( algo=tpe.suggest, 
                            max_evals=150, 
                            trial_timeout=60 )

estim_find.fit(X_train, y_train)

# Show the results

# 1.0

print( estim_find.best_model() )

In [None]:
from hpsklearn import extra_trees

estim_et = HyperoptEstimator(classifier=extra_trees('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_et.fit(X_train, y_train)

# Show the results

print(estim_et.score(X_test, y_test))
# 1.0

print( estim_et.best_model() )


In [8]:
from hpsklearn import decision_tree

estim_knn = HyperoptEstimator(classifier=decision_tree('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_knn.fit(X_train, y_train)

# Show the results

print(estim_knn.score(X_test, y_test))
# 1.0

print( estim_knn.best_model() )

100%|██████████| 1/1 [00:01<00:00,  1.47s/trial, best loss: 0.06997455470737912]
100%|██████████| 2/2 [00:01<00:00,  1.50trial/s, best loss: 0.06997455470737912]
100%|██████████| 3/3 [00:01<00:00,  2.12trial/s, best loss: 0.06870229007633588]
100%|██████████| 4/4 [00:01<00:00,  3.11trial/s, best loss: 0.06870229007633588]
100%|██████████| 5/5 [00:01<00:00,  3.90trial/s, best loss: 0.06870229007633588]
100%|██████████| 6/6 [00:01<00:00,  4.63trial/s, best loss: 0.06870229007633588]
100%|██████████| 7/7 [00:01<00:00,  5.38trial/s, best loss: 0.060432569974554706]
100%|██████████| 8/8 [00:01<00:00,  4.01trial/s, best loss: 0.060432569974554706]
100%|██████████| 9/9 [00:01<00:00,  5.18trial/s, best loss: 0.060432569974554706]
100%|██████████| 10/10 [00:01<00:00,  7.90trial/s, best loss: 0.059796437659033086]
100%|██████████| 11/11 [00:01<00:00,  8.02trial/s, best loss: 0.059796437659033086]
100%|██████████| 12/12 [00:01<00:00,  9.49trial/s, best loss: 0.059796437659033086]
100%|██████████|



In [7]:
estim_dt = HyperoptEstimator(classifier=decision_tree('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_dt.fit(X_train, y_train)

# Show the results

print(estim_dt.score(X_test, y_test))
# 1.0

print( estim_dt.best_model() )

NameError: name 'decision_tree' is not defined

In [None]:
estim_adab = HyperoptEstimator(classifier=ada_boost('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_adab.fit(X_train, y_train)

# Show the results

print(estim_adab.score(X_test, y_test))
# 1.0

print( estim_adab.best_model() )

In [None]:
estim_lda = HyperoptEstimator(classifier=linear_discriminant_analysis('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_lda.fit(X_train, y_train)

# Show the results

print(estim_lda.score(X_test, y_test))
# 1.0

print( estim_lda.best_model() )

In [None]:
estim_qda = HyperoptEstimator(classifier=quadratic_discriminant_analysis('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_qda.fit(X_train, y_train)

# Show the results

print(estim_qda.score(X_test, y_test))
# 1.0

print( estim_qda.best_model() )

In [None]:
estim_xgb = HyperoptEstimator(classifier=xgboost_classification('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data

estim_xgb.fit(X_train, y_train)

# Show the results

print(estim_xgb.score(X_test, y_test))
# 1.0

print( estim_xgb.best_model() )