## Imports

We imported the necessary tools that be used to clean, impute, and model our dataset.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline
import numpy as np
%matplotlib inline

In [24]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import roc_curve, auc

In [25]:
from category_encoders import OrdinalEncoder as oe

In [5]:
from catboost import CatBoostClassifier
from catboost import Pool, cv
import optuna
import pandas_profiling as pp

## Load Data

In [67]:
train = pd.read_csv('data/training_set_features.csv', index_col='respondent_id')

In [68]:
labels = pd.read_csv('data/training_set_labels.csv', index_col='respondent_id')

In [69]:
test = pd.read_csv('data/test_set_features.csv', index_col='respondent_id')

In [70]:
num_cols = train.select_dtypes('number').columns

In [71]:
cat_cols = ['race', 'sex', 'marital_status', 'rent_or_own',  'hhs_geo_region','census_msa', 'employment_industry', 'employment_occupation']

In [72]:
ord_cols = ['age_group', 'education',  'income_poverty','employment_status']

## Imputation

In [73]:
for col in (cat_cols+ord_cols):
    train[col] = train[col].fillna(value='None')

In [74]:
for col in num_cols:
    train[col] = train[col].fillna(value=-1)

In [75]:
for col in (cat_cols+ord_cols):
    test[col] = test[col].fillna(value='None')

In [76]:
for col in num_cols:
    test[col] = test[col].fillna(value=-1)

In [87]:
le = LabelEncoder()
for col in ord_cols:
    train[col] = le.fit_transform(train[col])

In [93]:
for col in ord_cols:
    pd.to_numeric(train[col])

In [96]:
train.dtypes

h1n1_concern                   float64
h1n1_knowledge                 float64
behavioral_antiviral_meds      float64
behavioral_avoidance           float64
behavioral_face_mask           float64
behavioral_wash_hands          float64
behavioral_large_gatherings    float64
behavioral_outside_home        float64
behavioral_touch_face          float64
doctor_recc_h1n1               float64
doctor_recc_seasonal           float64
chronic_med_condition          float64
child_under_6_months           float64
health_worker                  float64
health_insurance               float64
opinion_h1n1_vacc_effective    float64
opinion_h1n1_risk              float64
opinion_h1n1_sick_from_vacc    float64
opinion_seas_vacc_effective    float64
opinion_seas_risk              float64
opinion_seas_sick_from_vacc    float64
age_group                        int64
education                        int64
race                            object
sex                             object
income_poverty           

## Train Test Split

We now split the data to be trained and tested.

In [97]:
X_train, X_test, y_train, y_test = train_test_split(train, labels, test_size=0.3, random_state=10)

## CatBoost with default parameters

Since this is a baseline model for CatBoost, we will model our training and test data with default hyperparameters.

In [98]:
categorical_features_indices = np.where(X_train.dtypes == object)[0]
categorical_features_indices

array([23, 24, 26, 27, 29, 30, 33, 34], dtype=int64)

### H1N1 Vaccine

In [143]:
model_classifier_h1n1 = CatBoostClassifier(cat_features=categorical_features_indices, eval_metric='AUC')

In [144]:
model_classifier_h1n1.fit(X_train, y_train.h1n1_vaccine, eval_set=(X_test, y_test.h1n1_vaccine), use_best_model=True, plot=True);

Custom logger is already specified. Specify more than one logger at same time is not thread safe.

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.065316
0:	test: 0.8206128	best: 0.8206128 (0)	total: 51.8ms	remaining: 51.8s
1:	test: 0.8402900	best: 0.8402900 (1)	total: 96.4ms	remaining: 48.1s
2:	test: 0.8432450	best: 0.8432450 (2)	total: 144ms	remaining: 47.8s
3:	test: 0.8450762	best: 0.8450762 (3)	total: 192ms	remaining: 47.7s
4:	test: 0.8452648	best: 0.8452648 (4)	total: 242ms	remaining: 48.2s
5:	test: 0.8461928	best: 0.8461928 (5)	total: 292ms	remaining: 48.4s
6:	test: 0.8464103	best: 0.8464103 (6)	total: 342ms	remaining: 48.5s
7:	test: 0.8481304	best: 0.8481304 (7)	total: 390ms	remaining: 48.4s
8:	test: 0.8490304	best: 0.8490304 (8)	total: 438ms	remaining: 48.3s
9:	test: 0.8499302	best: 0.8499302 (9)	total: 487ms	remaining: 48.2s
10:	test: 0.8495384	best: 0.8499302 (9)	total: 552ms	remaining: 49.6s
11:	test: 0.8508231	best: 0.8508231 (11)	total: 617ms	remaining: 50.8s
12:	test: 0.8511980	best: 0.8511980 (12)	total: 665ms	remaining: 50.5s
13:	test: 0.8515359	best: 0.8515359 (13)	total: 716ms	remaining: 5

117:	test: 0.8691823	best: 0.8691823 (117)	total: 6.49s	remaining: 48.5s
118:	test: 0.8691841	best: 0.8691841 (118)	total: 6.55s	remaining: 48.5s
119:	test: 0.8692001	best: 0.8692001 (119)	total: 6.6s	remaining: 48.4s
120:	test: 0.8692404	best: 0.8692404 (120)	total: 6.66s	remaining: 48.4s
121:	test: 0.8693564	best: 0.8693564 (121)	total: 6.71s	remaining: 48.3s
122:	test: 0.8693746	best: 0.8693746 (122)	total: 6.76s	remaining: 48.2s
123:	test: 0.8693364	best: 0.8693746 (122)	total: 6.81s	remaining: 48.1s
124:	test: 0.8693503	best: 0.8693746 (122)	total: 6.87s	remaining: 48.1s
125:	test: 0.8692894	best: 0.8693746 (122)	total: 6.92s	remaining: 48s
126:	test: 0.8693544	best: 0.8693746 (122)	total: 6.96s	remaining: 47.9s
127:	test: 0.8693536	best: 0.8693746 (122)	total: 7.02s	remaining: 47.8s
128:	test: 0.8693487	best: 0.8693746 (122)	total: 7.07s	remaining: 47.8s
129:	test: 0.8693686	best: 0.8693746 (122)	total: 7.12s	remaining: 47.7s
130:	test: 0.8693070	best: 0.8693746 (122)	total: 7.19

232:	test: 0.8703554	best: 0.8706279 (216)	total: 12.7s	remaining: 42s
233:	test: 0.8703658	best: 0.8706279 (216)	total: 12.8s	remaining: 41.9s
234:	test: 0.8703947	best: 0.8706279 (216)	total: 12.9s	remaining: 41.8s
235:	test: 0.8704081	best: 0.8706279 (216)	total: 12.9s	remaining: 41.8s
236:	test: 0.8704764	best: 0.8706279 (216)	total: 13s	remaining: 41.8s
237:	test: 0.8704407	best: 0.8706279 (216)	total: 13s	remaining: 41.7s
238:	test: 0.8704640	best: 0.8706279 (216)	total: 13.1s	remaining: 41.6s
239:	test: 0.8704890	best: 0.8706279 (216)	total: 13.2s	remaining: 41.6s
240:	test: 0.8704692	best: 0.8706279 (216)	total: 13.2s	remaining: 41.6s
241:	test: 0.8704618	best: 0.8706279 (216)	total: 13.3s	remaining: 41.5s
242:	test: 0.8704705	best: 0.8706279 (216)	total: 13.3s	remaining: 41.5s
243:	test: 0.8705562	best: 0.8706279 (216)	total: 13.4s	remaining: 41.4s
244:	test: 0.8705781	best: 0.8706279 (216)	total: 13.4s	remaining: 41.3s
245:	test: 0.8705957	best: 0.8706279 (216)	total: 13.5s	r

346:	test: 0.8705500	best: 0.8706727 (279)	total: 19.4s	remaining: 36.5s
347:	test: 0.8706195	best: 0.8706727 (279)	total: 19.4s	remaining: 36.4s
348:	test: 0.8706267	best: 0.8706727 (279)	total: 19.5s	remaining: 36.3s
349:	test: 0.8705900	best: 0.8706727 (279)	total: 19.5s	remaining: 36.3s
350:	test: 0.8705715	best: 0.8706727 (279)	total: 19.6s	remaining: 36.2s
351:	test: 0.8705691	best: 0.8706727 (279)	total: 19.6s	remaining: 36.2s
352:	test: 0.8705558	best: 0.8706727 (279)	total: 19.7s	remaining: 36.1s
353:	test: 0.8705356	best: 0.8706727 (279)	total: 19.8s	remaining: 36.1s
354:	test: 0.8705209	best: 0.8706727 (279)	total: 19.8s	remaining: 36.1s
355:	test: 0.8705205	best: 0.8706727 (279)	total: 19.9s	remaining: 36s
356:	test: 0.8704475	best: 0.8706727 (279)	total: 20s	remaining: 36s
357:	test: 0.8704368	best: 0.8706727 (279)	total: 20s	remaining: 35.9s
358:	test: 0.8704070	best: 0.8706727 (279)	total: 20.1s	remaining: 35.9s
359:	test: 0.8703962	best: 0.8706727 (279)	total: 20.1s	rem

461:	test: 0.8708272	best: 0.8709454 (449)	total: 25.7s	remaining: 29.9s
462:	test: 0.8708272	best: 0.8709454 (449)	total: 25.8s	remaining: 29.9s
463:	test: 0.8708757	best: 0.8709454 (449)	total: 25.8s	remaining: 29.8s
464:	test: 0.8709198	best: 0.8709454 (449)	total: 25.9s	remaining: 29.8s
465:	test: 0.8709116	best: 0.8709454 (449)	total: 25.9s	remaining: 29.7s
466:	test: 0.8709133	best: 0.8709454 (449)	total: 26s	remaining: 29.7s
467:	test: 0.8709063	best: 0.8709454 (449)	total: 26s	remaining: 29.6s
468:	test: 0.8709001	best: 0.8709454 (449)	total: 26.1s	remaining: 29.6s
469:	test: 0.8708846	best: 0.8709454 (449)	total: 26.2s	remaining: 29.5s
470:	test: 0.8708787	best: 0.8709454 (449)	total: 26.2s	remaining: 29.4s
471:	test: 0.8708756	best: 0.8709454 (449)	total: 26.3s	remaining: 29.4s
472:	test: 0.8708224	best: 0.8709454 (449)	total: 26.3s	remaining: 29.3s
473:	test: 0.8708376	best: 0.8709454 (449)	total: 26.4s	remaining: 29.3s
474:	test: 0.8707968	best: 0.8709454 (449)	total: 26.4s

576:	test: 0.8705009	best: 0.8709454 (449)	total: 32.4s	remaining: 23.8s
577:	test: 0.8705106	best: 0.8709454 (449)	total: 32.5s	remaining: 23.7s
578:	test: 0.8704599	best: 0.8709454 (449)	total: 32.5s	remaining: 23.7s
579:	test: 0.8704095	best: 0.8709454 (449)	total: 32.6s	remaining: 23.6s
580:	test: 0.8704177	best: 0.8709454 (449)	total: 32.6s	remaining: 23.5s
581:	test: 0.8703996	best: 0.8709454 (449)	total: 32.7s	remaining: 23.5s
582:	test: 0.8703373	best: 0.8709454 (449)	total: 32.8s	remaining: 23.4s
583:	test: 0.8702707	best: 0.8709454 (449)	total: 32.8s	remaining: 23.4s
584:	test: 0.8702796	best: 0.8709454 (449)	total: 32.9s	remaining: 23.3s
585:	test: 0.8702862	best: 0.8709454 (449)	total: 32.9s	remaining: 23.3s
586:	test: 0.8702889	best: 0.8709454 (449)	total: 33s	remaining: 23.2s
587:	test: 0.8702736	best: 0.8709454 (449)	total: 33.1s	remaining: 23.2s
588:	test: 0.8702392	best: 0.8709454 (449)	total: 33.1s	remaining: 23.1s
589:	test: 0.8702681	best: 0.8709454 (449)	total: 33.

692:	test: 0.8697575	best: 0.8709454 (449)	total: 39.1s	remaining: 17.3s
693:	test: 0.8697727	best: 0.8709454 (449)	total: 39.2s	remaining: 17.3s
694:	test: 0.8697834	best: 0.8709454 (449)	total: 39.2s	remaining: 17.2s
695:	test: 0.8697503	best: 0.8709454 (449)	total: 39.3s	remaining: 17.2s
696:	test: 0.8697231	best: 0.8709454 (449)	total: 39.4s	remaining: 17.1s
697:	test: 0.8696967	best: 0.8709454 (449)	total: 39.4s	remaining: 17.1s
698:	test: 0.8696779	best: 0.8709454 (449)	total: 39.5s	remaining: 17s
699:	test: 0.8696307	best: 0.8709454 (449)	total: 39.5s	remaining: 16.9s
700:	test: 0.8696177	best: 0.8709454 (449)	total: 39.6s	remaining: 16.9s
701:	test: 0.8696192	best: 0.8709454 (449)	total: 39.7s	remaining: 16.8s
702:	test: 0.8696295	best: 0.8709454 (449)	total: 39.7s	remaining: 16.8s
703:	test: 0.8696366	best: 0.8709454 (449)	total: 39.8s	remaining: 16.7s
704:	test: 0.8696538	best: 0.8709454 (449)	total: 39.8s	remaining: 16.7s
705:	test: 0.8696485	best: 0.8709454 (449)	total: 39.

805:	test: 0.8694358	best: 0.8709454 (449)	total: 45.9s	remaining: 11s
806:	test: 0.8694276	best: 0.8709454 (449)	total: 46s	remaining: 11s
807:	test: 0.8694775	best: 0.8709454 (449)	total: 46s	remaining: 10.9s
808:	test: 0.8694937	best: 0.8709454 (449)	total: 46.1s	remaining: 10.9s
809:	test: 0.8694916	best: 0.8709454 (449)	total: 46.1s	remaining: 10.8s
810:	test: 0.8694995	best: 0.8709454 (449)	total: 46.2s	remaining: 10.8s
811:	test: 0.8694753	best: 0.8709454 (449)	total: 46.2s	remaining: 10.7s
812:	test: 0.8694392	best: 0.8709454 (449)	total: 46.3s	remaining: 10.7s
813:	test: 0.8694620	best: 0.8709454 (449)	total: 46.4s	remaining: 10.6s
814:	test: 0.8694586	best: 0.8709454 (449)	total: 46.4s	remaining: 10.5s
815:	test: 0.8694750	best: 0.8709454 (449)	total: 46.5s	remaining: 10.5s
816:	test: 0.8694750	best: 0.8709454 (449)	total: 46.6s	remaining: 10.4s
817:	test: 0.8695055	best: 0.8709454 (449)	total: 46.6s	remaining: 10.4s
818:	test: 0.8694262	best: 0.8709454 (449)	total: 46.7s	rem

921:	test: 0.8689760	best: 0.8709454 (449)	total: 52.5s	remaining: 4.44s
922:	test: 0.8689585	best: 0.8709454 (449)	total: 52.6s	remaining: 4.39s
923:	test: 0.8689458	best: 0.8709454 (449)	total: 52.7s	remaining: 4.33s
924:	test: 0.8689471	best: 0.8709454 (449)	total: 52.7s	remaining: 4.28s
925:	test: 0.8689392	best: 0.8709454 (449)	total: 52.8s	remaining: 4.22s
926:	test: 0.8689383	best: 0.8709454 (449)	total: 52.8s	remaining: 4.16s
927:	test: 0.8689427	best: 0.8709454 (449)	total: 52.9s	remaining: 4.1s
928:	test: 0.8689472	best: 0.8709454 (449)	total: 52.9s	remaining: 4.04s
929:	test: 0.8689149	best: 0.8709454 (449)	total: 53s	remaining: 3.99s
930:	test: 0.8689254	best: 0.8709454 (449)	total: 53s	remaining: 3.93s
931:	test: 0.8689416	best: 0.8709454 (449)	total: 53.1s	remaining: 3.88s
932:	test: 0.8689274	best: 0.8709454 (449)	total: 53.2s	remaining: 3.82s
933:	test: 0.8689174	best: 0.8709454 (449)	total: 53.2s	remaining: 3.76s
934:	test: 0.8689197	best: 0.8709454 (449)	total: 53.3s	

In [145]:
h1n1_preds = model_classifier_h1n1.predict(X_test)

In [146]:
h1n1_pred = model_classifier_h1n1.predict_proba(X_test)

In [147]:
h1n1_pred = h1n1_pred[:,1].reshape(-1,1)

In [148]:
accuracy_score(y_test.h1n1_vaccine, h1n1_preds)

0.8577311868214152

In [149]:
roc_auc_score(y_test.h1n1_vaccine, h1n1_pred)

0.8709454236796019

### Seasonal Vaccine

In [150]:
model_classifier_seasonal = CatBoostClassifier(cat_features=categorical_features_indices, eval_metric='AUC')

In [151]:
model_classifier_seasonal.fit(X_train, y_train.seasonal_vaccine, eval_set=(X_test, y_test.seasonal_vaccine), use_best_model=True, plot=True);

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.065316
0:	test: 0.8159216	best: 0.8159216 (0)	total: 65.8ms	remaining: 1m 5s
1:	test: 0.8258981	best: 0.8258981 (1)	total: 109ms	remaining: 54.5s
2:	test: 0.8310996	best: 0.8310996 (2)	total: 153ms	remaining: 50.9s
3:	test: 0.8319270	best: 0.8319270 (3)	total: 197ms	remaining: 49.1s
4:	test: 0.8320700	best: 0.8320700 (4)	total: 247ms	remaining: 49.1s
5:	test: 0.8326555	best: 0.8326555 (5)	total: 295ms	remaining: 48.8s
6:	test: 0.8331410	best: 0.8331410 (6)	total: 372ms	remaining: 52.8s
7:	test: 0.8326904	best: 0.8331410 (6)	total: 424ms	remaining: 52.6s
8:	test: 0.8332308	best: 0.8332308 (8)	total: 473ms	remaining: 52.1s
9:	test: 0.8336175	best: 0.8336175 (9)	total: 535ms	remaining: 52.9s
10:	test: 0.8354368	best: 0.8354368 (10)	total: 586ms	remaining: 52.6s
11:	test: 0.8364208	best: 0.8364208 (11)	total: 637ms	remaining: 52.4s
12:	test: 0.8380613	best: 0.8380613 (12)	total: 718ms	remaining: 54.5s
13:	test: 0.8392185	best: 0.8392185 (13)	total: 771ms	remaining: 5

117:	test: 0.8583905	best: 0.8583920 (114)	total: 6.67s	remaining: 49.9s
118:	test: 0.8585324	best: 0.8585324 (118)	total: 6.75s	remaining: 50s
119:	test: 0.8585609	best: 0.8585609 (119)	total: 6.8s	remaining: 49.9s
120:	test: 0.8585844	best: 0.8585844 (120)	total: 6.85s	remaining: 49.8s
121:	test: 0.8585350	best: 0.8585844 (120)	total: 6.91s	remaining: 49.7s
122:	test: 0.8585540	best: 0.8585844 (120)	total: 6.96s	remaining: 49.6s
123:	test: 0.8586059	best: 0.8586059 (123)	total: 7s	remaining: 49.5s
124:	test: 0.8586685	best: 0.8586685 (124)	total: 7.05s	remaining: 49.4s
125:	test: 0.8586656	best: 0.8586685 (124)	total: 7.11s	remaining: 49.3s
126:	test: 0.8587307	best: 0.8587307 (126)	total: 7.16s	remaining: 49.2s
127:	test: 0.8587489	best: 0.8587489 (127)	total: 7.22s	remaining: 49.2s
128:	test: 0.8587592	best: 0.8587592 (128)	total: 7.27s	remaining: 49.1s
129:	test: 0.8587938	best: 0.8587938 (129)	total: 7.33s	remaining: 49.1s
130:	test: 0.8587939	best: 0.8587939 (130)	total: 7.39s	r

230:	test: 0.8603821	best: 0.8604414 (227)	total: 12.8s	remaining: 42.7s
231:	test: 0.8604290	best: 0.8604414 (227)	total: 12.9s	remaining: 42.6s
232:	test: 0.8604380	best: 0.8604414 (227)	total: 12.9s	remaining: 42.6s
233:	test: 0.8604551	best: 0.8604551 (233)	total: 13s	remaining: 42.5s
234:	test: 0.8604567	best: 0.8604567 (234)	total: 13.1s	remaining: 42.5s
235:	test: 0.8604478	best: 0.8604567 (234)	total: 13.1s	remaining: 42.4s
236:	test: 0.8605068	best: 0.8605068 (236)	total: 13.2s	remaining: 42.4s
237:	test: 0.8606047	best: 0.8606047 (237)	total: 13.2s	remaining: 42.3s
238:	test: 0.8606223	best: 0.8606223 (238)	total: 13.3s	remaining: 42.3s
239:	test: 0.8606835	best: 0.8606835 (239)	total: 13.3s	remaining: 42.2s
240:	test: 0.8606671	best: 0.8606835 (239)	total: 13.4s	remaining: 42.1s
241:	test: 0.8606437	best: 0.8606835 (239)	total: 13.4s	remaining: 42s
242:	test: 0.8606537	best: 0.8606835 (239)	total: 13.5s	remaining: 41.9s
243:	test: 0.8606477	best: 0.8606835 (239)	total: 13.5s

344:	test: 0.8613116	best: 0.8613116 (344)	total: 19s	remaining: 36.1s
345:	test: 0.8613108	best: 0.8613116 (344)	total: 19.1s	remaining: 36s
346:	test: 0.8613417	best: 0.8613417 (346)	total: 19.1s	remaining: 36s
347:	test: 0.8613449	best: 0.8613449 (347)	total: 19.2s	remaining: 35.9s
348:	test: 0.8613432	best: 0.8613449 (347)	total: 19.2s	remaining: 35.9s
349:	test: 0.8613431	best: 0.8613449 (347)	total: 19.3s	remaining: 35.9s
350:	test: 0.8613417	best: 0.8613449 (347)	total: 19.4s	remaining: 35.8s
351:	test: 0.8613374	best: 0.8613449 (347)	total: 19.4s	remaining: 35.7s
352:	test: 0.8613539	best: 0.8613539 (352)	total: 19.5s	remaining: 35.7s
353:	test: 0.8613564	best: 0.8613564 (353)	total: 19.5s	remaining: 35.6s
354:	test: 0.8613982	best: 0.8613982 (354)	total: 19.6s	remaining: 35.6s
355:	test: 0.8614280	best: 0.8614280 (355)	total: 19.6s	remaining: 35.5s
356:	test: 0.8614516	best: 0.8614516 (356)	total: 19.7s	remaining: 35.5s
357:	test: 0.8614541	best: 0.8614541 (357)	total: 19.7s	r

459:	test: 0.8614383	best: 0.8614974 (367)	total: 25.2s	remaining: 29.6s
460:	test: 0.8614328	best: 0.8614974 (367)	total: 25.3s	remaining: 29.6s
461:	test: 0.8614253	best: 0.8614974 (367)	total: 25.4s	remaining: 29.5s
462:	test: 0.8614153	best: 0.8614974 (367)	total: 25.4s	remaining: 29.5s
463:	test: 0.8614365	best: 0.8614974 (367)	total: 25.5s	remaining: 29.4s
464:	test: 0.8614560	best: 0.8614974 (367)	total: 25.5s	remaining: 29.4s
465:	test: 0.8614689	best: 0.8614974 (367)	total: 25.6s	remaining: 29.3s
466:	test: 0.8614716	best: 0.8614974 (367)	total: 25.6s	remaining: 29.2s
467:	test: 0.8614715	best: 0.8614974 (367)	total: 25.7s	remaining: 29.2s
468:	test: 0.8614714	best: 0.8614974 (367)	total: 25.7s	remaining: 29.1s
469:	test: 0.8614671	best: 0.8614974 (367)	total: 25.8s	remaining: 29.1s
470:	test: 0.8615183	best: 0.8615183 (470)	total: 25.8s	remaining: 29s
471:	test: 0.8614777	best: 0.8615183 (470)	total: 25.9s	remaining: 28.9s
472:	test: 0.8614916	best: 0.8615183 (470)	total: 25.

572:	test: 0.8614223	best: 0.8615272 (558)	total: 31.4s	remaining: 23.4s
573:	test: 0.8614101	best: 0.8615272 (558)	total: 31.5s	remaining: 23.4s
574:	test: 0.8614209	best: 0.8615272 (558)	total: 31.5s	remaining: 23.3s
575:	test: 0.8614292	best: 0.8615272 (558)	total: 31.6s	remaining: 23.2s
576:	test: 0.8614288	best: 0.8615272 (558)	total: 31.6s	remaining: 23.2s
577:	test: 0.8614346	best: 0.8615272 (558)	total: 31.7s	remaining: 23.2s
578:	test: 0.8614404	best: 0.8615272 (558)	total: 31.8s	remaining: 23.1s
579:	test: 0.8614326	best: 0.8615272 (558)	total: 31.8s	remaining: 23s
580:	test: 0.8614328	best: 0.8615272 (558)	total: 31.9s	remaining: 23s
581:	test: 0.8614395	best: 0.8615272 (558)	total: 31.9s	remaining: 22.9s
582:	test: 0.8614361	best: 0.8615272 (558)	total: 32s	remaining: 22.9s
583:	test: 0.8614338	best: 0.8615272 (558)	total: 32s	remaining: 22.8s
584:	test: 0.8614515	best: 0.8615272 (558)	total: 32.1s	remaining: 22.8s
585:	test: 0.8614424	best: 0.8615272 (558)	total: 32.2s	rem

685:	test: 0.8612591	best: 0.8615272 (558)	total: 37.8s	remaining: 17.3s
686:	test: 0.8612467	best: 0.8615272 (558)	total: 37.9s	remaining: 17.3s
687:	test: 0.8612408	best: 0.8615272 (558)	total: 37.9s	remaining: 17.2s
688:	test: 0.8612381	best: 0.8615272 (558)	total: 38s	remaining: 17.1s
689:	test: 0.8612545	best: 0.8615272 (558)	total: 38s	remaining: 17.1s
690:	test: 0.8612933	best: 0.8615272 (558)	total: 38.1s	remaining: 17s
691:	test: 0.8612890	best: 0.8615272 (558)	total: 38.1s	remaining: 17s
692:	test: 0.8612824	best: 0.8615272 (558)	total: 38.2s	remaining: 16.9s
693:	test: 0.8612916	best: 0.8615272 (558)	total: 38.2s	remaining: 16.9s
694:	test: 0.8612915	best: 0.8615272 (558)	total: 38.3s	remaining: 16.8s
695:	test: 0.8613091	best: 0.8615272 (558)	total: 38.4s	remaining: 16.8s
696:	test: 0.8613073	best: 0.8615272 (558)	total: 38.4s	remaining: 16.7s
697:	test: 0.8612950	best: 0.8615272 (558)	total: 38.5s	remaining: 16.6s
698:	test: 0.8613014	best: 0.8615272 (558)	total: 38.5s	rem

799:	test: 0.8612200	best: 0.8615272 (558)	total: 44s	remaining: 11s
800:	test: 0.8612115	best: 0.8615272 (558)	total: 44.1s	remaining: 10.9s
801:	test: 0.8612484	best: 0.8615272 (558)	total: 44.1s	remaining: 10.9s
802:	test: 0.8612558	best: 0.8615272 (558)	total: 44.2s	remaining: 10.8s
803:	test: 0.8612660	best: 0.8615272 (558)	total: 44.2s	remaining: 10.8s
804:	test: 0.8612688	best: 0.8615272 (558)	total: 44.3s	remaining: 10.7s
805:	test: 0.8612416	best: 0.8615272 (558)	total: 44.3s	remaining: 10.7s
806:	test: 0.8612369	best: 0.8615272 (558)	total: 44.4s	remaining: 10.6s
807:	test: 0.8612393	best: 0.8615272 (558)	total: 44.4s	remaining: 10.6s
808:	test: 0.8612334	best: 0.8615272 (558)	total: 44.5s	remaining: 10.5s
809:	test: 0.8612238	best: 0.8615272 (558)	total: 44.5s	remaining: 10.4s
810:	test: 0.8612282	best: 0.8615272 (558)	total: 44.6s	remaining: 10.4s
811:	test: 0.8612261	best: 0.8615272 (558)	total: 44.6s	remaining: 10.3s
812:	test: 0.8612110	best: 0.8615272 (558)	total: 44.7s

916:	test: 0.8610526	best: 0.8615272 (558)	total: 50.4s	remaining: 4.56s
917:	test: 0.8610544	best: 0.8615272 (558)	total: 50.4s	remaining: 4.5s
918:	test: 0.8610591	best: 0.8615272 (558)	total: 50.5s	remaining: 4.45s
919:	test: 0.8610530	best: 0.8615272 (558)	total: 50.6s	remaining: 4.4s
920:	test: 0.8610891	best: 0.8615272 (558)	total: 50.6s	remaining: 4.34s
921:	test: 0.8611000	best: 0.8615272 (558)	total: 50.7s	remaining: 4.29s
922:	test: 0.8610962	best: 0.8615272 (558)	total: 50.7s	remaining: 4.23s
923:	test: 0.8611157	best: 0.8615272 (558)	total: 50.8s	remaining: 4.17s
924:	test: 0.8611211	best: 0.8615272 (558)	total: 50.8s	remaining: 4.12s
925:	test: 0.8611047	best: 0.8615272 (558)	total: 50.9s	remaining: 4.07s
926:	test: 0.8610607	best: 0.8615272 (558)	total: 50.9s	remaining: 4.01s
927:	test: 0.8610603	best: 0.8615272 (558)	total: 51s	remaining: 3.96s
928:	test: 0.8610547	best: 0.8615272 (558)	total: 51s	remaining: 3.9s
929:	test: 0.8610932	best: 0.8615272 (558)	total: 51.1s	re

In [152]:
seasonal_preds = model_classifier_seasonal.predict(X_test)

In [153]:
accuracy_score(y_test.seasonal_vaccine, seasonal_preds)

0.7847248221639835

In [154]:
seasonal_pred = model_classifier_h1n1.predict_proba(X_test)

In [155]:
seasonal_pred = seasonal_pred[:,1].reshape(-1,1)

In [156]:
roc_auc_score(y_test.seasonal_vaccine, seasonal_pred)

0.712041149724607

It looks like we have a pretty good accuracy score of 86% and .87 ROCAUC score for H1N1 vaccination predictions, but an underwhelming 78% accuracy and .71 ROCAUC score for seasonal vaccine predictions. CatBoost will need to have its hyperparameters optimized to find the best fit model for our targets.