# Import Modules

In [3]:
import numpy as np
import pandas as pd
pd.set_option('max_columns', 90)
import matplotlib.pyplot as plt
import seaborn as sns
PALETTE = ['#dd4124','#009473', '#b4b4b4', '#336b87']
BACKCOLOR = '#f6f5f5'
sns.set_palette(PALETTE)

from scipy.stats import norm, probplot, skew
from scipy.special import boxcox1p
import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold, RepeatedStratifiedKFold
from sklearn.model_selection import RepeatedKFold, train_test_split, RandomizedSearchCV
from sklearn.neighbors import  KNeighborsClassifier as knn
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
import xgboost
from xgboost import XGBClassifier
from sklearn.metrics import auc, accuracy_score, confusion_matrix

from IPython.core.display import HTML

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120
mpl.rcParams['axes.spines.top'] = False
mpl.rcParams['axes.spines.right'] = False

# Read Data

In [4]:
# Import training and test data.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# When exploring a dataset, it is recommended to use the entire data.
all_data = pd.concat((train, test)).reset_index(drop=True)

# Feature engineering

In [5]:
# missing values
all_data['Age'] = all_data.Age.fillna(train.Age.median())
all_data['Fare'] = all_data.Fare.fillna(train.Fare.median())
all_data.dropna(subset=['Embarked'], inplace=True)
cabins = all_data.Cabin
all_data.drop(['Cabin'], axis=1, inplace=True)

In [6]:
# derivative features
all_data['CabinCnt'] = cabins.apply(lambda x: 0 if pd.isna(x) else len(x.split(' ')))
all_data['CabinClass'] = cabins.apply(lambda x: str(x)[0])
all_data['IsNumericTicket'] = all_data.Ticket.apply(lambda x: 1 if x.isnumeric() else 0)
all_data['TicketType'] = all_data.Ticket.apply(lambda x: ''.join(x.split(' ')[:-1]).replace('.','').replace('/','').lower() if len(x.split(' ')[:-1]) > 0 else 0)
all_data['Title'] = all_data.Name.apply(lambda x: x.split(',')[1].split('.')[0].strip())
all_data['Family'] = all_data.SibSp + all_data.Parch

In [7]:
# feature transform
numeric_vars = ['Age', 'SibSp', 'Parch', 'Fare', 'CabinCnt', 'Family']
ordinal_vars = ['Pclass']
nominal_vars = ['Name', 'Sex', 'Ticket', 'Embarked', 'CabinClass', 'IsNumericTicket', 'TicketType', 'Title']
all_data[nominal_vars] = all_data[nominal_vars].astype('str')

for feature in numeric_vars:
    all_data[feature] = np.log1p(all_data[feature])

scaler = StandardScaler()
numeric_vars = all_data.columns[(all_data.dtypes != 'object') & (all_data.columns != 'PassengerId') & (all_data.columns != 'Survived') & (all_data.columns != 'IsTrain')]
all_data[numeric_vars] = scaler.fit_transform(all_data[numeric_vars])

In [8]:
# split data
all_data.drop(['PassengerId', 'Name', 'Ticket'], axis=1, inplace=True)
data_dummies = pd.get_dummies(all_data)
X_train = data_dummies[data_dummies.Survived.notnull()].drop(['Survived'], axis=1)
y_train = data_dummies[data_dummies.Survived.notnull()].Survived
X_test = data_dummies[data_dummies.Survived.isnull()].drop(['Survived'], axis=1)

In [9]:
X_train.shape, y_train.shape, X_test.shape

((889, 76), (889,), (418, 76))

## Modeling

In [10]:
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

In [None]:
sol = pd.read_csv('정답.csv')

## 테스트1
그리드서치를 사용해서 best_estimator를 생성 후 테스트한 결과와 best_params를 사용해서 새로 모델 생성 후 학습(전체 데이터) -> 테스트한 결과가 같다.

In [91]:
model_lg = LogisticRegression()
param_lg = {
    'C': [.001, .0001, .01, .1, 1]
}
search_lg = GridSearchCV(model_lg, param_lg, cv=cv, scoring='roc_auc').fit(X_train, y_train)
cv_rlt_lg = pd.DataFrame(search_lg.cv_results_)

best_params_lg = search_lg.best_params_
best_estimator_lg = search_lg.best_estimator_
model_lg = LogisticRegression(**best_params_lg)

model_lg = model_lg.fit(X_train, y_train)
y_pred_model_lg = model_lg.predict(X_test)
y_pred_best_estimator_lg = best_estimator_lg.predict(X_test)

In [95]:
model_svm = SVC()
param_svm = {
    'C': [.001, .0001, .01, .1, 1],
    'gamma': [.001, .0001, .01, .1, 1]
}
search_svm = GridSearchCV(model_svm, param_svm, cv=cv, scoring='roc_auc', verbose=1).fit(X_train, y_train)
cv_rlt_svm = pd.DataFrame(search_svm.cv_results_)

best_params_svm = search_svm.best_params_
best_estimator_svm = search_svm.best_estimator_
model_svm = SVC(**best_params_svm)

model_svm = model_svm.fit(X_train, y_train)
y_pred_model_svm = model_svm.predict(X_test)
y_pred_best_estimator_svm = best_estimator_svm.predict(X_test)

Fitting 5 folds for each of 25 candidates, totalling 125 fits


In [54]:
xtrain, xvalid = train_test_split(X_train, test_size=0.2, random_state=42)
ytrain, yvalid = train_test_split(y_train, test_size=0.2, random_state=42)

In [108]:
model_xgb = XGBClassifier()
param_xgb = {
    'learning_rate': [.001, .01, 1, 10],
    'objective': ['binary:logistic'],
    'use_label_encoder': [False],
    'eval_metric': ['mlogloss'],
    'n_estimators': [1000]
}
search_xgb = GridSearchCV(model_xgb, param_xgb, cv=cv, verbose=1, scoring='roc_auc').fit(xtrain, ytrain, early_stopping_rounds=15,
                                                                      eval_metric=['logloss'], eval_set=[(xvalid, yvalid)]
                                                                     )
cv_rlt_xgb = pd.DataFrame(search_xgb.cv_results_)

best_params_xgb = search_xgb.best_params_
best_estimator_xgb = search_xgb.best_estimator_
model_xgb = XGBClassifier(**best_params_xgb)

model_xgb = model_xgb.fit(X_train, y_train)
y_pred_model_xgb = model_xgb.predict(X_test)
y_pred_best_estimator_xgb = best_estimator_xgb.predict(X_test)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[0]	validation_0-logloss:0.69268
[1]	validation_0-logloss:0.69221
[2]	validation_0-logloss:0.69174
[3]	validation_0-logloss:0.69128
[4]	validation_0-logloss:0.69082
[5]	validation_0-logloss:0.69035
[6]	validation_0-logloss:0.68990
[7]	validation_0-logloss:0.68943
[8]	validation_0-logloss:0.68897
[9]	validation_0-logloss:0.68851
[10]	validation_0-logloss:0.68806
[11]	validation_0-logloss:0.68759
[12]	validation_0-logloss:0.68713
[13]	validation_0-logloss:0.68668
[14]	validation_0-logloss:0.68623
[15]	validation_0-logloss:0.68578
[16]	validation_0-logloss:0.68532
[17]	validation_0-logloss:0.68487
[18]	validation_0-logloss:0.68442
[19]	validation_0-logloss:0.68397
[20]	validation_0-logloss:0.68352
[21]	validation_0-logloss:0.68307
[22]	validation_0-logloss:0.68263
[23]	validation_0-logloss:0.68218
[24]	validation_0-logloss:0.68173
[25]	validation_0-logloss:0.68129
[26]	validation_0-logloss:0.68085
[27]	validation_0-logloss:0.6804

[236]	validation_0-logloss:0.60347
[237]	validation_0-logloss:0.60318
[238]	validation_0-logloss:0.60287
[239]	validation_0-logloss:0.60257
[240]	validation_0-logloss:0.60226
[241]	validation_0-logloss:0.60196
[242]	validation_0-logloss:0.60165
[243]	validation_0-logloss:0.60136
[244]	validation_0-logloss:0.60106
[245]	validation_0-logloss:0.60076
[246]	validation_0-logloss:0.60045
[247]	validation_0-logloss:0.60016
[248]	validation_0-logloss:0.59986
[249]	validation_0-logloss:0.59956
[250]	validation_0-logloss:0.59927
[251]	validation_0-logloss:0.59897
[252]	validation_0-logloss:0.59866
[253]	validation_0-logloss:0.59837
[254]	validation_0-logloss:0.59808
[255]	validation_0-logloss:0.59778
[256]	validation_0-logloss:0.59749
[257]	validation_0-logloss:0.59720
[258]	validation_0-logloss:0.59690
[259]	validation_0-logloss:0.59661
[260]	validation_0-logloss:0.59632
[261]	validation_0-logloss:0.59602
[262]	validation_0-logloss:0.59573
[263]	validation_0-logloss:0.59545
[264]	validation_0-l

[471]	validation_0-logloss:0.54495
[472]	validation_0-logloss:0.54475
[473]	validation_0-logloss:0.54454
[474]	validation_0-logloss:0.54434
[475]	validation_0-logloss:0.54414
[476]	validation_0-logloss:0.54394
[477]	validation_0-logloss:0.54374
[478]	validation_0-logloss:0.54354
[479]	validation_0-logloss:0.54334
[480]	validation_0-logloss:0.54314
[481]	validation_0-logloss:0.54294
[482]	validation_0-logloss:0.54275
[483]	validation_0-logloss:0.54255
[484]	validation_0-logloss:0.54235
[485]	validation_0-logloss:0.54215
[486]	validation_0-logloss:0.54195
[487]	validation_0-logloss:0.54176
[488]	validation_0-logloss:0.54156
[489]	validation_0-logloss:0.54136
[490]	validation_0-logloss:0.54117
[491]	validation_0-logloss:0.54097
[492]	validation_0-logloss:0.54078
[493]	validation_0-logloss:0.54058
[494]	validation_0-logloss:0.54039
[495]	validation_0-logloss:0.54020
[496]	validation_0-logloss:0.54001
[497]	validation_0-logloss:0.53982
[498]	validation_0-logloss:0.53963
[499]	validation_0-l

[706]	validation_0-logloss:0.50608
[707]	validation_0-logloss:0.50593
[708]	validation_0-logloss:0.50579
[709]	validation_0-logloss:0.50564
[710]	validation_0-logloss:0.50549
[711]	validation_0-logloss:0.50535
[712]	validation_0-logloss:0.50521
[713]	validation_0-logloss:0.50506
[714]	validation_0-logloss:0.50492
[715]	validation_0-logloss:0.50477
[716]	validation_0-logloss:0.50463
[717]	validation_0-logloss:0.50449
[718]	validation_0-logloss:0.50434
[719]	validation_0-logloss:0.50420
[720]	validation_0-logloss:0.50406
[721]	validation_0-logloss:0.50392
[722]	validation_0-logloss:0.50377
[723]	validation_0-logloss:0.50364
[724]	validation_0-logloss:0.50349
[725]	validation_0-logloss:0.50335
[726]	validation_0-logloss:0.50321
[727]	validation_0-logloss:0.50306
[728]	validation_0-logloss:0.50293
[729]	validation_0-logloss:0.50279
[730]	validation_0-logloss:0.50265
[731]	validation_0-logloss:0.50251
[732]	validation_0-logloss:0.50236
[733]	validation_0-logloss:0.50222
[734]	validation_0-l

[941]	validation_0-logloss:0.47869
[942]	validation_0-logloss:0.47860
[943]	validation_0-logloss:0.47851
[944]	validation_0-logloss:0.47842
[945]	validation_0-logloss:0.47833
[946]	validation_0-logloss:0.47824
[947]	validation_0-logloss:0.47815
[948]	validation_0-logloss:0.47806
[949]	validation_0-logloss:0.47797
[950]	validation_0-logloss:0.47788
[951]	validation_0-logloss:0.47779
[952]	validation_0-logloss:0.47770
[953]	validation_0-logloss:0.47762
[954]	validation_0-logloss:0.47753
[955]	validation_0-logloss:0.47744
[956]	validation_0-logloss:0.47735
[957]	validation_0-logloss:0.47727
[958]	validation_0-logloss:0.47718
[959]	validation_0-logloss:0.47709
[960]	validation_0-logloss:0.47701
[961]	validation_0-logloss:0.47692
[962]	validation_0-logloss:0.47683
[963]	validation_0-logloss:0.47674
[964]	validation_0-logloss:0.47665
[965]	validation_0-logloss:0.47657
[966]	validation_0-logloss:0.47648
[967]	validation_0-logloss:0.47639
[968]	validation_0-logloss:0.47630
[969]	validation_0-l

[179]	validation_0-logloss:0.62538
[180]	validation_0-logloss:0.62506
[181]	validation_0-logloss:0.62475
[182]	validation_0-logloss:0.62444
[183]	validation_0-logloss:0.62412
[184]	validation_0-logloss:0.62382
[185]	validation_0-logloss:0.62351
[186]	validation_0-logloss:0.62320
[187]	validation_0-logloss:0.62289
[188]	validation_0-logloss:0.62257
[189]	validation_0-logloss:0.62227
[190]	validation_0-logloss:0.62196
[191]	validation_0-logloss:0.62165
[192]	validation_0-logloss:0.62135
[193]	validation_0-logloss:0.62104
[194]	validation_0-logloss:0.62074
[195]	validation_0-logloss:0.62044
[196]	validation_0-logloss:0.62014
[197]	validation_0-logloss:0.61983
[198]	validation_0-logloss:0.61953
[199]	validation_0-logloss:0.61922
[200]	validation_0-logloss:0.61893
[201]	validation_0-logloss:0.61863
[202]	validation_0-logloss:0.61833
[203]	validation_0-logloss:0.61802
[204]	validation_0-logloss:0.61773
[205]	validation_0-logloss:0.61743
[206]	validation_0-logloss:0.61713
[207]	validation_0-l

[414]	validation_0-logloss:0.56438
[415]	validation_0-logloss:0.56415
[416]	validation_0-logloss:0.56393
[417]	validation_0-logloss:0.56370
[418]	validation_0-logloss:0.56347
[419]	validation_0-logloss:0.56324
[420]	validation_0-logloss:0.56302
[421]	validation_0-logloss:0.56279
[422]	validation_0-logloss:0.56256
[423]	validation_0-logloss:0.56234
[424]	validation_0-logloss:0.56212
[425]	validation_0-logloss:0.56189
[426]	validation_0-logloss:0.56166
[427]	validation_0-logloss:0.56144
[428]	validation_0-logloss:0.56122
[429]	validation_0-logloss:0.56100
[430]	validation_0-logloss:0.56077
[431]	validation_0-logloss:0.56055
[432]	validation_0-logloss:0.56033
[433]	validation_0-logloss:0.56011
[434]	validation_0-logloss:0.55988
[435]	validation_0-logloss:0.55967
[436]	validation_0-logloss:0.55944
[437]	validation_0-logloss:0.55922
[438]	validation_0-logloss:0.55900
[439]	validation_0-logloss:0.55878
[440]	validation_0-logloss:0.55856
[441]	validation_0-logloss:0.55834
[442]	validation_0-l

[649]	validation_0-logloss:0.52201
[650]	validation_0-logloss:0.52187
[651]	validation_0-logloss:0.52173
[652]	validation_0-logloss:0.52159
[653]	validation_0-logloss:0.52145
[654]	validation_0-logloss:0.52131
[655]	validation_0-logloss:0.52118
[656]	validation_0-logloss:0.52104
[657]	validation_0-logloss:0.52091
[658]	validation_0-logloss:0.52078
[659]	validation_0-logloss:0.52064
[660]	validation_0-logloss:0.52051
[661]	validation_0-logloss:0.52037
[662]	validation_0-logloss:0.52024
[663]	validation_0-logloss:0.52011
[664]	validation_0-logloss:0.51997
[665]	validation_0-logloss:0.51984
[666]	validation_0-logloss:0.51971
[667]	validation_0-logloss:0.51958
[668]	validation_0-logloss:0.51944
[669]	validation_0-logloss:0.51931
[670]	validation_0-logloss:0.51918
[671]	validation_0-logloss:0.51905
[672]	validation_0-logloss:0.51892
[673]	validation_0-logloss:0.51879
[674]	validation_0-logloss:0.51866
[675]	validation_0-logloss:0.51852
[676]	validation_0-logloss:0.51839
[677]	validation_0-l

[884]	validation_0-logloss:0.49600
[885]	validation_0-logloss:0.49592
[886]	validation_0-logloss:0.49582
[887]	validation_0-logloss:0.49574
[888]	validation_0-logloss:0.49564
[889]	validation_0-logloss:0.49556
[890]	validation_0-logloss:0.49547
[891]	validation_0-logloss:0.49538
[892]	validation_0-logloss:0.49529
[893]	validation_0-logloss:0.49521
[894]	validation_0-logloss:0.49512
[895]	validation_0-logloss:0.49503
[896]	validation_0-logloss:0.49494
[897]	validation_0-logloss:0.49486
[898]	validation_0-logloss:0.49477
[899]	validation_0-logloss:0.49468
[900]	validation_0-logloss:0.49459
[901]	validation_0-logloss:0.49451
[902]	validation_0-logloss:0.49442
[903]	validation_0-logloss:0.49434
[904]	validation_0-logloss:0.49424
[905]	validation_0-logloss:0.49417
[906]	validation_0-logloss:0.49407
[907]	validation_0-logloss:0.49399
[908]	validation_0-logloss:0.49391
[909]	validation_0-logloss:0.49382
[910]	validation_0-logloss:0.49373
[911]	validation_0-logloss:0.49365
[912]	validation_0-l

[122]	validation_0-logloss:0.63909
[123]	validation_0-logloss:0.63870
[124]	validation_0-logloss:0.63832
[125]	validation_0-logloss:0.63793
[126]	validation_0-logloss:0.63754
[127]	validation_0-logloss:0.63716
[128]	validation_0-logloss:0.63677
[129]	validation_0-logloss:0.63639
[130]	validation_0-logloss:0.63601
[131]	validation_0-logloss:0.63563
[132]	validation_0-logloss:0.63524
[133]	validation_0-logloss:0.63487
[134]	validation_0-logloss:0.63449
[135]	validation_0-logloss:0.63411
[136]	validation_0-logloss:0.63373
[137]	validation_0-logloss:0.63335
[138]	validation_0-logloss:0.63297
[139]	validation_0-logloss:0.63260
[140]	validation_0-logloss:0.63222
[141]	validation_0-logloss:0.63185
[142]	validation_0-logloss:0.63148
[143]	validation_0-logloss:0.63110
[144]	validation_0-logloss:0.63073
[145]	validation_0-logloss:0.63036
[146]	validation_0-logloss:0.62999
[147]	validation_0-logloss:0.62962
[148]	validation_0-logloss:0.62925
[149]	validation_0-logloss:0.62888
[150]	validation_0-l

[357]	validation_0-logloss:0.56726
[358]	validation_0-logloss:0.56700
[359]	validation_0-logloss:0.56679
[360]	validation_0-logloss:0.56653
[361]	validation_0-logloss:0.56629
[362]	validation_0-logloss:0.56606
[363]	validation_0-logloss:0.56582
[364]	validation_0-logloss:0.56557
[365]	validation_0-logloss:0.56535
[366]	validation_0-logloss:0.56510
[367]	validation_0-logloss:0.56488
[368]	validation_0-logloss:0.56463
[369]	validation_0-logloss:0.56438
[370]	validation_0-logloss:0.56417
[371]	validation_0-logloss:0.56392
[372]	validation_0-logloss:0.56369
[373]	validation_0-logloss:0.56346
[374]	validation_0-logloss:0.56323
[375]	validation_0-logloss:0.56298
[376]	validation_0-logloss:0.56277
[377]	validation_0-logloss:0.56253
[378]	validation_0-logloss:0.56231
[379]	validation_0-logloss:0.56207
[380]	validation_0-logloss:0.56184
[381]	validation_0-logloss:0.56162
[382]	validation_0-logloss:0.56138
[383]	validation_0-logloss:0.56115
[384]	validation_0-logloss:0.56093
[385]	validation_0-l

[592]	validation_0-logloss:0.52142
[593]	validation_0-logloss:0.52124
[594]	validation_0-logloss:0.52108
[595]	validation_0-logloss:0.52090
[596]	validation_0-logloss:0.52073
[597]	validation_0-logloss:0.52058
[598]	validation_0-logloss:0.52041
[599]	validation_0-logloss:0.52024
[600]	validation_0-logloss:0.52007
[601]	validation_0-logloss:0.51990
[602]	validation_0-logloss:0.51974
[603]	validation_0-logloss:0.51959
[604]	validation_0-logloss:0.51943
[605]	validation_0-logloss:0.51929
[606]	validation_0-logloss:0.51912
[607]	validation_0-logloss:0.51898
[608]	validation_0-logloss:0.51883
[609]	validation_0-logloss:0.51866
[610]	validation_0-logloss:0.51853
[611]	validation_0-logloss:0.51834
[612]	validation_0-logloss:0.51820
[613]	validation_0-logloss:0.51806
[614]	validation_0-logloss:0.51789
[615]	validation_0-logloss:0.51776
[616]	validation_0-logloss:0.51760
[617]	validation_0-logloss:0.51743
[618]	validation_0-logloss:0.51731
[619]	validation_0-logloss:0.51716
[620]	validation_0-l

[827]	validation_0-logloss:0.49212
[828]	validation_0-logloss:0.49203
[829]	validation_0-logloss:0.49193
[830]	validation_0-logloss:0.49186
[831]	validation_0-logloss:0.49177
[832]	validation_0-logloss:0.49167
[833]	validation_0-logloss:0.49159
[834]	validation_0-logloss:0.49150
[835]	validation_0-logloss:0.49142
[836]	validation_0-logloss:0.49133
[837]	validation_0-logloss:0.49125
[838]	validation_0-logloss:0.49116
[839]	validation_0-logloss:0.49107
[840]	validation_0-logloss:0.49099
[841]	validation_0-logloss:0.49090
[842]	validation_0-logloss:0.49081
[843]	validation_0-logloss:0.49074
[844]	validation_0-logloss:0.49064
[845]	validation_0-logloss:0.49056
[846]	validation_0-logloss:0.49049
[847]	validation_0-logloss:0.49039
[848]	validation_0-logloss:0.49030
[849]	validation_0-logloss:0.49023
[850]	validation_0-logloss:0.49014
[851]	validation_0-logloss:0.49006
[852]	validation_0-logloss:0.48998
[853]	validation_0-logloss:0.48989
[854]	validation_0-logloss:0.48981
[855]	validation_0-l

[64]	validation_0-logloss:0.66481
[65]	validation_0-logloss:0.66442
[66]	validation_0-logloss:0.66400
[67]	validation_0-logloss:0.66361
[68]	validation_0-logloss:0.66320
[69]	validation_0-logloss:0.66281
[70]	validation_0-logloss:0.66242
[71]	validation_0-logloss:0.66201
[72]	validation_0-logloss:0.66162
[73]	validation_0-logloss:0.66123
[74]	validation_0-logloss:0.66082
[75]	validation_0-logloss:0.66044
[76]	validation_0-logloss:0.66005
[77]	validation_0-logloss:0.65965
[78]	validation_0-logloss:0.65927
[79]	validation_0-logloss:0.65886
[80]	validation_0-logloss:0.65848
[81]	validation_0-logloss:0.65810
[82]	validation_0-logloss:0.65770
[83]	validation_0-logloss:0.65732
[84]	validation_0-logloss:0.65694
[85]	validation_0-logloss:0.65654
[86]	validation_0-logloss:0.65617
[87]	validation_0-logloss:0.65579
[88]	validation_0-logloss:0.65539
[89]	validation_0-logloss:0.65502
[90]	validation_0-logloss:0.65465
[91]	validation_0-logloss:0.65425
[92]	validation_0-logloss:0.65388
[93]	validatio

[300]	validation_0-logloss:0.58833
[301]	validation_0-logloss:0.58806
[302]	validation_0-logloss:0.58780
[303]	validation_0-logloss:0.58755
[304]	validation_0-logloss:0.58728
[305]	validation_0-logloss:0.58702
[306]	validation_0-logloss:0.58677
[307]	validation_0-logloss:0.58651
[308]	validation_0-logloss:0.58625
[309]	validation_0-logloss:0.58598
[310]	validation_0-logloss:0.58573
[311]	validation_0-logloss:0.58548
[312]	validation_0-logloss:0.58521
[313]	validation_0-logloss:0.58497
[314]	validation_0-logloss:0.58472
[315]	validation_0-logloss:0.58445
[316]	validation_0-logloss:0.58420
[317]	validation_0-logloss:0.58395
[318]	validation_0-logloss:0.58369
[319]	validation_0-logloss:0.58345
[320]	validation_0-logloss:0.58320
[321]	validation_0-logloss:0.58294
[322]	validation_0-logloss:0.58269
[323]	validation_0-logloss:0.58244
[324]	validation_0-logloss:0.58218
[325]	validation_0-logloss:0.58194
[326]	validation_0-logloss:0.58169
[327]	validation_0-logloss:0.58144
[328]	validation_0-l

[535]	validation_0-logloss:0.53856
[536]	validation_0-logloss:0.53838
[537]	validation_0-logloss:0.53821
[538]	validation_0-logloss:0.53805
[539]	validation_0-logloss:0.53787
[540]	validation_0-logloss:0.53771
[541]	validation_0-logloss:0.53753
[542]	validation_0-logloss:0.53737
[543]	validation_0-logloss:0.53719
[544]	validation_0-logloss:0.53702
[545]	validation_0-logloss:0.53685
[546]	validation_0-logloss:0.53669
[547]	validation_0-logloss:0.53651
[548]	validation_0-logloss:0.53635
[549]	validation_0-logloss:0.53617
[550]	validation_0-logloss:0.53601
[551]	validation_0-logloss:0.53583
[552]	validation_0-logloss:0.53567
[553]	validation_0-logloss:0.53550
[554]	validation_0-logloss:0.53534
[555]	validation_0-logloss:0.53516
[556]	validation_0-logloss:0.53500
[557]	validation_0-logloss:0.53483
[558]	validation_0-logloss:0.53468
[559]	validation_0-logloss:0.53450
[560]	validation_0-logloss:0.53434
[561]	validation_0-logloss:0.53417
[562]	validation_0-logloss:0.53402
[563]	validation_0-l

[770]	validation_0-logloss:0.50418
[771]	validation_0-logloss:0.50406
[772]	validation_0-logloss:0.50393
[773]	validation_0-logloss:0.50380
[774]	validation_0-logloss:0.50368
[775]	validation_0-logloss:0.50355
[776]	validation_0-logloss:0.50342
[777]	validation_0-logloss:0.50330
[778]	validation_0-logloss:0.50317
[779]	validation_0-logloss:0.50306
[780]	validation_0-logloss:0.50293
[781]	validation_0-logloss:0.50280
[782]	validation_0-logloss:0.50268
[783]	validation_0-logloss:0.50255
[784]	validation_0-logloss:0.50243
[785]	validation_0-logloss:0.50231
[786]	validation_0-logloss:0.50219
[787]	validation_0-logloss:0.50207
[788]	validation_0-logloss:0.50194
[789]	validation_0-logloss:0.50182
[790]	validation_0-logloss:0.50170
[791]	validation_0-logloss:0.50158
[792]	validation_0-logloss:0.50145
[793]	validation_0-logloss:0.50134
[794]	validation_0-logloss:0.50122
[795]	validation_0-logloss:0.50110
[796]	validation_0-logloss:0.50097
[797]	validation_0-logloss:0.50085
[798]	validation_0-l

[5]	validation_0-logloss:0.69029
[6]	validation_0-logloss:0.68982
[7]	validation_0-logloss:0.68934
[8]	validation_0-logloss:0.68887
[9]	validation_0-logloss:0.68839
[10]	validation_0-logloss:0.68792
[11]	validation_0-logloss:0.68745
[12]	validation_0-logloss:0.68698
[13]	validation_0-logloss:0.68651
[14]	validation_0-logloss:0.68604
[15]	validation_0-logloss:0.68558
[16]	validation_0-logloss:0.68511
[17]	validation_0-logloss:0.68464
[18]	validation_0-logloss:0.68418
[19]	validation_0-logloss:0.68371
[20]	validation_0-logloss:0.68325
[21]	validation_0-logloss:0.68279
[22]	validation_0-logloss:0.68233
[23]	validation_0-logloss:0.68187
[24]	validation_0-logloss:0.68141
[25]	validation_0-logloss:0.68095
[26]	validation_0-logloss:0.68049
[27]	validation_0-logloss:0.68004
[28]	validation_0-logloss:0.67958
[29]	validation_0-logloss:0.67912
[30]	validation_0-logloss:0.67867
[31]	validation_0-logloss:0.67822
[32]	validation_0-logloss:0.67777
[33]	validation_0-logloss:0.67732
[34]	validation_0-l

[242]	validation_0-logloss:0.59888
[243]	validation_0-logloss:0.59857
[244]	validation_0-logloss:0.59825
[245]	validation_0-logloss:0.59795
[246]	validation_0-logloss:0.59764
[247]	validation_0-logloss:0.59733
[248]	validation_0-logloss:0.59703
[249]	validation_0-logloss:0.59672
[250]	validation_0-logloss:0.59641
[251]	validation_0-logloss:0.59611
[252]	validation_0-logloss:0.59580
[253]	validation_0-logloss:0.59549
[254]	validation_0-logloss:0.59518
[255]	validation_0-logloss:0.59488
[256]	validation_0-logloss:0.59458
[257]	validation_0-logloss:0.59428
[258]	validation_0-logloss:0.59398
[259]	validation_0-logloss:0.59368
[260]	validation_0-logloss:0.59337
[261]	validation_0-logloss:0.59307
[262]	validation_0-logloss:0.59277
[263]	validation_0-logloss:0.59247
[264]	validation_0-logloss:0.59216
[265]	validation_0-logloss:0.59187
[266]	validation_0-logloss:0.59158
[267]	validation_0-logloss:0.59128
[268]	validation_0-logloss:0.59098
[269]	validation_0-logloss:0.59069
[270]	validation_0-l

[477]	validation_0-logloss:0.53963
[478]	validation_0-logloss:0.53943
[479]	validation_0-logloss:0.53922
[480]	validation_0-logloss:0.53902
[481]	validation_0-logloss:0.53882
[482]	validation_0-logloss:0.53861
[483]	validation_0-logloss:0.53841
[484]	validation_0-logloss:0.53821
[485]	validation_0-logloss:0.53801
[486]	validation_0-logloss:0.53781
[487]	validation_0-logloss:0.53761
[488]	validation_0-logloss:0.53740
[489]	validation_0-logloss:0.53720
[490]	validation_0-logloss:0.53700
[491]	validation_0-logloss:0.53680
[492]	validation_0-logloss:0.53660
[493]	validation_0-logloss:0.53640
[494]	validation_0-logloss:0.53621
[495]	validation_0-logloss:0.53600
[496]	validation_0-logloss:0.53581
[497]	validation_0-logloss:0.53561
[498]	validation_0-logloss:0.53542
[499]	validation_0-logloss:0.53522
[500]	validation_0-logloss:0.53502
[501]	validation_0-logloss:0.53483
[502]	validation_0-logloss:0.53464
[503]	validation_0-logloss:0.53443
[504]	validation_0-logloss:0.53424
[505]	validation_0-l

[712]	validation_0-logloss:0.50018
[713]	validation_0-logloss:0.50004
[714]	validation_0-logloss:0.49990
[715]	validation_0-logloss:0.49978
[716]	validation_0-logloss:0.49965
[717]	validation_0-logloss:0.49950
[718]	validation_0-logloss:0.49937
[719]	validation_0-logloss:0.49924
[720]	validation_0-logloss:0.49911
[721]	validation_0-logloss:0.49897
[722]	validation_0-logloss:0.49885
[723]	validation_0-logloss:0.49871
[724]	validation_0-logloss:0.49857
[725]	validation_0-logloss:0.49845
[726]	validation_0-logloss:0.49831
[727]	validation_0-logloss:0.49817
[728]	validation_0-logloss:0.49806
[729]	validation_0-logloss:0.49793
[730]	validation_0-logloss:0.49779
[731]	validation_0-logloss:0.49766
[732]	validation_0-logloss:0.49754
[733]	validation_0-logloss:0.49741
[734]	validation_0-logloss:0.49727
[735]	validation_0-logloss:0.49716
[736]	validation_0-logloss:0.49701
[737]	validation_0-logloss:0.49687
[738]	validation_0-logloss:0.49677
[739]	validation_0-logloss:0.49663
[740]	validation_0-l

[947]	validation_0-logloss:0.47417
[948]	validation_0-logloss:0.47408
[949]	validation_0-logloss:0.47399
[950]	validation_0-logloss:0.47391
[951]	validation_0-logloss:0.47381
[952]	validation_0-logloss:0.47374
[953]	validation_0-logloss:0.47365
[954]	validation_0-logloss:0.47356
[955]	validation_0-logloss:0.47347
[956]	validation_0-logloss:0.47339
[957]	validation_0-logloss:0.47330
[958]	validation_0-logloss:0.47321
[959]	validation_0-logloss:0.47313
[960]	validation_0-logloss:0.47304
[961]	validation_0-logloss:0.47295
[962]	validation_0-logloss:0.47286
[963]	validation_0-logloss:0.47279
[964]	validation_0-logloss:0.47269
[965]	validation_0-logloss:0.47260
[966]	validation_0-logloss:0.47253
[967]	validation_0-logloss:0.47245
[968]	validation_0-logloss:0.47236
[969]	validation_0-logloss:0.47227
[970]	validation_0-logloss:0.47220
[971]	validation_0-logloss:0.47211
[972]	validation_0-logloss:0.47202
[973]	validation_0-logloss:0.47195
[974]	validation_0-logloss:0.47186
[975]	validation_0-l

[185]	validation_0-logloss:0.44672
[186]	validation_0-logloss:0.44669
[187]	validation_0-logloss:0.44666
[188]	validation_0-logloss:0.44663
[189]	validation_0-logloss:0.44664
[190]	validation_0-logloss:0.44661
[191]	validation_0-logloss:0.44663
[192]	validation_0-logloss:0.44666
[193]	validation_0-logloss:0.44667
[194]	validation_0-logloss:0.44641
[195]	validation_0-logloss:0.44646
[196]	validation_0-logloss:0.44652
[197]	validation_0-logloss:0.44657
[198]	validation_0-logloss:0.44661
[199]	validation_0-logloss:0.44668
[200]	validation_0-logloss:0.44674
[201]	validation_0-logloss:0.44680
[202]	validation_0-logloss:0.44659
[203]	validation_0-logloss:0.44666
[204]	validation_0-logloss:0.44672
[205]	validation_0-logloss:0.44678
[206]	validation_0-logloss:0.44674
[207]	validation_0-logloss:0.44684
[208]	validation_0-logloss:0.44682
[0]	validation_0-logloss:0.68869
[1]	validation_0-logloss:0.68431
[2]	validation_0-logloss:0.68002
[3]	validation_0-logloss:0.67582
[4]	validation_0-logloss:0.6

[214]	validation_0-logloss:0.45459
[215]	validation_0-logloss:0.45483
[216]	validation_0-logloss:0.45471
[217]	validation_0-logloss:0.45458
[0]	validation_0-logloss:0.68858
[1]	validation_0-logloss:0.68374
[2]	validation_0-logloss:0.67899
[3]	validation_0-logloss:0.67434
[4]	validation_0-logloss:0.66979
[5]	validation_0-logloss:0.66532
[6]	validation_0-logloss:0.66095
[7]	validation_0-logloss:0.65667
[8]	validation_0-logloss:0.65247
[9]	validation_0-logloss:0.64836
[10]	validation_0-logloss:0.64433
[11]	validation_0-logloss:0.64034
[12]	validation_0-logloss:0.63646
[13]	validation_0-logloss:0.63266
[14]	validation_0-logloss:0.62894
[15]	validation_0-logloss:0.62528
[16]	validation_0-logloss:0.62175
[17]	validation_0-logloss:0.61830
[18]	validation_0-logloss:0.61484
[19]	validation_0-logloss:0.61146
[20]	validation_0-logloss:0.60832
[21]	validation_0-logloss:0.60512
[22]	validation_0-logloss:0.60208
[23]	validation_0-logloss:0.59896
[24]	validation_0-logloss:0.59592
[25]	validation_0-lo

[15]	validation_0-logloss:0.63010
[16]	validation_0-logloss:0.62671
[17]	validation_0-logloss:0.62355
[18]	validation_0-logloss:0.62054
[19]	validation_0-logloss:0.61750
[20]	validation_0-logloss:0.61439
[21]	validation_0-logloss:0.61134
[22]	validation_0-logloss:0.60831
[23]	validation_0-logloss:0.60537
[24]	validation_0-logloss:0.60237
[25]	validation_0-logloss:0.59955
[26]	validation_0-logloss:0.59677
[27]	validation_0-logloss:0.59393
[28]	validation_0-logloss:0.59126
[29]	validation_0-logloss:0.58865
[30]	validation_0-logloss:0.58595
[31]	validation_0-logloss:0.58350
[32]	validation_0-logloss:0.58104
[33]	validation_0-logloss:0.57848
[34]	validation_0-logloss:0.57611
[35]	validation_0-logloss:0.57378
[36]	validation_0-logloss:0.57136
[37]	validation_0-logloss:0.56919
[38]	validation_0-logloss:0.56698
[39]	validation_0-logloss:0.56469
[40]	validation_0-logloss:0.56257
[41]	validation_0-logloss:0.56036
[42]	validation_0-logloss:0.55832
[43]	validation_0-logloss:0.55639
[44]	validatio

[48]	validation_0-logloss:0.53693
[49]	validation_0-logloss:0.53500
[50]	validation_0-logloss:0.53299
[51]	validation_0-logloss:0.53104
[52]	validation_0-logloss:0.52926
[53]	validation_0-logloss:0.52731
[54]	validation_0-logloss:0.52562
[55]	validation_0-logloss:0.52377
[56]	validation_0-logloss:0.52203
[57]	validation_0-logloss:0.52018
[58]	validation_0-logloss:0.51851
[59]	validation_0-logloss:0.51672
[60]	validation_0-logloss:0.51510
[61]	validation_0-logloss:0.51360
[62]	validation_0-logloss:0.51201
[63]	validation_0-logloss:0.51033
[64]	validation_0-logloss:0.50900
[65]	validation_0-logloss:0.50742
[66]	validation_0-logloss:0.50586
[67]	validation_0-logloss:0.50451
[68]	validation_0-logloss:0.50313
[69]	validation_0-logloss:0.50161
[70]	validation_0-logloss:0.50037
[71]	validation_0-logloss:0.49902
[72]	validation_0-logloss:0.49761
[73]	validation_0-logloss:0.49649
[74]	validation_0-logloss:0.49515
[75]	validation_0-logloss:0.49390
[76]	validation_0-logloss:0.49257
[77]	validatio

[8]	validation_0-logloss:0.57865
[9]	validation_0-logloss:0.58518
[10]	validation_0-logloss:0.58478
[11]	validation_0-logloss:0.57500
[12]	validation_0-logloss:0.59158
[13]	validation_0-logloss:0.58708
[14]	validation_0-logloss:0.58911
[15]	validation_0-logloss:0.59998
[0]	validation_0-logloss:0.46824
[1]	validation_0-logloss:0.46167
[2]	validation_0-logloss:0.47481
[3]	validation_0-logloss:0.49124
[4]	validation_0-logloss:0.50422
[5]	validation_0-logloss:0.53592
[6]	validation_0-logloss:0.54892
[7]	validation_0-logloss:0.54551
[8]	validation_0-logloss:0.54728
[9]	validation_0-logloss:0.55002
[10]	validation_0-logloss:0.55210
[11]	validation_0-logloss:0.55029
[12]	validation_0-logloss:0.56419
[13]	validation_0-logloss:0.57809
[14]	validation_0-logloss:0.57444
[15]	validation_0-logloss:0.57261
[16]	validation_0-logloss:0.59270
[0]	validation_0-logloss:2.48598
[1]	validation_0-logloss:18.70334
[2]	validation_0-logloss:8.07198
[3]	validation_0-logloss:8.07198
[4]	validation_0-logloss:8.07

[139]	validation_0-logloss:0.63324
[140]	validation_0-logloss:0.63288
[141]	validation_0-logloss:0.63251
[142]	validation_0-logloss:0.63214
[143]	validation_0-logloss:0.63177
[144]	validation_0-logloss:0.63140
[145]	validation_0-logloss:0.63104
[146]	validation_0-logloss:0.63067
[147]	validation_0-logloss:0.63031
[148]	validation_0-logloss:0.62994
[149]	validation_0-logloss:0.62958
[150]	validation_0-logloss:0.62922
[151]	validation_0-logloss:0.62886
[152]	validation_0-logloss:0.62849
[153]	validation_0-logloss:0.62813
[154]	validation_0-logloss:0.62778
[155]	validation_0-logloss:0.62741
[156]	validation_0-logloss:0.62706
[157]	validation_0-logloss:0.62670
[158]	validation_0-logloss:0.62634
[159]	validation_0-logloss:0.62598
[160]	validation_0-logloss:0.62563
[161]	validation_0-logloss:0.62527
[162]	validation_0-logloss:0.62492
[163]	validation_0-logloss:0.62456
[164]	validation_0-logloss:0.62421
[165]	validation_0-logloss:0.62386
[166]	validation_0-logloss:0.62351
[167]	validation_0-l

[374]	validation_0-logloss:0.56480
[375]	validation_0-logloss:0.56459
[376]	validation_0-logloss:0.56436
[377]	validation_0-logloss:0.56415
[378]	validation_0-logloss:0.56391
[379]	validation_0-logloss:0.56370
[380]	validation_0-logloss:0.56347
[381]	validation_0-logloss:0.56326
[382]	validation_0-logloss:0.56305
[383]	validation_0-logloss:0.56284
[384]	validation_0-logloss:0.56260
[385]	validation_0-logloss:0.56239
[386]	validation_0-logloss:0.56216
[387]	validation_0-logloss:0.56195
[388]	validation_0-logloss:0.56173
[389]	validation_0-logloss:0.56153
[390]	validation_0-logloss:0.56129
[391]	validation_0-logloss:0.56109
[392]	validation_0-logloss:0.56087
[393]	validation_0-logloss:0.56066
[394]	validation_0-logloss:0.56045
[395]	validation_0-logloss:0.56023
[396]	validation_0-logloss:0.56000
[397]	validation_0-logloss:0.55980
[398]	validation_0-logloss:0.55959
[399]	validation_0-logloss:0.55939
[400]	validation_0-logloss:0.55916
[401]	validation_0-logloss:0.55896
[402]	validation_0-l

[609]	validation_0-logloss:0.52154
[610]	validation_0-logloss:0.52138
[611]	validation_0-logloss:0.52121
[612]	validation_0-logloss:0.52105
[613]	validation_0-logloss:0.52089
[614]	validation_0-logloss:0.52073
[615]	validation_0-logloss:0.52056
[616]	validation_0-logloss:0.52040
[617]	validation_0-logloss:0.52024
[618]	validation_0-logloss:0.52007
[619]	validation_0-logloss:0.51991
[620]	validation_0-logloss:0.51975
[621]	validation_0-logloss:0.51959
[622]	validation_0-logloss:0.51943
[623]	validation_0-logloss:0.51927
[624]	validation_0-logloss:0.51911
[625]	validation_0-logloss:0.51895
[626]	validation_0-logloss:0.51879
[627]	validation_0-logloss:0.51863
[628]	validation_0-logloss:0.51847
[629]	validation_0-logloss:0.51831
[630]	validation_0-logloss:0.51815
[631]	validation_0-logloss:0.51799
[632]	validation_0-logloss:0.51784
[633]	validation_0-logloss:0.51767
[634]	validation_0-logloss:0.51752
[635]	validation_0-logloss:0.51736
[636]	validation_0-logloss:0.51721
[637]	validation_0-l

[844]	validation_0-logloss:0.48873
[845]	validation_0-logloss:0.48860
[846]	validation_0-logloss:0.48850
[847]	validation_0-logloss:0.48837
[848]	validation_0-logloss:0.48826
[849]	validation_0-logloss:0.48815
[850]	validation_0-logloss:0.48802
[851]	validation_0-logloss:0.48792
[852]	validation_0-logloss:0.48780
[853]	validation_0-logloss:0.48768
[854]	validation_0-logloss:0.48757
[855]	validation_0-logloss:0.48746
[856]	validation_0-logloss:0.48734
[857]	validation_0-logloss:0.48723
[858]	validation_0-logloss:0.48711
[859]	validation_0-logloss:0.48701
[860]	validation_0-logloss:0.48689
[861]	validation_0-logloss:0.48678
[862]	validation_0-logloss:0.48667
[863]	validation_0-logloss:0.48655
[864]	validation_0-logloss:0.48643
[865]	validation_0-logloss:0.48633
[866]	validation_0-logloss:0.48621
[867]	validation_0-logloss:0.48611
[868]	validation_0-logloss:0.48599
[869]	validation_0-logloss:0.48588
[870]	validation_0-logloss:0.48578
[871]	validation_0-logloss:0.48566
[872]	validation_0-l

XGB는 early stopping을 사용하기 위해 훈련 데이터의 일부를 검증 데이터로 분리했다.  
그래서 best_estimator는 전체 훈련 데이터를 사용해서 학습하지 않았기 때문에 동일한 파라미터일지라도 전체 훈련데이터로 학습한 모델과 테스트 결과가 다르다.  
best_estimator를 전체 훈련데이터로 학습한다면 best_param으로 학습한 모델과 테스트 결과가 동일하다.

## 테스트2
그리드서치의 best_estimator의 테스트 점수가 가장 높을까?  
=> 테스트해본 결과는 다음과 같다.

1. 그리드서치의 best_estimator의 테스트 점수가 가장 좋다고 확신할 수 없다. 대체로 교차검증 점수의 평균이 best_estimator보다 조금 낮아도 표준편차가 작은 경우 테스트 성능이 좋은 경우가  많이 보인다.  
2. early stopping이 필요한 알고리즘(ex XGB or NN etc.)은 그리드서치를 위해 별도의 검증 데이터가 필요하다. 전체 훈련 데이터를 사용하지 않기 때문에 성능이 떨어지므로 최적의 파라미터를  찾은 다음, 전체 데이터로 학습한 모델을 만들어야 한다.

In [81]:
from sklearn.metrics import roc_auc_score, accuracy_score

In [78]:
# logistic regresssion

print(roc_auc_score(sol.Survived, y_pred_best_estimator_lg))
print(roc_auc_score(sol.Survived, LogisticRegression(C=.1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, LogisticRegression(C=.01).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, LogisticRegression(C=.001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, LogisticRegression(C=.0001).fit(X_train, y_train).predict(X_test)))

0.7467380720545278
0.7507059396299903
0.7246348588120739
0.626460564751704
0.5


In [97]:
# SVC

print(roc_auc_score(sol.Survived, y_pred_best_estimator_svm))
print(roc_auc_score(sol.Survived, SVC(C=.0001, gamma=.0001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.0001, gamma=.001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.0001, gamma=.01).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.0001, gamma=.1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.0001, gamma=1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.001, gamma=.0001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.001, gamma=.001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.001, gamma=.01).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.001, gamma=.1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.001, gamma=1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.01, gamma=.0001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.01, gamma=.001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.01, gamma=.01).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.01, gamma=.1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.01, gamma=1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.1, gamma=.0001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.1, gamma=.001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.1, gamma=.01).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.1, gamma=.1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=.1, gamma=1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=1, gamma=.0001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=1, gamma=.001).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=1, gamma=.01).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=1, gamma=.1).fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, SVC(C=1, gamma=1).fit(X_train, y_train).predict(X_test)))

0.765968841285297
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.7000000000000001
0.7556718597857839
0.5041626095423565
0.5
0.7101752677702045
0.7698149951314508
0.765968841285297
0.7313534566699124


In [109]:
# XGB

print(roc_auc_score(sol.Survived, y_pred_best_estimator_xgb))
print(roc_auc_score(sol.Survived, XGBClassifier(learning_rate=.001, n_estimators=1000, eval_metric='mlogloss').fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, XGBClassifier(learning_rate=.01, n_estimators=1000, eval_metric='mlogloss').fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, XGBClassifier(learning_rate=.1, n_estimators=1000, eval_metric='mlogloss').fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, XGBClassifier(learning_rate=1, n_estimators=1000, eval_metric='mlogloss').fit(X_train, y_train).predict(X_test)))

0.7386075949367089
0.7559152872444012
0.7424537487828627
0.7194985394352482
0.7207400194741967


In [107]:
# XGB Early Stop

print(roc_auc_score(sol.Survived, y_pred_best_estimator_xgb))
print(roc_auc_score(sol.Survived, XGBClassifier(learning_rate=.001, n_estimators=1000, eval_metric='mlogloss').fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, XGBClassifier(learning_rate=.01, n_estimators=1000, eval_metric='mlogloss').fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, XGBClassifier(learning_rate=.1, n_estimators=1000, eval_metric='mlogloss').fit(X_train, y_train).predict(X_test)))
print(roc_auc_score(sol.Survived, XGBClassifier(learning_rate=1, n_estimators=1000, eval_metric='mlogloss').fit(X_train, y_train).predict(X_test)))

0.7424537487828627
0.7559152872444012
0.7424537487828627
0.7194985394352482
0.7207400194741967


## 결론
1. 그리드서치는 최적의 반복 횟수 or 측정기 개수도 탐색의 대상으로 잡기 때문에 early stopping을 사용하지 않는 것이 좋다. 과적합을 유발하지 않는 최적의 파라미터 조합을 찾는 것이 목적인데 early stop은 어떤 조합이던 상관없이 과적합이 발생하지 않도록 막아주기 때문이다.  
2. 그리드서치의 최적의 파라미터 조합을 얻어냈다고해서 이 조합이 테스트셋에 대해서도 최적이라는 보장은 없다. 따라서 최적 조합의 상위 그룹들을 전반적으로 테스트해볼 필요가 있으며, 특히 교차검증 점수의 표준편차가 작은 조합일수록 테스트셋에서의 일반화 성능이 좋았다.  
3. 그리드서치의 best_estimator는 fit에서 제공된 훈련데이터를 마지막에 학습한 모델이므로, best_params를 사용해서 별도의 모델을 생성하거나 best_estimator를 다시 전체 훈련 데이터로 학습할 필요가 없다.