<a href="https://colab.research.google.com/github/heyggun/ML/blob/main/Parameter_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

---------------
# 파라미터 튜닝
----------------

#### 각 모델별로 성능을 결정하는 하이퍼 파라미터의 최적의 조합을 찾음 (cross validation 형태로 진행)

- GridSearchCV
- RandomizedSearchCV
- BayesianOptimization 최적화

###### >  k-fold 방식으로 하이퍼 파라미터 조합을 탐색함
-------------------

#### [알고리즘별 하이퍼 파라미터]

- 트리 & 부스팅 계열(RandomForest, XGBoost, LightGBM, Catboost 등): max_depth, n_estimaotrs, learning_rate(랜덤포레스트는 없음)
- 서포트벡터머신(SVM, SVC, SVR 등) : C, alpha

---------------------------

#### [튜닝방법]

- (1) GridSearchCV : 그물망 방식, 모든 파라미터를 조합하여 최적의 조합을 찾음. 시간이 많이 걸리나 모든 경우의 수를 다 활용함
- (2) RandomizedSearchCV : 모든 조합 중 랜덤하게 선택하여 최적의 조합을 찾음. Grid 방식에 비해 시간은 빠르나 모든 경우의 수를 활용하지는 않음

In [1]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, KFold, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, roc_auc_score
from xgboost import XGBClassifier, XGBRegressor
from lightgbm import LGBMClassifier, LGBMRegressor 

import pandas as pd
import numpy as np

- 예시로 유방암 데이터로

In [2]:
from sklearn.datasets import load_breast_cancer

data  = pd.DataFrame(load_breast_cancer().data, columns=load_breast_cancer().feature_names )
data

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [3]:
data['target'] = load_breast_cancer().target
data


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,0
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,0
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,0
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,0
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,0
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,0


In [4]:
test = data.sample(69)
train = data.query('index not in @test.index')

In [12]:
len(train.columns)

31

In [13]:
len(test.columns)

31

In [5]:
X= train.drop(columns=['target'], axis=1)
y = train['target']
target = test[X.columns]

In [8]:
display(X.head(3))

display(y.head(3))

print(target)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758


0    0
1    0
2    0
Name: target, dtype: int64

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
470        9.667         18.49           61.49      289.1          0.08946   
127       19.000         18.91          123.40     1138.0          0.08217   
210       20.580         22.14          134.70     1290.0          0.09090   
479       16.250         19.51          109.80      815.8          0.10260   
408       17.990         20.66          117.80      991.7          0.10360   
..           ...           ...             ...        ...              ...   
155       12.250         17.94           78.27      460.3          0.08654   
360       12.540         18.07           79.42      491.9          0.07436   
83        19.100         26.29          129.10     1132.0          0.12150   
49        13.490         22.30           86.91      561.0          0.08752   
481       13.900         19.24           88.73      602.9          0.07991   

     mean compactness  mean concavity  mean concave points  mea

In [14]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)

In [15]:
target

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
470,9.667,18.49,61.49,289.1,0.08946,0.06258,0.029480,0.015140,0.2238,0.06413,...,11.14,25.62,70.88,385.2,0.12340,0.15420,0.127700,0.06560,0.3174,0.08524
127,19.000,18.91,123.40,1138.0,0.08217,0.08028,0.092710,0.056270,0.1946,0.05044,...,22.32,25.73,148.20,1538.0,0.10210,0.22640,0.320700,0.12180,0.2841,0.06541
210,20.580,22.14,134.70,1290.0,0.09090,0.13480,0.164000,0.095610,0.1765,0.05024,...,23.24,27.84,158.30,1656.0,0.11780,0.29200,0.386100,0.19200,0.2909,0.05865
479,16.250,19.51,109.80,815.8,0.10260,0.18930,0.223600,0.091940,0.2151,0.06578,...,17.39,23.05,122.10,939.7,0.13770,0.44620,0.589700,0.17750,0.3318,0.09136
408,17.990,20.66,117.80,991.7,0.10360,0.13040,0.120100,0.088240,0.1992,0.06069,...,21.08,25.41,138.10,1349.0,0.14820,0.37350,0.330100,0.19740,0.3060,0.08503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,12.250,17.94,78.27,460.3,0.08654,0.06679,0.038850,0.023310,0.1970,0.06228,...,13.59,25.22,86.60,564.2,0.12170,0.17880,0.194300,0.08211,0.3113,0.08132
360,12.540,18.07,79.42,491.9,0.07436,0.02650,0.001194,0.005449,0.1528,0.05185,...,13.72,20.98,86.82,585.7,0.09293,0.04327,0.003581,0.01635,0.2233,0.05521
83,19.100,26.29,129.10,1132.0,0.12150,0.17910,0.193700,0.146900,0.1634,0.07224,...,20.33,32.72,141.30,1298.0,0.13920,0.28170,0.243200,0.18410,0.2311,0.09203
49,13.490,22.30,86.91,561.0,0.08752,0.07698,0.047510,0.033840,0.1809,0.05718,...,15.15,31.82,99.00,698.8,0.11620,0.17110,0.228200,0.12820,0.2871,0.06917


In [18]:
np.zeros(target.shape[0])

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0.])

In [20]:
np.zeros(target.shape[0]).shape

(69,)

In [19]:
np.zeros((target.shape[0]))

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0.])

In [21]:
np.zeros((target.shape[0])).shape

(69,)

In [22]:
lgbm_pred = np.zeros((target.shape[0]))

for train_idx, val_idx in skf.split(X,y):
  X_train, X_test = X.iloc[train_idx], X.iloc[val_idx]
  y_train, y_test = y.iloc[train_idx], y.iloc[val_idx]

  lgbm = LGBMClassifier(random_state=42)
  lgbm.fit(X_train, y_train)

  val_pred = lgbm.predict_proba(X_test)[:,1]
  val_score = roc_auc_score(y_test, val_pred)
  print(val_score)

  fold_pred = lgbm.predict_proba(target)[:,1] /10
  lgbm_pred += fold_pred

  print(lgbm_pred)

1.0
[9.99886199e-02 2.00558604e-04 3.57974751e-06 1.16860380e-04
 3.72396643e-06 3.78046566e-05 9.98890559e-02 5.29145378e-05
 9.99762780e-02 9.99746116e-02 9.99899029e-02 9.99973110e-02
 1.00794881e-02 9.99892584e-02 5.46507572e-06 8.26574989e-06
 9.98259973e-02 5.12126963e-06 5.06771764e-06 4.61175652e-06
 9.99744485e-02 8.35007708e-06 9.99944879e-02 1.13298554e-02
 9.99897586e-02 1.31273528e-02 1.83299856e-04 9.99701995e-02
 9.99983930e-02 9.99902314e-02 8.76914906e-03 9.99958477e-02
 9.99755050e-02 1.65744368e-04 9.99616604e-02 9.99950598e-02
 9.99747810e-02 9.99922642e-02 9.99747366e-02 6.20458951e-06
 9.16891034e-06 9.99484537e-02 9.98958732e-02 9.46951728e-02
 9.99961361e-02 9.99814035e-02 5.06576900e-06 9.99921749e-02
 7.27628086e-06 2.37889453e-05 9.94746315e-02 9.99885159e-02
 9.90217086e-02 9.99062661e-02 9.99952134e-02 9.99864683e-02
 9.99002080e-02 9.94703175e-02 2.10936507e-02 9.99765332e-02
 9.99935880e-02 1.20807913e-04 7.56793864e-02 9.99974548e-02
 9.99870697e-02 9.99

In [23]:
roc_auc_score(test['target'], lgbm_pred)

1.0

*GridSearchCV*

- 모델을 불러오고, 탐색하고자 하는 파라미터 조합 세팅
- GridSearchCV(모델, 파라미터, 평가지표, cross_validation 구성방법)

In [24]:
lgbm = LGBMClassifier(random_state=42)

lgbm_params = {'max_depth': [3,4,5,6,7], 'n_estimators':[50,100,150,175,200], 'learning_rate':[0.005, 0.05, 0.01, 0.1]}

5 x 5 x 4 = 100 개 모델 생성

In [25]:
grid_cv = GridSearchCV(lgbm, lgbm_params, scoring='roc_auc', n_jobs=-1, verbose =1, cv=skf)

In [26]:
%%time

grid_cv.fit(X,y)

Fitting 10 folds for each of 100 candidates, totalling 1000 fits
CPU times: user 4.56 s, sys: 270 ms, total: 4.84 s
Wall time: 1min 43s


GridSearchCV(cv=StratifiedKFold(n_splits=10, random_state=42, shuffle=True),
             estimator=LGBMClassifier(random_state=42), n_jobs=-1,
             param_grid={'learning_rate': [0.005, 0.05, 0.01, 0.1],
                         'max_depth': [3, 4, 5, 6, 7],
                         'n_estimators': [50, 100, 150, 175, 200]},
             scoring='roc_auc', verbose=1)

In [27]:
grid_cv.best_params_

{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}

In [28]:
grid_cv.best_estimator_

LGBMClassifier(max_depth=3, n_estimators=200, random_state=42)

In [29]:
lgbm_pred = np.zeros((target.shape[0]))

for train_idx, test_idx in skf.split(X,y):
  
  X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
  y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

  lgbm = LGBMClassifier(**grid_cv.best_params_)
  ## ** -> unpack (딕셔너리 형태로 되어 있는 값 자동으로 배분해서 넣어줌)

  lgbm.fit(X_train, y_train)

  val_pred = lgbm.predict_proba(X_test)[:,1]
  val_score = roc_auc_score(y_test, val_pred)
  print(val_score)

  fold_pred = lgbm.predict_proba(target)[:,1]/10
  lgbm_pred += fold_pred


1.0
1.0
0.9456706281833616
0.9966044142614602
0.9983022071307301
0.9966044142614602
0.9982638888888888
1.0
0.9982638888888888
1.0


### RandomizedSearchCV

In [30]:
rand_cv = RandomizedSearchCV(lgbm, lgbm_params, scoring='roc_auc', n_jobs=-1, verbose = 1, cv=skf )

In [31]:
%%time

rand_cv.fit(X,y)

Fitting 10 folds for each of 10 candidates, totalling 100 fits
CPU times: user 898 ms, sys: 41 ms, total: 939 ms
Wall time: 12.4 s


RandomizedSearchCV(cv=StratifiedKFold(n_splits=10, random_state=42, shuffle=True),
                   estimator=LGBMClassifier(max_depth=3, n_estimators=200),
                   n_jobs=-1,
                   param_distributions={'learning_rate': [0.005, 0.05, 0.01,
                                                          0.1],
                                        'max_depth': [3, 4, 5, 6, 7],
                                        'n_estimators': [50, 100, 150, 175,
                                                         200]},
                   scoring='roc_auc', verbose=1)

In [32]:
rand_cv.best_params_

{'learning_rate': 0.05, 'max_depth': 7, 'n_estimators': 200}

In [33]:
rand_cv.best_estimator_

LGBMClassifier(learning_rate=0.05, max_depth=7, n_estimators=200)

In [34]:
lgbm_pred = np.zeros((target.shape[0]))

for train_idx, test_idx in skf.split(X,y):
  X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
  y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

  lgbm = LGBMClassifier(**rand_cv.best_params_)
  lgbm.fit(X_train, y_train)

  val_pred = lgbm.predict_proba(X_test)[:,1]
  val_score = roc_auc_score(y_test, val_pred)
  print(val_score)

  fold_pred = lgbm.predict_proba(target)[:,1]/10
  lgbm_pred += fold_pred

1.0
1.0
0.9439728353140917
0.9932088285229201
0.9949066213921902
0.9966044142614602
1.0
1.0
0.9982638888888888
1.0


In [35]:
roc_auc = roc_auc_score(test['target'], lgbm_pred)
roc_auc

1.0

### 와인 등급(quailty) 맞추기

In [36]:
x_train = pd.read_csv("https://raw.githubusercontent.com/Datamanim/datarepo/main/redwine/x_train.csv")
y_train = pd.read_csv("https://raw.githubusercontent.com/Datamanim/datarepo/main/redwine/y_train.csv")
x_test= pd.read_csv("https://raw.githubusercontent.com/Datamanim/datarepo/main/redwine/x_test.csv")


display(x_train.head(3))
display(y_train.head(3))

Unnamed: 0,ID,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,1,10.6,0.44,0.68,4.1,0.114,6.0,24.0,0.997,3.06,0.66,13.4
1,2,7.0,0.6,0.3,4.5,0.068,20.0,110.0,0.99914,3.3,1.17,10.2
2,3,8.0,0.43,0.36,2.3,0.075,10.0,48.0,0.9976,3.34,0.46,9.4


Unnamed: 0,ID,quality
0,1,6
1,2,5
2,3,5


In [37]:
train = x_train.drop(columns=['ID'])
test = y_train.drop(columns=['ID'])

display(train.head(3))
display(test.head(3))

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,10.6,0.44,0.68,4.1,0.114,6.0,24.0,0.997,3.06,0.66,13.4
1,7.0,0.6,0.3,4.5,0.068,20.0,110.0,0.99914,3.3,1.17,10.2
2,8.0,0.43,0.36,2.3,0.075,10.0,48.0,0.9976,3.34,0.46,9.4


Unnamed: 0,quality
0,6
1,5
2,5


In [38]:
skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

In [39]:
xgb = XGBClassifier(random_state=42)
xgb_params = {'max_depth':[4,6,8], 'learning_rage':[0.05, 0.01, 0.1], 'n_estimators':[100,250]}

In [40]:
rand_cv = RandomizedSearchCV(xgb, xgb_params, cv=skf, scoring='neg_log_loss', n_jobs=-1)

In [41]:
from sklearn.metrics import log_loss

In [42]:
rand_cv.fit(train,test['quality'])

RandomizedSearchCV(cv=StratifiedKFold(n_splits=5, random_state=42, shuffle=True),
                   estimator=XGBClassifier(random_state=42), n_jobs=-1,
                   param_distributions={'learning_rage': [0.05, 0.01, 0.1],
                                        'max_depth': [4, 6, 8],
                                        'n_estimators': [100, 250]},
                   scoring='neg_log_loss')

In [43]:
rand_cv.best_params_

{'learning_rage': 0.1, 'max_depth': 4, 'n_estimators': 100}

In [45]:
target = x_test.drop(columns=['ID'])
display(target.head(3))

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.5,0.38,0.57,2.3,0.106,5.0,12.0,0.99605,3.36,0.55,11.4
1,8.2,0.24,0.34,5.1,0.062,8.0,22.0,0.9974,3.22,0.94,10.9
2,7.7,0.75,0.27,3.8,0.11,34.0,89.0,0.99664,3.24,0.45,9.3


In [48]:
target = x_test.drop(columns=['ID'])
display(target.head(3))

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.5,0.38,0.57,2.3,0.106,5.0,12.0,0.99605,3.36,0.55,11.4
1,8.2,0.24,0.34,5.1,0.062,8.0,22.0,0.9974,3.22,0.94,10.9
2,7.7,0.75,0.27,3.8,0.11,34.0,89.0,0.99664,3.24,0.45,9.3


In [55]:
xgb_pred = np.zeros((target.shape[0],6))

for train_idx, test_idx in skf.split(train,test):
  X_train, X_test = train.iloc[train_idx], train.iloc[test_idx]
  y_train, y_test = test.iloc[train_idx], test.iloc[test_idx]

  xgb = rand_cv.best_estimator_
  xgb.fit(X_train, y_train)

  val_pred = xgb.predict_proba(X_test)
  val_log_loss = log_loss(y_test, val_pred)
  print('\n', val_log_loss, '\n')

  fold_pred = xgb.predict_proba(target)/5
  xgb_pred += fold_pred

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



 0.912744000685052 



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



 0.9193874863958627 



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



 0.923457231005159 



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



 0.8112942801089957 



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



 0.8896651023852766 



In [62]:
pred = np.argmax(xgb_pred, axis= 1)
pred

array([3, 3, 2, 0, 2, 3, 2, 3, 2, 2, 2, 2, 2, 3, 3, 3, 3, 2, 3, 2, 5, 3,
       2, 3, 3, 3, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 2, 3, 0, 4, 3, 2, 3, 2,
       3, 3, 2, 3, 3, 2, 3, 3, 2, 3, 3, 2, 4, 3, 2, 2, 3, 2, 2, 3, 2, 3,
       3, 2, 2, 3, 3, 3, 2, 2, 3, 3, 3, 4, 3, 2, 2, 4, 3, 3, 3, 3, 3, 3,
       2, 3, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 3, 2, 3, 2, 2, 3, 4, 4, 2, 2,
       2, 2, 2, 3, 2, 3, 3, 1, 2, 3, 2, 3, 3, 3, 3, 1, 3, 3, 4, 3, 2, 3,
       2, 2, 3, 3, 3, 3, 5, 3, 3, 2, 2, 3, 3, 3, 3, 3, 4, 2, 2, 3, 3, 3,
       2, 5, 2, 2, 2, 2, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3,
       3, 2, 2, 2, 3, 2, 1, 3, 3, 3, 2, 3, 3, 2, 3, 4, 2, 2, 2, 2, 2, 3,
       3, 3, 3, 2, 3, 3, 4, 3, 4, 3, 3, 3, 2, 2, 3, 2, 3, 3, 3, 3, 3, 3,
       3, 3, 2, 3, 3, 3, 3, 2, 4, 2, 2, 2, 3, 2, 0, 2, 2, 2, 2, 3, 3, 2,
       2, 3, 3, 2, 2, 3, 3, 2, 3, 1, 2, 3, 2, 3, 2, 2, 1, 2, 2, 3, 2, 3,
       3, 3, 3, 1, 3, 3, 3, 2, 3, 2, 2, 2, 2, 2, 3, 3, 3, 2, 3, 2, 4, 2,
       3, 2, 3, 0, 3, 2, 2, 2, 2, 3, 3, 2, 3, 3, 3,