<a href="https://colab.research.google.com/github/hyojin530/eng_keyword-and-ect/blob/master/predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import make_scorer
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb
import matplotlib.pyplot as plt

#dir 저장
input_dir = '/content/drive/My Drive/pro_data/'
out_dir   = '/content/drive/My Drive/pro_data/output/'
model_dir    = '/content/drive/My Drive/pro_data/'

#score로 rmsle 사용
def rmsle(predicted_values, actual_values):  #정밀도 0에 가까울수록 예측이 좋다
    # 넘파이로 배열 형태로 바꿔준다.
    predicted_values = np.array(predicted_values)
    actual_values = np.array(actual_values)
    print("actual_values\n", actual_values)
    # 예측값과 실제 값에 1을 더하고 로그를 씌워준다.
    log_predict = np.log(predicted_values + 1)
    log_actual = np.log(actual_values + 1)

    # 위에서 계산한 예측값에서 실제값을 빼주고 제곱을 해준다.
    #difference = log_predict - log_actual
    difference = (log_predict - log_actual) ** 2
    #difference = np.square(difference)
    print("rmsle_difference\n",difference)
    # 평균을 낸다.
    mean_difference = difference.mean()
    #print("mean_difference\n", mean_difference)

    # 다시 루트를 씌운다.
    score = np.sqrt(mean_difference)
    #print("rmsle_difference\n", difference)
    return score
rmsle_scorer = make_scorer(rmsle)
print(rmsle_scorer)

def submission(disp, prediction,score,Y_test):
    submission =pd.DataFrame()
    submission["Y_test"] = Y_test
    submission["Y_pre"] = prediction
    print(submission.shape)
    submission.head()
    submission.to_csv(out_dir+'/submission04__{0:.5f}_.csv'.format(score), index=False)

#최적 모델 찾기
def finalGridSearchCV(X_train, y_train, X_test,Y_test):
    i = 0
    #gradient boosting regression 모델에 사용할 파라미터 지정
    param_grid_gbr = {
        "n_estimators": [2000,3000,4000,5000],
        'learning_rate':[0.01,0.05, 0.1],
        'alpha' :[0.1,0.2,0.3]
    }
    #xgboost regression 모델에 사용할 파라미터 지정
    param_xgr = {
        'learning_rate': [0.06],
        'max_depth': [2, 3],
        'n_estimators': [40000],
        'colsample_bytree': [0.5, 1],
    }
    #cross validation 지정
    shuffle_cv = KFold(n_splits=10, shuffle=True, random_state=2019)

    models = [GradientBoostingRegressor(),xgb.XGBRegressor()]
    model_names =["gbr",'xgr']
    param_grid = [param_grid_gbr,param_xgr]
    rmsle_s =[]

    #모델별로 최적의 하이퍼파라미터 찾기
    for model in models:
        grid_search = GridSearchCV(model, param_grid=param_grid[i], cv=shuffle_cv, n_jobs=1, verbose=2,
                                   scoring=rmsle_scorer)
        y_train_log = np.log1p(y_train)           # 라벨을 log시킴
        grid_search.fit(X_train, y_train_log)     #train

        prediction = grid_search.predict(X_test)  # 검증을 위한 테스트
        score = grid_search.best_score_           # 최고 점수 저장
        print(model_names[i] + "_최고 점수 : ", score)
        print(model_names[i] + "_최고 점수를 낸 파라미터 : ", grid_search.best_params_)
        print(model_names[i] + "_최고 점수를 낸 파라미터를 가진 모형 : ", grid_search.best_estimator_)
        rmsle_s.append(score)
        joblib.dump(grid_search, model_dir + f'{model_names[i]}_log_reg05.pkl') # 최고 점수를 낸 모델 저장
        submission(model_names[i], np.exp(prediction), score,Y_test)            # 해달 모델의 prediction 결과 저장
        i += 1
    d = {'Modelling Algo': model_names, 'RMSLE': rmsle_s}
    print("d====",d)
    #gbr 최고 성능 모델과 xgr 최고 성능 모델 스코어 비교 후 더 좋은 모델 이름 return
    if rmsle_s[0] < rmsle_s[1]:
        print('best: '+model_names[0]+str(rmsle_s[0]))
        return model_names[0]
    else:
        print('best: '+model_names[1]+str(rmsle_s[1]))
        return model_names[1]

#예측 함수
def predict_data():
    pred_raw =pd.read_csv(input_dir + "nspdatatest12.csv")
    Xp = pred_raw.iloc[:, 0:2]
    
    #저장해놓은 최고 성능 모델 load
    grid_from_joblib = joblib.load(model_dir+f'{best_model}_log_reg05.pkl')
    test_y_pred = grid_from_joblib.predict(Xp)  #predict
    test_y_pred_ = np.exp(test_y_pred)          #log풀기

    pred_df05 = pd.DataFrame()
    pred_df05['test_y_pred'] = test_y_pred_
    pred_df05.to_csv(out_dir+'nsp_predict.csv')

#사용할 data 불러오기
datasetCompleted = pd.read_csv('/content/drive/My Drive/pro_data/nspdata12.csv')

#X, Y 편집
X = datasetCompleted.iloc[:,:2] #독립변수
X = X.dropna(axis=0)            #null 값 제거
print(X.head())
Y = datasetCompleted['correct'] #종속변수
Y = Y.dropna(axis=0)            #null 값 제거
print(Y.head())
#train, test data 나누기
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=1004)
#스케일링
scaler = MinMaxScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

#베스트 모델 찾기
best_model = finalGridSearchCV(X_train_scale, Y_train, X_test_scale, Y_test)

predict_data()

make_scorer(rmsle)
     onetwo  twothree
0  0.977854  0.297051
1  0.050302  0.736681
2  0.995564  0.019322
3  0.882049  0.846484
4  0.949409  0.112150
0    0.5883
1    0.3640
2    0.4617
3    0.5927
4    0.4341
Name: correct, dtype: float64
Fitting 10 folds for each of 36 candidates, totalling 360 fits
[CV] alpha=0.1, learning_rate=0.01, n_estimators=2000 ................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


actual_values
 [0.36550053 0.37892172 0.5140807  0.40804412]
rmsle_difference
 [4.54852121e-04 9.90252131e-05 7.10302099e-03 1.44622940e-02]
[CV]  alpha=0.1, learning_rate=0.01, n_estimators=2000, score=0.074, total=   0.6s
[CV] alpha=0.1, learning_rate=0.01, n_estimators=2000 ................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.6s remaining:    0.0s


actual_values
 [0.3602325  0.4321875  0.31886736 0.34200569]
rmsle_difference
 [0.0016413  0.01321726 0.00021768 0.00076333]
[CV]  alpha=0.1, learning_rate=0.01, n_estimators=2000, score=0.063, total=   0.6s
[CV] alpha=0.1, learning_rate=0.01, n_estimators=2000 ................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.1s remaining:    0.0s


actual_values
 [0.46366318 0.44388978 0.38564797 0.27102685]
rmsle_difference
 [0.00055326 0.00120607 0.01024873 0.01196098]
[CV]  alpha=0.1, learning_rate=0.01, n_estimators=2000, score=0.077, total=   0.6s
[CV] alpha=0.1, learning_rate=0.01, n_estimators=2000 ................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    1.7s remaining:    0.0s


actual_values
 [0.40024193 0.35877763 0.31807189 0.43036778]
rmsle_difference
 [8.55029275e-03 1.67528773e-06 9.16472105e-04 2.24263221e-05]
[CV]  alpha=0.1, learning_rate=0.01, n_estimators=2000, score=0.049, total=   0.6s
[CV] alpha=0.1, learning_rate=0.01, n_estimators=2000 ................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    2.3s remaining:    0.0s


actual_values
 [0.44358646 0.35433287 0.41770319 0.36969904]
rmsle_difference
 [1.28967677e-05 5.92139812e-03 1.09634442e-03 5.93009881e-04]
[CV]  alpha=0.1, learning_rate=0.01, n_estimators=2000, score=0.044, total=   0.6s
[CV] alpha=0.1, learning_rate=0.01, n_estimators=2000 ................
actual_values
 [0.43388498 0.39414164 0.37763102 0.38760925]
rmsle_difference
 [0.00736292 0.00483712 0.00165126 0.0033913 ]
[CV]  alpha=0.1, learning_rate=0.01, n_estimators=2000, score=0.066, total=   0.6s
[CV] alpha=0.1, learning_rate=0.01, n_estimators=2000 ................
actual_values
 [0.38739806 0.34844909 0.37334346 0.37334346]
rmsle_difference
 [1.01654385e-04 3.64918843e-04 3.78447110e-05 3.02334996e-04]
[CV]  alpha=0.1, learning_rate=0.01, n_estimators=2000, score=0.014, total=   0.6s
[CV] alpha=0.1, learning_rate=0.01, n_estimators=2000 ................
actual_values
 [0.47034447 0.33393133 0.35082288 0.39428288]
rmsle_difference
 [1.88567008e-03 1.68799416e-03 1.88148299e-05 3.7377

[Parallel(n_jobs=1)]: Done 360 out of 360 | elapsed:  5.4min finished


gbr_최고 점수 :  0.05474402615649404
gbr_최고 점수를 낸 파라미터 :  {'alpha': 0.3, 'learning_rate': 0.01, 'n_estimators': 3000}
gbr_최고 점수를 낸 파라미터를 가진 모형 :  GradientBoostingRegressor(alpha=0.3, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.01, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=3000,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=None, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)
(5, 2)
Fitting 10 folds for each of 4 candidates, totalling 40 fits
[CV] colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


actual_values
 [0.3606398  0.39402777 0.5614329  0.39283597]
rmsle_difference
 [0.00031546 0.00043458 0.01324222 0.01196828]
[CV]  colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000, score=0.081, total=   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s remaining:    0.0s


actual_values
 [0.28851974 0.4270304  0.2688563  0.38879132]
rmsle_difference
 [8.96325607e-03 1.24008349e-02 2.85277902e-03 4.40910172e-05]
[CV]  colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000, score=0.078, total=   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    2.6s remaining:    0.0s


actual_values
 [0.4580483  0.44003105 0.36464995 0.2921631 ]
rmsle_difference
 [0.00038723 0.00102736 0.01357362 0.00862553]
[CV]  colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000, score=0.077, total=   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    3.9s remaining:    0.0s


actual_values
 [0.39622828 0.32662112 0.36786377 0.41320935]
rmsle_difference
 [0.00802768 0.00063729 0.0045365  0.00028238]
[CV]  colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000, score=0.058, total=   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    5.2s remaining:    0.0s


actual_values
 [0.4080838  0.36286598 0.33824104 0.33172143]
rmsle_difference
 [0.0008118  0.00499423 0.00824338 0.00275314]
[CV]  colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000, score=0.065, total=   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000 
actual_values
 [0.39566612 0.4164444  0.368814   0.41404057]
rmsle_difference
 [0.00345647 0.00729663 0.0022143  0.00154968]
[CV]  colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000, score=0.060, total=   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000 
actual_values
 [0.37518498 0.3270502  0.37409544 0.35342312]
rmsle_difference
 [3.58124777e-04 1.23196523e-03 4.48795542e-05 7.70936549e-06]
[CV]  colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000, score=0.020, total=   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.06, max_depth=2, n_estimators=40000 
actual_values
 [0.47090757 0.30966896 0.39506

[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   51.8s finished


xgr_최고 점수 :  0.060275834360443446
xgr_최고 점수를 낸 파라미터 :  {'colsample_bytree': 0.5, 'learning_rate': 0.06, 'max_depth': 2, 'n_estimators': 40000}
xgr_최고 점수를 낸 파라미터를 가진 모형 :  XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.5, gamma=0,
             importance_type='gain', learning_rate=0.06, max_delta_step=0,
             max_depth=2, min_child_weight=1, missing=None, n_estimators=40000,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)
(5, 2)
d==== {'Modelling Algo': ['gbr', 'xgr'], 'RMSLE': [0.05474402615649404, 0.060275834360443446]}
best: gbr0.05474402615649404


In [0]:
import numpy as np
import pandas as pd
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import make_scorer
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb
import matplotlib.pyplot as plt

#dir 저장
input_dir = '/content/drive/My Drive/pro_data/'
out_dir   = '/content/drive/My Drive/pro_data/output/'
model_dir    = '/content/drive/My Drive/pro_data/'


def submission(disp, prediction,score,Y_test):
    submission =pd.DataFrame()
    submission["Y_test"] = Y_test
    submission["Y_pre"] = prediction
    print(submission.shape)
    submission.head()
    submission.to_csv(out_dir+'/submissioncl__{0:.5f}_.csv'.format(score), index=False)

#최적 모델 찾기
def finalGridSearchCV(X_train, y_train, X_test,Y_test):
    i = 0
    param_grid_gbr = {
        "n_estimators": [100,500,1000,3000, 5000],
        'learning_rate':[0.01,0.05, 0.1],
        'subsample' :[0.5,0.7,0.9]
    }
    param_xgr = {
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [2, 3],
        'subsample' :[0.5,0.7,0.9],
        'colsample_bytree': [0.5, 1],
    }
    shuffle_cv = KFold(n_splits=5, shuffle=True, random_state=2019)
    models = [GradientBoostingClassifier(random_state=2019),xgb.XGBClassifier()]  #Classifier
    model_names =["gbr",'xgr']
    param_grid = [param_grid_gbr,param_xgr]
    accu_s =[]
    for model in models:
        grid_search = GridSearchCV(model, param_grid=param_grid[i], cv=shuffle_cv, n_jobs=1, verbose=5,
                                   scoring='accuracy')
        #y_train_log = np.log1p(y_train)
        grid_search.fit(X_train, y_train)
        prediction = grid_search.predict(X_test)  # 검증을 위한 테스트
        score = grid_search.best_score_
        print(model_names[i] + "_최고 점수 : ", score)
        print(model_names[i] + "_최고 점수를 낸 파라미터 : ", grid_search.best_params_)
        print(model_names[i] + "_최고 점수를 낸 파라미터를 가진 모형 : ", grid_search.best_estimator_)
        accu_s.append(score)
        joblib.dump(grid_search, model_dir + f'{model_names[i]}_cl.pkl')
        submission(model_names[i], np.exp(prediction), score,Y_test)
        i += 1
    
    d = {'Modelling Algo': model_names, 'Accuracy': accu_s}
    print("d====",d)
    if accu_s[0] < accu_s[1]:
        print('best: '+model_names[0]+str(accu_s[0]))
        return model_names[0]
    else:
        print('best: '+model_names[1]+str(accu_s[1]))
        return model_names[1]


def predict_data():
    pred_raw=pd.read_csv(input_dir + "nspdatatestcl.csv")
    Xp = pred_raw.iloc[:, 0:2]

    grid_from_joblib = joblib.load(model_dir+f'{best_model}_cl.pkl')
    if best_model == 'xgr':
      Xp.columns = ['f0', 'f1']
    test_y_pred = grid_from_joblib.predict(Xp)
    #test_y_pred_ = np.exp(test_y_pred)

    pred_df = pd.DataFrame()
    pred_df['test_y_pred'] = test_y_pred
    pred_df.to_csv(out_dir+'nsp_predictcl.csv')


datasetCompleted = pd.read_csv('/content/drive/My Drive/pro_data/nspdatacl.csv')

X = datasetCompleted.iloc[:,:2]
X = X.dropna(axis=0)
print(X.head())
Y = datasetCompleted['correct']
Y = Y.dropna(axis=0)
print(Y.head())
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=1004)
scaler = MinMaxScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

best_model = finalGridSearchCV(X_train_scale, Y_train, X_test_scale, Y_test)

predict_data()

     onetwo  twothree
0  0.977854  0.297051
1  0.050302  0.736681
2  0.995564  0.019322
3  0.882049  0.846484
4  0.949409  0.112150
0    1
1    0
2    0
3    1
4    0
Name: correct, dtype: int64
Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] learning_rate=0.01, n_estimators=100, subsample=0.5 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.5, score=0.625, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.5 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.5, score=0.500, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.5 .............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV]  learning_rate=0.01, n_estimators=100, subsample=0.5, score=0.375, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.5 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.5, score=0.875, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.5 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.5, score=0.625, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.7 .............


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.3s remaining:    0.0s


[CV]  learning_rate=0.01, n_estimators=100, subsample=0.7, score=0.625, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.7 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.7, score=0.500, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.7 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.7, score=0.375, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.7 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.7, score=0.875, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.7 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.7, score=0.500, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.9 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.9, score=0.625, total=   0.1s
[CV] learning_rate=0.01, n_estimators=100, subsample=0.9 .............
[CV]  learning_rate=0.01, n_estimators=100, subsample=0.9,

[Parallel(n_jobs=1)]: Done 225 out of 225 | elapsed:  3.7min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s


[CV] colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9 
[CV]  colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9, score=0.625, total=   0.0s
[CV] colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9 
[CV]  colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9, score=0.500, total=   0.0s
[CV] colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9 
[CV]  colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9, score=0.375, total=   0.0s
[CV] colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9 
[CV]  colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9, score=0.875, total=   0.0s
[CV] colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9 
[CV]  colsample_bytree=0.5, learning_rate=0.01, max_depth=2, subsample=0.9, score=0.625, total=   0.0s
[CV] colsample_bytree=0.5, learning_rate=0.01, max_depth=3, subsample=0.5 
[CV]  colsample_bytree=0.5, learnin

[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed:    2.1s finished
