In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV, GridSearchCV
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, confusion_matrix
from datetime import datetime

In [2]:
df = pd.read_csv('creditcard.csv')

In [3]:
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [4]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Time,284807.0,94813.86,47488.145955,0.0,54201.5,84692.0,139320.5,172792.0
V1,284807.0,1.16598e-15,1.958696,-56.40751,-0.920373,0.018109,1.315642,2.45493
V2,284807.0,3.416908e-16,1.651309,-72.715728,-0.59855,0.065486,0.803724,22.057729
V3,284807.0,-1.37315e-15,1.516255,-48.325589,-0.890365,0.179846,1.027196,9.382558
V4,284807.0,2.086869e-15,1.415869,-5.683171,-0.84864,-0.019847,0.743341,16.875344
V5,284807.0,9.604066e-16,1.380247,-113.743307,-0.691597,-0.054336,0.611926,34.801666
V6,284807.0,1.490107e-15,1.332271,-26.160506,-0.768296,-0.274187,0.398565,73.301626
V7,284807.0,-5.556467e-16,1.237094,-43.557242,-0.554076,0.040103,0.570436,120.589494
V8,284807.0,1.177556e-16,1.194353,-73.216718,-0.20863,0.022358,0.327346,20.007208
V9,284807.0,-2.406455e-15,1.098632,-13.434066,-0.643098,-0.051429,0.597139,15.594995


In [3]:
#The Target class is heavily skewed
print('Genuine Transactions',round(df['Class'].value_counts()[0]/len(df)*100,2), '% of the Dataset')
print('Fraud Transactions',round(df['Class'].value_counts()[1]/len(df)*100,2), '% of the Dataset')

Genuine Transactions 99.83 % of the Dataset
Fraud Transactions 0.17 % of the Dataset


In [4]:
#Spliiting the Dataset into Train and test before analysing it further
X = df.drop('Class',axis=1)
y = df['Class']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42,stratify=y)

In [5]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((227845, 30), (56962, 30), (227845,), (56962,))

In [6]:
#Poportion of Genuine and fraud transactions in training and test set
print('Genuine Transactions in train set',round((len(y_train) - sum(y_train))/len(y_train)*100,2), '% of the Dataset')
print('Genuine Transactions in test set',round((len(y_test) - sum(y_test))/len(y_test)*100,2), '% of the Dataset')

print('Fraud Transactions in train set',round(sum(y_train)/len(y_train)*100,2), '% of the Dataset')
print('Fraud Transactions in test set',round(sum(y_test)/len(y_test)*100,2), '% of the Dataset')

Genuine Transactions in train set 99.83 % of the Dataset
Genuine Transactions in test set 99.83 % of the Dataset
Fraud Transactions in train set 0.17 % of the Dataset
Fraud Transactions in test set 0.17 % of the Dataset


In [7]:
X_train_set, X_val, y_train_set, y_val = train_test_split(X_train,y_train,test_size=0.2,random_state=45)

In [8]:
print('Fradulent Transactions in train set',round( sum(y_train_set)/len(y_train_set)*100,2), '% of the Dataset')
print('Fradulent Transactions in test set',round(sum(y_val)/len(y_val)*100,2), '% of the Dataset')

Fradulent Transactions in train set 0.17 % of the Dataset
Fradulent Transactions in test set 0.17 % of the Dataset


In [9]:
sm = SMOTE(sampling_strategy='minority',random_state=42)
X_trainsm, y_trainsm = sm.fit_sample(X_train_set,y_train_set)
X_trainsm.shape, X_train_set.shape

((363920, 30), (182276, 30))

In [30]:
xgb = XGBClassifier(n_estimators=100,max_depth=5,verbosity=1,n_jobs=-1,random_state=42)

In [32]:
xgb.fit(X_trainsm,y_trainsm,early_stopping_rounds=10,eval_set=[(X_val,y_val)])

[0]	validation_0-error:0.02337
Will train until validation_0-error hasn't improved in 10 rounds.
[1]	validation_0-error:0.01685
[2]	validation_0-error:0.01479
[3]	validation_0-error:0.01315
[4]	validation_0-error:0.01251
[5]	validation_0-error:0.01115
[6]	validation_0-error:0.01047
[7]	validation_0-error:0.00996
[8]	validation_0-error:0.00928
[9]	validation_0-error:0.00856
[10]	validation_0-error:0.00757
[11]	validation_0-error:0.00777
[12]	validation_0-error:0.00733
[13]	validation_0-error:0.00700
[14]	validation_0-error:0.00656
[15]	validation_0-error:0.00597
[16]	validation_0-error:0.00608
[17]	validation_0-error:0.00518
[18]	validation_0-error:0.00487
[19]	validation_0-error:0.00470
[20]	validation_0-error:0.00430
[21]	validation_0-error:0.00377
[22]	validation_0-error:0.00373
[23]	validation_0-error:0.00345
[24]	validation_0-error:0.00325
[25]	validation_0-error:0.00318
[26]	validation_0-error:0.00301
[27]	validation_0-error:0.00268
[28]	validation_0-error:0.00252
[29]	validation_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=5,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=-1, num_parallel_tree=1, random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=1)

In [37]:
xgb.feature_importances_

array([0.00936838, 0.01721717, 0.0074037 , 0.0155289 , 0.06084084,
       0.0028368 , 0.00442742, 0.0080187 , 0.01874845, 0.00594272,
       0.08739559, 0.02067459, 0.03677052, 0.01307691, 0.5474884 ,
       0.01002057, 0.00988437, 0.03224188, 0.00859835, 0.00619586,
       0.00886336, 0.00917379, 0.00618211, 0.00434133, 0.00718178,
       0.00619274, 0.00932774, 0.00560972, 0.00869945, 0.01174782],
      dtype=float32)

In [38]:
feat_imp = pd.DataFrame(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount'],columns=['Feature Name'])

In [39]:
feat_imp['Feature Importances'] = xgb.feature_importances_

In [42]:
feat_imp.sort_values('Feature Importances',ascending=False).head(10)

Unnamed: 0,Feature Name,Feature Importances
14,V14,0.547488
10,V10,0.087396
4,V4,0.060841
12,V12,0.036771
17,V17,0.032242
11,V11,0.020675
8,V8,0.018748
1,V1,0.017217
3,V3,0.015529
13,V13,0.013077


In [43]:
xgb_prediction = xgb.predict(X_test)

In [48]:
print("Classification Report for Extreme Gradient Boosting \n\n\n",classification_report(y_test,xgb_prediction))

Classification Report for Extreme Gradient Boosting 


               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.70      0.86      0.77        98

    accuracy                           1.00     56962
   macro avg       0.85      0.93      0.89     56962
weighted avg       1.00      1.00      1.00     56962



In [50]:
print("Confusion Matrix for Extreme Gradient Boosting \n",confusion_matrix(y_test,xgb_prediction))

Confusion Matrix for Extreme Gradient Boosting 
 [[56828    36]
 [   14    84]]


In [11]:
def timer(start_time=None):
    if not start_time:
        start_time = datetime.now()
        return start_time
    elif start_time:
        thour, temp_sec = divmod((datetime.now() - start_time).total_seconds(), 3600)
        tmin, tsec = divmod(temp_sec, 60)
        print('\n Time taken: %i hours %i minutes and %s seconds.' % (thour, tmin, round(tsec, 2)))

In [12]:
# A parameter grid for XGBoost
params = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5]
        }

In [13]:
xgb = XGBClassifier(n_estimators=100,verbosity=3,n_jobs=-1,random_state=42)

In [14]:
skf = StratifiedKFold(n_splits=5, shuffle = True, random_state = 101)
random_search = RandomizedSearchCV(xgb, param_distributions=params, n_iter=5, scoring='roc_auc', 
                                   n_jobs=-1, cv=skf.split(X_trainsm,y_trainsm), verbose=1, random_state=101)

In [15]:
start_time = timer(None)
random_search.fit(X_trainsm,y_trainsm,early_stopping_rounds=10,eval_set=[(X_val,y_val)])
timer(start_time)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed: 12.2min finished


[08:32:02] DEBUG: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\gbm\gbtree.cc:147: Using tree method: 2
[08:32:03] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 28 extra nodes, 0 pruned nodes, max_depth=4
[0]	validation_0-error:0.03805
Will train until validation_0-error hasn't improved in 10 rounds.
[08:32:04] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[1]	validation_0-error:0.02971
[08:32:04] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[2]	validation_0-error:0.01510
[08:32:04] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[3]	validation_0-error:0.01444
[08

[08:32:22] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 22 extra nodes, 4 pruned nodes, max_depth=4
[40]	validation_0-error:0.00252
[08:32:22] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 24 extra nodes, 0 pruned nodes, max_depth=4
[41]	validation_0-error:0.00241
[08:32:22] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 24 extra nodes, 6 pruned nodes, max_depth=4
[42]	validation_0-error:0.00241
[08:32:23] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[43]	validation_0-error:0.00222
[08:32:23] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 28 extra nodes, 2 pruned nodes, max_depth=4
[44]	validation_0

[08:32:39] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 24 extra nodes, 0 pruned nodes, max_depth=4
[81]	validation_0-error:0.00092
[08:32:40] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 24 extra nodes, 6 pruned nodes, max_depth=4
[82]	validation_0-error:0.00092
[08:32:40] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 22 extra nodes, 4 pruned nodes, max_depth=4
[83]	validation_0-error:0.00088
[08:32:41] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 12 extra nodes, 2 pruned nodes, max_depth=4
[84]	validation_0-error:0.00083
[08:32:41] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 16 extra nodes, 4 pruned nodes, max_depth=4
[85]	validation_0

In [17]:
random_search.best_estimator_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1.0, gamma=2, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=4,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=-1, num_parallel_tree=1, random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=0.6,
              tree_method='exact', validate_parameters=1, verbosity=3)

In [18]:
random_search.best_params_

{'subsample': 0.6,
 'min_child_weight': 1,
 'max_depth': 4,
 'gamma': 2,
 'colsample_bytree': 1.0}

The hyperparametrs need to be tuned further as we had only 5 iterations in the randomized search and also some of the hyperparametrs are from the end 

In [19]:
random_search.cv_results_

{'mean_fit_time': array([228.6882772 ,  29.68213105, 193.11487565, 355.00734129,
        222.86075273]),
 'std_fit_time': array([ 0.94757504,  3.19502437, 73.66814628, 14.8403803 , 67.83660046]),
 'mean_score_time': array([0.73124566, 0.27167912, 0.77146583, 0.88842554, 0.52000384]),
 'std_score_time': array([0.01935282, 0.03526943, 0.28866786, 0.1684275 , 0.20513182]),
 'param_subsample': masked_array(data=[1.0, 0.8, 1.0, 0.8, 0.6],
              mask=[False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_min_child_weight': masked_array(data=[5, 10, 10, 10, 1],
              mask=[False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_max_depth': masked_array(data=[3, 4, 4, 5, 4],
              mask=[False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_gamma': masked_array(data=[1.5, 1, 2, 1.5, 2],
              mask=[False, False, False, False, False],
       

In [20]:
random_search.cv_results_
results = pd.DataFrame(random_search.cv_results_)
results.to_csv('xgb-random-grid-search-results-01.csv', index=False)

In [21]:
random_search_prediction = random_search.predict(X_test)

In [22]:
print("Classification Report for Extreme Gradient Boosting \n\n\n",classification_report(y_test,random_search_prediction))

Classification Report for Extreme Gradient Boosting 


               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.67      0.89      0.77        98

    accuracy                           1.00     56962
   macro avg       0.84      0.94      0.88     56962
weighted avg       1.00      1.00      1.00     56962



In [23]:
print("Confusion Matrix for Extreme Gradient Boosting \n",confusion_matrix(y_test,random_search_prediction))

Confusion Matrix for Extreme Gradient Boosting 
 [[56822    42]
 [   11    87]]


The model performs better than the previous model and can be futher optimized around the best parameters from randomized search

In [24]:
random_search.best_params_

{'subsample': 0.6,
 'min_child_weight': 1,
 'max_depth': 4,
 'gamma': 2,
 'colsample_bytree': 1.0}

In [25]:
params = {
        'min_child_weight': [8, 10],
#        'gamma': [1.5, 2, 2.5],
#        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.6, 0.7],
        'max_depth': [3, 4, 5]
        }

xgb_grid1 = XGBClassifier(n_estimators=100,gamm=2,subsample=1.0, verbosity=3,n_jobs=-1,random_state=42)
grid_search = GridSearchCV(xgb_grid1, param_grid=params,cv=3, scoring='roc_auc', n_jobs=-1, verbose=1, return_train_score=True)



In [26]:
start_time = timer(None)
grid_search.fit(X_train_set,y_train_set,early_stopping_rounds=10,eval_set=[(X_val,y_val)])
timer(start_time)

Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:  1.9min finished


Parameters: { gamm } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[09:09:42] DEBUG: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\gbm\gbtree.cc:147: Using tree method: 2
[09:09:42] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=4
[0]	validation_0-error:0.00083
Will train until validation_0-error hasn't improved in 10 rounds.
[09:09:42] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=4
[1]	validation_0-error:0.00072
[09:09:42] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 

In [27]:
grid_search.best_estimator_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.6, gamm=2, gamma=0,
              gpu_id=-1, importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=4,
              min_child_weight=10, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=-1, num_parallel_tree=1, random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1.0,
              tree_method='exact', validate_parameters=1, verbosity=3)

In [28]:
grid_search.best_params_

{'colsample_bytree': 0.6, 'max_depth': 4, 'min_child_weight': 10}

In [29]:
random_search.best_params_

{'subsample': 0.6,
 'min_child_weight': 1,
 'max_depth': 4,
 'gamma': 2,
 'colsample_bytree': 1.0}

In [30]:
grid_search.best_score_

0.9809599285952381

In [31]:
random_search.best_score_

0.9999845424001352

In [32]:
grid_search_prediction = grid_search.predict(X_test)

In [33]:
print("Classification Report for Extreme Gradient Boosting \n\n\n",classification_report(y_test,grid_search_prediction))
print("Confusion Matrix for Extreme Gradient Boosting \n",confusion_matrix(y_test,grid_search_prediction))

Classification Report for Extreme Gradient Boosting 


               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.93      0.82      0.87        98

    accuracy                           1.00     56962
   macro avg       0.96      0.91      0.93     56962
weighted avg       1.00      1.00      1.00     56962

Confusion Matrix for Extreme Gradient Boosting 
 [[56858     6]
 [   18    80]]


In [34]:
print("Classification Report for Extreme Gradient Boosting \n\n\n",classification_report(y_test,random_search_prediction))
print("Confusion Matrix for Extreme Gradient Boosting \n",confusion_matrix(y_test,random_search_prediction))

Classification Report for Extreme Gradient Boosting 


               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.67      0.89      0.77        98

    accuracy                           1.00     56962
   macro avg       0.84      0.94      0.88     56962
weighted avg       1.00      1.00      1.00     56962

Confusion Matrix for Extreme Gradient Boosting 
 [[56822    42]
 [   11    87]]


Thus the model has slightly improved using the GridSearchCV it can be further improved by doing extensive gridsearch

In [46]:
params = {
        'min_child_weight': [6, 8, 9],
        'gamma': [1, 2],
        'subsample': [0.75, 0.8, 1.0],
        'colsample_bytree': [0.5, 0.6, 0.7],
        'max_depth': [4, 5, 7]
        }

xgb_grid1 = XGBClassifier(n_estimators=100,gamm=2,subsample=1.0, verbosity=3,n_jobs=-1,random_state=42)
grid_search = GridSearchCV(xgb_grid1, param_grid=params,cv=3, scoring='roc_auc', n_jobs=-1, verbose=1, return_train_score=True)

In [47]:
start_time = timer(None)
grid_search.fit(X_trainsm,y_trainsm,early_stopping_rounds=10,eval_set=[(X_val,y_val)])
timer(start_time)

Fitting 3 folds for each of 162 candidates, totalling 486 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 10.2min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed: 53.6min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed: 161.5min
[Parallel(n_jobs=-1)]: Done 486 out of 486 | elapsed: 191.0min finished


Parameters: { gamm } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[13:18:19] DEBUG: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\gbm\gbtree.cc:147: Using tree method: 2
[13:18:20] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[0]	validation_0-error:0.02517
Will train until validation_0-error hasn't improved in 10 rounds.
[13:18:21] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[1]	validation_0-error:0.01053
[13:18:21] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 

[13:18:33] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 16 extra nodes, 0 pruned nodes, max_depth=4
[38]	validation_0-error:0.00309
[13:18:33] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 24 extra nodes, 0 pruned nodes, max_depth=4
[39]	validation_0-error:0.00292
[13:18:34] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[40]	validation_0-error:0.00255
[13:18:34] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 26 extra nodes, 2 pruned nodes, max_depth=4
[41]	validation_0-error:0.00252
[13:18:34] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[42]	validation_0

[13:18:47] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 16 extra nodes, 2 pruned nodes, max_depth=4
[79]	validation_0-error:0.00108
[13:18:47] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 26 extra nodes, 2 pruned nodes, max_depth=4
[80]	validation_0-error:0.00105
[13:18:47] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 14 extra nodes, 4 pruned nodes, max_depth=4
[81]	validation_0-error:0.00108
[13:18:48] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 20 extra nodes, 2 pruned nodes, max_depth=4
[82]	validation_0-error:0.00103
[13:18:48] INFO: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\tree\updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[83]	validation_0

In [48]:
X_trainsm.shape, X_train_set.shape,X_train.shape

((363920, 30), (182276, 30), (227845, 30))

In [49]:
grid_search.best_estimator_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.6, gamm=2, gamma=2,
              gpu_id=-1, importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=4,
              min_child_weight=6, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=-1, num_parallel_tree=1, random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1.0,
              tree_method='exact', validate_parameters=1, verbosity=3)

In [50]:
grid_search.best_params_

{'colsample_bytree': 0.6,
 'gamma': 2,
 'max_depth': 4,
 'min_child_weight': 6,
 'subsample': 1.0}

In [51]:
grid_search.best_score_

0.9999864040191603

In [52]:
grid_search_prediction1 = grid_search.predict(X_test)

In [53]:
print("Classification Report for Extreme Gradient Boosting \n\n\n",classification_report(y_test,grid_search_prediction1))
print("Confusion Matrix for Extreme Gradient Boosting \n",confusion_matrix(y_test,grid_search_prediction1))

Classification Report for Extreme Gradient Boosting 


               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.60      0.89      0.72        98

    accuracy                           1.00     56962
   macro avg       0.80      0.94      0.86     56962
weighted avg       1.00      1.00      1.00     56962

Confusion Matrix for Extreme Gradient Boosting 
 [[56807    57]
 [   11    87]]
