In [1]:
import pandas as pd

In [2]:
TrainSet = pd.read_csv('./dataset/TrainSet.csv')

In [3]:
TrainSet.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48925 entries, 0 to 48924
Data columns (total 6 columns):
Total Unsigned Flux (Mx)           48925 non-null float64
GWILL (Mm)                         48925 non-null float64
Average Gradient across PIL (G)    48925 non-null float64
Effective Separation (pixels)      48925 non-null float64
PIL Length (Mm)                    48925 non-null float64
Label                              48925 non-null int64
dtypes: float64(5), int64(1)
memory usage: 2.2 MB


In [4]:
#import libraries:
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn import cross_validation, metrics   #Additional sklearn functions
from sklearn.grid_search import GridSearchCV   #Perforing grid search

import matplotlib.pylab as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 12, 4

target = 'Label'



In [12]:
def modelfit(alg, dtrain, predictors, useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain[target].values)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
            metrics='auc', early_stopping_rounds=early_stopping_rounds)
        alg.set_params(n_estimators=cvresult.shape[0])
    
    #Fit the algorithm on the data
    alg.fit(dtrain[predictors], dtrain[target], eval_metric='auc')
        
    #Predict training set:
    dtrain_predictions = alg.predict(dtrain[predictors])
    dtrain_predprob = alg.predict_proba(dtrain[predictors])[:,1]
        
    #Print model report:
    print("\nModel Report")
    print("Accuracy : %.4g" % metrics.accuracy_score(dtrain[target].values, dtrain_predictions))
    print("AUC Score (Train): %f" % metrics.roc_auc_score(dtrain[target], dtrain_predprob))
                    
    feat_imp = pd.Series(alg.booster().get_fscore()).sort_values(ascending=False)
    #feat_imp = pd.Series(alg.booster().get_fscore())
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')

In [11]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =0.1,
 n_estimators=5000,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8452
AUC Score (Train): 0.934827


TypeError: 'str' object is not callable

In [13]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =0.5,
 n_estimators=5000,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8713
AUC Score (Train): 0.948358


TypeError: 'str' object is not callable

In [14]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=5000,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8247
AUC Score (Train): 0.911075


TypeError: 'str' object is not callable

In [21]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=250,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8398
AUC Score (Train): 0.924156


TypeError: 'str' object is not callable

In [22]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=240,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8366
AUC Score (Train): 0.920226


TypeError: 'str' object is not callable

In [24]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=230,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8334
AUC Score (Train): 0.917090


TypeError: 'str' object is not callable

In [25]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=260,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8454
AUC Score (Train): 0.927499


TypeError: 'str' object is not callable

In [28]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=261,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8453
AUC Score (Train): 0.927938


TypeError: 'str' object is not callable

In [29]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=262,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8455
AUC Score (Train): 0.928310


TypeError: 'str' object is not callable

In [30]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=263,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8466
AUC Score (Train): 0.928722


TypeError: 'str' object is not callable

In [31]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=264,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8463
AUC Score (Train): 0.928885


TypeError: 'str' object is not callable

In [32]:
#Choose all predictors except target & IDcols
predictors = [x for x in TrainSet.columns if x not in [target]]
xgb1 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=265,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb1, TrainSet, predictors)


Model Report
Accuracy : 0.8247
AUC Score (Train): 0.911075


TypeError: 'str' object is not callable

`n_estimators=264`是最优值
## step 2

In [33]:
param_test1 = {
 'max_depth': list(range(3,10,2)),
 'min_child_weight': list(range(1,6,2))
}
gsearch1 = GridSearchCV(estimator = XGBClassifier( learning_rate =1.0, n_estimators=264, max_depth=5,
 min_child_weight=1, gamma=0, subsample=0.8, colsample_bytree=0.8,
 objective= 'binary:logistic', scale_pos_weight=1, seed=27), 
 param_grid = param_test1, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
gsearch1.fit(TrainSet[predictors],TrainSet[target])
gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_

([mean: 0.51993, std: 0.01253, params: {'max_depth': 3, 'min_child_weight': 1},
  mean: 0.52281, std: 0.01237, params: {'max_depth': 3, 'min_child_weight': 3},
  mean: 0.51481, std: 0.01662, params: {'max_depth': 3, 'min_child_weight': 5},
  mean: 0.51696, std: 0.01145, params: {'max_depth': 5, 'min_child_weight': 1},
  mean: 0.51896, std: 0.01369, params: {'max_depth': 5, 'min_child_weight': 3},
  mean: 0.52434, std: 0.01039, params: {'max_depth': 5, 'min_child_weight': 5},
  mean: 0.51563, std: 0.01546, params: {'max_depth': 7, 'min_child_weight': 1},
  mean: 0.52076, std: 0.01101, params: {'max_depth': 7, 'min_child_weight': 3},
  mean: 0.51809, std: 0.01087, params: {'max_depth': 7, 'min_child_weight': 5},
  mean: 0.51852, std: 0.01433, params: {'max_depth': 9, 'min_child_weight': 1},
  mean: 0.52134, std: 0.01095, params: {'max_depth': 9, 'min_child_weight': 3},
  mean: 0.52353, std: 0.01260, params: {'max_depth': 9, 'min_child_weight': 5}],
 {'max_depth': 5, 'min_child_weight': 5

In [37]:
param_test2 = {
 'max_depth': [4,5,6],
 'min_child_weight': [4, 5, 6, 7, 8, 9]
}
gsearch2 = GridSearchCV(estimator = XGBClassifier( learning_rate=1.0, n_estimators=264, max_depth=5,
 min_child_weight=1, gamma=0, subsample=0.8, colsample_bytree=0.8,
 objective= 'binary:logistic', scale_pos_weight=1,seed=27), 
 param_grid = param_test2, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
gsearch2.fit(TrainSet[predictors],TrainSet[target])
gsearch2.grid_scores_, gsearch2.best_params_, gsearch2.best_score_

([mean: 0.51936, std: 0.00905, params: {'max_depth': 4, 'min_child_weight': 4},
  mean: 0.52706, std: 0.00853, params: {'max_depth': 4, 'min_child_weight': 5},
  mean: 0.51581, std: 0.00817, params: {'max_depth': 4, 'min_child_weight': 6},
  mean: 0.51789, std: 0.01259, params: {'max_depth': 4, 'min_child_weight': 7},
  mean: 0.52361, std: 0.01261, params: {'max_depth': 4, 'min_child_weight': 8},
  mean: 0.51983, std: 0.00973, params: {'max_depth': 4, 'min_child_weight': 9},
  mean: 0.52102, std: 0.01019, params: {'max_depth': 5, 'min_child_weight': 4},
  mean: 0.52434, std: 0.01039, params: {'max_depth': 5, 'min_child_weight': 5},
  mean: 0.51741, std: 0.01554, params: {'max_depth': 5, 'min_child_weight': 6},
  mean: 0.51885, std: 0.01176, params: {'max_depth': 5, 'min_child_weight': 7},
  mean: 0.51830, std: 0.01212, params: {'max_depth': 5, 'min_child_weight': 8},
  mean: 0.52403, std: 0.01103, params: {'max_depth': 5, 'min_child_weight': 9},
  mean: 0.51764, std: 0.01482, params: {

## step 3: tune gamma
***

In [38]:
param_test3 = {
 'gamma':[i/10.0 for i in range(0,5)]
}
gsearch3 = GridSearchCV(estimator = XGBClassifier( learning_rate =1.0, n_estimators=264, max_depth=4,
 min_child_weight=5, gamma=0, subsample=0.8, colsample_bytree=0.8,
 objective= 'binary:logistic', scale_pos_weight=1,seed=27), 
 param_grid = param_test3, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
gsearch3.fit(TrainSet[predictors],TrainSet[target])
gsearch3.grid_scores_, gsearch3.best_params_, gsearch3.best_score_

([mean: 0.52706, std: 0.00853, params: {'gamma': 0.0},
  mean: 0.51581, std: 0.00584, params: {'gamma': 0.1},
  mean: 0.51924, std: 0.01055, params: {'gamma': 0.2},
  mean: 0.51776, std: 0.01180, params: {'gamma': 0.3},
  mean: 0.52377, std: 0.00937, params: {'gamma': 0.4}],
 {'gamma': 0.0},
 0.5270637854630812)

In [39]:
xgb2 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=264,
 max_depth=4,
 min_child_weight=5,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb2, TrainSet, predictors)


Model Report
Accuracy : 0.7677
AUC Score (Train): 0.856999


TypeError: 'str' object is not callable

In [40]:
xgb2 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=300,
 max_depth=4,
 min_child_weight=5,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb2, TrainSet, predictors)


Model Report
Accuracy : 0.777
AUC Score (Train): 0.866671


TypeError: 'str' object is not callable

In [46]:
xgb2 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=310,
 max_depth=4,
 min_child_weight=5,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb2, TrainSet, predictors)


Model Report
Accuracy : 0.7634
AUC Score (Train): 0.852949


TypeError: 'str' object is not callable

### Step 4: Tune **`subsample`** 和 **`colsample_bytree`**
***

In [47]:
param_test4 = {
 'subsample': [i/10.0 for i in range(6,10)],
 'colsample_bytree': [i/10.0 for i in range(6,10)]
}
gsearch4 = GridSearchCV(estimator = XGBClassifier( learning_rate =1.0, n_estimators=300, max_depth=4,
 min_child_weight=5, gamma=0, subsample=0.8, colsample_bytree=0.8,
 objective= 'binary:logistic', scale_pos_weight=1,seed=27), 
 param_grid = param_test4, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
gsearch4.fit(TrainSet[predictors],TrainSet[target])
gsearch4.grid_scores_, gsearch4.best_params_, gsearch4.best_score_

([mean: 0.52201, std: 0.00787, params: {'colsample_bytree': 0.6, 'subsample': 0.6},
  mean: 0.52074, std: 0.00657, params: {'colsample_bytree': 0.6, 'subsample': 0.7},
  mean: 0.51608, std: 0.01082, params: {'colsample_bytree': 0.6, 'subsample': 0.8},
  mean: 0.51886, std: 0.00954, params: {'colsample_bytree': 0.6, 'subsample': 0.9},
  mean: 0.52201, std: 0.00787, params: {'colsample_bytree': 0.7, 'subsample': 0.6},
  mean: 0.52074, std: 0.00657, params: {'colsample_bytree': 0.7, 'subsample': 0.7},
  mean: 0.51608, std: 0.01082, params: {'colsample_bytree': 0.7, 'subsample': 0.8},
  mean: 0.51886, std: 0.00954, params: {'colsample_bytree': 0.7, 'subsample': 0.9},
  mean: 0.51651, std: 0.01502, params: {'colsample_bytree': 0.8, 'subsample': 0.6},
  mean: 0.51945, std: 0.00972, params: {'colsample_bytree': 0.8, 'subsample': 0.7},
  mean: 0.52472, std: 0.00868, params: {'colsample_bytree': 0.8, 'subsample': 0.8},
  mean: 0.52168, std: 0.00876, params: {'colsample_bytree': 0.8, 'subsample'

### Step 5: Tuning Regularization Parameters
***

In [48]:
param_test6 = {
 'reg_alpha':[1e-5, 1e-2, 0.1, 1, 100]
}
gsearch6 = GridSearchCV(estimator = XGBClassifier( learning_rate =1.0, n_estimators=300, max_depth=4,
 min_child_weight=5, gamma=0, subsample=0.8, colsample_bytree=0.8,
 objective= 'binary:logistic', scale_pos_weight=1,seed=27), 
 param_grid = param_test6, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
gsearch6.fit(TrainSet[predictors],TrainSet[target])
gsearch6.grid_scores_, gsearch6.best_params_, gsearch6.best_score_

([mean: 0.52472, std: 0.00868, params: {'reg_alpha': 1e-05},
  mean: 0.51492, std: 0.00357, params: {'reg_alpha': 0.01},
  mean: 0.51529, std: 0.00907, params: {'reg_alpha': 0.1},
  mean: 0.51801, std: 0.00659, params: {'reg_alpha': 1},
  mean: 0.53319, std: 0.01507, params: {'reg_alpha': 100}],
 {'reg_alpha': 100},
 0.5331906809412243)

In [50]:
param_test6 = {
 'reg_alpha':[10, 100, 1000, 10000]
}
gsearch6 = GridSearchCV(estimator = XGBClassifier( learning_rate =1.0, n_estimators=300, max_depth=4,
 min_child_weight=5, gamma=0, subsample=0.8, colsample_bytree=0.8,
 objective= 'binary:logistic', scale_pos_weight=1,seed=27), 
 param_grid = param_test6, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
gsearch6.fit(TrainSet[predictors],TrainSet[target])
gsearch6.grid_scores_, gsearch6.best_params_, gsearch6.best_score_

([mean: 0.52962, std: 0.01242, params: {'reg_alpha': 10},
  mean: 0.53319, std: 0.01507, params: {'reg_alpha': 100},
  mean: 0.55677, std: 0.02144, params: {'reg_alpha': 1000},
  mean: 0.50000, std: 0.00000, params: {'reg_alpha': 10000}],
 {'reg_alpha': 1000},
 0.5567746888426015)

In [51]:
param_test6 = {
 'reg_alpha':[100, 1000, 3000, 10000]
}
gsearch6 = GridSearchCV(estimator = XGBClassifier( learning_rate =1.0, n_estimators=300, max_depth=4,
 min_child_weight=5, gamma=0, subsample=0.8, colsample_bytree=0.8,
 objective= 'binary:logistic', scale_pos_weight=1,seed=27), 
 param_grid = param_test6, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
gsearch6.fit(TrainSet[predictors],TrainSet[target])
gsearch6.grid_scores_, gsearch6.best_params_, gsearch6.best_score_

([mean: 0.53319, std: 0.01507, params: {'reg_alpha': 100},
  mean: 0.55677, std: 0.02144, params: {'reg_alpha': 1000},
  mean: 0.50000, std: 0.00000, params: {'reg_alpha': 3000},
  mean: 0.50000, std: 0.00000, params: {'reg_alpha': 10000}],
 {'reg_alpha': 1000},
 0.5567746888426015)

In [52]:
param_test6 = {
 'reg_alpha': list(range(100, 3001, 100))
}
gsearch6 = GridSearchCV(estimator = XGBClassifier( learning_rate =1.0, n_estimators=300, max_depth=4,
 min_child_weight=5, gamma=0, subsample=0.8, colsample_bytree=0.8,
 objective= 'binary:logistic', scale_pos_weight=1,seed=27), 
 param_grid = param_test6, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
gsearch6.fit(TrainSet[predictors],TrainSet[target])
gsearch6.grid_scores_, gsearch6.best_params_, gsearch6.best_score_

([mean: 0.53319, std: 0.01507, params: {'reg_alpha': 100},
  mean: 0.53633, std: 0.01596, params: {'reg_alpha': 200},
  mean: 0.53871, std: 0.01166, params: {'reg_alpha': 300},
  mean: 0.54734, std: 0.00880, params: {'reg_alpha': 400},
  mean: 0.55421, std: 0.01432, params: {'reg_alpha': 500},
  mean: 0.56079, std: 0.01331, params: {'reg_alpha': 600},
  mean: 0.55698, std: 0.01215, params: {'reg_alpha': 700},
  mean: 0.55856, std: 0.01533, params: {'reg_alpha': 800},
  mean: 0.55429, std: 0.02030, params: {'reg_alpha': 900},
  mean: 0.55677, std: 0.02144, params: {'reg_alpha': 1000},
  mean: 0.55661, std: 0.02157, params: {'reg_alpha': 1100},
  mean: 0.55392, std: 0.02248, params: {'reg_alpha': 1200},
  mean: 0.55399, std: 0.02232, params: {'reg_alpha': 1300},
  mean: 0.55433, std: 0.02217, params: {'reg_alpha': 1400},
  mean: 0.55411, std: 0.02197, params: {'reg_alpha': 1500},
  mean: 0.55673, std: 0.02596, params: {'reg_alpha': 1600},
  mean: 0.54324, std: 0.01654, params: {'reg_alph

In [54]:
xgb3 = XGBClassifier(
 learning_rate =1.0,
 n_estimators=300,
 max_depth=4,
 min_child_weight=5,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb3, TrainSet, predictors)


Model Report
Accuracy : 0.777
AUC Score (Train): 0.866671


TypeError: 'str' object is not callable

### Step 6: Reducing Learning Rate
***
最后, 我们应该减小学习率, 这样会得到更多子估计器.

In [58]:
xgb4 = XGBClassifier(
 learning_rate =0.2,
 n_estimators=700,
 max_depth=4,
 min_child_weight=5,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb4, TrainSet, predictors)


Model Report
Accuracy : 0.7655
AUC Score (Train): 0.860004


TypeError: 'str' object is not callable

In [59]:
xgb4 = XGBClassifier(
 learning_rate =0.2,
 n_estimators=1000,
 max_depth=4,
 min_child_weight=5,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb4, TrainSet, predictors)


Model Report
Accuracy : 0.7626
AUC Score (Train): 0.856344


TypeError: 'str' object is not callable

In [61]:
xgb4 = XGBClassifier(
 learning_rate =0.2,
 n_estimators=800,
 max_depth=4,
 min_child_weight=5,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb4, TrainSet, predictors)


Model Report
Accuracy : 0.7626
AUC Score (Train): 0.856344


TypeError: 'str' object is not callable

In [62]:
xgb4 = XGBClassifier(
 learning_rate =0.1,
 n_estimators=1000,
 max_depth=4,
 min_child_weight=5,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb4, TrainSet, predictors)


Model Report
Accuracy : 0.7448
AUC Score (Train): 0.841340


TypeError: 'str' object is not callable

In [63]:
xgb4 = XGBClassifier(
 learning_rate =0.1,
 n_estimators=2000,
 max_depth=4,
 min_child_weight=5,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb4, TrainSet, predictors)


Model Report
Accuracy : 0.8011
AUC Score (Train): 0.894479


TypeError: 'str' object is not callable

In [68]:
TestSet = pd.read_csv('./dataset/TestSet.csv')

In [69]:
TestSet_predictions = xgb4.predict(TestSet[predictors])
print("Accuracy : %.4g" % metrics.accuracy_score(TestSet[target].values, TestSet_predictions))

Accuracy : 0.6271


In [70]:
xgb5 = XGBClassifier(
 learning_rate =0.5,
 n_estimators=5000,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread=4,
 scale_pos_weight=1,
 seed=27)
modelfit(xgb5, TrainSet, predictors)


Model Report
Accuracy : 0.8713
AUC Score (Train): 0.948358


TypeError: 'str' object is not callable

In [72]:
TestSet_predictions = xgb5.predict(TestSet[predictors])
print("Accuracy : %.4g" % metrics.accuracy_score(TestSet[target].values, TestSet_predictions))

Accuracy : 0.5844
