In [11]:
import xgboost

In [12]:
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [13]:
# Read 'pima-indians-diabetes.data.csv' into a NumPy array.
# Only a delimiter is supplied, so every comma-separated field is parsed
# with loadtxt's default numeric dtype.
dataset = loadtxt(
    'pima-indians-diabetes.data.csv',
    delimiter=',',
)

In [14]:
# Separate inputs from the target: columns 0-7 are the feature matrix X,
# column 8 is the label vector Y.
X, Y = dataset[:, :8], dataset[:, 8]

In [15]:
# Hold out part of the rows for testing. The fixed seed is passed as
# random_state so the same split is produced on every run.
seed = 7
test_size = 0.33  # fraction of rows placed in the test set
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=seed,
    test_size=test_size,
)

In [16]:
# Create the baseline classifier with default hyperparameters.
# Nothing is fitted here; this cell only constructs the estimator and
# prints it so the effective default settings are visible.
model1 = XGBClassifier()
print(model1)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
              max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
              n_jobs=1, nthread=None, objective='binary:logistic',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              seed=None, silent=True, subsample=1)


In [17]:
# Second classifier with hand-picked hyperparameters, to be compared
# against the default-parameter model1.
model2 = XGBClassifier(
    # Same values as the defaults printed for model1, spelled out explicitly.
    objective='binary:logistic',
    scale_pos_weight=1,
    # Ten times more boosting rounds than the default, each with a
    # ten times smaller learning rate.
    n_estimators=1000,
    learning_rate=0.01,
    # Slightly deeper trees, with a non-zero gamma (default is 0).
    max_depth=4,
    gamma=10,
    # Row and per-tree column subsampling (both default to 1).
    subsample=0.8,
    colsample_bytree=0.4,
    # Non-zero reg_alpha (default is 0).
    reg_alpha=0.3,
    # Differs from the default silent=True.
    silent=False,
)
print(model2)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bytree=0.4, gamma=10, learning_rate=0.01,
              max_delta_step=0, max_depth=4, min_child_weight=1, missing=None,
              n_estimators=1000, n_jobs=1, nthread=None,
              objective='binary:logistic', random_state=0, reg_alpha=0.3,
              reg_lambda=1, scale_pos_weight=1, seed=None, silent=False,
              subsample=0.8)


In [18]:
# Train the default-parameter classifier on the training split.
model1.fit(X_train, y_train)
# Predict labels for the held-out rows. XGBClassifier.predict returns hard
# class labels (not probabilities), so the result can be scored directly;
# no per-element rounding is needed.
y_pred = model1.predict(X_test)
predictions = y_pred  # name kept for any later cell that reads it
# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 77.95%


In [19]:
# Fit model1 again on the training split, this time reporting AUC and
# classification error on both splits after every boosting round.
# No early_stopping_rounds is passed, so the evaluation sets are only
# reported, not used to stop training.
eval_metric = ["auc", "error"]
eval_set = [
    (X_train, y_train),  # shown as validation_0 in the log
    (X_test, y_test),    # shown as validation_1 in the log
]
# Left as the cell's last expression so the fitted estimator is displayed.
model1.fit(
    X_train,
    y_train,
    eval_set=eval_set,
    eval_metric=eval_metric,
    verbose=True,
)


[0]	validation_0-auc:0.852306	validation_0-error:0.22179	validation_1-auc:0.817834	validation_1-error:0.259843
[1]	validation_0-auc:0.864931	validation_0-error:0.202335	validation_1-auc:0.816861	validation_1-error:0.26378
[2]	validation_0-auc:0.868797	validation_0-error:0.196498	validation_1-auc:0.817297	validation_1-error:0.26378
[3]	validation_0-auc:0.880144	validation_0-error:0.200389	validation_1-auc:0.824913	validation_1-error:0.255906
[4]	validation_0-auc:0.876832	validation_0-error:0.202335	validation_1-auc:0.823034	validation_1-error:0.255906
[5]	validation_0-auc:0.88006	validation_0-error:0.202335	validation_1-auc:0.824946	validation_1-error:0.26378
[6]	validation_0-auc:0.884985	validation_0-error:0.194553	validation_1-auc:0.822632	validation_1-error:0.23622
[7]	validation_0-auc:0.885666	validation_0-error:0.200389	validation_1-auc:0.827026	validation_1-error:0.255906
[8]	validation_0-auc:0.892516	validation_0-error:0.194553	validation_1-auc:0.824242	validation_1-error:0.24803

[73]	validation_0-auc:0.956243	validation_0-error:0.120623	validation_1-auc:0.825953	validation_1-error:0.216535
[74]	validation_0-auc:0.956378	validation_0-error:0.118677	validation_1-auc:0.826825	validation_1-error:0.216535
[75]	validation_0-auc:0.956714	validation_0-error:0.118677	validation_1-auc:0.826557	validation_1-error:0.216535
[76]	validation_0-auc:0.956714	validation_0-error:0.116732	validation_1-auc:0.827563	validation_1-error:0.216535
[77]	validation_0-auc:0.958143	validation_0-error:0.116732	validation_1-auc:0.828771	validation_1-error:0.216535
[78]	validation_0-auc:0.958882	validation_0-error:0.114786	validation_1-auc:0.827899	validation_1-error:0.216535
[79]	validation_0-auc:0.959588	validation_0-error:0.114786	validation_1-auc:0.82716	validation_1-error:0.212598
[80]	validation_0-auc:0.960177	validation_0-error:0.114786	validation_1-auc:0.826624	validation_1-error:0.212598
[81]	validation_0-auc:0.960362	validation_0-error:0.11284	validation_1-auc:0.826758	validation_1-

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
              max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
              n_jobs=1, nthread=None, objective='binary:logistic',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              seed=None, silent=True, subsample=1)

In [20]:
# Train the hand-tuned classifier on the training split.
model2.fit(X_train, y_train)
# Predict labels for the held-out rows. XGBClassifier.predict returns hard
# class labels (not probabilities), so the result can be scored directly;
# no per-element rounding is needed.
# NOTE: this reuses (and overwrites) the y_pred / predictions / accuracy
# names that the model1 evaluation cell assigned.
y_pred = model2.predict(X_test)
predictions = y_pred  # name kept for any later cell that reads it
# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 74.80%
