In [81]:
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score

In [9]:
# load data
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")
dataset

array([[  6.   , 148.   ,  72.   , ...,   0.627,  50.   ,   1.   ],
       [  1.   ,  85.   ,  66.   , ...,   0.351,  31.   ,   0.   ],
       [  8.   , 183.   ,  64.   , ...,   0.672,  32.   ,   1.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,   0.245,  30.   ,   0.   ],
       [  1.   , 126.   ,  60.   , ...,   0.349,  47.   ,   1.   ],
       [  1.   ,  93.   ,  70.   , ...,   0.315,  23.   ,   0.   ]])

In [14]:
# split data into X and y
X = dataset[:,0:8]
Y = dataset[:,8]
X.shape, Y.shape

((768, 8), (768,))

In [15]:
# split data into train and test sets
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)

In [69]:
def isolate():
    # fit model no training data
    xgb_clf = XGBClassifier()
    xgb_clf.fit(X_train, y_train)

    # make predictions for test data
    y_pred = xgb_clf.predict(X_test)

    # evaluate predictions
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))

isolate()

Accuracy: 74.02%


In [46]:
def isolate():
    # fit model no training data
    rf_clf = RandomForestClassifier(random_state=17)
    rf_clf.fit(X_train, y_train)

    # make predictions for test data
    y_pred = rf_clf.predict(X_test)

    # evaluate predictions
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))

isolate()

Accuracy: 77.17%


In [74]:
def isolate():
    # fit model no training data
    ab_clf = AdaBoostClassifier(random_state=17)
    ab_clf.fit(X_train, y_train)

    # make predictions for test data
    y_pred = ab_clf.predict(X_test)

    # evaluate predictions
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))

isolate()

Accuracy: 72.44%


In [87]:
def isolate():
    xgb_clf = XGBClassifier()
    ada_clf = AdaBoostClassifier()
    rf_clf = RandomForestClassifier()

    class DummyEstimator(BaseEstimator):
        def fit(self): pass
        def score(self): pass

    pipe = Pipeline(steps=[
        ('clf', DummyEstimator())
    ])

    cv = GridSearchCV(estimator=pipe, 
                      param_grid=[
                        {'clf': [xgb_clf],
                         'clf__n_estimators': [90, 100, 110]},
                        {'clf': [ada_clf],
                         'clf__n_estimators': [80, 100, 120]},
                        {'clf': [rf_clf],
                         'clf__n_estimators': [70, 100, 130]}
                      ])
    cv.fit(X_train, y_train)

    print(cv.best_estimator_)
    print("Accuracy: %.2f%%" % (cv.best_score_ * 100.0))

isolate()

Pipeline(steps=[('clf', RandomForestClassifier(n_estimators=70))])
Accuracy: 76.06%
