In [1]:
import warnings

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

warnings.simplefilter('ignore')

In [2]:
# Generate a synthetic, reproducible classification dataset.
# Of the 10 feature columns, 5 are informative and 5 are redundant, so those
# two groups together account for every requested column.
X, y = make_classification(
    random_state=1,  # fixed seed: the same samples are produced on every run
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=5,
)

In [3]:
# Sanity-check the dimensions of the feature matrix and the label vector.
print(*(arr.shape for arr in (X, y)))

(1000, 10) (1000,)


In [4]:
# Configure the Gradient Boosting classifier. Nothing is trained in this cell;
# fitting is done by the later cross-validation and hold-out cells.
clf_gbm = GradientBoostingClassifier(
    random_state=500,  # fixed seed so repeated fits give the same model
    learning_rate=0.1,
    n_estimators=100,
)

### Cross-validation

In [5]:
# Score the classifier with 5-fold cross-validation on the full dataset, then
# report the mean score with a spread of two standard deviations.
scores = cross_val_score(estimator=clf_gbm, X=X, y=y, cv=5)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), 2 * scores.std()))

Accuracy: 0.91 (+/- 0.02)


### Test set validation

In [6]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# reproducible. train_test_split is imported with the other dependencies in
# the notebook's first cell rather than here, so all imports live in one place.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=4
)

In [7]:
# Train the classifier on the training split, then predict labels for the
# held-out test split. fit() returns the fitted estimator itself, so the two
# steps can be chained.
pred = clf_gbm.fit(X_train, y_train).predict(X_test)

In [8]:
# Evaluate hold-out performance as plain accuracy.
# Arguments are passed in scikit-learn's documented (y_true, y_pred) order,
# the same order used by the confusion_matrix call in the next cell. Accuracy
# is symmetric, so the value is unaffected, but keeping the order right
# prevents silent errors if this is swapped for an asymmetric metric.
acc = accuracy_score(y_test, pred)
print('Accuracy: {:.3f}'.format(acc))

Accuracy: 0.920


In [9]:
# Compute and print the confusion matrix for the test-set predictions.
# In scikit-learn's layout, row i is the true class and column j the predicted class.
cm = confusion_matrix(y_true=y_test, y_pred=pred)
print(cm)

[[89  7]
 [ 9 95]]
