In [1]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

### Creating the classification data set

In [2]:
# Synthetic two-class problem: 1000 rows and 4 feature columns, of which
# 2 are informative and none are redundant (the other 2 are pure noise).
# shuffle=False keeps the informative columns first and leaves rows unshuffled;
# random_state pins the generated data so reruns are identical.
X, y = make_classification(
    n_samples=1000,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    shuffle=False,
    random_state=0,
)

### Creating the base estimator

In [3]:
from sklearn.tree import DecisionTreeClassifier

# Weak learner for boosting: a shallow tree (at most 4 levels) that picks
# splits by information gain (entropy) rather than the default Gini impurity.
clf_tree = DecisionTreeClassifier(max_depth=4, criterion="entropy")

In [4]:
# AdaBoost ensemble of 20 copies of the shallow tree defined above.
# The weak learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so using either keyword breaks on some versions. The first
# positional slot is the weak learner under both names.
clf_ada = AdaBoostClassifier(
    clf_tree,
    n_estimators=20,
    random_state=500,
)

### Cross validation

In [5]:
# 5-fold cross-validation of the boosted model on the full data set.
# With no `scoring` argument the classifier's own score (accuracy) is used.
scores = cross_val_score(clf_ada, X, y, cv=5)

# Report the mean fold accuracy together with a two-standard-deviation band.
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), 2 * scores.std()))

Accuracy: 0.94 (+/- 0.06)


### Test set validation

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,
)

In [7]:
# Fit the boosted model on the training split, then predict labels for the
# held-out test rows. `fit` returns the estimator itself, so the calls chain.
pred = clf_ada.fit(X_train, y_train).predict(X_test)

In [8]:
# Evaluate the performance on the test set: fraction of rows predicted correctly.
# scikit-learn metrics take (y_true, y_pred) in that order; accuracy happens to be
# symmetric, but keeping the documented order matches the confusion_matrix call
# and stays correct if this is swapped for an asymmetric metric later.
acc = accuracy_score(y_test, pred)
print('Accuracy: {:.3f}'.format(acc))

Accuracy: 0.940


In [9]:
# Confusion matrix for the test set: entry [i, j] counts rows whose true
# class is i and whose predicted class is j.
cm = confusion_matrix(y_true=y_test, y_pred=pred)
print(cm)

[[97  6]
 [ 6 91]]
