In [23]:
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

In [24]:
# load iris dataset
iris = load_breast_cancer()
X, y = iris.data, iris.target

# split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [33]:
X_train[:5]

array([[9.029e+00, 1.733e+01, 5.879e+01, 2.505e+02, 1.066e-01, 1.413e-01,
        3.130e-01, 4.375e-02, 2.111e-01, 8.046e-02, 3.274e-01, 1.194e+00,
        1.885e+00, 1.767e+01, 9.549e-03, 8.606e-02, 3.038e-01, 3.322e-02,
        4.197e-02, 9.559e-03, 1.031e+01, 2.265e+01, 6.550e+01, 3.247e+02,
        1.482e-01, 4.365e-01, 1.252e+00, 1.750e-01, 4.228e-01, 1.175e-01],
       [2.109e+01, 2.657e+01, 1.427e+02, 1.311e+03, 1.141e-01, 2.832e-01,
        2.487e-01, 1.496e-01, 2.395e-01, 7.398e-02, 6.298e-01, 7.629e-01,
        4.414e+00, 8.146e+01, 4.253e-03, 4.759e-02, 3.872e-02, 1.567e-02,
        1.798e-02, 5.295e-03, 2.668e+01, 3.348e+01, 1.765e+02, 2.089e+03,
        1.491e-01, 7.584e-01, 6.780e-01, 2.903e-01, 4.098e-01, 1.284e-01],
       [9.173e+00, 1.386e+01, 5.920e+01, 2.609e+02, 7.721e-02, 8.751e-02,
        5.988e-02, 2.180e-02, 2.341e-01, 6.963e-02, 4.098e-01, 2.265e+00,
        2.608e+00, 2.352e+01, 8.738e-03, 3.938e-02, 4.312e-02, 1.560e-02,
        4.192e-02, 5.822e-03, 1.001e

In [34]:
y_train[:5]

array([1, 0, 1, 1, 1])

In [29]:
# define Decision Tree classifier and set hyperparameters
dt_clf = DecisionTreeClassifier(max_depth=5)

# train the classifier on the train set
dt_clf.fit(X_train, y_train)

# predict the labels of test set
y_pred = dt_clf.predict(X_test)

# calculate accuracy score
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

report = classification_report(y_test,y_pred)
print(report)

conf_mat = confusion_matrix(y_test,y_pred)
print(conf_mat)

Accuracy: 0.9473684210526315
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

[[40  3]
 [ 3 68]]


In [30]:
# define Gradient Boosting classifier and set hyperparameters
gb_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5)

# train the classifier on the train set
gb_clf.fit(X_train, y_train)

# predict the labels of test set
y_pred = gb_clf.predict(X_test)

# calculate accuracy score
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

report = classification_report(y_test,y_pred)
print(report)

conf_mat = confusion_matrix(y_test,y_pred)
print(conf_mat)

Accuracy: 0.9649122807017544
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

[[40  3]
 [ 1 70]]


In [32]:
# define XGBoost classifier and set hyperparameters
xgb_clf = xgb.XGBClassifier(objective='binary:logistic', max_depth=5, learning_rate=0.1)

# train the classifier on the train set
xgb_clf.fit(X_train, y_train)

# calculate accuracy score
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

report = classification_report(y_test,y_pred)
print(report)

conf_mat = confusion_matrix(y_test,y_pred)
print(conf_mat)


Accuracy: 0.9649122807017544
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

[[40  3]
 [ 1 70]]
