In [1]:
# Basic libraries
import numpy as np
import pandas as pd

# Scikit-learn modules
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_breast_cancer

In [2]:
# Load dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

In [3]:
X.shape

(569, 30)

In [4]:
y.shape

(569,)

In [5]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape 

((398, 30), (171, 30), (398,), (171,))

In [7]:
# Base estimator: Decision Tree (stump)
base_estimator = DecisionTreeClassifier(max_depth=1)

In [8]:
base_estimator

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,1
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [10]:
# Create AdaBoost classifier
adaboost = AdaBoostClassifier(estimator=base_estimator, n_estimators=50, learning_rate=0.5, random_state=42)

In [11]:
adb = adaboost.fit(X_train,y_train)

In [12]:
adb.score(X_test, y_test)

0.9649122807017544

In [13]:
y_pred = adb.predict(X_test)

In [14]:
# Evaluate
print("Accuracy (Basic Model):", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy (Basic Model): 0.9649122807017544
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.97      0.95        63
           1       0.98      0.96      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.97      0.96       171
weighted avg       0.97      0.96      0.97       171



In [26]:
# Parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1, 2],
    'algorithm': ['SAMME', 'SAMME.R']
}

In [27]:
# AdaBoost with Decision Tree as base estimator
ada = AdaBoostClassifier(estimator=DecisionTreeClassifier(), random_state=42)

In [28]:
# GridSearchCV
grid_search = GridSearchCV(estimator=ada,
                           param_grid=param_grid,
                           cv=5,            # 5-fold cross-validation
                           scoring='accuracy',
                           n_jobs=-1,        # Use all CPU cores
                           verbose=2)

In [29]:
grid_search.fit(X_train,y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


60 fits failed out of a total of 120.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
60 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\amitm\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\amitm\anaconda3\Lib\site-packages\sklearn\base.py", line 1358, in wrapper
    estimator._validate_params()
  File "C:\Users\amitm\anaconda3\Lib\site-packages\sklearn\base.py", line 471, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\amitm\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 98, in validate_parameter_constraints
    raise InvalidParamete

0,1,2
,estimator,AdaBoostClass...ndom_state=42)
,param_grid,"{'algorithm': ['SAMME', 'SAMME.R'], 'learning_rate': [0.01, 0.1, ...], 'n_estimators': [50, 100, ...]}"
,scoring,'accuracy'
,n_jobs,-1
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [30]:
print("Best Score on Training Set:", grid_search.best_score_)

Best Score on Training Set: 0.9246835443037975


In [32]:
grid_search.best_params_

{'algorithm': 'SAMME', 'learning_rate': 0.01, 'n_estimators': 50}

In [31]:
# Best estimator from GridSearch
best_ada = grid_search.best_estimator_

# Predict
y_pred_best = best_ada.predict(X_test)

In [33]:
# Evaluate
print("Accuracy (Tuned Model):", accuracy_score(y_test, y_pred_best))
print("Classification Report:\n", classification_report(y_test, y_pred_best))

Accuracy (Tuned Model): 0.9298245614035088
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.94      0.91        63
           1       0.96      0.93      0.94       108

    accuracy                           0.93       171
   macro avg       0.92      0.93      0.93       171
weighted avg       0.93      0.93      0.93       171

