In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import (
    VotingClassifier, BaggingClassifier, RandomForestClassifier,
    BaggingRegressor, RandomForestRegressor,
    AdaBoostClassifier, GradientBoostingClassifier
)
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, mean_squared_error

# Every split below holds out 30% of the rows and uses the same seed.
split_options = {"test_size": 0.3, "random_state": 42}

# Classification data: Iris feature matrix and target labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Data for the regressor demo: 1000 noisy samples from make_moons.
# NOTE(review): make_moons generates a two-class toy dataset, so y_reg is
# presumably a 0/1 label rather than a continuous target -- confirm that
# fitting a regressor on it is intended.
X_reg, y_reg = datasets.make_moons(n_samples=1000, noise=0.25, random_state=42)
Xr_train, Xr_test, yr_train, yr_test = train_test_split(
    X_reg, y_reg, **split_options
)

In [2]:
# Three heterogeneous base models that both voting ensembles are built from.
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier(random_state=42)
dt_clf = DecisionTreeClassifier(random_state=42)
named_estimators = [('lr', log_clf), ('rf', rnd_clf), ('dt', dt_clf)]

# Same members, two voting modes; each ensemble receives its own copy of the
# (name, estimator) list.
voting_clf_hard = VotingClassifier(estimators=list(named_estimators), voting='hard')
voting_clf_soft = VotingClassifier(estimators=list(named_estimators), voting='soft')

# Fit each ensemble and report its accuracy on the held-out Iris rows.
# Both lines are labelled with the same class name, so they can only be told
# apart by order: the hard-voting result is printed first, then soft-voting.
for clf in (voting_clf_hard, voting_clf_soft):
    clf.fit(X_train, y_train)
    test_accuracy = accuracy_score(y_test, clf.predict(X_test))
    print(type(clf).__name__, "accuracy:", test_accuracy)


VotingClassifier accuracy: 1.0
VotingClassifier accuracy: 1.0


In [3]:
# Bagging ensemble: 500 default decision trees, each fitted on a bootstrap
# draw (sampling with replacement) of 100 training rows.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
    random_state=42,
)
bag_clf.fit(X_train, y_train)

# Score the fitted ensemble on the held-out Iris rows.
bag_test_accuracy = accuracy_score(y_test, bag_clf.predict(X_test))
print("Bagging classifier accuracy:", bag_test_accuracy)

Bagging classifier accuracy: 1.0


In [4]:
# The classifier and the regressor use identical forest hyper-parameters:
# 500 trees, each limited to 16 leaves, with a fixed seed.
forest_options = dict(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)

# Classification on the Iris split.
rf_clf = RandomForestClassifier(**forest_options)
rf_clf.fit(X_train, y_train)
rf_test_accuracy = accuracy_score(y_test, rf_clf.predict(X_test))
print("Random Forest accuracy:", rf_test_accuracy)

# Regression on the make_moons split; test error is reported as mean squared error.
rf_reg = RandomForestRegressor(**forest_options)
rf_reg.fit(Xr_train, yr_train)
yr_pred = rf_reg.predict(Xr_test)
rf_test_mse = mean_squared_error(yr_test, yr_pred)
print("RF regression MSE:", rf_test_mse)

Random Forest accuracy: 1.0
RF regression MSE: 0.03451875235577146


In [5]:
# List each Iris feature next to the importance the fitted random forest
# assigned to it, rounded to three decimals.
importances = rf_clf.feature_importances_
for position, name in enumerate(iris.feature_names):
    score = importances[position]
    print(f"{name}: {score:.3f}")

sepal length (cm): 0.115
sepal width (cm): 0.042
petal length (cm): 0.434
petal width (cm): 0.409


In [6]:
# Bagging ensemble of 200 default decision trees with out-of-bag scoring
# enabled, so the ensemble exposes an accuracy estimate (oob_score_) after
# fitting without touching the test split.
bag_clf_oob = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=200,
    bootstrap=True,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)
bag_clf_oob.fit(X_train, y_train)

oob_accuracy = bag_clf_oob.oob_score_
print("OOB accuracy:", oob_accuracy)

OOB accuracy: 0.9428571428571428


In [7]:
# AdaBoost over 200 decision stumps (max_depth=1) with learning_rate=0.5.
# The explicit algorithm="SAMME.R" argument is deliberately not passed:
# scikit-learn deprecated that option in 1.4 and no longer accepts it from
# 1.6 on, so hard-coding it breaks this cell on current releases. On older
# releases SAMME.R is the default, so omitting the argument keeps the same
# behaviour there; on newer releases the library default (SAMME) is used and
# the reported accuracy may differ slightly from earlier runs.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200,
    learning_rate=0.5, random_state=42
)
ada_clf.fit(X_train, y_train)
print("AdaBoost accuracy:", accuracy_score(y_test, ada_clf.predict(X_test)))

# Gradient boosting: 200 trees of depth 3 with learning_rate=0.1, evaluated
# on the same held-out Iris rows.
gbrt_clf = GradientBoostingClassifier(
    max_depth=3, n_estimators=200, learning_rate=0.1, random_state=42
)
gbrt_clf.fit(X_train, y_train)
print("Gradient Boosting accuracy:", accuracy_score(y_test, gbrt_clf.predict(X_test)))




AdaBoost accuracy: 0.9777777777777777
Gradient Boosting accuracy: 1.0


In [10]:
# Hyper-parameter search for AdaBoost over decision-tree base learners.
# DecisionTreeClassifier is already imported in the notebook's first cell, so
# it is not re-imported here.
base_estimator = DecisionTreeClassifier()

# 3 x 3 x 3 = 27 candidate settings. The 'estimator__' prefix routes
# max_depth to the wrapped decision tree rather than to AdaBoost itself.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'estimator__max_depth': [1, 2, 3]
}

# 5-fold cross-validation on the training split only, scored by accuracy.
# NOTE(review): the `estimator=` keyword requires a scikit-learn release that
# uses this name for the base learner (older ones used `base_estimator=`).
grid_search = GridSearchCV(
    AdaBoostClassifier(estimator=base_estimator, random_state=42),
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

print("Best params:", grid_search.best_params_)
print("Best CV score:", grid_search.best_score_)


Best params: {'estimator__max_depth': 2, 'learning_rate': 0.01, 'n_estimators': 50}
Best CV score: 0.9428571428571428


