In [8]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the iris feature matrix and integer class labels as plain arrays
# (as_frame=False), not pandas objects.
X, y = load_iris(return_X_y=True, as_frame=False)

# Split BEFORE scaling so the held-out rows never contribute to the scaler's
# mean/variance. Fitting the scaler on the full dataset leaks test-set
# statistics into training and makes test scores optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)

# Learn the standardisation from the training rows only, then apply that same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later cell that refers to `X_scaled` still runs. It is the full
# dataset transformed with the train-fitted scaler; do not use it for model
# evaluation, since it contains the test rows.
X_scaled = scaler.transform(X)

### Task 1. Voting classifier

In [2]:
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble of three different model families. Soft voting combines
# the members' predicted class probabilities, which is why the SVC is created
# with probability=True.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', LogisticRegression()),
        # SVC's probability estimates are produced by an internal randomised
        # cross-validation; seeding it keeps the ensemble score reproducible
        # across Restart & Run All.
        ('svm', SVC(probability=True, random_state=42)),
        ('knn', KNeighborsClassifier(n_neighbors=5))
    ],
    voting='soft'
)

voting_clf.fit(X_train, y_train)

# Last expression of the cell: mean accuracy of the ensemble on the test split.
voting_clf.score(X_test, y_test)

0.9666666666666667

In [3]:
# Ensemble accuracy first, then one "<name>: <accuracy>" line per fitted member,
# all measured on the same held-out split.
# NOTE(review): the members are scored directly against y_test; this presumes
# the labels the ensemble trained them on match the raw labels (true for
# iris' 0/1/2 targets) - confirm if the target encoding ever changes.
ensemble_accuracy = voting_clf.score(X_test, y_test)
print(ensemble_accuracy)

member_report = [
    f"{name}: {clf.score(X_test, y_test)}"
    for name, clf in voting_clf.named_estimators_.items()
]
for report_line in member_report:
    print(report_line)

0.9666666666666667
lr: 0.9333333333333333
svm: 0.9666666666666667
knn: 0.9666666666666667


### Task 2. Bagging

In [16]:
from sklearn.ensemble import BaggingClassifier
# Bagging: 50 decision trees, each trained on a bootstrap resample of the
# training set, with their predictions aggregated.
bag_clf = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    # Seed the resampling (and the trees derived from it) so the reported
    # accuracy is reproducible on a fresh kernel.
    random_state=42,
)

bag_clf.fit(X_train, y_train)

# Last expression of the cell: mean accuracy on the test split.
bag_clf.score(X_test, y_test)

0.9666666666666667

In [5]:
# Random forest with 100 trees, for comparison with the bagged decision trees.
# Seeded (same seed as the forest used in the stacking section) so the score
# does not change between runs.
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rnd_clf.fit(X_train, y_train)

# Last expression of the cell: mean accuracy on the test split.
rnd_clf.score(X_test, y_test)

0.9666666666666667

### Task 3. Boosting

In [6]:
from sklearn.ensemble import GradientBoostingClassifier

# Baseline gradient-boosting model with hand-picked hyperparameters; the seed
# fixes the result across runs. Built and fitted in a single expression.
grd_clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# Cell output: mean accuracy on the test split.
grd_clf.score(X_test, y_test)

0.9333333333333333

In [17]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier

# Hyperparameter candidates: 3 x 3 x 3 = 27 combinations are searched.
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[2, 3, 4],
)

# Note: this rebinds `grd_clf` to a fresh, unfitted estimator that serves as
# the template for the search.
grd_clf = GradientBoostingClassifier(random_state=42)

# 5-fold cross-validated search on the training split only, scored by
# accuracy and run on all available cores (n_jobs=-1).
grid_search = GridSearchCV(
    estimator=grd_clf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: {:.4f}".format(grid_search.best_score_))

# The held-out test split is touched only here, after model selection.
best_grd_clf = grid_search.best_estimator_
test_score = best_grd_clf.score(X_test, y_test)
print("Test set score with best estimator: {:.4f}".format(test_score))

Best parameters found:  {'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 50}
Best cross-validation score: 0.9750
Test set score with best estimator: 0.9667


### Task 4. Stacking 

In [9]:
from sklearn.ensemble import StackingClassifier

# Level-1 model: combines the base learners' outputs into the final prediction.
meta_learner = LogisticRegression(random_state=42)

# Level-0 models as (name, estimator) pairs.
base_learners = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('svm', SVC(probability=True, random_state=42)),
    ('knn', KNeighborsClassifier(n_neighbors=5))
]

stacking_clf = StackingClassifier(
    estimators=base_learners,
    final_estimator=meta_learner,
    # The meta-features fed to the final estimator are the base learners'
    # class probabilities, generated with 5 cross-validation folds.
    stack_method='predict_proba',
    cv=5,
)

# Fit on the training split and predict the held-out rows in one step.
stacking_pred = stacking_clf.fit(X_train, y_train).predict(X_test)

print(f"Stacking Classifier Accuracy: {accuracy_score(y_test, stacking_pred):.3f}")

# Baseline for comparison: train every base learner on its own and report its
# stand-alone test accuracy. This fits the estimator objects held in
# `base_learners` in place.
for name, clf in base_learners:
    pred = clf.fit(X_train, y_train).predict(X_test)
    print(f"{name.upper()} Accuracy: {accuracy_score(y_test, pred):.3f}")

Stacking Classifier Accuracy: 0.967
RF Accuracy: 0.967
SVM Accuracy: 0.967
KNN Accuracy: 0.967
