In [111]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [112]:
# Load the scikit-learn digits dataset and hold out 20% of the samples for
# testing; the fixed seed keeps the split identical across runs.
data = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)

In [113]:
# Baseline: a single seeded decision tree, scored on the held-out test set.
single_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
tree_acc = accuracy_score(y_true=y_test, y_pred=single_tree.predict(X_test))

In [114]:
# Bagging ensemble of decision trees, trained and scored on the same
# X_train / X_test variables as the single-tree baseline.
bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    # n_estimators = number of trees in the ensemble: more trees cost more
    # computation and usually give a better-quality ensemble.
    n_estimators=30,
    random_state=42,
).fit(X_train, y_train)
bagging_acc = accuracy_score(y_true=y_test, y_pred=bagging.predict(X_test))

In [115]:
# Print the results: single-tree accuracy first, bagging accuracy second,
# one value per line.
print(tree_acc, bagging_acc, sep="\n")

0.8416666666666667
0.9583333333333334


In [116]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_wine

In [117]:
# Switch to the wine dataset with a seeded 70/30 train/test split.
# This rebinds `data` and the four train/test variables.
data = load_wine()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.3,
    random_state=42,
)

In [118]:
# Ensemble sizes to compare.
estimators_list = [50, 100, 200, 1000]

# Fit one gradient boosting model per ensemble size and report test accuracy
# next to train accuracy, so overfitting is visible at a glance.
for n in estimators_list:
    gb_clf = GradientBoostingClassifier(
        n_estimators=n,
        learning_rate=0.1,
        random_state=42,
    ).fit(X_train, y_train)

    y_pred = gb_clf.predict(X_test)
    acc = accuracy_score(y_true=y_test, y_pred=y_pred)
    train_acc = accuracy_score(y_true=y_train, y_pred=gb_clf.predict(X_train))

    print(f"n_estimators = {n}: Test Accuracy = {acc:.4f} | Train Accuracy = {train_acc:.4f}")

n_estimators = 50: Test Accuracy = 0.9074 | Train Accuracy = 1.0000
n_estimators = 100: Test Accuracy = 0.9074 | Train Accuracy = 1.0000
n_estimators = 200: Test Accuracy = 0.9259 | Train Accuracy = 1.0000
n_estimators = 1000: Test Accuracy = 0.9259 | Train Accuracy = 1.0000


In [119]:
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_wine

In [120]:
# Load the wine dataset with a seeded 80/20 train/test split.
# This rebinds `data` and the four train/test variables.
data = load_wine()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)


In [126]:
# Base learners for the stacking ensemble, as (name, estimator) pairs.
# Every estimator that accepts a seed gets random_state=42 so that the
# stacking result is reproducible on Restart & Run All.
# NOTE(review): the SVC is fit on unscaled features here; consider wrapping it
# in a scaling pipeline if its contribution to the stack matters.
estimators = [
    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('svm', SVC(probability=False, random_state=42)),
    # Seeded like the other estimators: an unseeded tree may break ties
    # between equally good splits differently from run to run.
    ('dt', DecisionTreeClassifier(max_depth=5, random_state=42))
]


In [127]:
# Stacked ensemble: the base learners in `estimators` feed a logistic
# regression final estimator, with 5-fold cross-validation (cv=5).
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), cv=5)


In [128]:
# Train the stacked ensemble on the training split; as the cell's last
# expression, the fitted estimator is also displayed by the notebook.
clf.fit(X=X_train, y=y_train)

In [129]:
# Held-out accuracy of the stacked ensemble; the bare name on the last line
# makes the notebook display the value.
stacking_acc = accuracy_score(y_true=y_test, y_pred=clf.predict(X_test))
stacking_acc

0.9722222222222222

In [132]:
# Standalone random forest (30 trees) scored on the same test split, for
# comparison with the stacked ensemble.
rf = RandomForestClassifier(n_estimators=30, random_state=42).fit(X_train, y_train)
rf_acc = accuracy_score(y_true=y_test, y_pred=rf.predict(X_test))
rf_acc

1.0

Mini project
Breast cancer model: comparing ensemble classifiers

In [134]:
import time
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [138]:
# Load the breast cancer dataset with a seeded 80/20 train/test split.
# This rebinds `data` and the four train/test variables.
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)

In [145]:
# Candidate ensembles keyed by display name. Entries are added in the order
# they should be fitted and reported (dicts preserve insertion order).
models = {}

models["Bagging (DT)"] = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=100,
    random_state=42,
)

models["Random Forest"] = RandomForestClassifier(n_estimators=100, random_state=42)

models["Gradient Boosting"] = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)

# Stacking: three base learners feeding a logistic regression final
# estimator, with 5-fold cross-validation (cv=5).
models["Stacking"] = StackingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
        ('svm', SVC(probability=False, random_state=42)),
        ('gb', GradientBoostingClassifier(n_estimators=100, random_state=42)),
    ],
    final_estimator=LogisticRegression(),
    cv=5,
)

In [147]:
# Fit every model in `models`, score it on the held-out test set, and collect
# (name, accuracy, elapsed seconds) tuples in `results`.
results = []

for name, model in models.items():
    # time.perf_counter() is a monotonic, high-resolution clock meant for
    # measuring durations; time.time() is a wall clock that can jump when the
    # system time is adjusted.
    start_time = time.perf_counter()
    model.fit(X_train, y_train)
    acc = accuracy_score(y_test, model.predict(X_test))
    end_time = time.perf_counter()
    # The measured interval covers fitting plus test-set prediction and scoring.
    elapsed_time = end_time - start_time

    # Printed columns: model name | test accuracy | elapsed seconds.
    print(f"{name:<20} | {acc:.4f}     | {elapsed_time:.4f}")
    results.append((name, acc, elapsed_time))

Bagging (DT)         | 0.9561     | 1.8330
Random Forest        | 0.9649     | 0.4488
Gradient Boosting    | 0.9561     | 1.0665
Stacking             | 0.9737     | 6.7862
