ENSEMBLE LEARNING


1.VOTING ENSEMBLE

In [5]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC

lr_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('lr', LogisticRegression(max_iter=3000))
])

svc_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(probability=True))
])

rf = RandomForestClassifier(n_estimators=200, random_state=42)

voting = VotingClassifier(
    estimators=[
        ('lr', lr_pipe),
        ('rf', rf),
        ('svc', svc_pipe)
    ],
    voting='soft'
)
voting.fit(X_train, y_train)

preds = voting.predict_proba(X_test)[:, 1]
print("ROC AUC:", roc_auc_score(y_test, preds))

ROC AUC: 0.9970238095238094


2.BAGGING (bootstrap aggregation) IMPLEMENTATION

In [6]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import roc_auc_score

# Data
X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# Single Tree (High Variance)
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train, y_train)

tree_auc = roc_auc_score(y_test, tree.predict_proba(X_test)[:, 1])

# Bagging
bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=200,
    bootstrap=True,
    random_state=42
)

bagging.fit(X_train, y_train)

bagging_auc = roc_auc_score(
    y_test,
    bagging.predict_proba(X_test)[:, 1]
)

print("Single Tree AUC:", tree_auc)
print("Bagging AUC:", bagging_auc)


Single Tree AUC: 0.9156746031746031
Bagging AUC: 0.99239417989418


3.ADABOOSTING IMPLEMENTATION

In [7]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score

# Data
X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# Weak learner
base_learner = DecisionTreeClassifier(max_depth=1)

# AdaBoost
ada = AdaBoostClassifier(
    estimator=base_learner,
    n_estimators=200,
    learning_rate=0.5,
    random_state=42
)

ada.fit(X_train, y_train)

preds = ada.predict_proba(X_test)[:, 1]
print("AdaBoost ROC AUC:", roc_auc_score(y_test, preds))


AdaBoost ROC AUC: 0.9847883597883598


4.GRADIENTBOOSTING IMPLEMENTATION

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score

# Data
X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

gbdt = GradientBoostingClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=3,
    random_state=42
)

gbdt.fit(X_train, y_train)

preds = gbdt.predict_proba(X_test)[:, 1]
print("GBDT ROC AUC:", roc_auc_score(y_test, preds))


5.XGBOOST IMPLEMENTATION

In [9]:
from xgboost import XGBClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

xgb = XGBClassifier(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=0.1,
    reg_lambda=1.0,
    eval_metric='logloss',
    random_state=42
)

xgb.fit(X_train, y_train)

preds = xgb.predict_proba(X_test)[:, 1]
print("XGBoost ROC AUC:", roc_auc_score(y_test, preds))


XGBoost ROC AUC: 0.9953703703703703


6.LIGNTGBM IMPLEMENTATION

In [18]:
from lightgbm import LGBMClassifier
import warnings
warnings.filterwarnings("ignore")

lgbm = LGBMClassifier(
    n_estimators=200,
    learning_rate=0.05,
    num_leaves=31,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    verbosity=-1
)


lgbm.fit(X_train, y_train)

preds = lgbm.predict_proba(X_test)[:, 1]
print("LightGBM ROC AUC:", roc_auc_score(y_test, preds))


LightGBM ROC AUC: 0.9894179894179894


7.STACKING IMPLEMENTATION 

In [19]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score

# Data
X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# Base models
base_models = [
    ('lr', LogisticRegression(max_iter=1000)),
    ('rf', RandomForestClassifier(n_estimators=300)),
    ('xgb', XGBClassifier(
        n_estimators=300,
        learning_rate=0.05,
        max_depth=4,
        eval_metric='logloss'
    ))
]

# Meta-model
meta_model = LogisticRegression()

stack = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
    stack_method='predict_proba'
)

stack.fit(X_train, y_train)

preds = stack.predict_proba(X_test)[:, 1]
print("Stacking ROC AUC:", roc_auc_score(y_test, preds))


Stacking ROC AUC: 0.9947089947089947
