# Bootstrap Aggregation (Bagging)

In [1]:
#@ Build a noisy two-moons toy dataset and hold out a test split
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,  # fixed seed so the generated points are reproducible
)
# No split size is passed, so train_test_split uses its defaults; only the seed is pinned.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)


In [2]:
#@ Implementation of BaggingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# An ensemble of 500 decision trees; max_samples=100 sets how many training
# instances are drawn for each tree, and n_jobs=-1 requests all available cores.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    n_jobs=-1,
    random_state=42,
)
bag_clf.fit(X_train, y_train)


In [3]:
# Out-of-bag (OOB) evaluation: refit the ensemble with oob_score=True and read
# the score stored on the fitted estimator.
# Note: this rebinds `bag_clf`, replacing the earlier model, and does not pass
# max_samples this time.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)
bag_clf.fit(X_train, y_train)
bag_clf.oob_score_

0.896

In [4]:
# Accuracy of the current `bag_clf` on the held-out test split
from sklearn.metrics import accuracy_score

y_pred = bag_clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.92

# Random Forest

In [5]:
# Random forest of 500 trees, each limited to at most 16 leaf nodes.
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,
)
rnd_clf.fit(X_train, y_train)
# Test-set predictions are stored; no metric is computed on them in this cell.
y_pred_rf = rnd_clf.predict(X_test)

In [6]:
#@ Feature importances of a random forest trained on the iris dataset
from sklearn.datasets import load_iris

iris = load_iris(as_frame=True)  # as_frame=True -> iris.data exposes .columns

# Note: this rebinds `rnd_clf`; the forest fitted on the moons data is replaced.
rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42)
rnd_clf.fit(iris.data, iris.target)

# One line per feature: importance rounded to two decimals, then the column name.
for name, score in zip(iris.data.columns, rnd_clf.feature_importances_):
    print(round(score, 2), name)

0.11 sepal length (cm)
0.02 sepal width (cm)
0.44 petal length (cm)
0.42 petal width (cm)


# Boosting

In [7]:
#@ Implementation of AdaBoost
from sklearn.ensemble import AdaBoostClassifier

# Base learner is a depth-1 tree (a decision stump); 30 of them are boosted.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=30,
    learning_rate=0.5,
    random_state=42,
)
ada_clf.fit(X_train, y_train)


In [8]:
#@ Gradient boosting by hand, step 1: fit a shallow tree on a noisy quadratic
import numpy as np
from sklearn.tree import DecisionTreeRegressor

np.random.seed(42)

# Note: `X` and `y` are rebound here to a 1-D regression problem; the earlier
# moons arrays under these names are replaced (X_train/X_test/... are untouched).
X = np.random.rand(100, 1) - 0.5  # 100 inputs drawn uniformly, shifted by -0.5
y = 3 * X[:, 0] ** 2 + 0.05 * np.random.randn(100)  # y = 3x² + Gaussian noise

tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg1.fit(X, y)

In [9]:
#@ Step 2: fit a second tree on the residual errors left by the first regressor
y2 = y - tree_reg1.predict(X)  # what tree_reg1 failed to explain
tree_reg2 = DecisionTreeRegressor(
    max_depth=2,
    random_state=43,
)
tree_reg2.fit(X, y2)


In [10]:
#@ Steps 3 and 4: keep fitting trees on the residuals of the previous predictor
y3 = y2 - tree_reg2.predict(X)  # residuals remaining after the second tree
tree_reg3 = DecisionTreeRegressor(
    max_depth=2,
    random_state=44,
)
tree_reg3.fit(X, y3)

# NOTE(review): tree_reg4 reuses random_state=44 (same seed as tree_reg3), and the
# visible prediction cell sums only tree_reg1..tree_reg3 — confirm this tree is needed.
y4 = y3 - tree_reg3.predict(X)  # residuals remaining after the third tree
tree_reg4 = DecisionTreeRegressor(
    max_depth=2,
    random_state=44,
)
tree_reg4.fit(X, y4)

In [11]:
#@ Prediction of the hand-built ensemble: add up the outputs of the residual trees
X_new = np.array([[-0.8], [0.0], [0.89]])
# Only the first three trees are summed; tree_reg4 is not part of this total.
sum(
    reg.predict(X_new)
    for reg in (tree_reg1, tree_reg2, tree_reg3)
)


array([0.6679558 , 0.04021166, 0.75026781])

In [12]:
#@ Direct implementation of gradient boosting with scikit-learn
from sklearn.ensemble import GradientBoostingRegressor

# Three depth-2 trees with learning_rate=1.0, fitted on the same X and y as the
# hand-built trees.
gbrt = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=3,
    learning_rate=1.0,
    random_state=42,
)
gbrt.fit(X, y)

In [13]:
# Predict with the fitted GradientBoostingRegressor on three new inputs
X_new = np.array([[-0.8], [0.0], [0.89]])
gbrt.predict(X_new)

array([0.6679558 , 0.04021166, 0.75026781])

# Histogram-Based Gradient Boosting

In [15]:
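# NOTE: this cell is kept commented out. `housing` and `housing_labels` are not
# defined in the visible part of this notebook, so uncommenting it as-is would
# raise a NameError.
# from sklearn.pipeline import make_pipeline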
# from sklearn.compose import make_column_transformer
# from sklearn.ensemble import HistGradientBoostingRegressor
# from sklearn.preprocessing import OrdinalEncoder
# hgb_reg = make_pipeline(
#  make_column_transformer((OrdinalEncoder(), ["ocean_proximity"]),
#  remainder="passthrough"),
#  HistGradientBoostingRegressor(categorical_features=[0], random_state=42)
# )
# hgb_reg.fit(housing, housing_labels)

In [19]:
#@ Implementation of stacking
from sklearn.datasets import make_moons
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Rebuild the moons classification data: `X` and `y` were rebound to regression
# data by the gradient-boosting cells earlier in the notebook.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Three base classifiers are combined by a random-forest final estimator.
stacking_clf = StackingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=42)),
        ("rf", RandomForestClassifier(random_state=42)),
        # probability=True makes the SVC expose class-probability estimates
        ("svc", SVC(probability=True, random_state=42)),
    ],
    final_estimator=RandomForestClassifier(random_state=43),
    cv=5,  # number of cross-validation folds
)
stacking_clf.fit(X_train, y_train)