<a href="https://colab.research.google.com/github/itsmepriyabrata/priyabrata_ai_python/blob/main/Ensemble%20algorithm%20part%201.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Boosting

In [1]:
from sklearn.ensemble import GradientBoostingRegressor

X = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [15, 30, 45, 60]

model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1)

model.fit(X, y)

new_data = [[9, 10]]
prediction = model.predict(new_data)
print(f"Prediction for new data: {prediction[0]}")


Prediction for new data: 59.99940236852503


Bagging

In [2]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

base_model = DecisionTreeClassifier(random_state=42)

bagging_model = BaggingClassifier(base_estimator=base_model, n_estimators=5, random_state=42)

base_model.fit(X_train, y_train)
bagging_model.fit(X_train, y_train)

base_predictions = base_model.predict(X_test)
bagging_predictions = bagging_model.predict(X_test)

base_accuracy = accuracy_score(y_test, base_predictions)
bagging_accuracy = accuracy_score(y_test, bagging_predictions)

print("Base Model Accuracy:", base_accuracy)
print("Bagging Model Accuracy:", bagging_accuracy)


Base Model Accuracy: 1.0
Bagging Model Accuracy: 1.0




AdaBoost

In [9]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
X = iris.data  # Features
y = iris.target  # Labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = AdaBoostClassifier(n_estimators=50)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.9333


Gradient boosting

In [13]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import urllib.error

try:
    X, y = fetch_california_housing(return_X_y=True)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=4)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse:.2f}")

except urllib.error.HTTPError as e:
    print(f"Error fetching dataset: {e}")
    print("Possible causes and solutions:")
    print("- Check your internet connection.")
    print("- Verify that the dataset URL is correct and accessible.")
    print("- If the dataset requires authentication, ensure you are providing the necessary credentials.")
    print("- The server hosting the dataset might be experiencing temporary issues. Try again later.")

Error fetching dataset: HTTP Error 403: Forbidden
Possible causes and solutions:
- Check your internet connection.
- Verify that the dataset URL is correct and accessible.
- If the dataset requires authentication, ensure you are providing the necessary credentials.
- The server hosting the dataset might be experiencing temporary issues. Try again later.


XGBoost

In [14]:
import xgboost as xgb

from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_classes=2)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

params = {
    'max_depth': 5,
    'eta': 0.3,
    'objective': 'binary:logistic',  # for classification
    'silent': True
}

model = xgb.train(params, dtrain, num_boost_round=100)

y_pred = model.predict(dtest)

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred.round())
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.9000


Parameters: { "silent" } are not used.



LightGBM

In [17]:
import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

lgb_train = lgb.Dataset(X_train, label=y_train)
lgb_eval = lgb.Dataset(X_test, label=y_test, reference=lgb_train)

params = {
    "objective": "multiclass",
    "metric": "multi_logloss",
    "num_leaves": 31,
    "learning_rate": 0.1,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 5,
    "verbose": 0,
    "num_class": 3
}

model = lgb.train(params, lgb_train, valid_sets=[lgb_eval], num_boost_round=100)

y_pred = model.predict(X_test)

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred.argmax(axis=1))  # One-hot encoded predictions
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9333
