#**XGBoost Random Forest - Classification (Sklearn)**

**Import Libraries**

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from sklearn.datasets import load_diabetes, load_iris, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.preprocessing import MaxAbsScaler, PowerTransformer
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import precision_score, recall_score, f1_score, explained_variance_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
import xgboost as xgb
from xgboost import XGBRegressor, XGBClassifier

**Code**

In [2]:
class RandomForestWithXGBoost:
    def __init__(self, n_forests=1, n_trees=5, max_depth=10, min_samples_split=2, xgboost_trees=2):
        self.n_forests = n_forests  # Number of random forests
        self.n_trees = n_trees  # Number of trees in each random forest
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.xgboost_trees = xgboost_trees  # Number of XGBoost trees
        self.forests = []  # Store the forests
        self.rf_model = None  # Random Forest model
        self.xgb_model = None  # XGBoost model

    def fit(self, X, y):
        # Train Random Forest Classifier
        self.rf_model = RandomForestClassifier(n_estimators=self.n_trees, max_depth=self.max_depth,
                                               min_samples_split=self.min_samples_split, random_state=42)
        self.rf_model.fit(X, y)

        # Print Random Forest structure
        print("\nRandom Forest trained with the following parameters:")
        print(f"n_trees: {self.n_trees}, max_depth: {self.max_depth}, min_samples_split: {self.min_samples_split}")

        # Train XGBoost model
        self.xgb_model = xgb.XGBClassifier(n_estimators=self.xgboost_trees, max_depth=self.max_depth,
                                            objective='multi:softmax', eval_metric='mlogloss', random_state=42)
        self.xgb_model.fit(X, y)

    def predict(self, X):
        # Get predictions from Random Forest
        rf_preds = self.rf_model.predict(X)
        print(f"Random Forest predictions: {rf_preds}")

        # Get predictions from XGBoost
        xgb_preds = self.xgb_model.predict(X)
        print(f"XGBoost predictions: {xgb_preds}")

        return rf_preds, xgb_preds

    def evaluate(self, X, y):
        rf_preds, xgb_preds = self.predict(X)

        # Calculate the classification report and confusion matrix for Random Forest
        print("\nClassification Report for Random Forest:")
        print(classification_report(y, rf_preds))
        print("Confusion Matrix for Random Forest:")
        print(confusion_matrix(y, rf_preds))

        # Calculate the classification report and confusion matrix for XGBoost
        print("\nClassification Report for XGBoost:")
        print(classification_report(y, xgb_preds))
        print("Confusion Matrix for XGBoost:")
        print(confusion_matrix(y, xgb_preds))

In [3]:
# Load Iris dataset
data = load_iris()
X, y = data.data, data.target

In [4]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Initialize and train RandomForestWithXGBoost
rf_xgb = RandomForestWithXGBoost(n_forests=1, n_trees=5, xgboost_trees=2)
rf_xgb.fit(X_train, y_train)


Random Forest trained with the following parameters:
n_trees: 5, max_depth: 10, min_samples_split: 2


In [6]:
y_pred = rf_xgb.predict(X_test)
print(y_pred[:5])

Random Forest predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 1 0 2 2 2 2 2 0 0]
XGBoost predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
(array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 1,
       0, 2, 2, 2, 2, 2, 0, 0]), array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0], dtype=int32))


In [7]:
# Evaluate the model
rf_xgb.evaluate(X_test, y_test)

Random Forest predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 1 0 2 2 2 2 2 0 0]
XGBoost predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]

Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.90      1.00      0.95         9
           2       1.00      0.91      0.95        11

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

Confusion Matrix for Random Forest:
[[10  0  0]
 [ 0  9  0]
 [ 0  1 10]]

Classification Report for XGBoost:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00 