In [None]:
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
import joblib
import pandas as pd


In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import numpy as np

class XGBoostModel:
    """
    Wrapper around XGBRegressor that bundles training, prediction,
    randomized hyperparameter search, evaluation and persistence.

    Attributes:
    - model: The underlying XGBRegressor; replaced by the best estimator
      after optimize_hyperparameters()
    - feature_importances_: Importances copied from the fitted model
      (None until fit() or optimize_hyperparameters() has run)
    - best_params_: Best parameter set found by the last search
      (None until optimize_hyperparameters() has run)
    """

    def __init__(self,
                 objective='reg:squarederror',
                 n_estimators=100,
                 max_depth=3,
                 learning_rate=0.1,
                 subsample=1.0,
                 colsample_bytree=1.0,
                 gamma=0):
        """
        Build the underlying XGBRegressor with the given configuration

        The random seed is fixed at 42 so repeated runs are reproducible.

        Parameters:
        - objective: Learning objective
        - n_estimators: Number of boosting rounds
        - max_depth: Maximum tree depth
        - learning_rate: Step size shrinkage to prevent overfitting
        - subsample: Subsample ratio of the training instances
        - colsample_bytree: Subsample ratio of columns when constructing each tree
        - gamma: Minimum loss reduction required to make a further partition
        """
        self.model = XGBRegressor(
            objective=objective,
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            gamma=gamma,
            random_state=42,
        )
        self.feature_importances_ = None
        self.best_params_ = None

    def fit(self, X_train, y_train):
        """
        Train the underlying model and cache its feature importances

        Parameters:
        - X_train: Training features
        - y_train: Training target values

        Returns:
        - This wrapper, so calls can be chained
        """
        self.model.fit(X_train, y_train)
        self.feature_importances_ = self.model.feature_importances_
        return self

    def predict(self, X_test):
        """
        Make predictions using the current underlying model

        Parameters:
        - X_test: Test features

        Returns:
        - Predictions as returned by the underlying model's predict()
        """
        return self.model.predict(X_test)

    def optimize_hyperparameters(self, X_train, y_train, param_grid=None, n_iter=20, cv=3):
        """
        Optimize hyperparameters using RandomizedSearchCV

        The search estimator is a fresh XGBRegressor that keeps this wrapper's
        configured objective; every other setting not listed in param_grid
        uses the XGBRegressor default. After the search, the wrapper's model
        is replaced by the best estimator found, and the best score and
        configuration are printed.

        Parameters:
        - X_train: Training features
        - y_train: Training target values
        - param_grid: Dictionary of hyperparameters to search; a built-in
          grid over six parameters is used when None
        - n_iter: Number of parameter settings sampled
        - cv: Number of cross-validation folds

        Returns:
        - This wrapper, now holding the optimized model
        """
        if param_grid is None:
            param_grid = {
                'n_estimators': [50, 100, 200],
                'max_depth': [3, 5, 7],
                'learning_rate': [0.01, 0.1, 0.3],
                'subsample': [0.6, 0.8, 1.0],
                'colsample_bytree': [0.6, 0.8, 1.0],
                'gamma': [0, 0.1, 1],
            }

        # Carry the configured objective into the search. Hard-coding
        # 'reg:squarederror' here would silently discard a non-default
        # objective chosen at construction time once self.model is replaced.
        objective = self.model.get_params().get('objective', 'reg:squarederror')
        base_model = XGBRegressor(objective=objective, random_state=42)

        search = RandomizedSearchCV(
            base_model,
            param_grid,
            n_iter=n_iter,
            scoring='neg_mean_squared_error',
            n_jobs=-1,
            cv=cv,
            random_state=42,
            verbose=1,
        )
        search.fit(X_train, y_train)

        # Update the model with best parameters
        self.model = search.best_estimator_
        self.best_params_ = search.best_params_
        self.feature_importances_ = self.model.feature_importances_

        # The scorer is the negated MSE, so flip the sign back for display.
        print(f'Best Mean Squared Error: {-search.best_score_:.3f}')
        print(f'Best Config: {search.best_params_}')

        return self

    def evaluate(self, X_test, y_test):
        """
        Evaluate model performance

        Parameters:
        - X_test: Test features
        - y_test: True target values

        Returns:
        - Dictionary with keys 'mse', 'rmse', 'mae' and 'r2'
        """
        y_pred = self.predict(X_test)
        # Compute the MSE once and derive the RMSE from it.
        mse = mean_squared_error(y_test, y_pred)
        return {
            'mse': mse,
            'rmse': np.sqrt(mse),
            'mae': mean_absolute_error(y_test, y_pred),
            'r2': r2_score(y_test, y_pred),
        }

    def save(self, filepath):
        """
        Save this wrapper (including its model) to a file with joblib

        Parameters:
        - filepath: Path to save the model
        """
        joblib.dump(self, filepath)

    @classmethod
    def load(cls, filepath):
        """
        Load a wrapper previously written by save()

        Parameters:
        - filepath: Path to the saved model

        Returns:
        - Loaded model

        Raises:
        - TypeError: If the file does not contain an instance of this class
        """
        # NOTE: joblib files are pickle-based, so loading one can execute
        # arbitrary code. Only load files that come from a trusted source.
        loaded = joblib.load(filepath)
        if not isinstance(loaded, cls):
            raise TypeError(
                f'Expected a {cls.__name__} in {filepath!r}, '
                f'got {type(loaded).__name__}'
            )
        return loaded

# Function-style entry point kept for backward compatibility
def train_xgb_model(X_train, y_train, **kwargs):
    """
    Build an XGBoostModel from the given options and fit it in one call

    Parameters:
    - X_train: Training features
    - y_train: Training target values
    - **kwargs: Keyword arguments forwarded unchanged to XGBoostModel

    Returns:
    - The value returned by XGBoostModel.fit (the trained wrapper)
    """
    trained = XGBoostModel(**kwargs).fit(X_train, y_train)
    return trained