In [None]:
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
import joblib
import pandas as pd

In [1]:
def train_xgb_model(X_train, y_train, *, model_path='models/xgb_model.pkl',
                    n_iter=20, cv=3, n_jobs=1, random_state=1):
    """
    Train an XGBoost regressor using a randomized hyperparameter search.

    A ``RandomizedSearchCV`` samples ``n_iter`` configurations from a small
    fixed grid (2 values for each of 6 hyperparameters, 64 combinations in
    total), scores each with cross-validated negative mean squared error,
    and keeps the best estimator found by the search.

    Side effects: prints the best score and configuration to stdout and
    serializes the best estimator to ``model_path`` with ``joblib.dump``,
    creating the parent directory if it does not exist.

    Args:
        X_train: Training features, passed unchanged to ``search.fit``.
        y_train: Training target values, passed unchanged to ``search.fit``.
        model_path: Destination file for the serialized model.
        n_iter: Number of hyperparameter configurations to sample.
        cv: Cross-validation setting forwarded to ``RandomizedSearchCV``.
        n_jobs: Parallel-jobs setting forwarded to ``RandomizedSearchCV``
            (the default of 1 requests a single job).
        random_state: Seed forwarded to ``RandomizedSearchCV`` so the sampled
            configurations are reproducible.

    Returns:
        The best estimator found by the search (``search.best_estimator_``).
    """
    import os

    base_model = XGBRegressor(objective='reg:squarederror')

    # Deliberately small search space to keep the search cheap.
    param_grid = {
        'n_estimators': [50, 100],
        'max_depth': [3, 7],
        'learning_rate': [0.05, 0.1],
        'subsample': [0.7, 1.0],
        'colsample_bytree': [0.6, 0.8],
        'gamma': [0, 1],
    }

    search = RandomizedSearchCV(
        base_model,
        param_grid,
        n_iter=n_iter,
        scoring='neg_mean_squared_error',
        n_jobs=n_jobs,
        cv=cv,
        random_state=random_state,
        verbose=1,
    )
    search.fit(X_train, y_train)

    # The scorer is *negative* MSE (higher is better), so flip the sign to
    # report a conventional, non-negative mean squared error.
    print(f'Best Mean Squared Error: {-search.best_score_:.3f}')
    print(f'Best Config: {search.best_params_}')

    best_model = search.best_estimator_

    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first; skip creation when the path has no directory
    # component (os.makedirs('') would raise).
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    joblib.dump(best_model, model_path)
    print(f"Model saved as {model_path}")

    return best_model