In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import pickle
import xgboost as xgb

In [6]:
# Read the four pre-split CSV files (features / targets for train and test)
# in one pass; the tuple order matches the unpacking order on the left.
X_train, X_test, Y_train, Y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Splitted-Data/X_train_data.csv',
        'Splitted-Data/X_test.csv',
        'Splitted-Data/Y_train.csv',
        'Splitted-Data/Y_test.csv',
    )
)

In [7]:
from sklearn.model_selection import RandomizedSearchCV

# Candidate values for each XGBoost hyperparameter. The randomized search
# draws combinations from these lists rather than trying every one.
param_grid = {
    # Number of trees.
    'n_estimators': [500, 1000, 1500, 2000],
    # Step size.
    'learning_rate': [0.01, 0.05, 0.1, 0.001, 0.5, 0.0001],
    # Depth of each tree.
    'max_depth': [3, 5, 6, 8, 10, 12],
    # Minimum sum of instance weight (hessian) needed in a child.
    'min_child_weight': [1, 3, 5],
    # Fraction of observations to be randomly sampled for each tree.
    'subsample': [0.7, 0.8, 0.9, 1.0, 0.05],
    # Fraction of columns to be randomly sampled for each tree.
    'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
    # Minimum loss reduction required to make a further partition.
    'gamma': [0, 0.1, 0.2],
}

In [8]:
# Base regressor that the hyperparameter search clones for every candidate.
#
# n_jobs is 1 here on purpose: the RandomizedSearchCV built from this
# estimator already runs its fits in parallel (n_jobs=-1). Letting every
# parallel fit also spawn one XGBoost thread per core oversubscribes the CPU,
# and the XGBoost documentation warns that such thread contention slows both
# layers down. Trade-off: the final refit and later predict calls of the
# chosen model run single-threaded; call `set_params(n_jobs=-1)` on the
# fitted model afterwards if that matters.
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    n_jobs=1
)

In [9]:
# Randomized hyperparameter search over `param_grid` for `xgb_model`.
random_search = RandomizedSearchCV(
    # What is tuned and over which candidate values.
    estimator=xgb_model,
    param_distributions=param_grid,
    # Search budget: 50 sampled candidates, each scored with 7-fold CV.
    n_iter=50,
    cv=7,
    # Candidates are ranked by negated MSE (higher is better).
    scoring='neg_mean_squared_error',
    # Fixed seed so the sampled candidates are repeatable.
    random_state=42,
    # Run fits in parallel and report progress.
    n_jobs=-1,
    verbose=1,
)

In [10]:
# Run the search on the training split (features X_train, targets Y_train).
random_search.fit(X=X_train, y=Y_train)

Fitting 7 folds for each of 50 candidates, totalling 350 fits




In [11]:
# Keep a handle on the estimator the search exposes as its best one; this
# attribute is available because `refit` was left at its default when the
# search was constructed.
best_model = random_search.best_estimator_

In [12]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Predict on the test features with the tuned model.
Y_pred_tuned = best_model.predict(X_test)

# Each metric compares the test targets against those predictions; RMSE is
# derived from MSE so it is expressed in the target's own units.
r2 = r2_score(Y_test, Y_pred_tuned)
mae = mean_absolute_error(Y_test, Y_pred_tuned)
mse = mean_squared_error(Y_test, Y_pred_tuned)
rmse = np.sqrt(mse)

# Single report, one metric per line.
print(
    "--- Regression Performance Metrics ---",
    f"R2 Score: {r2:.4f}",
    f"RMSE:     {rmse:.4f} ",
    f"MAE:      {mae:.4f}",
    sep="\n",
)

--- Regression Performance Metrics ---
R2 Score: 0.4030
RMSE:     38.4072 
MAE:      1.6036


In [14]:
# Serialize the tuned model to disk with pickle.
# NOTE(review): only unpickle this file from a trusted location, since
# loading a pickle can execute arbitrary code.
with open('XGB-Model.pkl', 'wb') as file:
    pickle.dump(best_model, file)