XGBoost (Classification and Regression on the Diabetes Dataset)

In [8]:
from xgboost import XGBClassifier, XGBRegressor
from sklearn.datasets import load_diabetes
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, mean_squared_error,r2_score
import numpy as np
import pandas as pd


In [18]:
# Load the scikit-learn diabetes dataset (features X, continuous target y).
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Tunable settings for this cell, named once so both splits below stay in sync.
PROGRESSION_THRESHOLD = 140  # cut-off applied to the continuous target to form two classes
TEST_SIZE = 0.3              # fraction of samples held out for evaluation
SEED = 42                    # shared seed for both train/test splits

# Derive a binary label for the classification task.
# NOTE: per the scikit-learn dataset description, the target is a quantitative
# measure of disease progression one year after baseline, and every patient in
# the dataset has diabetes. The label therefore means "progression above the
# threshold" (1) vs. "at or below it" (0) -- it is NOT diabetic vs. non-diabetic.
y_class = (y > PROGRESSION_THRESHOLD).astype(int)

# Split data for classification (binary label).
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X, y_class, test_size=TEST_SIZE, random_state=SEED
)

# Split data for regression (original continuous target).
# The same seed and test size are used, so both tasks are evaluated on the
# same held-out rows.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)


In [19]:
# Initialize and train the classifier
# `use_label_encoder` is intentionally not passed: it was deprecated in the
# XGBoost 1.x series and is no longer a recognized estimator parameter in 2.x
# (where it only produces an "unused parameter" warning). The labels here are
# already integers 0/1, so no label encoding is required.
xgb_clf = XGBClassifier(eval_metric='logloss')
xgb_clf.fit(X_train_clf, y_train_clf)

# Predict class labels for the held-out split and evaluate them.
y_pred_clf = xgb_clf.predict(X_test_clf)
# scikit-learn convention: rows are true classes, columns are predicted classes.
conf_matrix_clf = confusion_matrix(y_test_clf, y_pred_clf)
# Plain-text table of per-class precision / recall / F1 / support.
class_report_clf = classification_report(y_test_clf, y_pred_clf)

# Print results
print("Confusion Matrix for Classification:\n", conf_matrix_clf)
print("\nClassification Report:\n", class_report_clf)


Confusion Matrix for Classification:
 [[53 19]
 [10 51]]

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.74      0.79        72
           1       0.73      0.84      0.78        61

    accuracy                           0.78       133
   macro avg       0.78      0.79      0.78       133
weighted avg       0.79      0.78      0.78       133



In [21]:
# Build the gradient-boosted regressor (squared-error objective) and fit it
# on the training portion of the continuous target.
xgb_reg = XGBRegressor(objective='reg:squarederror')
xgb_reg.fit(X_train_reg, y_train_reg)

# Score the model on the held-out rows: predictions first, then the two metrics.
y_pred_reg = xgb_reg.predict(X_test_reg)
r2 = r2_score(y_test_reg, y_pred_reg)
mse_reg = mean_squared_error(y_test_reg, y_pred_reg)

# Report both regression metrics (MSE, then R^2).
print(f"\nMean Squared Error for Regression: {mse_reg}")
print("R^2 Score:", r2)


Mean Squared Error for Regression: 3513.659206003472
R^2 Score: 0.3491183976557938
