Gradient boosting (Regression)

In [1]:
# Import necessary libraries
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset (path is relative to the notebook's working directory)
file_path = './../../datafiles/diabetes.csv'
data = pd.read_csv(file_path)

# Separate features and target: every column except 'Outcome' is a feature
X = data.drop('Outcome', axis=1)
y = data['Outcome']

# Split the dataset into training and testing sets (70% train, 30% test).
# random_state is fixed so the split is reproducible; the classification
# cell further down reuses these same four variables.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the Gradient Boosting Regressor (100 boosting stages, fixed seed)
gbr = GradientBoostingRegressor(n_estimators=100, random_state=42)

# Train the Gradient Boosting Regressor on the training data
gbr.fit(X_train, y_train)

# Predict the target on the test data.
# NOTE: the regressor returns a continuous score for each row, not a class label.
y_pred_gbr = gbr.predict(X_test)

# Evaluate the model performance
mse = mean_squared_error(y_test, y_pred_gbr)
r2 = r2_score(y_test, y_pred_gbr)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Test with a sample input for regression prediction.
# The model was fitted on a DataFrame, so it remembers the feature names.
# Labelling the sample with the training columns keeps each value bound to the
# intended feature and avoids scikit-learn's "X does not have valid feature
# names" warning, which a bare nested list would trigger.
sample_input = pd.DataFrame(
    [[5, 116, 74, 25, 0, 32.2, 0.201, 30]],  # Example input, in training-column order
    columns=X_train.columns,
)
sample_prediction_gbr = gbr.predict(sample_input)

# Show the predicted value
print("Sample input prediction (Regression):", sample_prediction_gbr)


Mean Squared Error: 0.1853998201680722
R^2 Score: 0.18103312880889877
Sample input prediction (Regression): [0.41703659]


Gradient boosting (Classification)

In [2]:
# Import necessary libraries
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# Initialize the Gradient Boosting Classifier (100 boosting stages, fixed seed)
gbc = GradientBoostingClassifier(n_estimators=100, random_state=42)

# Train the Gradient Boosting Classifier on the training data.
# X_train / y_train come from the train/test split made in the regression cell,
# so that cell must be run first.
gbc.fit(X_train, y_train)

# Predict the target on the test data
y_pred_gbc = gbc.predict(X_test)

# Compute confusion matrix (rows: true class, columns: predicted class)
conf_matrix_gbc = confusion_matrix(y_test, y_pred_gbc)
print(conf_matrix_gbc)

# Generate classification report (per-class precision, recall, f1-score, support)
class_report_gbc = classification_report(y_test, y_pred_gbc)

# Display the classification report
print(class_report_gbc)

# Test with a sample input for classification prediction.
# The classifier was fitted on a DataFrame, so the sample is labelled with the
# same training columns. This keeps each value bound to the intended feature
# and avoids scikit-learn's "X does not have valid feature names" warning,
# which a bare nested list would trigger.
sample_input = pd.DataFrame(
    [[5, 116, 74, 25, 0, 32.2, 0.201, 30]],  # Example input, in training-column order
    columns=X_train.columns,
)
sample_prediction_gbc = gbc.predict(sample_input)

# Show the predicted outcome (0: Non-diabetic, 1: Diabetic)
print("Sample input prediction (Classification, 0: Non-diabetic, 1: Diabetic):", sample_prediction_gbc)


[[120  31]
 [ 27  53]]
              precision    recall  f1-score   support

           0       0.82      0.79      0.81       151
           1       0.63      0.66      0.65        80

    accuracy                           0.75       231
   macro avg       0.72      0.73      0.73       231
weighted avg       0.75      0.75      0.75       231

Sample input prediction (Classification, 0: Non-diabetic, 1: Diabetic): [0]
