- XGBoost: eXtreme Gradient Boosting
- LightGBM: Light Gradient Boosting Machine
- CatBoost: Categorical Boosting
- Scikit-learn: Has two estimators for regression and classification

In [1]:
import pandas as pd
import seaborn as sns
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [2]:
# Load the diamonds dataset from Seaborn
diamonds = sns.load_dataset("diamonds")

# Split data into features and target
X = diamonds.drop("cut", axis=1)
y = diamonds["cut"]

In [3]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
   X, y, test_size=0.2, random_state=42
)

In [5]:
# Define categorical and numerical features
categorical_features = X.select_dtypes(
   include=["object"]
).columns.tolist()

numerical_features = X.select_dtypes(
   include=["float64", "int64"]
).columns.tolist()

In [6]:
preprocessor = ColumnTransformer(
   transformers=[
       ("cat", OneHotEncoder(), categorical_features),
       ("num", StandardScaler(), numerical_features),
   ]
)

In [10]:
pipeline = Pipeline(
   [
       ("preprocessor", preprocessor),
       ("classifier", GradientBoostingClassifier(learning_rate=0.5,random_state=42)),
   ]
)


In [11]:
# Perform 5-fold cross-validation
cv_scores = cross_val_score(pipeline, X_train, y_train, cv=5)

# Fit the model on the training data
pipeline.fit(X_train, y_train)

# Predict on the test set
y_pred = pipeline.predict(X_test)

# Generate classification report
report = classification_report(y_test, y_pred)


In [12]:
print(f"Mean Cross-Validation Accuracy: {cv_scores.mean():.4f}")
print("\nClassification Report:")
print(report)

Mean Cross-Validation Accuracy: 0.7601

Classification Report:
              precision    recall  f1-score   support

        Fair       0.68      0.30      0.42       335
        Good       0.77      0.59      0.67      1004
       Ideal       0.82      0.91      0.86      4292
     Premium       0.81      0.82      0.81      2775
   Very Good       0.60      0.58      0.59      2382

    accuracy                           0.76     10788
   macro avg       0.73      0.64      0.67     10788
weighted avg       0.76      0.76      0.76     10788

