# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score

# Read the semicolon-delimited wine-quality CSV and trim stray
# whitespace from every column header in the same expression.
df = pd.read_csv("winequality-white.csv", sep=";").rename(columns=str.strip)

def quality_label(q):
    """Map a numeric wine quality score to a three-level class index.

    Returns 0 (low) for scores of 5 or below, 1 (medium) for a score of
    exactly 6, and 2 (high) for every other value.
    """
    if q <= 5:
        return 0
    return 1 if q == 6 else 2

# Derive the 3-class target from the raw quality score
df["label"] = df["quality"].map(quality_label)

# Target is the derived label; features are every remaining column
# except the raw score the label was computed from.
y = df["label"]
X = df.drop(columns=["quality", "label"])

# Hold out 20% of the rows for testing. stratify=y keeps the class
# proportions the same in the train and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training split
# only, and that same fitted transform is then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ----- GRADIENT BOOSTING -----
# Configure the classifier and train it in one expression; fit() hands
# back the fitted estimator, so gb_model is the trained model.
gb_model = GradientBoostingClassifier(
    n_estimators=200,    # boosting stages
    learning_rate=0.1,   # shrinkage applied to each stage
    max_depth=3,         # depth limit of each individual tree
    subsample=0.8,       # fraction of samples drawn per stage
    random_state=42,     # fixed seed for reproducible runs
).fit(X_train_scaled, y_train)

# Predict class labels for the held-out (scaled) test split
y_pred = gb_model.predict(X_test_scaled)

# Overall accuracy on the test split
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"🌿 Gradient Boosting Classifier Accuracy: {acc:.4f}\n")

# Per-class metrics; zero_division=0 is passed through to the report
print(
    "📊 Classification Report:",
    classification_report(y_test, y_pred, zero_division=0),
    sep="\n",
)
