# Gradient Boosting Classification and Hyperparameter Experiments

In [27]:
# Dataset: Autism Children (autism_children.csv)

# 1. Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, f1_score,
    classification_report, make_scorer
)

# 2. Load and preprocess the data
# The CSV must be in the working directory. 'Unnamed: 0' (saved row index)
# and 'result' (target leakage) are dropped, then exact duplicate rows removed.
df = (
    pd.read_csv('autism_children.csv')
    .drop(columns=['Unnamed: 0', 'result'])
    .drop_duplicates()
)

# Target is the 'class' column; every other column is a feature, with
# categorical columns one-hot encoded.
y = df['class']
X = pd.get_dummies(df.drop(columns=['class']))

# 3. Split into train and test sets
# 80/20 hold-out, seeded and stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)



# Gradient Boosting model

In [28]:
# 4. Baseline Gradient Boosting model
# Only the seed is set; every other hyperparameter keeps its library default.
baseline = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
y_pred_base = baseline.predict(X_test)

# Hold-out metrics; precision and F1 are support-weighted averages over classes.
acc_base = accuracy_score(y_test, y_pred_base)
prec_base, f1_base = (
    metric(y_test, y_pred_base, average='weighted')
    for metric in (precision_score, f1_score)
)

# One print call, one line per argument.
print(
    "=== Baseline Gradient Boosting Results ===",
    f"Accuracy: {acc_base:.4f}",
    f"Precision (weighted): {prec_base:.4f}",
    f"F1 Score (weighted): {f1_base:.4f}",
    sep="\n",
)
print("\nClassification Report:\n", classification_report(y_test, y_pred_base))

# 5. Hyperparameter grid search setup
# Four hyperparameters with three values each -> 3**4 = 81 candidate settings.
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
    subsample=[0.6, 0.8, 1.0],
)

# Candidates are ranked by weighted F1, the same metric reported on the test set.
scorer = make_scorer(f1_score, average='weighted')

# The search is only configured here; fitting happens in a later cell.
# 5-fold CV on all available cores, keeping train scores in cv_results_.
grid_search = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_grid,
    scoring=scorer,
    cv=5,
    n_jobs=-1,
    verbose=1,
    return_train_score=True,
)



=== Baseline Gradient Boosting Results ===
Accuracy: 0.8793
Precision (weighted): 0.8799
F1 Score (weighted): 0.8793

Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.87      0.88        30
           1       0.86      0.89      0.88        28

    accuracy                           0.88        58
   macro avg       0.88      0.88      0.88        58
weighted avg       0.88      0.88      0.88        58



# Hyperparameter experiments

In [29]:
# 6. Run hyperparameter experiments
# Cross-validated search over the training split only.
grid_search.fit(X_train, y_train)
print(
    "\n=== Grid Search Completed ===",
    f"Best Parameters: {grid_search.best_params_}",
    sep="\n",
)

# 7. Evaluate tuned model
# The best estimator found by the search is scored on the held-out test split
# with the same metrics as the baseline.
y_pred_tuned = grid_search.best_estimator_.predict(X_test)
acc_tuned = accuracy_score(y_test, y_pred_tuned)
prec_tuned, f1_tuned = (
    metric(y_test, y_pred_tuned, average='weighted')
    for metric in (precision_score, f1_score)
)

print(
    "\n=== Tuned Gradient Boosting Results ===",
    f"Accuracy: {acc_tuned:.4f}",
    f"Precision (weighted): {prec_tuned:.4f}",
    f"F1 Score (weighted): {f1_tuned:.4f}",
    sep="\n",
)
print("\nClassification Report:\n", classification_report(y_test, y_pred_tuned))

# 8. Compare baseline vs. tuned performance
# Signed differences (tuned minus baseline) on the same held-out test split.
# NOTE(review): in the recorded run the test split has 58 rows, so the
# +0.0517 accuracy gain corresponds to 3 predictions -- interpret with care.
print(
    "\n=== Performance Improvement ===",
    f"Accuracy Improvement: {acc_tuned - acc_base:+.4f}",
    f"Precision Improvement: {prec_tuned - prec_base:+.4f}",
    f"F1 Score Improvement: {f1_tuned - f1_base:+.4f}",
    sep="\n",
)

# 9. Save full CV results for reporting
# One row per candidate setting, written to the working directory
# without the DataFrame index.
df_cv = pd.DataFrame(grid_search.cv_results_)
df_cv.to_csv('autism_gb_gridsearch_results.csv', index=False)
print("Saved CV results to 'autism_gb_gridsearch_results.csv'.")


Fitting 5 folds for each of 81 candidates, totalling 405 fits

=== Grid Search Completed ===
Best Parameters: {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.6}

=== Tuned Gradient Boosting Results ===
Accuracy: 0.9310
Precision (weighted): 0.9310
F1 Score (weighted): 0.9310

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.93      0.93        30
           1       0.93      0.93      0.93        28

    accuracy                           0.93        58
   macro avg       0.93      0.93      0.93        58
weighted avg       0.93      0.93      0.93        58


=== Performance Improvement ===
Accuracy Improvement: +0.0517
Precision Improvement: +0.0511
F1 Score Improvement: +0.0517
Saved CV results to 'autism_gb_gridsearch_results.csv'.
