In [19]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.metrics import accuracy_score, r2_score
from sklearn.model_selection import train_test_split, cross_val_score, KFold
import time
from Boosting import *

In [20]:
def custom_cross_val_score(model_class, X, y, cv=5, **model_params):
    scores = []
    kf = KFold(n_splits=cv, shuffle=True, random_state=42)

    for train_idx, test_idx in kf.split(X):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model = model_class(**model_params)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)

        scores.append(accuracy_score(y_test, preds))

    return np.mean(scores)

In [21]:
diabetes_df = pd.read_csv("diabetes.csv")
X_clf = diabetes_df.drop("Outcome", axis=1)
y_clf = diabetes_df["Outcome"]

X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X_clf, y_clf, test_size=0.2, random_state=33
)

start_time = time.time()
custom_gb_clf = CustomGradientBoosting(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    task_type=TaskType.CLASSIFICATION
)

custom_gb_clf.fit(X_train_clf, y_train_clf)
train_time = time.time() - start_time

y_pred_clf = custom_gb_clf.predict(X_test_clf)
acc_custom = accuracy_score(y_test_clf, y_pred_clf)
acc_custom_cv = custom_cross_val_score(CustomGradientBoosting, X_clf, y_clf, cv=5, n_estimators=100, learning_rate=0.1, max_depth=5, task_type=TaskType.CLASSIFICATION)


print("=== Custom GB Classifier (Single train/test split) ===")
print(f"Accuracy: {acc_custom:.4f}")
print(f"Accuracy CV: {acc_custom_cv:.4f}")
print(f"Training time: {train_time:.4f} сек.\n")


start_time = time.time()
sk_gb_clf = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=5, random_state=32
)
sk_gb_clf.fit(X_train_clf, y_train_clf)
train_time_sklearn = time.time() - start_time

y_pred_clf_sklearn = sk_gb_clf.predict(X_test_clf)
acc_sklearn = accuracy_score(y_test_clf, y_pred_clf_sklearn)
acc_sklearn_cv = cross_val_score(sk_gb_clf, X_clf, y_clf, cv=5)

print("=== Sklearn GB Classifier (Single train/test split) ===")
print(f"Accuracy: {acc_sklearn:.4f}")
print(f"Accuracy CV: {acc_sklearn_cv.mean():.4f}")
print(f"Training time: {train_time_sklearn:.4f} сек.\n")

=== Custom GB Classifier (Single train/test split) ===
Accuracy: 0.6818
Accuracy CV: 0.7617
Training time: 0.9251 сек.

=== Sklearn GB Classifier (Single train/test split) ===
Accuracy: 0.7403
Accuracy CV: 0.7761
Training time: 0.4801 сек.



In [23]:
housing_df = pd.read_csv("housing.csv")
X_reg = housing_df.drop("price", axis=1)
binary_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
X_reg[binary_cols] = X_reg[binary_cols].replace({'yes': 1, 'no': 0})
X_reg = pd.get_dummies(X_reg, columns=['furnishingstatus'], drop_first=True)
y_reg = housing_df["price"].values

X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

custom_gb_reg = CustomGradientBoosting(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    task_type=TaskType.REGRESSION
)

start_time = time.time()
custom_gb_reg.fit(X_train_reg, y_train_reg)
train_time_reg = time.time() - start_time

y_pred_reg = custom_gb_reg.predict(X_test_reg)
r2_custom = r2_score(y_test_reg, y_pred_reg)

print("=== Custom GB Regressor (Single train/test split) ===")
print(f"R^2: {r2_custom:.4f}")
print(f"Training time: {train_time_reg:.4f} сек.\n")

sk_gb_reg = GradientBoostingRegressor(
    n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42
)

start_time = time.time()
sk_gb_reg.fit(X_train_reg, y_train_reg)
train_time_reg_sklearn = time.time() - start_time

y_pred_reg_sklearn = sk_gb_reg.predict(X_test_reg)
r2_sklearn = r2_score(y_test_reg, y_pred_reg_sklearn)


print("=== Sklearn GB Regressor (Single train/test split) ===")
print(f"R^2: {r2_sklearn:.4f}")
print(f"Training time: {train_time_reg_sklearn:.4f} сек.\n")

=== Custom GB Regressor (Single train/test split) ===
R^2: 0.6338
Training time: 0.4568 сек.

=== Sklearn GB Regressor (Single train/test split) ===
R^2: 0.6321
Training time: 0.1617 сек.

