In [11]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.metrics import accuracy_score, r2_score
from sklearn.model_selection import train_test_split, cross_val_score, KFold
import time
from Boosting import *

In [12]:
def custom_cross_val_score(model_class, X, y, cv=5, **model_params):
    scores = []
    kf = KFold(n_splits=cv, shuffle=True, random_state=42)

    for train_idx, test_idx in kf.split(X):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model = model_class(**model_params)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)

        scores.append(accuracy_score(y_test, preds))

    return np.mean(scores)

In [13]:
diabetes_df = pd.read_csv("diabetes.csv")
X_clf = diabetes_df.drop("Outcome", axis=1).values
y_clf = diabetes_df["Outcome"].values

X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X_clf, y_clf, test_size=0.3, random_state=42
)

start_time = time.time()
custom_gb_clf = CustomGradientBoosting(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    task_type=TaskType.CLASSIFICATION
)

custom_gb_clf.fit(X_train_clf, y_train_clf)
train_time = time.time() - start_time

y_pred_clf = custom_gb_clf.predict(X_test_clf)
acc_custom = accuracy_score(y_test_clf, y_pred_clf)

print("=== Custom GB Classifier (Single train/test split) ===")
print(f"Accuracy: {acc_custom:.4f}")
print(f"Training time: {train_time:.4f} сек.\n")


start_time = time.time()
sk_gb_clf = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
)
sk_gb_clf.fit(X_train_clf, y_train_clf)
train_time_sklearn = time.time() - start_time

y_pred_clf_sklearn = sk_gb_clf.predict(X_test_clf)
acc_sklearn = accuracy_score(y_test_clf, y_pred_clf_sklearn)

print("=== Sklearn GB Classifier (Single train/test split) ===")
print(f"Accuracy: {acc_sklearn:.4f}")
print(f"Training time: {train_time_sklearn:.4f} сек.\n")

=== Custom GB Classifier (Single train/test split) ===
Accuracy: 0.7143
Training time: 0.0936 сек.

=== Sklearn GB Classifier (Single train/test split) ===
Accuracy: 0.7489
Training time: 0.0924 сек.



In [14]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()

In [15]:
housing_df = pd.read_csv("housing.csv")
X_reg = housing_df.drop("price", axis=1)
for column in X_reg.columns:
    X_reg[column] = encoder.fit_transform(X_reg[column])
y_reg = housing_df["price"].values

X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

custom_gb_reg = CustomGradientBoosting(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    task_type=TaskType.REGRESSION
)

start_time = time.time()
custom_gb_reg.fit(X_train_reg, y_train_reg)
train_time_reg = time.time() - start_time

y_pred_reg = custom_gb_reg.predict(X_test_reg)
r2_custom = r2_score(y_test_reg, y_pred_reg)

print("=== Custom GB Regressor (Single train/test split) ===")
print(f"R^2: {r2_custom:.4f}")
print(f"Training time: {train_time_reg:.4f} сек.\n")

# Сравниваем со sklearn
sk_gb_reg = GradientBoostingRegressor(
    n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
)

start_time = time.time()
sk_gb_reg.fit(X_train_reg, y_train_reg)
train_time_reg_sklearn = time.time() - start_time

y_pred_reg_sklearn = sk_gb_reg.predict(X_test_reg)
r2_sklearn = r2_score(y_test_reg, y_pred_reg_sklearn)

print("=== Sklearn GB Regressor (Single train/test split) ===")
print(f"R^2: {r2_sklearn:.4f}")
print(f"Training time: {train_time_reg_sklearn:.4f} сек.\n")

=== Custom GB Regressor (Single train/test split) ===
R^2: 0.6557
Training time: 0.1207 сек.

=== Sklearn GB Regressor (Single train/test split) ===
R^2: 0.6556
Training time: 0.0450 сек.

