In [None]:
import pandas as pd
import numpy as np
import optuna
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, root_mean_squared_error, mean_absolute_percentage_error

def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Computes ``100 * mean(2 * |y_pred - y_true| / (|y_true| + |y_pred|))``.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Returns:
        The SMAPE as a float in the range [0, 200].
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    abs_error = np.abs(y_pred - y_true)
    denom = np.abs(y_true) + np.abs(y_pred)

    # The denominator is zero only when both values are zero, i.e. a perfect
    # prediction; count that term as 0 instead of letting 0/0 yield NaN.
    ratio = np.divide(
        2.0 * abs_error,
        denom,
        out=np.zeros_like(denom),
        where=denom != 0,
    )
    return 100 * np.mean(ratio)

# Load the dataset and define the prediction target and the hold-out year.
df = pd.read_excel("dataset_vrp/2014-2023_lags_EWM_targets_train.xlsx")

target_col = "log_target_next_year"
test_year = 2023

# Identifier columns and alternative target representations; these are
# dropped before modelling so they are never used as features.
exclude_cols = [
    "district", "region", "year",
    "target_next_year", "delta_target", "delta_target_percent"
]

# Numeric feature columns: every numeric column that is neither excluded
# nor the target itself.
num_cols = [
    col for col in df.select_dtypes("number").columns
    if col not in exclude_cols and col != target_col
]

train_mask = df["year"] < test_year
test_mask = df["year"] == test_year

# Fit the scaler on training rows only. Fitting on the whole frame would let
# the test year's mean/variance leak into the training features.
scaler = StandardScaler()
scaler.fit(df.loc[train_mask, num_cols])
df[num_cols] = scaler.transform(df[num_cols])

train = df[train_mask].drop(columns=exclude_cols)
test = df[test_mask].drop(columns=exclude_cols)

X_train = train.drop(columns=[target_col])
y_train = train[target_col]

X_test = test.drop(columns=[target_col])
y_test = test[target_col]

# Back-transform the test target to the original scale.
# NOTE(review): assumes the target column is a natural log of the raw
# target - confirm (use np.expm1 instead if it was built with log1p).
y_test_real = np.exp(y_test)

def objective(trial):
    """Optuna objective: hold-out MAE (log scale) of an XGBoost regressor.

    Candidate hyperparameters are scored on the most recent year present in
    the training data rather than on ``X_test``. Selecting hyperparameters
    on the test set would make the final test metrics optimistically biased.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean absolute error between the log-scale target and the model's
        predictions on the validation year.

    Raises:
        ValueError: If the training data covers a single year, so no
            temporal validation split can be made.
    """
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1500),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.5),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 5.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 5.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "objective": "reg:squarederror",
        "tree_method": "hist"
    }

    # "year" was dropped from X_train, so recover it from df via the index.
    years = df.loc[X_train.index, "year"]
    is_val = (years == years.max()).to_numpy()
    if is_val.all():
        raise ValueError(
            "Training data must span at least two years to build a "
            "temporal validation split."
        )

    model = XGBRegressor(**params)
    model.fit(X_train.loc[~is_val], y_train.loc[~is_val])

    pred = model.predict(X_train.loc[is_val])

    return mean_absolute_error(y_train.loc[is_val], pred)


# Seed the sampler so the sequence of suggested hyperparameters is the same
# on every run; an unseeded study yields different "best" parameters each run.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

best_params = study.best_params
print(f"Лучшие параметры: {best_params}")

# Refit on the full training set using the best hyperparameters found by the
# study, with the same fixed objective and tree method used during tuning.
model = XGBRegressor(
    objective="reg:squarederror",
    tree_method="hist",
    **best_params,
)
model.fit(X_train, y_train)

# Predictions on the log scale and back-transformed via exp.
pred_log = model.predict(X_test)
pred_real = np.exp(pred_log)

# Metric functions grouped by the scale they are evaluated on; dict order
# determines the order in which results are printed.
log_scale_scorers = {
    "MAE_log": mean_absolute_error,
    "MSE_log": mean_squared_error,
    "R2_log": r2_score,
}
real_scale_scorers = {
    "MAE_real": mean_absolute_error,
    "MSE_real": mean_squared_error,
    "RMSE_real": root_mean_squared_error,
    "MAPE_real": mean_absolute_percentage_error,
    "SMAPE_real": smape,
}

metrics = {
    label: scorer(y_test, pred_log)
    for label, scorer in log_scale_scorers.items()
}
metrics.update(
    (label, scorer(y_test_real, pred_real))
    for label, scorer in real_scale_scorers.items()
)

print("\nМетрики 2023:")
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value}")

[I 2025-11-29 16:07:17,965] A new study created in memory with name: no-name-11782ff1-5969-4ad8-820c-49db253688f9
[I 2025-11-29 16:07:18,593] Trial 0 finished with value: 0.16410473086829425 and parameters: {'n_estimators': 286, 'max_depth': 9, 'learning_rate': 0.42506812306363356, 'subsample': 0.7932921273761582, 'colsample_bytree': 0.6086401432234204, 'reg_alpha': 2.8690535887997304, 'reg_lambda': 4.5926164199393575, 'min_child_weight': 3}. Best is trial 0 with value: 0.16410473086829425.
[I 2025-11-29 16:07:19,091] Trial 1 finished with value: 0.14501265596287463 and parameters: {'n_estimators': 259, 'max_depth': 8, 'learning_rate': 0.12148085318569753, 'subsample': 0.8460145524245558, 'colsample_bytree': 0.626397233343989, 'reg_alpha': 4.047114462101119, 'reg_lambda': 0.6795709819953077, 'min_child_weight': 7}. Best is trial 1 with value: 0.14501265596287463.
[I 2025-11-29 16:07:21,111] Trial 2 finished with value: 0.1537287780570774 and parameters: {'n_estimators': 1436, 'max_dept

Лучшие параметры: {'n_estimators': 1169, 'max_depth': 4, 'learning_rate': 0.14131182035900636, 'subsample': 0.9903884134400565, 'colsample_bytree': 0.6694156204151209, 'reg_alpha': 0.28002415455811314, 'reg_lambda': 3.460418777805833, 'min_child_weight': 5}

Метрики 2023:
MAE_log: 0.11921484079965214
MSE_log: 0.027102835778714463
R2_log: 0.9799378698270691
MAE_real: 72641.99036397974
MSE_real: 107718510131.12685
RMSE_real: 328204.98188042006
MAPE_real: 0.1254345509866375
SMAPE_real: 11.850358299305555


: 