In [None]:
import pandas as pd
import numpy as np
import optuna
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score, root_mean_squared_error, mean_absolute_percentage_error
)

def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Each element contributes ``2 * |y_pred - y_true| / (|y_true| + |y_pred|)``;
    the mean of those terms is multiplied by 100.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        The SMAPE as a float in the range [0, 200].

    Note:
        Inputs are converted to NumPy arrays, so elements are compared
        positionally (pandas index labels are ignored).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    numerator = 2.0 * np.abs(y_pred - y_true)
    denominator = np.abs(y_true) + np.abs(y_pred)

    # The denominator is zero only when both values are zero, i.e. the
    # prediction is exact; count that as zero error instead of 0/0 = NaN,
    # which would otherwise turn the whole mean into NaN.
    ratio = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return 100 * np.mean(ratio)

# Load the prepared dataset from Excel.
df = pd.read_excel("dataset_vrp/2014-2023_lags_EWM_targets_train.xlsx")

# Column the model is trained to predict (exponentiated further below to
# obtain values on the original scale).
target_col = "log_target_next_year"

# Columns removed from the feature matrix before fitting.
# NOTE(review): presumably target-derived columns and row identifiers -
# confirm against the dataset description.
exclude_cols = ["delta_target", "delta_target_percent", "target_next_year", 'year', 'region', 'district']

# Numeric feature columns: every numeric column that is neither excluded
# nor the target itself.
num_cols = [col for col in df.select_dtypes("number").columns if col not in (exclude_cols + [target_col])]

# Rows before 2023 form the training period; 2023 is the hold-out year.
train_mask = df["year"] < 2023

# Fit the scaler on the training rows only, then apply it to every row.
# Fitting on the full frame would let the 2023 hold-out statistics leak
# into the preprocessing of the training data.
scaler = StandardScaler()
scaler.fit(df.loc[train_mask, num_cols])
df[num_cols] = scaler.transform(df[num_cols])

train = df[train_mask]
test  = df[df["year"] == 2023]

# "year" is needed for the split above, so the excluded columns are only
# dropped afterwards.
train = train.drop(columns=exclude_cols)
test = test.drop(columns=exclude_cols)

X_train = train.drop(columns=target_col)
y_train = train[target_col]

X_test = test.drop(columns=target_col)
y_test = test[target_col]

# Hold-out target mapped back from the log scale.
# NOTE(review): assumes the target was built with a plain natural log
# (not log1p) - confirm where "log_target_next_year" is created.
y_test_real = np.exp(y_test)

def objective(trial):
    """Optuna objective: validation MAE of a random forest on the log target.

    The most recent year present in the training rows is held out as a
    validation fold; the model is fitted on the earlier years and scored
    on that fold. The 2023 test set is deliberately not used here, so the
    metrics reported on it afterwards are not biased by the search.

    Args:
        trial: The ``optuna`` trial used to sample hyper-parameters.

    Returns:
        Mean absolute error on the validation fold (lower is better).

    Raises:
        ValueError: If the training rows contain a single year, so no
            validation fold can be separated.
    """
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1500),
        "max_depth": trial.suggest_int("max_depth", 4, 15),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
        "max_features": trial.suggest_float("max_features", 0.3, 1.0),
    }

    # "year" was dropped from X_train, so look it up in df through the
    # shared row index.
    years = df.loc[X_train.index, "year"]
    val_mask = (years == years.max()).to_numpy()
    if val_mask.all():
        raise ValueError(
            "training data must span at least two years to build a validation fold"
        )
    fit_mask = ~val_mask

    model = RandomForestRegressor(
        random_state=42,
        n_jobs=-1,
        **params
    )
    model.fit(X_train.loc[fit_mask], y_train.loc[fit_mask])

    pred = model.predict(X_train.loc[val_mask])
    return mean_absolute_error(y_train.loc[val_mask], pred)

# Hyper-parameter search. The sampler is seeded so repeated runs explore
# the same sequence of trials, matching the fixed random_state of the forest.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

best_params = study.best_params
print(f"Лучшие параметры: {best_params}")

# Refit on the full training set with the selected hyper-parameters.
best_model = RandomForestRegressor(
    random_state=42,
    n_jobs=-1,
    **best_params
)
best_model.fit(X_train, y_train)

# Predictions on the log scale and mapped back with exp, mirroring how
# y_test_real is derived from y_test.
pred_log  = best_model.predict(X_test)
pred_real = np.exp(pred_log)

# Metrics on the log scale.
MAE_log = mean_absolute_error(y_test, pred_log)
MSE_log = mean_squared_error(y_test, pred_log)
R2_log  = r2_score(y_test, pred_log)

# Metrics on the original scale.
# NOTE: MAPE_real is a fraction (scikit-learn does not multiply by 100),
# whereas SMAPE_real is a percentage (smape multiplies by 100).
MAE_real = mean_absolute_error(y_test_real, pred_real)
MSE_real = mean_squared_error(y_test_real, pred_real)
RMSE_real = root_mean_squared_error(y_test_real, pred_real)
MAPE_real = mean_absolute_percentage_error(y_test_real, pred_real)
SMAPE_real = smape(y_test_real, pred_real)

metrics = {
    "MAE_log": MAE_log,
    "MSE_log": MSE_log,
    "R2_log": R2_log,

    "MAE_real": MAE_real,
    "MSE_real": MSE_real,
    "RMSE_real": RMSE_real,
    "MAPE_real": MAPE_real,
    "SMAPE_real": SMAPE_real,
}

print("\nМетрики 2023:")
for name, value in metrics.items():
    print(f"{name}: {value}")


[I 2025-11-29 15:38:10,437] A new study created in memory with name: no-name-bdf15e14-2cc2-44e4-9435-028ca3d8fbaf
[I 2025-11-29 15:38:11,032] Trial 0 finished with value: 0.16467379048233777 and parameters: {'n_estimators': 447, 'max_depth': 5, 'min_samples_split': 9, 'min_samples_leaf': 10, 'max_features': 0.34693460565408935}. Best is trial 0 with value: 0.16467379048233777.
[I 2025-11-29 15:38:14,389] Trial 1 finished with value: 0.13942667518173046 and parameters: {'n_estimators': 1497, 'max_depth': 9, 'min_samples_split': 20, 'min_samples_leaf': 7, 'max_features': 0.7987729770225552}. Best is trial 1 with value: 0.13942667518173046.
[I 2025-11-29 15:38:14,716] Trial 2 finished with value: 0.14667363392612282 and parameters: {'n_estimators': 194, 'max_depth': 6, 'min_samples_split': 17, 'min_samples_leaf': 8, 'max_features': 0.48914140856102983}. Best is trial 1 with value: 0.13942667518173046.
[I 2025-11-29 15:38:16,740] Trial 3 finished with value: 0.124161170514373 and parameter

Лучшие параметры: {'n_estimators': 1174, 'max_depth': 15, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 0.9051225392178933}

Метрики 2023:
MAE_log: 0.11971791574156658
MSE_log: 0.024865667584900535
R2_log: 0.981593872169019
MAE_real: 66057.59006087198
MSE_real: 62983724595.109344
RMSE_real: 250965.5844834294
MAPE_real: 0.12066040705899243
SMAPE_real: 11.91538005218315


: 