In [1]:
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error, root_mean_squared_error
import optuna


df = pd.read_excel("dataset_vrp/2014-2023_lags_EWM_targets_train.xlsx")

target_col = 'target_next_year'
log_target_col = 'log_target_next_year'
test_year = 2023

exclude_cols = [
    'year', 'district', 'region',
    'target_next_year', 'delta_target', 'delta_target_percent', 'log_target_next_year'
]
features = [col for col in df.columns if col not in exclude_cols]

train_df = df[df['year'] < test_year]
test_df  = df[df['year'] == test_year]

X_train = train_df[features]
y_train = train_df[target_col]

X_test = test_df[features]
y_test = test_df[target_col]


def objective(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 500, 2000),
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
        'random_seed': 42,
        'verbose': 0
    }
    model = CatBoostRegressor(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    return mae 

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

print(f"\nЛучшие параметры: {study.best_params}")


best_params = study.best_trial.params
best_params.update({'random_seed': 42, 'verbose': 100})
final_model = CatBoostRegressor(**best_params)
final_model.fit(X_train, y_train)

def smape(a, f):
    return 100 * np.mean(2 * np.abs(f - a) / (np.abs(a) + np.abs(f)))

y_pred = final_model.predict(X_test)

MAE_log = mean_absolute_error(test_df[log_target_col], np.log1p(y_pred))
MSE_log = mean_squared_error(test_df[log_target_col], np.log1p(y_pred))
R2_log  = r2_score(test_df[log_target_col], np.log1p(y_pred))

MAE_real = mean_absolute_error(y_test, y_pred)
MSE_real = mean_squared_error(y_test, y_pred)
RMSE_real = root_mean_squared_error(y_test, y_pred)
MAPE_real = mean_absolute_percentage_error(y_test, y_pred)
SMAPE_real = smape(y_test, y_pred)

metrics = {
    "MAE_log": MAE_log,
    "MSE_log": MSE_log,
    "R2_log": R2_log,
    "MAE_real": MAE_real,
    "MSE_real": MSE_real,
    "RMSE_real": RMSE_real,
    "MAPE_real": MAPE_real,
    "SMAPE_real": SMAPE_real
}

print(f"\nМетрики 2023:")
for name, value in metrics.items():
    print(f"{name}: {value}")


[I 2025-11-28 09:59:59,069] A new study created in memory with name: no-name-f8187f4e-1c46-4ed3-9603-9befa3ba8ec8
[I 2025-11-28 10:00:16,107] Trial 0 finished with value: 82807.14444398311 and parameters: {'iterations': 1994, 'depth': 7, 'learning_rate': 0.06244046524124799, 'l2_leaf_reg': 6.515739761898409}. Best is trial 0 with value: 82807.14444398311.
[I 2025-11-28 10:00:50,665] Trial 1 finished with value: 87409.43451519596 and parameters: {'iterations': 1962, 'depth': 8, 'learning_rate': 0.19697999337254665, 'l2_leaf_reg': 6.694644434934581}. Best is trial 0 with value: 82807.14444398311.
[I 2025-11-28 10:01:00,804] Trial 2 finished with value: 86062.10394763738 and parameters: {'iterations': 795, 'depth': 7, 'learning_rate': 0.19654206972183913, 'l2_leaf_reg': 7.531423268186922}. Best is trial 0 with value: 82807.14444398311.
[I 2025-11-28 10:01:03,810] Trial 3 finished with value: 98362.20418935284 and parameters: {'iterations': 630, 'depth': 5, 'learning_rate': 0.2667226994831


Лучшие параметры: {'iterations': 823, 'depth': 5, 'learning_rate': 0.021362268680894553, 'l2_leaf_reg': 1.3210848927268273}
0:	learn: 531894.8251031	total: 4.57ms	remaining: 3.75s
100:	learn: 143038.6917304	total: 396ms	remaining: 2.83s
200:	learn: 67189.4583268	total: 763ms	remaining: 2.36s
300:	learn: 45542.6438140	total: 1.16s	remaining: 2.02s
400:	learn: 34319.0617100	total: 1.53s	remaining: 1.61s
500:	learn: 27112.9810457	total: 1.9s	remaining: 1.22s
600:	learn: 22305.6734227	total: 2.28s	remaining: 842ms
700:	learn: 19069.6078157	total: 2.66s	remaining: 463ms
800:	learn: 16510.5348025	total: 3.04s	remaining: 83.4ms
822:	learn: 16102.3552634	total: 3.12s	remaining: 0us

Метрики 2023:
MAE_log: 0.14656063282443907
MSE_log: 0.03654136845345936
R2_log: 0.9729512551160392
MAE_real: 67576.93112411912
MSE_real: 29099010296.867542
RMSE_real: 170584.32019639888
MAPE_real: 0.15696045947229922
SMAPE_real: 14.55676935623697
