# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from bayes_opt import BayesianOptimization
from catboost import CatBoostRegressor
# Read the workbook: the first two columns are used as the features and the
# last column as the regression target.
data = pd.read_excel('3.xlsx')
y = data.iloc[:, -1].values

# Standardise the features. Note that the scaler is fitted on the complete
# dataset, i.e. before any cross-validation or train/test split happens.
scaler = StandardScaler()
x = scaler.fit_transform(data.iloc[:, :2].values)
def optimize(depth, learning_rate, l2_leaf_reg):
    """Objective function: mean 10-fold cross-validated R^2 of a CatBoost model.

    The module-level arrays ``x`` (features) and ``y`` (target) are split
    with a shuffled, fixed-seed ``KFold``; one regressor is trained per fold
    and scored on the held-out part.

    Args:
        depth: Tree depth; truncated to an integer with ``int()`` before use.
        learning_rate: Passed to ``CatBoostRegressor`` unchanged.
        l2_leaf_reg: Passed to ``CatBoostRegressor`` unchanged.

    Returns:
        The arithmetic mean of the per-fold R^2 scores.
    """
    # Settings shared by every fold's model.
    model_kwargs = dict(
        depth=int(depth),
        learning_rate=learning_rate,
        l2_leaf_reg=l2_leaf_reg,
        iterations=1000,
        silent=True,
        random_state=42,
    )
    splitter = KFold(n_splits=10, shuffle=True, random_state=42)

    def _fold_r2(fit_idx, holdout_idx):
        # Train on one side of the split, score on the other.
        regressor = CatBoostRegressor(**model_kwargs)
        regressor.fit(x[fit_idx], y[fit_idx])
        return r2_score(y[holdout_idx], regressor.predict(x[holdout_idx]))

    fold_scores = [
        _fold_r2(fit_idx, holdout_idx)
        for fit_idx, holdout_idx in splitter.split(x)
    ]
    return np.mean(fold_scores)

# Bayesian search over the three CatBoost hyper-parameters. `optimize`
# receives continuous values; it truncates `depth` to an integer itself.
optimizer = BayesianOptimization(
    f=optimize,
    pbounds={
        'depth': (3, 10),
        'learning_rate': (0.01, 0.3),
        'l2_leaf_reg': (1, 10)
    },
    random_state=42,
)

# Evaluation budget. Every call to `optimize` trains ten 1000-iteration
# CatBoost models, and the optimiser's surrogate model becomes more expensive
# with each recorded observation, so the previous n_iter=100000 could not
# finish in any practical amount of time. Raise N_BAYES_ITER deliberately if
# a longer search is really wanted.
N_INIT_POINTS = 10   # random probes before the guided search starts
N_BAYES_ITER = 100   # guided optimisation steps

optimizer.maximize(init_points=N_INIT_POINTS, n_iter=N_BAYES_ITER)

# Best hyper-parameters seen by the optimiser; depth is truncated to an
# integer in the same way `optimize` does it.
best_params = optimizer.max['params']
best_params.update(depth=int(best_params['depth']))

# Hold out 10% of the rows for the final report.
# NOTE(review): `optimize` cross-validates on the full `x`/`y`, so these
# held-out rows also took part in hyper-parameter selection; the test
# metrics are therefore likely optimistic.
train_x, test_x, train_y, test_y = train_test_split(
    x, y, test_size=0.1, random_state=42
)

# Refit a single model with the selected hyper-parameters.
best_model = CatBoostRegressor(
    **best_params,
    iterations=1000,
    silent=True,
    random_state=42
)
best_model.fit(train_x, train_y)

train_pred, test_pred = (
    best_model.predict(features) for features in (train_x, test_x)
)

def _regression_metrics(actual, predicted):
    """Return ``(MSE, RMSE, MAE, R^2)`` for one set of predictions."""
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    return mse, rmse, mae, r2


# Same four metrics for the training part and the held-out part.
train_mse, train_rmse, train_mae, train_r2 = _regression_metrics(train_y, train_pred)
test_mse, test_rmse, test_mae, test_r2 = _regression_metrics(test_y, test_pred)

# Report the fit quality, the chosen hyper-parameters and the best
# cross-validated score recorded by the optimiser.
print(f'Train - MSE: {train_mse}, RMSE: {train_rmse}, MAE: {train_mae}, R^2 score: {train_r2}')
print(f'Test - MSE: {test_mse}, RMSE: {test_rmse}, MAE: {test_mae}, R^2 score: {test_r2}')
print(f'Best parameters found: {best_params}')
print(f'Best R^2 score during optimization: {optimizer.max["target"]}')