# Import Libraries

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
from catboost import CatBoostRegressor
import skopt
from skopt import BayesSearchCV
from skopt.space import Integer, Real
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler
import pickle
from catboost import CatBoostRegressor, Pool, cv
from skopt.space import Integer, Real

# Model

In [3]:
# Base folder for every input/output file in this notebook. The trailing
# slash matters: later cells build paths by plain string concatenation.
directory = "../data/"

Mounted at /content/drive


In [4]:
# Restore the previously saved train/test partition.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
split_path = directory + 'data_split.pkl'
with open(split_path, 'rb') as f:
    X_train, X_test, y_train, y_test = pickle.load(f)

In [5]:
# Seeded, non-verbose CatBoost regressor wrapped in a one-step pipeline.
# The step name 'reg' is the prefix used by the 'reg__*' search-space keys.
catboost_reg = CatBoostRegressor(random_seed=8, silent=True)
estimators = [('reg', catboost_reg)]
pipe = Pipeline(steps=estimators)

In [6]:
# Hyperparameter ranges explored by the Bayesian search. Each key is
# '<pipeline step>__<parameter>', so every entry targets the 'reg' step.
search_space = {
    # Tree depth: integers from 2 to 8.
    'reg__depth': Integer(low=2, high=8),
    # Learning rate: sampled on a log scale between 0.001 and 1.0.
    'reg__learning_rate': Real(low=0.001, high=1.0, prior='log-uniform'),
    # Sampling rate passed to the regressor's `subsample` parameter.
    'reg__subsample': Real(low=0.5, high=1.0),
    # Value passed to the regressor's `l2_leaf_reg` parameter.
    'reg__l2_leaf_reg': Real(low=0.0, high=10.0),
}

In [7]:
# Bayesian hyperparameter search over the pipeline: 10 iterations, cv=3,
# scored by negative mean squared error, seeded for repeatability.
opt = BayesSearchCV(
    estimator=pipe,
    search_spaces=search_space,
    n_iter=10,
    scoring='neg_mean_squared_error',
    cv=3,
    random_state=8,
)
opt.fit(X_train, y_train)

In [8]:
# Predict on the held-out test split with the fitted search object.
y_pred = opt.predict(X_test)

# Evaluate the three regression metrics against the same predictions.
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# One line per metric, in the order MSE, MAE, R-squared.
print(
    f"Mean Squared Error: {mse}",
    f"Mean Absolute Error: {mae}",
    f"R-squared: {r2}",
    sep="\n",
)

Mean Squared Error: 0.17515439996003626
Mean Absolute Error: 0.045369651363129725
R-squared: 0.8147436287953858


# Export

In [9]:
# Bundle the fitted search object with its test-set predictions and metrics.
results = {'model': opt, 'y_pred': y_pred, 'mse': mse, 'mae': mae, 'r2': r2}

# Persist the bundle next to the input data.
results_path = directory + 'catboost_results.pkl'
with open(results_path, 'wb') as file:
    pickle.dump(results, file)