# CatBoost Regression


In [1]:
import pandas as pd
from catboost import CatBoostRegressor
from joblib import dump
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    mean_squared_log_error,
    r2_score,
    root_mean_squared_error,
    root_mean_squared_log_error,
)
from sklearn.model_selection import train_test_split

In [2]:
# Location of the ice-cream dataset, relative to this notebook.
ice_cream_csv = "../../../Datasets/IceCreamData.csv"

# Read the CSV into a DataFrame and preview its first rows as the cell output.
df = pd.read_csv(ice_cream_csv)
df.head()

Unnamed: 0,Temperature,Revenue
0,24.6,534.8
1,26.0,625.19
2,27.8,660.63
3,20.6,487.71
4,11.5,316.24


In [3]:
# Every column except the last is treated as a feature; the last column is the
# regression target (Revenue, according to the preview above).
feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]

# Convert to plain NumPy arrays for the estimator: X is 2-D, y is 1-D.
X = feature_frame.to_numpy()
y = target_series.to_numpy()

In [4]:
# Hold out 10% of the rows for evaluation; the fixed random_state keeps the
# shuffle (and therefore the reported metrics) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

In [5]:
# Train a CatBoost regressor with the library's default hyper-parameters.
# - random_seed is pinned explicitly (0 is CatBoost's documented default seed)
#   so the notebook reproduces the same model on Restart & Run All.
# - verbose=100 logs every 100th boosting iteration instead of one line per
#   iteration, which otherwise floods the cell output with ~1000 log lines.
cbr = CatBoostRegressor(random_seed=0, verbose=100)
cbr.fit(X_train, y_train)

Learning rate set to 0.03609
0:	learn: 171.6142857	total: 55.9ms	remaining: 55.9s
1:	learn: 166.4404057	total: 56.1ms	remaining: 28s
2:	learn: 161.4080085	total: 56.8ms	remaining: 18.9s
3:	learn: 156.3365328	total: 57ms	remaining: 14.2s
4:	learn: 151.5550019	total: 57.6ms	remaining: 11.5s
5:	learn: 146.9789222	total: 58ms	remaining: 9.61s
6:	learn: 142.5644582	total: 59.4ms	remaining: 8.42s
7:	learn: 138.2443048	total: 59.6ms	remaining: 7.39s
8:	learn: 134.3070579	total: 59.8ms	remaining: 6.58s
9:	learn: 130.2742800	total: 60ms	remaining: 5.94s
10:	learn: 126.4236652	total: 60.3ms	remaining: 5.42s
11:	learn: 122.7634252	total: 60.7ms	remaining: 5s
12:	learn: 119.1367920	total: 60.9ms	remaining: 4.62s
13:	learn: 115.6031664	total: 61.6ms	remaining: 4.34s
14:	learn: 112.2519949	total: 62ms	remaining: 4.07s
15:	learn: 108.9075883	total: 62.5ms	remaining: 3.84s
16:	learn: 105.7754863	total: 63.7ms	remaining: 3.68s
17:	learn: 102.6616659	total: 63.9ms	remaining: 3.48s
18:	learn: 99.7689088	

<catboost.core.CatBoostRegressor at 0x15978e3f0>

In [6]:
# Predict the target for the held-out test features with the fitted model.
y_pred = cbr.predict(X_test)

In [7]:
# R² on the held-out set, computed once and reused for the adjusted variant.
r2 = r2_score(y_test, y_pred)
n_samples = len(y_test)
n_features = X_test.shape[1]
print("The coefficient of determination :", r2)

# Adjusted R² = 1 - (1 - R²) * (n - 1) / (n - p - 1), where n is the number of
# test samples and p the number of features; it penalises extra predictors.
adjusted_r2_score = 1 - ((1 - r2) * (n_samples - 1)) / (n_samples - n_features - 1)  # type: ignore
print("The adjusted coefficient of determination :", adjusted_r2_score)

The coefficient of determination : 0.9806374964644575
The adjusted coefficient of determination : 0.9802341109741337


In [8]:
# Each entry pairs the label to print with the sklearn metric that produces it.
# The order of the list is the order of the report.
error_metrics = [
    ("Mean Absolute Error (MAE) :", mean_absolute_error),
    ("Mean Squared Error (MSE) :", mean_squared_error),
    ("Root Mean Squared Error (RMSE) :", root_mean_squared_error),
    ("Mean Squared Log Error (MSLE) :", mean_squared_log_error),
    ("Root Mean Squared Log Error (RMSLE) :", root_mean_squared_log_error),
    ("Mean Absolute Percentage Error (MAPE) :", mean_absolute_percentage_error),
]

# Evaluate every metric on the held-out targets versus the predictions.
for metric_label, metric_fn in error_metrics:
    print(metric_label, metric_fn(y_test, y_pred))

Mean Absolute Error (MAE) : 17.889040815945283
Mean Squared Error (MSE) : 473.4092614654089
Root Mean Squared Error (RMSE) : 21.757970067665067
Mean Squared Log Error (MSLE) : 0.0019727644531114742
Root Mean Squared Log Error (RMSLE) : 0.04441581309749349
Mean Absolute Percentage Error (MAPE) : 0.03653275325701722


In [9]:
# Persist the fitted regressor to disk with joblib. dump() returns the list of
# file names it wrote, which is what the cell displays as its output.
dump(cbr, "../../../Models/catBoostingRegressor.joblib")

['../../../Models/catBoostingRegressor.joblib']