In [1]:
!pip install catboost
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from scipy.stats import randint, uniform

Collecting catboost
  Downloading catboost-1.2.5-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.5-cp310-cp310-manylinux2014_x86_64.whl (98.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.5


In [2]:
# Load the wine dataset once and reuse the returned Bunch for both the
# feature matrix and the column names (instead of calling load_wine() twice).
wine = load_wine()
data = wine.data
df = pd.DataFrame(data, columns=wine.feature_names)

# Regression setup: predict the 'alcohol' column from the remaining features.
X = df.drop('alcohol', axis=1)
y = df['alcohol']

#Train
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=True, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test-set statistics leak into training.
# Note: after this step X_train / X_test are NumPy arrays, not DataFrames.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# Random Forest: randomized hyper-parameter search followed by a held-out check.
# scipy's randint(low, high) draws integers from [low, high), so every upper
# bound below is exclusive.
rfr_params = dict(
    n_estimators=randint(50, 100),
    max_depth=randint(5, 30),
    min_samples_split=randint(2, 20),
    min_samples_leaf=randint(1, 10),
)
rfr = RandomForestRegressor(random_state=42)

# 50 sampled candidates, each scored by 3-fold cross-validated R^2 on the
# training split; n_jobs=-1 runs the fits on all available cores.
rfr_search = RandomizedSearchCV(
    estimator=rfr,
    param_distributions=rfr_params,
    n_iter=50,
    scoring='r2',
    cv=3,
    random_state=42,
    n_jobs=-1,
)
rfr_search.fit(X_train, y_train)

# Evaluate the refit best candidate on the untouched test split.
rfr_best = rfr_search.best_estimator_
y_pred_rfr = rfr_best.predict(X_test)
rfr_r2 = r2_score(y_test, y_pred_rfr)
print(f' Random Forest: R2 = {rfr_r2:.4f}')

 Random Forest: R2 = 0.7710


In [5]:
#XGBoost
# Search space for the randomized search. Mind scipy's conventions:
# randint(low, high) excludes `high`, and uniform(loc, scale) spans
# [loc, loc + scale], so learning_rate is drawn from [0.01, 0.51].
xgbr_params = {
    'n_estimators': randint(50, 100),
    'learning_rate': uniform(0.01, 0.5),
    'max_depth': randint(3, 15)
}

# `use_label_encoder` is intentionally not passed: XGBoost reports it as an
# unused parameter and emits a warning on every fit during the search.
xgbr = XGBRegressor(random_state=42, eval_metric='rmse')

# 50 sampled candidates, each scored by 3-fold cross-validated R^2 on the
# training split.
xgbr_search = RandomizedSearchCV(xgbr, xgbr_params, n_iter=50, scoring='r2', cv=3, random_state=42, n_jobs=-1)
xgbr_search.fit(X_train, y_train)

# Evaluate the refit best candidate on the held-out test split.
xgbr_best = xgbr_search.best_estimator_
y_pred_xgbr = xgbr_best.predict(X_test)
print(f' XGBoost: R2 = {r2_score(y_test, y_pred_xgbr):.4f}')

 XGBoost: R2 = 0.6620


Parameters: { "use_label_encoder" } are not used.



In [6]:
# CatBoost: randomized hyper-parameter search followed by a held-out check.
# scipy conventions: randint(low, high) excludes `high`; uniform(loc, scale)
# spans [loc, loc + scale], so learning_rate is drawn from [0.01, 0.31].
cbr_params = dict(
    iterations=randint(50, 100),
    learning_rate=uniform(0.01, 0.3),
    depth=randint(4, 12),
)

# verbose=0 keeps CatBoost's training log out of the cell output.
cbr = CatBoostRegressor(random_state=42, verbose=0)

# 50 sampled candidates, each scored by 3-fold cross-validated R^2 on the
# training split; n_jobs=-1 runs the fits on all available cores.
cbr_search = RandomizedSearchCV(
    estimator=cbr,
    param_distributions=cbr_params,
    n_iter=50,
    scoring='r2',
    cv=3,
    random_state=42,
    n_jobs=-1,
)
cbr_search.fit(X_train, y_train)

# Evaluate the refit best candidate on the untouched test split.
cbr_best = cbr_search.best_estimator_
y_pred_cbr = cbr_best.predict(X_test)
cbr_r2 = r2_score(y_test, y_pred_cbr)
print(f' CatBoost: R2 = {cbr_r2:.4f}')


 CatBoost: R2 = 0.7411
