In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_transformer, make_column_selector
from tqdm import tqdm

In [2]:
# Load the Sonar data set, integer-encode the target and make a stratified
# 70/30 train/test split.
#
# The CSV location can be overridden through the SONAR_CSV environment
# variable so the notebook is not tied to one machine's directory layout;
# when the variable is unset the original path is used unchanged.
SONAR_CSV = os.environ.get("SONAR_CSV", "C:/Python/Cases/Sonar/Sonar.csv")
sonar = pd.read_csv(SONAR_CSV)

# `le` stays available afterwards so encoded labels can be mapped back.
le = LabelEncoder()
y = le.fit_transform(sonar['Class'])
X = sonar.drop(columns='Class')

# Fixed seed + stratification keep the split reproducible and class-balanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=25,
    stratify=y,
)

In [4]:
# Exhaustive sweep for scikit-learn's GradientBoostingClassifier: every
# (learning_rate, n_estimators, max_depth) combination is fitted on the
# training split and scored with log-loss on the held-out split.
# NOTE(review): the score is computed on X_test, so the best row of the table
# is an optimistic estimate -- consider a validation split or cross-validation
# if the winning setting is going to be reported.
rates = np.linspace(0.01, 0.8, 20)   # 20 evenly spaced learning rates
n_est = [50, 100, 200]               # values tried for n_estimators
# None is forwarded to max_depth as-is; presumably it lifts the depth cap --
# confirm against the scikit-learn documentation.
depths = [3, 5, None]

scores = []
for r in tqdm(rates):                # progress bar ticks once per learning rate
    # Inner grid, with depth varying fastest.
    for n, d in [(n, d) for n in n_est for d in depths]:
        gbm = GradientBoostingClassifier(
            learning_rate=r,
            n_estimators=n,
            max_depth=d,
            random_state=25,
        )
        gbm.fit(X_train, y_train)
        y_pred_prob = gbm.predict_proba(X_test)
        scores.append([r, n, d, log_loss(y_test, y_pred_prob)])

# One row per combination; lower log-loss is better, so best settings come first.
df_scores = pd.DataFrame(scores, columns=['rate', 'n_est', 'depth', 'score'])
df_scores.sort_values(by='score')

100%|██████████| 20/20 [00:50<00:00,  2.53s/it]


Unnamed: 0,rate,n_est,depth,score
12,0.051579,100,3.0,0.446052
18,0.093158,50,3.0,0.452037
9,0.051579,50,3.0,0.457518
6,0.010000,200,3.0,0.470463
45,0.217895,50,3.0,0.472469
...,...,...,...,...
176,0.800000,100,,4.070971
173,0.800000,50,,4.070971
136,0.633684,50,5.0,4.225648
142,0.633684,200,5.0,4.225648


XGBoost

In [5]:
from xgboost import XGBClassifier

In [6]:
# Exhaustive sweep for XGBClassifier: every (learning_rate, n_estimators,
# max_depth) combination is fitted on the training split and scored with
# log-loss on the held-out split.
# NOTE(review): the score is computed on X_test, so the best row of the table
# is an optimistic estimate of generalisation error.
rates = np.linspace(0.01, 0.8, 20)   # 20 evenly spaced learning rates
n_est = [50, 100, 200]               # values tried for n_estimators
# NOTE(review): None is forwarded to max_depth as-is; presumably XGBoost then
# applies its own default depth rather than "unlimited" -- confirm.
depths = [3, 5, None]

scores = []
for r in tqdm(rates):                # progress bar ticks once per learning rate
    # Inner grid, with depth varying fastest.
    for n, d in [(n, d) for n in n_est for d in depths]:
        gbm = XGBClassifier(
            learning_rate=r,
            n_estimators=n,
            max_depth=d,
            random_state=25,
        )
        gbm.fit(X_train, y_train)
        y_pred_prob = gbm.predict_proba(X_test)
        scores.append([r, n, d, log_loss(y_test, y_pred_prob)])

# One row per combination; lower log-loss is better, so best settings come first.
df_scores = pd.DataFrame(scores, columns=['rate', 'n_est', 'depth', 'score'])
df_scores.sort_values(by='score')

100%|██████████| 20/20 [00:20<00:00,  1.01s/it]


Unnamed: 0,rate,n_est,depth,score
65,0.301053,50,,0.422442
64,0.301053,50,5.0,0.422442
145,0.675263,50,5.0,0.429255
146,0.675263,50,,0.429255
119,0.550526,50,,0.431690
...,...,...,...,...
1,0.010000,50,5.0,0.577834
2,0.010000,50,,0.577834
0,0.010000,50,3.0,0.588441
105,0.467368,200,3.0,0.599991


LightGBM

In [8]:
from lightgbm import LGBMClassifier

In [11]:
# Exhaustive sweep for LGBMClassifier: every (learning_rate, n_estimators,
# max_depth) combination is fitted on the training split and scored with
# log-loss on the held-out split.
# NOTE(review): the score is computed on X_test, so the best row of the table
# is an optimistic estimate of generalisation error.
rates = np.linspace(0.01, 0.8, 20)   # 20 evenly spaced learning rates
n_est = [50, 100, 200]               # values tried for n_estimators
# NOTE(review): None is forwarded to max_depth as-is; presumably LightGBM then
# falls back to its own default -- confirm.
depths = [3, 5, None]

scores = []
for r in tqdm(rates):                # progress bar ticks once per learning rate
    # Inner grid, with depth varying fastest.
    for n, d in [(n, d) for n in n_est for d in depths]:
        gbm = LGBMClassifier(
            learning_rate=r,
            n_estimators=n,
            max_depth=d,
            random_state=25,
            verbose=-1,              # keep LightGBM's own logging out of the output
        )
        gbm.fit(X_train, y_train)
        y_pred_prob = gbm.predict_proba(X_test)
        scores.append([r, n, d, log_loss(y_test, y_pred_prob)])

# One row per combination; lower log-loss is better, so best settings come first.
df_scores = pd.DataFrame(scores, columns=['rate', 'n_est', 'depth', 'score'])
df_scores.sort_values(by='score')

100%|██████████| 20/20 [00:03<00:00,  5.87it/s]


Unnamed: 0,rate,n_est,depth,score
20,0.093158,50,,0.390546
19,0.093158,50,5.0,0.390546
29,0.134737,50,,0.393635
28,0.134737,50,5.0,0.393635
38,0.176316,50,,0.398391
...,...,...,...,...
162,0.758421,50,3.0,1.136319
148,0.675263,100,5.0,1.155843
149,0.675263,100,,1.155843
151,0.675263,200,5.0,1.155843


In [14]:
from catboost import CatBoostClassifier

In [None]:
# Exhaustive sweep for CatBoostClassifier: every (learning_rate, n_estimators,
# max_depth) combination is fitted on the training split and scored with
# log-loss on the held-out split.
# NOTE(review): the score is computed on X_test, so the best row of the table
# is an optimistic estimate of generalisation error.
rates = np.linspace(0.01, 0.8, 20)   # 20 evenly spaced learning rates
n_est = [50, 100, 200]               # values tried for n_estimators
# NOTE(review): None is forwarded to max_depth as-is; presumably CatBoost then
# applies its own default depth rather than "unlimited" -- confirm.
depths = [3, 5, None]

scores = []
for r in tqdm(rates):                # progress bar ticks once per learning rate
    # Inner grid, with depth varying fastest.
    for n, d in [(n, d) for n in n_est for d in depths]:
        gbm = CatBoostClassifier(
            learning_rate=r,
            n_estimators=n,
            max_depth=d,
            random_state=25,
            verbose=0,               # silence per-iteration training output
        )
        gbm.fit(X_train, y_train)
        y_pred_prob = gbm.predict_proba(X_test)
        scores.append([r, n, d, log_loss(y_test, y_pred_prob)])

# One row per combination; lower log-loss is better, so best settings come first.
df_scores = pd.DataFrame(scores, columns=['rate', 'n_est', 'depth', 'score'])
df_scores.sort_values(by='score')

100%|██████████| 20/20 [01:17<00:00,  3.85s/it]


Unnamed: 0,rate,n_est,depth,score
21,0.093158,100,3.0,0.399040
13,0.051579,100,5.0,0.401615
22,0.093158,100,5.0,0.405820
17,0.051579,200,,0.408290
16,0.051579,200,5.0,0.409212
...,...,...,...,...
115,0.508947,200,5.0,0.786656
165,0.758421,100,3.0,0.791089
177,0.800000,200,3.0,0.815231
174,0.800000,100,3.0,0.837950
