In [1]:
# !pip install hyperopt

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter("ignore")
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
import lightgbm as lgb


from sklearn.metrics import f1_score, roc_auc_score, precision_score, classification_report
from sklearn.metrics import precision_recall_curve, confusion_matrix, mean_squared_error

from hyperopt import fmin, tpe, hp, partial

In [3]:
def f_score(y_true, y_pred, b=1):
    precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
    fscore = (1 + b ** 2) * (precision * recall) / (b ** 2 * precision + recall)
    r_a_score = roc_auc_score(y_true, y_pred)
    np.nan_to_num(fscore, copy=False)
    # locate the index of the largest f score
    ix = np.argmax(fscore)
    return thresholds[ix], fscore[ix], precision[ix], recall[ix], r_a_score

In [4]:
#соберем наш простой pipeline, но нам понадобится написать класс для выбора нужного поля
class FeatureSelector(BaseEstimator, TransformerMixin):
    def __init__(self, column):
        self.column = column

    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        return X[self.column]
    
class NumberSelector(BaseEstimator, TransformerMixin):
    """
    Transformer to select a single column from the data frame to perform additional transformations on
    Use on numeric columns in the data
    """
    def __init__(self, key):
        self.key = key

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X[[self.key]]
    
class OHEEncoder(BaseEstimator, TransformerMixin):
    def __init__(self, key):
        self.key = key
        self.columns = []

    def fit(self, X, y=None):
        self.columns = [col for col in pd.get_dummies(X, prefix=self.key).columns]
        return self

    def transform(self, X):
        X = pd.get_dummies(X, prefix=self.key)
        test_columns = [col for col in X.columns]
        for col_ in self.columns:
            if col_ not in test_columns:
                X[col_] = 0
        return X[self.columns]

1. Для нашего пайплайна (Case1) поэкспериментировать с разными моделями: 1 - бустинг, 2 - логистическая регрессия (не забудьте здесь добавить в cont_transformer стандартизацию - нормирование вещественных признаков)
2. Отобрать лучшую модель по метрикам (кстати, какая по вашему мнению здесь наиболее подходящая DS-метрика)
3. Для отобранной модели (на отложенной выборке) сделать оценку экономической эффективности при тех же вводных, как в вопросе 2 (1 доллар на привлечение, 2 доллара - с каждого правильно классифицированного (True Positive) удержанного). (подсказка) нужно посчитать FP/TP/FN/TN для выбранного оптимального порога вероятности и посчитать выручку и траты. 
4. (опционально) Провести подбор гиперпараметров лучшей модели по итогам 2-3
5. (опционально) Еще раз провести оценку экономической эффективности

In [5]:
df = pd.read_csv('churn_data.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
df['Exited'].value_counts(normalize=True)

0    0.7963
1    0.2037
Name: Exited, dtype: float64

In [7]:
df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1, inplace=True)

In [8]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


Разобьем датасет на 3 выборки: трейн, тест и валидационную

In [9]:
x, X_test, y, y_test = train_test_split(df.drop('Exited', axis=1), df['Exited'], test_size=0.2, train_size=0.8)
X_train, x_cv, y_train, y_cv = train_test_split(x, y, test_size = 0.25, train_size =0.75)

In [10]:
categorical_columns = ['Geography', 'Gender', 'Tenure', 'HasCrCard', 'IsActiveMember']
continuous_columns = ['CreditScore', 'Age', 'Balance', 'NumOfProducts', 'EstimatedSalary']

In [11]:
final_transformers = list()

for cat_col in categorical_columns:
    cat_transformer = Pipeline([
                ('selector', FeatureSelector(column=cat_col)),
                ('ohe', OHEEncoder(key=cat_col))
            ])
    final_transformers.append((cat_col, cat_transformer))
    
for cont_col in continuous_columns:
    cont_transformer = Pipeline([
                ('selector', NumberSelector(key=cont_col)),
                ('scaler', StandardScaler()),
            ])
    final_transformers.append((cont_col, cont_transformer))

In [12]:
feats = FeatureUnion(final_transformers)
feature_processing = Pipeline([('feats', feats)])

Создадим список пайплайнов с моделями

In [13]:
model_list = [LogisticRegression, 
              RandomForestClassifier, 
              GradientBoostingClassifier, 
              LGBMClassifier,
              XGBClassifier]
classifier_list = [make_pipeline(feats, model(random_state = 42)) for model in model_list]

Получим предикты от каждой из них и сведем все в датафрейм

In [14]:
names = []
metrics = []
for pipe in classifier_list:
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict_proba(X_test)[:, 1]
    names.append(pipe.steps[1][0])
    thr, f1, pr, rec, roc = f_score(y_test.values, y_pred, b=1)
    tn, fp, fn, tp = confusion_matrix(y_test.values, y_pred > thr).ravel()
    metrics.append([thr, f1, pr, rec, roc, tp, tn, fp, fn, fp / (fp + tn)])



In [15]:
m_cols = ['Threshhold', 'F-score', 'Precision', 'Recall', 'ROC-AUC', 'TP', 'TN', 'FP', 'FN', 'FPR']
result_df = pd.DataFrame(metrics, 
                         index=names, 
                         columns=m_cols)
result_df['FPR'] = result_df['FP'] / (result_df['FP'] + result_df['TN'])

In [16]:
result_df

Unnamed: 0,Threshhold,F-score,Precision,Recall,ROC-AUC,TP,TN,FP,FN,FPR
logisticregression,0.307705,0.488208,0.469388,0.5086,0.750519,206,1359,234,201,0.146893
randomforestclassifier,0.41,0.600277,0.686709,0.53317,0.834033,211,1499,94,196,0.059008
gradientboostingclassifier,0.359124,0.611549,0.656338,0.572482,0.851091,232,1471,122,175,0.076585
lgbmclassifier,0.367175,0.621554,0.634271,0.609337,0.851255,247,1450,143,160,0.089768
xgbclassifier,0.316108,0.600715,0.583333,0.619165,0.835745,251,1413,180,156,0.112994


В принципе, все бустинговые модели отработали очень похоже, выберем для теста оптимизации параметров <b>lightgbm</b>

Оценим работу нашей модели на отложенной выборке

In [17]:
valid_pred = classifier_list[3].predict_proba(x_cv)[:, 1]

In [18]:
thr, f1, pr, rec, roc = f_score(y_cv.values, valid_pred, b=1)
tn, fp, fn, tp = confusion_matrix(y_cv.values, valid_pred > thr).ravel()
fpr = fp / (fp + tn)
n_met = [thr, f1, pr, rec, roc, tp, tn, fp, fn, fpr]

In [19]:
pd.Series({i: j for i, j in zip(m_cols, n_met)}, name='lgbm(on holdout)')

Threshhold       0.416449
F-score          0.625698
Precision        0.704403
Recall           0.562814
ROC-AUC          0.846188
TP             223.000000
TN            1508.000000
FP              94.000000
FN             175.000000
FPR              0.058677
Name: lgbm(on holdout), dtype: float64

Посчитаем следующим образом:

Деньги мы тратим на удержание клиентов, которые, как мы считаем, уходят. То есть это TN и FN. Но с FN мы получаем деньги, так как они на самом деле не уходят.  

На TP мы ничего не тратим, они приносят нам деньги  
На FP мы также ничего не тратим, так как считаем что они не уходят - деньги они не приносят  
На TN мы тратим деньги на удержание  

In [20]:
hold = -1
true_p = 2

tp * true_p + fp * 0 + (fn * hold + fn * true_p) + tn * hold

-887

Модель не целесообразна экономически

Попробуем оптимизировать.  

Код, с небольшими модификациями, <a href='https://www.programmersought.com/article/81271625281/'>отсюда</a>

In [21]:
x_tr, x_te, y_tr, y_te = train_test_split(feature_processing.fit_transform(df.drop(['Exited'], 
                                                                                   axis=1)), 
                                          df['Exited'], test_size=0.3, random_state=0)

In [22]:
train_data = lgb.Dataset(data=x_tr,label=y_tr)
test_data = lgb.Dataset(data=x_te,label=y_te)

In [23]:
# Customize the parameter space of hyperopt
space = {"max_depth": hp.randint("max_depth", 15),
         "num_trees": hp.randint("num_trees", 300),
         'learning_rate': hp.uniform('learning_rate', 1e-3, 5e-1),
         "bagging_fraction": hp.randint("bagging_fraction", 5),
         "num_leaves": hp.randint("num_leaves", 6),
         }
 
def argsDict_tranform(argsDict, isPrint=False):
    argsDict["max_depth"] = argsDict["max_depth"] + 5
    argsDict['num_trees'] = argsDict['num_trees'] + 150
    argsDict["learning_rate"] = argsDict["learning_rate"] * 0.02 + 0.05
    argsDict["bagging_fraction"] = argsDict["bagging_fraction"] * 0.1 + 0.5
    argsDict["num_leaves"] = argsDict["num_leaves"] * 3 + 10
    if isPrint:
        print(argsDict)
    else:
        pass
 
    return argsDict

In [24]:
def lightgbm_factory(argsDict):
    argsDict = argsDict_tranform(argsDict)
 
    params = {'nthread': -1, # number of processes
                     'max_depth': argsDict['max_depth'], # maximum depth
                     'num_trees': argsDict['num_trees'], # number of trees
                     'eta': argsDict['learning_rate'], # learning rate
                     'bagging_fraction': argsDict['bagging_fraction'], # baggingsamples
                     'num_leaves': argsDict['num_leaves'], # The sum of the smallest sample of the endpoints
                     'objective': 'regression',
                     'feature_fraction': 0.7, #sample column sampling
                     'lambda_l1': 0, # L1 regularization
                     'lambda_l2': 0, # L2 Regularization
                     'bagging_seed': 100, # random seed, default in light 100
              }
    #rmse
    params['metric'] = ['rmse']
 
    model_lgb = lgb.train(params, train_data, num_boost_round=300, valid_sets=[test_data],early_stopping_rounds=100)
 
    return get_tranformer_score(model_lgb)
 
def get_tranformer_score(tranformer):
 
    model = tranformer
    prediction = model.predict(x_te, num_iteration=model.best_iteration)
 
    return mean_squared_error(y_te, prediction)

In [25]:
#Start using hyperopt for automatic parameter adjustment
algo = partial(tpe.suggest, n_startup_jobs=1)
best = fmin(lightgbm_factory, space, algo=algo, max_evals=20, pass_expr_memo_ctrl=None)

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 874                      
[LightGBM] [Info] Number of data points in the train set: 7000, number of used features: 25
[LightGBM] [Info] Start training from score 0.202286  
[1]	valid_0's rmse: 0.400007                          
Training until validation scores don't improve for 100 rounds
[2]	valid_0's rmse: 0.395327                          
[3]	valid_0's rmse: 0.391478                          
[4]	valid_0's rmse: 0.38577                           
[5]	valid_0's rmse: 0.379858                          
[6]	valid_0's rmse: 0.376827                          
[7]	valid_0's rmse: 0.373153                          
[8]	valid_0's rmse: 0.368273                          
[9]	valid_0's rmse: 0.365268                          
[10]	valid_0's rmse: 0.361142                         
[11]	valid_0's rmse: 0.358533                         
[12]	valid_0's rmse: 0.354987                         
[13]	valid_0's rms

[142]	valid_0's rmse: 0.314446                        
[143]	valid_0's rmse: 0.314459                        
[144]	valid_0's rmse: 0.314484                        
[145]	valid_0's rmse: 0.314529                        
[146]	valid_0's rmse: 0.314563                        
[147]	valid_0's rmse: 0.314561                        
[148]	valid_0's rmse: 0.314563                        
[149]	valid_0's rmse: 0.314583                        
[150]	valid_0's rmse: 0.314618                        
[151]	valid_0's rmse: 0.314603                        
[152]	valid_0's rmse: 0.314579                        
[153]	valid_0's rmse: 0.314574                        
[154]	valid_0's rmse: 0.314533                        
[155]	valid_0's rmse: 0.314524                        
[156]	valid_0's rmse: 0.314518                        
[157]	valid_0's rmse: 0.314448                        
[158]	valid_0's rmse: 0.314433                        
[159]	valid_0's rmse: 0.314443                        
[160]	vali

[58]	valid_0's rmse: 0.316949                                                    
[59]	valid_0's rmse: 0.316749                                                    
[60]	valid_0's rmse: 0.316591                                                    
[61]	valid_0's rmse: 0.316523                                                    
[62]	valid_0's rmse: 0.316424                                                    
[63]	valid_0's rmse: 0.316357                                                    
[64]	valid_0's rmse: 0.316321                                                    
[65]	valid_0's rmse: 0.316248                                                    
[66]	valid_0's rmse: 0.316171                                                    
[67]	valid_0's rmse: 0.316139                                                    
[68]	valid_0's rmse: 0.316064                                                    
[69]	valid_0's rmse: 0.315992                                                    
[70]	valid_0's r

[157]	valid_0's rmse: 0.315001                                                   
[158]	valid_0's rmse: 0.31515                                                    
[159]	valid_0's rmse: 0.315185                                                   
[160]	valid_0's rmse: 0.315169                                                   
[161]	valid_0's rmse: 0.315176                                                   
[162]	valid_0's rmse: 0.315188                                                   
[163]	valid_0's rmse: 0.315201                                                   
[164]	valid_0's rmse: 0.31523                                                    
[165]	valid_0's rmse: 0.315246                                                   
[166]	valid_0's rmse: 0.315255                                                   
[167]	valid_0's rmse: 0.315263                                                   
[168]	valid_0's rmse: 0.315347                                                   
[169]	valid_0's 

[54]	valid_0's rmse: 0.316146                                                    
[55]	valid_0's rmse: 0.316037                                                    
[56]	valid_0's rmse: 0.315944                                                    
[57]	valid_0's rmse: 0.315913                                                    
[58]	valid_0's rmse: 0.315892                                                    
[59]	valid_0's rmse: 0.315792                                                    
[60]	valid_0's rmse: 0.315774                                                    
[61]	valid_0's rmse: 0.315686                                                    
[62]	valid_0's rmse: 0.315614                                                    
[63]	valid_0's rmse: 0.315587                                                    
[64]	valid_0's rmse: 0.31555                                                     
[65]	valid_0's rmse: 0.315548                                                    
[66]	valid_0's r

[150]	valid_0's rmse: 0.315672                                                   
[151]	valid_0's rmse: 0.315703                                                   
[152]	valid_0's rmse: 0.31571                                                    
[153]	valid_0's rmse: 0.31574                                                    
[154]	valid_0's rmse: 0.315741                                                   
[155]	valid_0's rmse: 0.315708                                                   
[156]	valid_0's rmse: 0.315717                                                   
[157]	valid_0's rmse: 0.315724                                                   
[158]	valid_0's rmse: 0.315785                                                   
[159]	valid_0's rmse: 0.315823                                                   
[160]	valid_0's rmse: 0.315786                                                   
[161]	valid_0's rmse: 0.31576                                                    
[162]	valid_0's 

[50]	valid_0's rmse: 0.318056                                                    
[51]	valid_0's rmse: 0.317824                                                    
[52]	valid_0's rmse: 0.317506                                                    
[53]	valid_0's rmse: 0.317237                                                    
[54]	valid_0's rmse: 0.317035                                                    
[55]	valid_0's rmse: 0.316889                                                    
[56]	valid_0's rmse: 0.316838                                                    
[57]	valid_0's rmse: 0.316692                                                    
[58]	valid_0's rmse: 0.316565                                                    
[59]	valid_0's rmse: 0.316462                                                    
[60]	valid_0's rmse: 0.316392                                                    
[61]	valid_0's rmse: 0.316242                                                    
[62]	valid_0's r

[131]	valid_0's rmse: 0.314075                                                   
[132]	valid_0's rmse: 0.314088                                                   
[133]	valid_0's rmse: 0.314122                                                   
[134]	valid_0's rmse: 0.314113                                                   
[135]	valid_0's rmse: 0.314112                                                   
[136]	valid_0's rmse: 0.314121                                                   
[137]	valid_0's rmse: 0.314072                                                   
[138]	valid_0's rmse: 0.314106                                                   
[139]	valid_0's rmse: 0.314109                                                   
[140]	valid_0's rmse: 0.314133                                                   
[141]	valid_0's rmse: 0.314178                                                   
[142]	valid_0's rmse: 0.314191                                                   
[143]	valid_0's 

[213]	valid_0's rmse: 0.314866                                                   
[214]	valid_0's rmse: 0.314887                                                   
[215]	valid_0's rmse: 0.314891                                                   
[216]	valid_0's rmse: 0.314863                                                   
[217]	valid_0's rmse: 0.314878                                                   
[218]	valid_0's rmse: 0.314918                                                   
[219]	valid_0's rmse: 0.314935                                                   
[220]	valid_0's rmse: 0.314947                                                   
[221]	valid_0's rmse: 0.314978                                                   
[222]	valid_0's rmse: 0.315025                                                   
[223]	valid_0's rmse: 0.315013                                                   
[224]	valid_0's rmse: 0.315026                                                   
[225]	valid_0's 

[66]	valid_0's rmse: 0.315182                                                    
[67]	valid_0's rmse: 0.315008                                                    
[68]	valid_0's rmse: 0.31495                                                     
[69]	valid_0's rmse: 0.31488                                                     
[70]	valid_0's rmse: 0.314783                                                    
[71]	valid_0's rmse: 0.314658                                                    
[72]	valid_0's rmse: 0.314586                                                    
[73]	valid_0's rmse: 0.314548                                                    
[74]	valid_0's rmse: 0.314477                                                    
[75]	valid_0's rmse: 0.314421                                                    
[76]	valid_0's rmse: 0.314415                                                    
[77]	valid_0's rmse: 0.314381                                                    
[78]	valid_0's r

[142]	valid_0's rmse: 0.314761                                                   
[143]	valid_0's rmse: 0.314786                                                   
[144]	valid_0's rmse: 0.314751                                                   
[145]	valid_0's rmse: 0.314745                                                   
[146]	valid_0's rmse: 0.314752                                                   
[147]	valid_0's rmse: 0.314753                                                   
[148]	valid_0's rmse: 0.314774                                                   
[149]	valid_0's rmse: 0.314809                                                   
[150]	valid_0's rmse: 0.314822                                                   
[151]	valid_0's rmse: 0.314831                                                   
[152]	valid_0's rmse: 0.314824                                                   
[153]	valid_0's rmse: 0.314854                                                   
[154]	valid_0's 

[27]	valid_0's rmse: 0.327571                                                    
[28]	valid_0's rmse: 0.326403                                                    
[29]	valid_0's rmse: 0.325312                                                    
[30]	valid_0's rmse: 0.324575                                                    
[31]	valid_0's rmse: 0.323765                                                    
[32]	valid_0's rmse: 0.323009                                                    
[33]	valid_0's rmse: 0.322493                                                    
[34]	valid_0's rmse: 0.322302                                                    
[35]	valid_0's rmse: 0.321651                                                    
[36]	valid_0's rmse: 0.321126                                                    
[37]	valid_0's rmse: 0.320679                                                    
[38]	valid_0's rmse: 0.320236                                                    
[39]	valid_0's r

[126]	valid_0's rmse: 0.314278                                                   
[127]	valid_0's rmse: 0.314265                                                   
[128]	valid_0's rmse: 0.314268                                                   
[129]	valid_0's rmse: 0.314291                                                   
[130]	valid_0's rmse: 0.314332                                                   
[131]	valid_0's rmse: 0.314347                                                   
[132]	valid_0's rmse: 0.314347                                                   
[133]	valid_0's rmse: 0.31438                                                    
[134]	valid_0's rmse: 0.31435                                                    
[135]	valid_0's rmse: 0.314343                                                   
[136]	valid_0's rmse: 0.31437                                                    
[137]	valid_0's rmse: 0.314408                                                   
[138]	valid_0's 

[1]	valid_0's rmse: 0.399164                                                     
Training until validation scores don't improve for 100 rounds                    
[2]	valid_0's rmse: 0.393627                                                     
[3]	valid_0's rmse: 0.389259                                                     
[4]	valid_0's rmse: 0.382752                                                     
[5]	valid_0's rmse: 0.376116                                                     
[6]	valid_0's rmse: 0.372813                                                     
[7]	valid_0's rmse: 0.368741                                                     
[8]	valid_0's rmse: 0.363373                                                     
[9]	valid_0's rmse: 0.360117                                                     
[10]	valid_0's rmse: 0.355734                                                    
[11]	valid_0's rmse: 0.35292                                                     
[12]	valid_0's r

[99]	valid_0's rmse: 0.315108                                                    
[100]	valid_0's rmse: 0.315089                                                   
[101]	valid_0's rmse: 0.315022                                                   
[102]	valid_0's rmse: 0.315044                                                   
[103]	valid_0's rmse: 0.315074                                                   
[104]	valid_0's rmse: 0.315116                                                   
[105]	valid_0's rmse: 0.315097                                                   
[106]	valid_0's rmse: 0.31515                                                    
[107]	valid_0's rmse: 0.315173                                                   
[108]	valid_0's rmse: 0.315177                                                   
[109]	valid_0's rmse: 0.315147                                                   
[110]	valid_0's rmse: 0.315173                                                   
[111]	valid_0's 

[5]	valid_0's rmse: 0.379648                                                     
[6]	valid_0's rmse: 0.376499                                                     
[7]	valid_0's rmse: 0.372716                                                     
[8]	valid_0's rmse: 0.367759                                                     
[9]	valid_0's rmse: 0.364607                                                     
[10]	valid_0's rmse: 0.360485                                                    
[11]	valid_0's rmse: 0.35776                                                     
[12]	valid_0's rmse: 0.354121                                                    
[13]	valid_0's rmse: 0.351984                                                    
[14]	valid_0's rmse: 0.348893                                                    
[15]	valid_0's rmse: 0.346787                                                    
[16]	valid_0's rmse: 0.344321                                                    
[17]	valid_0's r

[87]	valid_0's rmse: 0.314423                                                    
[88]	valid_0's rmse: 0.314401                                                    
[89]	valid_0's rmse: 0.314361                                                    
[90]	valid_0's rmse: 0.314369                                                    
[91]	valid_0's rmse: 0.314409                                                    
[92]	valid_0's rmse: 0.314431                                                    
[93]	valid_0's rmse: 0.3144                                                      
[94]	valid_0's rmse: 0.314381                                                    
[95]	valid_0's rmse: 0.314399                                                    
[96]	valid_0's rmse: 0.314385                                                    
[97]	valid_0's rmse: 0.314376                                                    
[98]	valid_0's rmse: 0.314281                                                    
[99]	valid_0's r

[162]	valid_0's rmse: 0.314265                                                   
[163]	valid_0's rmse: 0.314285                                                   
[164]	valid_0's rmse: 0.314309                                                   
[165]	valid_0's rmse: 0.314326                                                   
[166]	valid_0's rmse: 0.314342                                                   
[167]	valid_0's rmse: 0.314367                                                   
[168]	valid_0's rmse: 0.314363                                                   
[169]	valid_0's rmse: 0.314288                                                   
[170]	valid_0's rmse: 0.314272                                                   
[171]	valid_0's rmse: 0.314266                                                   
[172]	valid_0's rmse: 0.314248                                                   
[173]	valid_0's rmse: 0.314163                                                   
[174]	valid_0's 

[48]	valid_0's rmse: 0.317791                                                    
[49]	valid_0's rmse: 0.317611                                                    
[50]	valid_0's rmse: 0.317407                                                    
[51]	valid_0's rmse: 0.317273                                                    
[52]	valid_0's rmse: 0.317004                                                    
[53]	valid_0's rmse: 0.316799                                                    
[54]	valid_0's rmse: 0.316633                                                    
[55]	valid_0's rmse: 0.316401                                                    
[56]	valid_0's rmse: 0.316305                                                    
[57]	valid_0's rmse: 0.316148                                                    
[58]	valid_0's rmse: 0.315998                                                    
[59]	valid_0's rmse: 0.315898                                                    
[60]	valid_0's r

[145]	valid_0's rmse: 0.314091                                                   
[146]	valid_0's rmse: 0.314096                                                   
[147]	valid_0's rmse: 0.314064                                                   
[148]	valid_0's rmse: 0.314052                                                   
[149]	valid_0's rmse: 0.314027                                                   
[150]	valid_0's rmse: 0.314037                                                   
[151]	valid_0's rmse: 0.314062                                                   
[152]	valid_0's rmse: 0.314071                                                   
[153]	valid_0's rmse: 0.314032                                                   
[154]	valid_0's rmse: 0.31403                                                    
[155]	valid_0's rmse: 0.314014                                                   
[156]	valid_0's rmse: 0.314017                                                   
[157]	valid_0's 

[16]	valid_0's rmse: 0.34295                                                     
[17]	valid_0's rmse: 0.341178                                                    
[18]	valid_0's rmse: 0.340548                                                    
[19]	valid_0's rmse: 0.338981                                                    
[20]	valid_0's rmse: 0.337832                                                    
[21]	valid_0's rmse: 0.336104                                                    
[22]	valid_0's rmse: 0.334787                                                    
[23]	valid_0's rmse: 0.333128                                                    
[24]	valid_0's rmse: 0.332029                                                    
[25]	valid_0's rmse: 0.330668                                                    
[26]	valid_0's rmse: 0.329438                                                    
[27]	valid_0's rmse: 0.329086                                                    
[28]	valid_0's r

[115]	valid_0's rmse: 0.31424                                                    
[116]	valid_0's rmse: 0.314205                                                   
[117]	valid_0's rmse: 0.314205                                                   
[118]	valid_0's rmse: 0.314252                                                   
[119]	valid_0's rmse: 0.314216                                                   
[120]	valid_0's rmse: 0.314273                                                   
[121]	valid_0's rmse: 0.31427                                                    
[122]	valid_0's rmse: 0.314226                                                   
[123]	valid_0's rmse: 0.314231                                                   
[124]	valid_0's rmse: 0.314235                                                   
[125]	valid_0's rmse: 0.314193                                                   
[126]	valid_0's rmse: 0.314153                                                   
[127]	valid_0's 

[214]	valid_0's rmse: 0.313927                                                   
[215]	valid_0's rmse: 0.313916                                                   
[216]	valid_0's rmse: 0.313919                                                   
[217]	valid_0's rmse: 0.313939                                                   
[218]	valid_0's rmse: 0.313952                                                   
Did not meet early stopping. Best iteration is:                                  
[215]	valid_0's rmse: 0.313916
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 874                                                  
[LightGBM] [Info] Number of data points in the train set: 7000, number of used features: 25
[LightGBM] [Info] Start training from score 0.202286                              
[1]	valid_0's rmse: 0.399978                                                      
Training until validation scores don't improve for 100 rounds                 

[87]	valid_0's rmse: 0.314627                                                     
[88]	valid_0's rmse: 0.314581                                                     
[89]	valid_0's rmse: 0.31454                                                      
[90]	valid_0's rmse: 0.314538                                                     
[91]	valid_0's rmse: 0.314549                                                     
[92]	valid_0's rmse: 0.314564                                                     
[93]	valid_0's rmse: 0.314532                                                     
[94]	valid_0's rmse: 0.314553                                                     
[95]	valid_0's rmse: 0.314538                                                     
[96]	valid_0's rmse: 0.314567                                                     
[97]	valid_0's rmse: 0.314525                                                     
[98]	valid_0's rmse: 0.314535                                                     
[99]

[185]	valid_0's rmse: 0.314349                                                    
[186]	valid_0's rmse: 0.314344                                                    
[187]	valid_0's rmse: 0.31439                                                     
[188]	valid_0's rmse: 0.314371                                                    
[189]	valid_0's rmse: 0.314386                                                    
[190]	valid_0's rmse: 0.314363                                                    
[191]	valid_0's rmse: 0.314357                                                    
[192]	valid_0's rmse: 0.314328                                                    
[193]	valid_0's rmse: 0.314337                                                    
[194]	valid_0's rmse: 0.314348                                                    
[195]	valid_0's rmse: 0.314361                                                    
[196]	valid_0's rmse: 0.314427                                                    
[197

[8]	valid_0's rmse: 0.368162                                                      
[9]	valid_0's rmse: 0.364997                                                      
[10]	valid_0's rmse: 0.361237                                                     
[11]	valid_0's rmse: 0.358742                                                     
[12]	valid_0's rmse: 0.355462                                                     
[13]	valid_0's rmse: 0.353207                                                     
[14]	valid_0's rmse: 0.350478                                                     
[15]	valid_0's rmse: 0.348233                                                     
[16]	valid_0's rmse: 0.345731                                                     
[17]	valid_0's rmse: 0.343844                                                     
[18]	valid_0's rmse: 0.343212                                                     
[19]	valid_0's rmse: 0.341651                                                     
[20]

[106]	valid_0's rmse: 0.314384                                                    
[107]	valid_0's rmse: 0.314321                                                    
[108]	valid_0's rmse: 0.314311                                                    
[109]	valid_0's rmse: 0.314287                                                    
[110]	valid_0's rmse: 0.314275                                                    
[111]	valid_0's rmse: 0.314238                                                    
[112]	valid_0's rmse: 0.314212                                                    
[113]	valid_0's rmse: 0.314208                                                    
[114]	valid_0's rmse: 0.314211                                                    
[115]	valid_0's rmse: 0.31422                                                     
[116]	valid_0's rmse: 0.314242                                                    
[117]	valid_0's rmse: 0.314217                                                    
[118

[204]	valid_0's rmse: 0.314312                                                    
[205]	valid_0's rmse: 0.314286                                                    
[206]	valid_0's rmse: 0.31429                                                     
[207]	valid_0's rmse: 0.314257                                                    
[208]	valid_0's rmse: 0.314286                                                    
[209]	valid_0's rmse: 0.314245                                                    
[210]	valid_0's rmse: 0.314231                                                    
[211]	valid_0's rmse: 0.314242                                                    
[212]	valid_0's rmse: 0.314224                                                    
[213]	valid_0's rmse: 0.314217                                                    
[214]	valid_0's rmse: 0.314223                                                    
[215]	valid_0's rmse: 0.314234                                                    
[216

[33]	valid_0's rmse: 0.323041                                                     
[34]	valid_0's rmse: 0.322694                                                     
[35]	valid_0's rmse: 0.322018                                                     
[36]	valid_0's rmse: 0.321503                                                     
[37]	valid_0's rmse: 0.321044                                                     
[38]	valid_0's rmse: 0.320711                                                     
[39]	valid_0's rmse: 0.320401                                                     
[40]	valid_0's rmse: 0.320055                                                     
[41]	valid_0's rmse: 0.319583                                                     
[42]	valid_0's rmse: 0.319366                                                     
[43]	valid_0's rmse: 0.318927                                                     
[44]	valid_0's rmse: 0.318594                                                     
[45]

[131]	valid_0's rmse: 0.313938                                                    
[132]	valid_0's rmse: 0.313973                                                    
[133]	valid_0's rmse: 0.314036                                                    
[134]	valid_0's rmse: 0.314037                                                    
[135]	valid_0's rmse: 0.314037                                                    
[136]	valid_0's rmse: 0.314026                                                    
[137]	valid_0's rmse: 0.314006                                                    
[138]	valid_0's rmse: 0.314044                                                    
[139]	valid_0's rmse: 0.314014                                                    
[140]	valid_0's rmse: 0.313992                                                    
[141]	valid_0's rmse: 0.314006                                                    
[142]	valid_0's rmse: 0.314044                                                    
[143

[229]	valid_0's rmse: 0.314492                                                    
[230]	valid_0's rmse: 0.314564                                                    
Early stopping, best iteration is:                                                
[130]	valid_0's rmse: 0.313893
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 874                                                  
[LightGBM] [Info] Number of data points in the train set: 7000, number of used features: 25
[LightGBM] [Info] Start training from score 0.202286                              
[1]	valid_0's rmse: 0.399284                                                      
Training until validation scores don't improve for 100 rounds                     
[2]	valid_0's rmse: 0.393957                                                      
[3]	valid_0's rmse: 0.389614                                                      
[4]	valid_0's rmse: 0.383487                                            

[90]	valid_0's rmse: 0.314047                                                     
[91]	valid_0's rmse: 0.313935                                                     
[92]	valid_0's rmse: 0.313975                                                     
[93]	valid_0's rmse: 0.314022                                                     
[94]	valid_0's rmse: 0.313924                                                     
[95]	valid_0's rmse: 0.313912                                                     
[96]	valid_0's rmse: 0.313891                                                     
[97]	valid_0's rmse: 0.313906                                                     
[98]	valid_0's rmse: 0.313981                                                     
[99]	valid_0's rmse: 0.313952                                                     
[100]	valid_0's rmse: 0.313969                                                    
[101]	valid_0's rmse: 0.313877                                                    
[102

[188]	valid_0's rmse: 0.314005                                                    
[189]	valid_0's rmse: 0.314007                                                    
[190]	valid_0's rmse: 0.313962                                                    
[191]	valid_0's rmse: 0.31399                                                     
[192]	valid_0's rmse: 0.314012                                                    
[193]	valid_0's rmse: 0.31406                                                     
[194]	valid_0's rmse: 0.314055                                                    
[195]	valid_0's rmse: 0.314074                                                    
[196]	valid_0's rmse: 0.314038                                                    
[197]	valid_0's rmse: 0.314024                                                    
[198]	valid_0's rmse: 0.314058                                                    
[199]	valid_0's rmse: 0.314                                                       
[200

[77]	valid_0's rmse: 0.315409                                                     
[78]	valid_0's rmse: 0.315384                                                     
[79]	valid_0's rmse: 0.315276                                                     
[80]	valid_0's rmse: 0.315238                                                     
[81]	valid_0's rmse: 0.315216                                                     
[82]	valid_0's rmse: 0.3152                                                       
[83]	valid_0's rmse: 0.315079                                                     
[84]	valid_0's rmse: 0.314984                                                     
[85]	valid_0's rmse: 0.314932                                                     
[86]	valid_0's rmse: 0.314871                                                     
[87]	valid_0's rmse: 0.314893                                                     
[88]	valid_0's rmse: 0.314898                                                     
[89]

[175]	valid_0's rmse: 0.314442                                                    
[176]	valid_0's rmse: 0.314528                                                    
[177]	valid_0's rmse: 0.314523                                                    
[178]	valid_0's rmse: 0.314551                                                    
[179]	valid_0's rmse: 0.314552                                                    
[180]	valid_0's rmse: 0.314606                                                    
[181]	valid_0's rmse: 0.31457                                                     
[182]	valid_0's rmse: 0.314614                                                    
[183]	valid_0's rmse: 0.314631                                                    
[184]	valid_0's rmse: 0.314634                                                    
[185]	valid_0's rmse: 0.314635                                                    
[186]	valid_0's rmse: 0.314635                                                    
[187

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 874                                                  
[LightGBM] [Info] Number of data points in the train set: 7000, number of used features: 25
[LightGBM] [Info] Start training from score 0.202286                              
[1]	valid_0's rmse: 0.39978                                                       
Training until validation scores don't improve for 100 rounds                     
[2]	valid_0's rmse: 0.3948                                                        
[3]	valid_0's rmse: 0.39084                                                       
[4]	valid_0's rmse: 0.384806                                                      
[5]	valid_0's rmse: 0.378567                                                      
[6]	valid_0's rmse: 0.375441                                                      
[7]	valid_0's rmse: 0.371669                                                      
[8]	valid_0's rmse: 

[89]	valid_0's rmse: 0.314463                                                     
[90]	valid_0's rmse: 0.314467                                                     
[91]	valid_0's rmse: 0.314532                                                     
[92]	valid_0's rmse: 0.314551                                                     
[93]	valid_0's rmse: 0.314503                                                     
[94]	valid_0's rmse: 0.314466                                                     
[95]	valid_0's rmse: 0.31455                                                      
[96]	valid_0's rmse: 0.314575                                                     
[97]	valid_0's rmse: 0.314543                                                     
[98]	valid_0's rmse: 0.31453                                                      
[99]	valid_0's rmse: 0.314508                                                     
[100]	valid_0's rmse: 0.314532                                                    
[101

[173]	valid_0's rmse: 0.31527                                                     
[174]	valid_0's rmse: 0.315258                                                    
[175]	valid_0's rmse: 0.315253                                                    
[176]	valid_0's rmse: 0.315277                                                    
[177]	valid_0's rmse: 0.315267                                                    
[178]	valid_0's rmse: 0.3153                                                      
[179]	valid_0's rmse: 0.315359                                                    
[180]	valid_0's rmse: 0.315344                                                    
[181]	valid_0's rmse: 0.315378                                                    
[182]	valid_0's rmse: 0.315389                                                    
[183]	valid_0's rmse: 0.315408                                                    
[184]	valid_0's rmse: 0.315418                                                    
[185

[72]	valid_0's rmse: 0.314689                                                     
[73]	valid_0's rmse: 0.314744                                                     
[74]	valid_0's rmse: 0.314693                                                     
[75]	valid_0's rmse: 0.31473                                                      
[76]	valid_0's rmse: 0.314768                                                     
[77]	valid_0's rmse: 0.314767                                                     
[78]	valid_0's rmse: 0.314772                                                     
[79]	valid_0's rmse: 0.314819                                                     
[80]	valid_0's rmse: 0.314829                                                     
[81]	valid_0's rmse: 0.314877                                                     
[82]	valid_0's rmse: 0.314924                                                     
[83]	valid_0's rmse: 0.314924                                                     
[84]

[170]	valid_0's rmse: 0.314543                                                    
[171]	valid_0's rmse: 0.314575                                                    
[172]	valid_0's rmse: 0.314536                                                    
[173]	valid_0's rmse: 0.314569                                                    
[174]	valid_0's rmse: 0.314619                                                    
[175]	valid_0's rmse: 0.314632                                                    
[176]	valid_0's rmse: 0.314633                                                    
[177]	valid_0's rmse: 0.314632                                                    
[178]	valid_0's rmse: 0.314676                                                    
[179]	valid_0's rmse: 0.314681                                                    
[180]	valid_0's rmse: 0.314682                                                    
[181]	valid_0's rmse: 0.314646                                                    
[182

[14]	valid_0's rmse: 0.346617                                                     
[15]	valid_0's rmse: 0.344535                                                     
[16]	valid_0's rmse: 0.342012                                                     
[17]	valid_0's rmse: 0.340224                                                     
[18]	valid_0's rmse: 0.339593                                                     
[19]	valid_0's rmse: 0.338009                                                     
[20]	valid_0's rmse: 0.336871                                                     
[21]	valid_0's rmse: 0.335166                                                     
[22]	valid_0's rmse: 0.333868                                                     
[23]	valid_0's rmse: 0.332239                                                     
[24]	valid_0's rmse: 0.331145                                                     
[25]	valid_0's rmse: 0.329817                                                     
[26]

[112]	valid_0's rmse: 0.31405                                                     
[113]	valid_0's rmse: 0.314059                                                    
[114]	valid_0's rmse: 0.314045                                                    
[115]	valid_0's rmse: 0.314018                                                    
[116]	valid_0's rmse: 0.314057                                                    
[117]	valid_0's rmse: 0.314058                                                    
[118]	valid_0's rmse: 0.314076                                                    
[119]	valid_0's rmse: 0.314088                                                    
[120]	valid_0's rmse: 0.31412                                                     
[121]	valid_0's rmse: 0.314175                                                    
[122]	valid_0's rmse: 0.314083                                                    
[123]	valid_0's rmse: 0.314065                                                    
[124

[2]	valid_0's rmse: 0.39432                                                       
[3]	valid_0's rmse: 0.390105                                                      
[4]	valid_0's rmse: 0.384145                                                      
[5]	valid_0's rmse: 0.378153                                                      
[6]	valid_0's rmse: 0.374911                                                      
[7]	valid_0's rmse: 0.371033                                                      
[8]	valid_0's rmse: 0.366104                                                      
[9]	valid_0's rmse: 0.362926                                                      
[10]	valid_0's rmse: 0.358726                                                     
[11]	valid_0's rmse: 0.355991                                                     
[12]	valid_0's rmse: 0.352601                                                     
[13]	valid_0's rmse: 0.350413                                                     
[14]

[100]	valid_0's rmse: 0.314749                                                    
[101]	valid_0's rmse: 0.314776                                                    
[102]	valid_0's rmse: 0.314745                                                    
[103]	valid_0's rmse: 0.31477                                                     
[104]	valid_0's rmse: 0.314763                                                    
[105]	valid_0's rmse: 0.314768                                                    
[106]	valid_0's rmse: 0.314724                                                    
[107]	valid_0's rmse: 0.314707                                                    
[108]	valid_0's rmse: 0.314757                                                    
[109]	valid_0's rmse: 0.314791                                                    
[110]	valid_0's rmse: 0.314827                                                    
[111]	valid_0's rmse: 0.31481                                                     
[112

[198]	valid_0's rmse: 0.31497                                                     
[199]	valid_0's rmse: 0.314974                                                    
[200]	valid_0's rmse: 0.315006                                                    
[201]	valid_0's rmse: 0.314994                                                    
[202]	valid_0's rmse: 0.315018                                                    
[203]	valid_0's rmse: 0.314992                                                    
[204]	valid_0's rmse: 0.31494                                                     
[205]	valid_0's rmse: 0.314958                                                    
[206]	valid_0's rmse: 0.314955                                                    
[207]	valid_0's rmse: 0.314974                                                    
[208]	valid_0's rmse: 0.31499                                                     
[209]	valid_0's rmse: 0.31502                                                     
[210

[26]	valid_0's rmse: 0.32854                                                      
[27]	valid_0's rmse: 0.328203                                                     
[28]	valid_0's rmse: 0.327147                                                     
[29]	valid_0's rmse: 0.326134                                                     
[30]	valid_0's rmse: 0.325398                                                     
[31]	valid_0's rmse: 0.324442                                                     
[32]	valid_0's rmse: 0.323852                                                     
[33]	valid_0's rmse: 0.32332                                                      
[34]	valid_0's rmse: 0.322983                                                     
[35]	valid_0's rmse: 0.322356                                                     
[36]	valid_0's rmse: 0.321789                                                     
[37]	valid_0's rmse: 0.321441                                                     
[38]

[124]	valid_0's rmse: 0.314489                                                    
[125]	valid_0's rmse: 0.31445                                                     
[126]	valid_0's rmse: 0.314401                                                    
[127]	valid_0's rmse: 0.314398                                                    
[128]	valid_0's rmse: 0.314377                                                    
[129]	valid_0's rmse: 0.314318                                                    
[130]	valid_0's rmse: 0.314335                                                    
[131]	valid_0's rmse: 0.314292                                                    
[132]	valid_0's rmse: 0.31433                                                     
[133]	valid_0's rmse: 0.314349                                                    
[134]	valid_0's rmse: 0.314373                                                    
[135]	valid_0's rmse: 0.314355                                                    
[136

[222]	valid_0's rmse: 0.314546                                                    
[223]	valid_0's rmse: 0.314505                                                    
[224]	valid_0's rmse: 0.314492                                                    
[225]	valid_0's rmse: 0.314499                                                    
[226]	valid_0's rmse: 0.314516                                                    
[227]	valid_0's rmse: 0.314509                                                    
[228]	valid_0's rmse: 0.314508                                                    
[229]	valid_0's rmse: 0.314566                                                    
[230]	valid_0's rmse: 0.314566                                                    
[231]	valid_0's rmse: 0.314547                                                    
Early stopping, best iteration is:                                                
[131]	valid_0's rmse: 0.314292
100%|██████████| 20/20 [01:18<00:00,  3.94s/trial, best 

In [26]:
RMSE = lightgbm_factory(best)
print('best :', best)
print('best param after transform :')
argsDict_tranform(best,isPrint=True)
print('rmse of the best lightgbm:', np.sqrt(RMSE))

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 874
[LightGBM] [Info] Number of data points in the train set: 7000, number of used features: 25
[LightGBM] [Info] Start training from score 0.202286
[1]	valid_0's rmse: 0.399284
Training until validation scores don't improve for 100 rounds
[2]	valid_0's rmse: 0.393957
[3]	valid_0's rmse: 0.389614
[4]	valid_0's rmse: 0.383487
[5]	valid_0's rmse: 0.377345
[6]	valid_0's rmse: 0.37403
[7]	valid_0's rmse: 0.370073
[8]	valid_0's rmse: 0.365062
[9]	valid_0's rmse: 0.361843
[10]	valid_0's rmse: 0.357601
[11]	valid_0's rmse: 0.354844
[12]	valid_0's rmse: 0.35136
[13]	valid_0's rmse: 0.349163
[14]	valid_0's rmse: 0.34621
[15]	valid_0's rmse: 0.344128
[16]	valid_0's rmse: 0.34161
[17]	valid_0's rmse: 0.339826
[18]	valid_0's rmse: 0.339195
[19]	valid_0's rmse: 0.337617
[20]	valid_0's rmse: 0.336487
[21]	valid_0's rmse: 0.334794
[22]	valid_0's rmse: 0.333503
[23]	valid_0's rmse: 0.33189
[24]	valid_0's rmse: 0.330

In [27]:
mod = LGBMClassifier(**best)

In [28]:
pip1 = make_pipeline(feats, mod)

In [29]:
pip1.fit(X_train, y_train)



Pipeline(steps=[('featureunion',
                 FeatureUnion(transformer_list=[('Geography',
                                                 Pipeline(steps=[('selector',
                                                                  FeatureSelector(column='Geography')),
                                                                 ('ohe',
                                                                  OHEEncoder(key='Geography'))])),
                                                ('Gender',
                                                 Pipeline(steps=[('selector',
                                                                  FeatureSelector(column='Gender')),
                                                                 ('ohe',
                                                                  OHEEncoder(key='Gender'))])),
                                                ('Tenure',
                                                 Pipeline(steps=[('selector',
       

In [30]:
y_pred = pip1.predict_proba(X_test)[:, 1]

thr, f1, pr, rec, roc = f_score(y_test.values, y_pred, b=1)
tn, fp, fn, tp = confusion_matrix(y_test.values, y_pred > thr).ravel()
fpr = fp / (fp + tn)

n_met = [thr, f1, pr, rec, roc, tp, tn, fp, fn, fpr]

In [31]:
result_df.append(pd.Series({i: j for i, j in zip(m_cols, n_met)}, name='lgbm(optimized)'))

Unnamed: 0,Threshhold,F-score,Precision,Recall,ROC-AUC,TP,TN,FP,FN,FPR
logisticregression,0.307705,0.488208,0.469388,0.5086,0.750519,206.0,1359.0,234.0,201.0,0.146893
randomforestclassifier,0.41,0.600277,0.686709,0.53317,0.834033,211.0,1499.0,94.0,196.0,0.059008
gradientboostingclassifier,0.359124,0.611549,0.656338,0.572482,0.851091,232.0,1471.0,122.0,175.0,0.076585
lgbmclassifier,0.367175,0.621554,0.634271,0.609337,0.851255,247.0,1450.0,143.0,160.0,0.089768
xgbclassifier,0.316108,0.600715,0.583333,0.619165,0.835745,251.0,1413.0,180.0,156.0,0.112994
lgbm(optimized),0.225072,0.609566,0.556911,0.673219,0.845127,273.0,1375.0,218.0,134.0,0.136849


Оценим работу оптимизированной модели на отложенной выборке

In [32]:
valid_pred = pip1.predict_proba(x_cv)[:, 1]

In [33]:
thr, f1, pr, rec, roc = f_score(y_cv.values, valid_pred, b=1)
tn, fp, fn, tp = confusion_matrix(y_cv.values, valid_pred > thr).ravel()
fpr = fp / (fp + tn)
n_met = [thr, f1, pr, rec, roc, tp, tn, fp, fn, fpr]

In [34]:
pd.Series({i: j for i, j in zip(m_cols, n_met)}, name='lgbm(optimized on holdout)')

Threshhold       0.263558
F-score          0.603865
Precision        0.581395
Recall           0.628141
ROC-AUC          0.838297
TP             249.000000
TN            1422.000000
FP             180.000000
FN             149.000000
FPR              0.112360
Name: lgbm(optimized on holdout), dtype: float64

Можно сделать вывод что наш вариант оптимизации особой пользы не принес

Посчитаем экономику

In [35]:
hold = -1
true_p = 2

tp * true_p + fp * 0 + (fn * hold + fn * true_p) + tn * hold

-775

Модель не целесообразна экономически (хотя незначительное улучшение мы все таки получили)