In [14]:
import warnings

warnings.filterwarnings('ignore')

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from scorecardpipeline import *
from automl import auto_lightgbm, logger

In [15]:
# Synthetic binary-classification sample: 1000 rows, 30 numeric features, fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=30,
    n_classes=2,
    random_state=328,
)

# Wrap the feature matrix in a DataFrame with columns f0..f29 and append the label
# as the last column, named "target".
data = pd.DataFrame(
    X,
    columns=[f"f{idx}" for idx in range(X.shape[1])],
).assign(target=y)

In [24]:
def auto_logistic(data, target="target", params=None, early_stopping_rounds=10, importance=1e-4, corr=0.4, psi=0.5, test_size=0.25, seed=None, max_rounds=128, mertic="weight", balance_weight=0.2, C=1., class_weight=None, max_iter=128, **kwargs):
    """Search for a logistic regression whose selected features all have non-negative coefficients.

    The data is split once into a ``dev`` and an ``oot`` part. Each round then:

    1. builds ``auto_lightgbm`` on the split with the rejected columns dropped and
       trains it with feature selection enabled, obtaining a list of feature names;
    2. fits ``ITLubberLogisticRegression`` on those features plus ``target``, using
       the full ``data`` frame (not only the ``dev`` part);
    3. inspects ``summary()``: if any selected feature has a negative ``Coef.``,
       the one with the largest ``P>|z|`` is rejected and another round starts;
       otherwise the fitted model is returned.

    The intercept row (``const``) of the summary is ignored when looking for negative
    coefficients: it is not a column of ``data`` and therefore can never be dropped.

    Args:
        data: DataFrame holding the feature columns and the ``target`` column.
        target: Name of the label column.
        params: Parameter dict forwarded to ``auto_lightgbm``. ``None`` (default)
            means an empty dict, created fresh on every call.
        early_stopping_rounds: Forwarded to ``auto_lightgbm``.
        importance: Forwarded to ``train`` as ``imp_threhold``.
        corr: Forwarded to ``train`` as ``corr_threhold``.
        psi: Forwarded to ``train`` as ``psi_threhold``.
        test_size: Fraction of ``data`` used for the ``oot`` part of the split.
        seed: ``random_state`` of the split.
        max_rounds: Maximum number of select-fit-reject rounds.
        mertic: Forwarded to ``train`` as its ``target`` argument (the parameter
            name keeps its historical spelling for backward compatibility).
        balance_weight: Forwarded to ``train`` as ``params_weight``.
        C, class_weight, max_iter, **kwargs: Forwarded to
            ``ITLubberLogisticRegression``.

    Returns:
        The fitted ``ITLubberLogisticRegression`` of the first round in which no
        selected feature has a negative coefficient.

    Raises:
        RuntimeError: If every non-target column has been rejected, or if
            ``max_rounds`` rounds pass without finding an acceptable model.
    """
    if params is None:
        # A fresh dict per call avoids sharing one mutable default between calls.
        params = {}

    del_vars = []
    n_candidates = len(data.columns) - 1  # every column except the target

    dev, oot = train_test_split(data, test_size=test_size, random_state=seed, stratify=data[target])

    for _ in range(max_rounds):
        if len(del_vars) >= n_candidates:
            # Nothing left to model on: every candidate feature has been rejected.
            raise RuntimeError("自动逻辑回归建模失败")

        lgb_base = auto_lightgbm(
            {"dev": dev.drop(columns=del_vars), "oot": oot.drop(columns=del_vars)},
            params=params,
            early_stopping_rounds=early_stopping_rounds,
        )
        # Only the selected feature names are needed; the boosted model is discarded.
        _, new_var_names = lgb_base.train(
            select_feature=True,
            select_type='shap',
            single_delete=True,
            imp_threhold=importance,
            corr_threhold=corr,
            psi_threhold=psi,
            target=mertic,
            params_weight=balance_weight,
        )

        logistic = ITLubberLogisticRegression(target=target, class_weight=class_weight, C=C, max_iter=max_iter, **kwargs)
        logistic.fit(data[new_var_names + [target]])
        summary = logistic.summary()

        # Restrict the check to real feature rows so the intercept ("const") can
        # never end up in del_vars, where it would make DataFrame.drop fail.
        negative = summary[(summary["Coef."] < 0) & summary.index.isin(new_var_names)]
        if negative.empty:
            return logistic

        # Reject the least significant of the negatively-signed features.
        del_vars.append(negative["P>|z|"].idxmax())

    # The round budget ran out before a model with all-positive coefficients was found.
    raise RuntimeError(f"自动逻辑回归建模失败 (max_rounds={max_rounds} reached)")

In [26]:
# Run the automated feature-selection + logistic-regression search on the synthetic data.
# Every knob is listed explicitly, one per line, so a single setting is easy to tweak.
logistic = auto_logistic(
    data,
    target="target",
    params={},
    early_stopping_rounds=10,
    importance=1e-4,
    corr=0.4,
    psi=0.5,
    test_size=0.25,
    seed=348,
    max_rounds=128,
    mertic="weight",
    balance_weight=0.2,
    C=10,
    class_weight=None,
    max_iter=128,
)

[ 2023-07-26 00:54:49,123 ][ INFO ][ model.py:train:75 ] 开始自动建模...
[ 2023-07-26 00:54:49,124 ][ INFO ][ model.py:train:76 ] --------------------------------------------------
[ 2023-07-26 00:54:49,160 ][ INFO ][ methods.py:feature_select:38 ] Shap阈值 0.0001
[ 2023-07-26 00:54:49,161 ][ INFO ][ methods.py:feature_select:39 ] shap删除特征个数：22, shap保留特征个数：8
[ 2023-07-26 00:54:49,161 ][ INFO ][ methods.py:feature_select:40 ] --------------------------------------------------
[ 2023-07-26 00:54:49,183 ][ INFO ][ methods.py:feature_select:58 ] 相关性阈值: 0.4, 相关性删除特征个数: 2, 相关性保留特征个数: 6
[ 2023-07-26 00:54:49,183 ][ INFO ][ methods.py:feature_select:59 ] --------------------------------------------------
[ 2023-07-26 00:54:49,211 ][ INFO ][ methods.py:feature_select:73 ] PSI阈值 0.5
[ 2023-07-26 00:54:49,211 ][ INFO ][ methods.py:feature_select:74 ] PSI删除特征个数: 0, PSI保留特征个数: 6
[ 2023-07-26 00:54:49,212 ][ INFO ][ methods.py:feature_select:75 ] --------------------------------------------------
[ 2023-07-

100%|██████████| 6/6 [00:00<00:00, 88.41it/s]

[ 2023-07-26 00:54:51,482 ][ INFO ][ methods.py:auto_delete_vars:289 ] (End) train_n: 6, ootks: 0.888 del_list_vars: []
[ 2023-07-26 00:54:51,482 ][ INFO ][ methods.py:auto_delete_vars:296 ] 逐步删除特征个数: 0, 逐步保留特征个数: 6
[ 2023-07-26 00:54:51,483 ][ INFO ][ model.py:train:118 ] --------------------------------------------------
[ 2023-07-26 00:54:51,510 ][ INFO ][ model.py:train:128 ] KS & PSI: {'devks': 0.927864375924451, 'ootks': 0.944, 'ootpsi': 0.03034708155969933}
[ 2023-07-26 00:54:51,514 ][ INFO ][ model.py:train:129 ] --------------------------------------------------
[ 2023-07-26 00:54:51,518 ][ INFO ][ model.py:train:130 ] AutoML建模完成
[ 2023-07-26 00:54:51,544 ][ INFO ][ model.py:train:75 ] 开始自动建模...
[ 2023-07-26 00:54:51,545 ][ INFO ][ model.py:train:76 ] --------------------------------------------------
[ 2023-07-26 00:54:51,573 ][ INFO ][ methods.py:feature_select:38 ] Shap阈值 0.0001
[ 2023-07-26 00:54:51,573 ][ INFO ][ methods.py:feature_select:39 ] shap删除特征个数：26, shap保留特征个数：3





[ 2023-07-26 00:54:51,624 ][ INFO ][ methods.py:auto_choose_params:211 ] train_number: 0, devks: 0.8719706451245877, ootks: 0.904, params: {'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'reg_lambda': 3, 'reg_alpha': 0.85, 'num_leaves': 31, 'learning_rate': 0.02, 'min_data': 50, 'min_hessian': 0.05, 'num_threads': 1, 'feature_fraction': 0.9, 'bagging_fraction': 0.8, 'bagging_freq': 2, 'verbose': -1, 'num_boost_round': 100}
[ 2023-07-26 00:54:51,655 ][ INFO ][ methods.py:check_params:110 ] (Good) train_number: 1, devks: 0.9038002048014564, ootks: 0.92, params: {'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'reg_lambda': 13, 'reg_alpha': 0.85, 'num_leaves': 31, 'learning_rate': 0.02, 'min_data': 50, 'min_hessian': 0.05, 'num_threads': 1, 'feature_fraction': 0.9, 'bagging_fraction': 0.8, 'bagging_freq': 2, 'verbose': -1, 'num_boost_round': 100}
[ 2023-07-26 00:54:51,795 ][ INFO ][ methods.py:check_params:110 ] (Good) train_number: 7, devks: 0.927864375

100%|██████████| 2/2 [00:00<00:00, 69.91it/s]

[ 2023-07-26 00:54:53,413 ][ INFO ][ methods.py:auto_delete_vars:289 ] (End) train_n: 2, ootks: 0.888 del_list_vars: []
[ 2023-07-26 00:54:53,414 ][ INFO ][ methods.py:auto_delete_vars:296 ] 逐步删除特征个数: 0, 逐步保留特征个数: 2
[ 2023-07-26 00:54:53,415 ][ INFO ][ model.py:train:118 ] --------------------------------------------------
[ 2023-07-26 00:54:53,436 ][ INFO ][ model.py:train:128 ] KS & PSI: {'devks': 0.8719706451245877, 'ootks': 0.904, 'ootpsi': 0.01525263517521588}
[ 2023-07-26 00:54:53,436 ][ INFO ][ model.py:train:129 ] --------------------------------------------------
[ 2023-07-26 00:54:53,437 ][ INFO ][ model.py:train:130 ] AutoML建模完成





In [27]:
# Display the coefficient table of the fitted model (kept as the cell's last expression
# so the notebook renders it).
logistic.summary()

Unnamed: 0,Coef.,Std.Err,z,P>|z|,[ 0.025,0.975 ],VIF
const,0.4548,0.154062,2.952053,0.003156689,0.152838,0.756762,1.001048
f8,3.788075,0.250967,15.093901,1.776236e-51,3.296179,4.279971,1.010374
f13,0.624334,0.116327,5.36708,8.002171e-08,0.396334,0.852334,1.010374
