In [1]:
# import os
# import time
# import random
# from tqdm import tqdm
# import numpy as np
# from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report
# from sklearn.inspection import permutation_importance
# from sklearn.model_selection import StratifiedKFold
# import lightgbm as lgb
# import shap

In [2]:
class ScoreWrapper(object):
    """Base class exposing ``fit`` / ``predict`` / ``score`` with ROC AUC scoring.

    Subclasses are expected to override ``predict``; ``score`` then passes the
    predictions to ``roc_auc_score``.

    NOTE(review): the fit/score shape presumably exists so instances can be
    handed to scikit-learn utilities that expect an estimator - confirm
    against the caller.
    """

    def fit(self, X, y):
        """Announce the call on stdout; no training is performed here."""
        print('ScoreWrapper fit called')

    def predict(self, x):
        """Return predictions for ``x``; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        # The built-in is spelled NotImplementedError; the previous misspelling
        # surfaced as a NameError instead of the intended exception.
        raise NotImplementedError()

    def score(self, X, y):
        """Compute the ROC AUC of ``self.predict(X)`` against ``y``.

        Args:
            X: Inputs forwarded unchanged to ``self.predict``.
            y: Reference labels passed as the first argument of
                ``roc_auc_score``.

        Returns:
            Whatever ``roc_auc_score(y, predictions)`` returns.
        """
        y_pred_proba = self.predict(X)
        auroc = roc_auc_score(y, y_pred_proba)
        return auroc

In [3]:
class LGBMWrapper(ScoreWrapper):
    """Adapter that routes ``predict`` / ``feature_importance`` to a wrapped model.

    NOTE(review): ``model`` is presumably a trained LightGBM booster (it must
    offer ``predict``, ``best_iteration`` and ``feature_importance``) - confirm
    against the caller.
    """

    def __init__(self, model):
        """Store ``model`` on the instance; the object is referenced, not copied."""
        self.model = model

    def predict(self, X):
        """Predict on ``X`` using the wrapped model's ``best_iteration``.

        Args:
            X: Inputs forwarded unchanged to the wrapped model's ``predict``.

        Returns:
            The wrapped model's ``predict`` result.
        """
        wrapped = self.model
        run_predict = wrapped.predict
        return run_predict(X, num_iteration=wrapped.best_iteration)

    def feature_importance(self, **kwargs):
        """Forward all keyword arguments to the wrapped model's ``feature_importance``."""
        wrapped = self.model
        return wrapped.feature_importance(**kwargs)

In [4]:
def seed_everything(seed=42):
    """Seed the stdlib and NumPy global RNGs and record the seed in the environment.

    Args:
        seed: Value passed to ``random.seed`` and ``np.random.seed``; its
            ``str()`` form is written to ``os.environ['PYTHONHASHSEED']``.
    """
    # Same order as before: stdlib RNG first, then NumPy's legacy global RNG.
    for reseed in (random.seed, np.random.seed):
        reseed(seed)

    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # this assignment likely only affects child processes - confirm.
    hash_seed = str(seed)
    os.environ['PYTHONHASHSEED'] = hash_seed
    

In [None]:
def train_kfold(df, label, features, features_cat, params, seed=42, kfold=5):
    num_features = len(features)
    oof_preds = np.zeros(df.shape[0])
    shap_values_valid_array = np.zeros((kfold, num_features))
    perm_imp_valid_array = np.zeros((kfold, num_features))
    feat_imp_array = np.zeros((kfold, num_features))
    
    folds = StratifiedKFold(n_splits=kfold, random_seed=seed, shuffle=True)
    for c in features_cat:
        df[c] = df[c].astype('category')
    current_time = time.strftime('%y%m%d_%H%M')
    
    for fold, (train_idx, valid_idx) in enumerate(folds.split(df[features], df[label])):
        train_X, valid_X = df[features].iloc[train_idx], df[features].iloc[valid_idx]
        train_y valid_y = df[label].iloc[train_idx], df[label].iloc[valid_idx]