In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib  inline
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*' and the
# old names stop resolving in later releases, so try the new name first and fall back
# to the legacy name on older installations.
try:
    plt.style.use('seaborn-v0_8-darkgrid')
except OSError:
    plt.style.use('seaborn-darkgrid')
sns.set(style = 'darkgrid')
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score, precision_recall_curve, roc_curve, average_precision_score, log_loss, mean_squared_error
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import auc
from dateutil.relativedelta import relativedelta
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import lightgbm as lgb
import xgboost as xgb
import catboost as cat
from sklearn.model_selection import  train_test_split
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
from scipy.stats import ks_2samp
import gc
import re
import time
import warnings 
warnings.filterwarnings('ignore')
pd.options.display.max_rows=200
pd.set_option('float_format', lambda x: '%.3f' % x)

In [None]:
def train_model_xlc(data_, test_, y_, folds_, model_type='lgb'):
    """Cross-validated training of a LightGBM, XGBoost or CatBoost classifier.

    For every split produced by ``folds_`` a fresh classifier is fitted on the
    training part with early stopping, the validation part is scored to build
    the out-of-fold (OOF) predictions, and the test-set probabilities are
    averaged over the folds.

    The feature columns come from the notebook-level list
    ``feature_null_importance`` (minus id/target/date helper columns), so that
    list must exist before this function is called.

    Parameters
    ----------
    data_ : pandas.DataFrame
        Training frame containing every selected feature column.
    test_ : pandas.DataFrame
        Test frame containing the same feature columns plus ``loan_id``.
        It is modified in place: the averaged predictions are written to an
        ``isDefault`` column.
    y_ : pandas.Series
        Target, addressed positionally (``.iloc``) with the fold indices.
    folds_ : object
        Splitter exposing ``split(X, y)`` and ``n_splits`` (e.g. ``KFold``).
    model_type : {'lgb', 'xgb', 'cat'}
        Which classifier to train.

    Returns
    -------
    tuple
        ``(oof_preds, test_[['loan_id', 'isDefault']], feature_importance_df)``:
        OOF probabilities for ``data_``, the test predictions, and one
        importance row per feature and fold.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported values.
    """
    # Fail fast: otherwise an unknown model type skips every training branch and the
    # function ends up returning whatever 'isDefault' column a previous run left on test_.
    supported_models = ('lgb', 'xgb', 'cat')
    if model_type not in supported_models:
        raise ValueError(
            "model_type must be one of %s, got %r" % (supported_models, model_type)
        )

    non_feature_cols = ['loan_id', 'isDefault', 'idx', 'label', 'date', 'prob']
    feats = [f for f in feature_null_importance if f not in non_feature_cols]

    oof_preds = np.zeros(data_.shape[0])  # out-of-fold predictions for the training rows
    sub_preds = np.zeros(test_.shape[0])  # fold-averaged probabilities for the test rows
    fold_importances = []                 # one importance frame per fold

    for n_fold, (trn_idx, val_idx) in enumerate(folds_.split(data_, y_)):
        trn_x, trn_y = data_[feats].iloc[trn_idx], y_.iloc[trn_idx]
        val_x, val_y = data_[feats].iloc[val_idx], y_.iloc[val_idx]
        eval_set = [(trn_x, trn_y), (val_x, val_y)]
        predict_kwargs = {}

        if model_type == 'lgb':
            clf = LGBMClassifier(
                n_estimators=4000,
                learning_rate=0.02,
                num_leaves=2**5,
                colsample_bytree=0.65,
                subsample=0.9,
                max_depth=5,
                reg_alpha=0.3,
                reg_lambda=0.3,
                min_split_gain=0.01,
                min_child_weight=2,
                silent=-1,
                verbose=-1,
            )
            clf.fit(trn_x, trn_y,
                    eval_set=eval_set,
                    eval_metric='auc',
                    callbacks=[lgb.early_stopping(200), lgb.log_evaluation(100)])
            # Score with the best early-stopped iteration rather than the last one.
            predict_kwargs = {'num_iteration': clf.best_iteration_}
        elif model_type == 'xgb':
            clf = XGBClassifier(
                n_estimators=4000,
                learning_rate=0.08,
                objective='binary:logistic',
                use_label_encoder=False,
                booster='gbtree',
                colsample_bytree=0.75,
                subsample=0.9,
                max_depth=5,
                reg_alpha=0.3,
                reg_lambda=0.3,
                min_child_weight=2,
                gamma=0.1,
            )
            # NOTE(review): newer XGBoost releases expect eval_metric and
            # early_stopping_rounds on the constructor — confirm against the installed version.
            clf.fit(trn_x, trn_y,
                    eval_set=eval_set,
                    eval_metric='auc', verbose=100, early_stopping_rounds=40)
        else:  # 'cat'
            clf = CatBoostClassifier(
                iterations=4000,
                learning_rate=0.08,
                num_leaves=2**5,
                subsample=0.9,
                depth=5,
                reg_lambda=0.3,
                silent=True,
                # CatBoost takes eval_metric on the constructor, unlike the
                # fit-time argument used for LightGBM/XGBoost above.
                eval_metric='AUC'
            )
            clf.fit(trn_x, trn_y,
                    eval_set=eval_set,
                    verbose=0,
                    early_stopping_rounds=40)

        oof_preds[val_idx] = clf.predict_proba(val_x, **predict_kwargs)[:, 1]
        sub_preds += clf.predict_proba(test_[feats], **predict_kwargs)[:, 1] / folds_.n_splits

        fold_importances.append(pd.DataFrame({
            "feature": feats,
            "importance": clf.feature_importances_,
            "fold": n_fold + 1,
        }))

        # AUC of this fold: true validation labels vs. this fold's predictions.
        print('Fold %2d AUC : %.6f' % (n_fold + 1, roc_auc_score(val_y, oof_preds[val_idx])))
        del clf, trn_x, trn_y, val_x, val_y, eval_set

        gc.collect()

    # AUC over all training rows using the merged out-of-fold predictions.
    print('Full AUC score %.6f' % roc_auc_score(y_, oof_preds))

    # Concatenate once instead of growing a frame inside the loop.
    feature_importance_df = pd.concat(fold_importances, axis=0)

    test_['isDefault'] = sub_preds  # in-place: callers see the new column on their frame

    return oof_preds, test_[['loan_id', 'isDefault']],  feature_importance_df

In [None]:
# Root directory of the competition data. The CSV paths used when loading the data are
# built by plain string concatenation, so the trailing slash is required.
# NOTE(review): hardcoded absolute path — adjust to the local data location before running.
path = '/2024年四川省大学生数据科学与统计建模竞赛/'

In [None]:
# Training features and targets live in separate files; the target columns are attached
# by index alignment.
# NOTE(review): this assumes both files list the rows in the same order — confirm.
train_target = pd.read_csv(path + '训练集/train_target.csv')
train_df = pd.read_csv(path + '训练集/train_data.csv').assign(
    isDefault=train_target['y'],
    date=train_target['date'],
    loan_id=lambda frame: frame['idx'],
)

# The test file gets the same id alias; 'date' is added as an all-NaN column here.
# (Author's note: date could be treated as a categorical feature.)
test = pd.read_csv(path + '测试集/test_data.csv').assign(
    loan_id=lambda frame: frame['idx'],
    date=np.nan,
)

# 初步分析

In [None]:
# Mean of X278 per target class (rows) and date (columns).
# The string 'mean' is used instead of np.mean: recent pandas versions warn when a
# NumPy callable is passed as aggfunc, and the result is the same.
pivot = pd.pivot_table(train_df, index=['isDefault'], columns=['date'], values=['X278'],
                       aggfunc='mean')

# One row per date with the two class means side by side, plus their difference.
pivot1 = pivot.T.reset_index()
pivot1['差'] = pivot1[1.0]-pivot1[0.0]

# Plot against the actual date values rather than the positional row number.
pivot1.set_index('date')[[0.0,1.0]].plot(figsize = (10,2))

# 数据分析

In [None]:
# Training rows whose date is 1, 2 or 3.
# NOTE(review): despite the "_not_" in the name, this KEEPS dates 1-3 (the isin mask is
# not negated). The same name is reassigned later from train_df_not.
train_df_not_123 = train_df[train_df['date'].isin([1,2,3])]

In [None]:
# Column-name groups, split by the numeric suffix of the X<i> feature columns.
# NOTE(review): the group meanings are only implied by the variable names — confirm.
date_feas = ['date']
orige_feas = [f'X{i}' for i in range(21)]               # X0   .. X20
history_loan_feas = [f'X{i}' for i in range(21, 257)]    # X21  .. X256
other_act_feas = [f'X{i}' for i in range(257, 3806)]     # X257 .. X3805

In [None]:
# Size of the slice covering positions 30-39 of history_loan_feas (displayed as cell output).
len(history_loan_feas[30:40])

In [None]:
# Boxen plots of the selected history-loan features on a 5-column grid.
# history_loan_feas has 236 names, so the slice [210:250] yields the last 26 of them.
box_cols = history_loan_feas[210:250]
dist_cols = 5
# Allocate only as many rows as the grid needs (ceil division). Using one row per
# feature left most of the figure empty and made the saved image several times too tall.
dist_rows = (len(box_cols) + dist_cols - 1) // dist_cols
plt.figure(figsize=(4*dist_cols,4*dist_rows))
for i, col in enumerate(box_cols, start=1):
    ax = plt.subplot(dist_rows,dist_cols,i)
    ax = sns.boxenplot(train_df[col])
    ax.set_xlabel(col[:30])
    ax.set_ylabel('data')
plt.savefig('箱线图.png',dpi = 70)
plt.show()

# 分布

In [None]:
# Train (red) vs. test (blue) density of each selected history-loan feature, 5 per row.
kde_cols = history_loan_feas[30:70]
dist_cols = 5
# Allocate only as many rows as the grid needs (ceil division). Using one row per
# feature left most of the figure empty.
dist_rows = (len(kde_cols) + dist_cols - 1) // dist_cols
plt.figure(figsize=(4*dist_cols,4*dist_rows))
for i, col in enumerate(kde_cols, start=1):
    ax = plt.subplot(dist_rows,dist_cols,i)
    # `fill` is the current name of the deprecated `shade` argument.
    ax = sns.kdeplot(train_df[col],color='Red',fill=True)
    ax = sns.kdeplot(test[col],color='Blue',fill=True)
    ax.set_xlabel(col)
    ax.set_ylabel('Frequency')
    ax.legend(['train','test'])
plt.savefig('fenbu.png',dpi = 70)
plt.show()

# 相关性

In [None]:
# Pairwise Spearman correlation of 20 history-loan features on the date 1-3 subset.
data_train = train_df_not_123[history_loan_feas[30:50]]
colnm = data_train.columns.tolist()  # column labels of the selected block
mcorr = data_train[colnm].corr(method="spearman")  # correlation of every column pair

# Mask the diagonal and everything above it so each pair is drawn only once.
mask = np.triu(np.ones_like(mcorr))
cmap = sns.diverging_palette(220, 10, as_cmap=True)  # diverging matplotlib colormap

plt.figure(figsize=(20, 16))  # figure width and height
g = sns.heatmap(mcorr, mask=mask, cmap=cmap, square=True, annot=True, fmt='0.2f')
plt.savefig('xinagguanxing.png',dpi = 70)
plt.show()

# 样本选择

In [None]:
# Drop every training row whose date is 22, 27, 36, 61 or 67.
# NOTE(review): the reason for excluding these five dates is not recorded here — document it.
train_df_not = train_df[~train_df['date'].isin([22, 27, 36, 61, 67])]

# 选择一部分样本作为特征筛选基础

In [None]:
# Subset used as the basis for feature screening: rows of the filtered training set
# whose date is 1, 2 or 3 (the name says "not", but the isin mask is not negated).
train_df_not_123 = train_df_not[train_df_not['date'].isin([1,2,3])]

In [None]:
# Absolute pairwise correlations on the date 1-3 subset.
# NOTE(review): nothing is excluded before .corr(), so the id, target and date columns
# take part in the pruning as well — confirm this is intended.
corr_matrix = train_df_not_123.corr().abs()

# A column is dropped when it correlates above 0.8 with any column that comes BEFORE it,
# i.e. when its row has a hit in the strict lower triangle. This is the same rule as a
# nested loop over (i, j < i), evaluated in one vectorised pass instead of millions of
# scalar .iloc reads. NaN correlations compare as False and never trigger a drop.
corr_values = corr_matrix.to_numpy()
is_redundant = (np.tril(corr_values, k=-1) > 0.8).any(axis=1)
to_drop = set(corr_matrix.columns[is_redundant])

# Remove the highly correlated columns.
df_reduced = train_df_not_123.drop(columns=to_drop)

print("原始特征：", train_df_not_123.columns.tolist())
print("剔除的特征：", list(to_drop))
print("减少后的特征：", df_reduced.columns.tolist())

# 使用null_importance来筛选特征

In [None]:
# Candidate features: every column that survived the correlation pruning, plus 'date'.
# 'date' is appended only when the pruning removed it; appending unconditionally would
# list it twice and produce duplicate column names in the frame built from `feats`.
non_feature_cols = {'loan_id', 'isDefault', 'idx', 'label'}
candidate_cols = df_reduced.columns.tolist()
if 'date' not in candidate_cols:
    candidate_cols.append('date')
feats = [f for f in candidate_cols if f not in non_feature_cols]

In [None]:
# Work on an explicit copy: adding a column to a slice of train_df_not is a chained
# assignment that pandas cannot guarantee to apply.
data = train_df_not[feats].copy()
data['label'] = train_df_not['isDefault']

In [None]:
def get_feature_importances(data, shuffle, seed=None):
    """Fit a LightGBM random-forest model and report per-feature importances.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns plus a ``label`` column holding the target.
    shuffle : bool
        When true, the labels are randomly permuted before fitting, which
        yields a "null" importance distribution.
    seed : int or None
        Seed forwarded to LightGBM and, when ``shuffle`` is true, also used
        for the label permutation so that a run can be reproduced.
        ``None`` keeps the permutation random.

    Returns
    -------
    pandas.DataFrame
        One row per feature with columns ``feature``, ``importance_gain``,
        ``importance_split`` and ``trn_score`` (training AUC against the
        labels that were actually fitted).
    """
    # Every column except the target is a feature.
    train_features = [f for f in data if f not in ['label']]

    # Labels are handed over as a plain array: the permutation must take effect by
    # position, independent of the pandas index carried by the sampled Series.
    labels = data['label']
    if shuffle:
        labels = labels.sample(frac=1.0, random_state=seed)
    y = labels.to_numpy()

    dtrain = lgb.Dataset(data[train_features], y, free_raw_data=False)
    lgb_params = {
        'objective': 'binary',
        'boosting_type': 'rf',
        'subsample': 0.623,
        'colsample_bytree': 0.7,
        'num_leaves': 127,
        'max_depth': 8,
        'seed': seed,
        'bagging_freq': 1,
        'verbose': -1,
        'n_jobs': 4
    }

    # Fit the model.
    clf = lgb.train(params=lgb_params, train_set=dtrain, num_boost_round=200)

    # Collect gain- and split-based importances together with the training AUC.
    imp_df = pd.DataFrame()
    imp_df["feature"] = list(train_features)
    imp_df["importance_gain"] = clf.feature_importance(importance_type='gain')
    imp_df["importance_split"] = clf.feature_importance(importance_type='split')
    imp_df['trn_score'] = roc_auc_score(y, clf.predict(data[train_features]))

    return imp_df

In [None]:
# Importances measured against the real labels.
actual_imp_df = get_feature_importances(data=data, shuffle=False)

# Null distribution: importances measured against shuffled labels, repeated nb_runs times.
# The run number is forwarded as `seed` so every run has its own fixed seed, and the
# per-run frames are concatenated once instead of re-copying a growing frame each time.
nb_runs = 80
null_runs = []
for i in tqdm(range(nb_runs)):
    imp_df = get_feature_importances(data=data, shuffle=True, seed=i)
    null_runs.append(imp_df)
null_imp_df = pd.concat(null_runs, axis=0)

In [None]:
# Score each feature as log(actual importance / (1 + 75th percentile of its null
# importances)). The 1e-10 offset keeps the log finite when the actual importance is 0.
# Grouping once per table replaces a full boolean-mask scan of both tables per feature.
feature_order = actual_imp_df['feature'].unique()
null_by_feature = null_imp_df.groupby('feature')
actual_by_feature = actual_imp_df.groupby('feature')


def _log_importance_ratio(column):
    """Return the log-ratio score of `column` for every feature, in feature_order."""
    null_p75 = null_by_feature[column].apply(lambda s: np.percentile(s.to_numpy(), 75))
    actual_mean = actual_by_feature[column].mean()
    ratio = actual_mean.reindex(feature_order) / (1 + null_p75.reindex(feature_order))
    return np.log(1e-10 + ratio).to_numpy()


scores_df = pd.DataFrame({
    'feature': feature_order,
    'split_score': _log_importance_ratio('importance_split'),
    'gain_score': _log_importance_ratio('importance_gain'),
})
scores_df = scores_df.sort_values('split_score', ascending=False)

In [None]:
# Top 15 features by split score, renumbered from 0 for display.
scores_df.reset_index(drop=True).head(15)

In [None]:
# Keep the features whose gain score is above -1.
# Author's note: "-1 0.7058" — presumably the score obtained with this threshold; confirm.
feature_null_importance = scores_df.loc[scores_df['gain_score'] > -1, 'feature'].tolist()

In [None]:
# Number of features that passed the null-importance filter.
print(len(feature_null_importance))

# model1

In [None]:
# Model 1: CatBoost and LightGBM on the filtered training rows, 5-fold shuffled CV with a
# fixed random_state so both runs use identical splits.
# NOTE(review): this cell relies on the three-value return of the train_model_xlc defined
# near the top of the notebook. The later redefinition takes an extra `feature1` argument
# and returns four values, so re-running this cell after that definition fails.
# NOTE(review): the trailing numbers are scores noted by the author — presumably AUC from
# earlier runs; confirm.
y = train_df_not['isDefault']
folds = KFold(n_splits=5, shuffle=True, random_state=8828) # lgb: 0.879945   xgb: 0.871812  cat: 0.878727
oof_preds_cat1, IntePre_cat1, importances_cat1 = train_model_xlc(train_df_not, test, y, folds, model_type='cat')

# Disabled XGBoost run with the same data and folds:
# y = train_df_not['isDefault']
# folds = KFold(n_splits=5, shuffle=True, random_state=8828) # lgb: 0.879945   xgb: 0.871812  cat: 0.878727
# oof_preds_xgb, IntePre_xgb, importances_xgb = train_model_xlc(train_df_not, test, y, folds, model_type='xgb')

y = train_df_not['isDefault']
folds = KFold(n_splits=5, shuffle=True, random_state=8828) # lgb: 0.698101   xgb: 0.693  cat: 0.696
oof_preds, IntePre, importances = train_model_xlc(train_df_not, test, y, folds, model_type='lgb')

In [None]:
# Display the LightGBM test predictions (loan_id, isDefault) returned above.
IntePre

# model2

In [None]:
def train_model_xlc(data_, test_, y_, folds_,feature1, model_type='lgb'):
    """Cross-validated LightGBM / XGBoost / CatBoost training on all but excluded columns.

    This definition replaces the earlier ``train_model_xlc`` in the kernel
    namespace. Instead of reading a global feature list it uses every column
    of ``data_`` except the id/target helper columns and the names in
    ``feature1``. Note that ``date`` and ``prob`` are not in the exclusion
    list, so they are used as features when present in ``data_``.

    For every split produced by ``folds_`` a fresh classifier is fitted with
    early stopping, the validation part is scored to build the out-of-fold
    (OOF) predictions, and the test-set probabilities are averaged over folds.

    Parameters
    ----------
    data_ : pandas.DataFrame
        Training frame.
    test_ : pandas.DataFrame
        Test frame containing the same feature columns plus ``loan_id``.
        It is modified in place: the averaged predictions are written to an
        ``isDefault`` column.
    y_ : pandas.Series
        Target, addressed positionally (``.iloc``) with the fold indices.
    folds_ : object
        Splitter exposing ``split(X, y)`` and ``n_splits`` (e.g. ``KFold``).
    feature1 : iterable of str
        Column names to leave out of the feature set.
    model_type : {'lgb', 'xgb', 'cat'}
        Which classifier to train.

    Returns
    -------
    tuple
        ``(oof_preds, test_[['loan_id', 'isDefault']], feature_importance_df,
        Full_AUC)`` where ``Full_AUC`` is the AUC of the OOF predictions.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported values.
    """
    # Fail fast: otherwise an unknown model type skips every training branch and the
    # function dies on an unbound Full_AUC, or reuses a stale 'isDefault' column on test_.
    supported_models = ('lgb', 'xgb', 'cat')
    if model_type not in supported_models:
        raise ValueError(
            "model_type must be one of %s, got %r" % (supported_models, model_type)
        )

    # Build the exclusion set once instead of re-concatenating lists for every column.
    excluded_cols = {'loan_id', 'isDefault', 'idx', 'label'} | set(feature1)
    feats = [f for f in data_.columns if f not in excluded_cols]

    oof_preds = np.zeros(data_.shape[0])  # out-of-fold predictions for the training rows
    sub_preds = np.zeros(test_.shape[0])  # fold-averaged probabilities for the test rows
    fold_importances = []                 # one importance frame per fold

    for n_fold, (trn_idx, val_idx) in enumerate(folds_.split(data_, y_)):
        trn_x, trn_y = data_[feats].iloc[trn_idx], y_.iloc[trn_idx]
        val_x, val_y = data_[feats].iloc[val_idx], y_.iloc[val_idx]
        eval_set = [(trn_x, trn_y), (val_x, val_y)]
        predict_kwargs = {}

        if model_type == 'lgb':
            clf = LGBMClassifier(
                n_estimators=4000,
                learning_rate=0.02,
                num_leaves=2**5,
                colsample_bytree=0.65,
                subsample=0.9,
                max_depth=5,
                reg_alpha=0.3,
                reg_lambda=0.3,
                min_split_gain=0.01,
                min_child_weight=2,
                silent=-1,
                verbose=-1,
            )
            clf.fit(trn_x, trn_y,
                    eval_set=eval_set,
                    eval_metric='auc',
                    callbacks=[lgb.early_stopping(200), lgb.log_evaluation(100)])
            # Score with the best early-stopped iteration rather than the last one.
            predict_kwargs = {'num_iteration': clf.best_iteration_}
        elif model_type == 'xgb':
            clf = XGBClassifier(
                n_estimators=4000,
                learning_rate=0.08,
                objective='binary:logistic',
                use_label_encoder=False,
                booster='gbtree',
                colsample_bytree=0.75,
                subsample=0.9,
                max_depth=5,
                reg_alpha=0.3,
                reg_lambda=0.3,
                min_child_weight=2,
                gamma=0.1,
            )
            # NOTE(review): newer XGBoost releases expect eval_metric and
            # early_stopping_rounds on the constructor — confirm against the installed version.
            clf.fit(trn_x, trn_y,
                    eval_set=eval_set,
                    eval_metric='auc', verbose=100, early_stopping_rounds=40)
        else:  # 'cat'
            clf = CatBoostClassifier(
                iterations=4000,
                learning_rate=0.08,
                num_leaves=2**5,
                subsample=0.9,
                depth=5,
                reg_lambda=0.3,
                silent=True,
                # CatBoost takes eval_metric on the constructor, unlike the
                # fit-time argument used for LightGBM/XGBoost above.
                eval_metric='AUC'
            )
            clf.fit(trn_x, trn_y,
                    eval_set=eval_set,
                    verbose=0,
                    early_stopping_rounds=40)

        oof_preds[val_idx] = clf.predict_proba(val_x, **predict_kwargs)[:, 1]
        sub_preds += clf.predict_proba(test_[feats], **predict_kwargs)[:, 1] / folds_.n_splits

        fold_importances.append(pd.DataFrame({
            "feature": feats,
            "importance": clf.feature_importances_,
            "fold": n_fold + 1,
        }))

        # AUC of this fold: true validation labels vs. this fold's predictions.
        print('Fold %2d AUC : %.6f' % (n_fold + 1, roc_auc_score(val_y, oof_preds[val_idx])))
        del clf, trn_x, trn_y, val_x, val_y, eval_set

        gc.collect()

    # AUC over all training rows using the merged out-of-fold predictions.
    Full_AUC = roc_auc_score(y_, oof_preds)
    print('Full AUC score %.6f' % Full_AUC)

    # Concatenate once instead of growing a frame inside the loop.
    feature_importance_df = pd.concat(fold_importances, axis=0)

    test_['isDefault'] = sub_preds  # in-place: callers see the new column on their frame

    return oof_preds, test_[['loan_id', 'isDefault']],  feature_importance_df,Full_AUC

In [None]:
# Column-name groups by numeric suffix (same definitions as in the data-analysis section,
# repeated here so the modelling cells can run on their own).
date_feas = ['date']
orige_feas = [f'X{i}' for i in range(21)]               # X0   .. X20
history_loan_feas = [f'X{i}' for i in range(21, 257)]    # X21  .. X256
other_act_feas = [f'X{i}' for i in range(257, 3806)]     # X257 .. X3805

In [None]:
# Model 2: CatBoost on every column except 100 names taken from other_act_feas
# (positions 1800-1899), using the same 5-fold shuffled split as model 1.
score_result = []
feature1 = other_act_feas[1800:1900]
y = train_df_not['isDefault']
folds = KFold(n_splits=5, shuffle=True, random_state=8828)
oof_preds_cat, IntePre_cat, importances_cat, score = train_model_xlc(
    train_df_not, test, y, folds, feature1, model_type='cat'
)

# Submission file named after the out-of-fold AUC of this run.
submit_cat = pd.DataFrame({
    'idx': test['idx'],
    'y_pred': IntePre_cat['isDefault'],
})
submit_cat.to_csv(f'{score}_cat_date.csv', index=False)

# 融合

In [None]:
# Geometric mean of the three models' test predictions.
# Only the probability column is blended. Multiplying the whole frames also pushed
# loan_id through cube-and-cube-root, which turns the ids into inexact floats.
blended_prob = (IntePre_cat1['isDefault'] * IntePre_cat['isDefault'] * IntePre['isDefault']) ** (1/3)
result_pan = IntePre[['loan_id']].assign(isDefault=blended_prob)

In [None]:
# Display the blended predictions.
result_pan

In [None]:
# Blended predictions in submission layout: test idx plus the blended probability as y_pred.
result_pan_final = pd.DataFrame({'idx':test['idx'],'y_pred':result_pan['isDefault']})

# 和队友融合

In [None]:
# Teammate's submission file to fuse with; its 'y_pred' column is read by fusion().
# NOTE(review): hardcoded absolute path — adjust to the local file location.
# NOTE(review): the file name looks like that submission's score — confirm.
leaf_result = pd.read_csv('/leaf/0.7088.csv')

In [None]:
def fusion(results_a, results_b, threshold=0.02):
    """Fuse two prediction tables row by row.

    Rows are paired by position. Where the two predictions differ by more than
    ``threshold`` the geometric mean of the pair is used; otherwise the value
    from ``results_a`` is kept unchanged.

    Parameters
    ----------
    results_a, results_b : mapping with a ``'y_pred'`` entry
        Typically DataFrames holding one prediction per row.
        NOTE(review): pairing is positional, so both tables are assumed to
        list the same rows in the same order — confirm against the callers.
    threshold : float
        Absolute difference above which the two predictions are blended.

    Returns
    -------
    list
        One fused prediction per row.

    Raises
    ------
    ValueError
        If the two ``'y_pred'`` columns differ in length; ``zip`` would
        otherwise silently drop the surplus rows.
    """
    preds_a = results_a['y_pred']
    preds_b = results_b['y_pred']
    if len(preds_a) != len(preds_b):
        raise ValueError(
            "y_pred columns differ in length: %d vs %d" % (len(preds_a), len(preds_b))
        )

    return [
        # Equal-weight geometric blend when the models disagree; otherwise keep a's value.
        (a * b) ** (1/2) if abs(a - b) > threshold else a
        for a, b in zip(preds_a, preds_b)
    ]

In [None]:
# Fuse our blended predictions with the teammate's file; the result is a plain list.
final_result = fusion(result_pan_final, leaf_result, threshold=0.02)

In [None]:
# Write the final submission (idx, y_pred) without the index column.
# NOTE(review): the hardcoded file name looks like a score — confirm it matches this run.
pd.DataFrame({'idx':test['idx'],'y_pred':final_result}).to_csv('0.712777.csv',index=False)

In [None]:
# Display the final submission table that was written above.
pd.DataFrame({'idx':test['idx'],'y_pred':final_result})