# 基本信息

In [1]:
from sklearn.metrics import confusion_matrix,precision_score,recall_score,f1_score,roc_auc_score,precision_recall_curve,roc_curve
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,AdaBoostClassifier
from sklearn.model_selection import GridSearchCV,StratifiedKFold,KFold,GroupKFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures,OneHotEncoder,LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from tensorflow import keras
import graphviz
from xgboost.sklearn import XGBClassifier
import xgboost as xgb
import lightgbm as lgb
import random
import gc

sns.set(style="white", color_codes=True)
#显示所有列
pd.set_option('display.max_columns', None)
#取消科学计数，显示小数点后1位
pd.set_option('float_format', lambda x: '%.1f' % x)
# pd.set_option('display.max_rows', None)
seed = 100
np.random.seed(seed)
random.seed(seed)

In [2]:
trainfile = '../data/train.csv'
train_data = pd.read_csv(trainfile)
print("rows:",train_data.shape[0]," columns:", train_data.shape[1])

rows: 1521787  columns: 23


In [3]:
testfile = '../data/test.csv'
test_data = pd.read_csv(testfile)
print("rows:",test_data.shape[0]," columns:", test_data.shape[1])

rows: 421665  columns: 22


In [4]:
# 改时间为小时
def change_time(data):
    time = data['loctm']
    hours = []
    minutes = []
    for i in time:
        h = int(i/10000)
        m = (i/10000 - h)*100
        hours.append(h)
        minutes.append(m)
    data['loctm'] = hours
    data['minutes'] = minutes

In [5]:
change_time(train_data)
change_time(test_data)

In [6]:
train_data = train_data.replace(['N','Y'],[0,1])
test_data = test_data.replace(['N','Y'],[0,1])

In [7]:
# FREQUENCY ENCODE TOGETHER
def encode_FE(df1, df2, cols):
    for col in cols:
        df = pd.concat([df1[col],df2[col]])
        vc = df.value_counts(dropna=True, normalize=True).to_dict()
        nm = col+'_FE'
        df1[nm] = df1[col].map(vc)
        df1[nm] = df1[nm].astype('float32')
        df2[nm] = df2[col].map(vc)
        df2[nm] = df2[nm].astype('float32')
        print(nm,', ',end='')
        
# LABEL ENCODE
def encode_LE(col,train=train_data,test=test_data,verbose=True):
    df_comb = pd.concat([train[col],test[col]],axis=0)
    df_comb,_ = df_comb.factorize(sort=True)
    nm = col
    if df_comb.max()>32000: 
        train[nm] = df_comb[:len(train)].astype('int32')
        test[nm] = df_comb[len(train):].astype('int32')
    else:
        train[nm] = df_comb[:len(train)].astype('int16')
        test[nm] = df_comb[len(train):].astype('int16')
    del df_comb; x=gc.collect()
    if verbose: print(nm,', ',end='')
        
# COMBINE FEATURES
def encode_CB(col1,col2,df1=train_data,df2=test_data):
    nm = col1+'_'+col2
    df1[nm] = df1[col1].astype(str)+'_'+df1[col2].astype(str)
    df2[nm] = df2[col1].astype(str)+'_'+df2[col2].astype(str) 
    encode_LE(nm,verbose=False)
    print(nm,', ',end='')
    
# GROUP AGGREGATION MEAN AND STD
# https://www.kaggle.com/kyakovlev/ieee-fe-with-some-eda
def encode_AG(main_columns, uids, aggregations=['mean'], train_df=train_data, test_df=test_data, 
              fillna=True):
    # AGGREGATION OF MAIN WITH UID FOR GIVEN STATISTICS
    for main_column in main_columns:  
        for col in uids:
            for agg_type in aggregations:
                new_col_name = main_column+'_'+col+'_'+agg_type
                #拼接训练集和测试集
                temp_df = pd.concat([train_df[[col, main_column]], test_df[[col,main_column]]])  
                #求AGG
                temp_df = temp_df.groupby([col])[main_column].agg([agg_type]).reset_index().rename(columns={agg_type: new_col_name})
                # 取出目标列作为索引
                temp_df.index = list(temp_df[col])
                # 生成map对应的字典
                temp_df = temp_df[new_col_name].to_dict()   

                train_df[new_col_name] = train_df[col].map(temp_df).astype('float32')
                test_df[new_col_name]  = test_df[col].map(temp_df).astype('float32')
                
                if fillna:
                    train_df[new_col_name].fillna(-1,inplace=True)
                    test_df[new_col_name].fillna(-1,inplace=True)  
                    
                print("'"+new_col_name+"'",', ',end='')
                
# GROUP AGGREGATION NUNIQUE
def encode_AG2(main_columns, uids, train_df=train_data, test_df=test_data):
    for main_column in main_columns:  
        for col in uids:
            comb = pd.concat([train_df[[col]+[main_column]],test_df[[col]+[main_column]]],axis=0)
            mp = comb.groupby(col)[main_column].agg(['nunique'])['nunique'].to_dict()
            train_df[main_column+'_'+ col+'_uni'] = train_df[col].map(mp).astype('float32')
            test_df[main_column+'_'+ col+'_uni'] = test_df[col].map(mp).astype('float32')
            print(main_column+'_'+ col+'_uni', ',',end='')

# GROUP AGGREGATION MODE
def encode_AG3(main_columns, uids, train_df=train_data, test_df=test_data):   
    for main_column in main_columns:  
        for col in uids:
            new_col_name = main_column+'_'+ col+'_mode'
            comb = pd.concat([train_df[[col]+[main_column]],test_df[[col]+[main_column]]],axis=0)
            
            t1 = comb.groupby([col, main_column]).size().reset_index()
            t1.columns = [col, main_column, 'count']
            t2 = t1.groupby([col])['count'].max().reset_index()
            t2.columns = [col, 'max_count']
            t1 = t1.merge(t2, on=[col], how='left')
            t1 = t1[t1['count']==t1['max_count']]
            comb = t1.groupby([col])[main_column].mean().reset_index()
            
            # 取出目标列作为索引
            comb.index = list(comb[col])
            # 生成map对应的字典
            comb = comb[main_column].to_dict()   

            train_df[new_col_name] = train_df[col].map(comb).astype('float32')
            test_df[new_col_name]  = test_df[col].map(comb).astype('float32')
            print("'"+new_col_name+"'",', ',end='')
            
# GROUP AGGREGATION FREC (only 0/1 feature)
def encode_AG4(main_columns, uids, train_df=train_data, test_df=test_data):
    for main_column in main_columns:  
        for col in uids:
            comb = pd.concat([train_df[[col]+[main_column]],test_df[[col]+[main_column]]],axis=0)
            mp = comb.groupby(col)[main_column].agg(['sum'])
            mp = (mp['sum'] / comb.groupby(col)[main_column].size()).to_dict()
            train_df[main_column+'_'+ col+'_frec'] = train_df[col].map(mp).astype('float32')
            test_df[main_column+'_'+ col+'_frec'] = test_df[col].map(mp).astype('float32')
            print(main_column+'_'+ col+'_frec', ',',end='')

In [8]:
# # # conam to cents
# train_data['cents'] = (train_data['conam'] - np.floor(train_data['conam'])).astype('float32')
# test_data['cents'] = (test_data['conam'] - np.floor(test_data['conam'])).astype('float32')
# print('cents, ', end='')

# combine
cb_feature = ['cano__locdt','mcc__mchno','cano__mcc_mchno','cano_locdt__mcc_mchno']

for i in cb_feature:
    f1,f2 = i.split('__')
    encode_CB(f1,f2)


# frequence 
cn_feature = ['acqic','bacno','cano','scity','mchno','conam','mcc',
              'cano_locdt','mcc_mchno','cano_mcc_mchno','cano_locdt_mcc_mchno']
encode_FE(train_data,test_data,cn_feature)


# mean std
encode_AG(['conam','loctm','mcc','mchno','stscd'],['cano'],['mean','std'])
encode_AG(['acqic'],['cano'],['std'])
encode_AG(['loctm','mcc','mchno'],['cano_locdt'],['mean','std'])
encode_AG(['conam'],['cano_locdt'],['mean'])
encode_AG(['conam',],['cano_mcc_mchno'],['mean','std'])
encode_AG(['loctm'],['cano_mcc_mchno'],['std'])
encode_AG(['minutes'],['cano_locdt_mcc_mchno'],['std'])
encode_AG(['stscd'],['cano_locdt_mcc_mchno'],['mean'])

cano_locdt , mcc_mchno , cano_mcc_mchno , cano_locdt_mcc_mchno , acqic_FE , bacno_FE , cano_FE , scity_FE , mchno_FE , conam_FE , mcc_FE , cano_locdt_FE , mcc_mchno_FE , cano_mcc_mchno_FE , cano_locdt_mcc_mchno_FE , 'conam_cano_mean' , 'conam_cano_std' , 'loctm_cano_mean' , 'loctm_cano_std' , 'mcc_cano_mean' , 'mcc_cano_std' , 'mchno_cano_mean' , 'mchno_cano_std' , 'stscd_cano_mean' , 'stscd_cano_std' , 'acqic_cano_std' , 'loctm_cano_locdt_mean' , 'loctm_cano_locdt_std' , 'mcc_cano_locdt_mean' , 'mcc_cano_locdt_std' , 'mchno_cano_locdt_mean' , 'mchno_cano_locdt_std' , 'conam_cano_locdt_mean' , 'conam_cano_mcc_mchno_mean' , 'conam_cano_mcc_mchno_std' , 'loctm_cano_mcc_mchno_std' , 'minutes_cano_locdt_mcc_mchno_std' , 'stscd_cano_locdt_mcc_mchno_mean' , 

In [9]:
model1_features = list(train_data.columns).copy()
del_col = ['bacno','cano','contp','flbmk','flg_3dsmk','hcefg','insfg','iterm','ovrlt','txkey','locdt','fraud_ind'
           ,'cano_locdt','cano_mcc_mchno','cano_locdt_mcc_mchno']

for col in del_col:
    model1_features.remove(col)

X_train = train_data[model1_features].copy()
Y_train = train_data['fraud_ind'].copy()
X_test = test_data[model1_features].copy()
print(X_train.shape,Y_train.shape)
print(X_test.shape)
print(X_train.info())

(1521787, 47) (1521787,)
(421665, 47)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1521787 entries, 0 to 1521786
Data columns (total 47 columns):
acqic                               1521787 non-null int64
conam                               1521787 non-null float64
csmcu                               1521787 non-null int64
ecfg                                1521787 non-null int64
etymd                               1521787 non-null int64
loctm                               1521787 non-null int64
mcc                                 1521787 non-null int64
mchno                               1521787 non-null int64
scity                               1521787 non-null int64
stocn                               1521787 non-null int64
stscd                               1521787 non-null int64
minutes                             1521787 non-null float64
mcc_mchno                           1521787 non-null int32
acqic_FE                            1521787 non-null float32
bacno_FE         

In [10]:
# 特征选择利器
def permutation_importance(X, y, model): 
    perm = {}
    y_true = model.predict(X)
    baseline= roc_auc_score(y, y_true)
    for cols in X.columns:
        value = X[cols].copy()
        X[cols] = np.random.permutation(X[cols].values)
        y_true_sub = model.predict(X)
        perm[cols] = roc_auc_score(y, y_true_sub) - baseline
        X[cols] = np.array(value)
    return perm

In [11]:
params = {'num_leaves': 499,
          'min_child_weight': 0.009,
          'feature_fraction': 0.89,
          'min_data_in_leaf': 120,
          'objective': 'binary',
          'max_depth': 47,
          'learning_rate': 0.1,
          "boosting_type": "gbdt",
          "metric": 'auc',
          "verbosity": -1,
          'num_threads':4,
          'reg_alpha': 1.2,
          'reg_lambda': 1.1,
          'is_unbalance': True,
          'random_state': seed,  
         }

In [None]:
splits = 3
folds = GroupKFold(n_splits = splits)
split_groups = train_data['locdt']

oof = np.zeros(len(X_train))

permutation_result = []

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train.values, Y_train.values,groups=split_groups)):
    print("Fold {}".format(fold_))
    print(len(trn_idx),len(val_idx))
    train_df, train_lb = X_train.iloc[trn_idx], Y_train.iloc[trn_idx]
    valid_df, valid_lb = X_train.iloc[val_idx], Y_train.iloc[val_idx]
    
    trn_data = lgb.Dataset(train_df, label=train_lb)
    val_data = lgb.Dataset(valid_df, label=valid_lb)
    
    clf = lgb.train(params,
                    trn_data,
                    num_boost_round= 5000,
                    valid_sets = [trn_data, val_data],
                    verbose_eval=200,
                    early_stopping_rounds = 200)
    
    valid_pre = clf.predict(valid_df)
    oof[val_idx] = valid_pre
    
    result = permutation_importance(valid_df,valid_lb,clf)
    permutation_result.append(result)
    print("-"*100,'\r\n')
print("auc_oof:",roc_auc_score(Y_train,oof))

Fold 0
1014452 507335
Training until validation scores don't improve for 200 rounds
[200]	training's auc: 0.999999	valid_1's auc: 0.988321
[400]	training's auc: 1	valid_1's auc: 0.989159
[600]	training's auc: 1	valid_1's auc: 0.98924
[800]	training's auc: 1	valid_1's auc: 0.989207
Early stopping, best iteration is:
[613]	training's auc: 1	valid_1's auc: 0.989261


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the doc

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c

---------------------------------------------------------------------------------------------------- 

Fold 1
1014609 507178
Training until validation scores don't improve for 200 rounds
[200]	training's auc: 0.999999	valid_1's auc: 0.985736
[400]	training's auc: 1	valid_1's auc: 0.986316
[600]	training's auc: 1	valid_1's auc: 0.986402
[800]	training's auc: 1	valid_1's auc: 0.98643
[1000]	training's auc: 1	valid_1's auc: 0.986484
Early stopping, best iteration is:
[872]	training's auc: 1	valid_1's auc: 0.986499


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the doc

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c

---------------------------------------------------------------------------------------------------- 

Fold 2
1014513 507274
Training until validation scores don't improve for 200 rounds
[200]	training's auc: 0.999999	valid_1's auc: 0.982942


# permutation_importance 挑选特征

In [41]:
k =  pd.DataFrame(permutation_result)
k

Unnamed: 0,acqic,conam,csmcu,ecfg,etymd,loctm,mcc,mchno,scity,stocn,stscd,minutes,mcc_mchno,acqic_FE,bacno_FE,cano_FE,scity_FE,mchno_FE,conam_FE,mcc_FE,cano_locdt_FE,mcc_mchno_FE,cano_mcc_mchno_FE,cano_locdt_mcc_mchno_FE,acqic_cano_mean,acqic_cano_std,csmcu_cano_mean,csmcu_cano_std,conam_cano_mean,conam_cano_std,loctm_cano_mean,loctm_cano_std,mcc_cano_mean,mcc_cano_std,mchno_cano_mean,mchno_cano_std,stscd_cano_mean,stscd_cano_std,conam_cano_locdt_mean,conam_cano_locdt_std,loctm_cano_locdt_mean,loctm_cano_locdt_std,mcc_cano_locdt_mean,mcc_cano_locdt_std,mchno_cano_locdt_mean,mchno_cano_locdt_std,conam_cano_mcc_mchno_mean,conam_cano_mcc_mchno_std,loctm_cano_mcc_mchno_mean,loctm_cano_mcc_mchno_std,conam_cano_locdt_mcc_mchno_mean,conam_cano_locdt_mcc_mchno_std,minutes_cano_locdt_mcc_mchno_mean,minutes_cano_locdt_mcc_mchno_std,stscd_cano_locdt_mcc_mchno_mean,stscd_cano_locdt_mcc_mchno_std
0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
1,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0
2,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0


In [42]:
kd = k>0
kds = kd.sum()

In [43]:
kds[kds>0]

stscd                                1
acqic_cano_mean                      3
csmcu_cano_mean                      1
csmcu_cano_std                       1
conam_cano_locdt_std                 1
loctm_cano_mcc_mchno_mean            1
conam_cano_locdt_mcc_mchno_mean      1
conam_cano_locdt_mcc_mchno_std       1
minutes_cano_locdt_mcc_mchno_mean    2
stscd_cano_locdt_mcc_mchno_std       1
dtype: int64

In [29]:
a = k['acqic_FE']
for i in a:
    print(i)

-0.0010816105394544362
0.00034254460506044637
0.00011043566145518824


In [None]:
'''是否将oof存为新指标'''
print("save oof file...")
k.to_csv('../result/feature.csv',index=None)