In [1]:
import os
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

import sys
sys.path.append("..") 
from gcforest.gcforest import GCForest

import xlwt

  from numpy.core.umath_tests import inner1d


# 数据及参数

In [2]:
# Seed reused by get_toy_config() and by the seeded sklearn ensembles in the model list.
random_seed = 42
# Number of stratified folds handed to kftrain() in the evaluation loop.
cv=4
# Scoring name; not referenced anywhere in the visible cells.
score = 'f1_weighted'

In [3]:
def get_toy_config():
    """Build the GCForest configuration dictionary used by this notebook.

    The cascade section sets ``max_layers`` to 10, ``early_stopping_rounds``
    to 3 and ``n_classes`` to 6, and lists eight estimator specs, each with
    ``n_folds`` 5: four ``RandomForestClassifier`` entries carrying the
    module-level ``random_seed``, three ``DecisionTreeClassifier`` entries
    and one ``LogisticRegression`` entry (the latter two kinds are unseeded).

    Returns:
        dict: ``{"cascade": <cascade settings>}``.
    """
    # (estimator type, number of copies, whether the spec carries the seed)
    layout = (
        ("RandomForestClassifier", 4, True),
        ("DecisionTreeClassifier", 3, False),
        ("LogisticRegression", 1, False),
    )

    estimators = []
    for est_type, copies, seeded in layout:
        for _ in range(copies):
            # A fresh dict per entry, so no two estimator specs share state.
            spec = {"n_folds": 5, "type": est_type}
            if seeded:
                spec["random_state"] = random_seed
            estimators.append(spec)

    cascade = {
        "random_state": random_seed,
        "max_layers": 10,
        "early_stopping_rounds": 3,
        "n_classes": 6,
        "estimators": estimators,
    }
    return {"cascade": cascade}

In [4]:
# The CSV is located relative to the current working directory (../data/),
# so the kernel must be started from a directory whose sibling `data/` holds the file.
path = os.getcwd()+'/../data/20122018freshwater_four_feature.csv'
data = pd.read_csv(path, na_values = np.nan)

In [5]:
X = data.drop(['本周水质'], axis=1).values # feature columns (everything except the label column) as a NumPy array
# Labels as an (n, 1) column vector, shifted down by 1
# (presumably turning 1-based grades into 0-based class ids — TODO confirm).
y = data['本周水质'].values.reshape(-1,1) - 1

# 1. Fill missing values with the column median, 2. apply Z-score standardisation.
# NOTE(review): the pipeline is fitted on all rows before cross-validation, so the
# median / mean / std also see the rows that later serve as test folds.
# NOTE(review): `preprocessing.Imputer` only exists in older scikit-learn releases;
# newer ones provide `sklearn.impute.SimpleImputer` instead — confirm the pinned version.
clean_pipeline = Pipeline([('imputer', preprocessing.Imputer(missing_values='NaN',strategy="median")),
                           ('std_scaler', preprocessing.StandardScaler())])
X = clean_pipeline.fit_transform(X)

In [6]:
X.shape

(33612, 4)

# k折交叉验证

In [7]:
def _per_class_scores(y_true, y_pred, labels):
    """Return per-class (accuracy, F1, support share) arrays for one split.

    Args:
        y_true: 1-D array of true labels for the split.
        y_pred: 1-D array of predicted labels for the split.
        labels: sorted array of every class label; fixes the row order so the
            result always has ``len(labels)`` entries, even when a class is
            missing from this split.

    Returns:
        tuple of three 1-D float arrays, each of length ``len(labels)``:
        per-class accuracy (confusion-matrix diagonal / row total, 0.0 when
        the class has no true samples), per-class F1, and the fraction of the
        split's samples that belong to each class.
    """
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    class_counts = cm.sum(axis=1)  # number of true samples per class
    acc = np.divide(
        np.diag(cm).astype(float),
        class_counts,
        out=np.zeros(len(labels)),
        where=class_counts > 0,  # leave 0.0 instead of producing 0/0 = nan
    )
    f1 = f1_score(y_true, y_pred, labels=labels, average=None)
    n_split = class_counts.sum()
    if n_split > 0:
        support = class_counts / float(n_split)
    else:
        support = np.zeros(len(labels))
    return acc, f1, support


def kftrain(X, y, model, cv):
    """Evaluate ``model`` with stratified k-fold cross-validation.

    For every fold the model is refitted on the training split and scored on
    both the training and the test split. Names with a leading underscore
    refer to the training split, names without it to the test split.

    Args:
        X: feature matrix, indexable by integer index arrays (n_samples rows).
        y: class labels, either 1-D or an (n_samples, 1) column; flattened
            internally.
        model: estimator exposing ``fit``/``predict``. A model whose class
            name is ``GCForest`` is trained through ``fit_transform`` instead.
        cv: number of stratified folds.

    Returns:
        A 26-tuple, in this order:
            Acc_matrix, F1_matrix, Support_matrix        (n_class, cv) test
            _Acc_matrix, _F1_matrix, _Support_matrix     (n_class, cv) train
            Acc_mean, _Acc_mean                          per-class mean accuracy
            F1_mean, _F1_mean                            per-class mean F1
            Acc, _Acc                                    mean of the per-class mean accuracies
            F1_weighted, _F1_weighted                    mean of the per-fold weighted F1
            SD_Acc, _SD_Acc                              std of the per-fold overall accuracy
            SD_F1, _SD_F1                                std of the per-fold weighted F1
            Acc_mean_class, _Acc_mean_class              per-class mean accuracy
            Acc_SD_class, _Acc_SD_class                  per-class accuracy std
            F1_mean_class, _F1_mean_class                per-class mean F1
            F1_SD_class, _F1_SD_class                    per-class F1 std

    Notes:
        * "Support" is the share of a split's samples that belong to a class.
        * ``F1_weighted`` is the average of the per-fold weighted F1 scores;
          it is close to ``np.sum(F1_mean * Support_mean)`` when the class
          shares are the same in every fold.
        * ``Acc`` averages per-class accuracies (a macro average), whereas
          ``SD_Acc`` is computed from the overall per-fold accuracy.
    """
    model_name = model.__class__.__name__

    # Estimators and metrics expect a 1-D target; a column vector makes
    # sklearn emit a DataConversionWarning on every fit.
    y = np.asarray(y).ravel()
    labels = np.unique(y)
    n_class = labels.shape[0]

    # Row i = class i, column k = fold k.
    Acc_matrix = np.zeros((n_class, cv))
    _Acc_matrix = np.zeros((n_class, cv))
    F1_matrix = np.zeros((n_class, cv))
    _F1_matrix = np.zeros((n_class, cv))
    Support_matrix = np.zeros((n_class, cv))
    _Support_matrix = np.zeros((n_class, cv))

    # One overall score per fold.
    cv_F1_weighted = np.zeros(cv)
    _cv_F1_weighted = np.zeros(cv)
    cv_Acc = np.zeros(cv)
    _cv_Acc = np.zeros(cv)

    skf = StratifiedKFold(n_splits=cv)

    for k, (train_index, test_index) in enumerate(skf.split(X, y)):
        K_train_x, K_test_x = X[train_index], X[test_index]
        K_train_y, K_test_y = y[train_index], y[test_index]

        if model_name == 'GCForest':
            model.fit_transform(K_train_x, K_train_y)
        else:
            model.fit(K_train_x, K_train_y)

        K_test_y_pred = model.predict(K_test_x)
        K_train_y_pred = model.predict(K_train_x)

        # Per-class accuracy, F1 and support share for both splits.
        (Acc_matrix[:, k],
         F1_matrix[:, k],
         Support_matrix[:, k]) = _per_class_scores(K_test_y, K_test_y_pred, labels)
        (_Acc_matrix[:, k],
         _F1_matrix[:, k],
         _Support_matrix[:, k]) = _per_class_scores(K_train_y, K_train_y_pred, labels)

        # Overall scores of this fold.
        cv_F1_weighted[k] = f1_score(K_test_y, K_test_y_pred, average="weighted")
        _cv_F1_weighted[k] = f1_score(K_train_y, K_train_y_pred, average="weighted")
        cv_Acc[k] = accuracy_score(K_test_y, K_test_y_pred)
        _cv_Acc[k] = accuracy_score(K_train_y, K_train_y_pred)

    # Per-class averages over the folds.
    Acc_mean = np.mean(Acc_matrix, axis=1)
    _Acc_mean = np.mean(_Acc_matrix, axis=1)
    F1_mean = np.mean(F1_matrix, axis=1)
    _F1_mean = np.mean(_F1_matrix, axis=1)

    # Overall summary values.
    Acc = np.mean(Acc_mean)
    _Acc = np.mean(_Acc_mean)
    F1_weighted = np.mean(cv_F1_weighted)
    _F1_weighted = np.mean(_cv_F1_weighted)
    SD_Acc = np.std(cv_Acc)
    _SD_Acc = np.std(_cv_Acc)  # train spread must come from the train scores
    SD_F1 = np.std(cv_F1_weighted)
    _SD_F1 = np.std(_cv_F1_weighted)  # train spread must come from the train scores

    # Per-class mean / standard deviation over the folds.
    Acc_mean_class = np.mean(Acc_matrix, axis=1)
    _Acc_mean_class = np.mean(_Acc_matrix, axis=1)
    Acc_SD_class = np.std(Acc_matrix, axis=1)
    _Acc_SD_class = np.std(_Acc_matrix, axis=1)
    F1_mean_class = np.mean(F1_matrix, axis=1)
    _F1_mean_class = np.mean(_F1_matrix, axis=1)
    F1_SD_class = np.std(F1_matrix, axis=1)
    _F1_SD_class = np.std(_F1_matrix, axis=1)

    return (Acc_matrix, F1_matrix, Support_matrix,
            _Acc_matrix, _F1_matrix, _Support_matrix,
            Acc_mean, _Acc_mean, F1_mean, _F1_mean,
            Acc, _Acc, F1_weighted, _F1_weighted,
            SD_Acc, _SD_Acc, SD_F1, _SD_F1,
            Acc_mean_class, _Acc_mean_class, Acc_SD_class, _Acc_SD_class,
            F1_mean_class, _F1_mean_class, F1_SD_class, _F1_SD_class)

In [8]:
config = get_toy_config()

# Every classifier to benchmark; each one gets its own sheet in the workbook.
models = [
    LogisticRegression(),
    LinearDiscriminantAnalysis(),
    SVC(probability=True),
    DecisionTreeClassifier(),
    ExtraTreeClassifier(),
    GaussianNB(),
    KNeighborsClassifier(),
    RandomForestClassifier(random_state=random_seed),
    ExtraTreesClassifier(random_state=random_seed),
    GCForest(config)
]


def _write_matrix(worksheet, title, matrix, header_row, header_col, first_col):
    """Write ``title`` at (header_row, header_col) and ``matrix`` below it.

    Matrix entry (i, j) lands in cell (header_row + 1 + i, first_col + j).
    The row and column counts come from ``matrix.shape`` rather than from
    fixed class / fold counts.
    """
    worksheet.write(header_row, header_col, title)
    n_rows, n_cols = matrix.shape
    for i in range(n_rows):
        for j in range(n_cols):
            worksheet.write(header_row + 1 + i, first_col + j, matrix[i, j])


output_path = "../res/basemodels_4flod.xls"
# Make sure the target directory exists before the long training loop, so the
# final save cannot fail on a missing folder after all models have been fitted.
output_dir = os.path.dirname(output_path)
if output_dir and not os.path.isdir(output_dir):
    os.makedirs(output_dir)

workbook = xlwt.Workbook(encoding = 'utf-8')

for model in models:
    model_name = model.__class__.__name__
    print(model_name)
    # The full tuple is unpacked so every result name stays available afterwards.
    Acc_matrix, F1_matrix, Support_matrix, _Acc_matrix, _F1_matrix, _Support_matrix, Acc_class, _Acc_class, F1_class, _F1_class, Acc, _Acc, F1_weighted, _F1_weighted, SD_Acc, _SD_Acc, SD_F1, _SD_F1, Acc_mean_class, _Acc_mean_class, Acc_SD_class, _Acc_SD_class, F1_mean_class, _F1_mean_class, F1_SD_class, _F1_SD_class = kftrain(X, y, model, cv)

    # cell_overwrite_ok lets neighbouring tables overlap without raising when
    # there are more classes or folds than the fixed layout leaves room for.
    worksheet = workbook.add_sheet(model_name, cell_overwrite_ok=True)

    # Layout: train tables start at column 0; test tables have their title in
    # column 6 and their values from column 7. Accuracy, F1 and support tables
    # have their titles on rows 0, 11 and 20.
    _write_matrix(worksheet, "train_Acc_matrix", _Acc_matrix, 0, 0, 0)
    _write_matrix(worksheet, "test_Acc_matrix", Acc_matrix, 0, 6, 7)

    _write_matrix(worksheet, "train_F1_matrix", _F1_matrix, 11, 0, 0)
    _write_matrix(worksheet, "test_F1_matrix", F1_matrix, 11, 6, 7)

    _write_matrix(worksheet, "train_support_matrix", _Support_matrix, 20, 0, 0)
    _write_matrix(worksheet, "test_support_matrix", Support_matrix, 20, 6, 7)

workbook.save(output_path)

LogisticRegression


  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


LinearDiscriminantAnalysis




SVC
DecisionTreeClassifier
ExtraTreeClassifier
GaussianNB
KNeighborsClassifier




RandomForestClassifier




ExtraTreesClassifier


[ 2019-05-21 16:33:13,122][cascade_classifier.fit_transform] X_groups_train.shape=[(25207, 4)],y_train.shape=(25207,),X_groups_test.shape=no_test,y_test.shape=no_test
[ 2019-05-21 16:33:13,124][cascade_classifier.fit_transform] group_dims=[4]
[ 2019-05-21 16:33:13,125][cascade_classifier.fit_transform] group_starts=[0]
[ 2019-05-21 16:33:13,126][cascade_classifier.fit_transform] group_ends=[4]
[ 2019-05-21 16:33:13,126][cascade_classifier.fit_transform] X_train.shape=(25207, 4),X_test.shape=(0, 4)
[ 2019-05-21 16:33:13,129][cascade_classifier.fit_transform] [layer=0] look_indexs=[0], X_cur_train.shape=(25207, 4), X_cur_test.shape=(0, 4)
[ 2019-05-21 16:33:13,300][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_0.predict)=99.64%


GCForest


[ 2019-05-21 16:33:13,450][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_1.predict)=99.64%
[ 2019-05-21 16:33:13,596][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_2.predict)=99.56%
[ 2019-05-21 16:33:13,741][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_3.predict)=99.54%
[ 2019-05-21 16:33:13,885][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_4.predict)=99.60%
[ 2019-05-21 16:33:13,892][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_cv.predict)=99.60%
[ 2019-05-21 16:33:14,053][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_0.predict)=99.68%
[ 2019-05-21 16:33:14,197][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_1.predict)=99.56%
[ 2019-05-21 16:33:14,338][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds

[ 2019-05-21 16:33:19,916][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_folds.train_1.predict)=99.68%
[ 2019-05-21 16:33:20,117][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_folds.train_2.predict)=99.68%
[ 2019-05-21 16:33:20,310][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_folds.train_3.predict)=99.72%
[ 2019-05-21 16:33:20,525][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_folds.train_4.predict)=99.68%
[ 2019-05-21 16:33:20,541][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_folds.train_cv.predict)=99.69%
[ 2019-05-21 16:33:20,859][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_0.predict)=99.76%
[ 2019-05-21 16:33:21,069][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_1.predict)=99.80%
[ 2019-05-21 16:33:21,283][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds

[ 2019-05-21 16:33:33,518][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_4 - 5_folds.train_3.predict)=99.46%
[ 2019-05-21 16:33:33,709][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_4 - 5_folds.train_4.predict)=99.44%
[ 2019-05-21 16:33:33,718][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_4 - 5_folds.train_cv.predict)=99.39%
[ 2019-05-21 16:33:34,019][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_0.predict)=99.27%
[ 2019-05-21 16:33:34,218][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_1.predict)=99.44%
[ 2019-05-21 16:33:34,445][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_2.predict)=99.56%
[ 2019-05-21 16:33:34,626][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_3.predict)=99.23%
[ 2019-05-21 16:33:34,809][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds

[ 2019-05-21 16:33:46,562][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_6 - 5_folds.train_cv.predict)=99.32%
[ 2019-05-21 16:33:47,238][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_0.predict)=99.66%
[ 2019-05-21 16:33:47,940][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_1.predict)=99.72%
[ 2019-05-21 16:33:48,691][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_2.predict)=99.74%
[ 2019-05-21 16:33:49,381][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_3.predict)=99.64%
[ 2019-05-21 16:33:50,038][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_4.predict)=99.78%
[ 2019-05-21 16:33:50,046][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_cv.predict)=99.71%
[ 2019-05-21 16:33:50,054][cascade_classifier.calc_f1] Weighted F1 (layer_3 - train.classifier_average

[ 2019-05-21 16:34:01,546][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_0 - 5_folds.train_cv.predict)=99.69%
[ 2019-05-21 16:34:01,801][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_0.predict)=99.58%
[ 2019-05-21 16:34:02,014][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_1.predict)=99.72%
[ 2019-05-21 16:34:02,229][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_2.predict)=99.72%
[ 2019-05-21 16:34:02,428][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_3.predict)=99.80%
[ 2019-05-21 16:34:02,647][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_4.predict)=99.64%
[ 2019-05-21 16:34:02,654][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_cv.predict)=99.69%
[ 2019-05-21 16:34:02,860][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_fold

[ 2019-05-21 16:34:14,636][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_cv.predict)=99.21%
[ 2019-05-21 16:34:14,827][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_0.predict)=99.25%
[ 2019-05-21 16:34:15,004][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_1.predict)=99.23%
[ 2019-05-21 16:34:15,178][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_2.predict)=98.97%
[ 2019-05-21 16:34:15,342][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_3.predict)=99.31%
[ 2019-05-21 16:34:15,532][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_4.predict)=99.27%
[ 2019-05-21 16:34:15,539][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_cv.predict)=99.20%
[ 2019-05-21 16:34:15,718][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_2 - 5_fold

[ 2019-05-21 16:34:22,851][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_folds.train_cv.predict)=99.30%
[ 2019-05-21 16:34:23,146][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_0.predict)=99.37%
[ 2019-05-21 16:34:23,424][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_1.predict)=99.35%
[ 2019-05-21 16:34:23,683][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_2.predict)=99.37%
[ 2019-05-21 16:34:23,947][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_3.predict)=99.33%
[ 2019-05-21 16:34:24,209][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_4.predict)=99.09%
[ 2019-05-21 16:34:24,216][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_cv.predict)=99.30%
[ 2019-05-21 16:34:24,580][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_4 - 5_fold

[ 2019-05-21 16:34:40,557][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_1.predict)=98.45%
[ 2019-05-21 16:34:40,871][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_2.predict)=98.48%
[ 2019-05-21 16:34:41,162][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_3.predict)=98.55%
[ 2019-05-21 16:34:41,467][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_4.predict)=98.65%
[ 2019-05-21 16:34:41,474][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_cv.predict)=98.48%
[ 2019-05-21 16:34:41,786][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_6 - 5_folds.train_0.predict)=98.67%
[ 2019-05-21 16:34:42,097][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_6 - 5_folds.train_1.predict)=98.39%
[ 2019-05-21 16:34:42,372][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_6 - 5_folds

[ 2019-05-21 16:34:58,893][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_3.predict)=99.19%
[ 2019-05-21 16:34:59,582][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_4.predict)=99.46%
[ 2019-05-21 16:34:59,589][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_cv.predict)=99.30%
[ 2019-05-21 16:34:59,596][cascade_classifier.calc_f1] Weighted F1 (layer_3 - train.classifier_average)=99.29%
[ 2019-05-21 16:34:59,603][cascade_classifier.fit_transform] [layer=4] look_indexs=[0], X_cur_train.shape=(25208, 52), X_cur_test.shape=(0, 52)
[ 2019-05-21 16:34:59,891][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_0 - 5_folds.train_0.predict)=99.43%
[ 2019-05-21 16:35:00,118][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_0 - 5_folds.train_1.predict)=99.03%
[ 2019-05-21 16:35:00,396][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_0 - 5_folds.

[ 2019-05-21 16:35:15,789][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_3.predict)=99.37%
[ 2019-05-21 16:35:16,040][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_4.predict)=99.27%
[ 2019-05-21 16:35:16,048][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_cv.predict)=99.29%
[ 2019-05-21 16:35:16,341][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds.train_0.predict)=99.31%
[ 2019-05-21 16:35:16,583][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds.train_1.predict)=99.23%
[ 2019-05-21 16:35:16,857][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds.train_2.predict)=99.05%
[ 2019-05-21 16:35:17,132][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds.train_3.predict)=99.50%
[ 2019-05-21 16:35:17,390][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds

[ 2019-05-21 16:35:30,718][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_3 - 5_folds.train_cv.predict)=99.28%
[ 2019-05-21 16:35:31,000][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_0.predict)=98.59%
[ 2019-05-21 16:35:31,259][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_1.predict)=98.42%
[ 2019-05-21 16:35:31,506][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_2.predict)=98.47%
[ 2019-05-21 16:35:31,767][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_3.predict)=98.45%
[ 2019-05-21 16:35:32,052][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_4.predict)=98.33%
[ 2019-05-21 16:35:32,059][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_cv.predict)=98.45%
[ 2019-05-21 16:35:32,325][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_5 - 5_fold

[ 2019-05-21 16:35:44,553][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_3.predict)=99.29%
[ 2019-05-21 16:35:44,719][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_4.predict)=99.22%
[ 2019-05-21 16:35:44,725][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_cv.predict)=99.24%
[ 2019-05-21 16:35:44,779][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_4 - 5_folds.train_0.predict)=98.59%
[ 2019-05-21 16:35:44,821][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_4 - 5_folds.train_1.predict)=98.50%
[ 2019-05-21 16:35:44,861][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_4 - 5_folds.train_2.predict)=98.31%
[ 2019-05-21 16:35:44,902][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_4 - 5_folds.train_3.predict)=98.57%
[ 2019-05-21 16:35:44,944][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_4 - 5_folds

[ 2019-05-21 16:35:53,490][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_3.predict)=98.71%
[ 2019-05-21 16:35:53,762][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_4.predict)=98.39%
[ 2019-05-21 16:35:53,769][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_cv.predict)=98.62%
[ 2019-05-21 16:35:54,052][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_6 - 5_folds.train_0.predict)=98.61%
[ 2019-05-21 16:35:54,328][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_6 - 5_folds.train_1.predict)=98.71%
[ 2019-05-21 16:35:54,578][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_6 - 5_folds.train_2.predict)=98.20%
[ 2019-05-21 16:35:54,849][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_6 - 5_folds.train_3.predict)=98.73%
[ 2019-05-21 16:35:55,121][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_6 - 5_folds

[ 2019-05-21 16:36:10,057][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_7 - 5_folds.train_cv.predict)=99.32%
[ 2019-05-21 16:36:10,065][cascade_classifier.calc_f1] Weighted F1 (layer_2 - train.classifier_average)=99.31%
[ 2019-05-21 16:36:10,072][cascade_classifier.fit_transform] [layer=3] look_indexs=[0], X_cur_train.shape=(25210, 52), X_cur_test.shape=(0, 52)
[ 2019-05-21 16:36:10,342][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_0 - 5_folds.train_0.predict)=99.25%
[ 2019-05-21 16:36:10,578][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_0 - 5_folds.train_1.predict)=99.31%
[ 2019-05-21 16:36:10,804][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_0 - 5_folds.train_2.predict)=99.29%
[ 2019-05-21 16:36:11,039][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_0 - 5_folds.train_3.predict)=99.41%
[ 2019-05-21 16:36:11,275][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_0 - 5_folds.

[ 2019-05-21 16:36:24,689][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_1 - 5_folds.train_cv.predict)=99.33%
[ 2019-05-21 16:36:24,954][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_0.predict)=99.44%
[ 2019-05-21 16:36:25,187][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_1.predict)=99.27%
[ 2019-05-21 16:36:25,444][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_2.predict)=99.25%
[ 2019-05-21 16:36:25,694][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_3.predict)=99.27%
[ 2019-05-21 16:36:25,953][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_4.predict)=99.36%
[ 2019-05-21 16:36:25,960][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_cv.predict)=99.32%
[ 2019-05-21 16:36:26,224][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_3 - 5_fold

[ 2019-05-21 16:36:37,987][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_2 - 5_folds.train_1.predict)=99.48%
[ 2019-05-21 16:36:38,138][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_2 - 5_folds.train_2.predict)=99.41%
[ 2019-05-21 16:36:38,289][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_2 - 5_folds.train_3.predict)=99.33%
[ 2019-05-21 16:36:38,436][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_2 - 5_folds.train_4.predict)=99.35%
[ 2019-05-21 16:36:38,443][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_2 - 5_folds.train_cv.predict)=99.37%
[ 2019-05-21 16:36:38,635][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_0.predict)=99.45%
[ 2019-05-21 16:36:38,796][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_1.predict)=99.41%
[ 2019-05-21 16:36:38,942][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds

[ 2019-05-21 16:36:45,868][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_4 - 5_folds.train_1.predict)=98.87%
[ 2019-05-21 16:36:46,087][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_4 - 5_folds.train_2.predict)=98.79%
[ 2019-05-21 16:36:46,345][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_4 - 5_folds.train_3.predict)=98.65%
[ 2019-05-21 16:36:46,621][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_4 - 5_folds.train_4.predict)=98.89%
[ 2019-05-21 16:36:46,629][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_4 - 5_folds.train_cv.predict)=98.79%
[ 2019-05-21 16:36:46,923][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_0.predict)=98.73%
[ 2019-05-21 16:36:47,168][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_1.predict)=98.83%
[ 2019-05-21 16:36:47,401][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds

[ 2019-05-21 16:37:00,187][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_6 - 5_folds.train_2.predict)=98.93%
[ 2019-05-21 16:37:00,439][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_6 - 5_folds.train_3.predict)=98.85%
[ 2019-05-21 16:37:00,656][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_6 - 5_folds.train_4.predict)=98.73%
[ 2019-05-21 16:37:00,662][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_6 - 5_folds.train_cv.predict)=98.81%
[ 2019-05-21 16:37:01,306][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_7 - 5_folds.train_0.predict)=99.50%
[ 2019-05-21 16:37:01,938][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_7 - 5_folds.train_1.predict)=99.50%
[ 2019-05-21 16:37:02,540][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_7 - 5_folds.train_2.predict)=99.45%
[ 2019-05-21 16:37:03,243][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_7 - 5_folds

[ 2019-05-21 16:37:15,735][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_0 - 5_folds.train_2.predict)=99.48%
[ 2019-05-21 16:37:15,948][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_0 - 5_folds.train_3.predict)=99.42%
[ 2019-05-21 16:37:16,159][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_0 - 5_folds.train_4.predict)=99.44%
[ 2019-05-21 16:37:16,165][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_0 - 5_folds.train_cv.predict)=99.42%
[ 2019-05-21 16:37:16,413][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_1 - 5_folds.train_0.predict)=99.43%
[ 2019-05-21 16:37:16,626][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_1 - 5_folds.train_1.predict)=99.44%
[ 2019-05-21 16:37:16,823][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_1 - 5_folds.train_2.predict)=99.25%
[ 2019-05-21 16:37:17,034][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_1 - 5_folds

[ 2019-05-21 16:37:29,783][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds.train_4.predict)=99.35%
[ 2019-05-21 16:37:29,789][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds.train_cv.predict)=99.42%
[ 2019-05-21 16:37:30,033][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_3 - 5_folds.train_0.predict)=99.39%
[ 2019-05-21 16:37:30,246][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_3 - 5_folds.train_1.predict)=99.43%
[ 2019-05-21 16:37:30,457][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_3 - 5_folds.train_2.predict)=99.37%
[ 2019-05-21 16:37:30,686][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_3 - 5_folds.train_3.predict)=99.66%
[ 2019-05-21 16:37:30,896][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_3 - 5_folds.train_4.predict)=99.23%
[ 2019-05-21 16:37:30,902][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_3 - 5_folds

[ 2019-05-21 16:37:43,805][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_5 - 5_folds.train_0.predict)=99.05%
[ 2019-05-21 16:37:44,037][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_5 - 5_folds.train_1.predict)=98.67%
[ 2019-05-21 16:37:44,280][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_5 - 5_folds.train_2.predict)=98.93%
[ 2019-05-21 16:37:44,515][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_5 - 5_folds.train_3.predict)=98.65%
[ 2019-05-21 16:37:44,740][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_5 - 5_folds.train_4.predict)=98.83%
[ 2019-05-21 16:37:44,747][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_5 - 5_folds.train_cv.predict)=98.83%
[ 2019-05-21 16:37:44,978][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_6 - 5_folds.train_0.predict)=98.77%
[ 2019-05-21 16:37:45,227][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_6 - 5_folds

[ 2019-05-21 16:37:59,978][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_7 - estimator_7 - 5_folds.train_2.predict)=99.52%
[ 2019-05-21 16:38:00,634][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_7 - estimator_7 - 5_folds.train_3.predict)=99.31%
[ 2019-05-21 16:38:01,299][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_7 - estimator_7 - 5_folds.train_4.predict)=99.46%
[ 2019-05-21 16:38:01,307][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_7 - estimator_7 - 5_folds.train_cv.predict)=99.43%
[ 2019-05-21 16:38:01,315][cascade_classifier.calc_f1] Weighted F1 (layer_7 - train.classifier_average)=99.43%
[ 2019-05-21 16:38:01,315][cascade_classifier.fit_transform] [Result][Optimal Level Detected] opt_layer_num=5, weighted_f1_train=99.43%, weighted_f1_test=0.00%
[ 2019-05-21 16:38:01,321][cascade_classifier.transform] X_groups_test.shape=[(8401, 4)]
[ 2019-05-21 16:38:01,323][cascade_classifier.transform] group_dims=[4]
[ 2019-05-21 16:38:01,324][cascade_classifier.transform