In [1]:
import os
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

import sys
sys.path.append("..") 
from gcforest.gcforest import GCForest

import xlwt

  from numpy.core.umath_tests import inner1d


# 数据及参数

In [2]:
random_seed = 42
cv=5
score = 'f1_weighted'

In [3]:
def get_toy_config():
    config = {}
    ca_config = {}
    ca_config["random_state"] = random_seed
    ca_config["max_layers"] = 10
    ca_config["early_stopping_rounds"] = 3
    ca_config["n_classes"] = 6
    ca_config["estimators"] = []
    ca_config["estimators"].append({"n_folds": 5, "type": "RandomForestClassifier", "random_state" : random_seed})
    ca_config["estimators"].append({"n_folds": 5, "type": "RandomForestClassifier", "random_state" : random_seed})
    ca_config["estimators"].append({"n_folds": 5, "type": "RandomForestClassifier", "random_state" : random_seed})
    ca_config["estimators"].append({"n_folds": 5, "type": "RandomForestClassifier", "random_state" : random_seed})
    ca_config["estimators"].append({"n_folds": 5, "type": "DecisionTreeClassifier"})
    ca_config["estimators"].append({"n_folds": 5, "type": "DecisionTreeClassifier"})
    ca_config["estimators"].append({"n_folds": 5, "type": "DecisionTreeClassifier"})
    ca_config["estimators"].append({"n_folds": 5, "type": "LogisticRegression"})
    config["cascade"] = ca_config
    return config

In [4]:
path = os.getcwd()+'/../data/20122018freshwater_four_feature.csv'
data = pd.read_csv(path, na_values = np.nan)

In [5]:
X = data.drop(['本周水质'], axis=1).values # Series
y = data['本周水质'].values.reshape(-1,1) - 1

# 1. 中位数填充缺失值，2.Z-score标准化
clean_pipeline = Pipeline([('imputer', preprocessing.Imputer(missing_values='NaN',strategy="median")),
                           ('std_scaler', preprocessing.StandardScaler())])
X = clean_pipeline.fit_transform(X)

In [6]:
X.shape

(33612, 4)

# k折交叉验证

In [7]:
# function：使用5折交叉验证统计各类别5次平均后的Acc，5次平均后的F1，和模型的总Acc以及总Weighted F1

# Input: 
#     X: 总样本
#     y: 总样本
#     model: function
#     cv: cross_validation的次数
# Output:
#     Acc_mean, 各类别的Acc
#     F1_mean, 各类别F1
#     Support_mean, 各类别预测样本占总样本的比重
#     Acc, 总Acc
#     F1_weighted 总Weighted F1
# function：使用5折交叉验证统计各类别5次平均后的Acc，5次平均后的F1，和模型的总Acc以及总Weighted F1

# Input: 
#     X: 总样本
#     y: 总样本
#     model: function
#     cv: cross_validation的次数
# Output:
#     Acc_mean, 各类别的Acc
#     F1_mean, 各类别F1
#     Support_mean, 各类别预测样本占总样本的比重
#     Acc, 总Acc
#     F1_weighted 总Weighted F1


def kftrain(X, y, model, cv):
    model_name = model.__class__.__name__

    n_samples = X.shape[0]
    n_features = X.shape[1]
    n_class = np.unique(y).shape[0]

    Acc_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 第i行：类别i的在cv组test data的cv个acc
    _Acc_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 第i行：类别i的在cv组train data的cv个acc
    F1_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 第i行：类别i的在cv组test data的cv个acc
    _F1_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 第i行：类别i的在cv组train data的cv个acc
    cv_F1_weighted = np.zeros(cv) # 5次 test data 的F1_weighted
    _cv_F1_weighted = np.zeros(cv) # 5次 train data 的F1_weighted
    cv_Acc = np.zeros(cv) # 5次test data的Acc
    _cv_Acc = np.zeros(cv) # 5次train data的Acc
    # 各类别的Support，每个类别的support由5次fold后得到的5个support求平均得到，这里support：各类别样本占总样本的比列
    # 各类别的F1，由5次fold后得到的5个F1求平均得到
    # 所以，Weighted F1 等于“5次fold得到的5个 Weighted F1 求平均得到” ——等价于—— “6个support*6个F1”得到
    Support_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 5次fold后test data各类别的support
    _Support_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 5次fold后train data各类别的support
    
    # 各类别的Mean，SD
    Acc_mean_class = np.zeros(6)
    _Acc_mean_class = np.zeros(6)
    Acc_SD_class = np.zeros(6)
    _Acc_SD_class = np.zeros(6)
    F1_mean_class = np.zeros(6)
    _F1_mean_class = np.zeros(6)
    F1_SD_class = np.zeros(6)
    _F1_SD_class = np.zeros(6)
    
    k = 0
    skf = StratifiedKFold(n_splits=cv) # 定义5折分层划分器
    
    for train_index, test_index in skf.split(X, y):
        K_train_x, K_test_x = X[train_index], X[test_index]
        K_train_y, K_test_y = y[train_index], y[test_index]
        if model_name == 'GCForest':
            model.fit_transform(K_train_x, K_train_y.reshape(K_train_y.shape[0]))
        else:
            model.fit(K_train_x, K_train_y)
    
        K_test_y_pred = model.predict(K_test_x)
        K_train_y_pred = model.predict(K_train_x)
        
        # 由混淆矩阵计算各类别的Acc
        test_cm = confusion_matrix(K_test_y, K_test_y_pred)
        train_cm = confusion_matrix(K_train_y, K_train_y_pred)
        test_acc_all_class = np.zeros(n_class) # 6个类别上的测试acc
        train_acc_all_class = np.zeros(n_class) # 6个类别上的训练acc
        
        i = 0
        for c in test_cm:
            test_acc_all_class[i] = c[i]/np.sum(c)
            i += 1
        Acc_matrix[:,k] = test_acc_all_class
        
        i = 0
        for c in train_cm:
            train_acc_all_class[i] = c[i]/np.sum(c)
            i += 1
        _Acc_matrix[:,k] = train_acc_all_class
        
        # 由classification_report提取各类别的F1
        cr = classification_report(K_test_y, K_test_y_pred, digits=4)
        _cr = classification_report(K_train_y, K_train_y_pred, digits=4)
        test_f1_all_class = np.zeros(n_class) # test data 6个类别上的测试f1
        train_f1_all_class = np.zeros(n_class) # train data 6个类别上的训练f1
        support_all_class = np.zeros(n_class) # test data 6个类别的support
        _support_all_class = np.zeros(n_class) # train data 6个类别的support
        
        i = 0
        for l in range(2,8):
            test_f1_all_class[i] = float(cr.splitlines()[l].split()[3])
            train_f1_all_class[i] = float(_cr.splitlines()[l].split()[3])
            support_all_class[i] = float(cr.splitlines()[l].split()[4])/(n_samples*0.2)
            _support_all_class[i] = float(_cr.splitlines()[l].split()[4])/(n_samples*0.8)
            i = i + 1
        F1_matrix[:,k] = test_f1_all_class
        _F1_matrix[:,k] = train_f1_all_class
        Support_matrix[:,k] = support_all_class
        _Support_matrix[:,k] = _support_all_class
        
        # cv_F1_weighted[k] = float(cr.splitlines()[9].split()[5]) # 第k折时的Weighted F1
        cv_F1_weighted[k] = f1_score(K_test_y, K_test_y_pred, average="weighted")
        _cv_F1_weighted[k] = f1_score(K_train_y, K_train_y_pred, average="weighted")
        cv_Acc[k] = accuracy_score(K_test_y, K_test_y_pred)
        _cv_Acc[k] = accuracy_score(K_train_y, K_train_y_pred)
        
        k += 1
    
    Acc_mean = np.mean(Acc_matrix, axis=1) # 6个类别的k-fold平均acc
    _Acc_mean = np.mean(_Acc_matrix, axis=1) # 6个类别的k-fold平均acc
    
    F1_mean = np.mean(F1_matrix, axis=1) # 6个类别的k-fold平均F1
    _F1_mean = np.mean(_F1_matrix, axis=1) # 6个类别的k-fold平均F1
    Support_mean = np.mean(Support_matrix, axis=1) # 6个类别的k-fold平均support
    _Support_mean = np.mean(_Support_matrix, axis=1) # 6个类别的k-fold平均support
    
    Acc = np.mean(Acc_mean)
    _Acc = np.mean(_Acc_mean)
    F1_weighted = np.mean(cv_F1_weighted) # 5个Weighted F1 求平均，并可以证明其等价于 np.sum(F1_mean*Support_mean)
    _F1_weighted = np.mean(_cv_F1_weighted) # 5个Weighted F1 求平均，并可以证明其等价于 np.sum(F1_mean*Support_mean)
    SD_Acc = np.std(cv_Acc)
    _SD_Acc = np.std(cv_Acc)
    SD_F1 = np.std(cv_F1_weighted)
    _SD_F1 = np.std(cv_F1_weighted)
    
    Acc_mean_class = np.mean(Acc_matrix, axis=1)
    _Acc_mean_class = np.mean(_Acc_matrix, axis=1)
    Acc_SD_class = np.std(Acc_matrix, axis=1)
    _Acc_SD_class = np.std(_Acc_matrix, axis=1)
    F1_mean_class = np.mean(F1_matrix, axis=1)
    _F1_mean_class = np.mean(_F1_matrix, axis=1)
    F1_SD_class = np.std(F1_matrix, axis=1)
    _F1_SD_class = np.std(_F1_matrix, axis=1)
    
    # return Acc_mean, F1_mean, Support_mean, Acc, F1_weighted, SD_Acc, SD_F1, Acc_mean_class, Acc_SD_class, F1_mean_class, F1_SD_class
    return Acc_matrix, F1_matrix, Support_matrix, _Acc_matrix, _F1_matrix, _Support_matrix, Acc_mean, _Acc_mean, F1_mean, _F1_mean, Acc, _Acc, F1_weighted, _F1_weighted, SD_Acc, _SD_Acc, SD_F1, _SD_F1, Acc_mean_class, _Acc_mean_class, Acc_SD_class, _Acc_SD_class, F1_mean_class, _F1_mean_class, F1_SD_class, _F1_SD_class

In [11]:
config = get_toy_config()

models = [
    LogisticRegression(),
    LinearDiscriminantAnalysis(),
    SVC(probability=True),
    DecisionTreeClassifier(),
    ExtraTreeClassifier(),
    GaussianNB(),
    KNeighborsClassifier(),
    RandomForestClassifier(random_state=random_seed),
    ExtraTreesClassifier(random_state=random_seed),
    GCForest(config)
]

workbook = xlwt.Workbook(encoding = 'utf-8')  

for model in models:
    model_name = model.__class__.__name__
    print(model_name)
    # Acc_class, F1_class, Support_class, Acc, F1_weighted, SD_Acc, SD_F1, Acc_mean_class, Acc_SD_class, F1_mean_class, F1_SD_class = kftrain(X, y, model, 5)
    Acc_matrix, F1_matrix, Support_matrix, _Acc_matrix, _F1_matrix, _Support_matrix, Acc_class, _Acc_class, F1_class, _F1_class, Acc, _Acc, F1_weighted, _F1_weighted, SD_Acc, _SD_Acc, SD_F1, _SD_F1, Acc_mean_class, _Acc_mean_class, Acc_SD_class, _Acc_SD_class, F1_mean_class, _F1_mean_class, F1_SD_class, _F1_SD_class = kftrain(X, y, model, 5)

    worksheet = workbook.add_sheet(model_name, cell_overwrite_ok=True)
    worksheet.write(0,0, "train_Acc_matrix")
    for i in range(6):
        for j in range(5):
            worksheet.write(i+1, j, _Acc_matrix[i,j])

    worksheet.write(0,6, "test_Acc_matrix")
    for i in range(6):
        for j in range(5):
            worksheet.write(i+1, j+7, Acc_matrix[i,j])



    worksheet.write(11,0, "train_F1_matrix")
    for i in range(6):
        for j in range(5):
            worksheet.write(i+12, j, _F1_matrix[i,j])

    worksheet.write(11,6, "test_F1_matrix")
    for i in range(6):
        for j in range(5):
            worksheet.write(i+12, j+7, F1_matrix[i,j])



    worksheet.write(20,0, "train_support_matrix")
    for i in range(6):
        for j in range(5):
            worksheet.write(i+21, j, _Support_matrix[i,j])

    worksheet.write(20,6, "test_support_matrix")
    for i in range(6):
        for j in range(5):
            worksheet.write(i+21, j+7, Support_matrix[i,j])
            
workbook.save("../res/basemodels.xls")

#     print("===Test Accuracy===")
#     for i in range(len(Acc_class)):
#         print("Class %d: %.2f%%" %(i, Acc_class[i]*100))
#         print("Mean±SD: %.2f±%.2f%%" %(Acc_mean_class[i]*100,Acc_SD_class[i]*100))
    
#     print("Overall: %.2f%%" %(Acc*100))
#     print("Mean±SD: %.2f±%.2f%%" %(Acc*100,SD_Acc*100))
    
#     print("===Train Accuracy===")
#     for i in range(len(_Acc_class)):
#         print("Class %d: %.2f%%" %(i, _Acc_class[i]*100))
#         print("Mean±SD: %.2f±%.2f%%" %(_Acc_mean_class[i]*100, _Acc_SD_class[i]*100))
        
#     print("Overall: %.2f%%" %(_Acc*100))
#     print("Mean±SD: %.2f±%.2f%%" %(_Acc*100, _SD_Acc*100))

#     print("===  Test F1  ===")
#     for i in range(len(F1_class)):
#         print("Class %d: %.2f%%" %(i, F1_class[i]*100))
#         print("Mean±SD: %.2f±%.2f%%" %(F1_mean_class[i]*100,F1_SD_class[i]*100))
        
#     print("F{beta}: %.2f%%" %(F1_weighted*100))
#     print("Mean±SD: %.2f±%.2f%%" %(F1_weighted*100,SD_F1*100))
    
    
#     print("===  Train F1  ===")
#     for i in range(len(_F1_class)):
#         print("Class %d: %.2f%%" %(i, _F1_class[i]*100))
#         print("Mean±SD: %.2f±%.2f%%" %(_F1_mean_class[i]*100, _F1_SD_class[i]*100))
        
#     print("F{beta}: %.2f%%" %(_F1_weighted*100))
#     print("Mean±SD: %.2f±%.2f%%" %(_F1_weighted*100, _SD_F1*100))
#     print("\n===============\n")
    
    
#     print("=== Train Acc_matrix===")
#     print(_Acc_matrix)
#     print("=== Train F1_matrix===")
#     print(_F1_matrix)
#     print("=== Train Support_matrix===")
#     print(_Support_matrix)
    
#     print("=== Test Acc_matrix===")
#     print(Acc_matrix)
#     print("=== Test F1_matrix===")
#     print(F1_matrix)
#     print("=== Test Support_matrix===")
#     print(Support_matrix)


LogisticRegression


  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


LinearDiscriminantAnalysis
SVC
DecisionTreeClassifier
ExtraTreeClassifier
GaussianNB
KNeighborsClassifier




RandomForestClassifier




ExtraTreesClassifier


[ 2019-05-11 17:07:23,670][cascade_classifier.fit_transform] X_groups_train.shape=[(26886, 4)],y_train.shape=(26886,),X_groups_test.shape=no_test,y_test.shape=no_test
[ 2019-05-11 17:07:23,671][cascade_classifier.fit_transform] group_dims=[4]
[ 2019-05-11 17:07:23,672][cascade_classifier.fit_transform] group_starts=[0]
[ 2019-05-11 17:07:23,673][cascade_classifier.fit_transform] group_ends=[4]
[ 2019-05-11 17:07:23,675][cascade_classifier.fit_transform] X_train.shape=(26886, 4),X_test.shape=(0, 4)
[ 2019-05-11 17:07:23,678][cascade_classifier.fit_transform] [layer=0] look_indexs=[0], X_cur_train.shape=(26886, 4), X_cur_test.shape=(0, 4)
[ 2019-05-11 17:07:23,849][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_0.predict)=99.52%


GCForest


[ 2019-05-11 17:07:24,015][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_1.predict)=99.55%
[ 2019-05-11 17:07:24,169][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_2.predict)=99.46%
[ 2019-05-11 17:07:24,325][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_3.predict)=99.50%
[ 2019-05-11 17:07:24,481][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_4.predict)=99.42%
[ 2019-05-11 17:07:24,487][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_0 - 5_folds.train_cv.predict)=99.49%
[ 2019-05-11 17:07:24,670][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_0.predict)=99.52%
[ 2019-05-11 17:07:24,827][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds.train_1.predict)=99.55%
[ 2019-05-11 17:07:24,989][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_1 - 5_folds

[ 2019-05-11 17:07:31,962][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_folds.train_3.predict)=99.55%
[ 2019-05-11 17:07:32,239][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_folds.train_4.predict)=99.61%
[ 2019-05-11 17:07:32,246][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_folds.train_cv.predict)=99.56%
[ 2019-05-11 17:07:32,523][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_0.predict)=99.50%
[ 2019-05-11 17:07:32,784][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_1.predict)=99.59%
[ 2019-05-11 17:07:33,052][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_2.predict)=99.59%
[ 2019-05-11 17:07:33,317][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds.train_3.predict)=99.57%
[ 2019-05-11 17:07:33,591][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_3 - 5_folds

[ 2019-05-11 17:07:48,758][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_4 - 5_folds.train_cv.predict)=99.17%
[ 2019-05-11 17:07:49,005][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_0.predict)=99.00%
[ 2019-05-11 17:07:49,253][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_1.predict)=98.94%
[ 2019-05-11 17:07:49,490][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_2.predict)=98.98%
[ 2019-05-11 17:07:49,740][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_3.predict)=99.14%
[ 2019-05-11 17:07:49,969][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_4.predict)=99.13%
[ 2019-05-11 17:07:49,976][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_5 - 5_folds.train_cv.predict)=99.04%
[ 2019-05-11 17:07:50,237][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_6 - 5_fold

[ 2019-05-11 17:08:03,563][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_1.predict)=99.55%
[ 2019-05-11 17:08:04,174][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_2.predict)=99.61%
[ 2019-05-11 17:08:04,786][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_3.predict)=99.50%
[ 2019-05-11 17:08:05,407][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_4.predict)=99.52%
[ 2019-05-11 17:08:05,413][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_7 - 5_folds.train_cv.predict)=99.57%
[ 2019-05-11 17:08:05,420][cascade_classifier.calc_f1] Weighted F1 (layer_3 - train.classifier_average)=99.56%
[ 2019-05-11 17:08:05,426][cascade_classifier.fit_transform] [layer=4] look_indexs=[0], X_cur_train.shape=(26886, 52), X_cur_test.shape=(0, 52)
[ 2019-05-11 17:08:05,651][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_0 - 5_folds.

[ 2019-05-11 17:08:17,855][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_1.predict)=99.50%
[ 2019-05-11 17:08:18,068][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_2.predict)=99.52%
[ 2019-05-11 17:08:18,283][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_3.predict)=99.57%
[ 2019-05-11 17:08:18,499][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_4.predict)=99.68%
[ 2019-05-11 17:08:18,506][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_1 - 5_folds.train_cv.predict)=99.55%
[ 2019-05-11 17:08:18,739][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds.train_0.predict)=99.55%
[ 2019-05-11 17:08:18,951][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds.train_1.predict)=99.59%
[ 2019-05-11 17:08:19,172][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_2 - 5_folds

[ 2019-05-11 17:08:31,415][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_3 - 5_folds.train_3.predict)=99.39%
[ 2019-05-11 17:08:31,639][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_3 - 5_folds.train_4.predict)=99.57%
[ 2019-05-11 17:08:31,646][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_3 - 5_folds.train_cv.predict)=99.55%
[ 2019-05-11 17:08:31,878][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_0.predict)=99.05%
[ 2019-05-11 17:08:32,087][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_1.predict)=99.16%
[ 2019-05-11 17:08:32,324][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_2.predict)=99.16%
[ 2019-05-11 17:08:32,543][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds.train_3.predict)=99.18%
[ 2019-05-11 17:08:32,758][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_4 - 5_folds

[ 2019-05-11 17:08:43,709][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_1.predict)=99.41%
[ 2019-05-11 17:08:43,870][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_2.predict)=99.52%
[ 2019-05-11 17:08:44,030][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_3.predict)=99.52%
[ 2019-05-11 17:08:44,190][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_4.predict)=99.42%
[ 2019-05-11 17:08:44,197][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_3 - 5_folds.train_cv.predict)=99.44%
[ 2019-05-11 17:08:44,255][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_4 - 5_folds.train_0.predict)=98.63%
[ 2019-05-11 17:08:44,299][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_4 - 5_folds.train_1.predict)=99.00%
[ 2019-05-11 17:08:44,344][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_4 - 5_folds

[ 2019-05-11 17:08:52,700][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_1.predict)=98.98%
[ 2019-05-11 17:08:52,987][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_2.predict)=98.72%
[ 2019-05-11 17:08:53,258][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_3.predict)=98.83%
[ 2019-05-11 17:08:53,548][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_4.predict)=99.05%
[ 2019-05-11 17:08:53,555][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_5 - 5_folds.train_cv.predict)=98.87%
[ 2019-05-11 17:08:53,843][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_6 - 5_folds.train_0.predict)=98.90%
[ 2019-05-11 17:08:54,136][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_6 - 5_folds.train_1.predict)=98.83%
[ 2019-05-11 17:08:54,417][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_6 - 5_folds

[ 2019-05-11 17:09:09,502][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_7 - 5_folds.train_3.predict)=99.46%
[ 2019-05-11 17:09:10,385][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_7 - 5_folds.train_4.predict)=99.46%
[ 2019-05-11 17:09:10,394][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_7 - 5_folds.train_cv.predict)=99.49%
[ 2019-05-11 17:09:10,407][cascade_classifier.calc_f1] Weighted F1 (layer_2 - train.classifier_average)=99.49%
[ 2019-05-11 17:09:10,422][cascade_classifier.fit_transform] [layer=3] look_indexs=[0], X_cur_train.shape=(26887, 52), X_cur_test.shape=(0, 52)
[ 2019-05-11 17:09:10,807][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_0 - 5_folds.train_0.predict)=99.55%
[ 2019-05-11 17:09:11,162][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_0 - 5_folds.train_1.predict)=99.48%
[ 2019-05-11 17:09:11,517][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_0 - 5_folds.

[ 2019-05-11 17:09:25,047][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_1 - 5_folds.train_3.predict)=99.46%
[ 2019-05-11 17:09:25,271][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_1 - 5_folds.train_4.predict)=99.35%
[ 2019-05-11 17:09:25,278][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_1 - 5_folds.train_cv.predict)=99.49%
[ 2019-05-11 17:09:25,525][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_0.predict)=99.37%
[ 2019-05-11 17:09:25,769][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_1.predict)=99.65%
[ 2019-05-11 17:09:26,014][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_2.predict)=99.40%
[ 2019-05-11 17:09:26,243][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds.train_3.predict)=99.53%
[ 2019-05-11 17:09:26,483][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_2 - 5_folds

[ 2019-05-11 17:09:39,259][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_3 - 5_folds.train_cv.predict)=99.49%
[ 2019-05-11 17:09:39,539][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_4 - 5_folds.train_0.predict)=98.79%
[ 2019-05-11 17:09:39,774][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_4 - 5_folds.train_1.predict)=98.94%
[ 2019-05-11 17:09:40,010][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_4 - 5_folds.train_2.predict)=98.90%
[ 2019-05-11 17:09:40,263][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_4 - 5_folds.train_3.predict)=98.92%
[ 2019-05-11 17:09:40,510][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_4 - 5_folds.train_4.predict)=99.03%
[ 2019-05-11 17:09:40,516][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_4 - 5_folds.train_cv.predict)=98.92%
[ 2019-05-11 17:09:40,768][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_5 - 5_fold

[ 2019-05-11 17:09:53,749][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_6 - 5_folds.train_1.predict)=98.72%
[ 2019-05-11 17:09:53,997][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_6 - 5_folds.train_2.predict)=98.83%
[ 2019-05-11 17:09:54,252][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_6 - 5_folds.train_3.predict)=99.11%
[ 2019-05-11 17:09:54,511][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_6 - 5_folds.train_4.predict)=99.00%
[ 2019-05-11 17:09:54,518][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_6 - 5_folds.train_cv.predict)=98.92%
[ 2019-05-11 17:09:55,161][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_7 - 5_folds.train_0.predict)=99.41%
[ 2019-05-11 17:09:55,752][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_7 - 5_folds.train_1.predict)=99.48%
[ 2019-05-11 17:09:56,351][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_7 - 5_folds

[ 2019-05-11 17:10:08,937][cascade_classifier.transform] X_test.shape=(6725, 4)
[ 2019-05-11 17:10:08,938][cascade_classifier.transform] [layer=0] look_indexs=[0], X_cur_test.shape=(6725, 4)
[ 2019-05-11 17:10:09,094][cascade_classifier.transform] [layer=1] look_indexs=[0], X_cur_test.shape=(6725, 52)
[ 2019-05-11 17:10:09,261][cascade_classifier.transform] [layer=2] look_indexs=[0], X_cur_test.shape=(6725, 52)
[ 2019-05-11 17:10:09,422][cascade_classifier.transform] [layer=3] look_indexs=[0], X_cur_test.shape=(6725, 52)
[ 2019-05-11 17:10:09,584][cascade_classifier.transform] [layer=4] look_indexs=[0], X_cur_test.shape=(6725, 52)
[ 2019-05-11 17:10:09,745][cascade_classifier.transform] X_groups_test.shape=[(26887, 4)]
[ 2019-05-11 17:10:09,746][cascade_classifier.transform] group_dims=[4]
[ 2019-05-11 17:10:09,747][cascade_classifier.transform] X_test.shape=(26887, 4)
[ 2019-05-11 17:10:09,748][cascade_classifier.transform] [layer=0] look_indexs=[0], X_cur_test.shape=(26887, 4)
[ 2019

[ 2019-05-11 17:10:17,297][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_7 - 5_folds.train_4.predict)=61.64%
[ 2019-05-11 17:10:17,303][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_0 - estimator_7 - 5_folds.train_cv.predict)=61.22%
[ 2019-05-11 17:10:17,309][cascade_classifier.calc_f1] Weighted F1 (layer_0 - train.classifier_average)=99.29%
[ 2019-05-11 17:10:17,319][cascade_classifier.fit_transform] [layer=1] look_indexs=[0], X_cur_train.shape=(26891, 52), X_cur_test.shape=(0, 52)
[ 2019-05-11 17:10:17,606][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_0.predict)=99.39%
[ 2019-05-11 17:10:17,888][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_1.predict)=99.59%
[ 2019-05-11 17:10:18,156][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_2.predict)=99.20%
[ 2019-05-11 17:10:18,422][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.

[ 2019-05-11 17:10:34,587][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_1 - 5_folds.train_4.predict)=99.42%
[ 2019-05-11 17:10:34,593][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_1 - 5_folds.train_cv.predict)=99.35%
[ 2019-05-11 17:10:34,857][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds.train_0.predict)=99.44%
[ 2019-05-11 17:10:35,115][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds.train_1.predict)=99.37%
[ 2019-05-11 17:10:35,356][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds.train_2.predict)=99.33%
[ 2019-05-11 17:10:35,608][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds.train_3.predict)=99.27%
[ 2019-05-11 17:10:35,866][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds.train_4.predict)=99.37%
[ 2019-05-11 17:10:35,872][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds

[ 2019-05-11 17:10:50,003][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_4 - 5_folds.train_0.predict)=98.55%
[ 2019-05-11 17:10:50,279][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_4 - 5_folds.train_1.predict)=98.73%
[ 2019-05-11 17:10:50,584][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_4 - 5_folds.train_2.predict)=98.75%
[ 2019-05-11 17:10:50,856][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_4 - 5_folds.train_3.predict)=98.68%
[ 2019-05-11 17:10:51,136][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_4 - 5_folds.train_4.predict)=98.51%
[ 2019-05-11 17:10:51,143][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_4 - 5_folds.train_cv.predict)=98.65%
[ 2019-05-11 17:10:51,430][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds.train_0.predict)=98.53%
[ 2019-05-11 17:10:51,714][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds

[ 2019-05-11 17:11:07,216][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_6 - 5_folds.train_2.predict)=98.59%
[ 2019-05-11 17:11:07,497][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_6 - 5_folds.train_3.predict)=98.85%
[ 2019-05-11 17:11:07,776][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_6 - 5_folds.train_4.predict)=98.46%
[ 2019-05-11 17:11:07,783][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_6 - 5_folds.train_cv.predict)=98.57%
[ 2019-05-11 17:11:08,433][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_0.predict)=99.46%
[ 2019-05-11 17:11:09,067][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_1.predict)=99.24%
[ 2019-05-11 17:11:09,670][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_2.predict)=99.27%
[ 2019-05-11 17:11:10,325][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds

[ 2019-05-11 17:11:23,257][cascade_classifier.transform] [layer=0] look_indexs=[0], X_cur_test.shape=(6721, 4)
[ 2019-05-11 17:11:23,413][cascade_classifier.transform] [layer=1] look_indexs=[0], X_cur_test.shape=(6721, 52)
[ 2019-05-11 17:11:23,584][cascade_classifier.transform] [layer=2] look_indexs=[0], X_cur_test.shape=(6721, 52)
[ 2019-05-11 17:11:23,751][cascade_classifier.transform] X_groups_test.shape=[(26891, 4)]
[ 2019-05-11 17:11:23,752][cascade_classifier.transform] group_dims=[4]
[ 2019-05-11 17:11:23,753][cascade_classifier.transform] X_test.shape=(26891, 4)
[ 2019-05-11 17:11:23,755][cascade_classifier.transform] [layer=0] look_indexs=[0], X_cur_test.shape=(26891, 4)
[ 2019-05-11 17:11:24,283][cascade_classifier.transform] [layer=1] look_indexs=[0], X_cur_test.shape=(26891, 52)
[ 2019-05-11 17:11:24,895][cascade_classifier.transform] [layer=2] look_indexs=[0], X_cur_test.shape=(26891, 52)
[ 2019-05-11 17:11:25,528][cascade_classifier.fit_transform] X_groups_train.shape=[(

[ 2019-05-11 17:11:30,381][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_0.predict)=99.44%
[ 2019-05-11 17:11:30,627][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_1.predict)=99.70%
[ 2019-05-11 17:11:30,859][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_2.predict)=99.29%
[ 2019-05-11 17:11:31,104][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_3.predict)=99.33%
[ 2019-05-11 17:11:31,348][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_4.predict)=99.35%
[ 2019-05-11 17:11:31,355][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_cv.predict)=99.42%
[ 2019-05-11 17:11:31,619][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_1 - 5_folds.train_0.predict)=99.48%
[ 2019-05-11 17:11:31,889][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_1 - 5_folds

[ 2019-05-11 17:11:45,908][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds.train_2.predict)=99.42%
[ 2019-05-11 17:11:46,146][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds.train_3.predict)=99.40%
[ 2019-05-11 17:11:46,388][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds.train_4.predict)=99.48%
[ 2019-05-11 17:11:46,394][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_2 - 5_folds.train_cv.predict)=99.44%
[ 2019-05-11 17:11:46,654][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_3 - 5_folds.train_0.predict)=99.52%
[ 2019-05-11 17:11:46,895][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_3 - 5_folds.train_1.predict)=99.48%
[ 2019-05-11 17:11:47,134][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_3 - 5_folds.train_2.predict)=99.39%
[ 2019-05-11 17:11:47,379][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_3 - 5_folds

[ 2019-05-11 17:12:01,353][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_4 - 5_folds.train_4.predict)=98.98%
[ 2019-05-11 17:12:01,360][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_4 - 5_folds.train_cv.predict)=98.90%
[ 2019-05-11 17:12:01,627][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds.train_0.predict)=99.07%
[ 2019-05-11 17:12:01,882][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds.train_1.predict)=98.85%
[ 2019-05-11 17:12:02,116][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds.train_2.predict)=98.57%
[ 2019-05-11 17:12:02,379][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds.train_3.predict)=98.77%
[ 2019-05-11 17:12:02,654][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds.train_4.predict)=98.98%
[ 2019-05-11 17:12:02,661][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds

[ 2019-05-11 17:12:16,240][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_0.predict)=99.57%
[ 2019-05-11 17:12:16,883][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_1.predict)=99.42%
[ 2019-05-11 17:12:17,509][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_2.predict)=99.35%
[ 2019-05-11 17:12:18,128][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_3.predict)=99.31%
[ 2019-05-11 17:12:18,779][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_4.predict)=99.53%
[ 2019-05-11 17:12:18,785][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_cv.predict)=99.44%
[ 2019-05-11 17:12:18,792][cascade_classifier.calc_f1] Weighted F1 (layer_4 - train.classifier_average)=99.42%
[ 2019-05-11 17:12:18,799][cascade_classifier.fit_transform] [layer=5] look_indexs=[0], X_cur_train.shape=(26892, 52), X

[ 2019-05-11 17:12:31,330][cascade_classifier.transform] [layer=0] look_indexs=[0], X_cur_test.shape=(26892, 4)
[ 2019-05-11 17:12:31,842][cascade_classifier.transform] [layer=1] look_indexs=[0], X_cur_test.shape=(26892, 52)
[ 2019-05-11 17:12:32,422][cascade_classifier.transform] [layer=2] look_indexs=[0], X_cur_test.shape=(26892, 52)
[ 2019-05-11 17:12:33,033][cascade_classifier.fit_transform] X_groups_train.shape=[(26892, 4)],y_train.shape=(26892,),X_groups_test.shape=no_test,y_test.shape=no_test
[ 2019-05-11 17:12:33,034][cascade_classifier.fit_transform] group_dims=[4]
[ 2019-05-11 17:12:33,035][cascade_classifier.fit_transform] group_starts=[0]
[ 2019-05-11 17:12:33,036][cascade_classifier.fit_transform] group_ends=[4]
[ 2019-05-11 17:12:33,037][cascade_classifier.fit_transform] X_train.shape=(26892, 4),X_test.shape=(0, 4)
[ 2019-05-11 17:12:33,039][cascade_classifier.fit_transform] [layer=0] look_indexs=[0], X_cur_train.shape=(26892, 4), X_cur_test.shape=(0, 4)
[ 2019-05-11 17:1

[ 2019-05-11 17:12:39,009][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_0 - 5_folds.train_cv.predict)=99.38%
[ 2019-05-11 17:12:39,285][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_1 - 5_folds.train_0.predict)=99.35%
[ 2019-05-11 17:12:39,549][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_1 - 5_folds.train_1.predict)=99.46%
[ 2019-05-11 17:12:39,793][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_1 - 5_folds.train_2.predict)=99.24%
[ 2019-05-11 17:12:40,061][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_1 - 5_folds.train_3.predict)=99.29%
[ 2019-05-11 17:12:40,316][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_1 - 5_folds.train_4.predict)=99.48%
[ 2019-05-11 17:12:40,323][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_1 - 5_folds.train_cv.predict)=99.36%
[ 2019-05-11 17:12:40,581][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_1 - estimator_2 - 5_fold

[ 2019-05-11 17:12:55,480][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_3 - 5_folds.train_1.predict)=99.41%
[ 2019-05-11 17:12:55,721][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_3 - 5_folds.train_2.predict)=99.35%
[ 2019-05-11 17:12:55,997][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_3 - 5_folds.train_3.predict)=99.44%
[ 2019-05-11 17:12:56,239][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_3 - 5_folds.train_4.predict)=99.20%
[ 2019-05-11 17:12:56,245][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_3 - 5_folds.train_cv.predict)=99.37%
[ 2019-05-11 17:12:56,561][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_4 - 5_folds.train_0.predict)=98.57%
[ 2019-05-11 17:12:56,835][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_4 - 5_folds.train_1.predict)=98.68%
[ 2019-05-11 17:12:57,126][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_2 - estimator_4 - 5_folds

[ 2019-05-11 17:13:11,001][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds.train_3.predict)=98.62%
[ 2019-05-11 17:13:11,268][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds.train_4.predict)=98.61%
[ 2019-05-11 17:13:11,275][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_5 - 5_folds.train_cv.predict)=98.65%
[ 2019-05-11 17:13:11,544][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_6 - 5_folds.train_0.predict)=98.72%
[ 2019-05-11 17:13:11,814][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_6 - 5_folds.train_1.predict)=98.85%
[ 2019-05-11 17:13:12,069][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_6 - 5_folds.train_2.predict)=98.70%
[ 2019-05-11 17:13:12,356][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_6 - 5_folds.train_3.predict)=98.72%
[ 2019-05-11 17:13:12,633][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_3 - estimator_6 - 5_folds

[ 2019-05-11 17:13:27,730][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_4 - estimator_7 - 5_folds.train_cv.predict)=99.39%
[ 2019-05-11 17:13:27,737][cascade_classifier.calc_f1] Weighted F1 (layer_4 - train.classifier_average)=99.37%
[ 2019-05-11 17:13:27,743][cascade_classifier.fit_transform] [layer=5] look_indexs=[0], X_cur_train.shape=(26892, 52), X_cur_test.shape=(0, 52)
[ 2019-05-11 17:13:27,997][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_0 - 5_folds.train_0.predict)=99.29%
[ 2019-05-11 17:13:28,241][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_0 - 5_folds.train_1.predict)=99.35%
[ 2019-05-11 17:13:28,475][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_0 - 5_folds.train_2.predict)=99.42%
[ 2019-05-11 17:13:28,719][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_0 - 5_folds.train_3.predict)=99.24%
[ 2019-05-11 17:13:28,953][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_5 - estimator_0 - 5_folds.

[ 2019-05-11 17:13:42,241][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_1 - 5_folds.train_cv.predict)=99.35%
[ 2019-05-11 17:13:42,508][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_2 - 5_folds.train_0.predict)=99.42%
[ 2019-05-11 17:13:42,755][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_2 - 5_folds.train_1.predict)=99.42%
[ 2019-05-11 17:13:42,989][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_2 - 5_folds.train_2.predict)=99.41%
[ 2019-05-11 17:13:43,229][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_2 - 5_folds.train_3.predict)=99.29%
[ 2019-05-11 17:13:43,472][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_2 - 5_folds.train_4.predict)=99.28%
[ 2019-05-11 17:13:43,479][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_2 - 5_folds.train_cv.predict)=99.36%
[ 2019-05-11 17:13:43,743][kfold_wrapper.log_eval_metrics] Weighted F1 (layer_6 - estimator_3 - 5_fold

In [14]:
m=LogisticRegression()

In [15]:
# function：使用5折交叉验证统计各类别5次平均后的Acc，5次平均后的F1，和模型的总Acc以及总Weighted F1

# Input: 
#     X: 总样本
#     y: 总样本
#     model: function
#     cv: cross_validation的次数
# Output:
#     Acc_mean, 各类别的Acc
#     F1_mean, 各类别F1
#     Support_mean, 各类别预测样本占总样本的比重
#     Acc, 总Acc
#     F1_weighted 总Weighted F1
# function：使用5折交叉验证统计各类别5次平均后的Acc，5次平均后的F1，和模型的总Acc以及总Weighted F1

# Input: 
#     X: 总样本
#     y: 总样本
#     model: function
#     cv: cross_validation的次数
# Output:
#     Acc_mean, 各类别的Acc
#     F1_mean, 各类别F1
#     Support_mean, 各类别预测样本占总样本的比重
#     Acc, 总Acc
#     F1_weighted 总Weighted F1


def kftrain_copy(X, y, model, cv):
    model_name = model.__class__.__name__

    n_samples = X.shape[0]
    n_features = X.shape[1]
    n_class = np.unique(y).shape[0]
    print(n_samples)
    
    Acc_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 第i行：类别i的在cv组test data的cv个acc
    _Acc_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 第i行：类别i的在cv组train data的cv个acc
    F1_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 第i行：类别i的在cv组test data的cv个acc
    _F1_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 第i行：类别i的在cv组train data的cv个acc
    cv_F1_weighted = np.zeros(cv) # 5次 test data 的F1_weighted
    _cv_F1_weighted = np.zeros(cv) # 5次 train data 的F1_weighted
    cv_Acc = np.zeros(cv) # 5次test data的Acc
    _cv_Acc = np.zeros(cv) # 5次train data的Acc
    # 各类别的Support，每个类别的support由5次fold后得到的5个support求平均得到，这里support：各类别样本占总样本的比列
    # 各类别的F1，由5次fold后得到的5个F1求平均得到
    # 所以，Weighted F1 等于“5次fold得到的5个 Weighted F1 求平均得到” ——等价于—— “6个support*6个F1”得到
    Support_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 5次fold后test data各类别的support
    _Support_matrix = np.zeros((n_class, cv)) # 矩阵(6,5) 5次fold后train data各类别的support
    
    # 各类别的Mean，SD
    Acc_mean_class = np.zeros(6)
    _Acc_mean_class = np.zeros(6)
    Acc_SD_class = np.zeros(6)
    _Acc_SD_class = np.zeros(6)
    F1_mean_class = np.zeros(6)
    _F1_mean_class = np.zeros(6)
    F1_SD_class = np.zeros(6)
    _F1_SD_class = np.zeros(6)
    
    k = 0
    skf = StratifiedKFold(n_splits=cv) # 定义5折分层划分器
    
    for train_index, test_index in skf.split(X, y):
        K_train_x, K_test_x = X[train_index], X[test_index]
        K_train_y, K_test_y = y[train_index], y[test_index]
        print(K_train_x.shape[0])
        print(K_test_x.shape[0])
        if model_name == 'GCForest':
            model.fit_transform(K_train_x, K_train_y.reshape(K_train_y.shape[0]))
        else:
            model.fit(K_train_x, K_train_y)
    
        K_test_y_pred = model.predict(K_test_x)
        K_train_y_pred = model.predict(K_train_x)
        
        # 由混淆矩阵计算各类别的Acc
        test_cm = confusion_matrix(K_test_y, K_test_y_pred)
        train_cm = confusion_matrix(K_train_y, K_train_y_pred)
        test_acc_all_class = np.zeros(n_class) # 6个类别上的测试acc
        train_acc_all_class = np.zeros(n_class) # 6个类别上的训练acc
        
        i = 0
        for c in test_cm:
            test_acc_all_class[i] = c[i]/np.sum(c)
            i += 1
        Acc_matrix[:,k] = test_acc_all_class
        
        i = 0
        for c in train_cm:
            train_acc_all_class[i] = c[i]/np.sum(c)
            i += 1
        _Acc_matrix[:,k] = train_acc_all_class
        
        # 由classification_report提取各类别的F1
        cr = classification_report(K_test_y, K_test_y_pred, digits=4)
        _cr = classification_report(K_train_y, K_train_y_pred, digits=4)
        test_f1_all_class = np.zeros(n_class) # test data 6个类别上的测试f1
        train_f1_all_class = np.zeros(n_class) # train data 6个类别上的训练f1
        support_all_class = np.zeros(n_class) # test data 6个类别的support
        _support_all_class = np.zeros(n_class) # train data 6个类别的support
        print(cr)
        print(_cr)
        i = 0
        for l in range(2,8):
            test_f1_all_class[i] = float(cr.splitlines()[l].split()[3])
            train_f1_all_class[i] = float(_cr.splitlines()[l].split()[3])
            support_all_class[i] = float(cr.splitlines()[l].split()[4])/(n_samples*0.2)
            _support_all_class[i] = float(_cr.splitlines()[l].split()[4])/(n_samples*0.8)
            i = i + 1
        F1_matrix[:,k] = test_f1_all_class
        _F1_matrix[:,k] = train_f1_all_class
        Support_matrix[:,k] = support_all_class
        _Support_matrix[:,k] = _support_all_class
        
        # cv_F1_weighted[k] = float(cr.splitlines()[9].split()[5]) # 第k折时的Weighted F1
        cv_F1_weighted[k] = f1_score(K_test_y, K_test_y_pred, average="weighted")
        _cv_F1_weighted[k] = f1_score(K_train_y, K_train_y_pred, average="weighted")
        cv_Acc[k] = accuracy_score(K_test_y, K_test_y_pred)
        _cv_Acc[k] = accuracy_score(K_train_y, K_train_y_pred)
        
        k += 1
    
    Acc_mean = np.mean(Acc_matrix, axis=1) # 6个类别的k-fold平均acc
    _Acc_mean = np.mean(_Acc_matrix, axis=1) # 6个类别的k-fold平均acc
    
    F1_mean = np.mean(F1_matrix, axis=1) # 6个类别的k-fold平均F1
    _F1_mean = np.mean(_F1_matrix, axis=1) # 6个类别的k-fold平均F1
    Support_mean = np.mean(Support_matrix, axis=1) # 6个类别的k-fold平均support
    _Support_mean = np.mean(_Support_matrix, axis=1) # 6个类别的k-fold平均support
    
    Acc = np.mean(Acc_mean)
    _Acc = np.mean(_Acc_mean)
    F1_weighted = np.mean(cv_F1_weighted) # 5个Weighted F1 求平均，并可以证明其等价于 np.sum(F1_mean*Support_mean)
    _F1_weighted = np.mean(_cv_F1_weighted) # 5个Weighted F1 求平均，并可以证明其等价于 np.sum(F1_mean*Support_mean)
    SD_Acc = np.std(cv_Acc)
    _SD_Acc = np.std(cv_Acc)
    SD_F1 = np.std(cv_F1_weighted)
    _SD_F1 = np.std(cv_F1_weighted)
    
    Acc_mean_class = np.mean(Acc_matrix, axis=1)
    _Acc_mean_class = np.mean(_Acc_matrix, axis=1)
    Acc_SD_class = np.std(Acc_matrix, axis=1)
    _Acc_SD_class = np.std(_Acc_matrix, axis=1)
    F1_mean_class = np.mean(F1_matrix, axis=1)
    _F1_mean_class = np.mean(_F1_matrix, axis=1)
    F1_SD_class = np.std(F1_matrix, axis=1)
    _F1_SD_class = np.std(_F1_matrix, axis=1)
    
    # return Acc_mean, F1_mean, Support_mean, Acc, F1_weighted, SD_Acc, SD_F1, Acc_mean_class, Acc_SD_class, F1_mean_class, F1_SD_class
    return Acc_matrix, F1_matrix, Support_matrix, _Acc_matrix, _F1_matrix, _Support_matrix, Acc_mean, _Acc_mean, F1_mean, _F1_mean, Acc, _Acc, F1_weighted, _F1_weighted, SD_Acc, _SD_Acc, SD_F1, _SD_F1, Acc_mean_class, _Acc_mean_class, Acc_SD_class, _Acc_SD_class, F1_mean_class, _F1_mean_class, F1_SD_class, _F1_SD_class

In [17]:
kftrain_copy(X, y, m, 5)

33612
26886
6726


  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

          0     0.8269    0.0881    0.1593       488
          1     0.6759    0.9857    0.8019      2655
          2     0.6098    0.5915    0.6005      1760
          3     0.6150    0.3808    0.4704      1095
          4     0.0000    0.0000    0.0000       298
          5     0.7050    0.6837    0.6942       430

avg / total     0.6316    0.6560    0.6062      6726

             precision    recall  f1-score   support

          0     0.9559    0.0667    0.1246      1950
          1     0.6813    0.9928    0.8081     10617
          2     0.6482    0.5892    0.6173      7037
          3     0.6211    0.4542    0.5247      4377
          4     0.0000    0.0000    0.0000      1189
          5     0.7984    0.7780    0.7881      1716

avg / total     0.6601    0.6747    0.6254     26886

26887
6725
             precision    recall  f1-score   support

          0     1.0000    0.0082    0.0163       488
          1     0.6816    0.

(array([[0.08811475, 0.00819672, 0.0307377 , 0.08213552, 0.07802875],
        [0.98568738, 0.99246704, 0.99171063, 0.99321778, 0.99547852],
        [0.59147727, 0.60795455, 0.5997726 , 0.53780557, 0.54519613],
        [0.38082192, 0.49680365, 0.42595978, 0.45155393, 0.45155393],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.68372093, 0.75058275, 0.85314685, 0.83449883, 0.67132867]]),
 array([[0.1593, 0.0163, 0.0596, 0.1501, 0.1445],
        [0.8019, 0.8082, 0.8086, 0.7976, 0.7969],
        [0.6005, 0.6302, 0.6217, 0.5863, 0.5907],
        [0.4704, 0.5643, 0.5038, 0.5211, 0.5117],
        [0.    , 0.    , 0.    , 0.    , 0.    ],
        [0.6942, 0.7902, 0.8243, 0.8164, 0.7138]]),
 array([[0.07259312, 0.07259312, 0.07259312, 0.07244437, 0.07244437],
        [0.39494823, 0.39494823, 0.39479948, 0.39479948, 0.39479948],
        [0.26181126, 0.26181126, 0.2616625 , 0.2616625 , 0.2616625 ],
        [0.16288825, 0.16288825, 0.1627395 , 0.1627395 , 0.1627395