In [None]:
import pandas as pd
import numpy as np
import gc
from scipy import interpolate
from tqdm import tqdm
import matplotlib.pyplot as plt
import random
import pickle
from sklearn.model_selection import GridSearchCV
from sklearn import tree
from sklearn.model_selection import cross_val_score
from lightgbm.sklearn import LGBMClassifier
from imblearn.over_sampling import SMOTE, BorderlineSMOTE
from imblearn.combine import SMOTEENN
from imblearn.under_sampling import RandomUnderSampler
from datetime import datetime, timedelta
from sklearn.metrics import plot_roc_curve,roc_curve,auc,roc_auc_score
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score, roc_curve, auc, classification_report
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.metrics import confusion_matrix 
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_selection import SelectKBest, chi2, f_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.neural_network import MLPClassifier

In [None]:
def _read_without_index_column(csv_path):
    """Read a CSV file and drop its 'Unnamed: 0' column when present.

    Parameters
    ----------
    csv_path : str
        Location of the CSV file to load.

    Returns
    -------
    tuple
        ``(frame, kept_columns)`` where ``frame`` is the loaded DataFrame
        restricted to ``kept_columns`` and ``kept_columns`` is the list of
        column names other than 'Unnamed: 0'.
    """
    loaded = pd.read_csv(csv_path)
    # 'Unnamed: 0' is presumably a leftover index column from an earlier
    # DataFrame.to_csv call -- TODO confirm against the export step.
    kept_columns = [name for name in loaded.columns if name != 'Unnamed: 0']
    return loaded[kept_columns], kept_columns


# Training features and labels.
train_x, usecol_x = _read_without_index_column(
    '/Users/yangxianjie/测试项目/AI/cube_data/train_x_12_feature_engineering.csv'
)
train_y, usecol_y = _read_without_index_column(
    '/Users/yangxianjie/测试项目/AI/cube_data/train_y_12_feature_engineering.csv'
)

# Validation features and labels.
val_x, usecol_val_x = _read_without_index_column(
    '/Users/yangxianjie/测试项目/AI/cube_data/valid_x_12_feature_engineering.csv'
)
val_y, usecol_val_y = _read_without_index_column(
    '/Users/yangxianjie/测试项目/AI/cube_data/valid_y_12_feature_engineering.csv'
)

# Keep a stratified 40% subsample of the training data for fitting; the
# remaining 60% returned by the split is discarded.
input_x, _, input_y, _ = train_test_split(
    train_x,
    train_y,
    train_size=0.4,
    random_state=2021,
    stratify=train_y,
)

# Settings shared by the standalone decision tree and the tree used as the
# AdaBoost weak learner (both are configured identically).
_tree_settings = dict(
    splitter='best',
    min_samples_leaf=1,
    min_samples_split=2,
    max_depth=45,
    max_features=3,
    random_state=2021,
)

# Decision Tree
dtc = tree.DecisionTreeClassifier(**_tree_settings)

# SVM (RBF kernel)
scl = svm.SVC(C=1, kernel='rbf', gamma=1, decision_function_shape='ovr')

# Random Forest
rfc = RandomForestClassifier(n_estimators=94, random_state=2021)

# AdaBoost, boosting a separate (unfitted) tree instance
weakClassifier = tree.DecisionTreeClassifier(**_tree_settings)
abc = AdaBoostClassifier(
    base_estimator=weakClassifier,
    n_estimators=91,
    learning_rate=0.1,
)

# Neural Network
mlf = MLPClassifier(
    solver='adam',
    activation='relu',
    learning_rate='adaptive',
    learning_rate_init=0.001,
    random_state=1,
)

# The labels arrive as a one-column DataFrame (read from CSV). Several
# scikit-learn classifiers expect a 1-D label vector and emit a
# DataConversionWarning for column vectors, so flatten the labels once.
# NOTE(review): assumes the label file holds exactly one label column.
train_labels = np.ravel(input_y)

# Fit every candidate model on the same training subsample.
for model in (dtc, scl, rfc, abc, mlf):
    model.fit(input_x, train_labels)

def _positive_class_scores(model, features):
    """Return a continuous positive-class score for each row of ``features``.

    ROC-AUC is a ranking metric, so it needs continuous scores rather than
    hard class labels. Uses ``predict_proba`` when the model exposes it and
    falls back to ``decision_function`` otherwise (e.g. an SVC created
    without ``probability=True``).

    NOTE(review): assumes a binary problem, so column 1 of ``predict_proba``
    is the score of the greater (positive) label.
    """
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(features)[:, 1]
    return model.decision_function(features)


def _report_metrics(prefix, test_size, y_true, y_pred, y_score):
    """Print ROC-AUC, precision, recall, F1, the classification report and
    the confusion matrix for one model on one validation split.

    Parameters
    ----------
    prefix : str
        Short model tag used at the start of each printed metric name.
    test_size : float
        Fraction of the validation data used for this split (printed only).
    y_true : array-like
        Ground-truth labels of the split.
    y_pred : array-like
        Hard class predictions; used for every metric except ROC-AUC.
    y_score : array-like
        Continuous positive-class scores; used for ROC-AUC.
    """
    print('{0}_test_score_roc_auc_{1}={2}'.format(
        prefix, test_size, roc_auc_score(y_true, y_score)))

    label_metrics = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
    for metric_name, metric_fn in label_metrics:
        print('{0}_test_score_{1}_{2}={3}'.format(
            prefix, metric_name, test_size, metric_fn(y_true, y_pred)))

    print(classification_report(y_true, y_pred))
    print(confusion_matrix(y_true, y_pred))
    print('==============================')


# (tag, fitted model) pairs in the order their reports are printed.
_fitted_models = (
    ('dt', dtc),   # Decision Tree
    ('sv', scl),   # SVM
    ('rf', rfc),   # Random Forest
    ('ab', abc),   # AdaBoost
    ('ml', mlf),   # Neural Network
)

for i in [0.2, 0.4, 0.6, 0.8]:
    # Only the held-out part of each stratified split is evaluated; the
    # complementary part is not needed.
    _, val_test_x, _, val_test_y = train_test_split(
        val_x, val_y, test_size=i, random_state=2021, stratify=val_y)

    for prefix, model in _fitted_models:
        _report_metrics(
            prefix,
            i,
            val_test_y,
            model.predict(val_test_x),
            _positive_class_scores(model, val_test_x),
        )
print('####################################')