In [None]:
import pandas as pd
import numpy as np
import gc
from scipy import interpolate
from tqdm import tqdm
import matplotlib.pyplot as plt
import random
import pickle
from sklearn.model_selection import GridSearchCV
from sklearn import tree
from sklearn.model_selection import cross_val_score
from lightgbm.sklearn import LGBMClassifier
from imblearn.over_sampling import SMOTE, BorderlineSMOTE
from imblearn.combine import SMOTEENN
from imblearn.under_sampling import RandomUnderSampler
from datetime import datetime, timedelta
from sklearn.metrics import plot_roc_curve,roc_curve,auc,roc_auc_score
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score, roc_curve, auc, classification_report
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.metrics import confusion_matrix 
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_selection import SelectKBest, chi2, f_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.neural_network import MLPClassifier

In [None]:
# Load the engineered train / validation tables and take a stratified
# 40% subsample of the training set for model fitting.


def _read_without_index_column(csv_path):
    """Read ``csv_path`` and drop the 'Unnamed: 0' column when it is present.

    Returns a ``(frame, kept_columns)`` pair: the filtered DataFrame and the
    list of column names that were retained.
    """
    table = pd.read_csv(csv_path)
    # 'Unnamed: 0' is presumably a leftover index column from an earlier
    # to_csv call -- confirm against the feature-engineering notebook.
    kept_columns = [name for name in table.columns if name != 'Unnamed: 0']
    return table[kept_columns], kept_columns


train_x, usecol_x = _read_without_index_column(
    '/Users/yangxianjie/测试项目/AI/cube_data/train_x_12_feature_engineering.csv'
)
train_y, usecol_y = _read_without_index_column(
    '/Users/yangxianjie/测试项目/AI/cube_data/train_y_12_feature_engineering.csv'
)

val_x, usecol_val_x = _read_without_index_column(
    '/Users/yangxianjie/测试项目/AI/cube_data/valid_x_12_feature_engineering.csv'
)
val_y, usecol_val_y = _read_without_index_column(
    '/Users/yangxianjie/测试项目/AI/cube_data/valid_y_12_feature_engineering.csv'
)

# Keep 40% of the training rows, stratified on the label so the class ratio
# of the subsample matches the full training set; the remainder is discarded.
input_x, _, input_y, _ = train_test_split(
    train_x,
    train_y,
    train_size=0.4,
    random_state=2021,
    stratify=train_y,
)

# Candidate classifiers compared in this notebook. Every stochastic model is
# given an explicit seed so a fresh "Restart & Run All" reproduces the scores.

# Hyper-parameters shared by the stand-alone decision tree and the tree that
# AdaBoost boosts; kept in one place so the two cannot drift apart.
tree_params = dict(
    splitter='best',
    min_samples_leaf=1,
    min_samples_split=2,
    max_depth=45,
    max_features=3,
    random_state=2021,
)

# Decision Tree
dtc = tree.DecisionTreeClassifier(**tree_params)

# SVM
scl = svm.SVC(C=1,
              kernel='rbf',
              gamma=1,
              decision_function_shape='ovr')

# Random Forest
rfc = RandomForestClassifier(n_estimators=94,
                             random_state=2021)

# AdaBoost
weakClassifier = tree.DecisionTreeClassifier(**tree_params)

# AdaBoost re-seeds every cloned base estimator from its own random_state, so
# the seed on weakClassifier alone does not make the ensemble reproducible
# (the trees are randomised because max_features < n_features). Seeding the
# booster itself fixes that.
# NOTE(review): newer scikit-learn releases rename `base_estimator` to
# `estimator`; the old name is kept because this notebook also imports
# `plot_roc_curve`, which only exists in the older releases.
abc = AdaBoostClassifier(base_estimator=weakClassifier,
                         n_estimators=91,
                         learning_rate=0.1,
                         random_state=2021)

# Neural Network
mlf = MLPClassifier(solver='adam',
                    activation='relu',
                    learning_rate='adaptive',
                    learning_rate_init=0.001,
                    random_state=1)

# Fit every candidate on the training subsample, report validation metrics,
# then overlay the ROC curves of all models in one figure.
#
# ROC AUC and the ROC curve are computed from continuous scores
# (predict_proba / decision_function). Feeding them hard 0/1 predictions
# collapses each curve to a single operating point and turns "AUC" into
# balanced accuracy, which is not what a ROC comparison is meant to show.


def _positive_class_scores(model, features):
    """Return one continuous positive-class score per row of ``features``.

    Uses ``predict_proba`` when the fitted model exposes it (column 1 follows
    ``model.classes_[1]``, the larger label, which is the positive class under
    the metrics' default ``pos_label``). Otherwise falls back to
    ``decision_function`` -- this is the path taken by ``svm.SVC`` when it is
    built without ``probability=True``.
    """
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(features)[:, 1]
    return model.decision_function(features)


def _fit_and_report(tag, model):
    """Fit ``model`` on (input_x, input_y) and print its validation metrics.

    ``tag`` is the short suffix used in the printed metric names
    (e.g. ``'dtc'``). Reads the notebook-level ``input_x``, ``input_y``,
    ``val_x`` and ``val_y``.

    Returns ``(predictions, scores, roc_auc, recall)`` where ``predictions``
    are the hard class labels and ``scores`` the continuous positive-class
    scores on the validation set.
    """
    model.fit(input_x, input_y)
    predictions = model.predict(val_x)
    scores = _positive_class_scores(model, val_x)

    # Threshold-free metric: needs scores, not labels.
    roc_auc = roc_auc_score(val_y, scores)
    print('split_score_roc_auc_' + tag + '=', roc_auc)

    # Threshold-dependent metrics: use the model's own hard predictions.
    recall = recall_score(val_y, predictions)
    print('split_score_recall_' + tag + '=', recall)

    print(classification_report(val_y, predictions))
    print(confusion_matrix(val_y, predictions))
    print('=========================================')
    return predictions, scores, roc_auc, recall


# Decision Tree
(output_pred_dtc, output_score_dtc,
 split_score_roc_auc_dtc, split_score_recall_dtc) = _fit_and_report('dtc', dtc)

# SVM
(output_pred_scl, output_score_scl,
 split_score_roc_auc_scl, split_score_recall_scl) = _fit_and_report('scl', scl)

# Random Forest
(output_pred_rfc, output_score_rfc,
 split_score_roc_auc_rfc, split_score_recall_rfc) = _fit_and_report('rfc', rfc)

# AdaBoost
(output_pred_abc, output_score_abc,
 split_score_roc_auc_abc, split_score_recall_abc) = _fit_and_report('abc', abc)

# Neural Network
(output_pred_mlf, output_score_mlf,
 split_score_roc_auc_mlf, split_score_recall_mlf) = _fit_and_report('mlf', mlf)

# ROC Curve (one point per distinct score threshold)
fpr_dt, tpr_dt, thres_dt = roc_curve(val_y, output_score_dtc)
fpr_rf, tpr_rf, thres_rf = roc_curve(val_y, output_score_rfc)
fpr_ab, tpr_ab, thres_ab = roc_curve(val_y, output_score_abc)
fpr_sv, tpr_sv, thres_sv = roc_curve(val_y, output_score_scl)
fpr_ml, tpr_ml, thres_ml = roc_curve(val_y, output_score_mlf)

# Create the canvas
fig, ax = plt.subplots(figsize=(10, 8))

# One line per model, labelled for the legend
for fpr, tpr, model_label in [
    (fpr_dt, tpr_dt, 'Decision Tree'),
    (fpr_rf, tpr_rf, 'Random Forest'),
    (fpr_ab, tpr_ab, 'AdaBoost'),
    (fpr_sv, tpr_sv, 'SVM'),
    (fpr_ml, tpr_ml, 'MLP'),
]:
    ax.plot(fpr, tpr, linewidth=1.0, label=model_label)

# Diagonal = performance of a random classifier
ax.plot([0, 1], [0, 1], linestyle='--', color='grey')

# A figure should stand on its own when the notebook is skimmed
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
ax.set_title('ROC curves on the validation set')

# Adjust font size
ax.legend(fontsize=12)

plt.show()