In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTEN
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, f1_score, confusion_matrix, roc_curve
from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV 
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold
from imblearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import plot_precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [4]:
data=pd.read_csv(r'C:\Users\Anaconda\Desktop\bank.csv',index_col=0)
print(data.shape)

(119535, 93)


In [5]:
for col_name in data.columns:
    data[col_name]=pd.Categorical(data[col_name])

In [6]:
train_data, test_data = train_test_split(data,test_size=0.2, random_state=100)
print(train_data.shape)
print(test_data.shape)

(95628, 93)
(23907, 93)


In [7]:
X_train = train_data.iloc[:,1:]
y_train = train_data['TARGET']
X_test = test_data.iloc[:,1:]
y_test = test_data['TARGET']

In [8]:
smoten = SMOTEN()
X_train_samp, y_train_samp = smoten.fit_resample(X_train,y_train)

In [9]:
def get_eval(y_test,pred=None):
    confusion=confusion_matrix(y_test,pred)
    accuracy=accuracy_score(y_test,pred)
    precision=precision_score(y_test,pred)
    recall=recall_score(y_test, pred)
    f1 = f1_score(y_test,pred)
    precision2=precision_score(y_test,pred,pos_label=0)
    recall2=recall_score(y_test,pred,pos_label=0)
    f1_2=f1_score(y_test,pred,pos_label=0)
    print("confusion matrix")
    print(confusion)
    print('accuracy : {0:.4f} , precision : {1:.4f} , recall : {2:.4f}, F1 score : {3:.4f} \n precision(0) : {4:.4f} , recall(0) : {5:.4f}, f1(0) : {6:.4f}'.format(accuracy,precision, recall,f1,precision2,recall2,f1_2))
    
def precision_recall_curve_plot(y_test, pred_proba_cl):
    precisions, recalls, thresholds = precision_recall_curve(y_test, pred_proba_cl)
    plt.figure(figsize=(8,6))
    threshold_boundary = thresholds.shape[0]
    plt.plot(thresholds, precisions[0:threshold_boundary], linestyle='--', label = 'precision')
    plt.plot(thresholds, recalls[0:threshold_boundary], label = 'recall')
    start, end = plt.xlim()
    plt.xticks(np.round(np.arange(start, end, 0.1),2))
    plt.xlabel('Threshold value')
    plt.ylabel('Precision and Recall value')
    plt.legend()
    plt.grid()
    plt.show()

def roc_curve_plot(y_test, pred_proba_c1):
    fprs, tprs, thresholds = roc_curve(y_test, pred_proba_c1)
    plt.plot(fprs, tprs, label = 'ROC')
    plt.plot([0,1],[0,1],'k--',label='Random')
    start, end = plt.xlim()
    plt.xticks(np.round(np.arange(start,end,0.1),2))
    plt.xlim(0,1);plt.ylim(0,1)
    plt.xlabel('FPR(1-Sensitivity)');plt.ylabel('TPR(Recall)')
    plt.legend()

# 사이킷런을 이용한 MLP 모델 

In [10]:
# 샘플링 없을 때
scaler = StandardScaler()   # 객체 만들기
scaler.fit(X_train)     # 변환 규칙을 익히기
x_train_scaled = scaler.transform(X_train)  # 데이터를 표준화 전처리
 
mlp = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic', solver='sgd', alpha=0.01, batch_size=32,learning_rate_init=0.1, max_iter=500)  # 객체 생성
 
mlp.fit(x_train_scaled, y_train)    # 훈련하기
pred = mlp.predict(X_test)
get_eval(y_test,pred)      # 정확도 평가

confusion matrix
[[23751     0]
 [  156     0]]
accuracy : 0.9935 , precision : 0.0000 , recall : 0.0000, F1 score : 0.0000 
 precision(0) : 0.9935 , recall(0) : 1.0000, f1(0) : 0.9967


  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
# logistic activation function을 이용
scaler = StandardScaler()   # 객체 만들기
scaler.fit(X_train_samp)     # 변환 규칙을 익히기
x_train_samp_scaled = scaler.transform(X_train_samp)  # 데이터를 표준화 전처리
 
mlp = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic', solver='sgd', alpha=0.01, batch_size=32,learning_rate_init=0.1, max_iter=500)  # 객체 생성
 
mlp.fit(x_train_samp_scaled, y_train_samp)    # 훈련하기
pred = mlp.predict(X_test)
get_eval(y_test,pred)      # 정확도 평가

confusion matrix
[[17925  5826]
 [  101    55]]
accuracy : 0.7521 , precision : 0.0094 , recall : 0.3526, F1 score : 0.0182 
 precision(0) : 0.9944 , recall(0) : 0.7547, f1(0) : 0.8581


In [15]:
# ReLU activation function을 이용
scaler = StandardScaler()   # 객체 만들기
scaler.fit(X_train_samp)     # 변환 규칙을 익히기
x_train_samp_scaled = scaler.transform(X_train_samp)  # 데이터를 표준화 전처리
 
mlp = MLPClassifier(activation='relu')  # 객체 생성
 
mlp.fit(x_train_samp_scaled, y_train_samp)    # 훈련하기
pred = mlp.predict(X_test)
get_eval(y_test,pred)      # 정확도 평가

confusion matrix
[[12459 11292]
 [   50   106]]
accuracy : 0.5256 , precision : 0.0093 , recall : 0.6795, F1 score : 0.0183 
 precision(0) : 0.9960 , recall(0) : 0.5246, f1(0) : 0.6872


In [14]:
scaler = StandardScaler()   # 객체 만들기
scaler.fit(X_train_samp)     # 변환 규칙을 익히기
x_train_samp_scaled = scaler.transform(X_train_samp)  # 데이터를 표준화 전처리
 
mlp = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', solver='sgd', alpha=0.01, batch_size=32,learning_rate_init=0.1, max_iter=500)  # 객체 생성
 
mlp.fit(x_train_samp_scaled, y_train_samp)    # 훈련하기
pred = mlp.predict(X_test)
get_eval(y_test,pred)      # 정확도 평가

confusion matrix
[[ 9972 13779]
 [   10   146]]
accuracy : 0.4232 , precision : 0.0105 , recall : 0.9359, F1 score : 0.0207 
 precision(0) : 0.9990 , recall(0) : 0.4199, f1(0) : 0.5912


In [16]:
parameter_space = {
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

In [None]:
grid_cv = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3, verbose = 2)
grid_cv.fit(x_train_samp_scaled, y_train_samp)

print('최적 하이퍼 파라미터: ', grid_cv.best_params_)
print('최고 예측 정확도: {:.4f}'.format(grid_cv.best_score_))

Fitting 3 folds for each of 48 candidates, totalling 144 fits
