In [5]:
import pandas as pd
import numpy as np
import warnings
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.python.keras import callbacks
from keras import backend as K
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from keras.layers import Dense, Dropout
from sklearn.model_selection import KFold
from keras.optimizers import Adam, SGD
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

warnings.filterwarnings(action='ignore')
tf.config.set_visible_devices([], 'GPU')

In [6]:
df = pd.read_csv("data/modified_0420.csv")
x_train = df.drop(columns=['loan_status','Unnamed: 0'])
y_train = df['loan_status']

In [7]:
nan_indices = np.isnan(x_train).any(axis=1)
x_train = x_train[~nan_indices]
y_train = y_train[~nan_indices]

In [8]:
# minmax scaler 찾아보기
scaler = StandardScaler()
X_scaled = scaler.fit_transform(x_train)

# PCA로 차원 축소
pca = PCA(n_components=50)
X_pca = pca.fit_transform(X_scaled)
x_train = X_pca.astype('float32')
# stratify로 학습에 용이하게 비율 유지
x_train, x_validation, y_train, y_validation = train_test_split(x_train, y_train, test_size=0.2, stratify=y_train)
y_train=to_categorical(y_train, 2).astype(int)
y_validation=to_categorical(y_validation, 2).astype(int)

In [11]:
def f1_score(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())    
    f1_val = 2 * (precision * recall) / (precision + recall + K.epsilon())
    return f1_val

def recall(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

### without_earlystopping

In [15]:
model_file_path = "dnn_models/log_transformed/pca95_feature50/without_earlystopping_0421.h5"
valid_accs, valid_f1s, valid_recalls, valid_precisions = [], [], [], []

# GPU 설정
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
   tf.config.experimental.set_memory_growth(physical_devices[0], True)
# KFold validation 사용
# k: n_splits=5
kf = KFold(random_state=30,
           n_splits=5,
           shuffle=True
          )
model = tf.keras.models.load_model(model_file_path, custom_objects={"f1_score":f1_score, "recall":recall, "precision":precision})
for train_index, val_index in kf.split(x_train, y_train):
    X_train_fold, X_val_fold = x_train[train_index], x_train[val_index]
    Y_train_fold, Y_val_fold = y_train[train_index], y_train[val_index]

    # 모델 validation
    valid_loss, valid_acc, valid_f1, valid_recall, valid_precision = model.evaluate(X_val_fold, Y_val_fold)
    valid_accs.append(valid_acc)
    valid_f1s.append(valid_f1)
    valid_recalls.append(valid_recall)
    valid_precisions.append(valid_precision)
    print("===================================")
    print("Validation accuracy:", valid_acc)
    print("Validation F1-score:", valid_f1)
    print("Validation recall:", valid_recall)
    print("Validation precision:", valid_precision)
print("###################################")
print("avg Validation accuracy:", np.mean(valid_accs))
print("avg Validation F1-score:", np.mean(valid_f1s))
print("avg Validation recall:", np.mean(valid_recalls))
print("avg Validation precision:", np.mean(valid_precisions))

Validation accuracy: 0.8022632002830505
Validation F1-score: 0.80223548412323
Validation recall: 0.8022355437278748
Validation precision: 0.8022355437278748
Validation accuracy: 0.8011862635612488
Validation F1-score: 0.8011748194694519
Validation recall: 0.8011748790740967
Validation precision: 0.8011748790740967
Validation accuracy: 0.800551176071167
Validation F1-score: 0.8005477786064148
Validation recall: 0.8005478382110596
Validation precision: 0.8005478382110596
Validation accuracy: 0.8037378191947937
Validation F1-score: 0.8037179112434387
Validation recall: 0.8037180304527283
Validation precision: 0.8037180304527283
Validation accuracy: 0.8034892678260803
Validation F1-score: 0.8034775257110596
Validation recall: 0.8034776449203491
Validation precision: 0.8034776449203491
###################################
avg Validation accuracy: 0.8022455453872681
avg Validation F1-score: 0.802230703830719
avg Validation recall: 0.8022307872772216
avg Validation precision: 0.802230787277221

In [13]:
model_file_path = "dnn_models/log_transformed/pca95_feature50/with_earlystopping_001_0421.h5"
valid_accs, valid_f1s, valid_recalls, valid_precisions = [], [], [], []

# GPU 설정
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
   tf.config.experimental.set_memory_growth(physical_devices[0], True)
# KFold validation 사용
# k: n_splits=5
kf = KFold(random_state=30,
           n_splits=5,
           shuffle=True
          )
model = tf.keras.models.load_model(model_file_path, custom_objects={"f1_score":f1_score, "recall":recall, "precision":precision})
for train_index, val_index in kf.split(x_train, y_train):
    X_train_fold, X_val_fold = x_train[train_index], x_train[val_index]
    Y_train_fold, Y_val_fold = y_train[train_index], y_train[val_index]

    # 모델 validation
    valid_loss, valid_acc, valid_f1, valid_recall, valid_precision = model.evaluate(X_val_fold, Y_val_fold)
    valid_accs.append(valid_acc)
    valid_f1s.append(valid_f1)
    valid_recalls.append(valid_recall)
    valid_precisions.append(valid_precision)
    print("===================================")
    print("Validation accuracy:", valid_acc)
    print("Validation F1-score:", valid_f1)
    print("Validation recall:", valid_recall)
    print("Validation precision:", valid_precision)
print("###################################")
print("avg Validation accuracy:", np.mean(valid_accs))
print("avg Validation F1-score:", np.mean(valid_f1s))
print("avg Validation recall:", np.mean(valid_recalls))
print("avg Validation precision:", np.mean(valid_precisions))

Validation accuracy: 0.800203263759613
Validation F1-score: 0.8001757264137268
Validation recall: 0.8001757860183716
Validation precision: 0.8001757860183716
Validation accuracy: 0.7989164590835571
Validation F1-score: 0.7989051938056946
Validation recall: 0.7989052534103394
Validation precision: 0.7989052534103394
Validation accuracy: 0.7984524965286255
Validation F1-score: 0.7984493374824524
Validation recall: 0.7984493970870972
Validation precision: 0.7984493970870972
Validation accuracy: 0.8016391396522522
Validation F1-score: 0.8016194701194763
Validation recall: 0.8016195893287659
Validation precision: 0.8016195893287659
Validation accuracy: 0.8010647892951965
Validation F1-score: 0.8010532855987549
Validation recall: 0.8010534048080444
Validation precision: 0.8010534048080444
###################################
avg Validation accuracy: 0.8000552296638489
avg Validation F1-score: 0.800040602684021
avg Validation recall: 0.8000406861305237
avg Validation precision: 0.8000406861305

In [14]:
model_file_path = "dnn_models/log_transformed/pca95_feature50/with_earlystopping_0001_0421.h5"
valid_accs, valid_f1s, valid_recalls, valid_precisions = [], [], [], []

# GPU 설정
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
   tf.config.experimental.set_memory_growth(physical_devices[0], True)
# KFold validation 사용
# k: n_splits=5
kf = KFold(random_state=30,
           n_splits=5,
           shuffle=True
          )
model = tf.keras.models.load_model(model_file_path, custom_objects={"f1_score":f1_score, "recall":recall, "precision":precision})
for train_index, val_index in kf.split(x_train, y_train):
    X_train_fold, X_val_fold = x_train[train_index], x_train[val_index]
    Y_train_fold, Y_val_fold = y_train[train_index], y_train[val_index]

    # 모델 validation
    valid_loss, valid_acc, valid_f1, valid_recall, valid_precision = model.evaluate(X_val_fold, Y_val_fold)
    valid_accs.append(valid_acc)
    valid_f1s.append(valid_f1)
    valid_recalls.append(valid_recall)
    valid_precisions.append(valid_precision)
    print("===================================")
    print("Validation accuracy:", valid_acc)
    print("Validation F1-score:", valid_f1)
    print("Validation recall:", valid_recall)
    print("Validation precision:", valid_precision)
print("###################################")
print("avg Validation accuracy:", np.mean(valid_accs))
print("avg Validation F1-score:", np.mean(valid_f1s))
print("avg Validation recall:", np.mean(valid_recalls))
print("avg Validation precision:", np.mean(valid_precisions))

Validation accuracy: 0.8013851046562195
Validation F1-score: 0.8013654947280884
Validation recall: 0.8013655543327332
Validation precision: 0.8013655543327332
Validation accuracy: 0.8011642098426819
Validation F1-score: 0.8011527061462402
Validation recall: 0.801152765750885
Validation precision: 0.801152765750885
Validation accuracy: 0.800346851348877
Validation F1-score: 0.8003434538841248
Validation recall: 0.8003435134887695
Validation precision: 0.8003435134887695
Validation accuracy: 0.803041934967041
Validation F1-score: 0.8030221462249756
Validation recall: 0.8030222058296204
Validation precision: 0.8030222058296204
Validation accuracy: 0.8024620413780212
Validation F1-score: 0.80245041847229
Validation recall: 0.8024504780769348
Validation precision: 0.8024504780769348
###################################
avg Validation accuracy: 0.8016800284385681
avg Validation F1-score: 0.8016668438911438
avg Validation recall: 0.8016669034957886
avg Validation precision: 0.8016669034957886
