In [1]:
import warnings
# Suppress every warning for the rest of the session. This keeps the notebook
# output compact, but it also hides deprecation notices and other diagnostics.
warnings.filterwarnings('ignore')

딥러닝

In [9]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import roc_auc_score

# ✅ Load the data
train_path = "train.csv"
test_path = "test.csv"

train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)

# ✅ Separate the target column from the features
X = train_df.drop(columns=["UID", "채무 불이행 여부"])
y = train_df["채무 불이행 여부"]

# Keep the test UIDs for the submission file; pop() also removes the column
# from test_df, so X_test contains feature columns only.
test_UID = test_df.pop("UID")
X_test = test_df

# ✅ One-hot encode the categorical columns
categorical_cols = ["주거 형태", "현재 직장 근속 연수", "대출 목적", "대출 상환 기간"]
encoder = OneHotEncoder(sparse_output=False, drop="first")

X_encoded = encoder.fit_transform(X[categorical_cols])
X_test_encoded = encoder.transform(X_test[categorical_cols])

encoded_cols = encoder.get_feature_names_out(categorical_cols)

X_encoded_df = pd.DataFrame(X_encoded, columns=encoded_cols, index=X.index)
X_test_encoded_df = pd.DataFrame(X_test_encoded, columns=encoded_cols, index=X_test.index)

# ✅ Replace the raw categorical columns with their encoded counterparts.
# The encoded frames were built on the index of X / X_test, so concatenating
# on that shared index aligns rows one-to-one without resetting either side.
X = pd.concat([X.drop(columns=categorical_cols), X_encoded_df], axis=1)
X_test = pd.concat([X_test.drop(columns=categorical_cols), X_test_encoded_df], axis=1)

# ✅ Split BEFORE scaling so the validation rows never influence the scaler
# statistics (fitting the scaler on all rows leaks validation information
# into training and makes the validation score optimistic).
X_train_raw, X_valid_raw, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# ✅ Scale: fit on the training split only, then apply the same transform
# to the validation split, the test set and the full training frame.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_valid = scaler.transform(X_valid_raw)
X_test_scaled = scaler.transform(X_test)
# Kept for any later cell that still expects the fully scaled training matrix.
X_scaled = scaler.transform(X)

# ✅ MLP (Multi-Layer Perceptron) model definition
def build_model(input_dim: int) -> "keras.Sequential":
    """Build and compile the binary-classification MLP.

    The network stacks three Dense(relu) -> BatchNormalization -> Dropout(0.3)
    stages with 256, 128 and 64 units, followed by a single sigmoid unit.
    It is compiled with Adam (learning rate 0.001), binary cross-entropy loss
    and an AUC metric.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential([
        # Explicit Input object instead of passing ``input_shape`` to the
        # first Dense layer.
        keras.Input(shape=(input_dim,)),

        layers.Dense(256, activation="relu"),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        layers.Dense(128, activation="relu"),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        layers.Dense(64, activation="relu"),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        layers.Dense(1, activation="sigmoid"),  # Binary classification
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss="binary_crossentropy",
        # Pin the metric name: an unnamed AUC gets an auto-generated suffix
        # ("auc_1", "auc_2", ...) when the model is rebuilt in the same
        # session, which would no longer match a "val_auc" monitor.
        metrics=[keras.metrics.AUC(name="auc")],
    )

    return model

# ✅ Instantiate the network for the number of feature columns in X_train
model = build_model(X_train.shape[1])

# ✅ Train the model
# Stop once validation AUC has not improved for 10 epochs and roll the
# weights back to the best epoch seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_auc",
    mode="max",
    patience=10,
    restore_best_weights=True,
)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    batch_size=32,
    epochs=100,
    verbose=1,
    callbacks=[early_stopping],
)

# ✅ Evaluate on the validation split
y_valid_pred = model.predict(X_valid).flatten()
roc_auc = roc_auc_score(y_valid, y_valid_pred)
print(f"🔥 딥러닝 모델 ROC-AUC: {roc_auc:.6f}")

# ✅ Predict on the test set
y_test_pred = model.predict(X_test_scaled).flatten()

# ✅ Write the submission file (one predicted probability per test UID)
submission = pd.DataFrame(
    {
        "UID": test_UID,
        "채무 불이행 확률": y_test_pred,
    }
)
submission.to_csv("submission.csv", index=False)

print("✅ 딥러닝 모델 예측 완료! 결과가 submission.csv에 저장되었습니다.")


Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - auc: 0.5884 - loss: 0.8126 - val_auc: 0.6980 - val_loss: 0.5990
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - auc: 0.6620 - loss: 0.6416 - val_auc: 0.7164 - val_loss: 0.5784
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - auc: 0.6723 - loss: 0.6168 - val_auc: 0.7257 - val_loss: 0.5721
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - auc: 0.7099 - loss: 0.5892 - val_auc: 0.7328 - val_loss: 0.5662
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - auc: 0.7284 - loss: 0.5689 - val_auc: 0.7264 - val_loss: 0.5694
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - auc: 0.7236 - loss: 0.5725 - val_auc: 0.7324 - val_loss: 0.5650
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/