<a href="https://colab.research.google.com/github/ekdls02/ekdls2025/blob/main/xai_%EA%B3%BC%EC%A0%9C_3_%EC%8A%A4%ED%8A%B8%EB%A0%88%EC%8A%A4_%EC%A7%80%EC%88%98_%EC%98%88%EC%B8%A1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. 라이브러리 임포트
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical


# 2. Load the training and test datasets
train = pd.read_csv("/content/train.csv")
test = pd.read_csv("/content/test.csv")


# 3. Separate the target column (y) from the feature columns (X)
y = train["Stress Level"]
X = train.drop(columns=["Stress Level"])


# 4. Convert string (object-dtype) features to integer codes.
#    Each encoder is fitted on the training column only. Test values that
#    never appeared in training are routed to a single "Unknown" category.
str_cols = X.select_dtypes(include='object').columns
for col in str_cols:
    le_col = LabelEncoder()
    X[col] = le_col.fit_transform(X[col])

    # Vectorized membership test instead of scanning classes_ once per row.
    # isin() matches missing values against missing values, so a NaN in test
    # keeps the code it received in training when the training column had
    # NaN too; otherwise it falls through to "Unknown" like any unseen value.
    is_known = test[col].isin(le_col.classes_)
    test[col] = test[col].where(is_known, "Unknown")

    # Register "Unknown" with the encoder only when it is not already a
    # training category: appending a duplicate would make transform() map a
    # genuine "Unknown" to the new index instead of the one used in training.
    if "Unknown" not in le_col.classes_:
        le_col.classes_ = np.append(le_col.classes_, "Unknown")
    test[col] = le_col.transform(test[col])


# 5. Encode the target labels as integers, then expand them to one-hot vectors
le_y = LabelEncoder().fit(y)
y_encoded = le_y.transform(y)
y_categorical = to_categorical(y_encoded)


# 6. Impute missing values.
#    The medians are computed once from the training features and reused for
#    the test set, so both sets are filled with the same values and no
#    statistic is derived from the test data.
train_medians = X.median()
X = X.fillna(train_medians)
test = test.fillna(train_medians)


# 7-8. Split into training/validation sets, then standardize.
#      The split happens first so the scaler's mean/variance come from the
#      training rows only; fitting on all rows would let validation data
#      influence the preprocessing and make the validation score optimistic.
#      With the same random_state and row count, the row partition is the
#      same as when the split was applied to the already-scaled matrix.
X_train_raw, X_valid_raw, y_train, y_valid = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_valid = scaler.transform(X_valid_raw)
# Full scaled matrix, kept under its original name for any code that uses it.
X_scaled = scaler.transform(X)
test_scaled = scaler.transform(test)


# 9. Build the ANN for multi-class classification
num_classes = y_categorical.shape[1]

model = Sequential([
    # Declare the input with an explicit Input layer; passing input_shape to
    # the first Dense layer is discouraged by Keras and emits a UserWarning.
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(num_classes, activation='softmax')  # one probability per class
])

# categorical_crossentropy matches the one-hot encoded targets.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])


# 10. Train the model, tracking loss/accuracy on the validation split
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=50,
    batch_size=16,
    verbose=1,
)


# 11. Measure accuracy on the validation split
#     Both predictions and one-hot targets are reduced to class indices.
y_pred_valid = model.predict(X_valid)
y_pred_classes = y_pred_valid.argmax(axis=1)
y_valid_classes = y_valid.argmax(axis=1)
acc = accuracy_score(y_valid_classes, y_pred_classes)
print(f"Validation Accuracy: {acc:.5f}")


# 12. Predict on the test set and map class indices back to the original labels
y_pred_test = model.predict(test_scaled)
y_pred_classes_test = y_pred_test.argmax(axis=1)
y_pred_labels_test = le_y.inverse_transform(y_pred_classes_test)


# 13. Build the submission file
#     Columns containing missing values are dropped from the template before
#     the predicted labels are written into "Stress Level".
df = pd.read_csv("/content/submission.csv")
df = df.dropna(axis=1)
df["Stress Level"] = y_pred_labels_test
df.to_csv("new_submission.csv", index=False)

print("new_submission.csv 파일 생성 완료!")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 93ms/step - accuracy: 0.1710 - loss: 1.8047 - val_accuracy: 0.6833 - val_loss: 1.5344
Epoch 2/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.5623 - loss: 1.5304 - val_accuracy: 0.8500 - val_loss: 1.2077
Epoch 3/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7749 - loss: 1.2049 - val_accuracy: 0.8333 - val_loss: 0.8410
Epoch 4/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8237 - loss: 0.9118 - val_accuracy: 0.9333 - val_loss: 0.5419
Epoch 5/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8752 - loss: 0.6626 - val_accuracy: 0.9500 - val_loss: 0.3444
Epoch 6/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8460 - loss: 0.5758 - val_accuracy: 0.9333 - val_loss: 0.2573
Epoch 7/50
[1m15/15[0m [32m━━━━━━━━