<a href="https://colab.research.google.com/github/ekdls02/ekdls2025/blob/main/xai_%EA%B3%BC%EC%A0%9C_5_X_Ray_%EC%9D%B4%EB%AF%B8%EC%A7%80_%EC%9D%B8%EC%8B%9D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 라이브러리 불러오기
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# Load the training set and prepare it for the CNN.
# np.load on an .npz returns a lazy archive handle that stays open until it is
# closed, so both arrays are read inside a context manager.
with np.load("/content/train.npz") as train_data:
    x = train_data['x']          # images
    y = train_data['y']          # string labels

# Map the string labels to integer class ids.
le = LabelEncoder()
y_int = le.fit_transform(y)

# One-hot encode the integer class ids.
num_classes = len(le.classes_)
y_encoded = to_categorical(y_int, num_classes=num_classes)

# Append a trailing channel axis and scale the pixel values by 1/255.
# NOTE(review): assumes the stored images are single-channel with values in
# 0-255 -- confirm against the dataset.
# The scaled array is kept as float32 (Keras' default float type) so the
# dataset held in memory is half the size of the float64 division result.
x = (x[..., np.newaxis] / 255.0).astype(np.float32)

# Hold out 20% of the samples for validation with a fixed seed.
# Stratify on the 1-D integer ids: they carry the same class membership as the
# one-hot matrix without relying on how a 2-D `stratify` array is interpreted.
x_train, x_val, y_train, y_val = train_test_split(
    x, y_encoded, test_size=0.2, random_state=42, stratify=y_int
)

# CNN architecture: two convolution + max-pooling stages followed by a dense
# classifier head with dropout and a softmax over `num_classes` outputs.
model = Sequential([
    # Declare the input shape with an explicit Input layer. Passing
    # `input_shape=` to the first layer of a Sequential model is discouraged
    # in recent Keras releases and emits a UserWarning.
    Input(shape=x_train.shape[1:]),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Compile the model and train it.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once validation accuracy has not improved for 5 epochs and roll the
# model back to the best weights seen.
es = EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_accuracy',
)

model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    callbacks=[es],
    validation_data=(x_val, y_val),
)

# Load the test images and predict their classes.
# The .npz archive handle stays open until closed, so read it inside a context
# manager.
with np.load("/content/test.npz") as test_data:
    x_test = test_data['x']

# Same channel axis and 1/255 scaling as the training images; stored as
# float32 (Keras' default float type) to avoid keeping a float64 copy.
x_test = (x_test[..., np.newaxis] / 255.0).astype(np.float32)

# Take the most probable class for every test image.
y_pred_prob = model.predict(x_test)
y_pred_int = np.argmax(y_pred_prob, axis=1)

# Convert the integer class ids back to the original string labels.
y_pred_str = le.inverse_transform(y_pred_int)

# Fill the "result" column of the submission template and write it out.
df = pd.read_csv("/content/submission.csv")
if len(df) != len(y_pred_str):
    # Fail with a clear message instead of pandas' generic length error.
    raise ValueError(
        f"submission.csv has {len(df)} rows but {len(y_pred_str)} "
        "predictions were produced"
    )
df["result"] = y_pred_str
df.to_csv("new_submission.csv", index=False)

print("Submission 파일 생성 완료")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
980/980 ━━━━━━━━━━━━━━━━━━━━ 39s 38ms/step - accuracy: 0.9293 - loss: 0.2129 - val_accuracy: 0.9967 - val_loss: 0.0113
Epoch 2/20
980/980 ━━━━━━━━━━━━━━━━━━━━ 40s 36ms/step - accuracy: 0.9958 - loss: 0.0152 - val_accuracy: 0.9974 - val_loss: 0.0085
Epoch 3/20
980/980 ━━━━━━━━━━━━━━━━━━━━ 38s 39ms/step - accuracy: 0.9970 - loss: 0.0107 - val_accuracy: 0.9987 - val_loss: 0.0038
Epoch 4/20
980/980 ━━━━━━━━━━━━━━━━━━━━ 37s 38ms/step - accuracy: 0.9979 - loss: 0.0069 - val_accuracy: 0.9989 - val_loss: 0.0030
Epoch 5/20
980/980 ━━━━━━━━━━━━━━━━━━━━ 36s 37ms/step - accuracy: 0.9984 - loss: 0.0055 - val_accuracy: 0.9990 - val_loss: 0.0026
Epoch 6/20
980/980 ━━━━━━━━━━━━━━━━━━━━ 36s 37ms/step - accuracy: 0.9989 - loss: 0.0029 - val_accuracy: 0.9989 - val_loss: 0.0039
Epoch 7/20
[1m9