# In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Model
from tensorflow.keras.utils import image_dataset_from_directory
import matplotlib.pyplot as plt
import numpy as np

# Size handed to both loaders as `image_size`; also used for the model input shape.
IMAGE_SIZE = (256, 256)
BATCH_SIZE = 32

TRAIN_DIR = 'unsupervised/train/normal'
TEST_DIR = 'unsupervised/test'

# Loader options that are identical for the training and the test split.
_LOADER_OPTIONS = {'image_size': IMAGE_SIZE, 'batch_size': BATCH_SIZE}

# Training data: unlabeled images (labels=None), shuffled by the loader.
train_dataset = image_dataset_from_directory(
    TRAIN_DIR,
    labels=None,
    shuffle=True,
    **_LOADER_OPTIONS,
)

# Test data: binary labels inferred from the directory structure, unshuffled.
# NOTE(review): the code further down treats label 0 as "normal" and label 1
# as "anomaly" - confirm the sub-directory names of TEST_DIR map that way.
test_dataset = image_dataset_from_directory(
    TEST_DIR,
    labels='inferred',
    label_mode='binary',
    shuffle=False,
    **_LOADER_OPTIONS,
)


def normalize(image):
    """Cast *image* to float32 and divide it by 255.

    For pixel values in the 0-255 range this yields values in [0, 1].
    """
    as_float = tf.cast(image, tf.float32)
    return as_float / 255.0

def normalize_with_label(image, label):
    """Normalize *image* via ``normalize`` and pass *label* through unchanged.

    Returns:
        A ``(normalized_image, label)`` tuple.
    """
    scaled_image = normalize(image)
    return scaled_image, label

def _to_autoencoder_pair(image):
    """Return ``(input, target)`` for reconstruction training.

    Both elements are the same normalized image: the autoencoder is trained
    to reproduce its own input.
    """
    scaled = normalize(image)
    return scaled, scaled


# The training images are loaded without labels, so each element must be
# turned into an (input, target) pair; otherwise fit() has no target to
# compute the MSE loss against.
# NOTE(review): cache() sits after the loader's shuffle, so the batch order of
# the first pass is presumably replayed every epoch - confirm this is wanted.
train_dataset = train_dataset.map(_to_autoencoder_pair).cache().prefetch(tf.data.AUTOTUNE)
# The test split keeps its (image, label) structure; only the image is scaled.
test_dataset = test_dataset.map(normalize_with_label).cache().prefetch(tf.data.AUTOTUNE)


# Convolutional autoencoder over 3-channel images of size IMAGE_SIZE.
input_img = Input(shape=(*IMAGE_SIZE, 3))

# Encoder: two stages of 3x3 ReLU convolution followed by 2x2 max pooling
# (32 filters, then 16).
x = input_img
for n_filters in (32, 16):
    x = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
encoded = x

# Decoder: mirrors the encoder with 2x2 upsampling (16 filters, then 32).
for n_filters in (16, 32):
    x = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
# Sigmoid output layer with 3 channels, giving reconstructions in [0, 1].
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(inputs=input_img, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

# Train the model (normal data only) ---
print("\n오토인코더 학습 시작...")
# No `shuffle` argument: Keras ignores it when the input is a tf.data.Dataset,
# so the batch order is whatever the dataset pipeline itself produces.
autoencoder.fit(train_dataset, epochs=30)
print("학습 완료.")

# Anomaly detection ---
def get_reconstruction_error(dataset, model=None):
    """Compute the per-image reconstruction MSE over a labelled dataset.

    Args:
        dataset: Iterable of ``(images, labels)`` batches. Images are
            indexed as ``(batch, height, width, channels)``; labels may be
            shaped ``(batch,)`` or ``(batch, 1)``.
        model: Object exposing ``predict_on_batch(images)``. Defaults to the
            module-level ``autoencoder``.

    Returns:
        A ``(errors, labels)`` tuple of NumPy arrays with one entry per
        image. Both arrays are empty when *dataset* yields no batches.
    """
    if model is None:
        model = autoencoder

    error_chunks = []
    label_chunks = []
    for batch_images, batch_labels in dataset:
        images = np.asarray(batch_images)
        # predict_on_batch avoids the per-call overhead of predict(), which
        # is meant for whole datasets rather than one small batch in a loop.
        reconstructed = np.asarray(model.predict_on_batch(images))
        error_chunks.append(
            np.mean(np.square(images - reconstructed), axis=(1, 2, 3))
        )

        batch_label_array = np.asarray(batch_labels)
        # Binary labels arrive as (batch, 1); drop the trailing axis so that
        # `errors[labels == 0]` is a valid 1-D boolean mask.
        if batch_label_array.ndim == 2 and batch_label_array.shape[1] == 1:
            batch_label_array = batch_label_array[:, 0]
        label_chunks.append(batch_label_array)

    if not error_chunks:
        return np.array([]), np.array([])
    return np.concatenate(error_chunks), np.concatenate(label_chunks)

errors, labels = get_reconstruction_error(test_dataset)

# Visualization: overlay the error histograms of the two label groups.
plt.figure(figsize=(10, 5))
for class_value, class_name in ((0, 'Normal'), (1, 'Anomaly (Scratch)')):
    plt.hist(errors[labels == class_value], bins=50, alpha=0.7, label=class_name)
plt.xlabel("Reconstruction Error (MSE)")
plt.ylabel("Count")
plt.legend()
plt.title("Reconstruction Error Distribution")
plt.show()

# Threshold: the 95th percentile of the errors of the label-0 samples.
# NOTE(review): this is derived from the same test data it is applied to
# below - confirm whether a separate validation split was intended.
normal_errors = errors[labels == 0]
threshold = np.percentile(normal_errors, 95)
print(f"설정된 이상 탐지 임계값: {threshold}")

# Result visualization: first test batch with true and predicted labels ---
plt.figure(figsize=(10, 10))
test_images, test_labels = next(iter(test_dataset))
reconstructed_images = autoencoder.predict(test_images)

# The first batch can hold fewer than 9 images (small test set or batch
# size); plot only what exists instead of indexing past the end.
num_to_show = min(9, len(reconstructed_images))
for i in range(num_to_show):
    ax = plt.subplot(3, 3, i + 1)
    original_img = test_images[i].numpy()
    reconstructed_img = reconstructed_images[i]

    # Same per-image MSE that the threshold was computed from.
    error = np.mean(np.power(original_img - reconstructed_img, 2))
    is_anomaly = "Anomaly" if error > threshold else "Normal"
    true_label = "Anomaly" if test_labels[i] == 1 else "Normal"

    # Show the original image with the verdict in its title.
    ax.imshow(original_img)
    ax.set_title(f"True: {true_label}\nPred: {is_anomaly}\nError: {error:.4f}")
    ax.axis("off")
plt.tight_layout()
plt.show()