<a href="https://colab.research.google.com/github/dhckdduf/first-repository/blob/main/practice02-04.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers, regularizers

# 로이터 데이터셋 로드
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the Reuters dataset, keeping only the `max_words` most frequent words
# (num_words=10000 -> the 10,000 most common words are used).
max_words = 10000
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words)

# Turn each word-index sequence into a fixed-length binary bag-of-words
# (multi-hot) row: mode='binary' marks which word indices occur in a sample.
tokenizer = Tokenizer(num_words=max_words)
x_train, x_test = (
    tokenizer.sequences_to_matrix(sequences, mode='binary')
    for sequences in (x_train, x_test)
)

# One-hot encode the integer labels. The class count is derived from the
# largest label present in the training split.
num_classes = np.max(y_train) + 1
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

# Model factory
def build_model(reg=None, dropout_rate=None):
    """Build and compile the dense classifier used in every experiment.

    The network is two 64-unit ReLU hidden layers followed by a softmax
    output with ``num_classes`` units. It reads the module-level
    ``max_words`` (input width) and ``num_classes`` (output width).

    Args:
        reg: Optional kernel regularizer applied to both hidden Dense
            layers (e.g. ``regularizers.l2(0.001)``). ``None`` disables
            weight regularization.
        dropout_rate: Optional dropout rate. When truthy, a Dropout layer
            with this rate is inserted after each hidden layer; ``None``
            or ``0`` adds no Dropout layers.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = keras.Sequential()

    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first Dense layer; the latter is the legacy form
    # and newer Keras releases emit a warning for it.
    model.add(keras.Input(shape=(max_words,)))

    # Two identical hidden stages: Dense(64, relu) [+ optional Dropout].
    for _ in range(2):
        model.add(layers.Dense(64, activation='relu', kernel_regularizer=reg))
        if dropout_rate:
            model.add(layers.Dropout(dropout_rate))

    # The output layer is intentionally left unregularized, as before.
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Train each model variant with identical settings so the curves are comparable.
# NOTE(review): the test split is passed as validation_data, so every "val"
# curve below is really a test-set curve — consider a held-out validation split.
epochs, batch_size = 20, 512

# (1) Baseline model: no regularization, no dropout.
model_basic = build_model()
history_basic = model_basic.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,  # silent training
    validation_data=(x_test, y_test),
)

# (2) Weight-regularized model (L2 penalty on the hidden-layer kernels).
model_l2 = build_model(reg=regularizers.l2(0.001))
history_l2 = model_l2.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
    validation_data=(x_test, y_test),
)

# (3) Dropout model.
model_dropout = build_model(dropout_rate=0.5)
history_dropout = model_dropout.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
    validation_data=(x_test, y_test),
)

# Learning-curve comparison
def plot_history(histories, title):
    """Plot training/validation loss and accuracy for several runs side by side.

    Args:
        histories: Mapping of run label -> object whose ``.history`` dict
            holds the 'loss', 'val_loss', 'accuracy' and 'val_accuracy'
            series (such as the value returned by ``model.fit``).
        title: Text used as the prefix of both subplot titles.

    Draws one figure with two subplots (loss on the left, accuracy on the
    right) and displays it with ``plt.show()``. Training curves are solid,
    validation curves are dashed.
    """
    # (train key, validation key, caption) for each panel, left to right.
    panels = (
        ('loss', 'val_loss', 'Loss'),
        ('accuracy', 'val_accuracy', 'Accuracy'),
    )

    plt.figure(figsize=(12, 5))

    for position, (train_key, val_key, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for label, history in histories.items():
            series = history.history
            plt.plot(series[train_key], label=f'{label} (train)')
            plt.plot(series[val_key], linestyle='dashed', label=f'{label} (val)')
        plt.title(f'{title} - {caption}')
        plt.xlabel('Epochs')
        plt.ylabel(caption)
        plt.legend()

    plt.show()

# Collect the three training histories under their legend labels and plot them.
histories = dict(zip(
    ('Basic', 'L2 Regularization', 'Dropout'),
    (history_basic, history_l2, history_dropout),
))

plot_history(histories, title='Reuters Classification Model')
