In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout

# 데이터 불러오기
data = pd.read_csv('fake_reviews_dataset.csv')  # 데이터 파일 경로 수정

# 레이블 인코딩
label_encoder = LabelEncoder()
data['label_encoded'] = label_encoder.fit_transform(data['label'])

# 텍스트와 레이블 분리
texts = data['text_']
labels = data['label_encoded']

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# 텍스트 토크나이징 및 패딩
max_words = 10000  # 사용할 최대 단어 수
max_len = 100  # 시퀀스의 최대 길이

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len, padding='post')

# CNN 모델 구축
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))  # 임베딩 레이어
model.add(Conv1D(64, 5, activation='relu'))  # CNN 레이어
model.add(GlobalMaxPooling1D())  # Global Max Pooling
model.add(Dropout(0.3))  # Dropout
model.add(Dense(16, activation='relu'))  # 완전 연결층
model.add(Dense(1, activation='sigmoid'))  # 출력층 (이진 분류)

# 모델 컴파일
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 모델 학습
history = model.fit(
    X_train_pad, y_train,
    epochs=5,
    batch_size=128,
    validation_data=(X_test_pad, y_test)
)

# 모델 평가
loss, accuracy = model.evaluate(X_test_pad, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")




Epoch 1/5
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 44ms/step - accuracy: 0.7494 - loss: 0.5197 - val_accuracy: 0.9209 - val_loss: 0.2052
Epoch 2/5
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 69ms/step - accuracy: 0.9386 - loss: 0.1632 - val_accuracy: 0.9319 - val_loss: 0.1723
Epoch 3/5
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 62ms/step - accuracy: 0.9732 - loss: 0.0814 - val_accuracy: 0.9351 - val_loss: 0.1785
Epoch 4/5
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 55ms/step - accuracy: 0.9849 - loss: 0.0479 - val_accuracy: 0.9362 - val_loss: 0.1888
Epoch 5/5
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 50ms/step - accuracy: 0.9923 - loss: 0.0265 - val_accuracy: 0.9361 - val_loss: 0.2175
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9359 - loss: 0.2018
Test Accuracy: 93.61%


In [6]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score

# CNN 모델 학습 함수
def train_cnn(filters, kernel_size, dropout_rate, dense_units, batch_size, epochs):
    model = Sequential()
    model.add(Embedding(input_dim=10000, output_dim=128, input_length=100))  # 임베딩 레이어
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu'))  # CNN 레이어
    model.add(GlobalMaxPooling1D())  # Global Max Pooling
    model.add(Dropout(dropout_rate))  # Dropout
    model.add(Dense(dense_units, activation='relu'))  # 완전 연결층
    model.add(Dense(1, activation='sigmoid'))  # 출력층
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    # EarlyStopping으로 과적합 방지
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    
    # 모델 학습
    model.fit(
        X_train_pad, y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_test_pad, y_test),
        callbacks=[early_stopping],
        verbose=0
    )
    
    # 검증 데이터 정확도 계산
    y_pred = (model.predict(X_test_pad) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# 하이퍼파라미터 값 설정
filters_list = [64, 128]
kernel_size_list = [3, 5]
dropout_rate_list = [0.3, 0.5]
dense_units_list = [32, 64]
batch_size_list = [32, 64]
epochs_list = [5, 10]

# 최적 하이퍼파라미터 탐색
best_accuracy = 0
best_params = {}

for filters in filters_list:
    for kernel_size in kernel_size_list:
        for dropout_rate in dropout_rate_list:
            for dense_units in dense_units_list:
                for batch_size in batch_size_list:
                    for epochs in epochs_list:
                        print(f"Training with filters={filters}, kernel_size={kernel_size}, dropout_rate={dropout_rate}, "
                              f"dense_units={dense_units}, batch_size={batch_size}, epochs={epochs}")
                        
                        accuracy = train_cnn(filters, kernel_size, dropout_rate, dense_units, batch_size, epochs)
                        print(f"Accuracy: {accuracy:.4f}")
                        
                        if accuracy > best_accuracy:
                            best_accuracy = accuracy
                            best_params = {
                                'filters': filters,
                                'kernel_size': kernel_size,
                                'dropout_rate': dropout_rate,
                                'dense_units': dense_units,
                                'batch_size': batch_size,
                                'epochs': epochs
                            }

# 최적 하이퍼파라미터 출력
print("Best Accuracy:", best_accuracy)
print("Best Hyperparameters:", best_params)


Training with filters=64, kernel_size=3, dropout_rate=0.3, dense_units=32, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9332
Training with filters=64, kernel_size=3, dropout_rate=0.3, dense_units=32, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9265
Training with filters=64, kernel_size=3, dropout_rate=0.3, dense_units=32, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9287
Training with filters=64, kernel_size=3, dropout_rate=0.3, dense_units=32, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9299
Training with filters=64, kernel_size=3, dropout_rate=0.3, dense_units=64, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9321
Training with filters=64, kernel_size=3, dropout_rate=0.3, dense_units=64, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9280
Training with filters=64, kernel_size=3, dropout_rate=0.3, dense_units=64, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9333
Training with filters=64, kernel_size=3, dropout_rate=0.3, dense_units=64, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
Accuracy: 0.9273
Training with filters=64, kernel_size=3, dropout_rate=0.5, dense_units=32, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9228
Training with filters=64, kernel_size=3, dropout_rate=0.5, dense_units=32, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9214
Training with filters=64, kernel_size=3, dropout_rate=0.5, dense_units=32, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9225
Training with filters=64, kernel_size=3, dropout_rate=0.5, dense_units=32, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
Accuracy: 0.9190
Training with filters=64, kernel_size=3, dropout_rate=0.5, dense_units=64, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9221
Training with filters=64, kernel_size=3, dropout_rate=0.5, dense_units=64, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9243
Training with filters=64, kernel_size=3, dropout_rate=0.5, dense_units=64, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9173
Training with filters=64, kernel_size=3, dropout_rate=0.5, dense_units=64, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9278
Training with filters=64, kernel_size=5, dropout_rate=0.3, dense_units=32, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9309
Training with filters=64, kernel_size=5, dropout_rate=0.3, dense_units=32, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9338
Training with filters=64, kernel_size=5, dropout_rate=0.3, dense_units=32, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9400
Training with filters=64, kernel_size=5, dropout_rate=0.3, dense_units=32, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9356
Training with filters=64, kernel_size=5, dropout_rate=0.3, dense_units=64, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9385
Training with filters=64, kernel_size=5, dropout_rate=0.3, dense_units=64, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
Accuracy: 0.9371
Training with filters=64, kernel_size=5, dropout_rate=0.3, dense_units=64, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9378
Training with filters=64, kernel_size=5, dropout_rate=0.3, dense_units=64, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
Accuracy: 0.9392
Training with filters=64, kernel_size=5, dropout_rate=0.5, dense_units=32, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
Accuracy: 0.9290
Training with filters=64, kernel_size=5, dropout_rate=0.5, dense_units=32, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9301
Training with filters=64, kernel_size=5, dropout_rate=0.5, dense_units=32, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
Accuracy: 0.9285
Training with filters=64, kernel_size=5, dropout_rate=0.5, dense_units=32, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
Accuracy: 0.9235
Training with filters=64, kernel_size=5, dropout_rate=0.5, dense_units=64, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9364
Training with filters=64, kernel_size=5, dropout_rate=0.5, dense_units=64, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9301
Training with filters=64, kernel_size=5, dropout_rate=0.5, dense_units=64, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9295
Training with filters=64, kernel_size=5, dropout_rate=0.5, dense_units=64, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
Accuracy: 0.9309
Training with filters=128, kernel_size=3, dropout_rate=0.3, dense_units=32, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
Accuracy: 0.9295
Training with filters=128, kernel_size=3, dropout_rate=0.3, dense_units=32, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Accuracy: 0.9300
Training with filters=128, kernel_size=3, dropout_rate=0.3, dense_units=32, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9330
Training with filters=128, kernel_size=3, dropout_rate=0.3, dense_units=32, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
Accuracy: 0.9341
Training with filters=128, kernel_size=3, dropout_rate=0.3, dense_units=64, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9314
Training with filters=128, kernel_size=3, dropout_rate=0.3, dense_units=64, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9305
Training with filters=128, kernel_size=3, dropout_rate=0.3, dense_units=64, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step
Accuracy: 0.9295
Training with filters=128, kernel_size=3, dropout_rate=0.3, dense_units=64, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9312
Training with filters=128, kernel_size=3, dropout_rate=0.5, dense_units=32, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9308
Training with filters=128, kernel_size=3, dropout_rate=0.5, dense_units=32, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9262
Training with filters=128, kernel_size=3, dropout_rate=0.5, dense_units=32, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
Accuracy: 0.9257
Training with filters=128, kernel_size=3, dropout_rate=0.5, dense_units=32, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
Accuracy: 0.9311
Training with filters=128, kernel_size=3, dropout_rate=0.5, dense_units=64, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9309
Training with filters=128, kernel_size=3, dropout_rate=0.5, dense_units=64, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
Accuracy: 0.9306
Training with filters=128, kernel_size=3, dropout_rate=0.5, dense_units=64, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9305
Training with filters=128, kernel_size=3, dropout_rate=0.5, dense_units=64, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9355
Training with filters=128, kernel_size=5, dropout_rate=0.3, dense_units=32, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9397
Training with filters=128, kernel_size=5, dropout_rate=0.3, dense_units=32, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9395
Training with filters=128, kernel_size=5, dropout_rate=0.3, dense_units=32, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9387
Training with filters=128, kernel_size=5, dropout_rate=0.3, dense_units=32, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9393
Training with filters=128, kernel_size=5, dropout_rate=0.3, dense_units=64, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9399
Training with filters=128, kernel_size=5, dropout_rate=0.3, dense_units=64, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.9421
Training with filters=128, kernel_size=5, dropout_rate=0.3, dense_units=64, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.9380
Training with filters=128, kernel_size=5, dropout_rate=0.3, dense_units=64, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
Accuracy: 0.9410
Training with filters=128, kernel_size=5, dropout_rate=0.5, dense_units=32, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.9285
Training with filters=128, kernel_size=5, dropout_rate=0.5, dense_units=32, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
Accuracy: 0.9296
Training with filters=128, kernel_size=5, dropout_rate=0.5, dense_units=32, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step
Accuracy: 0.9268
Training with filters=128, kernel_size=5, dropout_rate=0.5, dense_units=32, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step
Accuracy: 0.9411
Training with filters=128, kernel_size=5, dropout_rate=0.5, dense_units=64, batch_size=32, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.9337
Training with filters=128, kernel_size=5, dropout_rate=0.5, dense_units=64, batch_size=32, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.9324
Training with filters=128, kernel_size=5, dropout_rate=0.5, dense_units=64, batch_size=64, epochs=5




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.9322
Training with filters=128, kernel_size=5, dropout_rate=0.5, dense_units=64, batch_size=64, epochs=10




[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.9347
Best Accuracy: 0.9421293433906269
Best Hyperparameters: {'filters': 128, 'kernel_size': 5, 'dropout_rate': 0.3, 'dense_units': 64, 'batch_size': 32, 'epochs': 10}


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import load_model

# 최적의 하이퍼파라미터
best_params = {
    "filters": 128,
    "kernel_size": 3,
    "dropout_rate": 0.5,
    "dense_units": 64,
    "batch_size": 32,
    "epochs": 10
}

# 고정된 환경 설정 (random_state 고정)
np.random.seed(42)
import tensorflow as tf
tf.random.set_seed(42)

# CNN 모델 재학습
model_save_path = "best_final_cnn_model.h5"

model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=128, input_length=100))
model.add(Conv1D(filters=best_params['filters'], kernel_size=best_params['kernel_size'], activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dropout(best_params['dropout_rate']))
model.add(Dense(best_params['dense_units'], activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 콜백 설정
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint(filepath=model_save_path, monitor='val_loss', save_best_only=True, verbose=1)

# 모델 학습
history = model.fit(
    X_train_pad, y_train,
    batch_size=best_params['batch_size'],
    epochs=best_params['epochs'],
    validation_data=(X_test_pad, y_test),
    callbacks=[early_stopping, model_checkpoint],
    verbose=1
)

# 모델 평가
y_pred = (model.predict(X_test_pad) > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
print(f"Final Model Accuracy: {accuracy:.4f}")
print(f"Best Model Saved at: {model_save_path}")


In [1]:
from keras.models import load_model

model = load_model('best_final_cnn_model.h5')
model.summary()

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'best_final_cnn_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)