In [None]:
import itertools
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score

# Candidate values for each hyperparameter. The key order matters: it fixes
# the position of every value inside each combination tuple built below.
param_grid = dict(
    embedding_dim=[64, 128],
    lstm_units=[32, 64],
    dropout_rate=[0.3, 0.5],
    dense_units=[16, 32],
    batch_size=[32, 64],
    epochs=[10],
)

# Full Cartesian product of the candidate lists: one tuple per configuration,
# with values ordered like the keys of param_grid.
param_combinations = list(
    itertools.product(*(param_grid[name] for name in param_grid))
)

# Grid search over param_combinations.
#
# Each configuration is ranked by its accuracy on the validation split that
# model.fit() carves out of the training data. The test set is scored only
# once, for the winning model, so the reported test accuracy is not biased by
# having been used for model selection.
#
# Reads:  param_grid, param_combinations, max_words, max_len,
#         X_train, y_train, X_test, y_test
# Writes: best_val_accuracy, best_accuracy (test accuracy of the selected
#         model), best_params, best_model, and the file "best_lstm_model.h5".
# Raises: RuntimeError if no configuration produced a usable validation score.

# -inf guarantees that the first finite validation score is accepted, so
# best_model / best_params cannot silently stay None after a normal run.
best_val_accuracy = float("-inf")
best_accuracy = 0
best_params = None
best_model = None

# Hyperparameter search
for params in param_combinations:
    embedding_dim, lstm_units, dropout_rate, dense_units, batch_size, epochs = params

    print(f"Testing combination: {params}")

    # Build the model
    model = Sequential([
        Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len),
        LSTM(lstm_units, return_sequences=False),
        Dropout(dropout_rate),
        Dense(dense_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

    # Early-stopping callback
    early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

    # Train the model
    history = model.fit(
        X_train, y_train,
        validation_split=0.2,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping],
        verbose=0
    )

    # Score this configuration on the validation split. EarlyStopping monitors
    # val_loss with restore_best_weights=True, so the epoch with the lowest
    # val_loss is the one used for the score.
    val_losses = history.history['val_loss']
    best_epoch = min(range(len(val_losses)), key=val_losses.__getitem__)
    val_acc = history.history['val_accuracy'][best_epoch]
    print(f"Validation Accuracy: {val_acc:.4f}")

    # Keep the best configuration seen so far
    if val_acc > best_val_accuracy:
        best_val_accuracy = val_acc
        best_params = params
        best_model = model

# An empty grid, or runs whose validation accuracy is NaN, would leave nothing
# selected; fail with a clear message instead of crashing on None below.
if best_model is None:
    raise RuntimeError("Hyperparameter search did not produce any usable model")

# Touch the test set exactly once, for the selected model. ravel() flattens the
# (n, 1) sigmoid output so predictions have the same 1-D shape as the labels.
y_pred = (best_model.predict(X_test) > 0.5).astype(int).ravel()
best_accuracy = accuracy_score(y_test, y_pred)

# Report the best hyperparameters and scores
print(f"Best Validation Accuracy: {best_val_accuracy:.4f}")
print(f"Best Parameters: {dict(zip(param_grid.keys(), best_params))}")
print(f"Test Accuracy: {best_accuracy:.4f}")

# Save the best model
best_model.save("best_lstm_model.h5")
print("Best model saved as 'best_lstm_model.h5'")


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Data preparation: load the review dataset, encode the labels, split into
# train/test by row position, and turn the texts into padded integer sequences.
#
# Writes: data, texts, labels, max_words, max_len, tokenizer, sequences,
#         padded_sequences, X_train, X_test, y_train, y_test.
# Raises: ValueError if the label column holds anything other than 'CG'/'OR'.

# 1. Load the data
file_path = "fake_reviews_dataset.csv"  # path to the dataset file
data = pd.read_csv(file_path)

# 2. Inspect and clean the data
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
data.info()
# Drop rows containing null values
data.dropna(inplace=True)

# Separate the review text from the labels
texts = data['text_']
labels = data['label']

# Encode 'CG' as 1 and 'OR' as 0
labels = labels.map({'CG': 1, 'OR': 0})
# Series.map turns every value missing from the mapping into NaN. Fail loudly
# here rather than feeding NaN targets to the loss function later.
if labels.isna().any():
    unexpected = sorted(data.loc[labels.isna(), 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label values (expected 'CG' or 'OR'): {unexpected}")
labels = labels.astype(np.float32)

# 3. Train/test split
# Split row positions first so the tokenizer can be fitted on training rows
# only. Same test_size and random_state as a direct split of the arrays; an
# unstratified shuffle split depends only on the row count and the seed.
row_positions = np.arange(len(texts))
train_idx, test_idx = train_test_split(row_positions, test_size=0.2, random_state=42)

# 4. Text preprocessing
# Tokenizer settings
max_words = 10000  # maximum vocabulary size
max_len = 100  # maximum review length in tokens
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
# Fit on training texts only, so the vocabulary carries no information from
# the held-out test reviews; unseen test words map to the OOV token.
tokenizer.fit_on_texts(texts.iloc[train_idx])

# Convert every text to an integer sequence and pad/truncate to max_len
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

# Materialise the split. Labels are handed over as plain NumPy arrays so that
# downstream positional slicing does not depend on the shuffled pandas index.
label_values = labels.to_numpy()
X_train, X_test = padded_sequences[train_idx], padded_sequences[test_idx]
y_train, y_test = label_values[train_idx], label_values[test_idx]

# 5. Build the model
# Embedding -> LSTM -> Dense(relu) -> Dense(sigmoid), with dropout after the
# LSTM and after the hidden dense layer. The single sigmoid unit outputs the
# score for the class encoded as 1.
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 6. Train the model
# Stop once val_loss has not improved for 3 consecutive epochs and keep the
# best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1,
)

# 7. Evaluate on the held-out test set
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

# 8. Save the trained model
model.save("ai_human_review_classifier.h5")
