In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# 1. Load the review dataset
data_path = 'fake_reviews_dataset.csv'  # path to the dataset CSV
data = pd.read_csv(data_path)

# 2. Split human-written and AI-generated reviews by the 'label' column:
#    rows labelled 'OR' go to human_reviews, rows labelled 'CG' to ai_reviews.
human_reviews = data.loc[data['label'].eq('OR')]
ai_reviews = data.loc[data['label'].eq('CG')]

# 3. Preprocessing helper
def preprocess_data(df, max_words=10000, max_len=200, tokenizer=None):
    """Convert a review DataFrame into padded token-id sequences and rating targets.

    Args:
        df: DataFrame with a 'text_' column (review text) and a 'rating' column.
        max_words: Vocabulary cap passed to ``Tokenizer(num_words=...)``. Only
            used when a new tokenizer is created.
        max_len: Length that every sequence is padded (at the end) or truncated to.
        tokenizer: Optional, already fitted tokenizer. When provided it is
            reused unchanged (not refitted), so the word-to-index mapping stays
            the one a previously trained model saw. When ``None`` a new
            tokenizer is created and fitted on the texts of ``df``.

    Returns:
        Tuple ``(padded_sequences, ratings, tokenizer)``.
    """
    # Cast to str so non-string cells (e.g. missing values) can still be tokenized.
    texts = df['text_'].astype(str).values
    ratings = df['rating'].values

    # Tokenize the text; fit a vocabulary only if the caller did not supply one.
    if tokenizer is None:
        tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
        tokenizer.fit_on_texts(texts)

    sequences = tokenizer.texts_to_sequences(texts)
    # Zeros are appended after the tokens ('post' padding).
    padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')

    return padded_sequences, ratings, tokenizer

# 4. Model definition
def build_lstm_model(max_words=10000, max_len=200, embedding_dim=128, mask_zero=False):
    """Build and compile the stacked-LSTM rating regressor.

    Architecture: Embedding -> LSTM(128, returns sequences) -> LSTM(64)
    -> Dense(32, relu) -> Dropout(0.2) -> Dense(1, linear).
    Compiled with the Adam optimizer, MSE loss and MAE as metric.

    Args:
        max_words: Size of the embedding vocabulary (number of token ids).
        max_len: Fixed length of the input token sequences.
        embedding_dim: Width of each embedding vector.
        mask_zero: When True, token id 0 is treated as padding and masked for
            the LSTM layers. Defaults to False, which keeps the original
            behaviour of feeding padding zeros through the LSTMs.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Local import: the notebook's import cell does not bring `Input` into scope.
    from tensorflow.keras.layers import Input

    model = Sequential([
        # The sequence length is declared with an explicit Input layer instead of
        # Embedding's `input_length` argument, which Keras 3 deprecates.
        Input(shape=(max_len,), dtype='int32'),
        Embedding(max_words, embedding_dim, mask_zero=mask_zero),
        LSTM(128, return_sequences=True, dropout=0.2),
        LSTM(64, dropout=0.2),
        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='linear')  # single linear output: regression
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# 5. Training and evaluation
def train_and_evaluate(df, max_words=10000, max_len=200, embedding_dim=128, test_size=0.2, batch_size=32, epochs=10):
    """Fit a freshly built LSTM regressor on ``df`` and print its validation metrics.

    Args:
        df: Review DataFrame handed to ``preprocess_data``.
        max_words: Vocabulary cap for the tokenizer and the embedding layer.
        max_len: Padded sequence length.
        embedding_dim: Embedding width for the new model.
        test_size: Fraction of the samples held out for validation.
        batch_size: Mini-batch size used by ``fit``.
        epochs: Upper bound on training epochs (early stopping may end sooner).

    Returns:
        Tuple ``(model, tokenizer)``: the trained model and the fitted tokenizer.
    """
    features, targets, tokenizer = preprocess_data(df, max_words, max_len)

    # A fixed random_state keeps the train/validation partition repeatable.
    X_train, X_val, y_train, y_val = train_test_split(
        features, targets, test_size=test_size, random_state=42
    )

    model = build_lstm_model(max_words, max_len, embedding_dim)

    # Stop when val_loss has not improved for 3 epochs and restore the best weights.
    stopper = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[stopper],
    )

    val_loss, val_mae = model.evaluate(X_val, y_val)
    print(f"Validation Loss: {val_loss}, Validation MAE: {val_mae}")

    return model, tokenizer

# 6. Train on the human reviews
print("Training on Human Reviews...")
human_model, human_tokenizer = train_and_evaluate(human_reviews)

# 7. Train on the AI reviews
print("Training on AI Reviews...")
ai_model, ai_tokenizer = train_and_evaluate(ai_reviews)

# Save both models (with optimizer state) as HDF5 files. The format is inferred
# from the '.h5' suffix, so the deprecated `save_format` argument is not passed.
# NOTE(review): the fitted tokenizers are not written to disk in this cell;
# they exist only as the in-memory variables above.
human_model.save('human_review_model.h5', include_optimizer=True)
ai_model.save('ai_review_model.h5', include_optimizer=True)

Training on Human Reviews...




Epoch 1/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m327s[0m 592ms/step - loss: 2.7238 - mae: 1.2206 - val_loss: 1.3361 - val_mae: 0.9088
Epoch 2/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m316s[0m 623ms/step - loss: 1.6098 - mae: 0.9891 - val_loss: 1.3544 - val_mae: 0.8787
Epoch 3/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 525ms/step - loss: 1.6021 - mae: 0.9832 - val_loss: 1.3340 - val_mae: 0.8916
Epoch 4/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 639ms/step - loss: 1.6776 - mae: 1.0053 - val_loss: 1.3300 - val_mae: 0.9155
Epoch 5/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m283s[0m 560ms/step - loss: 1.5889 - mae: 0.9708 - val_loss: 1.3338 - val_mae: 0.9253
Epoch 6/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m318s[0m 628ms/step - loss: 1.5714 - mae: 0.9679 - val_loss: 1.3918 - val_mae: 0.9490
Epoch 7/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0



Validation Loss: 1.293982744216919, Validation MAE: 0.9004335999488831




In [7]:
import tensorflow as tf
from keras.metrics import MeanSquaredError

# Load the HDF5 checkpoints without restoring their saved compile state.
# The previous workaround registered the loss name "mse" as a
# keras.metrics.MeanSquaredError instance, i.e. a metric object standing in for
# a loss. Skipping compilation avoids resolving "mse" at load time altogether;
# the models must be compiled again before training or evaluation.
human_model = tf.keras.models.load_model('human_review_model.h5', compile=False)
ai_model = tf.keras.models.load_model('ai_review_model.h5', compile=False)



In [2]:
import tensorflow as tf

# Restore both models from their saved .keras files
keras_models = tf.keras.models
human_model = keras_models.load_model('human_review_model_x2.keras')
ai_model = keras_models.load_model('ai_review_model_x2.keras')

  saveable.load_own_variables(weights_store.get(inner_path))


In [8]:
# Recompile both models with identical settings: Adam optimizer, MSE loss, MAE metric
for reloaded_model in (human_model, ai_model):
    reloaded_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# 1. Load the review dataset
data_path = 'fake_reviews_dataset.csv'  # path to the dataset CSV
data = pd.read_csv(data_path)

# 2. Split human-written and AI-generated reviews by the 'label' column:
#    rows labelled 'OR' go to human_reviews, rows labelled 'CG' to ai_reviews.
human_reviews = data.loc[data['label'].eq('OR')]
ai_reviews = data.loc[data['label'].eq('CG')]

# 3. Preprocessing helper
def preprocess_data(df, max_words=10000, max_len=200, tokenizer=None):
    """Convert a review DataFrame into padded token-id sequences and rating targets.

    Args:
        df: DataFrame with a 'text_' column (review text) and a 'rating' column.
        max_words: Vocabulary cap passed to ``Tokenizer(num_words=...)``. Only
            used when a new tokenizer is created.
        max_len: Length that every sequence is padded (at the end) or truncated to.
        tokenizer: Optional, already fitted tokenizer. When provided it is
            reused unchanged (not refitted), so the word-to-index mapping stays
            the one a previously trained model saw. When ``None`` a new
            tokenizer is created and fitted on the texts of ``df``.

    Returns:
        Tuple ``(padded_sequences, ratings, tokenizer)``.
    """
    # Cast to str so non-string cells (e.g. missing values) can still be tokenized.
    texts = df['text_'].astype(str).values
    ratings = df['rating'].values

    # Tokenize the text; fit a vocabulary only if the caller did not supply one.
    if tokenizer is None:
        tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
        tokenizer.fit_on_texts(texts)

    sequences = tokenizer.texts_to_sequences(texts)
    # Zeros are appended after the tokens ('post' padding).
    padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')

    return padded_sequences, ratings, tokenizer

# 4. Model definition
def build_lstm_model(max_words=10000, max_len=200, embedding_dim=128, mask_zero=False):
    """Build and compile the stacked-LSTM rating regressor.

    Architecture: Embedding -> LSTM(128, returns sequences) -> LSTM(64)
    -> Dense(32, relu) -> Dropout(0.2) -> Dense(1, linear).
    Compiled with the Adam optimizer, MSE loss and MAE as metric.

    Args:
        max_words: Size of the embedding vocabulary (number of token ids).
        max_len: Fixed length of the input token sequences.
        embedding_dim: Width of each embedding vector.
        mask_zero: When True, token id 0 is treated as padding and masked for
            the LSTM layers. Defaults to False, which keeps the original
            behaviour of feeding padding zeros through the LSTMs.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Local import: the notebook's import cell does not bring `Input` into scope.
    from tensorflow.keras.layers import Input

    model = Sequential([
        # The sequence length is declared with an explicit Input layer instead of
        # Embedding's `input_length` argument, which Keras 3 deprecates.
        Input(shape=(max_len,), dtype='int32'),
        Embedding(max_words, embedding_dim, mask_zero=mask_zero),
        LSTM(128, return_sequences=True, dropout=0.2),
        LSTM(64, dropout=0.2),
        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='linear')  # single linear output: regression
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# 5. Training and evaluation
def train_and_evaluate(df, max_words=10000, max_len=200, embedding_dim=128, test_size=0.2, batch_size=32, epochs=10, model=None):
    """Train (or continue training) an LSTM regressor on ``df`` and print validation metrics.

    Args:
        df: Review DataFrame handed to ``preprocess_data``.
        max_words: Vocabulary cap for the tokenizer (and the embedding layer
            when a new model is built).
        max_len: Padded sequence length.
        embedding_dim: Embedding width; only used when a new model is built.
        test_size: Fraction of the samples held out for validation.
        batch_size: Mini-batch size used by ``fit``.
        epochs: Upper bound on training epochs (early stopping may end sooner).
        model: Optional already compiled model to keep training. When ``None``
            a new model is created with ``build_lstm_model``.

    Returns:
        Tuple ``(model, tokenizer)``: the trained model and the fitted tokenizer.
    """
    # NOTE(review): preprocess_data fits a new tokenizer on `df` on every call.
    # When continuing to train an existing model, the caller must make sure
    # `df` produces the same vocabulary the model was first trained with.
    X, y, tokenizer = preprocess_data(df, max_words, max_len)
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42)

    # Previously a missing model reached `None.fit(...)`; build one instead.
    if model is None:
        model = build_lstm_model(max_words, max_len, embedding_dim)

    # Stop when val_loss has not improved for 3 epochs and restore the best weights.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[early_stopping]
    )

    val_loss, val_mae = model.evaluate(X_val, y_val)
    print(f"Validation Loss: {val_loss}, Validation MAE: {val_mae}")

    return model, tokenizer

# NOTE: this cell reads `human_model` and `ai_model` before assigning them, so
# both must already exist in the kernel (defined by a previously executed cell).

# 6. Continue training on the human reviews
print("Training on Human Reviews...")
human_model, human_tokenizer = train_and_evaluate(df=human_reviews, model=human_model)

# 7. Continue training on the AI reviews
print("Training on AI Reviews...")
ai_model, ai_tokenizer = train_and_evaluate(df=ai_reviews, model=ai_model)

# Save both further-trained models, optimizer state included, to .keras files
for trained_model, target_path in (
    (human_model, 'human_review_model_x2.keras'),
    (ai_model, 'ai_review_model_x2.keras'),
):
    trained_model.save(target_path, include_optimizer=True)

Training on Human Reviews...
Epoch 1/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 245ms/step - loss: 1.6056 - mae: 0.9797 - val_loss: 1.4377 - val_mae: 1.0071
Epoch 2/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 292ms/step - loss: 1.4981 - mae: 0.9401 - val_loss: 1.3431 - val_mae: 0.9134
Epoch 3/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 282ms/step - loss: 1.5460 - mae: 0.9708 - val_loss: 1.3508 - val_mae: 0.9395
Epoch 4/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 262ms/step - loss: 1.4644 - mae: 0.9288 - val_loss: 1.3346 - val_mae: 0.9186
Epoch 5/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 279ms/step - loss: 1.4210 - mae: 0.9104 - val_loss: 1.3403 - val_mae: 0.9106
Epoch 6/10
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 305ms/step - loss: 1.4073 - mae: 0.9119 - val_loss: 1.3521 - val_mae: 0.9324
Epoch 7/10
[1m506/506[0m [32m━━━

In [10]:
# Write both models, optimizer state included, to their .keras files
for model_to_save, save_path in (
    (human_model, 'human_review_model_x2.keras'),
    (ai_model, 'ai_review_model_x2.keras'),
):
    model_to_save.save(save_path, include_optimizer=True)