In [4]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks

In [5]:
# 1. Data loading: use only the RoBERTa embeddings
def load_data(path):
    """Read a JSON dataset and return its RoBERTa features and labels.

    Args:
        path: Location of a JSON file readable by ``pandas.read_json``. The
            resulting frame must contain a ``roberta_cls`` column and a
            ``fake`` column.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is a NumPy array built
        from the list of ``roberta_cls`` entries; ``labels`` holds the values
        of the ``fake`` column.
    """
    frame = pd.read_json(path)
    labels = frame['fake'].values
    # Each cell of `roberta_cls` is turned into one row of the feature array.
    features = np.array(frame['roberta_cls'].tolist())
    return features, labels

In [None]:
# 2. Model definition: use only the RoBERTa embeddings
def build_model(input_dim=768, hidden_units=(256, 128), dropout_rate=0.3,
                learning_rate=1e-4):
    """Build and compile a feed-forward binary classifier.

    The network is a stack of ``Dense(relu) -> Dropout`` pairs followed by a
    single sigmoid unit. Calling it with no arguments reproduces the
    originally hard-coded architecture (768 -> 256 -> 128 -> 1).

    Args:
        input_dim: Length of each input feature vector. Defaults to 768.
        hidden_units: Iterable with the width of each hidden ``Dense`` layer,
            in order. Defaults to ``(256, 128)``.
        dropout_rate: Dropout rate applied after every hidden layer.
            Defaults to 0.3.
        learning_rate: Learning rate handed to the Adam optimizer.
            Defaults to 1e-4.

    Returns:
        A compiled Keras ``Model`` using binary cross-entropy loss and
        reporting accuracy.
    """
    roberta_input = layers.Input(shape=(input_dim,))

    x = roberta_input
    # Layers are created in the same order as before (dense, dropout, dense,
    # dropout) so the default call yields the same graph.
    for units in hidden_units:
        x = layers.Dense(units, activation='relu')(x)
        x = layers.Dropout(dropout_rate)(x)
    # One sigmoid unit: the output is a value in (0, 1) per sample.
    output = layers.Dense(1, activation='sigmoid')(x)

    model = models.Model(inputs=roberta_input, outputs=output)
    model.compile(optimizer=optimizers.Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [7]:
# 3. Training function: takes a single input array
def train_model(X, y, test_size=0.2, epochs=100, batch_size=64, patience=5,
                threshold=0.5, random_state=42):
    """Train the classifier on a stratified split and print validation metrics.

    Args:
        X: Feature array passed to ``train_test_split`` and then to the model.
        y: Binary labels aligned with ``X``; also used to stratify the split.
        test_size: Fraction of samples held out for validation. Defaults to 0.2.
        epochs: Maximum number of training epochs. Defaults to 100.
        batch_size: Mini-batch size used by ``model.fit``. Defaults to 64.
        patience: Epochs without ``val_loss`` improvement before early
            stopping triggers. Defaults to 5.
        threshold: Predicted probabilities strictly above this value are
            labelled 1. Defaults to 0.5.
        random_state: Seed for the train/validation split. Defaults to 42.
            It does not seed the model's weight initialisation or dropout.

    Returns:
        The fitted Keras model. ``restore_best_weights=True`` is requested, so
        when early stopping fires the weights of the best ``val_loss`` epoch
        are restored.
    """
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state)

    model = build_model()

    early_stop = callbacks.EarlyStopping(
        monitor='val_loss', patience=patience, restore_best_weights=True)

    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stop]
    )

    # Flatten the predictions to 1-D so they line up with `y_val` rather than
    # relying on scikit-learn to coerce a column vector.
    y_pred_prob = model.predict(X_val).ravel()
    y_pred = (y_pred_prob > threshold).astype(int)

    # NOTE: these metrics are computed on the same split that drives early
    # stopping, so they are likely optimistic compared with an untouched
    # test set.
    acc = accuracy_score(y_val, y_pred)
    prec = precision_score(y_val, y_pred)
    rec = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)

    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1 Score : {f1:.4f}")

    return model

In [17]:
# Load the embeddings and labels, then fit the classifier; train_model prints
# the validation metrics and returns the fitted model.
DATA_PATH = "emb_cls.json"
X, y = load_data(DATA_PATH)
model = train_model(X, y)

Epoch 1/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6250 - loss: 0.6377 - val_accuracy: 0.7119 - val_loss: 0.5586
Epoch 2/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7133 - loss: 0.5619 - val_accuracy: 0.7194 - val_loss: 0.5485
Epoch 3/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7178 - loss: 0.5536 - val_accuracy: 0.7206 - val_loss: 0.5435
Epoch 4/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7252 - loss: 0.5438 - val_accuracy: 0.7230 - val_loss: 0.5404
Epoch 5/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7294 - loss: 0.5371 - val_accuracy: 0.7274 - val_loss: 0.5357
Epoch 6/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7295 - loss: 0.5351 - val_accuracy: 0.7294 - val_loss: 0.5335
Epoch 7/10