In [9]:
# Hard-coded, three-line summary of a hyperparameter combination
# (presumably copied by hand from a grid-search run - verify before relying on it).
# The printed dict text ends with "}" so the braces are balanced.
print("Best params: {'embedding_dim': 64, 'lstm_units': 32, \n'dropout_rate': 0.3, 'dense_units': 32, \n'batch_size': 32, 'epochs': 10}")

Best params: {'embedding_dim': 64, 'lstm_units': 32, 
'dropout_rate': 0.3, 'dense_units': 32, 
'batch_size': 32, 'epochs': 10


In [7]:
import itertools
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score

# Grid search over LSTM hyperparameters.
#
# Reads from the kernel (defined in another cell): max_words, X_train, y_train,
# X_test, y_test.
# Leaves behind: param_grid, param_combinations, best_accuracy, best_params,
# best_model, plus the last model / history / early_stopping objects.
from tensorflow.keras.backend import clear_session

# Candidate values for each hyperparameter
param_grid = {
    "embedding_dim": [64, 128],
    "lstm_units": [32, 64],
    "dropout_rate": [0.3, 0.5],
    "dense_units": [16, 32],
    "batch_size": [32, 64],
    "epochs": [5, 10]
}

# Cartesian product of all candidate values; every tuple follows the key order
# of param_grid.
param_combinations = list(itertools.product(*param_grid.values()))

# File the best model is written to. The name is kept as-is because another
# cell loads exactly this file.
best_model_path = "best_lstm_model.h5"

best_accuracy = 0
best_params = None
best_model = None

# Train one model per combination and keep the most accurate one.
# NOTE(review): candidates are ranked by their accuracy on X_test, so
# best_accuracy is an optimistic estimate of generalisation; consider ranking
# on a separate validation split and touching the test set only once.
for params in param_combinations:
    embedding_dim, lstm_units, dropout_rate, dense_units, batch_size, epochs = params

    print(f"Testing combination: {params}")

    # Many models are created in this loop; reset Keras' global state so it
    # does not keep growing from one candidate to the next.
    clear_session()

    # Build the model. The Embedding layer takes its sequence length from the
    # input data, so the deprecated `input_length` argument is not passed.
    model = Sequential([
        Embedding(input_dim=max_words, output_dim=embedding_dim),
        LSTM(lstm_units, return_sequences=False),
        Dropout(dropout_rate),
        Dense(dense_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

    # Stop when val_loss has not improved for 2 epochs and roll back to the
    # best weights seen.
    early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

    # Train
    history = model.fit(
        X_train, y_train,
        validation_split=0.2,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping],
        verbose=2
    )

    # Evaluate: threshold the sigmoid output at 0.5. The Dense(1) head yields
    # one column per sample, so flatten it to a 1-D label vector.
    y_pred = (model.predict(X_test) > 0.5).astype(int).ravel()
    acc = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {acc:.4f}")

    # Keep the best model and persist it immediately, so that interrupting
    # this long-running search does not lose the best model found so far.
    if acc > best_accuracy:
        best_accuracy = acc
        best_params = params
        best_model = model
        best_model.save(best_model_path)

# Report the winner. best_model stays None when the grid is empty or no
# candidate scored above 0, in which case there is nothing to report.
if best_model is None:
    print("No candidate model improved on the initial accuracy; nothing was saved.")
else:
    print(f"Best Accuracy: {best_accuracy:.4f}")
    print(f"Best Parameters: {dict(zip(param_grid.keys(), best_params))}")
    print(f"Best model saved as '{best_model_path}'")


Testing combination: (64, 32, 0.3, 16, 32, 5)
Epoch 1/5
809/809 - 30s - 37ms/step - accuracy: 0.6791 - loss: 0.5725 - val_accuracy: 0.7646 - val_loss: 0.3717
Epoch 2/5
809/809 - 30s - 38ms/step - accuracy: 0.8354 - loss: 0.4099 - val_accuracy: 0.8578 - val_loss: 0.3707
Epoch 3/5
809/809 - 28s - 35ms/step - accuracy: 0.8712 - loss: 0.3492 - val_accuracy: 0.8953 - val_loss: 0.2701
Epoch 4/5
809/809 - 29s - 36ms/step - accuracy: 0.9137 - loss: 0.2567 - val_accuracy: 0.9082 - val_loss: 0.2531
Epoch 5/5
809/809 - 31s - 38ms/step - accuracy: 0.9250 - loss: 0.2269 - val_accuracy: 0.8995 - val_loss: 0.2925
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
Accuracy: 0.9038
Testing combination: (64, 32, 0.3, 16, 32, 10)
Epoch 1/10




809/809 - 32s - 39ms/step - accuracy: 0.6792 - loss: 0.6115 - val_accuracy: 0.7398 - val_loss: 0.5753
Epoch 2/10
809/809 - 31s - 38ms/step - accuracy: 0.7273 - loss: 0.5265 - val_accuracy: 0.8218 - val_loss: 0.4215
Epoch 3/10
809/809 - 30s - 37ms/step - accuracy: 0.8336 - loss: 0.4037 - val_accuracy: 0.8028 - val_loss: 0.3740
Epoch 4/10
809/809 - 31s - 38ms/step - accuracy: 0.7558 - loss: 0.4751 - val_accuracy: 0.5996 - val_loss: 0.5801
Epoch 5/10
809/809 - 31s - 38ms/step - accuracy: 0.7823 - loss: 0.4365 - val_accuracy: 0.8941 - val_loss: 0.2824
Epoch 6/10
809/809 - 30s - 37ms/step - accuracy: 0.9232 - loss: 0.2192 - val_accuracy: 0.9260 - val_loss: 0.1882
Epoch 7/10
809/809 - 31s - 38ms/step - accuracy: 0.9493 - loss: 0.1468 - val_accuracy: 0.9311 - val_loss: 0.1940
Epoch 8/10
809/809 - 32s - 40ms/step - accuracy: 0.9662 - loss: 0.1016 - val_accuracy: 0.9303 - val_loss: 0.2145
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
Accuracy: 0.9226
Testing combin



405/405 - 21s - 51ms/step - accuracy: 0.6445 - loss: 0.6371 - val_accuracy: 0.6420 - val_loss: 0.6327
Epoch 2/5
405/405 - 17s - 43ms/step - accuracy: 0.6485 - loss: 0.6250 - val_accuracy: 0.6101 - val_loss: 0.6182
Epoch 3/5
405/405 - 18s - 45ms/step - accuracy: 0.6927 - loss: 0.5676 - val_accuracy: 0.8201 - val_loss: 0.4467
Epoch 4/5
405/405 - 17s - 43ms/step - accuracy: 0.8621 - loss: 0.3771 - val_accuracy: 0.8969 - val_loss: 0.2827
Epoch 5/5
405/405 - 18s - 43ms/step - accuracy: 0.9116 - loss: 0.2712 - val_accuracy: 0.9125 - val_loss: 0.2593
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
Accuracy: 0.9059
Testing combination: (64, 32, 0.3, 16, 64, 10)
Epoch 1/10




405/405 - 25s - 61ms/step - accuracy: 0.6059 - loss: 0.6059 - val_accuracy: 0.6445 - val_loss: 0.5551
Epoch 2/10
405/405 - 19s - 46ms/step - accuracy: 0.7393 - loss: 0.5062 - val_accuracy: 0.8088 - val_loss: 0.4127
Epoch 3/10
405/405 - 18s - 46ms/step - accuracy: 0.7832 - loss: 0.4975 - val_accuracy: 0.8521 - val_loss: 0.3806
Epoch 4/10
405/405 - 17s - 42ms/step - accuracy: 0.8613 - loss: 0.3467 - val_accuracy: 0.8731 - val_loss: 0.2884
Epoch 5/10
405/405 - 16s - 40ms/step - accuracy: 0.8788 - loss: 0.2748 - val_accuracy: 0.8720 - val_loss: 0.2728
Epoch 6/10
405/405 - 17s - 41ms/step - accuracy: 0.9030 - loss: 0.2314 - val_accuracy: 0.9031 - val_loss: 0.2526
Epoch 7/10
405/405 - 16s - 40ms/step - accuracy: 0.9206 - loss: 0.1908 - val_accuracy: 0.9026 - val_loss: 0.2300
Epoch 8/10
405/405 - 16s - 40ms/step - accuracy: 0.9328 - loss: 0.1632 - val_accuracy: 0.9193 - val_loss: 0.2144
Epoch 9/10
405/405 - 16s - 40ms/step - accuracy: 0.9374 - loss: 0.1445 - val_accuracy: 0.9119 - val_loss: 0



809/809 - 33s - 40ms/step - accuracy: 0.5987 - loss: 0.6565 - val_accuracy: 0.6046 - val_loss: 0.6153
Epoch 2/5
809/809 - 31s - 38ms/step - accuracy: 0.7660 - loss: 0.4742 - val_accuracy: 0.8964 - val_loss: 0.2607
Epoch 3/5
809/809 - 31s - 39ms/step - accuracy: 0.9273 - loss: 0.2093 - val_accuracy: 0.9332 - val_loss: 0.1842
Epoch 4/5
809/809 - 33s - 41ms/step - accuracy: 0.9582 - loss: 0.1268 - val_accuracy: 0.9362 - val_loss: 0.1801
Epoch 5/5
809/809 - 32s - 39ms/step - accuracy: 0.9712 - loss: 0.0898 - val_accuracy: 0.9403 - val_loss: 0.1733
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.9382
Testing combination: (64, 32, 0.3, 32, 32, 10)
Epoch 1/10




809/809 - 35s - 43ms/step - accuracy: 0.6519 - loss: 0.6034 - val_accuracy: 0.6506 - val_loss: 0.5562
Epoch 2/10
809/809 - 31s - 39ms/step - accuracy: 0.7361 - loss: 0.5209 - val_accuracy: 0.8358 - val_loss: 0.3984
Epoch 3/10
809/809 - 32s - 40ms/step - accuracy: 0.8438 - loss: 0.4026 - val_accuracy: 0.8377 - val_loss: 0.4272
Epoch 4/10
809/809 - 32s - 40ms/step - accuracy: 0.7008 - loss: 0.5299 - val_accuracy: 0.6748 - val_loss: 0.5731
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.8355
Testing combination: (64, 32, 0.3, 32, 64, 5)
Epoch 1/5




405/405 - 21s - 51ms/step - accuracy: 0.7488 - loss: 0.5134 - val_accuracy: 0.7964 - val_loss: 0.3596
Epoch 2/5
405/405 - 17s - 42ms/step - accuracy: 0.7218 - loss: 0.5728 - val_accuracy: 0.7015 - val_loss: 0.5693
Epoch 3/5
405/405 - 18s - 44ms/step - accuracy: 0.7390 - loss: 0.5482 - val_accuracy: 0.8309 - val_loss: 0.4040
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.7892
Testing combination: (64, 32, 0.3, 32, 64, 10)
Epoch 1/10




405/405 - 23s - 57ms/step - accuracy: 0.5924 - loss: 0.6461 - val_accuracy: 0.6077 - val_loss: 0.6266
Epoch 2/10
405/405 - 18s - 44ms/step - accuracy: 0.7637 - loss: 0.5082 - val_accuracy: 0.8399 - val_loss: 0.4236
Epoch 3/10
405/405 - 18s - 44ms/step - accuracy: 0.8456 - loss: 0.4230 - val_accuracy: 0.8480 - val_loss: 0.4008
Epoch 4/10
405/405 - 19s - 47ms/step - accuracy: 0.8496 - loss: 0.4000 - val_accuracy: 0.8514 - val_loss: 0.4135
Epoch 5/10
405/405 - 18s - 45ms/step - accuracy: 0.8522 - loss: 0.3854 - val_accuracy: 0.8544 - val_loss: 0.3545
Epoch 6/10
405/405 - 18s - 44ms/step - accuracy: 0.8624 - loss: 0.3494 - val_accuracy: 0.8739 - val_loss: 0.3273
Epoch 7/10
405/405 - 19s - 47ms/step - accuracy: 0.8808 - loss: 0.3002 - val_accuracy: 0.8791 - val_loss: 0.2954
Epoch 8/10
405/405 - 19s - 46ms/step - accuracy: 0.9034 - loss: 0.2412 - val_accuracy: 0.8717 - val_loss: 0.2840
Epoch 9/10
405/405 - 19s - 46ms/step - accuracy: 0.9274 - loss: 0.2033 - val_accuracy: 0.9009 - val_loss: 0



809/809 - 35s - 43ms/step - accuracy: 0.5869 - loss: 0.6327 - val_accuracy: 0.6800 - val_loss: 0.5650
Epoch 2/5
809/809 - 31s - 38ms/step - accuracy: 0.8422 - loss: 0.3850 - val_accuracy: 0.8963 - val_loss: 0.3006
Epoch 3/5
809/809 - 32s - 39ms/step - accuracy: 0.8616 - loss: 0.3466 - val_accuracy: 0.9209 - val_loss: 0.2562
Epoch 4/5
809/809 - 31s - 38ms/step - accuracy: 0.9398 - loss: 0.1882 - val_accuracy: 0.9315 - val_loss: 0.1796
Epoch 5/5
809/809 - 31s - 39ms/step - accuracy: 0.9564 - loss: 0.1359 - val_accuracy: 0.9351 - val_loss: 0.1789
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
Accuracy: 0.9295
Testing combination: (64, 32, 0.5, 16, 32, 10)
Epoch 1/10




809/809 - 33s - 40ms/step - accuracy: 0.5730 - loss: 0.6745 - val_accuracy: 0.5140 - val_loss: 0.6584
Epoch 2/10
809/809 - 30s - 38ms/step - accuracy: 0.6276 - loss: 0.6034 - val_accuracy: 0.7490 - val_loss: 0.4931
Epoch 3/10
809/809 - 30s - 37ms/step - accuracy: 0.6428 - loss: 0.5887 - val_accuracy: 0.7893 - val_loss: 0.4571
Epoch 4/10
809/809 - 31s - 39ms/step - accuracy: 0.8080 - loss: 0.4767 - val_accuracy: 0.8518 - val_loss: 0.3771
Epoch 5/10
809/809 - 32s - 39ms/step - accuracy: 0.8366 - loss: 0.3928 - val_accuracy: 0.8462 - val_loss: 0.3451
Epoch 6/10
809/809 - 31s - 39ms/step - accuracy: 0.9096 - loss: 0.2596 - val_accuracy: 0.9212 - val_loss: 0.2180
Epoch 7/10
809/809 - 29s - 36ms/step - accuracy: 0.9301 - loss: 0.2287 - val_accuracy: 0.9187 - val_loss: 0.2307
Epoch 8/10
809/809 - 33s - 41ms/step - accuracy: 0.9484 - loss: 0.1634 - val_accuracy: 0.9317 - val_loss: 0.1852
Epoch 9/10
809/809 - 31s - 39ms/step - accuracy: 0.9629 - loss: 0.1294 - val_accuracy: 0.9360 - val_loss: 0



405/405 - 20s - 48ms/step - accuracy: 0.6023 - loss: 0.6223 - val_accuracy: 0.7674 - val_loss: 0.6831
Epoch 2/5
405/405 - 18s - 43ms/step - accuracy: 0.6799 - loss: 0.6174 - val_accuracy: 0.5873 - val_loss: 0.6457
Epoch 3/5
405/405 - 18s - 44ms/step - accuracy: 0.6093 - loss: 0.6099 - val_accuracy: 0.6083 - val_loss: 0.5877
Epoch 4/5
405/405 - 18s - 44ms/step - accuracy: 0.6196 - loss: 0.5707 - val_accuracy: 0.6149 - val_loss: 0.5665
Epoch 5/5
405/405 - 18s - 45ms/step - accuracy: 0.6456 - loss: 0.5413 - val_accuracy: 0.7725 - val_loss: 0.4867
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step
Accuracy: 0.7774
Testing combination: (64, 32, 0.5, 16, 64, 10)
Epoch 1/10




405/405 - 21s - 53ms/step - accuracy: 0.6235 - loss: 0.6529 - val_accuracy: 0.7386 - val_loss: 0.5595
Epoch 2/10
405/405 - 17s - 43ms/step - accuracy: 0.6273 - loss: 0.6425 - val_accuracy: 0.4990 - val_loss: 0.6929
Epoch 3/10
405/405 - 17s - 43ms/step - accuracy: 0.5490 - loss: 0.6641 - val_accuracy: 0.7318 - val_loss: 0.5821
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.7361
Testing combination: (64, 32, 0.5, 32, 32, 5)
Epoch 1/5




809/809 - 38s - 47ms/step - accuracy: 0.6059 - loss: 0.6134 - val_accuracy: 0.8295 - val_loss: 0.4675
Epoch 2/5
809/809 - 34s - 42ms/step - accuracy: 0.8050 - loss: 0.4884 - val_accuracy: 0.6196 - val_loss: 0.6524
Epoch 3/5
809/809 - 34s - 42ms/step - accuracy: 0.7784 - loss: 0.4642 - val_accuracy: 0.8313 - val_loss: 0.4070
Epoch 4/5
809/809 - 31s - 38ms/step - accuracy: 0.8276 - loss: 0.4099 - val_accuracy: 0.8555 - val_loss: 0.3864
Epoch 5/5
809/809 - 32s - 39ms/step - accuracy: 0.8703 - loss: 0.3233 - val_accuracy: 0.8989 - val_loss: 0.2644
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
Accuracy: 0.8925
Testing combination: (64, 32, 0.5, 32, 32, 10)
Epoch 1/10




809/809 - 33s - 41ms/step - accuracy: 0.5904 - loss: 0.6500 - val_accuracy: 0.8151 - val_loss: 0.4766
Epoch 2/10
809/809 - 31s - 38ms/step - accuracy: 0.7830 - loss: 0.4922 - val_accuracy: 0.6118 - val_loss: 0.6151
Epoch 3/10
809/809 - 33s - 41ms/step - accuracy: 0.6418 - loss: 0.6068 - val_accuracy: 0.8225 - val_loss: 0.4965
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step
Accuracy: 0.8191
Testing combination: (64, 32, 0.5, 32, 64, 5)
Epoch 1/5




405/405 - 21s - 51ms/step - accuracy: 0.6413 - loss: 0.6525 - val_accuracy: 0.6009 - val_loss: 0.6713
Epoch 2/5
405/405 - 18s - 44ms/step - accuracy: 0.6518 - loss: 0.5985 - val_accuracy: 0.8094 - val_loss: 0.4431
Epoch 3/5
405/405 - 17s - 43ms/step - accuracy: 0.7723 - loss: 0.4846 - val_accuracy: 0.6446 - val_loss: 0.5491
Epoch 4/5
405/405 - 17s - 41ms/step - accuracy: 0.8063 - loss: 0.4262 - val_accuracy: 0.8457 - val_loss: 0.3821
Epoch 5/5
405/405 - 17s - 41ms/step - accuracy: 0.8691 - loss: 0.3499 - val_accuracy: 0.6508 - val_loss: 1.5229
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy: 0.8456
Testing combination: (64, 32, 0.5, 32, 64, 10)
Epoch 1/10




405/405 - 20s - 50ms/step - accuracy: 0.5662 - loss: 0.6790 - val_accuracy: 0.5019 - val_loss: 0.6677
Epoch 2/10
405/405 - 18s - 45ms/step - accuracy: 0.6914 - loss: 0.6095 - val_accuracy: 0.7315 - val_loss: 0.5669
Epoch 3/10
405/405 - 18s - 44ms/step - accuracy: 0.7138 - loss: 0.5873 - val_accuracy: 0.7451 - val_loss: 0.5489
Epoch 4/10
405/405 - 18s - 44ms/step - accuracy: 0.7579 - loss: 0.5380 - val_accuracy: 0.7604 - val_loss: 0.5332
Epoch 5/10
405/405 - 18s - 44ms/step - accuracy: 0.6572 - loss: 0.5838 - val_accuracy: 0.8488 - val_loss: 0.4377
Epoch 6/10
405/405 - 17s - 42ms/step - accuracy: 0.7482 - loss: 0.5152 - val_accuracy: 0.6506 - val_loss: 0.7918
Epoch 7/10
405/405 - 17s - 43ms/step - accuracy: 0.7587 - loss: 0.4821 - val_accuracy: 0.8689 - val_loss: 0.3597
Epoch 8/10
405/405 - 18s - 44ms/step - accuracy: 0.6399 - loss: 0.5832 - val_accuracy: 0.6174 - val_loss: 0.5749
Epoch 9/10
405/405 - 18s - 44ms/step - accuracy: 0.7799 - loss: 0.4469 - val_accuracy: 0.8632 - val_loss: 0



809/809 - 41s - 51ms/step - accuracy: 0.5178 - loss: 0.6904 - val_accuracy: 0.4970 - val_loss: 0.6861
Epoch 2/5
809/809 - 40s - 49ms/step - accuracy: 0.5837 - loss: 0.6597 - val_accuracy: 0.6378 - val_loss: 0.6002
Epoch 3/5
809/809 - 40s - 50ms/step - accuracy: 0.6944 - loss: 0.5819 - val_accuracy: 0.6372 - val_loss: 0.5404
Epoch 4/5
809/809 - 41s - 51ms/step - accuracy: 0.8498 - loss: 0.3463 - val_accuracy: 0.8966 - val_loss: 0.3077
Epoch 5/5
809/809 - 41s - 50ms/step - accuracy: 0.9235 - loss: 0.2076 - val_accuracy: 0.9229 - val_loss: 0.2050
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step
Accuracy: 0.9197
Testing combination: (64, 64, 0.3, 16, 32, 10)
Epoch 1/10




809/809 - 42s - 52ms/step - accuracy: 0.6301 - loss: 0.6119 - val_accuracy: 0.8406 - val_loss: 0.4617
Epoch 2/10
809/809 - 42s - 52ms/step - accuracy: 0.8696 - loss: 0.3513 - val_accuracy: 0.8768 - val_loss: 0.3459
Epoch 3/10
809/809 - 40s - 49ms/step - accuracy: 0.8307 - loss: 0.4271 - val_accuracy: 0.8658 - val_loss: 0.3495
Epoch 4/10
809/809 - 39s - 48ms/step - accuracy: 0.9223 - loss: 0.2190 - val_accuracy: 0.9210 - val_loss: 0.2253
Epoch 5/10
809/809 - 38s - 47ms/step - accuracy: 0.9432 - loss: 0.1631 - val_accuracy: 0.9250 - val_loss: 0.2081
Epoch 6/10
809/809 - 38s - 47ms/step - accuracy: 0.9518 - loss: 0.1434 - val_accuracy: 0.9260 - val_loss: 0.2146
Epoch 7/10
809/809 - 39s - 48ms/step - accuracy: 0.9595 - loss: 0.1192 - val_accuracy: 0.9307 - val_loss: 0.2016
Epoch 8/10
809/809 - 40s - 49ms/step - accuracy: 0.9671 - loss: 0.0998 - val_accuracy: 0.9318 - val_loss: 0.1938
Epoch 9/10
809/809 - 41s - 51ms/step - accuracy: 0.9727 - loss: 0.0842 - val_accuracy: 0.9343 - val_loss: 0



405/405 - 30s - 74ms/step - accuracy: 0.6535 - loss: 0.6263 - val_accuracy: 0.7244 - val_loss: 0.5670
Epoch 2/5
405/405 - 28s - 70ms/step - accuracy: 0.7446 - loss: 0.5549 - val_accuracy: 0.7686 - val_loss: 0.5258
Epoch 3/5
405/405 - 28s - 69ms/step - accuracy: 0.6532 - loss: 0.5747 - val_accuracy: 0.6137 - val_loss: 0.5607
Epoch 4/5
405/405 - 29s - 71ms/step - accuracy: 0.7521 - loss: 0.4445 - val_accuracy: 0.8940 - val_loss: 0.2534
Epoch 5/5
405/405 - 24s - 59ms/step - accuracy: 0.9066 - loss: 0.2466 - val_accuracy: 0.9205 - val_loss: 0.2152
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step
Accuracy: 0.9191
Testing combination: (64, 64, 0.3, 16, 64, 10)
Epoch 1/10




405/405 - 28s - 69ms/step - accuracy: 0.6174 - loss: 0.6558 - val_accuracy: 0.5726 - val_loss: 0.6765
Epoch 2/10
405/405 - 29s - 72ms/step - accuracy: 0.6886 - loss: 0.5818 - val_accuracy: 0.6850 - val_loss: 0.5649
Epoch 3/10
405/405 - 29s - 70ms/step - accuracy: 0.6742 - loss: 0.6028 - val_accuracy: 0.6074 - val_loss: 0.6501
Epoch 4/10
405/405 - 25s - 63ms/step - accuracy: 0.5762 - loss: 0.6436 - val_accuracy: 0.5962 - val_loss: 0.6303
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step
Accuracy: 0.6826
Testing combination: (64, 64, 0.3, 32, 32, 5)
Epoch 1/5




809/809 - 42s - 52ms/step - accuracy: 0.6717 - loss: 0.6011 - val_accuracy: 0.5516 - val_loss: 0.6828
Epoch 2/5
809/809 - 54s - 67ms/step - accuracy: 0.5824 - loss: 0.6680 - val_accuracy: 0.5540 - val_loss: 0.6598
Epoch 3/5
809/809 - 39s - 48ms/step - accuracy: 0.5776 - loss: 0.6467 - val_accuracy: 0.6812 - val_loss: 0.5965
Epoch 4/5
809/809 - 45s - 56ms/step - accuracy: 0.7037 - loss: 0.5626 - val_accuracy: 0.7844 - val_loss: 0.4055
Epoch 5/5
809/809 - 54s - 67ms/step - accuracy: 0.5904 - loss: 0.6255 - val_accuracy: 0.5019 - val_loss: 0.6806
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step
Accuracy: 0.7821
Testing combination: (64, 64, 0.3, 32, 32, 10)
Epoch 1/10




809/809 - 44s - 54ms/step - accuracy: 0.6976 - loss: 0.5854 - val_accuracy: 0.7168 - val_loss: 0.5871
Epoch 2/10
809/809 - 41s - 51ms/step - accuracy: 0.7238 - loss: 0.5369 - val_accuracy: 0.7757 - val_loss: 0.4636
Epoch 3/10
809/809 - 41s - 51ms/step - accuracy: 0.8084 - loss: 0.4345 - val_accuracy: 0.8211 - val_loss: 0.4124
Epoch 4/10
809/809 - 43s - 53ms/step - accuracy: 0.5939 - loss: 0.6198 - val_accuracy: 0.5019 - val_loss: 0.6829
Epoch 5/10
809/809 - 43s - 54ms/step - accuracy: 0.5146 - loss: 0.6762 - val_accuracy: 0.5203 - val_loss: 0.6829
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step
Accuracy: 0.8138
Testing combination: (64, 64, 0.3, 32, 64, 5)




Epoch 1/5
405/405 - 67s - 165ms/step - accuracy: 0.6215 - loss: 0.6438 - val_accuracy: 0.6077 - val_loss: 0.6631
Epoch 2/5
405/405 - 28s - 69ms/step - accuracy: 0.7323 - loss: 0.5572 - val_accuracy: 0.8060 - val_loss: 0.4700
Epoch 3/5
405/405 - 27s - 67ms/step - accuracy: 0.6870 - loss: 0.5626 - val_accuracy: 0.6466 - val_loss: 0.6270
Epoch 4/5
405/405 - 24s - 60ms/step - accuracy: 0.7030 - loss: 0.5431 - val_accuracy: 0.8048 - val_loss: 0.3867
Epoch 5/5
405/405 - 25s - 63ms/step - accuracy: 0.8601 - loss: 0.3321 - val_accuracy: 0.8720 - val_loss: 0.2967
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step
Accuracy: 0.8749
Testing combination: (64, 64, 0.3, 32, 64, 10)
Epoch 1/10




405/405 - 33s - 81ms/step - accuracy: 0.6845 - loss: 0.6123 - val_accuracy: 0.7640 - val_loss: 0.5412
Epoch 2/10
405/405 - 27s - 66ms/step - accuracy: 0.6884 - loss: 0.5836 - val_accuracy: 0.6731 - val_loss: 0.6029
Epoch 3/10
405/405 - 26s - 64ms/step - accuracy: 0.6013 - loss: 0.6283 - val_accuracy: 0.5594 - val_loss: 0.6589
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step
Accuracy: 0.7626
Testing combination: (64, 64, 0.5, 16, 32, 5)
Epoch 1/5




809/809 - 43s - 53ms/step - accuracy: 0.5313 - loss: 0.6911 - val_accuracy: 0.5005 - val_loss: 0.6924
Epoch 2/5
809/809 - 37s - 46ms/step - accuracy: 0.5590 - loss: 0.6691 - val_accuracy: 0.5545 - val_loss: 0.6639
Epoch 3/5
809/809 - 38s - 47ms/step - accuracy: 0.5488 - loss: 0.6694 - val_accuracy: 0.6021 - val_loss: 0.6243
Epoch 4/5
809/809 - 42s - 52ms/step - accuracy: 0.7467 - loss: 0.5020 - val_accuracy: 0.8787 - val_loss: 0.3326
Epoch 5/5
809/809 - 41s - 50ms/step - accuracy: 0.8059 - loss: 0.4405 - val_accuracy: 0.9057 - val_loss: 0.2548
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step
Accuracy: 0.9037
Testing combination: (64, 64, 0.5, 16, 32, 10)
Epoch 1/10




809/809 - 52s - 64ms/step - accuracy: 0.5975 - loss: 0.6328 - val_accuracy: 0.6100 - val_loss: 0.5816
Epoch 2/10
809/809 - 69s - 85ms/step - accuracy: 0.6493 - loss: 0.5600 - val_accuracy: 0.8589 - val_loss: 0.3715
Epoch 3/10
809/809 - 59s - 73ms/step - accuracy: 0.8887 - loss: 0.3025 - val_accuracy: 0.9151 - val_loss: 0.2147
Epoch 4/10
809/809 - 97s - 119ms/step - accuracy: 0.9326 - loss: 0.2015 - val_accuracy: 0.9236 - val_loss: 0.1836
Epoch 5/10
809/809 - 94s - 117ms/step - accuracy: 0.9522 - loss: 0.1426 - val_accuracy: 0.9389 - val_loss: 0.1853
Epoch 6/10
809/809 - 79s - 97ms/step - accuracy: 0.9641 - loss: 0.1180 - val_accuracy: 0.9298 - val_loss: 0.1913
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 36ms/step
Accuracy: 0.9189
Testing combination: (64, 64, 0.5, 16, 64, 5)




Epoch 1/5
405/405 - 95s - 235ms/step - accuracy: 0.6436 - loss: 0.6437 - val_accuracy: 0.5594 - val_loss: 0.6774
Epoch 2/5
405/405 - 49s - 120ms/step - accuracy: 0.7231 - loss: 0.5376 - val_accuracy: 0.8082 - val_loss: 0.4882
Epoch 3/5
405/405 - 68s - 168ms/step - accuracy: 0.8022 - loss: 0.4691 - val_accuracy: 0.7350 - val_loss: 0.5605
Epoch 4/5
405/405 - 49s - 120ms/step - accuracy: 0.8507 - loss: 0.3978 - val_accuracy: 0.8720 - val_loss: 0.3141
Epoch 5/5
405/405 - 72s - 178ms/step - accuracy: 0.8305 - loss: 0.3960 - val_accuracy: 0.8623 - val_loss: 0.3147
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 35ms/step
Accuracy: 0.8687
Testing combination: (64, 64, 0.5, 16, 64, 10)




Epoch 1/10
405/405 - 80s - 198ms/step - accuracy: 0.7401 - loss: 0.5419 - val_accuracy: 0.8708 - val_loss: 0.3653
Epoch 2/10
405/405 - 54s - 134ms/step - accuracy: 0.8265 - loss: 0.4427 - val_accuracy: 0.7456 - val_loss: 0.4203
Epoch 3/10
405/405 - 45s - 110ms/step - accuracy: 0.6809 - loss: 0.5730 - val_accuracy: 0.5560 - val_loss: 0.6412
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 42ms/step
Accuracy: 0.8632
Testing combination: (64, 64, 0.5, 32, 32, 5)




Epoch 1/5
809/809 - 99s - 122ms/step - accuracy: 0.6367 - loss: 0.6040 - val_accuracy: 0.8097 - val_loss: 0.4397
Epoch 2/5
809/809 - 99s - 123ms/step - accuracy: 0.8147 - loss: 0.4222 - val_accuracy: 0.8737 - val_loss: 0.3054
Epoch 3/5
809/809 - 79s - 98ms/step - accuracy: 0.8779 - loss: 0.3279 - val_accuracy: 0.8249 - val_loss: 0.4281
Epoch 4/5
809/809 - 89s - 110ms/step - accuracy: 0.8198 - loss: 0.4196 - val_accuracy: 0.9141 - val_loss: 0.2406
Epoch 5/5
809/809 - 85s - 105ms/step - accuracy: 0.9279 - loss: 0.2222 - val_accuracy: 0.9008 - val_loss: 0.2833
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 35ms/step
Accuracy: 0.9107
Testing combination: (64, 64, 0.5, 32, 32, 10)




Epoch 1/10
809/809 - 108s - 133ms/step - accuracy: 0.6003 - loss: 0.6434 - val_accuracy: 0.4981 - val_loss: 0.6906
Epoch 2/10
809/809 - 70s - 87ms/step - accuracy: 0.5428 - loss: 0.6733 - val_accuracy: 0.6083 - val_loss: 0.6475
Epoch 3/10
809/809 - 90s - 111ms/step - accuracy: 0.5877 - loss: 0.6550 - val_accuracy: 0.6200 - val_loss: 0.6415
Epoch 4/10
809/809 - 71s - 87ms/step - accuracy: 0.5452 - loss: 0.6825 - val_accuracy: 0.6281 - val_loss: 0.6431
Epoch 5/10
809/809 - 75s - 93ms/step - accuracy: 0.6447 - loss: 0.6380 - val_accuracy: 0.6108 - val_loss: 0.6513
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 35ms/step
Accuracy: 0.6172
Testing combination: (64, 64, 0.5, 32, 64, 5)




Epoch 1/5
405/405 - 74s - 183ms/step - accuracy: 0.5482 - loss: 0.6831 - val_accuracy: 0.5019 - val_loss: 0.6918
Epoch 2/5
405/405 - 65s - 160ms/step - accuracy: 0.5219 - loss: 0.6919 - val_accuracy: 0.5250 - val_loss: 0.6909
Epoch 3/5
405/405 - 50s - 122ms/step - accuracy: 0.5252 - loss: 0.6904 - val_accuracy: 0.5273 - val_loss: 0.6891
Epoch 4/5
405/405 - 50s - 122ms/step - accuracy: 0.5400 - loss: 0.6872 - val_accuracy: 0.5367 - val_loss: 0.6823
Epoch 5/5
405/405 - 67s - 165ms/step - accuracy: 0.5507 - loss: 0.6764 - val_accuracy: 0.5160 - val_loss: 0.6869
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 31ms/step
Accuracy: 0.5401
Testing combination: (64, 64, 0.5, 32, 64, 10)




Epoch 1/10
405/405 - 61s - 150ms/step - accuracy: 0.5788 - loss: 0.6544 - val_accuracy: 0.6162 - val_loss: 0.6219
Epoch 2/10
405/405 - 95s - 234ms/step - accuracy: 0.6062 - loss: 0.6280 - val_accuracy: 0.5712 - val_loss: 0.6568
Epoch 3/10
405/405 - 52s - 128ms/step - accuracy: 0.6109 - loss: 0.5926 - val_accuracy: 0.6248 - val_loss: 0.5510
Epoch 4/10
405/405 - 50s - 122ms/step - accuracy: 0.8110 - loss: 0.4351 - val_accuracy: 0.8114 - val_loss: 0.4704
Epoch 5/10
405/405 - 66s - 163ms/step - accuracy: 0.8407 - loss: 0.4093 - val_accuracy: 0.8893 - val_loss: 0.3134
Epoch 6/10
405/405 - 43s - 107ms/step - accuracy: 0.8494 - loss: 0.3842 - val_accuracy: 0.8502 - val_loss: 0.3873
Epoch 7/10
405/405 - 47s - 116ms/step - accuracy: 0.8914 - loss: 0.3101 - val_accuracy: 0.8850 - val_loss: 0.2824
Epoch 8/10
405/405 - 100s - 247ms/step - accuracy: 0.9281 - loss: 0.2083 - val_accuracy: 0.9161 - val_loss: 0.2297
Epoch 9/10
405/405 - 49s - 122ms/step - accuracy: 0.9501 - loss: 0.1463 - val_accuracy:



Epoch 1/5


KeyboardInterrupt: 

In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Baseline pipeline: load the review dataset, tokenise and pad the text, train
# a single LSTM classifier, evaluate it on a held-out split and save it.

# 1. Load the data
file_path = "fake_reviews_dataset.csv"  # path to the dataset file
data = pd.read_csv(file_path)

# 2. Inspect and clean
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only adds a stray "None" line.
data.info()
# Drop rows with a missing value in any column (new frame, not in place).
data = data.dropna()

# Split text and label columns
texts = data['text_']
labels = data['label']

# Encode 'CG' as 1 and 'OR' as 0. Any other value would be mapped to NaN and
# silently poison training, so fail loudly instead.
label_codes = {'CG': 1, 'OR': 0}
labels = labels.map(label_codes)
if labels.isna().any():
    unexpected = data.loc[labels.isna(), 'label'].unique().tolist()
    raise ValueError(f"Unexpected values in 'label' column: {unexpected}")
labels = labels.astype(np.float32)

# 3. Text preprocessing
max_words = 10000  # vocabulary size limit passed to the tokenizer
max_len = 100  # every review is padded / truncated to this many tokens
# NOTE(review): the tokenizer is fitted on all texts before the train/test
# split, so the vocabulary also reflects test reviews. Kept unchanged here so
# the preprocessing stays identical to the other cells of the notebook.
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)

# Convert texts to integer sequences, then pad / truncate at the end
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

# 4. Train / test split (80 / 20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# 5. Build the model. The Embedding layer takes its sequence length from the
# padded input, so the deprecated `input_length` argument is not passed.
model = Sequential([
    Embedding(input_dim=max_words, output_dim=128),
    LSTM(64, return_sequences=False),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 6. Train; stop once val_loss has not improved for 3 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=128,
    callbacks=[early_stopping],
    verbose=1
)

# 7. Evaluate on the held-out test split
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

# 8. Save the trained model
model.save("ai_human_review_classifier.h5")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40432 entries, 0 to 40431
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   category  40432 non-null  object 
 1   rating    40432 non-null  float64
 2   label     40432 non-null  object 
 3   text_     40432 non-null  object 
dtypes: float64(1), object(3)
memory usage: 1.2+ MB
None




Epoch 1/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 293ms/step - accuracy: 0.5449 - loss: 0.6761 - val_accuracy: 0.6069 - val_loss: 0.5796
Epoch 2/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 341ms/step - accuracy: 0.6449 - loss: 0.5511 - val_accuracy: 0.8371 - val_loss: 0.4207
Epoch 3/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 251ms/step - accuracy: 0.7776 - loss: 0.4818 - val_accuracy: 0.8510 - val_loss: 0.3648
Epoch 4/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 380ms/step - accuracy: 0.8523 - loss: 0.3586 - val_accuracy: 0.8841 - val_loss: 0.2848
Epoch 5/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 329ms/step - accuracy: 0.8989 - loss: 0.2750 - val_accuracy: 0.9054 - val_loss: 0.2473
Epoch 6/10
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 321ms/step - accuracy: 0.9196 - loss: 0.2212 - val_accuracy: 0.9131 - val_loss: 0.2475
Epoch 7/10



Test Loss: 0.2353, Test Accuracy: 0.9122


In [3]:
from keras.models import load_model

# Restore the model stored in the HDF5 file and print its layer summary.
saved_model_path = 'best_lstm_model.h5'
model = load_model(saved_model_path)
model.summary()



In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Evaluation-only pipeline: rebuild the test split with the same preprocessing
# steps as the training cell and score an already-loaded model on it.
# NOTE(review): `model` is not defined in this cell; it must already exist in
# the kernel (e.g. loaded from disk by an earlier cell).

# 1. Load the data
file_path = "fake_reviews_dataset.csv"  # path to the dataset file
data = pd.read_csv(file_path)

# 2. Inspect and clean
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only adds a stray "None" line.
data.info()
# Drop rows with a missing value in any column (new frame, not in place).
data = data.dropna()

# Split text and label columns
texts = data['text_']
labels = data['label']

# Encode 'CG' as 1 and 'OR' as 0. Any other value would be mapped to NaN and
# silently distort the metrics, so fail loudly instead.
label_codes = {'CG': 1, 'OR': 0}
labels = labels.map(label_codes)
if labels.isna().any():
    unexpected = data.loc[labels.isna(), 'label'].unique().tolist()
    raise ValueError(f"Unexpected values in 'label' column: {unexpected}")
labels = labels.astype(np.float32)

# 3. Text preprocessing
max_words = 10000  # vocabulary size limit passed to the tokenizer
max_len = 100  # every review is padded / truncated to this many tokens
# The tokenizer is re-fitted here rather than restored from the training run,
# so the word indices only match the model if it was trained on this same
# data with these same settings.
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)

# Convert texts to integer sequences, then pad / truncate at the end
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

# 4. Train / test split (80 / 20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# 7. Evaluate the loaded model on the test split
# NOTE(review): the recorded run of this cell reports roughly 0.50 accuracy
# with a high loss - worth verifying that the saved model was trained with
# this exact preprocessing.
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40432 entries, 0 to 40431
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   category  40432 non-null  object 
 1   rating    40432 non-null  float64
 2   label     40432 non-null  object 
 3   text_     40432 non-null  object 
dtypes: float64(1), object(3)
memory usage: 1.2+ MB
None
Test Loss: 4.7615, Test Accuracy: 0.5009
