In [None]:
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt

data_dir = "/content/drive/MyDrive/daa"  # root directory of the dataset
human_poems_dir = os.path.join(data_dir, "human")
ai_poems_dir = os.path.join(data_dir, "ai")


def _read_poems(directory):
    """Return the text of every regular file directly inside *directory*.

    Entries are visited in sorted name order so the resulting list (and
    therefore the seeded train/test split below) is the same on every run;
    ``os.listdir`` itself gives no ordering guarantee. Non-file entries such
    as sub-directories are skipped instead of crashing ``open``.

    Raises:
        FileNotFoundError: if *directory* does not exist.
        UnicodeDecodeError: if a file is not valid UTF-8.
    """
    texts = []
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not os.path.isfile(path):
            continue
        # Context manager closes the handle; explicit encoding avoids
        # depending on the platform default.
        # NOTE(review): assumes the poem files are UTF-8 -- confirm.
        with open(path, encoding="utf-8") as poem_file:
            texts.append(poem_file.read())
    return texts


human_poems = _read_poems(human_poems_dir)
ai_poems = _read_poems(ai_poems_dir)

# Label convention: 1 = human-written, 0 = AI-generated.
poems = human_poems + ai_poems
labels = [1] * len(human_poems) + [0] * len(ai_poems)

# Hold out 20% of the poems for the final evaluation; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(poems, labels, test_size=0.2, random_state=42)

# Vocabulary cap and fixed sequence length; both are reused when the model is
# built and when new text is encoded for prediction.
max_words = 10000
max_len = 200

# The vocabulary is learned from the training split only.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

# Persist the fitted vocabulary next to the dataset.
# NOTE(review): np.save stores this dict as a pickled object array, so reading
# it back presumably needs np.load(..., allow_pickle=True).item() -- confirm
# against whatever code loads this file.
tokenizer_config_path = '/content/drive/MyDrive/daa/tokenizer_config.npy'
tokenizer_state = {
    'word_index': tokenizer.word_index,
    'document_count': tokenizer.document_count,
}
np.save(tokenizer_config_path, tokenizer_state)

# Text -> lists of integer word ids, for both splits.
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

# Pad / truncate every sequence to exactly max_len ids.
X_train_pad, X_test_pad = (
    pad_sequences(seqs, maxlen=max_len) for seqs in (X_train_seq, X_test_seq)
)

# Binary classifier over padded word-id sequences:
# embedding -> 1D convolution -> LSTM -> max over time -> dense head.
model = tf.keras.Sequential()

# Word ids (vocabulary capped at max_words) -> 128-dimensional vectors.
# NOTE(review): `input_length` is reportedly deprecated in Keras 3 -- confirm
# against the installed version before upgrading.
model.add(tf.keras.layers.Embedding(input_dim=max_words, output_dim=128, input_length=max_len))

# 64 convolution filters with a window of 5 tokens.
model.add(tf.keras.layers.Conv1D(64, 5, activation='relu'))

# The LSTM emits one 64-unit output per timestep so the pooling layer can
# take the maximum over the whole sequence.
model.add(tf.keras.layers.LSTM(64, return_sequences=True))
model.add(tf.keras.layers.GlobalMaxPooling1D())

# Dense head; the single sigmoid unit outputs a value in (0, 1).
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train with early stopping: 500 is only an upper bound on the epoch count.
# Validation loss is already computed via validation_split, so use it to stop
# once it has not improved for 10 epochs and roll the model back to the best
# epoch. Without this the evaluated and saved model is whatever the final
# epoch produced, regardless of validation performance.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

history = model.fit(
    X_train_pad,
    np.array(y_train),
    epochs=500,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Final check on the held-out test split.
loss, accuracy = model.evaluate(X_test_pad, np.array(y_test))
print(f'Test accuracy: {accuracy}')

# Save the model
# NOTE(review): the path has no file extension; newer Keras releases are
# reported to require `.keras` or `.h5` here -- confirm against the installed
# version. Path left unchanged so existing loaders keep working.
model.save('/content/drive/MyDrive/daa/my_poem_model')


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [None]:

# Interactive check: classify one poem typed by the user with the trained
# model and the tokenizer fitted on the training data.
user_poem = input("예측하고 싶은 시를 입력하세요: ")

# Encode exactly as during training. The tokenizer has no OOV token, so words
# it has never seen are dropped from the sequence.
user_poem_seq = tokenizer.texts_to_sequences([user_poem])

if not user_poem_seq[0]:
    # Empty input, or no word in the fitted vocabulary: the padded input would
    # be all zeros and the score would not depend on the text at all, so
    # decline to give a verdict.
    print("예측 불가: 입력에 학습된 어휘가 포함되어 있지 않습니다.")
else:
    user_poem_pad = pad_sequences(user_poem_seq, maxlen=max_len)
    # Single sample, single sigmoid output -> take the scalar at [0, 0].
    prediction = model.predict(user_poem_pad)[0, 0]

    # Training labels are 1 = human, 0 = AI, so the score is read as the
    # probability of a human-written poem.
    if prediction > 0.5:
        print(f"예측 결과: 인간의 시로 판단됩니다. (확률: {prediction:.4f})")
    else:
        print(f"예측 결과: AI가 생성한 시로 판단됩니다. (확률: {1 - prediction:.4f})")


예측하고 싶은 시를 입력하세요: 하이
예측 결과: 인간의 시로 판단됩니다. (확률: 0.6730)
