In [5]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical


data = pd.read_csv('cleaned_data_final.csv')

# Transforming the data so that each row contains one response and one label
responses = data['Human_response'].tolist() + data['ChatGPT_3_5_response'].tolist()
labels = [0] * len(data['Human_response']) + [1] * len(data['ChatGPT_3_5_response'])
data_combined = pd.DataFrame({'response': responses, 'label': labels})

# Text tokenization
max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(data_combined['response'].values)
X = tokenizer.texts_to_sequences(data_combined['response'].values)
X = pad_sequences(X)


labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data_combined['label'])
y = to_categorical(integer_encoded)

# Splitting the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Building the model
embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Training the model
batch_size = 32
model.fit(X_train, y_train, epochs=7, batch_size=batch_size, verbose=2)

# Model evaluation
score, acc = model.evaluate(X_test, y_test, verbose=2, batch_size=batch_size)
print("Accuracy: %.2f" % (acc))


Epoch 1/7
50/50 - 78s - loss: 0.4891 - accuracy: 0.7706 - 78s/epoch - 2s/step
Epoch 2/7
50/50 - 71s - loss: 0.0834 - accuracy: 0.9725 - 71s/epoch - 1s/step
Epoch 3/7
50/50 - 60s - loss: 0.0395 - accuracy: 0.9887 - 60s/epoch - 1s/step
Epoch 4/7
50/50 - 60s - loss: 0.0176 - accuracy: 0.9950 - 60s/epoch - 1s/step
Epoch 5/7
50/50 - 62s - loss: 0.0147 - accuracy: 0.9981 - 62s/epoch - 1s/step
Epoch 6/7
50/50 - 59s - loss: 0.0036 - accuracy: 0.9994 - 59s/epoch - 1s/step
Epoch 7/7
50/50 - 60s - loss: 0.0010 - accuracy: 1.0000 - 60s/epoch - 1s/step
13/13 - 3s - loss: 0.1498 - accuracy: 0.9650 - 3s/epoch - 249ms/step
Accuracy: 0.96


In [3]:
import tensorflow as tf

print(tf.__version__)

2.10.0
