In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical


# Load the dataset; it is expected to provide the 'Human_response' and
# 'ChatGPT_3_5_response' columns read below.
data = pd.read_csv('cleaned_data_final.csv')

# Transforming the data so that each row contains one response and one label
# (0 = human-written response, 1 = ChatGPT-3.5 response).
responses = data['Human_response'].tolist() + data['ChatGPT_3_5_response'].tolist()
labels = [0] * len(data['Human_response']) + [1] * len(data['ChatGPT_3_5_response'])
data_combined = pd.DataFrame({'response': responses, 'label': labels})

# One-hot encode the labels to match the 2-unit softmax output and the
# categorical cross-entropy loss used by the model.
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data_combined['label'])
y = to_categorical(integer_encoded)

# Splitting the data into a training set and a test set.
# The split is decided on row positions BEFORE any text statistics are
# learned, so the held-out rows cannot influence the vocabulary. With the
# same number of rows, test_size and random_state, this is the same partition
# that splitting the padded matrix directly would give.
texts = data_combined['response'].values
train_idx, test_idx = train_test_split(
    data_combined.index.to_numpy(), test_size=0.2, random_state=42
)

# Text tokenization: the vocabulary (top `max_features` words) is fitted on
# the training responses only; words unseen in training are dropped from the
# test sequences because no OOV token is configured.
max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(texts[train_idx])
sequences = tokenizer.texts_to_sequences(texts)

# Pad every sequence to the longest *training* sequence. Longer test
# sequences are truncated from the front (pad_sequences' default), which
# mirrors the default front padding.
max_len = max(len(sequences[i]) for i in train_idx)
X = pad_sequences(sequences, maxlen=max_len)

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Defining the attention layer
class AttentionLayer(layers.Layer):
    """Attention pooling that collapses axis 1 of its input.

    For an input ``x`` the layer computes ``tanh(x @ W + b)``, normalises the
    result with a softmax along axis 1, and returns the sum over axis 1 of
    ``x`` weighted by those values. In this file it is fed the output of an
    LSTM with ``return_sequences=True``, so axis 1 is the time axis and the
    last axis holds the features.

    The bias has one entry per position along axis 1, so the layer needs that
    dimension (the sequence length) to be known when it is built.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection weight and the per-position bias."""
        steps = input_shape[1]
        features = input_shape[-1]

        # Projects each feature vector down to a single score.
        self.W = self.add_weight(
            name="att_weight",
            shape=(features, 1),
            initializer="normal",
        )
        # One bias per position along axis 1.
        self.b = self.add_weight(
            name="att_bias",
            shape=(steps, 1),
            initializer="zeros",
        )

        super().build(input_shape)

    def call(self, x):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        projected = tf.matmul(x, self.W)
        scores = tf.nn.tanh(tf.add(projected, self.b))
        # Softmax along axis 1 turns the scores into weights that sum to 1.
        attention = tf.nn.softmax(scores, axis=1)
        weighted = x * attention
        return tf.reduce_sum(weighted, axis=1)

# Building the model
# Stack: embedding -> LSTM emitting the full sequence -> attention pooling
# -> 2-way softmax classifier.
model = tf.keras.Sequential()
model.add(layers.Embedding(max_features, 128, input_length=X.shape[1]))
model.add(layers.LSTM(64, return_sequences=True))
model.add(AttentionLayer())
model.add(layers.Dense(2, activation='softmax'))

# Compiling the model: categorical cross-entropy pairs with the one-hot labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Training the model; 20% of the training data is held out for validation.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)

# Model evaluation on the test set; evaluate() returns the loss followed by
# the compiled metrics.
score, acc = model.evaluate(x=X_test, y=y_test, batch_size=32)
print("Accuracy: %.2f" % (acc))



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 0.93
