In [4]:
# https://pieriantraining.com/tensorflow-lstm-example-a-beginners-guide/
# https://www.tensorflow.org/guide/keras/working_with_rnns

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras.models import load_model
from keras import layers
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Conv1D, GlobalMaxPooling1D
import joblib

In [6]:
dataset = pd.read_csv('../../data/fraction_preprocessed_data.csv',encoding='ISO-8859-1')
dataset = dataset.drop(['Unnamed: 0'], axis=1)
dataset = dataset.dropna(subset=['Text'])
# dataset = dataset.sample(frac=0.1, random_state=42)
dataset

Unnamed: 0,Text,Source,Human
0,ING AsiaPacific Companys Problems Research Pap...,Human,1
1,Crisis Love Inquiry Essay Critical Writing fol...,Human,1
2,Sure sex segregation makes lot sense many spor...,Human,1
3,Christianity Islam Values Essay Christianity f...,Human,1
4,Becca liked swim practiced everyday hours ente...,GLM-130B,0
...,...,...,...
78887,Mass Eoghan Chada 10 brother Ruairi 5 said St ...,OPT-30B,0
78888,Asian Teachers Polish Lesson Perfection Stigle...,Human,1
78889,Move knife slowly avoid slipping accidentally ...,OPT-6.7B,0
78890,Good dreams likely occur person feeling relaxe...,Text-Davinci-003,0


In [7]:
# Initialize the tokenizer
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(dataset['Text'])

In [8]:
# Convert texts to sequences
sequences = tokenizer.texts_to_sequences(dataset['Text'])

# Pad sequences to ensure uniform input length
padded_sequences = pad_sequences(sequences, maxlen=100, padding='post', truncating='post')

In [9]:
# unitializing label encoder
label_encoder = LabelEncoder()
label_encoder.fit(dataset['Human'])

# labels to numerical format conversion
encoded_labels = label_encoder.transform(dataset['Human'])

In [10]:
joblib.dump(tokenizer, '../../models/NeuralNetworks/Convolutional/Variables/tokenizer.pkl')
joblib.dump(label_encoder, '../../models/NeuralNetworks/Convolutional/Variables/label_encoder.pkl')
joblib.dump(encoded_labels, '../../models/NeuralNetworks/Convolutional/Variables/encoded_labels.pkl')
joblib.dump(padded_sequences, '../../models/NeuralNetworks/Convolutional/Variables/padded_sequences.pkl')

['../../models/NeuralNetworks/Convolutional/Variables/padded_sequences.pkl']

In [11]:
# Convolutional instantiation 2 Conv Layers
conv = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=100),
    Conv1D(64, 1, activation='relu'),
    Conv1D(128, 1, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])



In [12]:
conv.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [13]:
# training phase
conv.fit(padded_sequences, encoded_labels, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m1973/1973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.7352 - loss: 0.4926 - val_accuracy: 0.8073 - val_loss: 0.3874
Epoch 2/10
[1m1973/1973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8231 - loss: 0.3635 - val_accuracy: 0.8236 - val_loss: 0.3642
Epoch 3/10
[1m1973/1973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8475 - loss: 0.3252 - val_accuracy: 0.8243 - val_loss: 0.3583
Epoch 4/10
[1m1973/1973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 13ms/step - accuracy: 0.8610 - loss: 0.2971 - val_accuracy: 0.8307 - val_loss: 0.3607
Epoch 5/10
[1m1973/1973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 4ms/step - accuracy: 0.8774 - loss: 0.2666 - val_accuracy: 0.8245 - val_loss: 0.3631
Epoch 6/10
[1m1973/1973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8909 - loss: 0.2409 - val_accuracy: 0.8276 - val_loss: 0.3657
Epoch 7/10


<keras.src.callbacks.history.History at 0x275012338b0>

In [14]:
# 0.9843

In [15]:
conv.save('../../models/NeuralNetworks/conv_two_94.keras')