In [None]:
def read_sentences_from_file(filename):
    """Read a UTF-8 text file and return its lines as a list of strings.

    Every line has its leading and trailing whitespace (including the
    newline) removed. Lines that are blank after stripping are kept as
    empty strings, so the result has one entry per line of the file.
    """
    sentences = []
    with open(filename, mode='r', encoding="utf-8") as handle:
        for raw_line in handle:
            sentences.append(raw_line.strip())
    return sentences

Creating the dataset: load the valid and invalid sentences and keep at most the first 3,000,000 of each.

In [None]:
# Load both corpora, keeping at most the first 3,000,000 sentences of each
# class (the same cap is applied to valid and invalid sentences).
valid = read_sentences_from_file('dataset/valid.txt')[:3000000]
invalid = read_sentences_from_file('dataset/invalid.txt')[:3000000]

Splitting the data into train, validation, and test sets, then tokenizing the words and padding the sequences.

In [None]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import numpy as np
from sklearn.model_selection import train_test_split
# Combine both classes into one corpus: invalid sentences are labelled 0 and
# valid sentences 1, in the same order as the concatenated sentence list.
all_sentences = invalid + valid
labels = np.array([0] * len(invalid) + [1] * len(valid))

# train_test_split shuffles by default, so no manual shuffle is done first; an
# unseeded np.random.shuffle here would make the random_state-seeded splits
# differ from run to run.
# Hold out 20% for testing, then 10% of the remainder for validation.
train_sentences, test_sentences, train_labels, test_labels = train_test_split(
    all_sentences, labels, test_size=0.2, random_state=42)
train_sentences, val_sentences, train_labels, val_labels = train_test_split(
    train_sentences, train_labels, test_size=0.1, random_state=42)

# Fit the vocabulary on the training split only, so words that appear only in
# the validation/test splits are mapped to the OOV token.
tokenizer = Tokenizer(oov_token="None")
tokenizer.fit_on_texts(train_sentences)

word_index = tokenizer.word_index

max_sequence_length = 100
# +1 because Keras word indices start at 1; index 0 is the padding value.
vocab_size = len(word_index) + 1


def _encode_sentences(sentences):
    """Turn sentences into integer sequences padded/truncated to max_sequence_length."""
    sequences = tokenizer.texts_to_sequences(sentences)
    return pad_sequences(sequences, maxlen=max_sequence_length)


train_sequences = _encode_sentences(train_sentences)
val_sequences = _encode_sentences(val_sentences)
test_sequences = _encode_sentences(test_sentences)



Creating and training the deep learning model.

In [None]:
from keras.models import Sequential
from keras.layers import Embedding, GRU, Dense, Dropout, Bidirectional, LSTM
from keras.callbacks import EarlyStopping

# Define model: embedding -> two stacked bidirectional LSTMs -> dense head
# ending in a single sigmoid unit for binary classification.
model = Sequential(name="dl_model")
model.add(Embedding(input_dim=vocab_size, output_dim=64, name="embedding_layer"))
# The first LSTM returns the full sequence so the second LSTM can consume it.
lstm_first = LSTM(64, return_sequences=True, name="lstm_first")
model.add(Bidirectional(lstm_first, name="bidirectional_lstm_first"))
lstm_second = LSTM(32, name="lstm_second")
model.add(Bidirectional(lstm_second, name="bidirectional_lstm_second"))
model.add(Dense(64, activation='relu', name="dense_first"))
model.add(Dropout(0.5, name="dropout_first"))
model.add(Dense(1, activation='sigmoid', name="output_layer"))

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback: monitors val_loss with a patience of 3 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Make sure features and labels are NumPy arrays before handing them to Keras.
train_sequences = np.asarray(train_sequences)
train_labels = np.asarray(train_labels)
val_sequences = np.asarray(val_sequences)
val_labels = np.asarray(val_labels)

# Train model. Validate on the dedicated validation split prepared earlier
# (instead of carving a second one out of the training data) and actually
# register the early-stopping callback so it takes effect.
history = model.fit(
    train_sequences, train_labels,
    epochs=4, batch_size=64, verbose=1,
    validation_data=(val_sequences, val_labels),
    callbacks=[early_stopping],
)

model.summary()

Saving the model and converting it into TensorFlow.js files.

In [None]:
# Save the trained model to deepLearning.h5 (the .h5 extension requests Keras' HDF5 format).
model.save("deepLearning.h5")
# Shell out to the tensorflowjs_converter CLI to convert the saved Keras file
# into TensorFlow.js artifacts written to the finalDL output path.
# NOTE(review): assumes the tensorflowjs package is installed in this environment.
!tensorflowjs_converter --input_format keras deepLearning.h5 finalDL