In [1]:
import joblib
import pandas as pd
from keras.layers import LSTM, Dense, Embedding, SpatialDropout1D
from keras.models import Sequential
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

In [2]:
# Load the labelled corpus. The split cell below reads its 'text' and
# 'result' columns, so the CSV must provide both.
corpus_path = "all_texts.csv"
all_texts = pd.read_csv(corpus_path)

In [3]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# partition identical between runs.
corpus_texts = all_texts['text']
corpus_labels = all_texts['result']
train_texts, test_texts, train_labels, test_labels = train_test_split(
    corpus_texts,
    corpus_labels,
    test_size=0.3,
    random_state=42,
)

In [4]:
# TF-IDF representation of the texts, limited to 5000 features.
# The vectorizer is fitted on the training texts only and then applied
# unchanged to the test texts.
# NOTE(review): these matrices are not referenced by the later cells shown in
# this notebook (the LSTM consumes padded token sequences instead).
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
train_texts_tfidf, test_texts_tfidf = (
    tfidf_vectorizer.fit_transform(train_texts),
    tfidf_vectorizer.transform(test_texts),
)

In [5]:
# Sequence-encoding settings shared by the tokenizer, the padding step and the
# Embedding layer of the model.
max_words = 5000  # passed to Tokenizer(num_words=...) and used as the Embedding input size
max_len = 200     # every sequence is padded/truncated to this many tokens

# Fit the word index on the training texts only.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_texts)

# Encode both splits as integer sequences with the same fitted tokenizer...
train_sequences, test_sequences = (
    tokenizer.texts_to_sequences(split_texts)
    for split_texts in (train_texts, test_texts)
)
# ...and bring them to a uniform length of max_len.
train_pad, test_pad = (
    pad_sequences(split_sequences, maxlen=max_len)
    for split_sequences in (train_sequences, test_sequences)
)

In [7]:
# LSTM classifier: embedding -> spatial dropout -> LSTM -> single sigmoid unit.
# The layers are identical to adding them one by one; they are simply passed
# to Sequential as a list.
lstm_model = Sequential([
    Embedding(max_words, 64, input_length=max_len),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output unit.
lstm_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
lstm_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 200, 64)           320000    
                                                                 
 spatial_dropout1d_1 (Spati  (None, 200, 64)           0         
 alDropout1D)                                                    
                                                                 
 lstm_1 (LSTM)               (None, 100)               66000     
                                                                 
 dense_1 (Dense)             (None, 1)                 101       
                                                                 
Total params: 386101 (1.47 MB)
Trainable params: 386101 (1.47 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [10]:
# Fit the LSTM on the padded training sequences for 5 epochs in batches of 64.
# validation_split=0.3 reserves 30% of the training data for validation.
# The call is left as the cell's final expression, so the returned History
# object is what the cell displays.
lstm_model.fit(
    train_pad,
    train_labels,
    batch_size=64,
    epochs=5,
    validation_split=0.3,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x2abb56a3110>

In [12]:
# Persist the trained model so the later load_model('lstm_model.h5') cell works
# on a fresh "Restart & Run All". The file name deliberately matches the one
# that cell reads (the previously commented-out name differed in case, which
# breaks on case-sensitive filesystems).
lstm_model.save('lstm_model.h5')

In [20]:
# Score the held-out test sequences with the trained model.
predictions = lstm_model.predict(test_pad)
# Turn the sigmoid outputs into hard 0/1 labels using a 0.5 cut-off.
binary_predictions = (predictions > 0.5).astype('int32')

# Both metrics compare the same (true labels, predicted labels) pair.
metric_args = (test_labels, binary_predictions)
accuracy = accuracy_score(*metric_args)
classification_rep = classification_report(*metric_args)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", classification_rep)

Accuracy: 0.9477888888888889
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.96      0.95     44901
           1       0.96      0.93      0.95     45099

    accuracy                           0.95     90000
   macro avg       0.95      0.95      0.95     90000
weighted avg       0.95      0.95      0.95     90000



In [30]:
# Reload the persisted LSTM model from disk. `load_model` is imported in the
# notebook's top import cell so all dependencies are declared in one place.
loaded_model = load_model('lstm_model.h5')

In [35]:
# Classify a single free-form text with the reloaded model.
random_text = "I was in Russia when the war broke out. Dimitri slovak, my contact at Moscow, hid me in his basement. I lived in that cold basement for 3 months until oneday Dimitri came to me and said, You are a good man alex but I cannot hide you anymore. If they find you here they will kill my whole family. I never wanted to become his burden that night I decided to leave. I packet my bag said farewell to Dimitri Slovak and my journey begin from Moscow to florida. It had just been few minutes I left Dimitri Slovak's residence, I saw a man about 6 ft tall wearing a large overcoat. He was drinking something from a bottle. He was far but I cound clearly tell he was having some liquor. I thought he was some drunkard and deided to walk just past him. As i was about to pass him, he looked at me and asked Do you want some vodka my friend?, No thanks I replied before even realizing a drunk man in the heart of Russia talked to me in English. my body frooze only thing I cound do was to stare at him thinking it was my end. Did you really think you could just walk out of here? he asked as I kept watching him in horror."

# Encode with the SAME tokenizer that was fitted on the training texts: the
# model's embedding rows are tied to that word index. Fitting a new tokenizer
# here would assign unrelated ids and overwrite the training tokenizer.
# The text is wrapped in a list because texts_to_sequences expects a list of
# documents; a bare string is iterated character by character.
text_sequence = tokenizer.texts_to_sequences([random_text])[0]
text_pad = pad_sequences([text_sequence], maxlen=max_len)

# predict returns one row per input; row 0 holds the single sigmoid output.
result = loaded_model.predict(text_pad)[0]
binary_result = [1] if result[0] > 0.5 else [0]
print("Predictions:", binary_result)

Predictions: [1]
