In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder



fake = pd.read_csv('Fake.csv', engine='python', on_bad_lines='skip')
true = pd.read_csv('True.csv', engine='python', on_bad_lines='skip')

# Assign labels: 0 = Fake, 1 = True
fake['label'] = 0
true['label'] = 1


data = pd.concat([fake, true], ignore_index=True)

# Shuffle data
data = data.sample(frac=1, random_state=42).reset_index(drop=True)



# Combine title and text
data['content'] = data['title'] + " " + data['text']

# Extract features and labels
texts = data['content'].values
labels = data['label'].values

# Tokenization
max_words = 10000  # Vocabulary size
max_len = 500      # Max length of sequences

tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')


X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)


# the LSTM model

embedding_dim = 128

model = Sequential([
    Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len),
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2,
    verbose=1
)

# Evaluation of model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")




Epoch 1/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 45ms/step - accuracy: 0.5658 - loss: 0.6862 - val_accuracy: 0.6341 - val_loss: 0.6392
Epoch 2/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - accuracy: 0.5915 - loss: 0.6359 - val_accuracy: 0.6058 - val_loss: 0.6432
Epoch 3/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - accuracy: 0.5998 - loss: 0.6429 - val_accuracy: 0.6357 - val_loss: 0.6232
Epoch 4/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 47ms/step - accuracy: 0.6946 - loss: 0.5636 - val_accuracy: 0.8678 - val_loss: 0.3651
Epoch 5/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - accuracy: 0.8036 - loss: 0.4200 - val_accuracy: 0.7498 - val_loss: 0.4656
Epoch 6/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - accuracy: 0.7627 - loss: 0.4648 - val_accuracy: 0.6145 - val_loss: 0.6967
Epoch 7/10
[1m80/80[0m [32m━━━

In [7]:
import pickle

model.save('isot_lstm_model.h5')
print("Model saved to isot_lstm_model.h5")

with open('tokenizer.pkl', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
print("Tokenizer saved to tokenizer.pkl")




Model saved to isot_lstm_model.h5
Tokenizer saved to tokenizer.pkl


In [15]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

model = tf.keras.models.load_model('isot_lstm_model.h5')
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
print("Model loaded from isot_lstm_model.h5")

with open('tokenizer.pkl', 'rb') as handle:
    tokenizer = pickle.load(handle)
print("Tokenizer loaded from tokenizer.pkl")

sample_text = ["India beats Pakistan in the 2025 world cup"]

max_len = 500  # Use the same max_len as during training
sequences = tokenizer.texts_to_sequences(sample_text)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

prediction = model.predict(padded_sequences)
print(f"Prediction (probability of being True news): {prediction[0][0]:.4f}")

label = "True News" if prediction[0][0] > 0.5 else "Fake News"
print(f"Predicted label: {label}")




Model loaded from isot_lstm_model.h5
Tokenizer loaded from tokenizer.pkl
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step
Prediction (probability of being True news): 0.9673
Predicted label: True News
