In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [25]:
# Load the dataset
data = pd.read_csv("fake_or_real_news.csv")

In [26]:
# Check for missing values
data.isnull().sum()

Unnamed: 0    0
title         0
text          0
label         0
dtype: int64

In [27]:
# Extract features and labels
x = np.array(data["title"])
y = np.array(data["label"])

In [28]:
# Convert labels to numerical format
le = LabelEncoder()
y = le.fit_transform(y)

In [29]:
# Tokenize the text data
max_words = 5000
tokenizer = Tokenizer(num_words=max_words, split=' ')
tokenizer.fit_on_texts(x)
x = tokenizer.texts_to_sequences(x)
x = pad_sequences(x)

In [30]:
# Split the data into training and testing sets
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

In [31]:
# Build the neural network model
model = Sequential()
model.add(Embedding(max_words, 128, input_length=x.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [32]:
# Train the model
batch_size = 32
epochs = 3
model.fit(xtrain, ytrain, epochs=epochs, batch_size=batch_size, validation_split=0.2)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x1ad293af490>

In [34]:
# Evaluate the model on the test set
loss, accuracy = model.evaluate(xtest, ytest)
print(f"Model Accuracy: {accuracy:.2f}")
print(f"Model Loss: {loss}")

Model Accuracy: 0.80
Model Loss: 0.524014949798584


In [36]:
# Make predictions on a sample news headline
news_headline = input("Type News title here: ")
headline_seq = tokenizer.texts_to_sequences(news_headline)
headline_padded = pad_sequences(headline_seq, maxlen=x.shape[1])
result = model.predict(headline_padded)[0][0]
predicted_label = "Real" if result < 0.5 else "Fake"
print(f"Predicted Label: {predicted_label} (Probability: {result:.2f})")


Type News title here: Renewable Energy Could Be a Casualty in the War on Inflation. Here’s Why.
Predicted Label: Real (Probability: 0.20)
