In [12]:
import pandas as pd

# Load the two source CSVs: one holds genuine articles, the other fabricated ones.
real = pd.read_csv("/content/true.csv")
fake = pd.read_csv("/content/fake.csv")

# Attach the binary target: 1 = real news, 0 = fake news.
real["label"] = 1
fake["label"] = 0

# Stack both sets into a single frame with a fresh 0..n-1 index.
df = pd.concat([real, fake], ignore_index=True)

# Shuffle the rows so the two classes are interleaved. The fixed seed keeps the
# row order (and every split derived from it) identical across kernel restarts.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [13]:
# Peek at the first two shuffled rows to sanity-check columns and labels.
df.head(n=2)

Unnamed: 0,title,text,subject,date,label
0,Australian women go pro on back of investment ...,MELBOURNE (Reuters) - Lisa De Vanna once had t...,politicsNews,"September 21, 2017",1
1,More Californians dreaming of a country withou...,"SACRAMENTO, Calif. (Reuters) - The election of...",politicsNews,"January 23, 2017",1


In [18]:
# Feature frame: every column except the publication date and the target.
x = df.drop(columns=["date", "label"])
# Target column (1 = real, 0 = fake).
y = df["label"]

In [21]:
import tensorflow as tf

In [24]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

In [None]:
# Vocabulary size setting. NOTE(review): the cells shown here use `max_word`
# for the tokenizer/embedding instead of this name — confirm it is still needed.
vocab_size = 5_000

# **Prepare the Text Column**

In [30]:
# Build the model input text: title + " " + body when a title column exists,
# otherwise the body alone. Missing values are replaced with empty strings
# first, because concatenating a string with NaN yields NaN, and a NaN row
# cannot be passed through string-based cleaning.
if "title" in df.columns:
    df["content"] = df["title"].fillna("") + " " + df["text"].fillna("")
else:
    df["content"] = df["text"].fillna("")

# **Text Cleaning (Basic)**

In [34]:
import re

def clean_text(text):
    """Normalize one article string before tokenization.

    Steps, in order: remove URLs, remove every character that is not an
    ASCII letter or whitespace, then lower-case the result.

    Args:
        text: Raw article text. Non-string values (for example ``None`` or
            a NaN coming from a missing cell) are treated as empty text.

    Returns:
        The cleaned, lower-cased string.
    """
    if not isinstance(text, str):
        return ""
    # Upper-case \S (non-whitespace) is what consumes the rest of a link;
    # lower-case \s would only match "http" followed by spaces.
    text = re.sub(r"http\S+|www\.\S+", "", text)
    # A negated character class is required here: without the brackets the
    # pattern is the literal string "a-zA-Z" and nothing gets stripped.
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    return text.lower()

# Run the cleaner over every article, element by element, into a new column.
df["cleaned"] = df["content"].map(clean_text)

# **Tokenization and Padding**

In [39]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Vocabulary cap for the tokenizer and the fixed length of every padded sequence.
max_word, max_len = 5000, 500

# Tokenizer: learn the word index from the cleaned articles, then encode each
# article as a list of integer word ids.
# NOTE(review): the tokenizer is fitted on all rows, including the ones that
# are later held out for testing.
cleaned_texts = df["cleaned"]
tokenizer = Tokenizer(num_words=max_word)
tokenizer.fit_on_texts(cleaned_texts)
sequences = tokenizer.texts_to_sequences(cleaned_texts)

# Padding: bring every sequence to exactly `max_len` entries so the batch is
# a rectangular array.
X = pad_sequences(sequences, maxlen=max_len)
y = df["label"].to_numpy()

# **Train-Test Split**

In [41]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for final evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Backward-compatible alias: the held-out features were originally bound to the
# misspelled name `X_text`, which other cells may still reference.
X_text = X_test

# **Build the LSTM Model**

In [50]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout

# Binary text classifier: token ids -> 64-d embeddings -> LSTM -> dropout -> sigmoid.
model = Sequential()
# Declare the input shape with an explicit Input instead of the Embedding
# `input_length` argument, which newer Keras releases flag as deprecated.
model.add(tf.keras.Input(shape=(max_len,)))
model.add(Embedding(input_dim=max_word, output_dim=64))
# return_sequences=False: only the last LSTM output feeds the classifier head.
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.5))
# Single sigmoid unit: output is a score in [0, 1] for the positive (label 1) class.
model.add(Dense(1, activation="sigmoid"))

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

#  **Train the Model**

In [52]:
# Train for 5 epochs in mini-batches of 64; 20% of the training rows are held
# back for per-epoch validation.
model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)

Epoch 1/5
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 440ms/step - accuracy: 0.9882 - loss: 0.0436 - val_accuracy: 0.9910 - val_loss: 0.0330
Epoch 2/5
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 422ms/step - accuracy: 0.9919 - loss: 0.0277 - val_accuracy: 0.9815 - val_loss: 0.0668
Epoch 3/5
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 418ms/step - accuracy: 0.9915 - loss: 0.0303 - val_accuracy: 0.9926 - val_loss: 0.0287
Epoch 4/5
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 420ms/step - accuracy: 0.9947 - loss: 0.0199 - val_accuracy: 0.9908 - val_loss: 0.0323
Epoch 5/5
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 433ms/step - accuracy: 0.9283 - loss: 0.2316 - val_accuracy: 0.9897 - val_loss: 0.0409


<keras.src.callbacks.history.History at 0x78339054b310>

#  **Evaluate the Model**

In [54]:
# Score the trained model on the held-out split. `X_text` is the test-feature
# array returned by train_test_split (that is how the name is spelled where it
# is defined).
loss, accuracy = model.evaluate(x=X_text, y=y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 86ms/step - accuracy: 0.9859 - loss: 0.0511
Test Accuracy: 0.99


# **Make Predictions**

In [59]:
import numpy as np

def predict_news(news_text, threshold=0.5):
    """Classify one piece of news text as ``"REAL"`` or ``"FAKE"``.

    The text goes through the same pipeline used for training: cleaning with
    ``clean_text``, encoding with the fitted ``tokenizer``, and padding to
    ``max_len`` before it is scored by ``model``.

    Args:
        news_text: Raw headline or article text to classify.
        threshold: Minimum model score that counts as ``"REAL"``. Defaults
            to 0.5, the cutoff this function has always used.

    Returns:
        ``"REAL"`` when the model score is at least ``threshold``,
        otherwise ``"FAKE"``.
    """
    cleaned = clean_text(news_text)
    # The tokenizer expects a list of documents, hence the one-element list.
    seq = tokenizer.texts_to_sequences([cleaned])
    padded = pad_sequences(seq, maxlen=max_len)
    # One input row and one output unit: take the single score out of the batch.
    score = model.predict(padded)[0][0]
    return "REAL" if score >= threshold else "FAKE"

# Example: classify a single sample headline and show the predicted label.
sample_headline = "NASA announces new planet discovery!"
print(predict_news(sample_headline))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
FAKE


In [56]:
# Persist the trained network to disk in the working directory.
# NOTE(review): only the model is written here; the fitted `tokenizer` that
# predict_news relies on is not saved in the visible cells.
model.save(filepath="lstm_fake_news_model.h5")

