In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# Read the two article CSVs into separate frames; the "label" column added in
# the following cell marks rows from Fake.csv as 0 and rows from True.csv as 1.
fake, real = pd.read_csv("Data/Fake.csv"), pd.read_csv("Data/True.csv")

In [3]:
# Tag every row with its class before the frames are merged:
# 0 for rows loaded from Fake.csv, 1 for rows loaded from True.csv.
for frame, target in ((fake, 0), (real, 1)):
    frame["label"] = target

In [4]:
# Stack the two labelled frames, then shuffle the rows so the classes are
# interleaved instead of sitting in two contiguous halves.
# The shuffle is seeded: an unseeded sample() reorders the rows differently on
# every run, which would make the (seeded) train/test split further down pick
# different rows each time and the reported metrics impossible to reproduce.
data = pd.concat([fake, real])
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

In [5]:
def clean_text(text):
    """Normalise one raw article string before vectorisation/tokenisation.

    The text is lower-cased, URL-like tokens (anything starting with
    ``http`` or ``www`` up to the next whitespace) are removed, and every
    remaining character that is not ``a``-``z`` or whitespace is deleted.
    Characters are deleted rather than replaced by a space, so
    ``"well-known"`` becomes ``"wellknown"``; whitespace itself is left
    untouched, so a removed token can leave a run of spaces behind.

    Args:
        text: The raw text. Non-string values (for example ``None`` or the
            float ``NaN`` that pandas uses for empty CSV cells) are treated
            as missing text.

    Returns:
        str: The cleaned text, or ``""`` when ``text`` is not a string.
    """
    # Guard against missing cells: calling .lower() on NaN/None would raise
    # AttributeError and abort the whole Series.apply().
    if not isinstance(text, str):
        return ""

    lowered = text.lower()
    without_urls = re.sub(r"http\S+|www\S+|https\S+", "", lowered)
    letters_only = re.sub(r"[^a-z\s]", "", without_urls)
    return letters_only

# Run the cleaner over every article body and write the result back into the
# same "text" column.
cleaned_text = data["text"].apply(clean_text)
data["text"] = cleaned_text

In [6]:
# Features are the cleaned article text; the target is the 0/1 label column.
X, y = data["text"], data["label"]

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split itself repeatable for a given row order.
splits = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = splits

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [10]:
# Logistic regression on TF-IDF features.
# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vocabulary.
tfidf = TfidfVectorizer(stop_words="english", max_features=5000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# fit() hands back the estimator, so construction and training are chained.
lr_model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)

# Class predictions for the held-out split, scored in the next cell.
y_pred_lr = lr_model.predict(X_test_tfidf)

In [11]:
# Report accuracy, per-class precision/recall/F1 and the confusion matrix for
# the logistic regression predictions on the held-out split.
print("LOGISTIC REGRESSION RESULTS")
lr_accuracy = accuracy_score(y_test, y_pred_lr)
print("Accuracy:", lr_accuracy)
lr_report = classification_report(y_test, y_pred_lr)
print(lr_report)
lr_confusion = confusion_matrix(y_test, y_pred_lr)
print("Confusion Matrix:\n", lr_confusion)

LOGISTIC REGRESSION RESULTS
Accuracy: 0.989532293986637
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4715
           1       0.99      0.99      0.99      4265

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980

Confusion Matrix:
 [[4654   61]
 [  33 4232]]


In [12]:
from tensorflow.keras.layers import Dense, Dropout, Embedding, Input, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import set_random_seed

In [13]:
# Fixed sequence length fed to the network (also read by the model cell).
max_len = 300

# Build the word index from the training texts only, with num_words=5000
# limiting how much of the vocabulary is used when encoding.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)

# Encode both splits as integer sequences with the same fitted tokenizer.
X_train_seq, X_test_seq = [
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
]

# Bring every sequence to exactly max_len entries.
X_train_pad, X_test_pad = [
    pad_sequences(sequences, maxlen=max_len)
    for sequences in (X_train_seq, X_test_seq)
]

In [14]:
# Seed Python, NumPy and the Keras backend before any weights are created so
# that weight initialisation, dropout masks and batch shuffling are repeatable
# between runs (bit-exact determinism can still depend on hardware/backend).
set_random_seed(42)

model = Sequential()

# Declare the input shape with an explicit Input layer rather than passing
# `input_length` to Embedding: that argument is deprecated in Keras 3, and
# without a declared input the model is still unbuilt when summary() runs,
# so no output shapes or parameter counts can be reported.
model.add(Input(shape=(max_len,)))
# input_dim must stay equal to the Tokenizer's num_words (5000) so that every
# token index produced by the tokenizer has a row in the embedding matrix.
model.add(Embedding(input_dim=5000, output_dim=128))
# Only the final LSTM state is passed on (return_sequences=False).
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.3))
# Single sigmoid unit: the output is thresholded into a 0/1 label later on.
model.add(Dense(1, activation="sigmoid"))

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

model.summary()




In [15]:
# Train for 5 epochs in mini-batches of 64, with a 20% validation split taken
# from the training data.
model.fit(
    X_train_pad,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=5,
)

# The network emits sigmoid outputs; threshold them at 0.5 to get 0/1 labels.
lstm_probabilities = model.predict(X_test_pad)
y_pred_lstm = (lstm_probabilities > 0.5).astype("int32")


Epoch 1/5
449/449 ━━━━━━━━━━━━━━━━━━━━ 87s 186ms/step - accuracy: 0.9524 - loss: 0.1407 - val_accuracy: 0.9793 - val_loss: 0.0676
Epoch 2/5
449/449 ━━━━━━━━━━━━━━━━━━━━ 90s 201ms/step - accuracy: 0.9682 - loss: 0.0978 - val_accuracy: 0.9663 - val_loss: 0.1013
Epoch 3/5
449/449 ━━━━━━━━━━━━━━━━━━━━ 97s 217ms/step - accuracy: 0.9793 - loss: 0.0657 - val_accuracy: 0.9788 - val_loss: 0.0697
Epoch 4/5
449/449 ━━━━━━━━━━━━━━━━━━━━ 93s 208ms/step - accuracy: 0.9708 - loss: 0.0818 - val_accuracy: 0.9127 - val_loss: 0.2194
Epoch 5/5
449/449 ━━━━━━━━━━━━━━━━━━━━ 92s 205ms/step - accuracy: 0.9740 - loss: 0.0807 - val_accuracy: 0.9788 - val_loss: 0.0709
281/281 ━━━━━━━━━━━━━━━━━━━━ 16s 56ms/step


In [16]:
# Report accuracy, per-class precision/recall/F1 and the confusion matrix for
# the thresholded LSTM predictions on the held-out split.
print("LSTM RESULTS")
lstm_accuracy = accuracy_score(y_test, y_pred_lstm)
print("Accuracy:", lstm_accuracy)
lstm_report = classification_report(y_test, y_pred_lstm)
print(lstm_report)
lstm_confusion = confusion_matrix(y_test, y_pred_lstm)
print("Confusion Matrix:\n", lstm_confusion)



LSTM RESULTS
Accuracy: 0.977728285077951
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      4715
           1       0.98      0.98      0.98      4265

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980

Confusion Matrix:
 [[4610  105]
 [  95 4170]]
