# Detection of Fake News via Classification MODEL

## 1.Load and check the Dataset

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

2025-08-05 09:47:44.212151: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754387264.553650      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754387264.648147      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:

# Training data
train_data = pd.read_csv(r"/kaggle/input/fake-news-classification/train (2).csv",sep=";")
train_data.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,0,Palestinians switch off Christmas lights in Be...,"RAMALLAH, West Bank (Reuters) - Palestinians s...",1
1,1,China says Trump call with Taiwan president wo...,BEIJING (Reuters) - U.S. President-elect Donal...,1
2,2,FAIL! The Trump Organization’s Credit Score W...,While the controversy over Trump s personal ta...,0
3,3,Zimbabwe military chief's China trip was norma...,BEIJING (Reuters) - A trip to Beijing last wee...,1
4,4,THE MOST UNCOURAGEOUS PRESIDENT EVER Receives ...,There has never been a more UNCOURAGEOUS perso...,0


In [4]:
# Testing data
test_data = pd.read_csv(r"/kaggle/input/fake-news-classification/test (1).csv",sep=";")
test_data.head(5)

Unnamed: 0.1,Unnamed: 0,title,text,label
0,0,"Live from New York, it's a Trump-Clinton remat...",NEW YORK (Reuters) - Veteran actor and frequen...,1
1,1,Catalan separatists to lose majority in tight ...,BARCELONA (Reuters) - Catalonia s independence...,1
2,2,North Carolina governor concedes election to D...,"WINSTON-SALEM, N.C. (Reuters) - North Carolina...",1
3,3,Draft Senate Iran legislation sets tough new U...,WASHINGTON (Reuters) - Draft legislation respo...,1
4,4,California governor taps U.S. Representative B...,"SACRAMENTO, Calif. (Reuters) - California Gove...",1


#### B) EDA

In [5]:
train_data.shape

(24353, 4)

In [6]:
test_data.shape

(8117, 4)

In [7]:
df1 = train_data.copy()
df2 = test_data.copy()

In [8]:
# Checking the NULL Values in Training data
df1.isna().sum()

Unnamed: 0    0
title         0
text          0
label         0
dtype: int64

In [9]:
# Checkig the NULL Values in Test data
df2.isna().sum()

Unnamed: 0    0
title         0
text          0
label         0
dtype: int64

## 2. Splitting in X,Y 

In [10]:
# Data splitting
x = df1.drop(['Unnamed: 0','label'],axis=1)
y = df1['label']

## 3. Data Prepration

#### 3.1 Import of Tensorflow libraries

In [11]:
from tensorflow.keras.layers import Embedding,Dense,Dropout,LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import one_hot

In [12]:
# Vocbulary size
voc_size = 5000

## 3.2 OneHot Representation

In [13]:
messages = x.copy()
messages.reset_index(inplace=True)

In [14]:
import nltk
import re
from nltk.corpus import stopwords

In [15]:
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## **3.3 Preprocessing of Data**

In [16]:
'''It used for stemming text — a key step in Natural Language Processing (NLP),
that helps simplify words by reducing them to their root/base form.'''
from nltk.stem.porter import PorterStemmer


In [17]:
ps = PorterStemmer()
corpus= []
for i in range(0,len(messages)):
    review = re.sub('[^a-zA-Z]',' ',messages['title'][i])
    review = review.lower()
    review = review.split()
    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)

In [18]:
# Check corpus
for i in range(0,10):
    print(i,corpus[i])

0 palestinian switch christma light bethlehem anti trump protest
1 china say trump call taiwan presid chang island statu
2 fail trump organ credit score make laugh
3 zimbabw militari chief china trip normal visit beij say
4 uncourag presid ever receiv courag award proce whine current presid
5 suspect boko haram suicid bomber kill least nigeria offici
6 watch john oliv present gop debat clowntown f ck world sh tshow
7 senat democrat ask trump attorney gener pick recus russia probe
8 trump humili republican latest hissi fit side democrat debt ceil
9 maci get boot loyal custom fire trump


In [19]:
# Onehot Representation of these corpus words
onehot_rep = [one_hot(words,voc_size) for words in corpus]
onehot_rep[0:5]

[[4397, 2946, 2033, 1072, 1669, 3229, 1483, 3250],
 [3092, 3695, 1483, 637, 984, 4046, 4684, 225, 4308],
 [2685, 1483, 4886, 2987, 3064, 3440, 14],
 [4880, 3628, 3188, 3092, 786, 2163, 4626, 4364, 3695],
 [4319, 4046, 71, 3981, 3522, 296, 2997, 4057, 459, 4046]]

## 3.3 Embedding Representation

In [20]:
col_length = 100
embedded_rep = pad_sequences(onehot_rep,padding='pre',maxlen=col_length)
print(embedded_rep)

[[   0    0    0 ... 3229 1483 3250]
 [   0    0    0 ... 4684  225 4308]
 [   0    0    0 ... 3064 3440   14]
 ...
 [   0    0    0 ... 3575 1483 3638]
 [   0    0    0 ... 1151  245 1875]
 [   0    0    0 ...  763 4978 1979]]


In [21]:
len(embedded_rep)

24353

## 4. MODEL Creation

In [22]:
from tensorflow.keras import regularizers
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

In [23]:
embedding_features = 40
model = Sequential()
model.add(Embedding(input_dim=voc_size, output_dim=embedding_features))  
model.add(BatchNormalization())
model.add(LSTM(32,dropout=0.5,recurrent_dropout=0.2))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid',kernel_regularizer=regularizers.l2(0.01)))

model.build(input_shape=(None, col_length))  # ✅ build manually for summary
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

I0000 00:00:1754387315.454589      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1754387315.455367      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


In [24]:
x_final = np.array(embedded_rep)
y_final =np.array(y)

In [25]:
x_final.shape,y_final.shape

((24353, 100), (24353,))

In [26]:
x_train,x_test,y_train,y_test = train_test_split(x_final,y_final,test_size=0.3,random_state=42)

In [28]:
# Final Training of MODEL
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model.fit(x_train,y_train,epochs=10,validation_data=(x_test,y_test),callbacks=[early_stop])

Epoch 1/10
[1m533/533[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 294ms/step - accuracy: 0.9222 - loss: 0.2109 - val_accuracy: 0.9067 - val_loss: 0.2427
Epoch 2/10
[1m533/533[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 294ms/step - accuracy: 0.9389 - loss: 0.1754 - val_accuracy: 0.9097 - val_loss: 0.2510
Epoch 3/10
[1m533/533[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 294ms/step - accuracy: 0.9506 - loss: 0.1496 - val_accuracy: 0.9060 - val_loss: 0.2767
Epoch 4/10
[1m533/533[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 294ms/step - accuracy: 0.9557 - loss: 0.1377 - val_accuracy: 0.9071 - val_loss: 0.2984


<keras.src.callbacks.history.History at 0x7c7f1c47bf50>

In [29]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}, Loss: {loss:.4f}")


[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 56ms/step - accuracy: 0.9060 - loss: 0.2496
Test Accuracy: 0.9067, Loss: 0.2427


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

y_pred = model.predict(x_test)
y_pred = (y_pred > 0.5).astype(int)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))