In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns



In [2]:
# Read the news dataset from a CSV file in the working directory, then display it.
df=pd.read_csv("fake_or_real_news.csv")
df

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
...,...,...,...,...
6330,4490,State Department says it can't find emails fro...,The State Department told the Republican Natio...,REAL
6331,8062,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,FAKE
6332,8622,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,FAKE
6333,4021,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",REAL


In [3]:
# Count missing values per column.
df.isnull().sum()

Unnamed: 0    0
title         0
text          0
label         0
dtype: int64

In [4]:
# Discard the "Unnamed: 0" column; no later cell shown here reads it.
df = df.drop(columns=["Unnamed: 0"])

In [5]:
# Merge headline and body into one text field, separated by a single space.
df["news"]=df["title"]+" "+df["text"]

In [6]:
# "title" and "text" are redundant once their content lives in "news".
df = df.drop(columns=["title", "text"])

In [7]:
# Encode the target as integers: FAKE -> 0, REAL -> 1.
# The result is assigned back to the column instead of calling an inplace
# method on `df["label"]`: an inplace call on a column selection operates on a
# temporary object and does not reliably update `df` (it warns on recent
# pandas and is a no-op under Copy-on-Write).
# Values that are already encoded pass through unchanged, so the cell can be
# re-run; any unexpected label string makes the int cast fail loudly.
label_codes = {"FAKE": 0, "REAL": 1}
df["label"] = df["label"].map(lambda value: label_codes.get(value, value)).astype("int64")

In [8]:
# Display the frame: encoded label plus the combined news text.
df

Unnamed: 0,label,news
0,0,You Can Smell Hillary’s Fear Daniel Greenfield...
1,0,Watch The Exact Moment Paul Ryan Committed Pol...
2,1,Kerry to go to Paris in gesture of sympathy U....
3,0,Bernie supporters on Twitter erupt in anger ag...
4,1,The Battle of New York: Why This Primary Matte...
...,...,...
6330,1,State Department says it can't find emails fro...
6331,0,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...
6332,0,Anti-Trump Protesters Are Tools of the Oligarc...
6333,1,"In Ethiopia, Obama seeks progress on peace, se..."


In [9]:
# Lowercase the text so tokens differing only in case are treated as the same word.
df["news"]=df["news"].str.lower()

In [10]:
# Display the frame to check the lowercased text.
df

Unnamed: 0,label,news
0,0,you can smell hillary’s fear daniel greenfield...
1,0,watch the exact moment paul ryan committed pol...
2,1,kerry to go to paris in gesture of sympathy u....
3,0,bernie supporters on twitter erupt in anger ag...
4,1,the battle of new york: why this primary matte...
...,...,...
6330,1,state department says it can't find emails fro...
6331,0,the ‘p’ in pbs should stand for ‘plutocratic’ ...
6332,0,anti-trump protesters are tools of the oligarc...
6333,1,"in ethiopia, obama seeks progress on peace, se..."


In [11]:
import re
def remove_special_char(text):
    """Strip every character that is not an ASCII letter, a digit or whitespace.

    Removed characters are deleted outright (not replaced by a space), so
    "anti-trump" becomes "antitrump". Whitespace, including newlines, is kept.
    """
    disallowed = re.compile(r'[^0-9a-zA-Z\s]')
    return disallowed.sub("", text)

# Run the character filter over every row of the news column.
df["news"]=df["news"].apply(remove_special_char)

In [12]:
# Display the frame to check the text after special-character removal.
df

Unnamed: 0,label,news
0,0,you can smell hillarys fear daniel greenfield ...
1,0,watch the exact moment paul ryan committed pol...
2,1,kerry to go to paris in gesture of sympathy us...
3,0,bernie supporters on twitter erupt in anger ag...
4,1,the battle of new york why this primary matter...
...,...,...
6330,1,state department says it cant find emails from...
6331,0,the p in pbs should stand for plutocratic or p...
6332,0,antitrump protesters are tools of the oligarch...
6333,1,in ethiopia obama seeks progress on peace secu...


In [13]:
from nltk.tokenize import word_tokenize

# Replace each news string with its list of word tokens.
# NOTE(review): word_tokenize relies on NLTK tokenizer data being installed — confirm it is available in the run environment.
df["news"]=df["news"].apply(word_tokenize)

In [14]:
# Display the frame; "news" now holds token lists instead of strings.
df

Unnamed: 0,label,news
0,0,"[you, can, smell, hillarys, fear, daniel, gree..."
1,0,"[watch, the, exact, moment, paul, ryan, commit..."
2,1,"[kerry, to, go, to, paris, in, gesture, of, sy..."
3,0,"[bernie, supporters, on, twitter, erupt, in, a..."
4,1,"[the, battle, of, new, york, why, this, primar..."
...,...,...
6330,1,"[state, department, says, it, cant, find, emai..."
6331,0,"[the, p, in, pbs, should, stand, for, plutocra..."
6332,0,"[antitrump, protesters, are, tools, of, the, o..."
6333,1,"[in, ethiopia, obama, seeks, progress, on, pea..."


In [15]:
from nltk.corpus import stopwords

# English stop words, stored as a set for the membership tests in remove_stopwords.
# NOTE(review): requires the NLTK stopwords corpus to be downloaded — confirm.
stop_words = set(stopwords.words('english'))

def remove_stopwords(text):
    """Return the tokens of ``text`` that are not in ``stop_words``.

    ``text`` is an iterable of tokens; the surviving tokens are returned as a
    new list in their original order.
    """
    kept = []
    for token in text:
        if token in stop_words:
            continue
        kept.append(token)
    return kept

# Filter stop words out of every token list in the news column.
df["news"]=df["news"].apply(remove_stopwords)

In [16]:
# Display the frame to check the token lists after stop-word removal.
df

Unnamed: 0,label,news
0,0,"[smell, hillarys, fear, daniel, greenfield, sh..."
1,0,"[watch, exact, moment, paul, ryan, committed, ..."
2,1,"[kerry, go, paris, gesture, sympathy, us, secr..."
3,0,"[bernie, supporters, twitter, erupt, anger, dn..."
4,1,"[battle, new, york, primary, matters, primary,..."
...,...,...
6330,1,"[state, department, says, cant, find, emails, ..."
6331,0,"[p, pbs, stand, plutocratic, pentagon, p, pbs,..."
6332,0,"[antitrump, protesters, tools, oligarchy, info..."
6333,1,"[ethiopia, obama, seeks, progress, peace, secu..."


In [17]:
from nltk.stem import PorterStemmer

# Single Porter stemmer instance shared by stemming() and preprocess().
stemmer=PorterStemmer()

def stemming(text):
    """Stem each token in ``text`` and return the stems as one space-separated string."""
    return " ".join(map(stemmer.stem, text))

# Stem every token list; stemming() joins the stems, so "news" holds strings again afterwards.
df["news"]=df["news"].apply(stemming)

In [18]:
# Display the frame to check the stemmed, re-joined text.
df

Unnamed: 0,label,news
0,0,smell hillari fear daniel greenfield shillman ...
1,0,watch exact moment paul ryan commit polit suic...
2,1,kerri go pari gestur sympathi us secretari sta...
3,0,berni support twitter erupt anger dnc tri warn...
4,1,battl new york primari matter primari day new ...
...,...,...
6330,1,state depart say cant find email clinton speci...
6331,0,p pb stand plutocrat pentagon p pb stand pluto...
6332,0,antitrump protest tool oligarchi inform antitr...
6333,1,ethiopia obama seek progress peac secur east a...


In [19]:
# Select the target and the feature by column name rather than by position,
# so the assignment cannot silently swap or break if the column order changes.
y = df["label"]
x = df["news"]

from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state pins the shuffle so the split is reproducible.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Fit the TF-IDF vectorizer on the training text only, then reuse the fitted
# vectorizer on the test text so the test set does not influence the fit.
# x_train / x_test are rebound from text to feature matrices here, so this
# cell cannot be re-run without re-running the split first.
tfidf=TfidfVectorizer()
x_train=tfidf.fit_transform(x_train)
x_test=tfidf.transform(x_test)

In [21]:
# Convert the vectorizer output to dense arrays before handing it to the Keras model.
# NOTE(review): a dense (rows x vocabulary size) matrix can be large — confirm it fits in memory.
x_train=x_train.toarray()
x_test=x_test.toarray()

In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout

# Feed-forward binary classifier over the TF-IDF feature vectors.
#
# `input_shape` describes ONE sample and excludes the batch axis. Each sample
# is a flat vector with one entry per TF-IDF feature, so the shape is the
# 1-tuple (n_features,). Writing (None, n_features) instead would declare
# rank-3 inputs, which does not match the 2-D (n_samples, n_features) arrays
# passed to fit(), evaluate() and predict().
n_features = x_train.shape[1]

model=Sequential()
model.add(Dense(128,activation="relu",input_shape=(n_features,)))
model.add(Dropout(0.5))    # heavier dropout right after the widest layer
model.add(Dense(64,activation="relu"))
model.add(Dropout(0.25))
model.add(Dense(32,activation="relu"))
# Single sigmoid unit: output is a score in (0, 1) for the label encoded as 1.
model.add(Dense(1,activation="sigmoid"))

In [23]:
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is tracked for reporting.
model.compile(optimizer="adam",loss="binary_crossentropy",metrics=["accuracy"])

In [24]:
# Train for 5 epochs in mini-batches of 32, setting aside 20% of the training data for validation.
# The returned History object is kept in `history` for later inspection.
history=model.fit(x_train,y_train,validation_split=0.2,batch_size=32,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [25]:
# Score the trained model on the held-out test set. With the loss and the one
# metric configured at compile time, result[0] is the loss and result[1] the accuracy.
result=model.evaluate(x_test,y_test)
print(f"Loss:{result[0]}")
print(f"Accuracy:{result[1]}")

Loss:0.2474464476108551
Accuracy:0.9337016344070435


In [26]:
def preprocess(text):
    """Turn one raw news string into a model-ready TF-IDF feature matrix.

    Mirrors the steps applied to the training column, in the same order:
    lowercase, delete every character that is not an ASCII letter, digit or
    whitespace, tokenize, drop stop words, Porter-stem, then vectorize with
    the already-fitted ``tfidf``.

    Parameters
    ----------
    text : str
        Raw news text to classify.

    Returns
    -------
    Dense array with exactly one row: the TF-IDF vector of the whole text.
    """
    # Lowercase BEFORE stripping, as the training cells do, so characters
    # whose lowercase form is ASCII are handled identically in both paths.
    cleaned = re.sub(r'[^A-Za-z0-9\s]', "", text.lower())
    tokens = word_tokenize(cleaned)
    stems = [stemmer.stem(w) for w in tokens if w not in stop_words]
    # transform() treats each element of its argument as a separate document.
    # The stems are therefore joined back into ONE string and wrapped in a
    # one-element list; handing over the token list itself would yield one row
    # per word, and the caller would classify only the first word.
    document = " ".join(stems)
    vectorized = tfidf.transform([document])
    return vectorized.toarray()

# Classify one piece of news typed in by the user.
new_news = input("Enter the news here: ")
input_vector = preprocess(new_news)
output = model.predict(input_vector)
# Only the first row's score is read; 0 encodes FAKE and 1 encodes REAL,
# so a score of 0.5 or below is reported as FAKE.
print("FAKE" if output[0][0] <= 0.5 else "REAL")

Enter the news here: RFK Jr. Unlikely to Join Presidential Debate: Independent U.S. presidential candidate Robert F. Kennedy Jr. is unlikely to participate in the 2024’s first U.S.
FAKE
