# Importing

In [37]:
import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import gradio as gr

# Read the data

In [38]:
#Read the data
df=pd.read_csv('news_comb.csv')

#Get shape and head
df.shape
df.drop(['Unnamed: 0'], axis = 1)

Unnamed: 0,Unnamed: 0.1,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
...,...,...,...,...
51228,23476,McPain: John McCain Furious That Iran Treated ...,21st Century Wire says As 21WIRE reported earl...,FAKE
51229,23477,JUSTICE? Yahoo Settles E-mail Privacy Class-ac...,21st Century Wire says It s a familiar theme. ...,FAKE
51230,23478,Sunnistan: US and Allied ‘Safe Zone’ Plan to T...,Patrick Henningsen 21st Century WireRemember ...,FAKE
51231,23479,How to Blow $700 Million: Al Jazeera America F...,21st Century Wire says Al Jazeera America will...,FAKE


# Seperating Labels

In [39]:
#DataFlair - Get the labels
labels = df.label
labels.head()

0    FAKE
1    FAKE
2    REAL
3    FAKE
4    REAL
Name: label, dtype: object

# Train test Split the dataset

In [40]:
x_train,x_test,y_train,y_test= train_test_split(df, labels, test_size=0.2, random_state=7)

In [41]:
df_t1 = x_test.drop(['Unnamed: 0','Unnamed: 0.1','title'],axis = 1)
df_t1

Unnamed: 0,text,label
23499,BEIJING (Reuters) - China s ruling Communist P...,REAL
5965,Headlined to H3 10/26/16 - Advertisement - \nT...,FAKE
47211,,FAKE
25930,ABIDJAN (Reuters) - At least 10 people have di...,REAL
20885,MOSCOW (Reuters) - Kremlin spokesman Dmitry Pe...,REAL
...,...,...
28054,Donald Trump went all in for his man Luther St...,FAKE
12110,DUBAI (Reuters) - Iran said on Wednesday it ha...,REAL
36322,"Bill Maher is all about telling it like it is,...",FAKE
44194,If I were a European and was forced to deal wi...,FAKE


In [42]:
#DataFlair - Initialize a TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df= 0.999)

#DataFlair - Fit and transform train set, transform test set
tfidf_train = tfidf_vectorizer.fit_transform(x_train['text']) 
tfidf_test = tfidf_vectorizer.transform(x_test['text'])


In [43]:
#DataFlair - Initialize a PassiveAggressiveClassifier
pac = PassiveAggressiveClassifier(max_iter=50)
pac.fit(tfidf_train,y_train)

PassiveAggressiveClassifier(max_iter=50)

In [44]:
#DataFlair - Predict on the test set and calculate accuracy
y_pred=pac.predict(tfidf_test)
score=accuracy_score(y_test,y_pred)
print(f'Accuracy: {round(score*100,2)}%')

Accuracy: 97.51%


In [45]:
#DataFlair - Build confusion matrix
confusion_matrix(y_test,y_pred, labels=['FAKE','REAL'])

array([[5205,  112],
       [ 143, 4787]], dtype=int64)

# Testing using external data

In [48]:
df_test=pd.read_csv('test.csv')
#Get shape and head
df_t2 = df_test.drop(['id','author','title'],axis = 1)
df_t2.loc[df_t2['label']== 1,'label']= 'FAKE'
df_t2.loc[df_t2['label']== 0 , 'label'] = 'REAL'
df_t2

Unnamed: 0,text,label
0,House Dem Aide: We Didn’t Even See Comey’s Let...,FAKE
1,Ever get the feeling your life circles the rou...,REAL
2,"Why the Truth Might Get You Fired October 29, ...",FAKE
3,Videos 15 Civilians Killed In Single US Airstr...,FAKE
4,Print \nAn Iranian woman has been sentenced to...,FAKE
...,...,...
20795,Rapper T. I. unloaded on black celebrities who...,REAL
20796,When the Green Bay Packers lost to the Washing...,REAL
20797,The Macy’s of today grew from the union of sev...,REAL
20798,"NATO, Russia To Hold Parallel Exercises In Bal...",FAKE


In [49]:
df_test = pd.concat([df_t1,df_t2])

In [50]:
# Replace null values with NAN
df_test.dropna(subset=['text'],inplace=True)
df_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 31008 entries, 23499 to 20799
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    31008 non-null  object
 1   label   31008 non-null  object
dtypes: object(2)
memory usage: 726.8+ KB


In [57]:
# total there are 31008 news articles that AI did not see
i = 896
inp = df_test['text'][i]
print(inp)

BEIRUT, Lebanon  —   For months, the bodies have been piling up in eastern Aleppo as the buildings have come down, pulverized by Syrian and Russian jets, burying residents who could not flee in avalanches of bricks and mortar. And now it is almost over, not because diplomats reached a deal in Geneva, but because President Bashar   of Syria and his foreign allies have won the city. Cold, hungry and scarred by the deaths of loved ones, tens of thousands of civilians and fighters are awaiting buses to take them from their homes to uncertain futures. It is not the first victory that Mr. Assad has secured with overwhelming force in the Syrian conflict. But his subjugation of eastern Aleppo has echoed across the Middle East and beyond, rattling alliances, proving the effectiveness of violence and highlighting the reluctance of many countries, perhaps most notably the United States, to get involved. President Obama, on Friday at his final news conference of the year, acknowledged that the nea

In [58]:
k = "Trouth is ::> " + str(y_pred[0])
k
# This is the ground trouth label..

'Trouth is ::> FAKE'

In [53]:
sen = [inp]
vec_sen = tfidf_vectorizer.transform(sen)
y_pred=pac.predict(vec_sen)
o = "AI PREDICTION ::> " + str(y_pred[0])
o
# This is tha AI prediction..

'AI PREDICTION ::> FAKE'

# Function for gradio

In [46]:
def FakeOrReal(News):
    sen = [News]
    vec_sen = tfidf_vectorizer.transform(sen)
    y_pred=pac.predict(vec_sen)
    o = "AI PREDICTION ::" + str(y_pred[0])
    return o

# Run Gradio

In [47]:
iface = gr.Interface(fn=FakeOrReal, inputs="text", outputs="text")
iface.launch()



Running on local URL:  http://127.0.0.1:7861/

To create a public link, set `share=True` in `launch()`.


(<Flask 'gradio.networking'>, 'http://127.0.0.1:7861/', None)