# Fake Newes detection 

In [45]:
#importing req library
import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split

In [46]:
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score,confusion_matrix

In [47]:
#importing Data
news=pd.read_csv('news.csv')
news.head(10)

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
5,6903,"Tehran, USA","\nI’m not an immigrant, but my grandparents ...",FAKE
6,7341,Girl Horrified At What She Watches Boyfriend D...,"Share This Baylee Luciani (left), Screenshot o...",FAKE
7,95,‘Britain’s Schindler’ Dies at 106,A Czech stockbroker who saved more than 650 Je...,REAL
8,4869,Fact check: Trump and Clinton at the 'commande...,Hillary Clinton and Donald Trump made some ina...,REAL
9,2909,Iran reportedly makes new push for uranium con...,Iranian negotiators reportedly have made a las...,REAL


In [48]:
#checking dimension 
news.shape


(6335, 4)

In [49]:
lables=news.label
lables.head()

0    FAKE
1    FAKE
2    REAL
3    FAKE
4    REAL
Name: label, dtype: object

In [50]:
#diving data into training and test sets
x_train,x_test,y_train,y_test=train_test_split(news['text'],lables,test_size=0.2,random_state=7)

In [51]:
#checking shape of train and test data
x_train.shape
x_train.head

<bound method NDFrame.head of 6237    The head of a leading survivalist group has ma...
3722    ‹ › Arnaldo Rodgers is a trained and educated ...
5774    Patty Sanchez, 51, used to eat 13,000 calories...
336     But Benjamin Netanyahu’s reelection was regard...
3622    John Kasich was killing it with these Iowa vot...
                              ...                        
5699                                                     
2550    It’s not that Americans won’t elect wealthy pr...
537     Anyone writing sentences like ‘nevertheless fu...
1220    More Catholics are in Congress than ever befor...
4271    It was hosted by CNN, and the presentation was...
Name: text, Length: 5068, dtype: object>

# What is a TfidfVectorizer?
TF (Term Frequency): The number of times a word appears in a document is its Term Frequency. A higher value means a term appears more often than others, and so, the document is a good match when the term is part of the search terms.

IDF (Inverse Document Frequency): Words that occur many times a document, but also occur many times in many others, may be irrelevant. IDF is a measure of how significant a term is in the entire corpus.

The TfidfVectorizer converts a collection of raw documents into a matrix of TF-IDF features

In [52]:
tf_vec=TfidfVectorizer(stop_words="english",max_df=0.7)


In [53]:
#careting model with train data
tf_train=tf_vec.fit_transform(x_train)

In [54]:
#transforming model for test data
tf_test=tf_vec.transform(x_test)

# What is a PassiveAggressiveClassifier?
Passive Aggressive algorithms are online learning algorithms. Such an algorithm remains passive for a correct classification outcome, and turns aggressive in the event of a miscalculation, updating and adjusting. Unlike most other algorithms, it does not converge. Its purpose is to make updates that correct the loss, causing very little change in the norm of the weight vector.

In [55]:
pac=PassiveAggressiveClassifier(max_iter=60)
#fiting model
pac.fit(tf_train,y_train)

PassiveAggressiveClassifier(max_iter=60)

In [56]:
#Prediting the output on test data
y_pred=pac.predict(tf_test)
y_pred

array(['REAL', 'FAKE', 'REAL', ..., 'REAL', 'FAKE', 'REAL'], dtype='<U4')

In [57]:
x_test.head
x_test.shape

y_test.head

<bound method NDFrame.head of 3534    REAL
6265    FAKE
3123    REAL
3940    REAL
2856    REAL
        ... 
4986    REAL
5789    REAL
4338    REAL
5924    FAKE
6030    REAL
Name: label, Length: 1267, dtype: object>

In [58]:
#checking Model accuracey 
score=accuracy_score(y_test,y_pred)
score

0.9281767955801105

In [59]:
#model accuracey in percent
print(f'Accuracy: {round(score*100,2)}%')


Accuracy: 92.82%


In [60]:
#testing model on some other data
n2=pd.read_csv('C:/Users/Administrator/Documents/my projects/fake news detection/news2.csv')
n2.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,3424,King: Fill the Supreme Court vacancy,"""We've got an opening on the court. I think Sa...",REAL
1,1718,Ted Cruz is toast: It’s not just that he won’t...,"I’m not sure when it started, but at some poin...",REAL
2,143,Obama’s speech in Selma was an answer to those...,President Obama's supporters sometimes wonder ...,REAL
3,930,Where Does Bernie Sanders Go From Here?,NEW YORK - Bernie Sanders is at a crossroads.\...,REAL
4,1661,Exclusive: GOP campaigns plot revolt against RNC,"Killing Obama administration rules, dismantlin...",REAL


In [61]:
lables=n2.label
lables.head

<bound method NDFrame.head of 0      REAL
1      REAL
2      REAL
3      REAL
4      REAL
       ... 
608    REAL
609    FAKE
610    FAKE
611    REAL
612    REAL
Name: label, Length: 613, dtype: object>

In [62]:
news_text=n2["text"]

In [63]:
news_lable=n2["label"]

In [64]:
tf_new=tf_vec.transform(news_text)

In [65]:
tf_new

<613x61651 sparse matrix of type '<class 'numpy.float64'>'
	with 153374 stored elements in Compressed Sparse Row format>

In [66]:
pred_new=pac.predict(tf_new)

In [67]:
#checking Model accuracey 
score=accuracy_score(news_lable,pred_new)
score

0.9902120717781403

In [68]:
print(f'Accuracy: {round(score*100,2)}%')

Accuracy: 99.02%


# NEW DATASET

In [83]:
#testing model on some other data
n3=pd.read_csv('C:/Users/Administrator/Documents/my projects/fake news detection/news3.csv')
n3.head()

Unnamed: 0,title,text,subject,date,labels
0,al race,it s certainly an old lesson by now. Dependin...,Karl Marx is either a villain or a hero of so...,but he left us some interesting quotes. Marx ...,Fake
1,e more time. In the film s final surreal sequence,we see Mickey and Mallory driving in an RV as...,a future nuclear family,one without traditional moral values,Fake
2,"WHEN IN ROME: Erdogan Thugs Rough-up Press, Pr...","21st Century Wire says NATO s ugly stepchild, ...",Middle-east,"April 1, 2016",Fake
3,SUNDAY SCREENING: ‘The War On Democracy’,21st Century Wire says This is the latest inst...,Middle-east,"April 10, 2016",Fake
4,First Time in 30 Years: US Deploys B-52 Bomber...,21st Century Wire says Is Washington preparing...,Middle-east,"April 10, 2016",Fake


In [85]:
lables=n3.labels
lables.head

<bound method NDFrame.head of 0       Fake
1       Fake
2       Fake
3       Fake
4       Fake
        ... 
1002    Fake
1003    Fake
1004    Fake
1005    Fake
1006    Fake
Name: labels, Length: 1007, dtype: object>

In [86]:
news_text2=n3["text"]

In [88]:
news_lable2=n3["labels"]

In [89]:
tf_new2=tf_vec.transform(news_text2)

In [90]:
tf_new2

<1007x61651 sparse matrix of type '<class 'numpy.float64'>'
	with 184337 stored elements in Compressed Sparse Row format>

In [94]:
pred_new2=pac.predict(tf_new2)
pred_new2

array(['FAKE', 'FAKE', 'REAL', ..., 'FAKE', 'REAL', 'FAKE'], dtype='<U4')

In [96]:
pred_new2.shape

(1007,)

# End