## Fake News Detection

### Import libraries

In [2]:
import pandas as pd
import numpy as np
import random
import itertools as it
import joblib
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score,confusion_matrix

In [3]:
# Load the news dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='newsdata.csv')

In [4]:
# Preview the first rows to check that the columns loaded as expected.
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label
0,3509,Time Magazine Makes Trump Their ‘Person Of Th...,"If you haven t heard, Donald Trump has joined ...",News,"December 8, 2016",Fake
1,19142,ADELE BREAKS “Best Album” Grammy Award In Half...,"I can t possibly accept this award, she said...",left-news,"Feb 13, 2017",Fake
2,6393,"Trump says pharma 'getting away with murder,' ...",NEW YORK (Reuters) - U.S. President-elect Dona...,politicsNews,"January 11, 2017",Real
3,15335,[VIDEO] WHY THE RACE WAR IS NOT REALLY ABOUT R...,The left believes they are winning this war. ...,politics,"Aug 12, 2015",Fake
4,9141,Orlando killer expressed support for multiple ...,"ORLANDO, Fla. (Reuters) - Orlando nightclub ki...",politicsNews,"June 12, 2016",Real


In [5]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(44898, 6)

In [6]:
# Display the frame; pandas elides the middle rows of a large frame, so both
# ends of the data are visible.
# NOTE(review): this largely overlaps with the df.head() preview above —
# confirm whether both cells are needed.
df

Unnamed: 0.1,Unnamed: 0,title,text,subject,date,label
0,3509,Time Magazine Makes Trump Their ‘Person Of Th...,"If you haven t heard, Donald Trump has joined ...",News,"December 8, 2016",Fake
1,19142,ADELE BREAKS “Best Album” Grammy Award In Half...,"I can t possibly accept this award, she said...",left-news,"Feb 13, 2017",Fake
2,6393,"Trump says pharma 'getting away with murder,' ...",NEW YORK (Reuters) - U.S. President-elect Dona...,politicsNews,"January 11, 2017",Real
3,15335,[VIDEO] WHY THE RACE WAR IS NOT REALLY ABOUT R...,The left believes they are winning this war. ...,politics,"Aug 12, 2015",Fake
4,9141,Orlando killer expressed support for multiple ...,"ORLANDO, Fla. (Reuters) - Orlando nightclub ki...",politicsNews,"June 12, 2016",Real
...,...,...,...,...,...,...
44893,4982,Louisiana Governor Blasts Trump Over His Phot...,While Republicans try to make Louisiana s dead...,News,"August 19, 2016",Fake
44894,9263,Clinton clinches Democratic nomination: AP del...,WASHINGTON (Reuters) - Hillary Clinton has rea...,politicsNews,"June 7, 2016",Real
44895,19784,YOU’RE FIRED! MITT ROMNEY’S NIECE Tells MI GOP...,"Some of the most divisive, partisan politics c...",left-news,"Oct 18, 2016",Fake
44896,11361,TRUMP SUPPORTER FIGHTS BACK: Man Wearing “Make...,Enough is enough. Americans need to start maki...,politics,"Mar 19, 2017",Fake


In [7]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

Unnamed: 0    0
title         0
text          0
subject       0
date          0
label         0
dtype: int64

In [8]:
# Target column: the class of each article.
labels = df['label']

In [9]:
# Preview the first few target values.
labels.head()

0    Fake
1    Fake
2    Real
3    Fake
4    Real
Name: label, dtype: object

In [10]:
# Hold out 20% of the articles for evaluation. random_state seeds the shuffle
# so the same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    df['text'],
    labels,
    test_size=0.2,
    random_state=20,
)

In [11]:
# Set up the TF-IDF vectorizer.
vector = TfidfVectorizer(
    stop_words='english',  # drop common English stop words
    max_df=0.7,            # ignore terms that occur in more than 70% of documents
)

In [12]:
# Learn the vocabulary and IDF weights from the training text only, then
# reuse that fitted vectorizer to encode the test text.
tf_train = vector.fit_transform(x_train)
tf_test = vector.transform(x_test)

In [13]:
# Initialize and train the passive-aggressive classifier.
# random_state is pinned because the estimator shuffles the training data
# after each epoch by default (shuffle=True); without a seed, every
# Restart & Run All yields a slightly different model and accuracy.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=20)
pac.fit(tf_train, y_train)

In [14]:
# Predict labels for the held-out test set.
y_pred = pac.predict(tf_test)

In [15]:
# Fraction of test articles whose predicted label matches the true label.
score = accuracy_score(y_true=y_test, y_pred=y_pred)

In [16]:
# Report the accuracy as a percentage rounded to two decimals.
accuracy_pct = round(score * 100, 2)
print(f"Accuracy is : {accuracy_pct}%")

Accuracy is : 99.44%


In [17]:
# Confusion matrix: rows are true labels, columns are predicted labels,
# both in the order given by `labels`.
confusion_matrix(
    y_test,
    y_pred,
    labels=['Fake', 'Real'],
)

array([[4634,   30],
       [  20, 4296]], dtype=int64)

In [18]:
# Persist the trained classifier to disk with joblib.
file_name = 'model.sav'
joblib.dump(pac, file_name)

['model.sav']

In [19]:
# Persist the fitted TF-IDF vectorizer as well; the saved classifier can only
# score text encoded with this same vectorizer.
file_name_1 = 'vector.sav'
joblib.dump(vector, file_name_1)

['vector.sav']

In [31]:
# Save the held-out text and its labels side by side as a CSV file.
# NOTE(review): to_csv writes the row index as an unnamed first column, which
# comes back as 'Unnamed: 0' when the file is re-read without index_col=0 —
# confirm that consumers of testdata.csv expect this.
frames = [x_test, y_test]
test = pd.concat(frames, axis='columns')
test.to_csv('testdata.csv')