In [1]:
import pandas as pd
import string
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB

# additional imports 

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
def text_process(mess):
    no_punc=[char for char in mess if char not in string.punctuation]
    no_punc=''.join(no_punc)
    return [word for word in no_punc.split() if word.lower() not in stopwords.words('english')]


In [3]:
data=pd.read_csv('news_articles.csv')
data.head()


Unnamed: 0,author,published,title,text,language,site_url,main_img_url,type,label,title_without_stopwords,text_without_stopwords,hasImage
0,Barracuda Brigade,2016-10-26T21:41:00.000+03:00,muslims busted they stole millions in govt ben...,print they should pay all the back all the mon...,english,100percentfedup.com,http://bb4sp.com/wp-content/uploads/2016/10/Fu...,bias,Real,muslims busted stole millions govt benefits,print pay back money plus interest entire fami...,1.0
1,reasoning with facts,2016-10-29T08:47:11.259+03:00,re why did attorney general loretta lynch plea...,why did attorney general loretta lynch plead t...,english,100percentfedup.com,http://bb4sp.com/wp-content/uploads/2016/10/Fu...,bias,Real,attorney general loretta lynch plead fifth,attorney general loretta lynch plead fifth bar...,1.0
2,Barracuda Brigade,2016-10-31T01:41:49.479+02:00,breaking weiner cooperating with fbi on hillar...,red state \nfox news sunday reported this mor...,english,100percentfedup.com,http://bb4sp.com/wp-content/uploads/2016/10/Fu...,bias,Real,breaking weiner cooperating fbi hillary email ...,red state fox news sunday reported morning ant...,1.0
3,Fed Up,2016-11-01T05:22:00.000+02:00,pin drop speech by father of daughter kidnappe...,email kayla mueller was a prisoner and torture...,english,100percentfedup.com,http://100percentfedup.com/wp-content/uploads/...,bias,Real,pin drop speech father daughter kidnapped kill...,email kayla mueller prisoner tortured isis cha...,1.0
4,Fed Up,2016-11-01T21:56:00.000+02:00,fantastic trumps point plan to reform healthc...,email healthcare reform to make america great ...,english,100percentfedup.com,http://100percentfedup.com/wp-content/uploads/...,bias,Real,fantastic trumps point plan reform healthcare ...,email healthcare reform make america great sin...,1.0


In [9]:
print(data.columns)

Index(['author', 'published', 'title', 'text', 'language', 'site_url',
       'main_img_url', 'type', 'label', 'title_without_stopwords',
       'text_without_stopwords', 'hasImage'],
      dtype='object')


In [10]:
data=data.drop(['author', 'published', 'title', 'text', 'site_url',
       'main_img_url', 'type', 'hasImage'],axis=1)
data.head()

Unnamed: 0,language,label,title_without_stopwords,text_without_stopwords
0,english,Real,muslims busted stole millions govt benefits,print pay back money plus interest entire fami...
1,english,Real,attorney general loretta lynch plead fifth,attorney general loretta lynch plead fifth bar...
2,english,Real,breaking weiner cooperating fbi hillary email ...,red state fox news sunday reported morning ant...
3,english,Real,pin drop speech father daughter kidnapped kill...,email kayla mueller prisoner tortured isis cha...
4,english,Real,fantastic trumps point plan reform healthcare ...,email healthcare reform make america great sin...


In [11]:
data['language'].unique()

array(['english', 'ignore', 'german', 'french', 'spanish', nan],
      dtype=object)

In [20]:
data=data.loc[data['language']=='english']
data.head()

Unnamed: 0,language,label,title_without_stopwords,text_without_stopwords
0,english,Real,muslims busted stole millions govt benefits,print pay back money plus interest entire fami...
1,english,Real,attorney general loretta lynch plead fifth,attorney general loretta lynch plead fifth bar...
2,english,Real,breaking weiner cooperating fbi hillary email ...,red state fox news sunday reported morning ant...
3,english,Real,pin drop speech father daughter kidnapped kill...,email kayla mueller prisoner tortured isis cha...
4,english,Real,fantastic trumps point plan reform healthcare ...,email healthcare reform make america great sin...


In [21]:
data['language'].unique()

array(['english'], dtype=object)

In [22]:
data=data.drop('language',axis=1)
data.head()

Unnamed: 0,label,title_without_stopwords,text_without_stopwords
0,Real,muslims busted stole millions govt benefits,print pay back money plus interest entire fami...
1,Real,attorney general loretta lynch plead fifth,attorney general loretta lynch plead fifth bar...
2,Real,breaking weiner cooperating fbi hillary email ...,red state fox news sunday reported morning ant...
3,Real,pin drop speech father daughter kidnapped kill...,email kayla mueller prisoner tortured isis cha...
4,Real,fantastic trumps point plan reform healthcare ...,email healthcare reform make america great sin...


In [23]:
data['body']=data['title_without_stopwords']+' '+data['text_without_stopwords']
data.head()

Unnamed: 0,label,title_without_stopwords,text_without_stopwords,body
0,Real,muslims busted stole millions govt benefits,print pay back money plus interest entire fami...,muslims busted stole millions govt benefits pr...
1,Real,attorney general loretta lynch plead fifth,attorney general loretta lynch plead fifth bar...,attorney general loretta lynch plead fifth att...
2,Real,breaking weiner cooperating fbi hillary email ...,red state fox news sunday reported morning ant...,breaking weiner cooperating fbi hillary email ...
3,Real,pin drop speech father daughter kidnapped kill...,email kayla mueller prisoner tortured isis cha...,pin drop speech father daughter kidnapped kill...
4,Real,fantastic trumps point plan reform healthcare ...,email healthcare reform make america great sin...,fantastic trumps point plan reform healthcare ...


In [25]:
data=data.drop(labels=['title_without_stopwords',
       'text_without_stopwords'],axis=1)
data.head()

Unnamed: 0,label,body
0,Real,muslims busted stole millions govt benefits pr...
1,Real,attorney general loretta lynch plead fifth att...
2,Real,breaking weiner cooperating fbi hillary email ...
3,Real,pin drop speech father daughter kidnapped kill...
4,Real,fantastic trumps point plan reform healthcare ...


In [29]:
data['label'].unique()

array(['Real', 'Fake'], dtype=object)

In [38]:
data=data.dropna()

texttrain,texttest,labeltrain,labeltest=train_test_split(data['body'],data['label'],
                                                         test_size=0.3,random_state=53)


In [39]:
from joblib import load
pipeline=load('youarefakenews.joblib')


pipeline.fit(texttrain,labeltrain)


predictions=pipeline.predict(texttest)



In [40]:
print(classification_report(predictions,labeltest))
print()
print(confusion_matrix(predictions,labeltest))

              precision    recall  f1-score   support

        Fake       1.00      0.64      0.78       572
        Real       0.09      1.00      0.16        19

    accuracy                           0.65       591
   macro avg       0.54      0.82      0.47       591
weighted avg       0.97      0.65      0.76       591


[[368 204]
 [  0  19]]
