In [2]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import re
import joblib
import string

In [3]:
fake = pd.read_csv('Fake.csv')
true = pd.read_csv('True.csv')

In [4]:
fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [5]:
true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [6]:
fake['class'] = 0
true['class'] = 1

In [7]:
data = pd.concat([fake, true], axis=0)
data.sample(10)

Unnamed: 0,title,text,subject,date,class
6660,U.S. intelligence agencies feud with Republica...,Republican members of Congress are complaining...,politicsNews,"December 16, 2016",1
3970,Trump-Loving Oklahoma GOP Rep. Busted Calling...,What is it with Trump supporters constantly th...,News,"November 2, 2016",0
6574,Trump taps ex-Bush official Bossert to counter...,"PALM BEACH, Fla. (Reuters) - U.S. President-el...",politicsNews,"December 27, 2016",1
13332,LOL! HILLARY ACCIDENTALLY Calls Trump Her “Hus...,Below the belt h/t Weasel Zippers,politics,"Aug 5, 2016",0
4269,Trump may halt insurer payments to force Democ...,WASHINGTON (Reuters) - U.S. President Donald T...,politicsNews,"April 12, 2017",1
18525,Denmark to send 55 soldiers to Kabul after dea...,"COPENHAGEN (Reuters) - Denmark, part of the NA...",worldnews,"October 2, 2017",1
3253,Trump Just Claimed He Beat Obama In The Elect...,Donald Trump can t let it go. A broken and ant...,News,"December 27, 2016",0
14348,Greek island on strike in protest against beco...,ATHENS (Reuters) - Residents on the Greek isla...,worldnews,"November 20, 2017",1
2149,Oxford Fellow GLORIOUSLY Buries Trump For Try...,"Donald Trump just got owned.On Monday, the Con...",News,"March 14, 2017",0
21407,"Mata Pires, owner of embattled Brazil builder ...","SAO PAULO (Reuters) - Cesar Mata Pires, the ow...",worldnews,"August 22, 2017",1


In [9]:
data = data.drop(["title", "subject", "date"], axis=1)

In [10]:
data.sample(10)

Unnamed: 0,text,class
4966,Donald Trump campaign senior advisor Jack King...,0
1957,WASHINGTON (Reuters) - The Trump administratio...,1
13555,CAIRO (Reuters) - Egyptian President Abdel Fat...,1
14491,Our next President is going to have to work wi...,0
9271,WASHINGTON (Reuters) - U.S. President Barack O...,1
17297,TORONTO (Reuters) - The number of asylum seeke...,1
14191,The party s over or is it? Hillary Clinton and...,0
18108,WASHINGTON (Reuters) - A handful of private ...,1
21020,BRASILIA (Reuters) - A leniency deal struck be...,1
7359,Like all other presidents since George H.W. Bu...,0


In [11]:
data.reset_index(inplace=True)

In [12]:
data.drop(['index'], axis= 1, inplace= True)

In [14]:
data.sample(5)

Unnamed: 0,text,class
4325,Florida s Republican governor Rick Scott is no...,0
39669,"MINYA, Egypt (Reuters) - Coptic Christians in ...",1
30327,NEW YORK (Reuters) - Donald Trump clashed on W...,1
12932,To all the #NeverTrumpers: The ugly ride that...,0
3855,Democrats who are offering to work with Donald...,0


In [15]:
def clean_text(text):
    text = text.lower()
    text = re.sub('\[,*?\]', "", text)
    text = re.sub('\\W', " ", text)
    text = re.sub('https?:://\S+|www\.\S+', "", text)
    text = re.sub('[%s]' % re.escape(string.punctuation), "", text)
    text = re.sub('\w*\d\w*', "", text)
    return text

  text = re.sub('\[,*?\]', "", text)
  text = re.sub('https?:://\S+|www\.\S+', "", text)
  text = re.sub('\w*\d\w*', "", text)


In [16]:
data['text'] = data['text'].apply(clean_text)

In [17]:
x = data['text']
y = data['class']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size= 0.25, random_state= 42)

In [18]:
vectorizer = TfidfVectorizer()
xv_train = vectorizer.fit_transform(x_train)
xv_test = vectorizer.transform(x_test)

In [19]:
lr = LogisticRegression()
lr.fit(xv_train, y_train)

In [20]:
prediction = lr.predict(xv_test)
lr.score(xv_test, y_test)

0.9861024498886414

In [21]:
print(classification_report(y_test, prediction))

              precision    recall  f1-score   support

           0       0.99      0.98      0.99      5895
           1       0.98      0.99      0.99      5330

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225



In [22]:
joblib.dump(vectorizer, "vectorizer.jb")
joblib.dump(lr, "lr_model.jb")

['lr_model.jb']