In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer  
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report  
import re
import joblib
import string


In [3]:
# Load the two source tables from the working directory: one CSV of fake
# articles and one CSV of true articles.
fake, true = (pd.read_csv(csv_path) for csv_path in ("Fake.csv", "True.csv"))


In [4]:
# Preview the first five rows of the fake-news table.
fake.head(n=5)

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [5]:
# Preview the first five rows of the true-news table.
true.head(n=5)

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [6]:
# Attach the target label as a new "class" column: 0 marks fake articles,
# 1 marks true articles. ("class" is a Python keyword, hence the dict unpacking.)
fake = fake.assign(**{"class": 0})
true = true.assign(**{"class": 1})

In [7]:
# Stack the fake rows on top of the true rows. Each frame keeps its own
# original row labels here, so labels repeat until the index is reset.
data = pd.concat((fake, true), axis="index")

In [8]:
# Spot-check ten random rows of the combined table. The sample is seeded so
# the displayed preview is the same after every Restart & Run All.
data.sample(10, random_state=42)

Unnamed: 0,title,text,subject,date,class
20947,CONGRESS JUST DEALT A BIG BLOW To Obama And Hi...,Obama has shown favoritism towards the Muslim ...,left-news,"Feb 26, 2016",0
3211,Here’s The List Of Items Trump Banned From In...,Remember when the National Rifle Association w...,News,"December 31, 2016",0
15529,Bodies of Argentine men killed in New York att...,BUENOS AIRES (Reuters) - The bodies of five Ar...,worldnews,"November 6, 2017",1
20780,EU should impose more sanctions on North Korea...,TALLINN (Reuters) - The European Union should ...,worldnews,"September 7, 2017",1
18491,Cambodia's detained opposition leader denies t...,PHNOM PENH (Reuters) - Cambodia s detained opp...,worldnews,"October 2, 2017",1
10428,TEEN VOGUE Publishes Article To Teach Teen Gir...,"Two months before the 2016 election, Teen Vogu...",politics,"Jul 11, 2017",0
17713,IMAM SLAMS CBS News For Trying To “Make Terror...,"Yesterday, the Imam who calls himself the Ima...",left-news,"Nov 1, 2017",0
20932,MASS EXODUS FROM DEMOCRAT Party In Liberal Mas...,Yeah about that whole Trump not matching up to...,left-news,"Mar 1, 2016",0
4067,Canada says ready to come to NAFTA talks 'at a...,OTTAWA (Reuters) - Canada is ready to come to ...,politicsNews,"April 26, 2017",1
16733,Russian radio station says intruder stabs pres...,MOSCOW (Reuters) - An intruder forced his way ...,worldnews,"October 23, 2017",1


In [9]:
# Keep only the article body and the label; title, subject and date are
# discarded before modelling.
data = data.drop(columns=["title", "subject", "date"])

In [10]:
# Replace the repeated per-file row labels with a fresh 0..n-1 index.
# The old labels are kept as a new "index" column.
data = data.reset_index()

In [11]:
# Discard the "index" column that holds the old per-file row labels.
data = data.drop(columns=["index"])

In [12]:
# Spot-check five random rows after the column/index clean-up. The sample is
# seeded so the displayed preview is the same after every Restart & Run All.
data.sample(5, random_state=42)

Unnamed: 0,text,class
33449,"WASHINGTON (Reuters) - Merrick Garland, Presid...",1
7474,"On Wednesday morning, Republican front runner ...",0
37735,BEIRUT (Reuters) - Lebanon s army chief told h...,1
24280,WASHINGTON (Reuters) - U.S. President Donald T...,1
9686,He was given the option to kneel in protest be...,0


In [13]:
import re
import string

# Ordered (pattern, replacement) steps used by clean_text, compiled once
# instead of being looked up on every call.
_CLEAN_STEPS = (
    (re.compile(r"\[.*?\]"), ""),  # bracketed spans, e.g. "[video]"
    (re.compile(r"https?://\S+"), ""),  # http/https URLs
    (re.compile(r"<.*?>"), ""),  # angle-bracket tags
    (re.compile("[%s]" % re.escape(string.punctuation)), ""),  # ASCII punctuation
    (re.compile(r"\n"), " "),  # newlines become spaces
    (re.compile(r"\w*\d\w*"), ""),  # any token containing a digit
    (re.compile(r"\s+"), " "),  # collapse whitespace runs
)


def clean_text(text):
    """Normalize one raw article string for vectorization.

    The text is lower-cased, then the following are removed in order:
    bracketed spans, http/https URLs, angle-bracket tags, ASCII punctuation
    (``string.punctuation``), and every token that contains a digit.
    Newlines become spaces, whitespace runs collapse to a single space, and
    leading/trailing whitespace is stripped.

    Args:
        text: The raw text. Non-string values (``None``, or the float NaN
            that pandas uses for empty CSV cells) are accepted.

    Returns:
        str: The cleaned text, or ``""`` when ``text`` is not a string.
    """
    # Missing cells arrive as NaN/None rather than str; calling .lower() on
    # them would raise AttributeError, so treat them as empty documents.
    if not isinstance(text, str):
        return ""

    text = text.lower()
    for pattern, replacement in _CLEAN_STEPS:
        text = pattern.sub(replacement, text)
    return text.strip()


In [14]:
# Run clean_text over every article body, replacing the raw "text" column
# with its cleaned version.
data = data.assign(text=data["text"].map(clean_text))


In [15]:
# Features are the cleaned article bodies; targets are the 0/1 class labels.
x, y = data["text"], data["class"]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [16]:
# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vectorizer. The tuple is evaluated
# left to right, so the fit happens before the test transform.
vectorizer = TfidfVectorizer()
xv_train, xv_test = (
    vectorizer.fit_transform(xtrain),
    vectorizer.transform(xtest),
)


In [17]:
# Train a logistic-regression classifier with default settings on the TF-IDF
# training matrix. fit() returns the estimator itself, so `lr` is the fitted
# model; the bare name on the last line displays it.
lr = LogisticRegression().fit(xv_train, ytrain)
lr

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [18]:
# Predict labels for the held-out split (kept in `prediction` for the
# classification report below), then display the score on the same split.
# NOTE(review): the true-news rows previewed earlier all start with a
# "<CITY> (Reuters) -" dateline; the very high score may partly reflect the
# model keying on that marker rather than on content — verify before relying on it.
prediction = lr.predict(xv_test)
lr.score(xv_test,ytest)

0.985924276169265

In [19]:
# Per-class precision / recall / F1 on the held-out split
# (class 0 = fake, class 1 = true).
print(classification_report(y_true=ytest, y_pred=prediction))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5895
           1       0.98      0.99      0.99      5330

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225



In [20]:
# Persist the fitted vectorizer and the trained classifier side by side;
# both are needed to score new text later. Only load these files from a
# trusted location: joblib files are pickle-based.
joblib.dump(value=vectorizer, filename="vectorizer.jb")
joblib.dump(value=lr, filename="lr_model.jb")

['lr_model.jb']