In [1]:
import pandas as pd
import nltk
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [2]:
# Read both CSV exports into DataFrames: `fake` comes from Fake.csv and
# `true` from True.csv (paths are relative to the working directory).
fake, true = (pd.read_csv(csv_name) for csv_name in ('Fake.csv', 'True.csv'))

In [3]:
# Add the binary target column 'fake' to each frame in place:
# 1 for rows loaded from Fake.csv, 0 for rows loaded from True.csv.
fake['fake'] = 1
true['fake'] = 0

In [4]:
# Number of leading rows taken from each source frame (presumably a cap to
# keep preprocessing and training time manageable).
ROWS_PER_CLASS = 3000

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# both halves keep their own 0-based labels, so every index label appears
# twice and label-based lookups (.loc, index alignment) become ambiguous.
df = pd.concat(
    [fake.head(ROWS_PER_CLASS), true.head(ROWS_PER_CLASS)],
    ignore_index=True,
)

# Lower-case the article text so later token comparisons are case-insensitive.
df['text'] = df['text'].str.lower()

In [5]:
def del_punctuation(sentence):
    """Return `sentence` with every ASCII punctuation character removed.

    The characters dropped are exactly those listed in ``string.punctuation``;
    every other character (letters, digits, whitespace, ...) is kept in order.
    """
    # The third argument of str.maketrans lists the characters to delete.
    strip_table = str.maketrans('', '', string.punctuation)
    return sentence.translate(strip_table)

# Strip punctuation from every row of the 'text' column, overwriting the column.
df['text'] = df['text'].apply(del_punctuation)

In [6]:
# Stop-word sets keyed by language, filled on first use so the NLTK corpus is
# read once instead of once per word.
_STOPWORD_CACHE = {}


def _stopword_set(language):
    """Return the NLTK stop words for `language` as a cached frozenset."""
    if language not in _STOPWORD_CACHE:
        _STOPWORD_CACHE[language] = frozenset(stopwords.words(language))
    return _STOPWORD_CACHE[language]


def del_stopwords(sentence):
    """Return `sentence` without English stop words.

    The sentence is split on whitespace, every token that appears in NLTK's
    English stop-word list is dropped, and the remaining tokens are joined
    with single spaces. Matching is case-sensitive, so callers should
    lower-case the text first.
    """
    # Looked up once per call and backed by a set, so each membership test is
    # O(1) instead of re-loading and scanning the full list for every word.
    stop_words = _stopword_set('english')
    return ' '.join(word for word in sentence.split() if word not in stop_words)

# Remove English stop words from every row of the 'text' column, overwriting the column.
df['text'] = df['text'].apply(del_stopwords)

In [7]:
# One shared WordNet lemmatizer instance, reused by every call below.
lemmatizer = WordNetLemmatizer()


def lemmatizing(sentence):
    """Lemmatize each whitespace-separated token of `sentence`.

    Every token is passed on its own to ``lemmatizer.lemmatize`` and the
    results are joined back together with single spaces.
    """
    return ' '.join(map(lemmatizer.lemmatize, sentence.split()))

# Lemmatize every row of the 'text' column, overwriting the column.
df['text'] = df['text'].apply(lemmatizing)

In [17]:
def preprocessing(sentences):
    """Run the full text-cleaning pipeline on one text or a Series of texts.

    Steps, in order: lower-case, remove punctuation, remove English stop
    words, lemmatize.

    Parameters
    ----------
    sentences : str or pandas.Series of str
        A single document, or one document per row.

    Returns
    -------
    str or pandas.Series
        The cleaned text, in the same form as the input.
    """
    # A plain string has no .apply, so run the steps on it directly.
    if isinstance(sentences, str):
        return lemmatizing(del_stopwords(del_punctuation(sentences.lower())))

    # A Series has no .lower(); element-wise lower-casing lives under .str.
    sentences = sentences.str.lower()
    for step in (del_punctuation, del_stopwords, lemmatizing):
        sentences = sentences.apply(step)

    return sentences

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report

# Features are the cleaned article texts; the target is the 0/1 'fake' label.
X = df['text']
y = df['fake']

# Fixed seed so the 70/30 shuffle, and therefore every metric reported below,
# is the same on each Restart & Run All.
RANDOM_STATE = 42
X_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

In [9]:
# Vectorizing
# Learn the vocabulary from the training texts only and encode them as a
# document-term count matrix.
# The result is kept sparse: .todense() would return an np.matrix, which
# recent scikit-learn releases reject, and would allocate a dense
# (documents x vocabulary) array for no benefit. LogisticRegression accepts
# sparse input directly.
vectorizer = CountVectorizer()
X_train_vector = vectorizer.fit_transform(X_train)

In [10]:
# Encode the held-out texts with the vocabulary learned from the training set.
# Kept sparse: .todense() would return an np.matrix, which recent scikit-learn
# releases reject, and the model's predict() accepts sparse input directly.
x_test_vector = vectorizer.transform(x_test)

In [12]:
# Modelling
# fit() returns the estimator itself, so construction and training are chained;
# the fitted model then labels the held-out vectors.
model = LogisticRegression().fit(X_train_vector, y_train)
y_result = model.predict(x_test_vector)

In [14]:
# Check The Accuracy
# Prints per-class precision, recall, F1 and support for the held-out
# predictions, plus overall accuracy and macro/weighted averages.
# NOTE(review): scores this close to 1.00 are worth a data-leakage check
# (e.g. source-specific boilerplate in the article text) — confirm.
print(classification_report(y_test, y_result))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00       885
           1       1.00      0.99      1.00       915

    accuracy                           1.00      1800
   macro avg       1.00      1.00      1.00      1800
weighted avg       1.00      1.00      1.00      1800



In [29]:
# Let's Test The Model
# NOTE(review): if this sample was copied from one of the training CSVs it is
# not an independent test of the model — confirm its origin.
sentences = '''One of Donald Trump s favorite punching bags is CNN. He even once tweeted a GIF image of himself punching a person with a CNN logo superimposed over the head 
            indicating that he d like to enact violence against CNN s reporters. Then there was the time he tweeted the  Trump Train roaring over  CNN.  Now, he s back at it 
            this time suggesting that  fake  CNN should be the ones representing America to the world, and that they are doing a bad job. Here is that tweet:.@FoxNews 
            is MUCH more important in the United States than CNN, but outside of the U.S., CNN International is still a major source of (Fake) news, and they represent
            our Nation to the WORLD very poorly. The outside world does not see the truth from them!  Donald J. Trump (@realDonaldTrump) November 25, 2017Of course, 
            it is beneath the dignity of most people to respond to a moronic buffoon like Trump under normal circumstances. However, he is currently squatting in the 
            White House, and has his tiny orange hands on the levers of power   not to mention the nuclear codes   so they have to stoop to a Trumpian level when 
            personally attacked. However, being, well, you know, FIT to be doing the job they are doing, the good folks at CNN Communications fired back at Trump, and 
            their response is nothing short of perfect:It's not CNN's job to represent the U.S to the world. That's yours. Our job is to report the news. #FactsFirst   
            CNN Communications (@CNNPR) November 25, 2017BOOM! Couldn t have asked for a sicker burn than this. And they are right of course   especially the part about
            #FactsFirst. Trump has a problem with the truth, as we all well know. That s what makes what the CNN Communications people replied so fabulous. 
            It is the ultimate truth   something the likes of the pathological orange liar that is Donald Trump knows nothing about.Featured image via  
            Andrew Burton/Getty Images '''

# Apply the same steps, in the same order, as the training text received.
# Lower-casing comes first: the training column was lower-cased before
# cleaning, and del_stopwords compares tokens case-sensitively, so capitalised
# stop words such as "He" or "The" would otherwise survive.
sentences = sentences.lower()
sentences = del_punctuation(sentences)
sentences = del_stopwords(sentences)
sentences = lemmatizing(sentences)

In [30]:
# Encode the cleaned sample with the fitted vocabulary. The vector gets its
# own name so `sentences` still holds the text and this cell can be re-run;
# rebinding `sentences` to the matrix made a second run fail.
sentence_vector = vectorizer.transform([sentences])

# Label 1 is the 'fake' class, label 0 the 'true' class.
if model.predict(sentence_vector)[0] == 1:
    print("Literally Fake News")
else:
    print("This is Real News")

Literally Fake News
