In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

In [2]:
fake_data = pd.read_csv("C:/Users/User/Desktop/PROJECTS/Fake news detection/Fake.csv")
true_data = pd.read_csv("C:/Users/User/Desktop/PROJECTS/Fake news detection/True.csv")

In [3]:
fake_data.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [4]:
true_data.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [5]:
fake_data["class"] = 0
true_data["class"] = 1

In [6]:
fake_data.shape, true_data.shape

((23481, 5), (21417, 5))

In [7]:
# Removing last 10 rows for manual testing
fake_data_manual_test = fake_data.tail(5)
fake_data = fake_data.iloc[:-5]
true_data_manual_test = true_data.tail(5)
true_data = true_data.iloc[:-5]

In [8]:
fake_data.shape, true_data.shape

((23476, 5), (21412, 5))

In [9]:
# Concatenating the datasets
data_manual_test = pd.concat([fake_data_manual_test, true_data_manual_test], axis=0)
data_manual_test.to_csv("manual_test.csv")

In [10]:
data_merge = pd.concat([fake_data, true_data], axis=0)

In [11]:
# Dropping unnecessary columns
data = data_merge.drop(["title", "subject", "date"], axis=1)

In [12]:
data = data.sample(frac=1).reset_index(drop=True)

## Creating Function

In [13]:
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W"," ",text) 
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)    
    return text

In [14]:
data["text"] = data["text"].apply(wordopt)

## Defining dependent and independent variables

In [15]:
x = data["text"]
y = data["class"]

## splitting train and test data

In [16]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

## text into vectors

In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

## Logsitic Regression

In [18]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
LR.fit(xv_train,y_train)
pred_lr=LR.predict(xv_test)
LR.score(xv_test, y_test)
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5908
           1       0.98      0.99      0.99      5314

    accuracy                           0.99     11222
   macro avg       0.99      0.99      0.99     11222
weighted avg       0.99      0.99      0.99     11222



## Decision Tree

In [19]:
from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)
pred_dt = DT.predict(xv_test)
DT.score(xv_test, y_test)
print(classification_report(y_test, pred_dt))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00      5908
           1       1.00      0.99      1.00      5314

    accuracy                           1.00     11222
   macro avg       1.00      1.00      1.00     11222
weighted avg       1.00      1.00      1.00     11222



## Gradient Boosting Classifier

In [20]:
from sklearn.ensemble import GradientBoostingClassifier

GBC = GradientBoostingClassifier(random_state=0)
GBC.fit(xv_train, y_train)
pred_gbc = GBC.predict(xv_test)
GBC.score(xv_test, y_test)
print(classification_report(y_test, pred_gbc))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5908
           1       0.99      1.00      0.99      5314

    accuracy                           1.00     11222
   macro avg       1.00      1.00      1.00     11222
weighted avg       1.00      1.00      1.00     11222



## Random Forest

In [21]:
from sklearn.ensemble import RandomForestClassifier

RFC = RandomForestClassifier(random_state=0)
RFC.fit(xv_train, y_train)
pred_rfc = RFC.predict(xv_test)
RFC.score(xv_test, y_test)
print(classification_report(y_test, pred_rfc))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5908
           1       0.99      0.99      0.99      5314

    accuracy                           0.99     11222
   macro avg       0.99      0.99      0.99     11222
weighted avg       0.99      0.99      0.99     11222



## Model Testing

In [22]:
def output_label(n: int) -> str:
    return "Fake News" if n == 0 else "Not A Fake News"

def manual_testing(news: str) -> dict:
    testing_news = {"text": [news]}
    
    new_def_test = pd.DataFrame(testing_news)
    
    new_def_test["text"] = new_def_test["text"].apply(wordopt)
    
    new_xv_test = vectorization.transform(new_def_test["text"])
    
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GBC = GBC.predict(new_xv_test)
    pred_RFC = RFC.predict(new_xv_test)
    
    return {
        'Logistic Regression Prediction': output_label(pred_LR[0]),
        'Decision Tree Prediction': output_label(pred_DT[0]),
        'Gradient Boosting Prediction': output_label(pred_GBC[0]),
        'Random Forest Prediction': output_label(pred_RFC[0])
    }

In [27]:
# Input for manual testing
news = str(input("Enter news text: "))
predictions = manual_testing(news)
for model, prediction in predictions.items():
    print(f"{model}: {prediction}")

Enter news text: WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress, who voted this month for a huge expansion of the national debt to pay for tax cuts, called himself a â€œfiscal conservativeâ€ on Sunday and urged budget restraint in 2018. In keeping with a sharp pivot under way among Republicans, U.S. Representative Mark Meadows, speaking on CBSâ€™ â€œFace the Nation,â€ drew a hard line on federal spending, which lawmakers are bracing to do battle over in January. When they return from the holidays on Wednesday, lawmakers will begin trying to pass a federal budget in a fight likely to be linked to other issues, such as immigration policy, even as the November congressional election campaigns approach in which Republicans will seek to keep control of Congress. President Donald Trump and his Republicans want a big budget increase in military spending, while Democrats also want proportional increases for non-defense â€œdiscretionaryâ€ spending o

In [28]:
news = str(input())
manual_testing(news)

Donald Trump just couldn t wish all Americans a Happy New Year and leave it at that. Instead, he had to give a shout out to his enemies, haters and  the very dishonest fake news media.  The former reality show star had just one job to do and he couldn t do it. As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year,  President Angry Pants tweeted.  2018 will be a great year for America! As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year. 2018 will be a great year for America!  Donald J. Trump (@realDonaldTrump) December 31, 2017Trump s tweet went down about as welll as you d expect.What kind of president sends a New Year s greeting like this despicable, petty, infantile gibberish? Only Trump! His lack of decency won t eve

{'Logistic Regression Prediction': 'Fake News',
 'Decision Tree Prediction': 'Fake News',
 'Gradient Boosting Prediction': 'Fake News',
 'Random Forest Prediction': 'Fake News'}