In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import re
import string

In [4]:
# Load data
data_fake = pd.read_csv("D:fake.csv")
data_true = pd.read_csv("D:True.csv")

# Add class labels
data_fake["class"] = 0
data_true["class"] = 1

In [5]:
# Drop last 10 entries for manual test
fake_manual_test = data_fake.tail(10)
data_fake = data_fake.iloc[:-10]
true_manual_test = data_true.tail(10)
data_true = data_true.iloc[:-10]


In [6]:
# Merge datasets
data_merge = pd.concat([data_fake, data_true], axis=0)

# Drop unnecessary columns
data = data_merge.drop(['title', 'subject', 'date'], axis=1)

# Shuffle data
data = data.sample(frac=1).reset_index(drop=True)

In [7]:
data.head(10)

Unnamed: 0,text,class
0,It sure would be nice to have a President and ...,0
1,The May 8 episode of Saturday Night Live featu...,0
2,President Obama held a CNN televised Town Hall...,0
3,"CANNON BALL, N.D./WASHINGTON (Reuters) - Three...",1
4,WASHINGTON (Reuters) - The House of Representa...,1
5,"AUSTIN, Texas (Reuters) - U.S. President Barac...",1
6,Germany s defense minister refused to wear a t...,0
7,BELFAST (Reuters) - The British government on ...,1
8,"Whenever Hillary cackles, you can be pretty su...",0
9,BANGKOK (Reuters) - Thai airlines can now add ...,1


In [8]:
# Text preprocessing function
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub('\\W', ' ', text)
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)
    return text

In [9]:
# Apply text preprocessing
data['text'] = data['text'].apply(wordopt)

In [10]:
# Split data into features and labels
x = data['text']
y = data['class']


In [11]:
# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)


In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorization = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [13]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
LR.fit(xv_train, y_train)
pred_lr = LR.predict(xv_test)
print(f"Logistic Regression Accuracy: {accuracy_score(y_test, pred_lr)}")

Logistic Regression Accuracy: 0.9898395721925134


In [14]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5927
           1       0.99      0.99      0.99      5293

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



In [15]:
## other algorithms
# Desicion tree
from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier()
DT.fit(xv_train,y_train)

In [16]:
pred_dt = DT.predict(xv_test)
DT.score(xv_test,y_test)
print("Logistic Regression Accuracy: {accuracy_score(y_test, pred_dt")
print(classification_report(y_test, pred_dt))

Logistic Regression Accuracy: {accuracy_score(y_test, pred_dt
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      5927
           1       1.00      0.99      1.00      5293

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



In [17]:
# Gradient Boosting

from sklearn.ensemble import GradientBoostingClassifier

GBC = GradientBoostingClassifier(random_state=42)
GBC.fit(xv_train, y_train)

In [18]:
predict_gb = GBC.predict(xv_test)
print(accuracy_score(y_test, predict_gb))

0.9962566844919786


In [19]:
print(classification_report(y_test, predict_gb))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5927
           1       0.99      1.00      1.00      5293

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



In [20]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

RFC = RandomForestClassifier(n_estimators=200, random_state=42)
RFC.fit(xv_train, y_train)
pred_rfc = RFC.predict(xv_test)
print(f"Random Forest Accuracy: {accuracy_score(y_test, pred_rfc)}")
print(classification_report(y_test, pred_rfc))


Random Forest Accuracy: 0.9979500891265597
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5927
           1       1.00      1.00      1.00      5293

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



In [21]:
# svm
from sklearn.svm import SVC

SVC_model = SVC(kernel='linear', random_state=42)
SVC_model.fit(xv_train, y_train)
pred_svc = SVC_model.predict(xv_test)
print(f"Support Vector Machine Accuracy: {accuracy_score(y_test, pred_svc)}")
print(classification_report(y_test, pred_svc))


Support Vector Machine Accuracy: 0.9951871657754011
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5927
           1       1.00      0.99      0.99      5293

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



In [22]:
def output_label(n):
    if n==0:
        return "Fake News"
    elif n==1:
        return "True News"
    
def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test['text'].apply(wordopt)
    new_x_test = new_def_test['text']
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GBC = GBC.predict(new_xv_test)
    pred_RFC = RFC.predict(new_xv_test)
    
    return print("\n\nLR Prediction:{}\nDT Prediction:{}\nGBC Prediction:{}\nRFC Prediction:{}".format(output_label(pred_LR[0]),
                                                                                                      output_label(pred_DT[0]),
                                                                                                      output_label(pred_GBC[0]),
                                                                                                      output_label(pred_RFC[0])))

In [25]:
news = str(input())
manual_testing(news)

WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress, who voted this month for a huge expansion of the national debt to pay for tax cuts, called himself a â€œfiscal conservativeâ€ on Sunday and urged budget restraint in 2018. In keeping with a sharp pivot under way among Republicans, U.S. Representative Mark Meadows, speaking on CBSâ€™ â€œFace the Nation,â€ drew a hard line on federal spending, which lawmakers are bracing to do battle over in January. When they return from the holidays on Wednesday, lawmakers will begin trying to pass a federal budget in a fight likely to be linked to other issues, such as immigration policy, even as the November congressional election campaigns approach in which Republicans will seek to keep control of Congress. President Donald Trump and his Republicans want a big budget increase in military spending, while Democrats also want proportional increases for non-defense â€œdiscretionaryâ€ spending on programs that s

In [26]:
news = str(input())
manual_testing(news)

Donald Trump just couldn t wish all Americans a Happy New Year and leave it at that. Instead, he had to give a shout out to his enemies, haters and  the very dishonest fake news media.  The former reality show star had just one job to do and he couldn t do it. As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year,  President Angry Pants tweeted.  2018 will be a great year for America! As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year. 2018 will be a great year for America!  Donald J. Trump (@realDonaldTrump) December 31, 2017Trump s tweet went down about as welll as you d expect.What kind of president sends a New Year s greeting like this despicable, petty, infantile gibberish? Only Trump! His lack of decency won t eve