# Fake News Detector

## Importing Necessary Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import string
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

## Loading the data

In [None]:
# Load the two source CSVs; the paths are relative to the current working directory.
data_fake=pd.read_csv('Datasets/Fake.csv')
data_true=pd.read_csv('Datasets/True.csv')

### Data Preview 

In [None]:
# Preview the first rows of the fake-news frame.
data_fake.head()

In [None]:
# Preview the last rows of the true-news frame.
data_true.tail()

In [None]:
# Add the target column: 0 marks fake articles, 1 marks true articles.
data_fake["class"]=0
data_true['class']=1

In [None]:
# Show the (rows, columns) shape of each frame.
data_fake.shape, data_true.shape

In [None]:
# Hold out the last rows of each dataset for manual testing later.
# The rows are picked by position with tail() and dropped by their own index
# labels, so this works for any dataset size instead of depending on
# hard-coded labels that raise KeyError when the CSVs change.
MANUAL_TEST_ROWS = 10

# .copy() makes each held-out frame independent of its source frame, so
# assigning columns on it later does not trigger SettingWithCopyWarning.
data_fake_manual_testing = data_fake.tail(MANUAL_TEST_ROWS).copy()
data_fake.drop(data_fake_manual_testing.index, axis=0, inplace=True)

data_true_manual_testing = data_true.tail(MANUAL_TEST_ROWS).copy()
data_true.drop(data_true_manual_testing.index, axis=0, inplace=True)

In [None]:
# Label the held-out rows (0 = fake, 1 = true).
# NOTE(review): the 'class' column was already assigned on data_fake/data_true
# before the hold-out rows were taken, so this looks redundant - confirm.
data_fake_manual_testing['class']=0
data_true_manual_testing['class']=1

In [None]:
# Show up to 10 rows of the held-out fake articles.
data_fake_manual_testing.head(10)

In [None]:
# Show up to 10 rows of the held-out true articles.
data_true_manual_testing.head(10)

In [None]:
# Stack the fake and true frames row-wise (axis=0) into a single frame.
# ignore_index is not set, so each source frame keeps its own index labels.
data_merge=pd.concat([data_fake, data_true], axis = 0)
data_merge.head(10)

In [None]:
# Drop the title, subject and date columns (axis=1); the rest of the notebook
# only reads the 'text' and 'class' columns.
data=data_merge.drop(['title','subject','date'], axis = 1)

In [None]:
# Count missing values per column.
data.isnull().sum() 

In [None]:
# Shuffle all rows (frac=1 keeps every row) so fake and true articles are
# interleaved. The fixed seed makes the shuffled order repeatable between
# runs, matching the random_state=0 used for the ensemble models below.
data = data.sample(frac=1, random_state=0)

In [None]:
# Preview the first rows after shuffling.
data.head()

In [None]:
# Give the shuffled frame a fresh 0..n-1 index and discard the old labels
# rather than keeping them as an 'index' column.
data.reset_index(drop=True, inplace=True)

In [None]:
# List the remaining column names.
data.columns

In [None]:
# Preview the first rows after the index reset.
data.head()

## Preprocessing Text

#### Creating a function to convert the text to lowercase and remove bracketed text, URLs, HTML tags, punctuation and words containing digits.

In [None]:
def wordopt(text):
    """Normalize one article body before vectorization.

    Steps, in order: lowercase the text; remove square-bracketed spans;
    remove URLs; remove HTML-like tags; replace every remaining non-word
    character with a space; remove leftover punctuation; remove any token
    that contains a digit.

    URLs and tags are stripped *before* the generic non-word replacement,
    because that replacement destroys the ``://``, ``.``, ``<`` and ``>``
    characters those patterns need in order to match.

    Args:
        text: Raw article text (str).

    Returns:
        The cleaned, lowercased text. Runs of spaces are not collapsed.
    """
    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    # The replacement must be str, like the subject; a bytes replacement
    # raises TypeError as soon as the pattern matches.
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub(r'\W', ' ', text)
    # After the \W pass only '_' can remain from string.punctuation, since it
    # counts as a word character.
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

In [None]:
# Clean every article by applying wordopt to the 'text' column.
data['text'] = data['text'].apply(wordopt)

#### Defining the independent variable (x) and the dependent variable (y)

In [None]:
# x is the cleaned article text (model input); y is the class label (target).
x = data['text']
y = data['class']

## Training the model

#### Splitting the dataset into training set and testing set. 

In [None]:
# Hold out 20% of the rows for evaluation. The fixed seed makes the split
# repeatable for a given row order, matching the random_state=0 used for the
# ensemble models below.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)

### Extracting Features from the Text

#### Convert text to vectors

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit the TF-IDF vocabulary on the training text only, then reuse the fitted
# vectorizer to transform the test text into the same feature space.
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Fit a logistic regression classifier with default settings on the TF-IDF
# training features.
LR = LogisticRegression()
LR.fit(xv_train, y_train)

In [None]:
# Predict labels for the held-out test features.
pred_lr = LR.predict(xv_test)

In [None]:
# Display the predicted labels.
pred_lr

In [None]:
# Score the model on the test set (for scikit-learn classifiers, score() is
# the mean accuracy).
LR.score(xv_test, y_test)

In [None]:
# Print the per-class classification report for the test set.
print (classification_report(y_test, pred_lr))

## Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the TF-IDF training features. The fixed seed makes
# the tree repeatable between runs and matches the random_state=0 used for
# the gradient boosting and random forest models in this notebook.
DT = DecisionTreeClassifier(random_state=0)
DT.fit(xv_train, y_train)

In [None]:
# Predict labels for the held-out test features.
pred_dt = DT.predict(xv_test)

In [None]:
# Score the decision tree on the test set.
DT.score(xv_test, y_test)

In [None]:
# Print the per-class classification report for the test set.
print (classification_report(y_test, pred_dt))

## Gradient Boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Fit a gradient boosting classifier on the TF-IDF training features; the
# seed is fixed at 0.
GB = GradientBoostingClassifier(random_state = 0)
GB.fit(xv_train, y_train)


In [None]:
# Predict labels for the held-out test features.
pred_gb = GB.predict(xv_test)

In [None]:
# Score the gradient boosting model on the test set.
GB.score(xv_test, y_test)

In [None]:
# Print the per-class classification report for the test set.
print(classification_report(y_test, pred_gb))

## Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest classifier on the TF-IDF training features; the seed
# is fixed at 0.
RF = RandomForestClassifier(random_state = 0)
RF.fit(xv_train, y_train)

In [None]:
# Predict labels for the held-out test features.
pred_rf = RF.predict(xv_test)

In [None]:
# Score the random forest on the test set.
RF.score(xv_test, y_test)

In [None]:
# Print the per-class classification report for the test set.
print (classification_report(y_test, pred_rf))

## Testing the Model

In [None]:
def output_lable(n):
    """Translate a numeric class label into its display string.

    Returns "FAKE News" for 0 and "REAL News" for 1; any other value yields
    None.
    """
    if n == 0:
        return "FAKE News"
    return "REAL News" if n == 1 else None
    
def manual_testing(news):
    """Classify one news text with every trained model and print the verdicts.

    The text is cleaned with ``wordopt`` and vectorized with the already
    fitted ``vectorization`` object, then passed to the LR, DT, GB and RF
    models. Each prediction is rendered with ``output_lable``.

    Args:
        news: Raw news text (str).

    Returns:
        None. The four predictions are printed to stdout.
    """
    # transform() takes an iterable of documents, so wrap the single text.
    features = vectorization.transform([wordopt(news)])

    models = (
        ("LR", LR),
        ("DT", DT),
        ("GBC", GB),
        ("RFC", RF),
    )
    report = " \n".join(
        "{} Prediction: {}".format(tag, output_lable(model.predict(features)[0]))
        for tag, model in models
    )
    print("\n\n" + report)

In [None]:
# Read one news text from standard input and run it through every model.
news = input()
manual_testing(news)