**Mounting Drive**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Reading and printing the dataset**

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

News= pd.read_csv('/content/drive/MyDrive/ML Project/News (1).csv', index_col=0)

print(News.head(5))

                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  class  
0  December 31, 2017      0  
1  December 31, 2017      0  
2  December 30, 2017      0  
3  December 29, 2017      0  
4  December 25, 2017      0  


**Shape of the DATA**

In [None]:
News.shape

(44919, 5)

**Finding and filling the null values**

In [None]:
News.isnull().sum()

title       0
text        0
subject    21
date       21
class       0
dtype: int64

In [None]:
news = News.fillna(' ')
news.isnull().sum()
# print(News[text])

title      0
text       0
subject    0
date       0
class      0
dtype: int64

**Rearranging the data**

In [None]:
news['news']=news['title']+news['text']

news=news[['subject', 'date', 'news', 'class']]
print(news)

         subject               date  \
0           News  December 31, 2017   
1           News  December 31, 2017   
2           News  December 30, 2017   
3           News  December 29, 2017   
4           News  December 25, 2017   
...          ...                ...   
21412  worldnews   August 22, 2017    
21413  worldnews   August 22, 2017    
21414  worldnews   August 22, 2017    
21415  worldnews   August 22, 2017    
21416  worldnews   August 22, 2017    

                                                    news  class  
0       Donald Trump Sends Out Embarrassing New Year’...      0  
1       Drunk Bragging Trump Staffer Started Russian ...      0  
2       Sheriff David Clarke Becomes An Internet Joke...      0  
3       Trump Is So Obsessed He Even Has Obama’s Name...      0  
4       Pope Francis Just Called Out Donald Trump Dur...      0  
...                                                  ...    ...  
21412  'Fully committed' NATO backs new U.S. approach...      1  
214

**Shuffling**

In [None]:
#Shuffling

news = news.sample(frac=1)
news.reset_index(inplace=True)
news.drop(["index"], axis=1, inplace=True)

print(news.head())

        subject               date  \
0      politics        Nov 1, 2017   
1  politicsNews   January 6, 2017    
2          News  November 14, 2016   
3          News   January 20, 2016   
4          News      June 12, 2016   

                                                news  class  
0  Dem Lawmaker’s Epic Fake Panic Attack Over A S...      0  
1  Anarchists threaten to disrupt Trump inaugurat...      1  
2   Actress Emmy Rossum Fires Back At Trump Fans ...      0  
3   GOP Strategist: Trump Supporters Are All ‘Cra...      0  
4   Hillary Hits Trump And His So-Called ‘Univers...      0  


**Importing NLTK libraries**

In [None]:
from tqdm import tqdm
import re
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer

ps = PorterStemmer()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
clean_news=news.copy()

**Punctuation Cleaning**

In [None]:
import string

def review_cleaning(text):
    '''Make text lowercase, remove text in square brackets,remove links,remove punctuation
    and remove words containing numbers.'''
    text = str(text).lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)
    return text

In [None]:
clean_news['news']=clean_news['news'].apply(lambda x:review_cleaning(x))
clean_news.head()

Unnamed: 0,subject,date,news,class
0,politics,"Nov 1, 2017",dem lawmaker’s epic fake panic attack over a s...,0
1,politicsNews,"January 6, 2017",anarchists threaten to disrupt trump inaugurat...,1
2,News,"November 14, 2016",actress emmy rossum fires back at trump fans ...,0
3,News,"January 20, 2016",gop strategist trump supporters are all ‘craz...,0
4,News,"June 12, 2016",hillary hits trump and his socalled ‘universi...,0


**Remove Stopwords**

In [None]:
stop = stopwords.words('english')
clean_news['news'] = clean_news['news'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
clean_news.head()

Unnamed: 0,subject,date,news,class
0,politics,"Nov 1, 2017",dem lawmaker’s epic fake panic attack speeding...,0
1,politicsNews,"January 6, 2017",anarchists threaten disrupt trump inauguration...,1
2,News,"November 14, 2016",actress emmy rossum fires back trump fans disg...,0
3,News,"January 20, 2016",gop strategist trump supporters ‘crazy’ nazis ...,0
4,News,"June 12, 2016",hillary hits trump socalled ‘university’ hard ...,0


**Stemming**

In [None]:
#Extracting 'reviews' for processing
news_features=clean_news.copy()
news_features=news_features[['news']].reset_index(drop=True)
news_features.head()

Unnamed: 0,news
0,dem lawmaker’s epic fake panic attack speeding...
1,anarchists threaten disrupt trump inauguration...
2,actress emmy rossum fires back trump fans disg...
3,gop strategist trump supporters ‘crazy’ nazis ...
4,hillary hits trump socalled ‘university’ hard ...


In [None]:
stop_words = set(stopwords.words("english"))
#Performing stemming on the review dataframe
ps = PorterStemmer()

#splitting and adding the stemmed words except stopwords
corpus = []
for i in range(0, len(news_features)):
    news = re.sub('[^a-zA-Z]', ' ', news_features['news'][i])
    news= news.lower()
    news = news.split()
    news = [ps.stem(word) for word in news if not word in stop_words]
    news = ' '.join(news)
    corpus.append(news)

In [None]:
corpus[1]

'anarchist threaten disrupt trump inaugur polic say readywashington reuter anarchist group threaten shut republican donald trump swearingin us presid polic washington said friday believ thousand secur offic assign event abl head disrupt dozen activist group plan protest jan inaugur new york real estat develop whose support count fulfil host controversi campaign promis includ build wall mexican border deport million illeg immigr polic expect peopl flood washington inaugur ceremoni includ parad us capitol white hous along street throng onlook interim polic chief peter newsham told report friday addit two dozen activist group sought permit peac demonstr washington polic awar anarchist group vow onlin interrupt proceed fact folk indic social media come shut inaugur event someth prepar newsham said experienc type thing citi abl handl ask prospect mass arrest newsham said one thing prepar anticip case protest plan inaugur biggest event women march washington jan nation park servic said thurs

**Tokenisation**

In [None]:
import re

def tokenize(txt):
  tokens = re.split('\W+', txt)
  return tokens

clean_news['tokenized_news']=clean_news['news'].apply(lambda x: tokenize(x.lower()))
clean_news.head()

Unnamed: 0,subject,date,news,class,tokenized_news
0,politics,"Nov 1, 2017",dem lawmaker’s epic fake panic attack speeding...,0,"[dem, lawmaker, s, epic, fake, panic, attack, ..."
1,politicsNews,"January 6, 2017",anarchists threaten disrupt trump inauguration...,1,"[anarchists, threaten, disrupt, trump, inaugur..."
2,News,"November 14, 2016",actress emmy rossum fires back trump fans disg...,0,"[actress, emmy, rossum, fires, back, trump, fa..."
3,News,"January 20, 2016",gop strategist trump supporters ‘crazy’ nazis ...,0,"[gop, strategist, trump, supporters, crazy, na..."
4,News,"June 12, 2016",hillary hits trump socalled ‘university’ hard ...,0,"[hillary, hits, trump, socalled, university, h..."


**Vectorisation**

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer(max_features=5000,ngram_range=(2,2))
# TF-IDF feature matrix
X= tfidf_vectorizer.fit_transform(clean_news['news'])
X.shape

(44919, 5000)

In [None]:
#Getting the target variable
y=clean_news['class']

**Checking for balance of the data**

In [None]:
from collections import Counter
print(f'Original dataset shape : {Counter(y)}')

Original dataset shape : Counter({0: 23502, 1: 21417})


**Our dataset is nearly a balanced one. So now we will split it into train and test data.**

**RNN**

In [None]:
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import one_hot
#vocabulary size
voc_size=5000

**One Hot Representation**

In [None]:
#One hot encoding
onehot_repr=[one_hot(words,voc_size)for words in corpus]

**Padding Embedded Documents**

In [None]:
#Setting sentence length
sent_length=300

#Padding the sentences
embedded_docs=pad_sequences(onehot_repr,padding='pre',maxlen=sent_length)
print(embedded_docs)

[[   0    0    0 ... 4561 1661 1491]
 [   0    0    0 ... 3403 1235 4422]
 [   0    0    0 ... 3409  959 2776]
 ...
 [2359 3945 1098 ... 2505 1873 4895]
 [   0    0    0 ... 2965 3049 2144]
 [   0    0    0 ... 2551  961 4918]]


In [None]:
embedded_docs[1]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0, 4540, 4725,  142, 3732, 3380, 4486, 2133, 3235, 2515,
       4540, 4750, 4725, 4728, 2705, 4919, 3732,  143,  269, 4221, 4486,
       1808, 3403, 3845, 4077, 2081, 2055, 2329, 4071, 3253, 1010, 3165,
        142, 2101,  334, 4750, 3618, 2897, 3486, 3380, 2456, 3271, 3027,
       4895, 1353, 4729, 2785, 4546,  178, 3144,  924, 4782, 1772, 1727,
       2286,  856, 1765, 4431, 4492, 4472,  451, 1781, 4486, 3833,  960,
       1128, 1808, 3380,  395, 1727, 2204,  269, 4908, 3056, 4054, 2028,
       4375, 4930,  434, 4003, 4486, 3977,  299, 13

**Creating Model**

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(voc_size, 128),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,  return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1)
])

model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 128)         640000    
                                                                 
 bidirectional (Bidirectiona  (None, None, 128)        98816     
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 32)               18560     
 nal)                                                            
                                                                 
 dense (Dense)               (None, 64)                2112      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 6

**Fitting the RNN model**

In [None]:
import numpy as np

# Converting the X and y as array
X_final=np.array(embedded_docs)
y_final=np.array(y)

#Check shape of X and y final
X_final.shape,y_final.shape

((44919, 300), (44919,))

**Training and Validation**

In [None]:
from sklearn.model_selection import train_test_split

# Train test split of the X and y final
X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.33, random_state=42)

print("Length of X-train: ", len(X_train))
print("Length of y-train: ", len(y_train))
print("Length of X-test: ", len(X_test))
print("Length of y-test: ", len(y_test))

# Fitting with 10 epochs and 64 batch size
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=10,batch_size=32)

Length of X-train:  30095
Length of y-train:  30095
Length of X-test:  14824
Length of y-test:  14824
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7985f703bc70>

Length of X-train:  30095,
Length of y-train:  30095,
Length of X-test:  14824,
Length of y-test:  14824

**Saving the model**

In [None]:
model.save('/content/drive/MyDrive/ML Project/RNN_02.h5')

**Evaluation**

In [None]:
model.evaluate(X_test, y_test)



[0.09817932546138763, 0.9892066717147827]

**Loading model**

In [None]:
# load the model from disk
new_model = tf.keras.models.load_model('/content/drive/MyDrive/ML Project/RNN_02.h5')

# Show the model architecture
new_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 128)         640000    
                                                                 
 bidirectional (Bidirectiona  (None, None, 128)        98816     
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 32)               18560     
 nal)                                                            
                                                                 
 dense (Dense)               (None, 64)                2112      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 6

In [None]:
# Recreate the exact same model, including its weights and the optimizer
new_model = tf.keras.models.load_model('/content/drive/MyDrive/ML Project/Copy of RNN_02.h5')

# Show the model architecture
new_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, None, 128)         640000    
                                                                 
 bidirectional_2 (Bidirectio  (None, None, 128)        98816     
 nal)                                                            
                                                                 
 bidirectional_3 (Bidirectio  (None, 32)               18560     
 nal)                                                            
                                                                 
 dense_2 (Dense)             (None, 64)                2112      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_3 (Dense)             (None, 1)                

In [None]:
def fake_news_det(news):
    input_data = [news]
    vectorized_input_data = tfidf_vectorizer.transform(input_data)
    prediction = new_model.predict(vectorized_input_data)
    print(prediction)

In [None]:
val = fake_news_det('AP  President Donald Trump has told congressional leaders that his hard-line immigration priorities must be enacted in exchange for extending protection from deportation to hundreds of thousands of young immigrants, many of whom were brought to the U.S. illegally as children.Trump s list of demands included overhauling the country s green-card system, a crackdown on unaccompanied minors entering the country, and building his promised wall along the southern border.Many were policies Democrats have said explicitly are off the table and threaten to derail ongoing negotiations over legislation protecting young immigrants known as  Dreamers.  They had been given a reprieve from deportation and the ability to work legally in the country under President Barack Obama s Deferred Action for Childhood Arrivals, or DACA, program, which Trump ended last month.In a letter to House and Senate leaders released by the White House Sunday, Trump said the priorities were the product of a  a bottom-up review of all immigration policies  that he had ordered  to determine what legislative reforms are essential for America s economic and national security. These findings outline reforms that must be included as part of any legislation addressing the status of Deferred Action for Childhood Arrivals (DACA) recipients,  he wrote, adding that:  Without these reforms, illegal immigration and chain migration, which severely and unfairly burden American workers and taxpayers, will continue without end. Trump announced last month that he was ending the DACA program, but he gave Congress six months to come up with a legislative fix before recipients began to lose their status. Trump suggested at the time that he was eager for a deal, telling reporters,  I have a love for these people and hopefully now Congress will be able to help them and do it properly. He d also tweeted that if Congress was unwilling to find a fix, he would  revisit this issue!  in six months.  Congress now has 6 months to legalize DACA (something the Obama Administration was unable to do). If they cant, I will revisit this issue!  Donald J. Trump (@realDonaldTrump) September 6, 2017')

[[-9.114652]]


In [None]:
if val == 0:
  print('Real')
else:
  print('Fake')

Fake


**Checking model's accuracy score, precision score and recall score**

In [None]:
pred = model.predict(X_test)

binary_predictions = []

for i in pred:
    if i >= 0.5:
        binary_predictions.append(1)
    else:
        binary_predictions.append(0)



In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

print('Accuracy on testing set:', accuracy_score(binary_predictions, y_test))
print('Precision on testing set:', precision_score(binary_predictions, y_test))
print('Recall on testing set:', recall_score(binary_predictions, y_test))
print('F1_score on testing set:', f1_score(binary_predictions, y_test))

Accuracy on testing set: 0.9892066918510524
Precision on testing set: 0.9873237430565447
Recall on testing set: 0.9898614879337427
F1_score on testing set: 0.988590986879635
