In [0]:
from google.colab import drive 
drive.mount('/content/gdrive')

In [0]:
#SILENCING THE FALSE POSITIVE WARNINGS
import warnings
warnings.simplefilter('always')
with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=pd.core.common.SettingWithCopyWarning)

In [0]:
#IMPORTING DEPENDENCIES
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, Conv1D, GlobalMaxPooling1D, Activation

In [0]:
#IMPORTING DATASET
data = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/train.csv', index_col=False)
df = data

In [0]:
#Filling Missing Values in Data
#print(df.isna().sum())
df[['title', 'author']] = df[['title', 'author']].fillna(value = 'Missing Value')
df = df.dropna()
df['length'] = df.iloc[:,3].str.len()
#print(df.isna().sum())
df[df['length'] < 50].count()
df = df.drop(df['text'][df['length'] < 50].index, axis=0)
df_reverse = pd.DataFrame()


#Categorical to Numeric
for col_name in df.columns:
    if(df[col_name].dtype == 'object'):
        df[col_name]= df[col_name].astype('category')
        d = dict(enumerate(df[col_name].cat.categories))
        df[col_name] = df[col_name].cat.codes
        df_reverse[col_name+"_code"] = df[col_name]
        df_reverse[col_name] = df[col_name].map(d)

features_cols = ['id', 'title', 'author', 'text']
#FEATURES AND LABELS
X = df[features_cols]
Y = df.label

In [0]:
#PREPARING TRAINING DATASET AND TEST DATASET
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

In [0]:
#SETTING NEURAL-NETWORK PARAMETERS
max_features = 20800
batch_size = 32
embedding_dims = 120
filters = 250
kernel_size = 3
hidden_dims = 250
epochs = 2

In [59]:
#Creating Model
model = Sequential()

#Embedding Input Layer
model.add(Embedding(input_dim = max_features, output_dim = embedding_dims))
model.add(Dropout(0.2))

#Convolution Layer
model.add(Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1))

#Max Pooling Layer
model.add(GlobalMaxPooling1D())

#Vanilla Hidden Layer
model.add(Dense(hidden_dims))
model.add(Dropout(0.2))
model.add(Activation('relu'))

#Output Layer
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 120)         2496000   
_________________________________________________________________
dropout_2 (Dropout)          (None, None, 120)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, None, 250)         90250     
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 250)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 250)               62750     
_________________________________________________________________
dropout_3 (Dropout)          (None, 250)               0         
_________________________________________________________________
activation_2 (Activation)    (None, 250)              

In [0]:
#Training Model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, Y_test))

In [61]:
#PREDICTIONS USING CNN
Y_pred = model.predict(X_test)
score = model.evaluate(X_test, Y_test, batch_size=128, verbose=1)
accuracy = score[1]
print("Accuracy of CNN:", np.round(accuracy * 100), "%")

Accuracy of CNN: 93.0 %


In [0]:
np.set_printoptions(suppress=True)
Y_pred = np.around(Y_pred).astype(int)
Y_pred = Y_pred.squeeze()

In [0]:
X_test_cp = X_test
#Decoding Data - ONE TIME STEP - DO NOT REPEAT
df_reverse.set_index('title_code', inplace=False)
title_dict = df_reverse.to_dict()['title']
df_reverse.set_index('author_code', inplace=False)
author_dict = df_reverse.to_dict()['author']
df_reverse.set_index('text_code', inplace=False)
text_dict = df_reverse.to_dict()['text']

X_test_cp['title'] = X_test_cp['title'].map(title_dict)
X_test_cp['author'] = X_test_cp['author'].map(author_dict)
X_test_cp['text'] = X_test_cp['text'].map(text_dict)
X_test_cp.set_index('id', inplace=True)

In [0]:
#DISPLAYING NETWORK - DECODED - RESULTS
X_test_cp['Prediction'] = Y_pred
X_test_cp['Prediction'].replace([0,1],['Fake News','Relaible News'],inplace=True)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [67]:
#PREDICTION RESULTS
X_test_cp.tail()

Unnamed: 0_level_0,title,author,text,Prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12652,Commissioner Starts to Press Cleveland Indians...,The New York Times,"At the Golden Globes afterparties, hoi polloi ...",Fake News
13401,Cosmology/ Love & the Afterlife,noreply@blogger.com (Alexander Light),Trending Articles: Trending Articles: ‘Chairma...,Fake News
5882,Fordham Basketball Coach Recruits With an Acad...,Dr. Patrick Slattery,Hollywood’s biggest stars took to social media...,Fake News
11500,"Carrie Fisher, Russia, Shinzo Abe: Your Wednes...",TFPP Writer,This post was originally published on this sit...,Fake News
14754,Ken Blackwell: Media ‘Borking’ Sessions Becaus...,Mac Slavo,"IQUITOS, Peru — Venezuela took its stronges...",Relaible News
