In [1]:
# Attach Google Drive to the Colab filesystem; the model and data paths used in this
# notebook all live under this mount point.
from google.colab import drive

drive.mount(mountpoint="/content/gdrive")

Mounted at /content/gdrive


In [2]:
# import required libraries
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.model_selection import train_test_split
import nltk
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Bidirectional
from tensorflow import metrics
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report

In [3]:
import pickle


def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``.

    The file is opened in a ``with`` block so its handle is closed as soon as
    loading finishes, instead of being left open until garbage collection.

    SECURITY: ``pickle.load`` can execute arbitrary code contained in the file.
    Only load artifacts that you produced yourself or otherwise trust.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Naive Bayes classifier (used below through its .predict method).
NaiveBayes_model = _load_pickle(
    "/content/gdrive/My Drive/NLP Project/NB_model.pkl"
)

# Count vectorizer used to build the bag-of-words features (used below through .transform).
Count_Vectorizer = _load_pickle(
    "/content/gdrive/My Drive/NLP Project/CV_BOG.pkl"
)

# Tokenizer that converts text to integer sequences (used below through .texts_to_sequences).
Tokenizer1 = _load_pickle(
    "/content/gdrive/My Drive/NLP Project/Text_Vec.pkl"
)

In [4]:
# Restore the saved neural network from its .h5 file on the mounted Drive.
NEURAL_NETWORK_PATH = "/content/gdrive/My Drive/NLP Project/neural_network.h5"
BiLSTM_Model = load_model(NEURAL_NETWORK_PATH)

In [5]:
# Read the synthetic-sentence CSV from Drive and discard every row that has a missing value.
fake_news_synthetic = pd.read_csv(
    "/content/gdrive/My Drive/NLP Project/Synthetic_Sentences.csv"
).dropna()

In [6]:
# The synthetic sentences were created from the corpus after it had already been preprocessed
# (lowercasing, stemming, stop-word removal, etc.), so we don't have to repeat those steps here.

In [7]:
# Split the frame into its text column and its label column.
synthetic_text, synthetic_labels = (
    fake_news_synthetic['sentences'],
    fake_news_synthetic['labels'],
)

In [8]:
# Build bag-of-words features for the synthetic sentences. The vectorizer was fitted on the
# training data, so it is only applied here (transform, no re-fit); .toarray() then converts
# the result into a plain array.
BOG_synthetic = Count_Vectorizer.transform(fake_news_synthetic['sentences']).toarray()

# Labels as a separate NumPy array.
label = np.array(synthetic_labels)

In [9]:
# Classify the synthetic bag-of-words features with the Naive Bayes model, then print
# the per-class precision / recall / F1 report against the true labels.
predicted = NaiveBayes_model.predict(BOG_synthetic)
print(
    classification_report(y_true=synthetic_labels, y_pred=predicted)
)


              precision    recall  f1-score   support

           0       0.99      0.97      0.98      1131
           1       0.97      0.99      0.98       866

    accuracy                           0.98      1997
   macro avg       0.98      0.98      0.98      1997
weighted avg       0.98      0.98      0.98      1997



In [10]:
# Gather every synthetic document into a plain Python list.
text_comments = list(synthetic_text)

# Map each document to its integer sequence with the saved tokenizer
# (despite the variable name, these are integer sequences from texts_to_sequences).
one_hot_encoded = Tokenizer1.texts_to_sequences(text_comments)

# Bring every sequence to length 500, adding padding after the tokens ('post').
padded_vector = pad_sequences(sequences=one_hot_encoded, maxlen=500, padding='post')

# Model input / target arrays.
# NOTE(review): `input` shadows the Python builtin; the name is kept because later cells use it.
input, output = np.array(padded_vector), np.array(synthetic_labels)

In [11]:
# Evaluate the BiLSTM on the synthetic set; the cell displays the returned list
# (presumably the loss followed by the metrics the model was compiled with — confirm against training).
BiLSTM_Model.evaluate(x=input, y=output)



[1.0136531591415405, 0.7551326751708984, 0.9642032384872437]

In [12]:
# Raw BiLSTM outputs for every padded document, predicted in batches of 64.
predicted_prob = BiLSTM_Model.predict(x=input, batch_size=64)

# Binarize: outputs strictly above 0.5 become class 1, everything else class 0.
keras_predicted = np.where(predicted_prob > 0.5, 1, 0)

In [13]:
# Per-class precision / recall / F1 of the thresholded BiLSTM predictions.
print(
    classification_report(y_true=synthetic_labels, y_pred=keras_predicted)
)

              precision    recall  f1-score   support

           0       0.96      0.60      0.73      1131
           1       0.65      0.96      0.77       866

    accuracy                           0.76      1997
   macro avg       0.80      0.78      0.75      1997
weighted avg       0.82      0.76      0.75      1997

