In [447]:
import tensorflow as tf

In [448]:
import pandas as pd

In [449]:
# read all the data in

In [450]:
# read the train
train = pd.read_csv(r"D:\Fake News\fake news detection(FakeNewsNet)\fnn_train.csv")

In [451]:
# validation
validation = pd.read_csv(r"D:\Fake News\fake news detection(FakeNewsNet)\fnn_dev.csv")

In [452]:
# test
test = pd.read_csv(r"D:\Fake News\fake news detection(FakeNewsNet)\fnn_test.csv")

In [453]:
# have an input layer
input_layer = tf.keras.layers.Input(shape = (), dtype = tf.string)

In [454]:
max_tokens = 2000

In [455]:
# pass this to a vectorization layer
# but first make a vectorization layer
text_vec_layer = tf.keras.layers.TextVectorization(max_tokens = max_tokens, output_mode = "int",
                                                  output_sequence_length=100)

In [456]:
text_vec_layer.adapt(train["fullText_based_content"], batch_size = 1000)

In [457]:
# pass the input through this text vectorization layer
vectorized_text = text_vec_layer(input_layer)

In [458]:
# instantiate an embedding layer
emb_layer = tf.keras.layers.Embedding(max_tokens, 100, mask_zero = True)

In [459]:
# pass the vectorized text through the embedding layer
emb_output = emb_layer(vectorized_text)

In [460]:
# some spatial dropout?
# spatial_dropout = tf.keras.layers.SpatialDropout1D(0.5)

In [461]:
# spatial dropout output
# spatial_dropout_output = spatial_dropout(emb_output)

In [462]:
# instantiate a recurrent layer
gru_layer = tf.keras.layers.LSTM(50, dropout = 0.5)

In [463]:
# pass the emb output through the gru
gru_output = gru_layer(emb_output)

In [464]:
# dropout layer
dropout_layer = tf.keras.layers.Dropout(0.5)

In [465]:
# dropout output
dropout_output = dropout_layer(gru_output)

In [466]:
# classification layer
classification_layer = tf.keras.layers.Dense(1)

In [467]:
# class output
class_output = classification_layer(gru_output)

In [468]:
# make the model
model = tf.keras.models.Model(input_layer, class_output)

In [469]:
model.summary()

Model: "model_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        [(None,)]                 0         
_________________________________________________________________
text_vectorization_13 (TextV (None, 100)               0         
_________________________________________________________________
embedding_13 (Embedding)     (None, 100, 100)          200000    
_________________________________________________________________
lstm_14 (LSTM)               (None, 50)                30200     
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 51        
Total params: 230,251
Trainable params: 230,251
Non-trainable params: 0
_________________________________________________________________


In [470]:
model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits=True), 
             optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-2), 
             metrics=tf.keras.metrics.BinaryAccuracy())

In [471]:
from sklearn.preprocessing import LabelEncoder

In [472]:
le = LabelEncoder()

In [473]:
targets_train = le.fit_transform(train["label_fnn"])

In [474]:
targets_valid = le.transform(validation["label_fnn"])

In [475]:
train_validation_combined = pd.concat([train["fullText_based_content"], validation["fullText_based_content"]], 
                                     ignore_index = True)

In [476]:
train_valid_target = pd.concat([pd.DataFrame(targets_train), pd.DataFrame(targets_valid)], ignore_index = True)

In [477]:
train_valid_target.columns = ["fnn_label"]

In [478]:
targets_test = le.transform(test["label_fnn"])

In [479]:
import numpy as np

In [480]:
# fit the model
model.fit(train_validation_combined.values, np.array(train_valid_target["fnn_label"]), verbose = 1, batch_size = 128, 
         validation_split = 0.1, 
         epochs = 200, callbacks = tf.keras.callbacks.EarlyStopping(monitor = "val_binary_accuracy", 
                                                                   patience = 10, mode = "max",
                                                                    restore_best_weights = True))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200

KeyboardInterrupt: 

In [None]:
from collections import Counter

In [None]:
Counter(targets_test)

In [None]:
636/(636 + 418)

In [481]:
model.evaluate(test["fullText_based_content"].values, targets_test)



[0.839396059513092, 0.6508538722991943]