In [337]:
import tensorflow as tf

In [338]:
import pandas as pd

In [339]:
# read all the data in

In [340]:
# read the train
train = pd.read_csv(r"D:\Fake News\fake news detection(FakeNewsNet)\fnn_train.csv")

In [341]:
# validation
validation = pd.read_csv(r"D:\Fake News\fake news detection(FakeNewsNet)\fnn_dev.csv")

In [342]:
# test
test = pd.read_csv(r"D:\Fake News\fake news detection(FakeNewsNet)\fnn_test.csv")

In [343]:
# have an input layer
input_layer = tf.keras.layers.Input(shape = (), dtype = tf.string)

In [344]:
max_tokens = 1000

In [345]:
# pass this to a vectorization layer
# but first make a vectorization layer
text_vec_layer = tf.keras.layers.TextVectorization(max_tokens = max_tokens, output_mode = "int",
                                                  output_sequence_length=100)

In [346]:
text_vec_layer.adapt(train["fullText_based_content"], batch_size = 1000)

In [347]:
# pass the input through this text vectorization layer
vectorized_text = text_vec_layer(input_layer)

In [348]:
# instantiate an embedding layer
emb_layer = tf.keras.layers.Embedding(max_tokens, 100, mask_zero = True)

In [349]:
# pass the vectorized text through the embedding layer
emb_output = emb_layer(vectorized_text)

In [350]:
# some spatial dropout?
# spatial_dropout = tf.keras.layers.SpatialDropout1D(0.5)

In [351]:
# spatial dropout output
# spatial_dropout_output = spatial_dropout(emb_output)

In [352]:
# instantiate a recurrent layer
gru_layer = tf.keras.layers.LSTM(50, dropout = 0.5)

In [353]:
# pass the emb output through the gru
gru_output = gru_layer(emb_output)

In [354]:
# dropout layer
dropout_layer = tf.keras.layers.Dropout(0.5)

In [355]:
# dropout output
dropout_output = dropout_layer(gru_output)

In [356]:
# classification layer
classification_layer = tf.keras.layers.Dense(1)

In [357]:
# class output
class_output = classification_layer(gru_output)

In [358]:
# make the model
model = tf.keras.models.Model(input_layer, class_output)

In [359]:
model.summary()

Model: "model_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        [(None,)]                 0         
_________________________________________________________________
text_vectorization_10 (TextV (None, 50)                0         
_________________________________________________________________
embedding_10 (Embedding)     (None, 50, 32)            32000     
_________________________________________________________________
lstm_11 (LSTM)               (None, 16)                3136      
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 17        
Total params: 35,153
Trainable params: 35,153
Non-trainable params: 0
_________________________________________________________________


In [360]:
model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits=True), 
             optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-4), 
             metrics=tf.keras.metrics.BinaryAccuracy())

In [361]:
from sklearn.preprocessing import LabelEncoder

In [362]:
le = LabelEncoder()

In [363]:
targets_train = le.fit_transform(train["label_fnn"])

In [364]:
targets_valid = le.transform(validation["label_fnn"])

In [365]:
train_validation_combined = pd.concat([train["fullText_based_content"], validation["fullText_based_content"]], 
                                     ignore_index = True)

In [366]:
train_valid_target = pd.concat([pd.DataFrame(targets_train), pd.DataFrame(targets_valid)], ignore_index = True)

In [367]:
train_valid_target.columns = ["fnn_label"]

In [368]:
targets_test = le.transform(test["label_fnn"])

In [369]:
import numpy as np

In [370]:
# fit the model
model.fit(train_validation_combined.values, np.array(train_valid_target["fnn_label"]), verbose = 1, batch_size = 128, 
         validation_split = 0.1, 
         epochs = 200, callbacks = tf.keras.callbacks.EarlyStopping(monitor = "val_binary_accuracy", 
                                                                   patience = 10, mode = "max",
                                                                    restore_best_weights = True))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200


<keras.callbacks.History at 0x177918e0730>

In [371]:
from collections import Counter

In [372]:
Counter(targets_test)

Counter({1: 636, 0: 418})

In [373]:
636/(636 + 418)

0.603415559772296

In [374]:
model.evaluate(test["fullText_based_content"].values, targets_test)



[0.5060544013977051, 0.5730550289154053]