# Importing Libraries

In [1]:
import numpy as np 
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/fake-and-real-news-dataset/True.csv
/kaggle/input/fake-and-real-news-dataset/Fake.csv


# Pre-Processing and Cleaning

In [2]:
fakedataset = pd.read_csv("C:/Users/DELL/Downloads/Fake (1).csv")
realdataset = pd.read_csv("C:/Users/DELL/Downloads/Truee (1).csv") 
realdataset["class"] = 1 # Adding Class to Real News
fakedataset["class"] = 0 # Adding Class to Fake News
realdataset["text"] = realdataset["title"] + " " + realdataset["text"] 
fakedataset["text"] = fakedataset["title"] + " " + fakedataset["text"]
realdataset = realdataset.drop(["subject", "date", "title"], axis = 1)
fakedataset = fakedataset.drop(["subject", "date", "title"], axis = 1)
dataset = realdataset.append(fakedataset, ignore_index = True) 
del realdataset, fakedataset 

# Encoding the Corpus

In [3]:
vocab_size = 10000
encoder = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(dataset["text"], vocab_size)

In [4]:
def enc(dataframe):
    tokenized = []
    for sentence in dataframe["text"].values:
        tokenized.append(encoder.encode(sentence))
    out = tf.keras.preprocessing.sequence.pad_sequences(tokenized, padding = "post")
    return out
x = enc(dataset)

In [5]:
y = dataset["class"]
print(y)

0        1
1        1
2        1
3        1
4        1
        ..
44893    0
44894    0
44895    0
44896    0
44897    0
Name: class, Length: 44898, dtype: int64


# Model Definition

In [6]:
# Model Definition
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 64), 
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64,  return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics= ['acc']) # Compiling the Model

# Training the Model

In [7]:
history = model.fit(x,y, epochs = 2)

Epoch 1/2
Epoch 2/2


# Predicting with the Model

In [8]:
def pad_to_size(vec, size):
  zero = [0] * (size - len(vec))
  vec.extend(zeros)
  return vec

def sample_predict(sample_pred_text, pad):
  encoded_sample_pred_text = encoder.encode(sample_pred_text)

  if pad:
    encoded_sample_pred_text = pad_to_size(encoded_sample_pred_text, 64)
  encoded_sample_pred_text = tf.cast(encoded_sample_pred_text, tf.float32)
  predictions = model.predict(tf.expand_dims(encoded_sample_pred_text, 0))

  return (predictions)

sample_pred_text = ('The movie was cool. The animation and the graphics')
predictions = sample_predict(sample_pred_text, pad=False)
print(predictions)

[[-0.44961074]]


 # Download the Model Weights for Yourself

In [9]:
model.save('my_model.h5') 
import os
from IPython.display import FileLink
FileLink(r'my_model.h5')