## Importing Libraries

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
! pip install tensorflow


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import os
import numpy as np
import pandas as pd
import seaborn as sns, matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier

from sklearn.metrics import accuracy_score, confusion_matrix

import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

2023-07-18 14:17:05.813027: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Reading the input

In [4]:
df_fake = pd.read_csv('resources/datasets/Fake.csv')
df_true = pd.read_csv('resources/datasets/True.csv')

In [5]:
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to /home/siddhi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/siddhi/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

## Dataset Preparation

In [6]:

df_fake = pd.read_csv('resources/datasets/Fake.csv')
df_true = pd.read_csv('resources/datasets/True.csv')

train_set_fake, test_set_fake = train_test_split(df_fake, test_size=0.2, random_state=7)
train_set_true, test_set_true = train_test_split(df_fake, test_size=0.2, random_state=7)

In [7]:
TRAIN_DATA_PATH='resources/keras/train/'
TEST_DATA_PATH='resources/keras/test/'
os.makedirs(os.path.join(TRAIN_DATA_PATH,'fake'),exist_ok=True)
os.makedirs(os.path.join(TRAIN_DATA_PATH,'true'),exist_ok=True )
os.makedirs(os.path.join(TEST_DATA_PATH,'fake'),exist_ok=True )
os.makedirs(os.path.join(TEST_DATA_PATH,'true'),exist_ok=True )

In [8]:
def file_writer(df, path, label):
    for rows, col in df.iterrows():
        with open(os.path.join(path,label, str(rows)+'.txt'), 'w') as f:
            f.write(col['text'])
        

In [9]:
file_writer(train_set_fake, TRAIN_DATA_PATH,'fake',)
file_writer(train_set_true, TRAIN_DATA_PATH,'true',)

file_writer(test_set_fake, TEST_DATA_PATH,'fake',)
file_writer(test_set_true, TEST_DATA_PATH,'true',)

In [10]:
AUTOTUNE = tf.data.AUTOTUNE
batch_size = 32
seed = 42

raw_train_ds = tf.keras.utils.text_dataset_from_directory(
    'resources/keras/train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='training',
    seed=seed)

Found 37568 files belonging to 2 classes.
Using 30055 files for training.


2023-07-18 14:17:23.024612: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-18 14:17:23.074035: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-18 14:17:23.074599: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-18 14:17:23.078990: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-18 14:17:23.079672: I tensorflow/compile

In [11]:
class_names = raw_train_ds.class_names
train_ds = raw_train_ds.cache().prefetch(buffer_size=AUTOTUNE)

val_ds = tf.keras.utils.text_dataset_from_directory(
    'resources/keras/train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='validation',
    seed=seed)

val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

test_ds = tf.keras.utils.text_dataset_from_directory(
    'resources/keras/test',
    batch_size=batch_size)

test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

Found 37568 files belonging to 2 classes.
Using 7513 files for validation.
Found 9394 files belonging to 2 classes.


In [12]:
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE)
encoder.adapt(train_ds.map(lambda text, label: text))


## Create Model

In [20]:
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])


In [21]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVe  (None, None)              0         
 ctorization)                                                    
                                                                 
 embedding_1 (Embedding)     (None, None, 64)          64000     
                                                                 
 bidirectional_1 (Bidirecti  (None, 128)               66048     
 onal)                                                           
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 dense_3 (Dense)             (None, 1)                 65        
                                                                 
Total params: 138369 (540.50 KB)
Trainable params: 138

## Compile the Model

In [22]:
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'])


## Train the Model

In [23]:
history = model.fit(train_ds, epochs=1,
                    validation_data=val_ds,
                    validation_steps=10)


2023-07-18 14:20:20.912253: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:425] Loaded runtime CuDNN library: 8.4.1 but source was compiled with: 8.6.0.  CuDNN library needs to have matching major version and equal or higher minor version. If using a binary install, upgrade your CuDNN library.  If building from sources, make sure the library loaded at runtime is compatible with the version specified during compile configuration.
2023-07-18 14:20:20.914144: W tensorflow/core/framework/op_kernel.cc:1828] OP_REQUIRES failed at cudnn_rnn_ops.cc:1762 : UNKNOWN: Fail to find the dnn implementation.


UnknownError: Graph execution error:

Fail to find the dnn implementation.
	 [[{{node CudnnRNN}}]]
	 [[sequential_1/bidirectional_1/forward_lstm_1/PartitionedCall]] [Op:__inference_train_function_23017]

In [None]:
test_loss, test_acc = model.evaluate(test_ds)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)


In [None]:
plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plot_graphs(history, 'accuracy')
plt.ylim(None, 1)
plt.subplot(1, 2, 2)
plot_graphs(history, 'loss')
plt.ylim(0, None)
