# Fake News Detection With Long Short Term Memory (LSTM) Algorithm

### A Fake News here it's a news that contains sarcasm (classified sarcastic)

In [1]:
import os
import pickle

import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Bidirectional
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Embedding

In [2]:
df_raw = pd.read_json('data/Sarcasm_Headlines_Dataset.json', lines=True)
df_raw.shape

(26709, 3)

In [3]:
df_raw.head()

Unnamed: 0,article_link,headline,is_sarcastic
0,https://www.huffingtonpost.com/entry/versace-b...,former versace store clerk sues over secret 'b...,0
1,https://www.huffingtonpost.com/entry/roseanne-...,the 'roseanne' revival catches up to our thorn...,0
2,https://local.theonion.com/mom-starting-to-fea...,mom starting to fear son's web series closest ...,1
3,https://politics.theonion.com/boehner-just-wan...,"boehner just wants wife to listen, not come up...",1
4,https://www.huffingtonpost.com/entry/jk-rowlin...,j.k. rowling wishes snape happy birthday in th...,0


In [4]:
df = df_raw.iloc[:,1:]
df

Unnamed: 0,headline,is_sarcastic
0,former versace store clerk sues over secret 'b...,0
1,the 'roseanne' revival catches up to our thorn...,0
2,mom starting to fear son's web series closest ...,1
3,"boehner just wants wife to listen, not come up...",1
4,j.k. rowling wishes snape happy birthday in th...,0
...,...,...
26704,american politics in moral free-fall,0
26705,america's best 20 hikes,0
26706,reparations and obama,0
26707,israeli ban targeting boycott supporters raise...,0


In [7]:
df.is_sarcastic.summary()

: 

In [5]:
encoder = keras.layers.TextVectorization(max_tokens=10000)
encoder.adapt(df.map(lambda text, _: test))

vocabulary = np.array(encoder.get_vocabulary())

example, label = next(iter(df))
vocabulary[:20]

AttributeError: 'DataFrame' object has no attribute 'map'

# Fake News Detection With Passive Aggressive Algorithm

### A Fake News here it's a news that contains sarcasm (classified sarcastic)



# Fake News Detection With Passive Aggressive Algorithm

### A Fake News here it's a news that contains sarcasm (classified sarcastic)



In [None]:
# Split the dataset (training ans testing dataset)
x_train, x_test, y_train, y_test = train_test_split(df.headline, df.is_sarcastic, test_size=0.2, random_state=4)

: 

In [None]:
x_train

: 

In [None]:
model = Sequential()
model.add(Embedding(input_dim=len(encoder.get_vocabulary()), output_dim=64, mask_zero=True))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='softmax'))
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), optimizer='adam', metrics=['accuracy'])
model.summary()

: 

In [None]:
history = model.fit(x_train, y_train, batch_size=15,
                     epochs=20, validation_data=(x_test, y_test),
                       validation_steps=30)

: 

In [None]:
test_loss, test_acc = model.evaluate((x_test, y_test))

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

In [None]:
def plot_graphs(history, metric):
  plt.plot(history.history[metric])
  plt.plot(history.history['val_'+metric], '')
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, 'val_'+metric])

In [None]:
plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plot_graphs(history, 'accuracy')
plt.ylim(None, 1)
plt.subplot(1, 2, 2)
plot_graphs(history, 'loss')
plt.ylim(0, None)

In [None]:
y_pred = model.predict(x_test)
score = accuracy_score(y_test,y_pred)
print(f'Accuracy: {round(score*100,2)}%')

: 

In [None]:
confusion_matrix(y_test,y_pred)

: 

In [None]:
# Saving the model
os.makedirs(f"./LSTM weights",exist_ok=True)
filename = f'./LSTM weights/weights{id}.txt'
with open(filename, 'wb') as f:
    pickle.dump(model.get_weights(), f)

: 

In [None]:
sample_text = ['obama is president of the tyrant congress','obama was the first black president of usa']
predictions = model.predict(np.array([sample_text]))