In [1]:
import json    

<h2> Read Data </h2>

In [2]:
# Load the raw records from sarcasm.json (path is relative to the current
# working directory). JSON text is UTF-8 by specification, so the encoding is
# pinned explicitly instead of relying on the platform's default codec.
with open("sarcasm.json", "r", encoding="utf-8") as f:
    datastore = json.load(f)

In [3]:
# Pull the two fields of interest out of every record, preserving file order:
# the headline text (model input) and its `is_sarcastic` value (target).
headline = [item['headline'] for item in datastore]
labels = [item['is_sarcastic'] for item in datastore]

<h2>Preprocessing data</h2>

In [5]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [6]:
# Build the vocabulary from the headlines. `num_words=5000` caps how many of
# the most frequent words are used when texts are encoded; words outside that
# cap are replaced by the '<OOV>' placeholder token.
# NOTE(review): the tokenizer is fitted on every headline, i.e. before the
# train/test split performed later, so test-set words influence the
# vocabulary — confirm this is acceptable.
tokenizer = Tokenizer(
    num_words=5000,
    oov_token='<OOV>',
)
tokenizer.fit_on_texts(headline)
word_index = tokenizer.word_index


In [7]:
# Encode each headline as a list of integer word ids, then bring every
# sequence to a fixed length of 40; padding='post' puts the zeros at the end
# of short sequences.
# NOTE(review): headlines longer than 40 tokens are cut by pad_sequences using
# its default truncation side — check the Keras docs if that matters.
headline_seq = tokenizer.texts_to_sequences(headline)
paded_headline = pad_sequences(
    headline_seq,
    maxlen=40,
    padding='post',
)
print(paded_headline)

[[ 308    1  679 ...    0    0    0]
 [   4    1 3338 ...    0    0    0]
 [ 145  838    2 ...    0    0    0]
 ...
 [   1    9   68 ...    0    0    0]
 [1541  392 4164 ...    0    0    0]
 [   1 1647    6 ...    0    0    0]]


In [8]:
# Sanity check: one row per headline, 40 columns (the padded length).
paded_headline.shape

(26709, 40)

In [9]:
 from sklearn.model_selection import train_test_split

In [10]:
import numpy as np

In [11]:
# Convert the Python list of labels into a NumPy array so it can be split and
# passed to the model alongside the padded sequences.
labels = np.array(labels)

<h2> Splitting the data into train and test datasets </h2>

In [12]:
# Hold out 25% of the samples for evaluation; the fixed random_state keeps the
# split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    paded_headline,
    labels,
    test_size=0.25,
    random_state=42,
)

In [13]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Embedding,Dense,Flatten,GlobalAveragePooling1D,LSTM,Bidirectional

<h2> Bidirectional LSTM Model </h2>

In [14]:
def sarc_model(vocab_size=5000, embedding_dim=16, max_len=40,
               lstm_units=32, dense_units=8):
    """Build the (uncompiled) bidirectional-LSTM sarcasm classifier.

    The defaults reproduce the original hard-coded architecture, so calling
    ``sarc_model()`` with no arguments gives the same network as before.

    Args:
        vocab_size: Number of rows in the embedding table; keep it in sync
            with the tokenizer's ``num_words``.
        embedding_dim: Length of each learned word vector.
        max_len: Length of the padded input sequences.
        lstm_units: Units per LSTM direction. The bidirectional wrapper
            outputs both directions, i.e. ``2 * lstm_units`` features.
        dense_units: Width of the hidden ReLU layer.

    Returns:
        A ``Sequential`` model ending in a single sigmoid unit. The caller is
        responsible for compiling it.
    """
    return Sequential([
        Embedding(vocab_size, embedding_dim, input_length=max_len),
        Bidirectional(LSTM(lstm_units)),
        Dense(dense_units, activation='relu'),
        # One sigmoid output: probability-like score for the positive class.
        Dense(1, activation='sigmoid'),
    ])

In [15]:
# Seed TensorFlow before the layers are created: weight initialisation and
# batch shuffling are random, and without a seed the results differ on every
# "Restart & Run All". 42 matches the seed used for the train/test split.
tf.random.set_seed(42)

model = sarc_model()
# Binary target + sigmoid output -> binary cross-entropy loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 40, 16)            80000     
_________________________________________________________________
bidirectional (Bidirectional (None, 64)                12544     
_________________________________________________________________
dense (Dense)                (None, 8)                 520       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 9         
Total params: 93,073
Trainable params: 93,073
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Train for 15 epochs, evaluating on the held-out split after each epoch.
# NOTE(review): the test split doubles as validation data here, so it is not
# an untouched test set — confirm this is intended.
model.fit(
    x_train,
    y_train,
    epochs=15,
    validation_data=(x_test, y_test),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x24ee814f148>

<h2> Neural Network Model </h2>

In [17]:
def sarc_model2(vocab_size=5000, embedding_dim=16, max_len=40, dense_units=8):
    """Build the (uncompiled) averaged-embedding sarcasm classifier.

    Word embeddings are averaged over the sequence axis and passed through a
    small dense head. The defaults reproduce the original hard-coded
    architecture, so ``sarc_model2()`` with no arguments is unchanged.

    Args:
        vocab_size: Number of rows in the embedding table; keep it in sync
            with the tokenizer's ``num_words``.
        embedding_dim: Length of each learned word vector (also the size of
            the pooled feature vector).
        max_len: Length of the padded input sequences.
        dense_units: Width of the hidden ReLU layer.

    Returns:
        A ``Sequential`` model ending in a single sigmoid unit. The caller is
        responsible for compiling it.
    """
    return Sequential([
        Embedding(vocab_size, embedding_dim, input_length=max_len),
        # Collapse the sequence axis by averaging the word vectors.
        GlobalAveragePooling1D(),
        Dense(dense_units, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

In [18]:
# Seed TensorFlow before the layers are created: weight initialisation and
# batch shuffling are random, and without a seed the results differ on every
# "Restart & Run All". 42 matches the seed used for the train/test split.
tf.random.set_seed(42)

model2 = sarc_model2()
# Binary target + sigmoid output -> binary cross-entropy loss.
model2.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 40, 16)            80000     
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 136       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 9         
Total params: 80,145
Trainable params: 80,145
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Train for 30 epochs, evaluating on the held-out split after each epoch.
# NOTE(review): the test split doubles as validation data here, so it is not
# an untouched test set — confirm this is intended.
model2.fit(
    x_train,
    y_train,
    epochs=30,
    validation_data=(x_test, y_test),
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x250a6253348>