In [5]:
import json
import urllib
import urllib.request

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [8]:
def sarcasm_classification():
    """Download the sarcasm headline dataset, train a binary classifier, return it.

    The function downloads ``sarcasm.json`` into the current working
    directory, reads the ``headline`` and ``is_sarcastic`` fields of every
    record, uses the first ``training_size`` records for training and the
    remaining records for validation, tokenizes and pads the headlines, and
    fits a stacked bidirectional-GRU model for 25 epochs.

    Returns:
        The fitted ``tf.keras.Sequential`` model.

    Raises:
        ValueError: If the dataset has no records beyond the first
            ``training_size``, which would leave the validation set empty.

    Errors raised by the download or by reading/parsing the file are not
    caught here and propagate to the caller.
    """
    data_url = 'https://github.com/dicodingacademy/assets/raw/main/Simulation/machine_learning/sarcasm.json'
    data_path = 'sarcasm.json'
    # Requires `import urllib.request`; a bare `import urllib` does not
    # guarantee that the `request` submodule is loaded.
    urllib.request.urlretrieve(data_url, data_path)

    # Hyperparameters for tokenization, padding, and the train/validation split.
    vocab_size = 1000
    embedding_dim = 16
    max_length = 120
    trunc_type = 'post'
    padding_type = 'post'
    oov_tok = "<OOV>"
    training_size = 20000

    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(data_path, 'r', encoding='utf-8') as f:
        datastore = json.load(f)

    sentences = [item['headline'] for item in datastore]
    labels = [item['is_sarcastic'] for item in datastore]

    if len(sentences) <= training_size:
        raise ValueError(
            "Dataset has %d records; more than %d are required so that the "
            "validation split is not empty." % (len(sentences), training_size)
        )

    # Positional split: the first `training_size` records train the model,
    # everything after them is used for validation.
    training_sentences = sentences[:training_size]
    testing_sentences = sentences[training_size:]

    training_labels = labels[:training_size]
    testing_labels = labels[training_size:]

    # The vocabulary is fitted on the training sentences only; the same
    # tokenizer is then applied to the validation sentences.
    tokenizer = Tokenizer(num_words=vocab_size,
                          oov_token=oov_tok)
    tokenizer.fit_on_texts(training_sentences)

    training_sequences = tokenizer.texts_to_sequences(training_sentences)
    training_padded = pad_sequences(training_sequences,
                                    maxlen=max_length,
                                    padding=padding_type,
                                    truncating=trunc_type)
    testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
    testing_padded = pad_sequences(testing_sequences,
                                   maxlen=max_length,
                                   padding=padding_type,
                                   truncating=trunc_type)

    training_label_seq = np.array(training_labels)
    testing_label_seq = np.array(testing_labels)

    # Embedding -> two bidirectional GRU layers -> dense head with a single
    # sigmoid output, matching the binary cross-entropy loss below.
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size,
                                  embedding_dim,
                                  input_length=max_length),
        tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.GRU(16)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # The History object returned by fit() is intentionally not kept; only
    # the trained model is returned.
    model.fit(training_padded,
              training_label_seq,
              epochs=25,
              validation_data=(testing_padded, testing_label_seq), verbose=2)

    return model

In [9]:
if __name__ == "__main__":
    # Train the classifier, then write it to disk in HDF5 format.
    # `model` is deliberately left bound at module level after this cell runs.
    model = sarcasm_classification()
    model.save(filepath="model.h5")

Epoch 1/25
625/625 - 82s - loss: 0.4566 - accuracy: 0.7724 - val_loss: 0.3822 - val_accuracy: 0.8234 - 82s/epoch - 132ms/step
Epoch 2/25
625/625 - 78s - loss: 0.3518 - accuracy: 0.8413 - val_loss: 0.3769 - val_accuracy: 0.8283 - 78s/epoch - 125ms/step
Epoch 3/25
625/625 - 74s - loss: 0.3293 - accuracy: 0.8523 - val_loss: 0.3739 - val_accuracy: 0.8243 - 74s/epoch - 119ms/step
Epoch 4/25
625/625 - 79s - loss: 0.3164 - accuracy: 0.8594 - val_loss: 0.3816 - val_accuracy: 0.8283 - 79s/epoch - 126ms/step
Epoch 5/25
625/625 - 78s - loss: 0.3085 - accuracy: 0.8619 - val_loss: 0.3849 - val_accuracy: 0.8202 - 78s/epoch - 124ms/step
Epoch 6/25
625/625 - 77s - loss: 0.3001 - accuracy: 0.8669 - val_loss: 0.3765 - val_accuracy: 0.8305 - 77s/epoch - 123ms/step
Epoch 7/25
625/625 - 75s - loss: 0.2861 - accuracy: 0.8752 - val_loss: 0.3855 - val_accuracy: 0.8331 - 75s/epoch - 119ms/step
Epoch 8/25
625/625 - 78s - loss: 0.2774 - accuracy: 0.8795 - val_loss: 0.3890 - val_accuracy: 0.8247 - 78s/epoch - 125

  saving_api.save_model(
