

# Training a Sarcasm Detection Model using LSTM


## Download the Dataset

First, you will download the JSON file and extract the contents into lists.

In [1]:
# Install Ivy with the %pip magic so the package lands in the environment of
# the running kernel (a `!pip` shell call may resolve to a different pip).
%pip install -q ivy


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.8/143.8 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m756.0/756.0 kB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Download the dataset.
# -nc (--no-clobber) skips the download when sarcasm.json is already present,
# so re-running this cell does not pile up sarcasm.json.1, sarcasm.json.2, ...
!wget -nc https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json

--2024-03-15 09:14:19--  https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json
Resolving storage.googleapis.com (storage.googleapis.com)... 108.177.125.207, 74.125.23.207, 74.125.203.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|108.177.125.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5643545 (5.4M) [application/json]
Saving to: ‘sarcasm.json’


2024-03-15 09:14:20 (5.33 MB/s) - ‘sarcasm.json’ saved [5643545/5643545]



In [3]:
# Load the JSON file
import json

# Read the file as UTF-8 explicitly so decoding does not depend on the
# platform's default text encoding.
with open("./sarcasm.json", "r", encoding="utf-8") as f:
    datastore = json.load(f)

# Collect the headline text and the sarcasm label of every record.
# Both lists are built from the same iteration order, so index i of
# `sentences` corresponds to index i of `labels`.
sentences = [item['headline'] for item in datastore]
labels = [item['is_sarcastic'] for item in datastore]

## Split the Dataset

You will then split the lists into train and test sets.

In [4]:
# Number of leading examples used for training; everything after is the test set
training_size = 20000

# Headlines: the first `training_size` entries train, the remainder tests
training_sentences, testing_sentences = (
    sentences[:training_size],
    sentences[training_size:],
)

# Labels: cut at the same index so they stay aligned with the headlines
training_labels, testing_labels = (
    labels[:training_size],
    labels[training_size:],
)

## Data preprocessing

Next, you will generate the vocabulary and padded sequences.

In [5]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenization and padding settings
vocab_size = 10000
max_length = 120
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"

# Build the vocabulary from the training headlines only
tokenizer = Tokenizer(oov_token=oov_tok, num_words=vocab_size)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Padding arguments shared by the training and testing sets
_pad_kwargs = dict(maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Training set: text -> integer sequences -> fixed-length padded array
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, **_pad_kwargs)

# Testing set: same tokenizer, same padding
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, **_pad_kwargs)

# Labels become numpy arrays
training_labels = np.array(training_labels)
testing_labels = np.array(testing_labels)

In [6]:
import tensorflow as tf

# Model and training hyperparameters.
# NOTE(review): the later cells visible in this notebook pass literal sizes
# (300, 256) to the model and define their own `num_epochs`, so these values
# do not appear to be consumed there - confirm whether they should be wired in.
embedding_dim = 16 # size of each word-embedding vector
lstm_dim = 32 # hidden size of the LSTM
dense_dim = 24 # width of the dense layer
NUM_EPOCHS = 10 # number of training epochs

## Sample input tensor

In [7]:
# Sequence length and feature dimension.
# NOTE(review): neither value is used when the sample below is built.
sequence_length = 32  # The number of timesteps in each input sample
feature_dim = 16      # The dimensionality of the input features

# Create a sample input with random data.
# The array created here has shape (1, max_length): a single sample holding
# `max_length` float32 values drawn uniformly from [0, 1).
sample_input = np.random.rand(1, max_length).astype(np.float32)

# Convert the numpy array to a TensorFlow tensor
sample_input_tensor = tf.convert_to_tensor(sample_input)

In [8]:
import ivy
import tensorflow as tf

# Model Definition with LSTM


In [47]:
import ivy

class SarcasmDetectionModel(ivy.Module):
    """Embedding -> LSTM -> two linear layers, ending in one sigmoid unit.

    Args:
        vocab_size: number of entries in the embedding table.
        embedding_dim: size of each embedding vector (also the LSTM input width).
        hidden_dim: hidden size of the LSTM.
        num_classes: output width of the first linear layer. The final linear
            layer always emits a single value per sample, whatever this is set to.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.num_classes = num_classes
        self._build()

    def _build(self, *args, **kwargs):
        """Create the sub-layers from the sizes stored on the instance."""
        self.embedding = ivy.Embedding(self.vocab_size, self.embedding_dim)
        self.lstm = ivy.LSTM(self.embedding_dim, self.hidden_dim)
        self.fc1 = ivy.Linear(self.hidden_dim, self.num_classes)
        self.fc2 = ivy.Linear(self.num_classes, 1)

    def _forward(self, x):
        """Run a batch through the network.

        Args:
            x: batch of token ids - assumed shape (batch, seq_len); TODO confirm.

        Returns:
            Tuple ``(logits, probs)``: the raw output of the last linear layer
            and the sigmoid of that output.
        """
        x = self.embedding(x)
        _lstm_output, (hidden, _) = self.lstm(x)

        # The hidden state may arrive wrapped in a list; use its first entry.
        # NOTE(review): presumably one entry per LSTM layer - confirm against
        # the Ivy LSTM documentation.
        if isinstance(hidden, list):
            hidden = hidden[0]

        # Flatten everything after the batch axis for the linear layers
        x = ivy.reshape(hidden, (hidden.shape[0], -1))
        x = ivy.relu(self.fc1(x))

        # No activation on the last layer: a ReLU here would clamp the logits
        # to >= 0, which forces sigmoid(logits) >= 0.5 for every input.
        logits = self.fc2(x)
        probs = ivy.sigmoid(logits)
        return logits, probs

# Example usage: a 10,000-word vocabulary, 300-dimensional embeddings,
# a 256-unit LSTM and num_classes=2 (sarcasm or not)
model = SarcasmDetectionModel(
    vocab_size=10000,
    embedding_dim=300,
    hidden_dim=256,
    num_classes=2,
)
print("Success")


Success


In [11]:
# Install dm-haiku with the %pip magic so it lands in the running kernel's
# environment.
# NOTE(review): no cell visible in this notebook imports haiku - confirm the
# dependency is still required.
%pip install -q dm-haiku


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/371.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/371.7 kB[0m [31m3.2 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m368.6/371.7 kB[0m [31m5.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m371.7/371.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [48]:
import jax

In [49]:
# Switch Ivy to the JAX backend before running the forward pass below
ivy.set_backend("jax")
# Fixed PRNG key so the random token ids are the same on every run
key = jax.random.PRNGKey(0)

# Build one random "headline": a (1, seq_length) array of integer token ids
# in the range [0, 10000).
# NOTE(review): 100 and 10000 are literals here; the preprocessing cell uses
# max_length = 120 and vocab_size = 10000 - confirm the difference is intended.
seq_length = 100  # Example sequence length
x = jax.random.randint(key, shape=(1, seq_length), minval=0, maxval=10000)

# Forward pass through the model defined above; returns (logits, probs)
logits, probs = model(x)

In [67]:
# Helper function for loading the dataset in batches
def generate_batches(text_data, labels, dataset_size, batch_size=32):
    """Yield aligned ``(text_data, labels)`` slices of up to `batch_size` items.

    Only the first `dataset_size` positions are visited; the final slice is
    shorter when `dataset_size` is not a multiple of `batch_size`.

    Raises:
        ivy.utils.exceptions.IvyError: if `batch_size` exceeds `dataset_size`
            (raised when iteration starts, since this is a generator).
    """
    if batch_size > dataset_size:
        raise ivy.utils.exceptions.IvyError("Use a smaller batch size")

    for start in range(0, dataset_size, batch_size):
        # Clamp the end so the last batch never runs past dataset_size
        window = slice(start, min(start + batch_size, dataset_size))
        yield text_data[window], labels[window]

# Helper function to get the number of correct predictions
def num_correct(preds, labels):
    """Count the rows of `preds` whose arg-max along axis 1 equals the label.

    The count is converted through ``to_numpy().item()`` so a plain Python
    scalar is returned.
    """
    predicted_classes = preds.argmax(axis=1)
    matches = predicted_classes == labels
    return matches.sum().to_numpy().item()

# Define a loss function
def loss_fn(params):
    """Softmax cross-entropy for one batch.

    Args:
        params: 4-tuple ``(v, model, x, y)`` - the variables forwarded to the
            model as ``v=``, the model itself, the inputs and the targets.

    Returns:
        Tuple ``(loss, logits)``.

    NOTE(review): the softmax is applied to whatever `logits` the model
    returns; if the model emits a single value per sample, a softmax over
    that axis is constant - confirm the model head matches this loss.
    """
    variables, network, inputs, targets = params
    logits, _ = network(inputs, v=variables)
    class_probs = ivy.softmax(logits)
    return (ivy.cross_entropy(targets, class_probs), logits)

# Example usage:
# Batch over the padded training sequences built in the preprocessing cell
dataset_size = len(training_padded)  # number of training examples
batch_size = 32  # NOTE(review): reassigned to 4 in the training-setup cell further down

# # Generate batches for training
# for batch_x, batch_y in generate_batches(training_padded, training_labels, dataset_size, batch_size):
#     # Here you would perform your training steps, e.g.:
#     # - Forward pass
#     # - Compute loss
#     # - Backward pass
#     # - Update weights
#     pass


In [62]:

# Enable 64-bit mode in JAX.
# NOTE(review): this flag is normally set at startup, before any JAX arrays
# are created; this cell sits after the model cells - consider moving it to
# the top of the notebook and confirm it is needed at all.
jax.config.update('jax_enable_x64', True)


In [70]:
import ivy
from tqdm import tqdm

# Assuming SarcasmDetectionModel and other necessary functions are defined above

# Train the model on GPU if it's available
device = "gpu:0" if ivy.gpu_is_available() else "cpu"

# Training hyperparameters
optimizer = ivy.Adam(1e-4)
batch_size = 4
num_epochs = 20
num_classes = 2  # For sarcasm detection, we typically have two classes: sarcastic and not sarcastic

# Initialize the sarcasm detection model.
# The embedding table is sized from the tokenizer's `vocab_size` (10000 in the
# preprocessing cell) so the two cannot drift apart.
model = SarcasmDetectionModel(
    vocab_size=vocab_size,  # Size of your vocabulary
    embedding_dim=300,  # Size of each word embedding
    hidden_dim=256,  # Number of features in the hidden state of the LSTM
    num_classes=num_classes,
)

# Inputs for training: the padded sequences from the preprocessing cell.
# `training_labels` is already a numpy array and is used as-is.
training_data = training_padded

# Training loop
def train(training_data, training_labels, epochs, model, device, num_classes=2, batch_size=32):
    """Train `model` for `epochs` passes over the training set.

    Uses the module-level `optimizer`, `loss_fn`, `generate_batches` and
    `num_correct`.

    Args:
        training_data: indexable collection of input samples.
        training_labels: labels aligned with `training_data`.
        epochs: number of passes over the data.
        model: model exposing its variables as `model.v`.
        device: device string each batch is moved to.
        num_classes: depth of the one-hot encoding applied to the labels.
        batch_size: number of samples per batch.

    Returns:
        A list with one ``[epoch_index, mean_loss, accuracy]`` entry per epoch.
    """
    metrics = []
    dataset_size = len(training_data)
    # Ceiling division so the progress bar also counts a final partial batch
    num_batches = (dataset_size + batch_size - 1) // batch_size

    for epoch in range(epochs):
        # Reset the accumulators every epoch; otherwise the previous epoch's
        # average loss would be carried into this epoch's running sum.
        epoch_loss = 0.0
        train_correct = 0
        train_loop = tqdm(
            generate_batches(training_data, training_labels, dataset_size, batch_size=batch_size),
            total=num_batches,
            position=0,
            leave=True,
        )
        for xbatch, ybatch in train_loop:
            xbatch = ivy.to_device(ivy.array(xbatch), device)
            ybatch = ivy.to_device(ivy.array(ybatch), device)

            # One-hot encode ybatch
            ybatch_encoded = ivy.one_hot(ybatch, num_classes)

            # Compute loss and gradients; `loss` holds loss_fn's (loss, logits)
            loss, grads = ivy.execute_with_gradients(loss_fn, (model.v, model, xbatch, ybatch_encoded))

            # Update model parameters.
            # NOTE(review): `grads` is handed to the optimizer exactly as
            # returned - confirm its structure matches `model.v`.
            model.v = optimizer.step(model.v, grads)

            batch_loss = ivy.to_numpy(loss[0]).mean().item()  # Batch mean loss
            # Weight by batch length so a short final batch is not over-counted
            epoch_loss += batch_loss * xbatch.shape[0]
            train_correct += num_correct(loss[1], ybatch)

            train_loop.set_description(f"Epoch [{epoch + 1:2d}/{epochs}]")
            train_loop.set_postfix(
                running_loss=batch_loss,
                accuracy_percentage=(train_correct / dataset_size) * 100,
            )

        epoch_loss = epoch_loss / dataset_size
        training_accuracy = train_correct / dataset_size

        metrics.append([epoch, epoch_loss, training_accuracy])

        train_loop.write(
            f"\nAverage training loss: {epoch_loss:.6f}, Train Correct: {train_correct}",
            end="\n",
        )

    return metrics

# Train the model
# Every argument is passed by keyword so the call reads on its own
train(
    training_data=training_data,
    training_labels=training_labels,
    epochs=num_epochs,
    model=model,
    device=device,
    num_classes=num_classes,
    batch_size=batch_size,
)


  0%|          | 0/5000 [00:03<?, ?it/s]


IvyValueError: jax: execute_with_gradients: not enough values to unpack (expected 3, got 2)