

# Training a Sarcasm Detection Model using LSTM


## Download the Dataset

First, you will download the JSON file and extract the contents into lists.

In [None]:
# Download the dataset
!wget https://storage.googleapis.com/tensorflow-1-public/course3/sarcasm.json

In [None]:
import json

# Parse the downloaded JSON file into a list of records
with open("./sarcasm.json", 'r') as json_file:
    datastore = json.load(json_file)

# Pull the headline text and the sarcasm flag out of every record,
# keeping the two lists aligned by position
sentences = [record['headline'] for record in datastore]
labels = [record['is_sarcastic'] for record in datastore]

## Split the Dataset

You will then split the lists into train and test sets.

In [None]:
# Number of leading examples used for training; the remainder is the test set
training_size = 20000

# Sentences: first `training_size` items for training, the rest for testing
training_sentences, testing_sentences = sentences[:training_size], sentences[training_size:]

# Labels: split at the same index so they stay aligned with the sentences
training_labels, testing_labels = labels[:training_size], labels[training_size:]

## Data preprocessing

Next, you will generate the vocabulary and padded sequences.

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenisation and padding settings
vocab_size = 10000
max_length = 120
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"

# Build the tokenizer and fit its vocabulary on the training sentences only
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Padding options shared by the training and testing sets
pad_options = dict(maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Training set: text -> integer sequences -> fixed-length padded array
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, **pad_options)

# Testing set: same pipeline, using the vocabulary fitted above
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, **pad_options)

# Labels become numpy arrays
training_labels, testing_labels = np.array(training_labels), np.array(testing_labels)

In [None]:
import tensorflow as tf

# Parameters

# Embedding dimension.
# NOTE(review): the model-definition cell passes a literal 100 to the Embedding
# layer instead of this value - confirm which size is intended.
embedding_dim = 16

# LSTM dimension
lstm_dim = 32

# Dense layer dimension
dense_dim = 24

# Number of epochs
NUM_EPOCHS = 10

## Sample input tensor

In [None]:
# Define the sequence length and feature dimension
# NOTE: neither value is referenced when `sample_input` is built below; the
# sample is shaped from `max_length` instead.
sequence_length = 32  # The number of timesteps in each input sample
feature_dim = 16      # The dimensionality of the input features

# Create a sample input tensor with random data
# The shape is (1, max_length): a single sample of `max_length` values.
# np.random.rand yields floats in [0, 1), which are stored here as float32.
sample_input = np.random.rand(1, max_length).astype(np.float32)

# Convert the numpy array to a TensorFlow tensor
# (its shape is later passed to `tf_lstm.build`, and the tensor itself to `ivy.transpile`)
sample_input_tensor = tf.convert_to_tensor(sample_input)

In [None]:
!pip install ivy

In [None]:
import ivy
import tensorflow as tf

# Model Definition with LSTM


In [None]:
# Model Definition with LSTM
# Layers are listed in forward order: embedding -> LSTM -> dense (ReLU) -> sigmoid output.
lstm_layers = [
    # Embedding layer: vocab_size rows, 100-dimensional vectors
    tf.keras.layers.Embedding(vocab_size, 100, input_length=max_length),
    # LSTM layer with lstm_dim units
    tf.keras.layers.LSTM(lstm_dim),
    # Dense layer with ReLU activation
    tf.keras.layers.Dense(dense_dim, activation='relu'),
    # Output layer: a single unit with sigmoid activation
    tf.keras.layers.Dense(1, activation='sigmoid'),
]
tf_lstm = tf.keras.Sequential(lstm_layers)

# Build the model using the shape of the sample input tensor
tf_lstm.build(sample_input_tensor.shape)

# Transpile to PyTorch with ivy.transpile

In [None]:
 # Transpile the Keras model to PyTorch, passing the sample tensor as the example argument
 torch_lstm = ivy.transpile(
     tf_lstm,
     source="tensorflow",
     to="torch",
     args=(sample_input_tensor,),
 )

# Compile the model

In [None]:

# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric
tf_lstm.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Display the model summary
tf_lstm.summary()

In [None]:
# `time` is imported in this cell so it runs on a fresh kernel
# (Restart Kernel -> Run All) without relying on a later cell.
import time

# Compile the model
tf_lstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Start the timer
start_time = time.time()

# Train the model for NUM_EPOCHS epochs, evaluating on the test split after each epoch
history = tf_lstm.fit(
    training_padded,
    training_labels,
    epochs=NUM_EPOCHS,
    validation_data=(testing_padded, testing_labels)
)

# End the timer
end_time = time.time()

# Calculate and print the total time taken
tensorflow_time = end_time - start_time
print(f'Total training time: {tensorflow_time:.2f} seconds')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Features and labels as float32 PyTorch tensors
X_train_tensor = torch.tensor(training_padded, dtype=torch.float32)
y_train_tensor = torch.tensor(training_labels, dtype=torch.float32)

# Pair each padded sequence with its label
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

# Serve the pairs in shuffled mini-batches of 64
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

# Binary cross-entropy loss for the binary classification task
loss_function = nn.BCELoss()

# Adam optimizer over the transpiled model's parameters
optimizer = optim.Adam(params=torch_lstm.parameters(), lr=1e-3)


In [None]:
import time

# Start the timer
start_time = time.time()

# Train the transpiled model (`torch_lstm`) on `train_loader` for NUM_EPOCHS epochs,
# reporting the mean batch loss and the training accuracy after each epoch.
for epoch in range(NUM_EPOCHS):
    torch_lstm.train()  # Set the model to training mode
    total_loss = 0
    correct_predictions = 0
    total_predictions = 0

    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = torch_lstm(inputs)  # Forward pass
        targets = targets.view(-1, 1)  # Reshape the target to match output shape
        loss = loss_function(outputs, targets)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update parameters

        total_loss += loss.item()

        # Convert outputs to predicted class (0 or 1) by rounding the sigmoid output
        predicted_classes = outputs.round()
        correct_predictions += (predicted_classes == targets).sum().item()
        total_predictions += targets.size(0)

    # Calculate average loss (per batch) and accuracy (per sample)
    average_loss = total_loss / len(train_loader)
    accuracy = correct_predictions / total_predictions

    print(f'Epoch {epoch+1} completed, Average Loss: {average_loss:.4f}, Accuracy: {accuracy:.4f}')

# End the timer
end_time = time.time()

# Calculate and print the total time taken.
# The elapsed time is stored in `torch_time`, which the comparison plot reads.
torch_time = end_time - start_time
print(f'Total training time: {torch_time:.2f} seconds')


In [None]:
import matplotlib.pyplot as plt

# Define bar labels, positions, and time values.
# The bar labels get their own name so the dataset `labels` list loaded at the
# top of the notebook is not overwritten (which would break re-running the split cell).
framework_labels = ['PyTorch', 'TensorFlow']
x_pos = np.arange(len(framework_labels))
times = [torch_time, tensorflow_time]

# Create the bar chart
plt.bar(x_pos, times, align='center', alpha=0.7, color=['red', 'blue'])

# Add the data values on top of the bars, rounded to two decimals and
# centred on each bar regardless of the magnitude of the timings
for i, v in enumerate(times):
    plt.text(x_pos[i], v, f'{v:.2f}', ha='center', va='bottom',
             color='blue', fontweight='bold')

# Add labels and title
plt.xticks(x_pos, framework_labels)
plt.ylabel('Time (seconds)')
plt.title('Training Time Comparison')

# Show the plot
plt.show()

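## Conclusion

Using the Ivy framework, the TensorFlow model was seamlessly transpiled into a PyTorch model, allowing for efficient training with similar results to the original TensorFlow model. Remarkably, the training time was significantly reduced from 662.58 seconds (TensorFlow) to just 264.12 seconds (PyTorch), demonstrating the effectiveness of using Ivy for model conversion and training optimization.

Note that the two runs are not configured identically: the TensorFlow run also evaluates on the test set after every epoch and uses the Keras default batch size (32), whereas the PyTorch loop trains with a batch size of 64 and performs no validation pass. Part of the timing difference may come from these settings.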