In [None]:

# If using Colab, uncomment the next line
# %pip install transformers tensorflow scikit-learn pandas matplotlib seaborn


In [None]:

import pandas as pd
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Dropout, LSTM, Conv1D, GlobalMaxPooling1D, Bidirectional
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:

# The dataset file must sit in the same directory as this notebook.
# It is read as JSON Lines, the two columns of interest are renamed to
# "comment" / "label", and every other column is dropped.
df = (
    pd.read_json("Sarcasm_Headlines_Dataset.json", lines=True)
    .rename(columns={"headline": "comment", "is_sarcastic": "label"})
    .loc[:, ['comment', 'label']]
)
df.head()


In [None]:

# Load the tokenizer published with the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased'
)

def encode_sentences(sentences, tokenizer, max_length=64):
    """Tokenize a collection of sentences into fixed-length model inputs.

    The whole collection is tokenized with a single batched tokenizer call
    instead of one call per sentence.

    Args:
        sentences: Iterable of strings (list, NumPy array, pandas Series, ...).
        tokenizer: Callable tokenizer (e.g. a Hugging Face ``BertTokenizer``)
            that accepts a list of strings plus the keyword arguments below.
        max_length: Length every sequence is padded or truncated to.

    Returns:
        Tuple ``(input_ids, attention_masks)`` taken from the tokenizer's
        output. TensorFlow tensors are requested (``return_tensors='tf'``),
        one row per sentence and ``max_length`` columns.
    """
    # The batch API expects a plain list of strings, not an array or Series.
    texts = list(sentences)

    encoded = tokenizer(
        texts,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',   # pad short sentences up to max_length
        truncation=True,        # cut long sentences down to max_length
        return_attention_mask=True,
        return_tensors='tf',
    )
    return encoded['input_ids'], encoded['attention_mask']

# Labels become a tensor; headlines are tokenized with the default max_length.
labels = tf.convert_to_tensor(df['label'].to_numpy())
input_ids, attention_masks = encode_sentences(
    df['comment'].to_numpy(),
    tokenizer,
)


In [None]:

# scikit-learn selects rows with integer-array indexing, which TensorFlow eager
# tensors do not support, so the encoded inputs and labels are handed over as
# NumPy arrays. The resulting splits are NumPy arrays as well.
X_train_ids, X_test_ids, X_train_masks, X_test_masks, y_train, y_test = train_test_split(
    np.asarray(input_ids),
    np.asarray(attention_masks),
    np.asarray(labels),
    test_size=0.2,     # hold out 20% of the rows as the test set
    random_state=42,   # fixed seed so the split is reproducible
)


In [None]:

# Pretrained BERT encoder; its per-token outputs feed the BiLSTM + CNN head below.
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

# Two fixed-length integer inputs. The length of 64 matches the default
# max_length of encode_sentences; change both together.
input_ids_layer = Input(shape=(64,), dtype=tf.int32, name="input_ids")
attention_mask_layer = Input(shape=(64,), dtype=tf.int32, name="attention_mask")

# Element 0 of the BERT output is the sequence of per-token hidden states.
bert_output = bert_model(input_ids_layer, attention_mask=attention_mask_layer)[0]

# Classification head: BiLSTM over the token sequence, a width-3 convolution,
# max-pooling over time, then a small dense classifier with dropout.
x = Bidirectional(LSTM(64, return_sequences=True))(bert_output)
x = Conv1D(64, kernel_size=3, activation='relu')(x)
x = GlobalMaxPooling1D()(x)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
# Single sigmoid unit: a probability for the binary label.
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=[input_ids_layer, attention_mask_layer], outputs=output)

# The BERT weights are trained together with the head. Adam's default learning
# rate (1e-3) is far above the 2e-5 to 5e-5 range used for BERT fine-tuning and
# tends to wipe out the pretrained weights, so a small rate is set explicitly.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()


In [None]:

# Train for three epochs in mini-batches of 32; Keras sets aside 10% of the
# training data for validation.
history = model.fit(
    x=[X_train_ids, X_train_masks],
    y=y_train,
    batch_size=32,
    epochs=3,
    validation_split=0.1,
)


In [None]:

# Score the model on the held-out test split and report its accuracy.
loss, accuracy = model.evaluate(
    x=[X_test_ids, X_test_masks],
    y=y_test,
)
print(f"Test Accuracy: {accuracy:.4f}")


In [None]:

# Persist the trained model as a single HDF5 file.
# NOTE(review): reloading a file that contains the TFBertModel layer presumably
# needs custom_objects={'TFBertModel': TFBertModel} — confirm before relying on it.
model.save(filepath="sarcasm_bert_lstm_cnn_model.h5")


In [None]:

# Training vs. validation accuracy per epoch, drawn on an explicit Axes.
fig, ax = plt.subplots()
for metric_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Val Accuracy'),
):
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
ax.grid()
ax.set_title("Training History")
plt.show()
