<a href="https://colab.research.google.com/github/anjali-0404/AIML-practice/blob/main/Sarcastic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# -----------------------------
# SARCASTIC DETECTION SYSTEM
# -----------------------------

# 1️⃣ Import Libraries
import pandas as pd
import numpy as np
import re
import nltk
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Download stopwords
nltk.download('stopwords')
from nltk.corpus import stopwords

# 2️⃣ Predefined Dataset (Synthetic)
data = {
    'headline': [
        'the weather is lovely today',
        'I absolutely love waiting in long lines',
        'She won the lottery and bought a car',
        'Yeah right, because I love being ignored',
        'The food was amazing at the party',
        'Oh great, another Monday morning',
        'I am thrilled to do extra work for free',
        'He is actually the fastest runner',
        'Wonderful, my phone just died again',
        'I enjoyed the movie a lot'
    ],
    'is_sarcastic': [0,1,0,1,0,1,1,0,1,0]
}
df = pd.DataFrame(data)

# 3️⃣ Preprocessing Function
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

df['headline'] = df['headline'].apply(preprocess_text)

# 4️⃣ Split Dataset
X = df['headline'].values
y = df['is_sarcastic'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5️⃣ Tokenization & Padding
max_words = 1000
max_len = 20
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# 6️⃣ Build LSTM Model
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=32, input_length=max_len))
model.add(LSTM(32, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# 7️⃣ Train Model
history = model.fit(X_train_pad, y_train, epochs=10, batch_size=4, validation_split=0.1)

# 8️⃣ Evaluate Model
y_pred = (model.predict(X_test_pad) > 0.5).astype('int32')
print('Classification Report:\n', classification_report(y_test, y_pred))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))

# 9️⃣ Predict Function
def predict_sarcasm(text):
    text = preprocess_text(text)
    seq = tokenizer.texts_to_sequences([text])
    pad = pad_sequences(seq, maxlen=max_len)
    pred = model.predict(pad)[0][0]
    return 'Sarcastic 😏' if pred>0.5 else 'Not Sarcastic 🙂'

# 10️⃣ Test Examples
print(predict_sarcasm('I just love getting stuck in traffic'))
print(predict_sarcasm('The sun is shining beautifully today'))


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 657ms/step - accuracy: 0.3571 - loss: 0.6999 - val_accuracy: 0.0000e+00 - val_loss: 0.6966
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 86ms/step - accuracy: 0.5476 - loss: 0.6871 - val_accuracy: 0.0000e+00 - val_loss: 0.7170
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 0.8095 - loss: 0.6781 - val_accuracy: 0.0000e+00 - val_loss: 0.7386
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - accuracy: 0.7262 - loss: 0.6632 - val_accuracy: 0.0000e+00 - val_loss: 0.7604
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.8095 - loss: 0.6422 - val_accuracy: 0.0000e+00 - val_loss: 0.7852
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - accuracy: 0.8095 - loss: 0.6272 - val_accuracy: 0.0000e+00 - val_loss: 0.8114
Epoch 7/10
[1m2/2[

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 403ms/step
Not Sarcastic 🙂
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Not Sarcastic 🙂
