In [None]:
# üì¶ Install needed libraries
!pip install pandas scikit-learn



In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# 1️⃣ Load the dataset (Kaggle JSON file, read as one JSON record per line)
# Download dataset: https://www.kaggle.com/datasets/rmisra/news-headlines-dataset-for-sarcasm-detection
df = pd.read_json(
    path_or_buf="Sarcasm_Headlines_Dataset.json",
    lines=True,
)


In [None]:
# Sanity-check the load by printing the first five rows
print(df.head(n=5))

                                        article_link  \
0  https://www.huffingtonpost.com/entry/versace-b...   
1  https://www.huffingtonpost.com/entry/roseanne-...   
2  https://local.theonion.com/mom-starting-to-fea...   
3  https://politics.theonion.com/boehner-just-wan...   
4  https://www.huffingtonpost.com/entry/jk-rowlin...   

                                            headline  is_sarcastic  
0  former versace store clerk sues over secret 'b...             0  
1  the 'roseanne' revival catches up to our thorn...             0  
2  mom starting to fear son's web series closest ...             1  
3  boehner just wants wife to listen, not come up...             1  
4  j.k. rowling wishes snape happy birthday in th...             0  


In [None]:
# 2️⃣ Separate the input text from the target labels
X, y = (
    df['headline'],      # text headlines
    df['is_sarcastic'],  # 1 = sarcastic, 0 = not sarcastic
)


In [None]:
# 3️⃣ Train-test split: hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# 4️⃣ Convert text to numerical features using TF-IDF
# The vocabulary is capped at 5000 terms.
vectorizer = TfidfVectorizer(max_features=5000)
# Fit on the training headlines only, then reuse the fitted vectorizer
# to transform the test headlines.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:
# 5️⃣ Train a simple Logistic Regression model (default hyperparameters) on the TF-IDF features
model = LogisticRegression()
model.fit(X=X_train_tfidf, y=y_train)


In [None]:
# 6️⃣ Evaluate on the held-out test split
y_pred = model.predict(X_test_tfidf)
# The label's check-mark emoji was mis-encoded; restored to the intended character.
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
# Per-class precision, recall and F1
print(classification_report(y_test, y_pred))


✅ Accuracy: 0.8395731935604642
              precision    recall  f1-score   support

           0       0.85      0.87      0.86      2996
           1       0.83      0.80      0.81      2346

    accuracy                           0.84      5342
   macro avg       0.84      0.84      0.84      5342
weighted avg       0.84      0.84      0.84      5342



In [None]:
# 7️⃣ Try custom predictions
def predict_sarcasm(text: str) -> str:
    """Classify a single piece of text as sarcastic or not sarcastic.

    Relies on the notebook-level ``vectorizer`` and ``model`` objects, so the
    cells that create and fit them must be run first.

    Args:
        text: The headline or sentence to classify.

    Returns:
        "😏 Sarcastic" when the model predicts class 1, otherwise
        "🙂 Not Sarcastic".
    """
    # transform() takes an iterable of documents, so wrap the single string in a list.
    text_tfidf = vectorizer.transform([text])
    pred = model.predict(text_tfidf)[0]
    return "😏 Sarcastic" if pred == 1 else "🙂 Not Sarcastic"

In [None]:
# Run the classifier on a few hand-written examples and print each label
sample_texts = (
    "like u always right!",
    "Yeah right, like I have time for that.",
    " its really tasty ice-cream",
)
for sample in sample_texts:
    print(predict_sarcasm(sample))

😏 Sarcastic
😏 Sarcastic
🙂 Not Sarcastic
