<a href="https://colab.research.google.com/github/elgevan/sarcasm-embedding-exp/blob/main/sarcasm_gemini_as_a_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import google.generativeai as genai
import kagglehub
import os
import pandas as pd

from google.colab import userdata
from sklearn.metrics import accuracy_score

In [2]:
# Fetch the sarcasm-headlines dataset; the returned value is used below as the
# directory that contains the dataset file.
path = kagglehub.dataset_download("rmisra/news-headlines-dataset-for-sarcasm-detection")

# The file is read as JSON Lines (one record per line, hence lines=True). The
# location is built with os.path.join instead of manual '/' concatenation.
df = pd.read_json(os.path.join(path, 'Sarcasm_Headlines_Dataset.json'), lines=True)

# Keep only the headline text and the label that the later cells consume.
df = df[['headline', 'is_sarcastic']]



In [3]:
# Read the Gemini API key from the Colab secret store once, mirror it into the
# process environment, and hand the same value to the client library.
gemini_token = userdata.get('GEMINI_TOKEN')
os.environ['GEMINI_TOKEN'] = gemini_token
genai.configure(api_key=gemini_token)

# Model handle passed to classify_headline by the cells below.
model = genai.GenerativeModel('gemini-1.5-flash')

In [4]:
def _parse_classification(text):
    """Turn the model's raw reply into 0 or 1; warn and return None otherwise."""
    try:
        classification = int(text.strip())
    except ValueError:
        print(f"Warning: Could not parse classification from response: '{text}'. Returning None.")
        return None

    if classification not in (0, 1):
        print(f"Warning: Invalid classification value: '{classification}'. Returning None.")
        return None
    return classification


def classify_headline(headline, model):
    """
    Classifies a headline as either "Parody" (1) or "News" (0) using Gemini,
    asking the model to answer with a single digit.

    Args:
        headline: The headline string to classify.
        model: The Gemini model to use for classification; it must provide
            generate_content(prompt, generation_config=...).

    Returns:
        1 for "Parody", 0 for "News", or None when no usable label was
        obtained: the request raised, the reply was not an integer, or the
        integer was neither 0 nor 1. A warning is printed in each of those
        cases. Callers must handle None before computing metrics.
    """
    prompt = f"""
        You are a headline classifier. Determine if the following headline is a "Parody" (satirical, humorous) or a "News" headline (factual, reporting on real events).

        Consider the following factors:
            * **Exaggeration/Sensationalism:** Parody headlines often use extreme or absurd language.
            * **Humor/Irony:** Parody relies on humor and irony that wouldn't be typical in a serious news article.
            * **Realism/Plausibility:** Parody headlines often describe events that are highly improbable or impossible.
            * **Source (Optional):** If you know the source of a headline, it can be a strong indicator (e.g., The Onion is known for parody). For this exercise, **do not make assumptions about the source if it's not provided.**

        Headline: "{headline}"

        Respond with only a single number:
        - 1 if the headline is a "Parody"
        - 0 if the headline is "News"

        Do not provide any other text or explanation.
        """

    try:
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                candidate_count=1,
                # The answer is a single digit, so one output token is requested.
                max_output_tokens=1,
                temperature=0.0,
            )
        )

        # Read the text exactly once. If the accessor itself raises (for
        # example when the response carries no text), the error goes straight
        # to the handler below instead of being re-triggered while formatting
        # a parse warning.
        reply_text = response.text
        return _parse_classification(reply_text)

    except Exception as e:
        # Deliberate best-effort: one failed request must not abort a long
        # batch run, so report it and signal "no prediction".
        print(f"An error occurred: {e}")
        return None

In [5]:
# Classify every headline (one classify_headline call per row) and keep each
# prediction next to the label that came with the dataset.
results = []
for headline, is_sarcastic in zip(df['headline'], df['is_sarcastic']):
    results.append({
        'headline': headline,
        'is_sarcastic_pred': classify_headline(headline, model),
        'is_sarcastic': is_sarcastic
    })

# Build the frame once from the accumulated records.
result_df = pd.DataFrame(results)

In [8]:
# classify_headline returns None when a request or parse fails, which leaves
# missing values in 'is_sarcastic_pred'. Those rows cannot be scored, so they
# are excluded here and the number of exclusions is reported.
scored_df = result_df.dropna(subset=['is_sarcastic_pred'])
n_missing = len(result_df) - len(scored_df)
if n_missing:
    print(f"Warning: excluded {n_missing} of {len(result_df)} headlines with no prediction.")

# Cast back to int: a column that contained missing values is no longer integer-typed.
print(accuracy_score(scored_df['is_sarcastic'], scored_df['is_sarcastic_pred'].astype(int)))

0.8041858549552585
