In [51]:
import os
import openai
from tqdm import tqdm
from dotenv import load_dotenv
import logging
import sys
import time
import concurrent.futures
import requests
import json

logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

logger = logging.getLogger(__name__)

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

In [10]:
import pandas as pd

# Load the dataset
file_path = "../datasets/combined.parquet"
data = pd.read_parquet(file_path)

In [49]:
def call_openai_api(model, messages, max_attempts=6, timeout_duration=30):
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
    }

    payload = {"model": model, "messages": messages}

    for attempt in range(max_attempts):
        try:
            response = requests.post(
                url, headers=headers, data=json.dumps(payload), timeout=timeout_duration
            )
            response.raise_for_status()  # Raise an HTTPError if the HTTP request returned an unsuccessful status code
            return response.json()
        except requests.Timeout:
            logger.warning(f"Timeout occurred on attempt {attempt + 1}")
        except requests.RequestException as e:
            logger.error(f"Request failed: {e}")
        time.sleep(1 + attempt * 2)  # Exponential back-off

    logger.error("Max retry attempts reached. Returning None.")
    return None

In [16]:
prompt = {
    "role": "system",
    "content": "I want you to act as a sarcasm detector. I will provide you with a sentence, and your task is to analyze it and respond with only a single digit: '1' if the sentence is sarcastic, and '0' if it is not. No other text or explanation should be included in your response.",
}

messages = [
    [prompt, {"role": "user", "content": data.iloc[i]["sentence"]}]
    for i in range(len(data))
]
messages[:5]

[[{'role': 'system',
   'content': 'You are a sarcasm detector, given a sentence, you must ONLY reply by 1 (is sarcasm) or 0 (is not sarcasm), no other text should be used in the answer.'},
  {'role': 'user',
   'content': 'thirtysomething scientists unveil doomsday clock of hair loss'}],
 [{'role': 'system',
   'content': 'You are a sarcasm detector, given a sentence, you must ONLY reply by 1 (is sarcasm) or 0 (is not sarcasm), no other text should be used in the answer.'},
  {'role': 'user',
   'content': 'dem rep. totally nails why congress is falling short on gender, racial equality'}],
 [{'role': 'system',
   'content': 'You are a sarcasm detector, given a sentence, you must ONLY reply by 1 (is sarcasm) or 0 (is not sarcasm), no other text should be used in the answer.'},
  {'role': 'user',
   'content': 'eat your veggies: 9 deliciously different recipes'}],
 [{'role': 'system',
   'content': 'You are a sarcasm detector, given a sentence, you must ONLY reply by 1 (is sarcasm) or 0

In [57]:
responses = []
messages_sample = messages[:4000]

for message in tqdm(messages_sample):
    response = call_openai_api(
        model="gpt-3.5-turbo", messages=message, max_attempts=6, timeout_duration=5
    )
    responses.append(response["choices"][0]["message"]["content"])

  0%|          | 0/4000 [00:00<?, ?it/s]DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.openai.com:443
DEBUG:urllib3.connectionpool:https://api.openai.com:443 "POST /v1/chat/completions HTTP/1.1" 200 None
  0%|          | 1/4000 [00:01<2:06:04,  1.89s/it]DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.openai.com:443
DEBUG:urllib3.connectionpool:https://api.openai.com:443 "POST /v1/chat/completions HTTP/1.1" 200 None
  0%|          | 2/4000 [00:02<1:16:28,  1.15s/it]DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.openai.com:443
DEBUG:urllib3.connectionpool:https://api.openai.com:443 "POST /v1/chat/completions HTTP/1.1" 200 None
  0%|          | 3/4000 [00:03<57:27,  1.16it/s]  DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.openai.com:443
DEBUG:urllib3.connectionpool:https://api.openai.com:443 "POST /v1/chat/completions HTTP/1.1" 200 None
  0%|          | 4/4000 [00:03<46:01,  1.45it/s]DEBUG:urllib3.connecti

In [59]:
data_sample = data.iloc[:4000]  # This should match the messages_sample

integer_responses = []
for response in responses:
    # Extract only the first character and check if it's '0' or '1'
    first_char = response.strip()[0] if response else ""
    if first_char in ("0", "1"):
        integer_responses.append(int(first_char))
    else:
        # Handle invalid or unexpected response
        # You could append a default value or handle this as an error
        integer_responses.append(None)  # or some default value like 0 or 1

if filtered_responses := [
    (resp, data_sample.iloc[i]["is_sarcastic"])
    for i, resp in enumerate(integer_responses)
    if resp is not None
]:
    correct_predictions = sum(resp == is_sarc for resp, is_sarc in filtered_responses)
    accuracy = correct_predictions / len(filtered_responses)
    print(f"Accuracy: {accuracy:.2%}")
else:
    print("No valid responses to calculate accuracy.")

Accuracy: 59.75%
