# In [None]:
# Cell 1: Import necessary libraries
import pandas as pd
import requests
from tqdm import tqdm
import matplotlib.pyplot as plt
from keys_do_not_upload import deepseek_API_key,deepseek_authenticator

# In [ ]:
# Cell 2: Load dataset
# The data is assumed to be a CSV file.
# Maps each raw CSV header to the shorter name used by the rest of the script.
raw_to_short = {
    'sentence': 'sentence',
    'full context': 'context',
    'slang term': 'slang',
    'annotator confidence': 'confidence',
}
df = pd.read_csv("your_dataset.csv")

# Keep only the mapped columns (in mapping order) and apply the short names.
df = df[list(raw_to_short)].rename(columns=raw_to_short)

# Cell 3: DeepSeek API configuration and query function
API_URL = "https://api.deepseek.com/v1/chat/completions"  # adjust if needed
API_KEY = deepseek_API_key

# HTTP headers sent with every request: JSON body plus bearer-token auth.
# NOTE(review): the token comes from deepseek_authenticator, while API_KEY is
# assigned above but not used here — confirm which credential is intended.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {deepseek_authenticator}",
}

def ask_deepseek_is_slang(slang_term, context, timeout=30):
    """Ask the DeepSeek chat endpoint whether a term is used as slang.

    Sends a single-turn chat request to ``API_URL`` (using the module-level
    ``headers``) and interprets the model's Yes/No answer.

    Args:
        slang_term: The word or phrase to be judged.
        context: The text in which the term appears.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True if the first word of the reply is "yes", False if it is "no",
        and None if the request failed, the response had an unexpected
        shape, or the reply was neither "yes" nor "no".
    """
    prompt = f"In the following context, is the word '{slang_term}' used as a slang expression? Reply with only 'Yes' or 'No'.\n\nContext:\n{context}"

    payload = {
        "model": "deepseek-chat",
        "messages": [
            {"role": "user", "content": prompt}
        ]
    }

    try:
        # A timeout keeps one stalled connection from hanging the whole batch.
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        content = response.json()['choices'][0]['message']['content']
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # Network/HTTP failure, or a response body without the expected fields.
        print(f"Error: {e}")
        return None

    if not isinstance(content, str):
        print(f"Error: unexpected reply {content!r}")
        return None

    # Judge only the first word, stripped of quotes/punctuation. A substring
    # test would misread replies such as "No, 'eyes' is literal" as a yes.
    words = content.strip().lower().split()
    verdict = words[0].strip("'\"`*.,:;!") if words else ""
    if verdict == "yes":
        return True
    if verdict == "no":
        return False

    # Neither answer: report it instead of silently counting it as "no".
    print(f"Error: unexpected reply {content!r}")
    return None

# Cell 4: Apply DeepSeek to the dataset
# Query the model once per row, in row order, passing the slang term and its
# context. Zipping the two columns avoids building a Series per row.
results = [
    ask_deepseek_is_slang(slang, context)
    for slang, context in tqdm(zip(df['slang'], df['context']), total=len(df))
]

df['deepseek_slang'] = results

# Drop any rows where the API gave no usable answer (None).
# .copy() makes the result an independent frame so that adding columns to it
# later does not raise SettingWithCopyWarning.
df = df[df['deepseek_slang'].notnull()].copy()

# Cell 5: Calculate accuracy according to annotator confidence
# Use the annotator confidence score as each row's weight.
df['weight'] = df['confidence'].astype(int)

# DeepSeek is binary: 1 if slang, 0 if not
df['deepseek_binary'] = df['deepseek_slang'].astype(int)

# Since all are labeled as slang in the dataset, DeepSeek should ideally say 'yes'
df['correct'] = df['deepseek_binary']

# Weighted accuracy = weight on correctly judged rows / total weight
total_weight = df['weight'].sum()
correct_weight = (df['correct'] * df['weight']).sum()
if total_weight != 0:
    accuracy = correct_weight / total_weight
else:
    # No scorable rows (e.g. every API call failed): report NaN explicitly
    # rather than dividing by zero.
    print("Warning: total weight is zero; accuracy is undefined")
    accuracy = float('nan')

print(f"Weighted Accuracy: {accuracy:.3f}")

# Cell 6:  Visual breakdown
# Readable labels for the confidence scores, listed in the order the bars
# should appear (default group order would be alphabetical: High, Low, Medium).
label_order = ['Low', 'Medium', 'High']
df['conf_label'] = df['confidence'].map({1: 'Low', 2: 'Medium', 3: 'High'})

# Weighted accuracy per confidence level, computed from grouped sums. Rows
# whose confidence has no label (NaN) are excluded by groupby.
weighted_correct = (df['correct'] * df['weight']).groupby(df['conf_label']).sum()
weight_totals = df['weight'].groupby(df['conf_label']).sum()
grouped = weighted_correct / weight_totals

# Order the levels Low -> Medium -> High, keeping only those present in the data.
grouped = grouped.reindex([label for label in label_order if label in grouped.index])

grouped.plot(kind='bar', title='Accuracy by Annotator Confidence Level', ylabel='Accuracy')
plt.show()