In [12]:
import pandas as pd
import requests
import time
import random
import re

api_key = "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"

headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

EMOTION_LIST = ["joy", "anger", "fear", "nostalgia", "sad", "obsession", "sexual", "hope", "calm", "tender", "violence", "power"]
EMOTION_SET = set(EMOTION_LIST)

def extract_emotions_only(text):
    # Extract all valid emotion words in order, remove duplicates
    found = re.findall(r'\b(?:' + '|'.join(EMOTION_LIST) + r')\b', text.lower())
    found = list(dict.fromkeys(found))  # deduplicate, preserve order
    if 2 <= len(found) <= 4:
        return ', '.join(found)
    else:
        return "error"

def is_valid_emotion_output(output):
    emotions = [e.strip() for e in output.lower().split(",")]
    return 2 <= len(emotions) <= 4 and all(e in EMOTION_SET for e in emotions)

def get_lyrics_emotions(lyrics, max_retries=7):
    prompt = f"""
Given the following song lyrics, label them with the most dominant emotions.
Use only the following emotion labels: joy, anger, fear, nostalgia, sad, obsession, sexual, hope, calm, tender, violence, power.
Return the emotions separated by commas. The number of labels must be 2 to 4.
Do not provide any explanation. Only return the emotion labels.

Example 1:
Lyrics:
I miss the days we danced all night under the stars, now I'm lost and alone.
Emotions:
nostalgia, sad

Example 2:
Lyrics:
You said you'd love me forever, now I'm burning with rage.
Emotions:
anger, obsession

Lyrics:
{lyrics}

Emotions:
"""
    data = {
        "model": "llama3-70b-8192",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2
    }

    backoff = 2
    retries = 0

    while retries < max_retries:
        try:
            response = requests.post(
                "https://api.groq.com/openai/v1/chat/completions",
                headers=headers,
                json=data
            )
            if response.status_code == 200:
                raw_output = response.json()["choices"][0]["message"]["content"].strip()
                extracted = extract_emotions_only(raw_output)
                if extracted != "error" and is_valid_emotion_output(extracted):
                    return extracted
                else:
                    # Retry once if invalid output format
                    retries += 1
                    print(f"⚠️ Invalid output format, retrying {retries}/{max_retries}...")
                    time.sleep(random.uniform(1, 2))
                    continue

            elif response.status_code == 429:
                retries += 1
                try:
                    msg = response.json()["error"]["message"]
                    wait_time = float(msg.split("try again in ")[1].split("s")[0])
                except Exception:
                    wait_time = backoff

                jitter = random.uniform(0.75, 1.25)
                sleep_time = max(wait_time, backoff) * jitter
                print(f"⚠️ Rate limit hit. Retry {retries}/{max_retries}. Waiting {sleep_time:.2f} seconds...")
                time.sleep(sleep_time)
                backoff = min(backoff * 1.5, 30)

            else:
                print(f"API error {response.status_code}: {response.text}")
                return "error"

        except Exception as e:
            print(f"Request failed: {e}")
            return "error"

    print(f"⚠️ Failed to get valid response after {max_retries} retries, skipping lyric.")
    return "error"


# Load CSV
df = pd.read_csv("good.csv", encoding='ISO-8859-1')

if 'lyrics' not in df.columns:
    raise ValueError("CSV must have a column named 'lyrics'.")

if "emotions" not in df.columns:
    df["emotions"] = ""

# Loop over rows
for i, row in df.iterrows():
    lyrics = row["lyrics"]
    print(f"\nLyrics {i+1}/{len(df)}:\n{lyrics[:300]}...")

    emotions = get_lyrics_emotions(lyrics)
    df.at[i, "emotions"] = emotions

    print("Emotions:", emotions)
    print("-" * 60)

    if (i + 1) % 10 == 0:
        df.to_csv("labeled_lyrics_progress.csv", index=False)
        print(f"Progress saved at song {i+1}.")

    delay = random.uniform(6, 10)
    print(f"Waiting {delay:.2f} seconds before next request...")
    time.sleep(delay)

# Save final results
df.to_csv("labeled_lyrics_final.csv", index=False)
print("\n✅ Done! Emotions labeled and saved to labeled_lyrics_final.csv")



Lyrics 1/897:
Oh my god, did I just say that out loud? Should've known this was the kind of place That that sort of thing just wasn't allowed Should've known by the color of the drapes (Oh, my bad, venetian blinds) What the hell was I thinking saying exactly what's on my mind? But I won't deny I got a dirty mouth...


  df.at[i, "emotions"] = emotions


Emotions: anger, power
------------------------------------------------------------
Waiting 9.50 seconds before next request...

Lyrics 2/897:
[Verse 1] Remember when I called you on the telephone? You were so far away It was raining in New York, did I forget to say? It was later than I wanted it to be On an early summer's night The kind where you can't help but feel alive and free And I told you "From here on out, it's just you and me"  [...
Emotions: hope, joy
------------------------------------------------------------
Waiting 9.89 seconds before next request...

Lyrics 3/897:
[Verse 1] Calling me like I got something to say You thought wrong, but you do it anyway How's it been? Oh, not much, same for me, please go away I can put it on if that's what you want You'd like to get together, but I'd rather not Calls to mind a simpler time that who gave a shit forgot  [Pre-Chor...
Emotions: anger, obsession
------------------------------------------------------------
Waiting 8.22 seconds 

KeyboardInterrupt: 

In [None]:
import pandas as pd
import requests
import time
import random
import re

api_key = "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"

headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

EMOTION_LIST = ["joy", "anger", "fear", "nostalgia", "sad", "obsession", "sexual", "hope", "calm", "tender", "violence", "power"]
EMOTION_SET = set(EMOTION_LIST)

def extract_emotions_only(text):
    found = re.findall(r'\b(?:' + '|'.join(EMOTION_LIST) + r')\b', text.lower())
    found = list(dict.fromkeys(found))  # deduplicate, preserve order
    if 2 <= len(found) <= 4:
        return ', '.join(found)
    else:
        return "error"

def is_valid_emotion_output(output):
    emotions = [e.strip() for e in output.lower().split(",")]
    return 2 <= len(emotions) <= 4 and all(e in EMOTION_SET for e in emotions)

def get_lyrics_emotions(lyrics, max_retries=7):
    prompt = f"""
Given the following song lyrics, label them with the most dominant emotions.
Use only the following emotion labels: joy, anger, fear, nostalgia, sad, obsession, sexual, hope, calm, tender, violence, power.
Return the emotions separated by commas. The number of labels must be 2 to 4.
Do not provide any explanation. Only return the emotion labels.

Lyrics:
{lyrics}

Emotions:
"""
    data = {
        "model": "llama3-70b-8192",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2
    }

    backoff = 10  # Start with 10s backoff
    retries = 0

    while retries < max_retries:
        try:
            response = requests.post(
                "https://api.groq.com/openai/v1/chat/completions",
                headers=headers,
                json=data
            )
            if response.status_code == 200:
                raw_output = response.json()["choices"][0]["message"]["content"].strip()
                extracted = extract_emotions_only(raw_output)
                if extracted != "error" and is_valid_emotion_output(extracted):
                    return extracted
                else:
                    retries += 1
                    print(f"⚠️ Invalid output format, retrying {retries}/{max_retries}...")
                    time.sleep(random.uniform(3, 5))
                    continue

            elif response.status_code == 429:
                retries += 1
                try:
                    msg = response.json()["error"]["message"]
                    wait_time = float(msg.split("try again in ")[1].split("s")[0])
                except Exception:
                    wait_time = backoff

                jitter = random.uniform(1.0, 1.5)
                sleep_time = max(wait_time, backoff) * jitter
                print(f"⚠️ Rate limit hit. Retry {retries}/{max_retries}. Waiting {sleep_time:.2f} seconds...")
                time.sleep(sleep_time)
                backoff = min(backoff * 1.5, 60)  # Cap at 60s

            else:
                print(f"❌ API error {response.status_code}: {response.text}")
                return "error"

        except Exception as e:
            print(f"❌ Request failed: {e}")
            retries += 1
            time.sleep(backoff)
            backoff = min(backoff * 1.5, 60)

    print(f"⚠️ Failed to get valid response after {max_retries} retries. Skipping lyric.")
    return "error"

# Load CSV
df = pd.read_csv("good.csv", encoding='ISO-8859-1')

if 'lyrics' not in df.columns:
    raise ValueError("CSV must have a column named 'lyrics'.")

if "emotions" not in df.columns:
    df["emotions"] = ""

# Loop over rows
for i, row in df.iterrows():
    lyrics = row["lyrics"]
    print(f"\nLyrics {i+1}/{len(df)}:\n{lyrics[:300]}...")

    emotions = get_lyrics_emotions(lyrics)
    df.at[i, "emotions"] = emotions

    print("Emotions:", emotions)
    print("-" * 60)

    # Save progress after every lyric to avoid data loss
    df.to_csv("labeled_lyrics_progress.csv", index=False)
    print(f"✅ Progress saved at song {i+1}.")

    # Delay between requests (longer to avoid rate limits)
    delay = random.uniform(12, 20)
    print(f"⏳ Waiting {delay:.2f} seconds before next request...")
    time.sleep(delay)

# Final save
df.to_csv("labeled_lyrics_final.csv", index=False)
print("\n✅ Done! Emotions labeled and saved to labeled_lyrics_final.csv")



Lyrics 1/897:
Oh my god, did I just say that out loud? Should've known this was the kind of place That that sort of thing just wasn't allowed Should've known by the color of the drapes (Oh, my bad, venetian blinds) What the hell was I thinking saying exactly what's on my mind? But I won't deny I got a dirty mouth...


  df.at[i, "emotions"] = emotions


Emotions: anger, power
------------------------------------------------------------
✅ Progress saved at song 1.
⏳ Waiting 18.89 seconds before next request...

Lyrics 2/897:
[Verse 1] Remember when I called you on the telephone? You were so far away It was raining in New York, did I forget to say? It was later than I wanted it to be On an early summer's night The kind where you can't help but feel alive and free And I told you "From here on out, it's just you and me"  [...
⚠️ Rate limit hit. Retry 1/7. Waiting 65.94 seconds...
Emotions: hope, joy, tender
------------------------------------------------------------
✅ Progress saved at song 2.
⏳ Waiting 14.99 seconds before next request...

Lyrics 3/897:
[Verse 1] Calling me like I got something to say You thought wrong, but you do it anyway How's it been? Oh, not much, same for me, please go away I can put it on if that's what you want You'd like to get together, but I'd rather not Calls to mind a simpler time that who gave a shit forgo