# In [None]:
# Genre & Style Comment Generator — Reusable Template

# This notebook automates the process of tagging music tracks with one-line genre and style comments
# using OpenAI's GPT model. It reads from a CSV, sends track data to GPT, and writes the output back to a file.

# --- IMPORT LIBRARIES --- #
# pandas is used for handling and editing tabular data (your CSV of tracks).
# OpenAI gives access to the ChatGPT API.
# os is used for file and folder management.
# time is used for creating delays between API calls (to avoid rate limits).
# tenacity adds automatic retry logic for failed API calls.
# dotenv loads your API key from the .env file so it's not hardcoded.
# logging lets us see detailed information about what's happening when the notebook runs.

import pandas as pd
from openai import OpenAI
import os
import time
from tenacity import retry, wait_fixed, stop_after_attempt, retry_if_exception_type, before_sleep_log
from dotenv import load_dotenv
import logging

# --- LOGGING SETUP FOR RETRIES --- #
# This section sets up logging so we can see retry attempts and API activity.
# It will print useful information to the notebook output (e.g. retry messages, OpenAI request responses).

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- CONFIGURATION --- #
# Input playlist CSV and the destination for the tagged copy.
# Point INPUT_FILE at another CSV to process a different playlist.

INPUT_FILE = "../data/PureElectroGroove.csv"
OUTPUT_FILE = "../output/PureElectroGroove_GenreTagged.csv"

# --- API KEY AND CLIENT --- #
# The key is read from the environment after loading ../.env, so it never
# appears in the notebook itself. A missing or empty key stops the run here.

load_dotenv("../.env")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("OPENAI_API_KEY not found. Add it to the .env file in the project root.")

# Client object used for the chat-completion requests.
client = OpenAI(api_key=OPENAI_API_KEY)

# --- LOAD DATA (resume from output if it exists) --- #
# If the output file already exists, we load from there instead of the input.
# This means any tracks that were already tagged in a previous run will be kept,
# and only tracks with empty comments will be processed — saving time and API costs.

# Prefer the output file when it exists so comments written by an earlier
# run are kept; otherwise start from the untouched input CSV.
if os.path.exists(OUTPUT_FILE):
    print(f"📂 Resuming from {OUTPUT_FILE}")
    df = pd.read_csv(OUTPUT_FILE)
else:
    df = pd.read_csv(INPUT_FILE)

# Ensure the comment column exists (lowercase, matching the column name
# used when rows are read and written further down).
if "comment" not in df.columns:
    df["comment"] = ""

# A comment column that is present but completely empty is parsed by
# read_csv as float64 NaN. Writing strings into a float column is deprecated
# in recent pandas (and an error in newer releases), so store it as object.
df["comment"] = df["comment"].astype("object")

# --- GPT PROMPT FUNCTION --- #
# This function prepares a prompt and sends it to GPT to get a genre/style comment.
# The @retry decorator means if the API call fails, it will wait 5 seconds and try again (up to 3 attempts in total).

@retry(
    wait=wait_fixed(5),
    stop=stop_after_attempt(3),
    retry=retry_if_exception_type(Exception),
    before_sleep=before_sleep_log(logger, logging.INFO),
    # After the final attempt, re-raise the underlying error instead of
    # tenacity's RetryError wrapper so the traceback shows the real cause.
    reraise=True,
)
def generate_genre_comment(title, artist, model="gpt-4"):
    """Ask the chat model for a one-line genre/style comment for a track.

    The call is attempted up to 3 times in total, waiting 5 seconds between
    attempts; each retry is logged at INFO level.

    Args:
        title: Track title, interpolated into the prompt.
        artist: Artist name, interpolated into the prompt.
        model: Name of the chat model to query. Defaults to "gpt-4".

    Returns:
        The model's reply with surrounding whitespace removed.

    Raises:
        ValueError: If the model still returns an empty reply on the last
            attempt.
        Exception: Whatever the API call raised on the last attempt.
    """
    prompt = f"""
    Describe the track for DJs in one line. Include decade and country if possible. Use vivid club-friendly phrasing.
    Avoid repeating genre terms and keep the tone colorful and stylish.
    Format: <Main genre>; <production traits>; <scene/mood/era info>.

    Track title: "{title}"
    Artist: {artist}
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a music genre expert and DJ selector."},
            {"role": "user", "content": prompt.strip()}
        ]
    )

    # The message content can be None or blank; fail with a clear error
    # (which the decorator retries) rather than an opaque AttributeError.
    content = response.choices[0].message.content
    if content is None or not content.strip():
        raise ValueError(f"Empty response from model for {title!r} by {artist!r}")
    return content.strip()

# --- MAIN PROCESSING LOOP --- #
# This goes through each track in your CSV file.
# If a track already has a comment, it's skipped (this is what makes resume work).
# After each successful API call, the output is saved immediately — so if the notebook
# crashes mid-run, you won't lose any progress.

# Create the output folder if needed. dirname() is "" for a bare file name,
# and os.makedirs("") would raise, so only call it for a real directory.
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)


def _save_progress(frame, path):
    """Write frame to path as CSV via a temp file and an atomic rename.

    Writing straight onto the resume file would leave it truncated if the
    process died mid-write; os.replace swaps in the complete file instead.
    """
    tmp_path = f"{path}.tmp"
    frame.to_csv(tmp_path, index=False)
    os.replace(tmp_path, path)


total = len(df)
skipped = 0

# enumerate() supplies the human-readable counter so the progress message
# does not depend on the DataFrame index being 0, 1, 2, ...
for position, (i, row) in enumerate(df.iterrows(), start=1):
    existing = row["comment"]
    if pd.notna(existing) and str(existing).strip():
        # Already tagged (by a previous run or in the input) - leave as is.
        skipped += 1
        continue
    print(f"🎧 Track {position}/{total}: {row['title']} by {row['artist']}")
    df.at[i, "comment"] = generate_genre_comment(row["title"], row["artist"])
    # Save after each track so progress is not lost
    _save_progress(df, OUTPUT_FILE)
    time.sleep(1.5)

# Save once more so the output file exists even when every track was skipped
# (e.g. the input already had comments and no API call was made).
_save_progress(df, OUTPUT_FILE)

if skipped:
    print(f"⏭️  Skipped {skipped} tracks that already had comments")
print(f"✅ Done! Comments saved to {OUTPUT_FILE}")