In [1]:
# Extract the music concepts (the "aspect" column) from frequent_balanced_aspects.csv into a text file
import csv

def extract_aspects(csv_path="frequent_balanced_aspects.csv",
                    txt_path="aspects.txt"):
    """Copy the ``aspect`` column of a CSV file into a text file, one value per line.

    Args:
        csv_path: CSV file with a header row; values are read from its
            ``aspect`` column.
        txt_path: Destination text file (UTF-8); overwritten if it exists.

    Rows whose ``aspect`` value is missing, empty, or whitespace-only are
    skipped, so the output never contains blank lines and the printed count
    equals the number of lines written.
    """
    with open(csv_path, newline='', encoding='utf-8') as f:
        # Strip first, then filter: a whitespace-only cell is truthy before
        # stripping but would otherwise become an empty output line.
        stripped = ((row.get("aspect") or "").strip() for row in csv.DictReader(f))
        aspects = [a for a in stripped if a]

    with open(txt_path, "w", encoding="utf-8") as out:
        out.writelines(a + "\n" for a in aspects)
    print(f"Wrote {len(aspects)} aspects to {txt_path}")

# Run the extraction with its default paths when this code is executed
# directly (as a script or as a notebook cell).
if __name__ == "__main__":
    extract_aspects()


Wrote 344 aspects to aspects.txt


In [2]:
# Small helper that loads the concept list from the text file written by the previous cell
def load_concepts(txt_path="aspects.txt"):
    """Read the concept list from ``txt_path``, one concept per line.

    The file is decoded as UTF-8. Every line is stripped of surrounding
    whitespace, and lines that are empty after stripping are dropped.

    Returns:
        list[str]: The remaining lines, in file order.
    """
    loaded = []
    with open(txt_path, encoding="utf-8") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                loaded.append(cleaned)
    return loaded

# Load the concept list from aspects.txt and report how many entries it holds.
concepts = load_concepts("aspects.txt")
print(f"Loaded {len(concepts)} concepts")
print(concepts[:10])  # show the first 10 concepts as a quick sanity check

Loaded 344 concepts
['low quality', 'medium tempo', 'emotional', 'instrumental', 'energetic', 'passionate', 'noisy', 'live performance', 'acoustic drums', 'amateur recording']


In [6]:
PROMPTS_TXT = "prompt.txt"
def load_prompts(path=PROMPTS_TXT):
    """Read the prompts stored in ``path``, one prompt per line.

    The whole file is read as UTF-8 and split with ``str.splitlines``. Each
    line is stripped of surrounding whitespace, and lines that are empty
    after stripping are dropped.

    Returns:
        list[str]: The remaining prompts, in file order.
    """
    with open(path, encoding='utf-8') as handle:
        content = handle.read()
    prompts = []
    for raw_line in content.splitlines():
        text = raw_line.strip()
        if text:
            prompts.append(text)
    return prompts
# Sanity check: load the prompt file and report how many prompts it contains.
p = load_prompts(PROMPTS_TXT)
print(f"Loaded {len(p)} prompts")

Loaded 1001 prompts


In [None]:

import os
import json
import csv
import itertools
import google.generativeai as genai
import re
from google.generativeai.types import GenerationConfig

# Read the API key from the environment rather than hardcoding it in the
# notebook, so the secret is never saved or shared with the file.
API_KEY = os.environ.get("GOOGLE_API_KEY")
if not API_KEY:
    # Fail here with a clear message instead of later, inside the first API call.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export your Gemini API key before running this cell."
    )

genai.configure(api_key=API_KEY)

# Name of the Gemini model used for tagging.
MODEL_NAME = "gemini-2.0-flash-lite"
# Variant without a generation config (disabled):
# model = genai.GenerativeModel(MODEL_NAME)

# Generation config that requests "application/json" as the response MIME
# type, so replies can be passed straight to json.loads.
json_generation_config = GenerationConfig(
  response_mime_type="application/json"
)
# Model handle shared by the rest of the cell; every call uses the JSON config.
model = genai.GenerativeModel(
    MODEL_NAME,
    generation_config=json_generation_config
)



# Input and output file paths.
ASPECTS_TXT = "aspects.txt"        # input: one music concept per line
PROMPTS_TXT = "prompt.txt"         # input: one prompt per line
OUTPUT_CSV = "prompt_tags.csv"     # NOTE(review): not written by the code visible in this cell — confirm it is still needed
OUTPUT_JSON   = "prompt_tags.json" # output: tagged prompts, written by main()

def load_concepts(path=ASPECTS_TXT):
    """Return the non-blank lines of ``path`` with surrounding whitespace removed.

    The file is decoded as UTF-8 and read line by line; the order of the
    lines is preserved.
    """
    with open(path, encoding="utf-8") as handle:
        stripped = (raw_line.strip() for raw_line in handle)
        return [entry for entry in stripped if entry]

def load_prompts(path=PROMPTS_TXT):
    """Return the non-blank lines of ``path`` with surrounding whitespace removed.

    The file is read in full as UTF-8 and split with ``str.splitlines``; the
    order of the lines is preserved.
    """
    with open(path, encoding='utf-8') as handle:
        content = handle.read()
    return [text for text in map(str.strip, content.splitlines()) if text]

def chunked(iterable, size):
    """Yield consecutive lists of up to ``size`` items taken from ``iterable``.

    Items are consumed lazily. Every yielded list has ``size`` items except
    possibly the last, which holds whatever remains. Nothing is yielded for
    an empty iterable.
    """
    source = iter(iterable)
    batch = list(itertools.islice(source, size))
    while batch:
        yield batch
        batch = list(itertools.islice(source, size))

# Category names used to group the tags of each prompt (checked with o4 to
# cover all tags). They are the same names, in the same order, as the
# list-valued keys of the JSON template that main() sends to the model.
CATEGORIES = [
    "genre",
    "instruments",
    "mood",
    "tempo",
    "audio_quality",
    "performance_context",
    "vocals",
    "style",
]

# Test mode: when True, main() keeps only the first 40 prompts and the first
# 5 concepts, and prints "test" after every batch.
TEST = False

def _build_instructions(concepts):
    """Build the fixed instruction text that precedes every batch of prompts."""
    # "prompt" first, then one empty list per category; CATEGORIES fixes the key order.
    template = {"prompt": "<prompt text>"}
    template.update((category, []) for category in CATEGORIES)
    return (
        "Below is a list of possible music concepts (one per line), make sure it is exactly match the json format in order for me to load:\n"
        + "\n".join(f"- {c}" for c in concepts)
        + "\n\n"
        + "For each user prompt below, select only from the above list the concepts that apply, and return a JSON array. "
        + "Each element should follow this template exactly:\n"
        + json.dumps(template, indent=2)
        + "\nOutput only the JSON array."
    )


def _parse_tag_array(text, batch_idx):
    """Decode one model reply and check that it is a JSON array."""
    try:
        results = json.loads(text)
    except json.JSONDecodeError as e:
        print("JSON :", e)
        raise
    if not isinstance(results, list):
        # list.extend() on a dict would silently append its keys as strings.
        raise ValueError(
            f"batch {batch_idx}: expected a JSON array, got {type(results).__name__}"
        )
    return results


def main(batch_size=20, max_prompts=1000):
    """Tag every prompt with music concepts and save the results as JSON.

    Loads the concepts and prompts with ``load_concepts()`` and
    ``load_prompts()``, sends the prompts to ``model`` in batches together
    with the concept list, parses each reply as a JSON array and appends its
    elements to the results. ``OUTPUT_JSON`` is rewritten after every batch,
    so the results of completed batches are on disk if a later batch fails.

    Args:
        batch_size: Number of prompts sent per request.
        max_prompts: Only the first ``max_prompts`` prompts are processed;
            ``None`` processes all of them.

    Raises:
        json.JSONDecodeError: If a reply is not valid JSON.
        ValueError: If ``batch_size`` is smaller than 1, or a reply is valid
            JSON but not an array.

    When the module-level ``TEST`` flag is true, the run is limited to the
    first 40 prompts and the first 5 concepts.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    concepts = load_concepts()
    prompts = load_prompts()[:max_prompts]
    if TEST:
        prompts = prompts[:40]
        concepts = concepts[:5]

    print(f"all {len(concepts)}  music concept，{len(prompts)} prompt")

    # The instruction text depends only on the concept list, so build it once.
    sys_section = _build_instructions(concepts)
    all_results = []

    for batch_idx, batch in enumerate(chunked(prompts, batch_size), start=1):
        # Number prompts globally (1-based) so the numbering continues across batches.
        first_number = (batch_idx - 1) * batch_size + 1
        user_section = "\n\n".join(
            f"Prompt {number}: {prompt}"
            for number, prompt in enumerate(batch, start=first_number)
        )
        full_prompt = sys_section + "\n\n" + user_section

        resp = model.generate_content(full_prompt)
        text = resp.text.strip()

        print(f"Response text: '{text}'")

        all_results.extend(_parse_tag_array(text, batch_idx))

        if TEST:
            print("test")

        # Checkpoint: rewrite the whole output file with everything collected so far.
        with open(OUTPUT_JSON, 'w', encoding='utf-8') as f:
            json.dump(all_results, f, ensure_ascii=False, indent=2)

        print(f"finished {OUTPUT_JSON}")

# Run the tagging pipeline when this code is executed directly (as a script
# or as a notebook cell).
if __name__ == "__main__":
    main()


all 344  music concept，1000 prompt
Response text: '[
  {
    "prompt": "Digital drums are playing a four on the floor rhythm with a kick on every beat along with a bassline and a keyboard sound playing short rhythmic chords and a e-guitar playing a simple melody along. A male voice is singing in a higher key. This song may be playing at a folkfest.",
    "genre": [],
    "instruments": [
      "electronic drums",
      "e-guitar",
      "bass",
      "keyboard"
    ],
    "mood": [],
    "tempo": [],
    "audio_quality": [],
    "performance_context": [
      "live performance"
    ],
    "vocals": [
      "male voice"
    ],
    "style": [
      "electronic music"
    ]
  },
  {
    "prompt": "An acoustic drum is playing along with a bassline giving the song a ska/reggae feeling. The e-guitar strumming on the offbeat supports that feeling while a keyboard is playing a bell-sounding chord. Bongos are setting little accents at the end. A saxophone is playing a melody along. This song ma