In [1]:
import json
import os
import requests
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the process environment so the
# os.getenv() lookup in the configuration cell can find the API key.
load_dotenv()

True

In [3]:
# --- Remote model -----------------------------------------------------------
# Chat-completions endpoint and the model identifier sent in each request.
API_URL = "https://models.github.ai/inference/chat/completions"
MODEL = "openai/gpt-4.1-nano"
# Bearer token for API_URL; None when the variable is not set.
API_KEY = os.environ.get("VITE_OPENAI_API_KEY")

# --- Files ------------------------------------------------------------------
# Scraped video listing that is read as input.
INPUT_FILE = "../resources/videos-scrap.json"
# Persistent title -> tag mapping, reloaded at the start of every run.
TAGS_FILE = "sources/tags.json"
# Destination for the final video data.
OUTPUT_FILE = "sources/videos_with_tags.json"

In [4]:
# Read the scraped video listing (UTF-8 JSON) into memory.
with open(INPUT_FILE, encoding="utf-8") as source_file:
    data = json.loads(source_file.read())

In [5]:
# Collect every tag already present on a video, across ALL date entries.
# The list is initialised before the loop so it exists even when `data` is
# empty, and it is accumulated rather than reassigned so tags from earlier
# entries are not discarded. Duplicates are skipped (first occurrence wins)
# because the list is later joined into the prompt as suggested tags.
new_tags = []
for obj in data:
    for video in obj['videos']:
        if 'tag' in video and video['tag'] not in new_tags:
            new_tags.append(video['tag'])
new_tags

[]

In [6]:
# Load the persisted title -> tag mapping, falling back to an empty mapping
# when the file is missing or does not contain valid JSON.
# The file is written as UTF-8 with ensure_ascii=False, so it must be read
# back as UTF-8 too; relying on the platform default encoding corrupts (or
# fails on) non-ASCII titles on systems whose default is not UTF-8.
tag_store = {}
if os.path.exists(TAGS_FILE):
    try:
        with open(TAGS_FILE, "r", encoding="utf-8") as f:
            tag_store = json.load(f)
    except json.JSONDecodeError:
        # Corrupt or truncated file (e.g. an interrupted write): start fresh.
        tag_store = {}

In [None]:
# Token that separates a title from its tag in the model's reply. The prompt
# asks for "- [Título]=># [Tag]"; splitting on "=>" (and then dropping an
# optional leading "#") accepts that format as well as minor variations such
# as "Title => #Tag".
TAG_SEPARATOR = "=>"
# Seconds to wait for the API; without a timeout a stalled connection would
# block the cell indefinitely.
REQUEST_TIMEOUT = 60


def _parse_tag_lines(content):
    """Parse the model reply into a ``{title: tag}`` dict.

    Only lines containing ``TAG_SEPARATOR`` are considered. The text before
    the last separator is the title (leading list dash removed); the text
    after it is the tag, with an optional leading ``#`` and surrounding
    square brackets removed. Lines with an empty title or tag are skipped.
    """
    parsed = {}
    for line in content.splitlines():
        if TAG_SEPARATOR not in line:
            continue
        # rpartition: the tag is a single word, so the LAST separator is the
        # real one even if the title happens to contain the token itself.
        raw_title, _, raw_tag = line.rpartition(TAG_SEPARATOR)
        titulo = raw_title.strip().lstrip("-").strip()
        tag = raw_tag.strip().lstrip("#").strip().strip("[]").strip()
        if titulo and tag:
            parsed[titulo] = tag
    return parsed


for fecha_obj in data:
    videos = fecha_obj["videos"]

    # Reuse tags persisted by a previous (possibly interrupted) run so those
    # videos are not sent to the API again.
    for v in videos:
        cached_tag = tag_store.get(v["titulo"].strip())
        if "tag" not in v and cached_tag:
            v["tag"] = cached_tag
            if cached_tag not in new_tags:
                new_tags.append(cached_tag)

    nuevos_videos = [v for v in videos if "tag" not in v]
    if not nuevos_videos:
        continue

    titulos = "\n".join(v["titulo"] for v in nuevos_videos)
    # De-duplicate (order preserved) so the prompt does not repeat tags.
    util_tags = ",".join(dict.fromkeys(str(t) for t in new_tags if t))

    prompt = f"Asigna una etiqueta (tag) temática de una sola palabra para cada uno de los siguientes títulos de video:\n\n{titulos}"

    if util_tags:
        prompt += f"\n\nPuedes utiliza las siguientes etiquetas o agregar otras nuevas: {util_tags}"
    prompt += "\n\nFormato de respuesta:\n- [Título]=># [Tag]"

    print(f"📝 Generando tags para {len(nuevos_videos)} videos...")

    try:
        response = requests.post(
            API_URL,
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": MODEL,
                "temperature": 1,
                "top_p": 1,
                "messages": [
                    {
                        "role": "system",
                        "content": "Eres un modelo que asigna etiquetas temáticas a títulos de videos musicales.",
                    },
                    {
                        "role": "user",
                        "content": prompt,
                    },
                ],
            },
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # Network/HTTP failure or an unexpected response shape: report it and
        # move on to the next date entry (best-effort, as before).
        print(f"❌ Error al llamar a la API: {e}")
        continue

    parsed = _parse_tag_lines(content or "")

    # Write each tag back onto its video so the final output actually
    # contains the tags, and remember it for later prompts and future runs.
    sin_tag = 0
    for v in nuevos_videos:
        titulo = v["titulo"].strip()
        # The model may echo the brackets from the format hint, so also try
        # the bracketed form of the title.
        tag = parsed.get(titulo) or parsed.get(f"[{titulo}]")
        if not tag:
            sin_tag += 1
            continue
        v["tag"] = tag
        tag_store[titulo] = tag
        if tag not in new_tags:
            new_tags.append(tag)

    if sin_tag:
        print(f"⚠️ {sin_tag} videos quedaron sin tag (la respuesta no incluyó su título)")

    # Persist the updated tags after every batch so progress survives an
    # interruption. Kept outside the try block so a file error is not
    # misreported as an API error.
    with open(TAGS_FILE, "w", encoding="utf-8") as f:
        json.dump(tag_store, f, indent=2, ensure_ascii=False)


📝 Generando tags para 2 videos...
📝 Generando tags para 28 videos...
📝 Generando tags para 20 videos...
📝 Generando tags para 40 videos...


KeyboardInterrupt: 

In [None]:
# Persist the video data as pretty-printed UTF-8 JSON (non-ASCII characters
# are written as-is rather than escaped).
with open(OUTPUT_FILE, "w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(data, indent=2, ensure_ascii=False))

print(f"✅ Proceso completado. Resultado guardado en '{OUTPUT_FILE}'")