In [33]:
import json
from pathlib import Path
import pandas as pd
from itertools import combinations
from collections import Counter
import networkx as nx
import colorsys
import random
import numpy as np
from node2vec import Node2Vec
import umap
import community as community_louvain
import plotly.express as px
import re
from time import sleep
import requests
from collections import defaultdict

In [4]:
# Cargar el dataset ya limpio
path = Path("../data/albums_clean.json")
with open(path, "r", encoding="utf-8") as f:
    albums = json.load(f)

print(f"‚úÖ √Ålbumes cargados: {len(albums):,}")

‚úÖ √Ålbumes cargados: 1,884


## 1. Matriz de co-ocurrencias b√°sica

In [5]:
pairs = Counter()

for a in albums:
    tags = a.get("clean_tags", [])
    # eliminar duplicados dentro del √°lbum
    tags = sorted(set(tags))
    for combo in combinations(tags, 2):
        pairs[combo] += 1

df_pairs = (
    pd.DataFrame([(a, b, c) for (a, b), c in pairs.items()],
                 columns=["tag1", "tag2", "weight"])
    .sort_values("weight", ascending=False)
    .reset_index(drop=True)
)

print(f"üîó Total de combinaciones √∫nicas: {len(df_pairs):,}")
df_pairs.head(20)

üîó Total de combinaciones √∫nicas: 41,598


Unnamed: 0,tag1,tag2,weight
0,ambient,electronic,152
1,black metal,metal,139
2,death metal,metal,120
3,electronic,experimental,117
4,electronic,techno,116
5,electronic,house,88
6,ambient,experimental,86
7,ambient,drone,81
8,black metal,death metal,67
9,doom,metal,66


In [6]:
# Crear grafo no dirigido
G = nx.Graph()

# Agregar aristas con peso
for _, row in df_pairs.iterrows():
    G.add_edge(row["tag1"], row["tag2"], weight=row["weight"])

print(f"üï∏Ô∏è Nodos: {G.number_of_nodes():,} | Enlaces: {G.number_of_edges():,}")


üï∏Ô∏è Nodos: 3,898 | Enlaces: 41,598


In [7]:
degree_dict = dict(G.degree(weight="weight"))
nx.set_node_attributes(G, degree_dict, "degree")

print("üéöÔ∏è Top 10 tags por grado ponderado:")
sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)[:10]

üéöÔ∏è Top 10 tags por grado ponderado:


[('electronic', 4717),
 ('metal', 2617),
 ('ambient', 2340),
 ('experimental', 2177),
 ('alternative', 1722),
 ('rock', 1680),
 ('black metal', 1206),
 ('death metal', 1128),
 ('techno', 1000),
 ('drone', 1000)]

In [8]:
graph_data = {
    "nodes": [{"id": n, "size": degree_dict[n]} for n in G.nodes()],
    "links": [
        {"source": u, "target": v, "weight": d["weight"]}
        for u, v, d in G.edges(data=True)
    ],
}

output_path = Path("../data/tag_graph_full.json")
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(graph_data, f, ensure_ascii=False, indent=2)

print(f"üíæ Grafo exportado en formato JSON: {output_path}")

üíæ Grafo exportado en formato JSON: ..\data\tag_graph_full.json


In [9]:
print(f"Nodos: {len(graph_data['nodes']):,}")
print(f"Enlaces: {len(graph_data['links']):,}")

# Mostrar algunos ejemplos aleatorios
import random
print("üéµ Ejemplo de nodo:", random.choice(graph_data["nodes"]))
print("üîó Ejemplo de enlace:", random.choice(graph_data["links"]))


Nodos: 3,898
Enlaces: 41,598
üéµ Ejemplo de nodo: {'id': 'folc hevi blec', 'size': 13}
üîó Ejemplo de enlace: {'source': 'watermelon rock', 'target': 'bongxploitation', 'weight': 1}


In [10]:

# Cargar el grafo base
path = Path("../data/tag_graph_full.json")
with open(path, "r", encoding="utf-8") as f:
    graph_data = json.load(f)

# Rango para las coordenadas
RANGE = 1000

# Generador de colores pastel
def random_color():
    h, s, l = random.random(), 0.6, 0.6
    r, g, b = colorsys.hls_to_rgb(h, l, s)
    return f"rgb({int(r*255)}, {int(g*255)}, {int(b*255)})"

# Asignar atributos iniciales
for node in graph_data["nodes"]:
    node["x"] = random.uniform(-RANGE, RANGE)
    node["y"] = random.uniform(-RANGE, RANGE)
    node["z"] = random.uniform(-RANGE, RANGE)
    node["color"] = random_color()
    node["size"] = node.get("size", 1)

# Guardar nueva versi√≥n
output_path = Path("../data/tag_graph_embedded.json")
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(graph_data, f, ensure_ascii=False, indent=2)

print(f"üíæ Grafo enriquecido exportado: {output_path}")
print(f"üåå Nodos con posici√≥n inicial: {len(graph_data['nodes']):,}")

üíæ Grafo enriquecido exportado: ..\data\tag_graph_embedded.json
üåå Nodos con posici√≥n inicial: 3,898


## 2. Generaci√≥n de modelo h√≠brido

En esta etapa se busca proyectar la red de co-ocurrencias de tags musicales (tag_graph_full.json) en un espacio tridimensional que preserve relaciones significativas entre g√©neros.
Existen tres enfoques principales para este tipo de representaci√≥n:

1. Layouts de fuerza (Force-based):
Simulan el grafo como un sistema f√≠sico de part√≠culas unidas por resortes.
Producen visualizaciones intuitivas, pero dependen de par√°metros arbitrarios (repulsi√≥n, gravedad) y solo conservan relaciones locales entre nodos conectados.
Son √∫tiles para exploraciones visuales r√°pidas, no para an√°lisis estructural reproducible.

2. Reducci√≥n de dimensionalidad (UMAP, t-SNE, PCA):
Operan sobre vectores de co-ocurrencia, preservando tanto estructura local como global.
Permiten observar ejes continuos de variaci√≥n musical (por ejemplo: intensidad ‚Üí atm√≥sfera ‚Üí ritmo).
Su limitaci√≥n es que requieren vectores de entrada previamente definidos.

3. Embeddings de red (Node2Vec, DeepWalk):
Aprenden representaciones vectoriales de cada nodo seg√∫n su contexto estructural en la red, mediante recorridos aleatorios.
Capturan patrones de conectividad y comunidades, generando un espacio sem√°ntico estable y escalable, aunque sin significado geom√©trico directo (se proyecta luego con UMAP).

Para miNoise se adopta un enfoque h√≠brido:

1. Node2Vec ‚Üí obtener un embedding de 128 dimensiones que codifique la estructura de la red de g√©neros.

2. UMAP ‚Üí reducir ese espacio a tres dimensiones para su visualizaci√≥n interactiva, preservando cercan√≠as locales y coherencia global.

Este pipeline combina la robustez estructural de Node2Vec con la legibilidad visual de UMAP, generando un mapa musical continuo donde los g√©neros pr√≥ximos comparten contextos y afinidades estil√≠sticas.

In [11]:
# Cargar el grafo de co-ocurrencias
path = Path("../data/tag_graph_full.json")
with open(path, "r", encoding="utf-8") as f:
    graph_data = json.load(f)

# Convertir a objeto de NetworkX
G = nx.readwrite.json_graph.node_link_graph(graph_data)
print(f"üì¶ Nodos: {G.number_of_nodes():,} | Enlaces: {G.number_of_edges():,}")

üì¶ Nodos: 3,898 | Enlaces: 41,598


The default value will be changed to `edges="edges" in NetworkX 3.6.


  nx.node_link_graph(data, edges="links") to preserve current behavior, or
  nx.node_link_graph(data, edges="edges") for forward compatibility.


In [12]:
# Configuraci√≥n de Node2Vec
node2vec = Node2Vec(
    G,
    dimensions=128,        # tama√±o del vector embebido
    walk_length=20,        # longitud de cada recorrido aleatorio
    num_walks=200,         # cantidad de recorridos por nodo
    workers=4,             # paralelizaci√≥n
    seed=42
)

# Entrenamiento del modelo
model = node2vec.fit(window=10, min_count=1, batch_words=4)
print("‚úÖ Node2Vec entrenado correctamente.")

Computing transition probabilities: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3898/3898 [00:24<00:00, 159.89it/s]


‚úÖ Node2Vec entrenado correctamente.


In [13]:
# Extraer embeddings en el mismo orden que los nodos del grafo
nodes = list(G.nodes())
embeddings = np.array([model.wv[n] for n in nodes])

# Reducir de 128D ‚Üí 3D
reducer = umap.UMAP(
    n_neighbors=15,
    min_dist=0.3,
    n_components=3,
    random_state=42
)
coords = reducer.fit_transform(embeddings)

print("‚úÖ Reducci√≥n UMAP completada.")

  warn(


‚úÖ Reducci√≥n UMAP completada.


In [14]:
# Asignar coordenadas y mantener color y tama√±o previos si exist√≠an
for node, (x, y, z) in zip(nodes, coords):
    G.nodes[node]["x"] = float(x * 150)
    G.nodes[node]["y"] = float(y * 150)
    G.nodes[node]["z"] = float(z * 150)

# Convertir a formato JSON de nuevo
graph_json = nx.readwrite.json_graph.node_link_data(G)
output_path = Path("../data/tag_graph_embedded_umap.json")

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(graph_json, f, ensure_ascii=False, indent=2)

print(f"üíæ Grafo proyectado exportado en: {output_path}")

The default value will be `edges="edges" in NetworkX 3.6.


  nx.node_link_data(G, edges="links") to preserve current behavior, or
  nx.node_link_data(G, edges="edges") for forward compatibility.


üíæ Grafo proyectado exportado en: ..\data\tag_graph_embedded_umap.json


In [15]:
# Calcular comunidades con algoritmo Louvain
partition = community_louvain.best_partition(G, weight="weight", random_state=42)

# Guardar el cluster como atributo en el grafo
nx.set_node_attributes(G, partition, "cluster")

# Visualizar conteo de comunidades detectadas
from collections import Counter
print("üé® Comunidades detectadas:", len(set(partition.values())))
Counter(partition.values()).most_common(10)

üé® Comunidades detectadas: 14


[(1, 755),
 (4, 681),
 (2, 577),
 (3, 506),
 (0, 373),
 (5, 252),
 (7, 226),
 (8, 214),
 (9, 149),
 (6, 114)]

In [16]:
# Generador de color pastel basado en √≠ndice de comunidad
def community_color(c):
    h = (c * 0.618033988749895) % 1.0
    s, l = 0.6, 0.55
    r, g, b = colorsys.hls_to_rgb(h, l, s)
    return f"rgb({int(r*255)}, {int(g*255)}, {int(b*255)})"

for node, data in G.nodes(data=True):
    cluster = data.get("cluster", 0)
    data["color"] = community_color(cluster)
    data["size"] = data.get("size", 1) * 1.5

In [17]:
graph_json = nx.readwrite.json_graph.node_link_data(G)
output_path = Path("../data/tag_graph_enriched.json")

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(graph_json, f, ensure_ascii=False, indent=2)

print(f"üíæ Grafo enriquecido exportado: {output_path}")

üíæ Grafo enriquecido exportado: ..\data\tag_graph_enriched.json


## 3. Generar capa de √Ålbums

In [22]:
# === CONFIGURACI√ìN ===
BASE_PATH = Path.cwd().parent / "data"
INPUT_PATH = BASE_PATH / "albums_clean.json"
OUTPUT_PATH = BASE_PATH / "tag_album_graph.json"

print("üéµ Iniciando construcci√≥n de Capa 2: Tag ‚Üî √Ålbum")
print(f"üìÇ Fuente: {INPUT_PATH}")

üéµ Iniciando construcci√≥n de Capa 2: Tag ‚Üî √Ålbum
üìÇ Fuente: c:\miNoise\miNoise_processor\data\albums_clean.json


In [23]:
with open(INPUT_PATH, "r", encoding="utf-8") as f:
    albums = json.load(f)

nodes = {}
links = []

for album in albums:
    album_id = f"album_{album['title'].replace(' ', '-').lower()}"
    artist_name = album.get("artist", {}).get("name")

    # Nodo del √°lbum (oculto inicialmente)
    nodes[album_id] = {
        "id": album_id,
        "type": "album",
        "title": album["title"],
        "artist": artist_name,
        "cover_url": album.get("cover_url"),
        "visible": False
    }

    for tag in album.get("clean_tags", []):
        tag_id = f"tag_{tag.lower().strip()}"

        # Nodo del tag (visible desde el inicio)
        nodes.setdefault(tag_id, {
            "id": tag_id,
            "type": "tag",
            "label": tag.lower(),
            "visible": True
        })

        links.append({"source": tag_id, "target": album_id})

graph = {"nodes": list(nodes.values()), "links": links}


In [24]:
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    json.dump(graph, f, indent=2, ensure_ascii=False)

print(f"‚úÖ Red generada con {len(nodes)} nodos y {len(links)} enlaces")
print(f"üíæ Guardado en {OUTPUT_PATH}")

‚úÖ Red generada con 5764 nodos y 15695 enlaces
üíæ Guardado en c:\miNoise\miNoise_processor\data\tag_album_graph.json


In [26]:

df_nodes = pd.DataFrame(graph["nodes"])
df_links = pd.DataFrame(graph["links"])

print("üé∂ Tipos de nodos:")
display(df_nodes["type"].value_counts())

print("\nüîó Primeros 10 enlaces:")
display(df_links.head(10))

üé∂ Tipos de nodos:


type
tag      3898
album    1866
Name: count, dtype: int64


üîó Primeros 10 enlaces:


Unnamed: 0,source,target
0,tag_alternative,album_demo
1,tag_emo,album_demo
2,tag_lofi emo,album_demo
3,tag_midwest emo,album_demo
4,tag_screamo,album_demo
5,tag_skramz,album_demo
6,tag_wroc≈Çaw,album_demo
7,tag_blackgaze,album_that-lasts-forever
8,tag_metal,album_that-lasts-forever
9,tag_emo,album_that-lasts-forever


## 4. Fusi√≥n de capas 

            ‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
            ‚îÇ         CAPA 1: TAG ‚Üí TAG (embeddings)    ‚îÇ
            ‚îÇ   Node2Vec ‚Üí UMAP  ‚Üí coordenadas base      ‚îÇ
            ‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò
                              ‚Üì
                              ‚Üì posiciona tags
                              ‚Üì
            ‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
            ‚îÇ   CAPA 2: TAG ‚Üí √ÅLBUM (red bipartita)     ‚îÇ
            ‚îÇ cada √°lbum obtiene coordenadas derivadas   ‚îÇ
            ‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò
                              ‚Üì
                              ‚Üì unir nodos + enlaces
                              ‚Üì
            ‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
            ‚îÇ      GRAFO FINAL (para three.js)         ‚îÇ
            ‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò


In [None]:
BASE = Path.cwd().parent / "data"
ENRICH_PATH = BASE / "tag_graph_enriched.json"   # tu modelo Node2Vec+UMAP
TAG_ALBUM_PATH = BASE / "tag_album_graph.json"   # relaciones tag‚Üî√°lbum
OUTPUT = BASE / "tag_album_embedded_graph_BASE.json"

with open(ENRICH_PATH, "r", encoding="utf-8") as f:
    enriched = json.load(f)
embed_nodes = enriched["nodes"] if "nodes" in enriched else enriched

with open(TAG_ALBUM_PATH, "r", encoding="utf-8") as f:
    tag_album = json.load(f)


In [64]:
# Mapear id ‚Üí nodo enriquecido (con coordenadas)
embed_lookup = {n["id"]: n for n in embed_nodes}

# Confirmaci√≥n opcional
list(embed_lookup.items())[:3]

[('ambient',
  {'size': 3510.0,
   'x': 1558.6239337921143,
   'y': 1045.9816217422485,
   'z': 959.3847513198853,
   'cluster': 0,
   'color': 'rgb(209, 71, 71)',
   'id': 'ambient'}),
 ('electronic',
  {'size': 7075.5,
   'x': 1733.9750289916992,
   'y': 1031.202507019043,
   'z': 1026.4997720718384,
   'cluster': 1,
   'color': 'rgb(71, 111, 209)',
   'id': 'electronic'}),
 ('black metal',
  {'size': 1809.0,
   'x': 1400.3026485443115,
   'y': 274.2625415325165,
   'z': 1138.6892795562744,
   'cluster': 2,
   'color': 'rgb(151, 209, 71)',
   'id': 'black metal'})]

In [38]:
def normalize_tag_id(node_id: str) -> str:
    """
    Convierte IDs del grafo tag‚Üîalbum al formato usado en los embeddings.
    """
    t = node_id

    # quitar prefijo
    if t.startswith("tag_"):
        t = t[4:]
    
    # reemplazar guiones por espacios
    t = t.replace("-", " ")

    # lowercase y strip: por seguridad
    return t.lower().strip()

In [39]:
print(normalize_tag_id("tag_dark-ambient"))     # ‚Üí "dark ambient"
print(normalize_tag_id("tag_black-metal"))       # ‚Üí "black metal"
print(normalize_tag_id("tag_ambient"))           # ‚Üí "ambient"

dark ambient
black metal
ambient


In [30]:
final_nodes = []
final_links = tag_album["links"]  # los enlaces ya est√°n listos

missing_tags = 0

In [40]:
final_nodes = []
missing_tags = []

for node in tag_album["nodes"]:
    if node["type"] == "tag":
        # convertir ID del nodo al formato de embed_lookup
        simple_id = normalize_tag_id(node["id"])

        if simple_id in embed_lookup:
            emb = embed_lookup[simple_id]
            node["x"] = emb["x"]
            node["y"] = emb["y"]
            node["z"] = emb["z"]
            node["cluster"] = emb.get("cluster")
            node["color"] = emb.get("color")
            node["size"] = emb.get("size", 1)
        else:
            missing_tags.append(simple_id)
            node["x"] = node["y"] = node["z"] = 0
            node["size"] = 1
            node["color"] = "rgb(200,200,200)"
            node["cluster"] = -1

        final_nodes.append(node)


In [41]:
album_tags = defaultdict(list)

for link in tag_album["links"]:
    src = link["source"]
    tgt = link["target"]

    if src.startswith("tag_") and tgt.startswith("album_"):
        album_tags[tgt].append(src)

In [42]:

for node in tag_album["nodes"]:
    if node["type"] == "album":
        album_id = node["id"]
        tags_for_album = album_tags.get(album_id, [])

        # reunir coords reales desde embeddings
        coords = []
        for t in tags_for_album:
            simple = normalize_tag_id(t)
            if simple in embed_lookup:
                emb = embed_lookup[simple]
                coords.append([emb["x"], emb["y"], emb["z"]])

        if coords:
            arr = np.mean(coords, axis=0)
            node["x"], node["y"], node["z"] = arr.tolist()
        else:
            node["x"] = node["y"] = node["z"] = 0

        final_nodes.append(node)

In [43]:
graph_final = {
    "nodes": final_nodes,
    "links": tag_album["links"]
}

with open(OUTPUT, "w", encoding="utf-8") as f:
    json.dump(graph_final, f, indent=2, ensure_ascii=False)

print(f"üéâ Grafo final generado con {len(final_nodes)} nodos y {len(tag_album['links'])} enlaces")
print(f"üíæ Guardado en: {OUTPUT}")

print(f"Tags sin embedding: {len(missing_tags)}")

üéâ Grafo final generado con 5764 nodos y 15695 enlaces
üíæ Guardado en: c:\miNoise\miNoise_processor\data\tag_album_embedded_graph.json
Tags sin embedding: 132


In [37]:
sum(1 for n in final_nodes if n["type"] == "tag")
sum(1 for n in final_nodes if n["type"] == "album")

1866

El archivo tag_album_embedded_graph.json tendr√°:

- todos los tags con sus coordenadas UMAP verdaderas

- todos los √°lbumes posicionados geom√©tricamente

- todos los links intactos

- estructura final lista para Three.js o cualquier motor 3D

## 4. Fusi√≥n de capas 

In [None]:

# Cargar albums_clean
ALBUMS_CLEAN_PATH = BASE / "albums_clean.json"
OUTPUT = BASE / "tag_album_embedded_graph_BASE.json"
with open(ALBUMS_CLEAN_PATH, "r", encoding="utf-8") as f:
    albums_clean = json.load(f)

def slugify_title(title: str) -> str:
    return title.replace(" ", "-").lower().strip()

# Crear √≠ndice: album_id ‚Üí metadata completa
album_index = {}

for a in albums_clean:
    album_id = f"album_{slugify_title(a['title'])}"
    album_index[album_id] = a

In [45]:
list(album_index.items())[:2]

[('album_demo',
  {'type': 'album',
   'title': 'DEMO',
   'url': 'https://onegirl37razors.bandcamp.com/album/demo',
   'release_date': '04 Nov 2025 00:00:00 GMT',
   'description': ':3',
   'genre': 'metal',
   'tags': ['deathcore', 'metal', 'metalcore', 'hardcore', 'Maryland'],
   'cover_url': 'https://f4.bcbits.com/img/a3583439450_16.jpg',
   'tracks_count': 5,
   'duration_total': 1241.923,
   'artist': {'name': 'onegirl37razors',
    'url': 'https://onegirl37razors.bandcamp.com',
    'location': 'Maryland',
    'description': 'MD metalcore',
    'imageUrl': 'https://f4.bcbits.com/img/0041578877_28.jpg',
    'label': None},
   'label': None,
   'embed_id': 642225966,
   'embed_url': 'https://bandcamp.com/EmbeddedPlayer/album=642225966/size=large/bgcol=000000/linkcol=ffffff/transparent=true/',
   'clean_tags': ['deathcore', 'metal', 'metalcore', 'hardcore']}),
 ('album_that-lasts-forever',
  {'type': 'album',
   'title': 'that lasts forever',
   'url': 'https://abriction.bandcamp.co

In [52]:
def enrich_album_node(node, meta):
    """Enriquece un nodo √°lbum con metadatos completos del albums_clean.json"""

    # Campos directos del √°lbum
    node["url"] = meta.get("url")
    node["release_date"] = meta.get("release_date")
    node["genre"] = meta.get("genre")
    node["description"] = meta.get("description")
    node["tracks_count"] = meta.get("tracks_count")
    node["duration_total"] = meta.get("duration_total")
    node["embed_url"] = meta.get("embed_url")
    node["embed_id"] = meta.get("embed_id")

    # --- ARTISTA ---
    artist = meta.get("artist") or {}
    node["artist_name"] = artist.get("name")
    node["artist_url"] = artist.get("url")
    node["artist_location"] = artist.get("location")
    node["artist_description"] = artist.get("description")
    node["artist_image"] = artist.get("imageUrl")

    # --- LABEL PRINCIPAL (√°lbum) ---
    album_label = meta.get("label") or {}
    node["label_name"] = album_label.get("name")
    node["label_url"] = album_label.get("url")

    # --- LABEL DEL ARTISTA (a veces existe ac√° tambi√©n) ---
    artist_label = artist.get("label") or {}
    node["artist_label_name"] = artist_label.get("name")
    node["artist_label_url"] = artist_label.get("url")

    # --- TAGS ---
    node["original_tags"] = meta.get("tags", [])
    node["clean_tags"] = meta.get("clean_tags", [])

    return node



In [53]:
test = [meta for meta in albums_clean if meta.get("label") is None][:3]
test


[{'type': 'album',
  'title': 'demo',
  'url': 'https://ostatnikontynent.bandcamp.com/album/demo',
  'release_date': '08 Oct 2025 19:46:43 GMT',
  'description': 'demo z piosenkami powsta≈Çymi w okresie maj-pa≈∫dziernik 2025',
  'genre': 'midwest emo',
  'tags': ['alternative',
   'emo',
   'lofi emo',
   'midwest emo',
   'screamo',
   'skramz',
   'Wroc≈Çaw'],
  'cover_url': 'https://f4.bcbits.com/img/a0639327228_16.jpg',
  'tracks_count': 4,
  'duration_total': 677,
  'artist': {'name': 'ostatni kontynent',
   'url': 'https://ostatnikontynent.bandcamp.com',
   'location': 'Wroc≈Çaw, Poland',
   'description': 'wroc≈Çaw-based emo two piece\n\ngabry≈õ - git/vox\n gwen - drums\n\npizza emo nerds 4 ever',
   'imageUrl': 'https://f4.bcbits.com/img/0041279214_28.jpg',
   'label': None},
  'label': None,
  'embed_id': 1246203161,
  'embed_url': 'https://bandcamp.com/EmbeddedPlayer/album=1246203161/size=large/bgcol=000000/linkcol=ffffff/transparent=true/',
  'clean_tags': ['alternative',
  

In [54]:
graph_final = {
    "nodes": final_nodes,
    "links": final_links
}

with open(OUTPUT, "w", encoding="utf-8") as f:
    json.dump(graph_final, f, indent=2, ensure_ascii=False)

print("üéâ Grafo final exportado correctamente")
print(f"üì¶ Nodos: {len(final_nodes):,}")
print(f"üîó Enlaces: {len(final_links):,}")
print(f"üíæ Archivo guardado en: {OUTPUT}")

üéâ Grafo final exportado correctamente
üì¶ Nodos: 7,630
üîó Enlaces: 15,695
üíæ Archivo guardado en: c:\miNoise\miNoise_processor\data\tag_album_embedded_graph.json


In [62]:
next(n for n in final_nodes if n["type"] == "album")

{'id': 'album_demo',
 'type': 'album',
 'title': 'DEMO',
 'artist': 'onegirl37razors',
 'cover_url': 'https://f4.bcbits.com/img/a3583439450_16.jpg',
 'visible': False,
 'x': 1480.1120122273762,
 'y': 483.74621371428174,
 'z': 1101.7284591992695,
 'url': 'https://onegirl37razors.bandcamp.com/album/demo',
 'release_date': '04 Nov 2025 00:00:00 GMT',
 'genre': 'metal',
 'description': ':3',
 'tracks_count': 5,
 'duration_total': 1241.923,
 'embed_url': 'https://bandcamp.com/EmbeddedPlayer/album=642225966/size=large/bgcol=000000/linkcol=ffffff/transparent=true/',
 'embed_id': 642225966,
 'artist_url': 'https://onegirl37razors.bandcamp.com',
 'artist_location': 'Maryland',
 'artist_description': 'MD metalcore',
 'artist_image': 'https://f4.bcbits.com/img/0041578877_28.jpg'}

In [61]:
next(n for n in final_nodes if n["type"] == "tag")

{'id': 'tag_alternative',
 'type': 'tag',
 'label': 'alternative',
 'visible': True,
 'x': 1481.7742824554443,
 'y': 711.7351770401001,
 'z': 1178.7156343460083,
 'cluster': 4,
 'color': 'rgb(71, 209, 186)',
 'size': 2583.0}

In [63]:
path = Path("../data/tag_album_embedded_graph.json")

with open(path, "r", encoding="utf-8") as f:
    data = json.load(f)

album = next(n for n in data["nodes"] if n["type"] == "album")
album

{'id': 'album_demo',
 'type': 'album',
 'title': 'DEMO',
 'artist': 'onegirl37razors',
 'cover_url': 'https://f4.bcbits.com/img/a3583439450_16.jpg',
 'visible': False,
 'x': 1480.1120122273762,
 'y': 483.74621371428174,
 'z': 1101.7284591992695,
 'url': 'https://onegirl37razors.bandcamp.com/album/demo',
 'release_date': '04 Nov 2025 00:00:00 GMT',
 'genre': 'metal',
 'description': ':3',
 'tracks_count': 5,
 'duration_total': 1241.923,
 'embed_url': 'https://bandcamp.com/EmbeddedPlayer/album=642225966/size=large/bgcol=000000/linkcol=ffffff/transparent=true/',
 'embed_id': 642225966,
 'artist_url': 'https://onegirl37razors.bandcamp.com',
 'artist_location': 'Maryland',
 'artist_description': 'MD metalcore',
 'artist_image': 'https://f4.bcbits.com/img/0041578877_28.jpg'}