# Discogs Electronic Recommender v8 — Graph + Search (No Label Paging)

This version avoids unstable `/labels/{id}/releases` paging and instead:

- Builds a **label/artist graph** from your collection + wantlist.
- Expands labels via:
  - **Parent / sub-label relationships**
  - **Your artists' other releases**
- Expands artists via:
  - **Labels (including parent/sub labels) that you like**, by seeing which other artists appear.
- Uses `/database/search` (label + genre=Electronic) to build a candidate pool.
- Enriches candidates via `/releases/{id}`.
- Scores using a **vector-space model (TF-IDF + cosine similarity)** + numeric features
  (label affinity, artist affinity), then adjusts the score with community have/want counts.
- Returns **Electronic-only** recommendations, one per artist + label.

Set:

- `DISCOGS_USER_TOKEN`
- `DISCOGS_USERNAME`


In [None]:
# %% Imports & global config

import os, time, math, random, re
from pathlib import Path
from typing import Dict, Any, List, Optional
from collections import defaultdict

import requests
import pandas as pd
import numpy as np
from json import JSONDecodeError

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import hstack

from tqdm import tqdm
from IPython.display import display

# python-dotenv is optional: when it is missing (or loading fails) we simply
# rely on whatever is already present in the process environment.
try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    pass

# --- Discogs credentials ---
DISCOGS_USER_TOKEN = os.getenv("DISCOGS_USER_TOKEN", "XXXX")
DISCOGS_USERNAME   = os.getenv("DISCOGS_USERNAME",   "XXXX")

# Every value that means "not configured". This must include the "XXXX"
# defaults used above, otherwise the checks below can never fire and the
# notebook only fails later with opaque HTTP errors.
_CREDENTIAL_PLACEHOLDERS = {"", "XXXX", "REPLACE_WITH_YOUR_TOKEN", "your_username_here"}

assert DISCOGS_USER_TOKEN.strip() not in _CREDENTIAL_PLACEHOLDERS, "Set DISCOGS_USER_TOKEN (env or edit cell)."
assert DISCOGS_USERNAME.strip()   not in _CREDENTIAL_PLACEHOLDERS, "Set DISCOGS_USERNAME (env or edit cell)."

# --- API + rate limiting ---
REQUESTS_PER_MIN      = 30
SLEEP_BETWEEN_CALLS   = 60.0 / REQUESTS_PER_MIN   # seconds; also the base delay for backoff
MAX_RETRIES           = 7
PER_PAGE              = 50
TIMEOUT_S             = 40
APP_UA                = f"DiscogsRecommenderV8/1.0 (+https://www.discogs.com/user/{DISCOGS_USERNAME})"

print("Discogs user:", DISCOGS_USERNAME)


In [None]:
# %% Low-level API helpers (retry + backoff)

def _headers():
    """Return the HTTP headers sent with every Discogs API request."""
    headers = {"User-Agent": APP_UA}
    headers["Authorization"] = f"Discogs token={DISCOGS_USER_TOKEN}"
    headers["Accept"] = "application/json"
    headers["Connection"] = "keep-alive"
    return headers

def _backoff(attempt: int, base: float = SLEEP_BETWEEN_CALLS, cap: float = 120.0) -> float:
    """Return the delay (seconds) before retry number `attempt` (1-based).

    The delay grows geometrically (factor 1.9) from `base`, is clamped to
    `cap`, and then gets up to 0.9 s of random jitter added on top.
    """
    exponential_delay = base * (1.9 ** (attempt - 1))
    capped_delay = min(cap, exponential_delay)
    return capped_delay + random.uniform(0, 0.9)

def _raw_get(url: str, params: dict, what: str, page: int = 1):
    """GET `url` and return the decoded JSON body, retrying transient failures.

    Retried (up to MAX_RETRIES attempts, with `_backoff` delays in between):
    HTTP 429/500/502/503/504, any `requests.RequestException`, and bodies
    that fail to decode as JSON.

    Not retried: any other 4xx status (e.g. 401, 403, 404). Repeating such a
    request cannot succeed, so it fails immediately instead of sleeping
    through the whole backoff schedule.

    Args:
        url: Absolute API URL.
        params: Query-string parameters.
        what: Short label used in log lines and error messages.
        page: Page number, used for logging only.

    Raises:
        RuntimeError: on a non-retryable client error, or once all attempts
            have been used up.
    """
    retryable_statuses = (429, 500, 502, 503, 504)
    problem = "unknown error"

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            r = requests.get(url, headers=_headers(), params=params, timeout=TIMEOUT_S)
            status = r.status_code
            if status in retryable_statuses:
                problem = f"HTTP {status}"
            elif 400 <= status < 500:
                # RuntimeError is deliberately not in the except clause below,
                # so this propagates straight to the caller.
                raise RuntimeError(
                    f"Failed to fetch {what} page {page}: HTTP {status} is not retryable."
                )
            else:
                r.raise_for_status()
                return r.json()
        except (requests.RequestException, JSONDecodeError) as e:
            problem = type(e).__name__

        if attempt >= MAX_RETRIES:
            # No point sleeping when there is no further attempt to wait for.
            print(f"[{what} retry {attempt}] page {page} {problem}; giving up")
            break

        sleep_for = _backoff(attempt)
        print(f"[{what} retry {attempt}] page {page} {problem}; sleep {sleep_for:.1f}s")
        time.sleep(sleep_for)

    raise RuntimeError(f"Failed to fetch {what} page {page} after {MAX_RETRIES} retries.")


In [None]:
# %% Helpers for parsing and normalisation

def _to_str_list(x) -> List[str]:
    if x is None:
        return []
    if isinstance(x, list):
        return [str(v) for v in x if isinstance(v, (str, int, float))]
    return [str(x)]

def _clean_name(name: str) -> str:
    """Strip Discogs cruft like 'Artist (2)' -> 'Artist'."""
    if not isinstance(name, str):
        return ""
    name = re.sub(r"\(\d+\)$", "", name).strip()
    return name

def parse_year_value(y):
    """Best-effort conversion of a year-like value to an int.

    - int / finite float: truncated with `int`.
    - NaN or infinite float: `None` (calling `int` on these would raise).
    - str: the first run of four digits, if any.
    - anything else (including `None`): `None`.
    """
    if y is None:
        return None
    if isinstance(y, (int, float)):
        if isinstance(y, float) and not math.isfinite(y):
            return None
        return int(y)
    if isinstance(y, str):
        m = re.search(r"\d{4}", y)
        return int(m.group(0)) if m else None
    return None

def year_bucket(year: Optional[int]) -> Optional[str]:
    """Return the text-feature token for a release year, e.g. 'year_1999'.

    Returns `None` when there is no usable year: `None`, NaN/inf (which can
    appear once years pass through a pandas float column), values that cannot
    be converted to an int, and non-positive years. Float years are truncated
    so 1999.0 yields 'year_1999' rather than 'year_1999.0'.
    """
    if year is None:
        return None
    try:
        whole_year = int(year)
    except (TypeError, ValueError, OverflowError):
        # int(nan) raises ValueError, int(inf) raises OverflowError.
        return None
    if whole_year <= 0:
        return None
    return f"year_{whole_year}"

def norm_label(name: Optional[str]) -> Optional[str]:
    """Return the lookup key for a label name: trimmed and lower-cased.

    Non-string input and names that are empty after trimming give `None`.
    """
    if isinstance(name, str):
        key = name.strip().lower()
        if key:
            return key
    return None


In [None]:
# %% Fetch collection + wantlist (with artist/label IDs)

def _release_row_from_basic_info(bi: dict, source: str) -> Optional[dict]:
    """Flatten a Discogs `basic_information` payload into one profile row.

    `artists`/`artist_ids` and `labels`/`label_ids` are built from the same
    filtered entries, so each pair stays index-aligned and can safely be
    zipped by callers. An entry without an id contributes `None` at its
    position instead of being dropped from only one of the two lists.
    Label entries without a name are skipped entirely.

    Args:
        bi: The `basic_information` dict of a collection or wantlist item.
        source: Tag stored in the row's "source" field.

    Returns:
        The row dict, or `None` when `bi` is not a dict or is malformed.
    """
    if not isinstance(bi, dict):
        return None

    try:
        artist_entries = [a for a in (bi.get("artists") or []) if isinstance(a, dict)]
        label_entries = [
            l for l in (bi.get("labels") or [])
            if isinstance(l, dict) and l.get("name")
        ]

        return {
            "release_id": bi.get("id"),
            "title":      bi.get("title"),
            "artists":    [_clean_name(a.get("name", "")) for a in artist_entries],
            "artist_ids": [a.get("id") for a in artist_entries],
            "labels":     [l.get("name") for l in label_entries],
            "label_ids":  [l.get("id") for l in label_entries],
            "genres":     bi.get("genres") or [],
            "styles":     bi.get("styles") or [],
            "year":       parse_year_value(bi.get("year")),
            "country":    bi.get("country"),
            "uri":        bi.get("uri"),
            "source":     source,
        }
    except (TypeError, ValueError):
        # e.g. "artists" is a non-iterable scalar, or the year is unparseable.
        return None

def fetch_collection() -> pd.DataFrame:
    """Download every release in the user's collection (folder 0) as a DataFrame.

    Pages through the endpoint until the reported page count is reached,
    skips items without a release id, and de-duplicates on `release_id`.
    An empty collection yields an empty frame that still has the expected
    columns, so later `drop_duplicates` / `concat` calls keep working.
    """
    print("Fetching collection…")
    base = f"https://api.discogs.com/users/{DISCOGS_USERNAME}/collection/folders/0/releases"
    expected_columns = [
        "release_id", "title", "artists", "artist_ids", "labels", "label_ids",
        "genres", "styles", "year", "country", "uri", "source", "owned_qty",
    ]
    page = 1
    rows = []

    while True:
        params = {"page": page, "per_page": PER_PAGE}
        data = _raw_get(base, params, "collection", page)
        for item in data.get("releases") or []:
            if not isinstance(item, dict):
                continue
            bi = item.get("basic_information") or {}
            row = _release_row_from_basic_info(bi, source="collection")
            if not row or not row.get("release_id"):
                continue
            # NOTE(review): presumably "count" is absent from this payload, in
            # which case owned_qty is always 1 — confirm against the API docs.
            row["owned_qty"] = bi.get("count", 1)
            rows.append(row)
        pagination = data.get("pagination") or {}
        if page >= (pagination.get("pages") or 1):
            break
        page += 1

    if rows:
        df = pd.DataFrame(rows).drop_duplicates(subset=["release_id"])
    else:
        df = pd.DataFrame(columns=expected_columns)
    print("Collection rows:", len(df))
    return df

def fetch_wantlist() -> pd.DataFrame:
    """Download every release on the user's wantlist as a DataFrame.

    Pages through the endpoint until the reported page count is reached,
    skips items without a release id, and de-duplicates on `release_id`.
    `owned_qty` is always 0 for wantlist rows. An empty wantlist yields an
    empty frame that still has the expected columns instead of raising in
    `drop_duplicates`.
    """
    print("Fetching wantlist…")
    base = f"https://api.discogs.com/users/{DISCOGS_USERNAME}/wants"
    expected_columns = [
        "release_id", "title", "artists", "artist_ids", "labels", "label_ids",
        "genres", "styles", "year", "country", "uri", "source", "owned_qty",
    ]
    page = 1
    rows = []

    while True:
        params = {"page": page, "per_page": PER_PAGE}
        data = _raw_get(base, params, "wantlist", page)
        for item in data.get("wants") or []:
            if not isinstance(item, dict):
                continue
            bi = item.get("basic_information") or {}
            row = _release_row_from_basic_info(bi, source="wantlist")
            if not row or not row.get("release_id"):
                continue
            row["owned_qty"] = 0
            rows.append(row)
        pagination = data.get("pagination") or {}
        if page >= (pagination.get("pages") or 1):
            break
        page += 1

    if rows:
        df = pd.DataFrame(rows).drop_duplicates(subset=["release_id"])
    else:
        df = pd.DataFrame(columns=expected_columns)
    print("Wantlist rows:", len(df))
    return df

df_collection = fetch_collection()
df_wantlist   = fetch_wantlist()

# Collection rows are concatenated first, so when a release appears in both
# lists the collection row is the one kept by drop_duplicates.
df_all = pd.concat([df_collection, df_wantlist], ignore_index=True)
df_all = df_all.drop_duplicates(subset=["release_id"])
print("Total unique releases in profile:", len(df_all))


In [None]:
# %% Base label & artist scores from your profile

label_scores = defaultdict(float)     # key: normalised label name
artist_scores = defaultdict(float)    # key: artist_id

label_key_to_id = {}                  # norm label -> an example label_id
label_key_to_name = {}                # norm label -> pretty label name
artist_id_to_name = {}                # artist_id -> clean name

for _, row in df_all.iterrows():
    # Wantlist rows weigh more than collection rows (1.7 vs 1.0).
    src = row.get("source", "collection")
    weight = 1.0 if src == "collection" else 1.7

    label_names = row.get("labels") or []

    # Count each label at most once per release: the same label can be listed
    # several times on one release, which would otherwise inflate its score.
    counted_label_keys = set()
    for name in label_names:
        key = norm_label(name)
        if not key or key in counted_label_keys:
            continue
        counted_label_keys.add(key)
        label_scores[key] += weight
        label_key_to_name.setdefault(key, name)

    # Same rule for artists: one contribution per artist per release.
    # assumes artist_ids and artists are parallel lists — TODO confirm
    counted_artist_ids = set()
    for a_id, a_name in zip(row.get("artist_ids") or [], row.get("artists") or []):
        if not a_id or a_id in counted_artist_ids:
            continue
        counted_artist_ids.add(a_id)
        artist_scores[a_id] += weight
        artist_id_to_name[a_id] = _clean_name(a_name)

    # Remember the first label id seen for each normalised label name.
    for name, lid in zip(label_names, row.get("label_ids") or []):
        key = norm_label(name)
        if key and lid and key not in label_key_to_id:
            label_key_to_id[key] = lid

print("Profile labels:", len(label_scores), " | Profile artists:", len(artist_scores))
display(pd.DataFrame(list(label_scores.items()), columns=["label_key", "score"]).head())


In [None]:
# %% Expanding labels via parent/sub-label relationships

def expand_labels_via_parent_sublabels(
    label_scores: Dict[str, float],
    label_key_to_id: Dict[str, int],
    label_key_to_name: Dict[str, str],
    max_seed_labels: int = 40,
    rel_decay: float = 0.7,
) -> None:
    """Spread the score of the top labels to their parent label and sublabels.

    For each of the `max_seed_labels` highest-scoring labels with a known id,
    the label detail is fetched and every related label receives
    `seed_score * rel_decay`. All three dicts are updated in place.

    A failed lookup for one label is logged and skipped, so a single bad
    label no longer aborts the whole expansion. Scores are accumulated with
    `.get`, so a plain dict works as well as a defaultdict.
    """
    # Snapshot first: scores added below must not change which labels are seeds.
    seed_items = sorted(label_scores.items(), key=lambda x: x[1], reverse=True)[:max_seed_labels]

    def _boost_related(related, amount: float) -> None:
        """Add `amount` to one related label and record its name and id."""
        if not isinstance(related, dict):
            return
        related_name = related.get("name")
        related_key = norm_label(related_name)
        if not related_key:
            return
        label_scores[related_key] = label_scores.get(related_key, 0.0) + amount
        label_key_to_name.setdefault(related_key, related_name)
        related_id = related.get("id")
        if related_id and related_key not in label_key_to_id:
            label_key_to_id[related_key] = related_id

    for key, base_score in tqdm(seed_items, desc="Parent/sublabel expansion"):
        lid = label_key_to_id.get(key)
        if not lid:
            continue
        url = f"https://api.discogs.com/labels/{lid}"
        try:
            data = _raw_get(url, {}, "label_detail", page=1)
        except Exception as e:
            print("Label detail failed for", lid, ":", e)
            continue

        boost = base_score * rel_decay
        _boost_related(data.get("parent_label"), boost)
        for sub in data.get("sublabels") or []:
            _boost_related(sub, boost)

expand_labels_via_parent_sublabels(label_scores, label_key_to_id, label_key_to_name)
print("Labels after parent/sub expansion:", len(label_scores))


In [None]:
# %% Expanding labels via artists' other releases

def expand_labels_via_artists_other_releases(
    artist_scores: Dict[int, float],
    label_scores: Dict[str, float],
    label_key_to_name: Dict[str, str],
    max_artists: int = 60,
    per_page: int = 50,
    artist_to_label_decay: float = 0.6,
    skip_artist_ids: frozenset = frozenset({194}),
) -> None:
    """Boost labels that the user's top artists have released on.

    For each of the `max_artists` highest-scoring artists, the first page of
    their releases is fetched and every label named on a release receives
    `artist_score * artist_to_label_decay`. `label_scores` and
    `label_key_to_name` are updated in place.

    Args:
        skip_artist_ids: Artist ids never expanded. The default excludes 194,
            the Discogs placeholder artist "Various", whose releases span
            unrelated labels and would only add noise.
    """
    eligible = [
        (a_id, a_score)
        for a_id, a_score in artist_scores.items()
        if a_id not in skip_artist_ids
    ]
    top_artists = sorted(eligible, key=lambda x: x[1], reverse=True)[:max_artists]

    for a_id, a_score in tqdm(top_artists, desc="Label expansion via artists"):
        url = f"https://api.discogs.com/artists/{a_id}/releases"
        params = {"per_page": per_page, "page": 1, "sort": "year"}
        try:
            data = _raw_get(url, params, "artist_releases", page=1)
        except Exception as e:
            print("Artist releases failed for", a_id, ":", e)
            continue

        boost = a_score * artist_to_label_decay
        for rel in data.get("releases") or []:
            if not isinstance(rel, dict):
                continue
            label_str = rel.get("label")
            if not isinstance(label_str, str):
                # Entries without a textual label have nothing to contribute.
                continue
            for label_name in (part.strip() for part in label_str.split(",")):
                key = norm_label(label_name)
                if not key:
                    continue
                # .get keeps this correct for plain dicts, not only defaultdicts.
                label_scores[key] = label_scores.get(key, 0.0) + boost
                label_key_to_name.setdefault(key, label_name)

expand_labels_via_artists_other_releases(artist_scores, label_scores, label_key_to_name)
print("Labels after artist-based expansion:", len(label_scores))


In [None]:
# %% Expanding artists via labels (including parent/sub labels) other artists

def expand_artists_via_labels(
    label_scores: Dict[str, float],
    label_key_to_name: Dict[str, str],
    artist_scores: Dict[int, float],
    max_labels: int = 50,
    pages_per_label: int = 1,
    per_page: int = 50,
) -> None:
    """Compatibility hook for label-driven artist expansion; intentionally a no-op.

    Earlier this function ran one `/database/search` request per label and
    page and then discarded every response, so it never changed
    `artist_scores` while still spending rate-limited API calls. The actual
    boost for artists found on liked labels is applied in
    `enrich_candidates_with_release_details`, where artist ids are available.

    The signature is kept so existing calls continue to work; no argument is
    read or modified.
    """
    return None

expand_artists_via_labels(
    label_scores=label_scores,
    label_key_to_name=label_key_to_name,
    artist_scores=artist_scores,
)
print("Artist expansion via labels complete (IDs boosted during enrichment).")


In [None]:
# %% Build candidate releases via /database/search seeded by high-scoring labels

def _search_artist_names(artist_field) -> List[str]:
    """Turn a search hit's 'artist' field (string or list) into clean names."""
    if isinstance(artist_field, list):
        pieces = artist_field
    else:
        pieces = re.split(r",|&", str(artist_field))
    return [_clean_name(p) for p in pieces if isinstance(p, str) and p.strip()]


def _search_label_names(label_field) -> List[str]:
    """Turn a search hit's 'label' field (string or list) into trimmed names."""
    if isinstance(label_field, str):
        raw_values = [label_field]
    elif isinstance(label_field, list):
        raw_values = [v if isinstance(v, str) else str(v) for v in label_field]
    else:
        return []

    names: List[str] = []
    for raw in raw_values:
        names.extend(part.strip() for part in raw.split(",") if part.strip())
    return names


def build_candidates_via_search_from_labels(
    df_profile: pd.DataFrame,
    label_scores: Dict[str, float],
    label_key_to_name: Dict[str, str],
    max_label_seeds: int = 50,
    pages_per_label: int = 2,   # upper bound; we'll stop earlier if Discogs says fewer pages
    per_page: int = 50,
) -> pd.DataFrame:
    """Collect candidate releases by searching the highest-scoring labels.

    For each of the `max_label_seeds` top labels, up to `pages_per_label`
    pages of Electronic releases are requested from `/database/search`.
    Releases already in `df_profile` are skipped. Each row records which seed
    label produced it, together with that label's score.

    Returns:
        One row per unique `release_id`. When nothing is found the frame is
        empty but still has all expected columns, instead of raising in
        `drop_duplicates`.
    """
    base = "https://api.discogs.com/database/search"
    candidate_columns = [
        "release_id", "title", "artists", "artist_ids", "labels", "genres",
        "styles", "year", "country", "uri", "label_seed_key", "label_seed_score",
    ]
    owned_ids = set(df_profile["release_id"].tolist())
    rows = []

    sorted_labels = sorted(label_scores.items(), key=lambda x: x[1], reverse=True)[:max_label_seeds]

    for key, score in tqdm(sorted_labels, desc="Searching candidates by label"):
        label_name = label_key_to_name.get(key)
        if not label_name:
            continue

        current_page = 1
        max_pages_for_label = pages_per_label  # will be tightened after first response

        while current_page <= max_pages_for_label:
            params = {
                "type": "release",
                "label": label_name,
                "genre": "Electronic",
                "per_page": per_page,
                "page": current_page,
            }
            try:
                data = _raw_get(base, params, "label_search_candidates", page=current_page)
            except Exception as e:
                print(
                    f"Search failed for label {label_name} page {current_page} : {e}"
                )
                break  # give up on this label and move to the next one

            # Use Discogs' own pagination to cap pages for this label
            pagination = data.get("pagination") or {}
            total_pages = pagination.get("pages") or 1
            max_pages_for_label = min(pages_per_label, total_pages)

            for item in (data.get("results") or []):
                if not isinstance(item, dict):
                    continue
                rid = item.get("id")
                if not rid or rid in owned_ids:
                    continue

                rows.append({
                    "release_id": rid,
                    "title": item.get("title"),
                    "artists": _search_artist_names(item.get("artist") or ""),
                    # Search hits are stored without artist ids.
                    "artist_ids": [],
                    "labels": _search_label_names(item.get("label") or ""),
                    "genres": _to_str_list(item.get("genre")),
                    "styles": _to_str_list(item.get("style")),
                    "year": parse_year_value(item.get("year")),
                    "country": item.get("country"),
                    "uri": item.get("uri"),
                    "label_seed_key": key,
                    "label_seed_score": score,
                })

            current_page += 1

    if rows:
        df_cand = pd.DataFrame(rows).drop_duplicates(subset=["release_id"])
    else:
        df_cand = pd.DataFrame(columns=candidate_columns)
    print("Raw candidates from label-seeded search:", len(df_cand))
    return df_cand

df_candidates = build_candidates_via_search_from_labels(df_all, label_scores, label_key_to_name)


In [None]:
# %% Enrich candidates with /releases details + boost new artists via label affinity
#     and pull have/want counts. Keep ONLY pure Electronic (genres == ["Electronic"]).

def enrich_candidates_with_release_details(
    df_cand: pd.DataFrame,
    label_scores: Dict[str, float],
    artist_scores: Dict[int, float],
    max_details: int = 800,
) -> (pd.DataFrame, Dict[int, float]):
    """Fetch `/releases/{id}` for candidates and keep the pure-Electronic ones.

    For the first `max_details` candidate ids this:
      * drops releases whose genre set is not exactly {"Electronic"},
      * replaces search-derived fields with the release detail's own values,
      * records community have/want counts,
      * boosts artists NOT already in the profile by half of the release's
        best label affinity.

    Side effect: newly seen artist ids are added to the module-level
    `artist_id_to_name` mapping.

    Returns:
        `(df_enriched, artist_scores_ext)`. `artist_scores_ext` is always a
        copy, so `artist_scores` is never mutated. `df_enriched` always has
        the enriched columns, even when no candidate survives; an empty
        result no longer raises in `drop_duplicates` and keeps the
        `have_count` / `want_count` columns used later.
    """
    enriched_columns = [
        "release_id", "title", "artists", "artist_ids", "labels", "genres",
        "styles", "year", "country", "uri", "have_count", "want_count",
    ]
    artist_scores_ext = dict(artist_scores)

    if df_cand.empty:
        return pd.DataFrame(columns=enriched_columns), artist_scores_ext

    profile_artist_ids = set(artist_scores.keys())

    rows = []
    ids = df_cand["release_id"].tolist()[:max_details]

    for rid in tqdm(ids, desc="Enriching candidates"):
        url = f"https://api.discogs.com/releases/{rid}"
        try:
            data = _raw_get(url, {}, "release_detail", page=1)
        except Exception as e:
            print("Release detail failed for", rid, ":", e)
            continue

        genres = data.get("genres") or []
        # require pure Electronic only
        if not genres or set(genres) != {"Electronic"}:
            continue

        styles = data.get("styles") or []
        country = data.get("country")

        artists = []
        artist_ids = []
        for a in data.get("artists") or []:
            if not isinstance(a, dict):
                continue
            nm = _clean_name(a.get("name", ""))
            aid = a.get("id")
            if nm:
                artists.append(nm)
            if aid:
                artist_ids.append(aid)
                artist_id_to_name.setdefault(aid, nm)

        labels_raw = data.get("labels") or []
        labels = [lab.get("name") for lab in labels_raw if isinstance(lab, dict) and lab.get("name")]
        label_keys = [k for k in (norm_label(l) for l in labels) if k]
        max_label_affinity = max([label_scores.get(k, 0.0) for k in label_keys] or [0.0])

        # boost NEW artists that appear on liked labels
        for aid in artist_ids:
            if aid not in profile_artist_ids:
                artist_scores_ext[aid] = artist_scores_ext.get(aid, 0.0) + max_label_affinity * 0.5

        community = data.get("community") or {}

        rows.append({
            "release_id": rid,
            "title": data.get("title"),
            "artists": artists,
            "artist_ids": artist_ids,
            "labels": labels,
            "genres": genres,
            "styles": styles,
            "year": parse_year_value(data.get("year")),
            "country": country,
            "uri": data.get("uri") or data.get("resource_url"),
            "have_count": community.get("have"),
            "want_count": community.get("want"),
        })

    if rows:
        df_enriched = pd.DataFrame(rows).drop_duplicates(subset=["release_id"])
    else:
        df_enriched = pd.DataFrame(columns=enriched_columns)
    print("Pure Electronic candidates after enrichment:", len(df_enriched))
    return df_enriched, artist_scores_ext

df_candidates_enriched, artist_scores_extended = enrich_candidates_with_release_details(
    df_candidates,
    label_scores,
    artist_scores,
)


In [None]:
# %% Build TF-IDF text representation

def build_release_text_features(df: pd.DataFrame) -> List[str]:
    """Build one whitespace-separated token string per release for TF-IDF.

    Each artist, label, genre, style and country becomes a single prefixed
    token (e.g. `label_r_s_records`), plus a year token and a source token
    when available.

    Every run of non-word characters inside a value is collapsed to `_`.
    Without that, a tokenizer that splits on punctuation would break a name
    like "R&S Records" into fragments that lose their `label_` prefix.

    Missing values (None / NaN) are skipped rather than turned into tokens
    such as `label_none` or `year_nan`.
    """

    def _as_list(value) -> list:
        """Return `value` if it is a list/tuple, else an empty list."""
        return list(value) if isinstance(value, (list, tuple)) else []

    def _token(prefix: str, value) -> Optional[str]:
        """Build `<prefix>_<slug>`; None when nothing usable remains."""
        slug = re.sub(r"\W+", "_", str(value).lower()).strip("_")
        return f"{prefix}_{slug}" if slug else None

    texts = []
    for _, row in df.iterrows():
        candidates = []

        for a in _as_list(row.get("artists")):
            candidates.append(_token("artist", _clean_name(str(a))))

        for l in _as_list(row.get("labels")):
            if l is not None:
                candidates.append(_token("label", l))

        for g in _as_list(row.get("genres")):
            candidates.append(_token("genre", g))

        for s in _as_list(row.get("styles")):
            candidates.append(_token("style", s))

        country = row.get("country")
        if isinstance(country, str) and country:
            candidates.append(_token("country", country))

        # The year column may hold floats/NaN after concatenation; normalise
        # to an int (or None) before asking for the token.
        try:
            year = int(row.get("year"))
        except (TypeError, ValueError, OverflowError):
            year = None
        candidates.append(year_bucket(year))

        source = row.get("source")
        if isinstance(source, str):
            candidates.append("source_" + source.lower())

        texts.append(" ".join(tok for tok in candidates if tok))
    return texts

# Profile rows that carry the "Electronic" genre; used to focus the taste vector.
mask_electronic_profile = df_all["genres"].apply(
    lambda gs: isinstance(gs, list) and "Electronic" in gs
)

# Fall back to the whole profile when nothing in it is tagged Electronic.
df_profile_vec = (
    df_all[mask_electronic_profile] if mask_electronic_profile.any() else df_all
).copy()

df_profile_vec = df_profile_vec.reset_index(drop=True)
df_cand_vec    = df_candidates_enriched.reset_index(drop=True)

df_profile_vec["source"] = df_profile_vec.get("source", "collection")

# Fit one vocabulary over profile + candidates so both share a feature space.
df_all_for_vector = pd.concat(
    [
        df_profile_vec.assign(kind="profile"),
        df_cand_vec.assign(kind="candidate"),
    ],
    ignore_index=True,
)

text_corpus = build_release_text_features(df_all_for_vector)

vectorizer = TfidfVectorizer(min_df=2, max_df=0.9, ngram_range=(1, 2))

X = vectorizer.fit_transform(text_corpus)
print("TF-IDF matrix shape:", X.shape)

# Boolean row selectors that split the joint matrix back into its two parts.
is_profile   = df_all_for_vector["kind"].eq("profile").values
is_candidate = df_all_for_vector["kind"].eq("candidate").values

X_profile   = X[is_profile]
X_candidate = X[is_candidate]

df_profile_vec = df_all_for_vector[is_profile].reset_index(drop=True)
df_cand_vec    = df_all_for_vector[is_candidate].reset_index(drop=True)

# Candidate rows must still line up one-to-one with the enriched frame.
assert len(df_cand_vec) == len(df_candidates_enriched)


In [None]:
# %% Numeric features: label affinity, artist affinity (no ratings)

def numeric_feature_matrix_for(
    df: pd.DataFrame,
    label_scores: Dict[str, float],
    artist_scores_ext: Dict[int, float],
):
    """Return an (n_rows, 2) array: [best label affinity, best artist affinity].

    For every row, column 0 is the highest `label_scores` value among the
    row's normalised labels and column 1 is the highest `artist_scores_ext`
    value among its artist ids. A row with no labels / no artist ids, or only
    unknown ones, gets 0.0 in that column.
    """

    def _best(scores, keys) -> float:
        return max([scores.get(k, 0.0) for k in keys] or [0.0])

    best_label_scores = []
    best_artist_scores = []

    for _, row in df.iterrows():
        normalised = (norm_label(l) for l in (row.get("labels") or []))
        best_label_scores.append(_best(label_scores, [k for k in normalised if k]))
        best_artist_scores.append(_best(artist_scores_ext, row.get("artist_ids") or []))

    stacked = np.vstack([np.array(best_label_scores), np.array(best_artist_scores)])
    return stacked.T

# Compute the affinity features for BOTH sides.
#
# With an all-zero numeric block on the profile side, the numeric columns add
# nothing to the dot product with the taste vector but still enlarge each
# candidate's norm, so a HIGHER affinity produced a LOWER cosine similarity.
# Giving the profile rows their real affinities makes the features count in
# the intended direction.
_num_profile_raw = numeric_feature_matrix_for(df_profile_vec, label_scores, artist_scores_extended)
_num_cand_raw    = numeric_feature_matrix_for(df_cand_vec, label_scores, artist_scores_extended)

# log1p damps the heavy tail of accumulated scores; dividing by a shared
# per-column maximum maps both blocks into [0, 1] so the two numeric columns
# are on a scale comparable to the TF-IDF part.
_num_profile_log = np.log1p(np.clip(_num_profile_raw.astype(float), 0.0, None))
_num_cand_log    = np.log1p(np.clip(_num_cand_raw.astype(float), 0.0, None))

# The extra zero row keeps max() defined when either block has no rows.
_num_col_max = np.vstack([_num_profile_log, _num_cand_log, np.zeros((1, 2))]).max(axis=0)
_num_col_max[_num_col_max == 0.0] = 1.0   # avoid division by zero

num_profile = _num_profile_log / _num_col_max
num_cand    = _num_cand_log / _num_col_max

X_profile_full   = hstack([X_profile, num_profile])
X_candidate_full = hstack([X_candidate, num_cand])

print("Profile matrix (with numerics):", X_profile_full.shape)
print("Candidate matrix (with numerics):", X_candidate_full.shape)


In [None]:
# %% Score candidates with cosine similarity + have/want-based adjustments

# Base taste vector
# Centroid of all profile rows, flattened from np.matrix to a (1, n_features) array.
user_vector = np.asarray(X_profile_full.mean(axis=0)).reshape(1, -1)

sims = cosine_similarity(X_candidate_full, user_vector)

df_scored = df_candidates_enriched.copy()
df_scored["base_score"] = sims.ravel()

# --- Have / want features (missing counts are treated as 0) ---
have = df_scored["have_count"].fillna(0).astype(float)
want = df_scored["want_count"].fillna(0).astype(float)

# Desirability rises with wants and falls with haves.
desirability = np.log1p(want) - np.log1p(have + 1.0)

# Flag (1.0 / 0.0) for records that more than 500 users already have.
overknown_penalty = (have > 500).astype(float)

df_scored["desirability"] = desirability
df_scored["overknown_penalty"] = overknown_penalty

# Final score = similarity + 0.25 * desirability - 0.6 * over-known flag.
df_scored["score"] = (
    df_scored["base_score"]
    + 0.25 * df_scored["desirability"]
    - 0.6 * df_scored["overknown_penalty"]
)

df_scored = df_scored.sort_values("score", ascending=False).reset_index(drop=True)

print("Top scored candidates:")
_preview_columns = ["release_id", "title", "artists", "labels", "score", "have_count", "want_count"]
display(df_scored[_preview_columns].head(10))


In [None]:
# %% Final recommendations — Electronic only, one per artist + label

def select_unique_recommendations(
    df_scored: pd.DataFrame,
    top_n: int = 50,
) -> pd.DataFrame:
    """Pick up to `top_n` pure-Electronic rows with no repeated artist or label.

    Rows are visited in the order given (callers pass them sorted by score).
    A row is kept only if its genre set is exactly {"Electronic"} and neither
    its first artist nor its first label was already used by a kept row.

    Returns:
        The kept rows with their original index, column dtypes and an added
        boolean `is_electronic` column. The result always carries the input's
        columns, also when nothing qualifies, and `top_n <= 0` yields no rows.
    """

    def _is_pure_electronic(gs) -> bool:
        # Non-collection values (None, NaN) simply do not qualify.
        return isinstance(gs, (list, tuple, set)) and set(gs) == {"Electronic"}

    def _stripped_names(value) -> list:
        if not isinstance(value, (list, tuple)):
            return []
        return [v.strip() for v in value if isinstance(v, str)]

    df = df_scored.copy()
    # genres already pure Electronic, but keep check
    df["is_electronic"] = np.array(
        [_is_pure_electronic(gs) for gs in df["genres"]], dtype=bool
    )

    n_rows = len(df)
    artists_col = df["artists"] if "artists" in df.columns else [None] * n_rows
    labels_col = df["labels"] if "labels" in df.columns else [None] * n_rows

    seen_artists = set()
    seen_labels = set()
    kept_positions = []

    if top_n > 0:
        rows = zip(df["is_electronic"], artists_col, labels_col)
        for position, (is_electronic, raw_artists, raw_labels) in enumerate(rows):
            if not is_electronic:
                continue

            artists = _stripped_names(raw_artists)
            labels = _stripped_names(raw_labels)
            main_artist = artists[0] if artists else None
            main_label = labels[0] if labels else None

            if main_artist and main_artist in seen_artists:
                continue
            if main_label and main_label in seen_labels:
                continue

            kept_positions.append(position)
            if main_artist:
                seen_artists.add(main_artist)
            if main_label:
                seen_labels.add(main_label)

            if len(kept_positions) >= top_n:
                break

    # Positional selection keeps columns and dtypes even for an empty result.
    return df.iloc[kept_positions].copy()

final_recs = select_unique_recommendations(df_scored, top_n=50)
print("Final Electronic recommendations:", len(final_recs))

# Columns shown in the preview, in display order.
cols_to_show = [
    "release_id", "title", "artists", "labels",
    "genres", "styles", "year", "country",
    "have_count", "want_count", "score", "uri",
]
_final_preview = final_recs[cols_to_show]
display(_final_preview.head(20))


In [None]:
# %% Save recommendations to CSV

# Written relative to the current working directory; the absolute path is printed.
out_path = Path("discogs_electronic_recommendations_v9_obscure.csv")
final_recs.to_csv(path_or_buf=out_path, index=False)
print("Saved recommendations to:", out_path.resolve())
