# Playlist Builder

This notebook is a prototype for functionality that will be built into the Music Catalogue API and UI.

In [None]:
%run database.ipynb
%run pathutils.ipynb
%run export.ipynb

In [None]:
# Open a connection and load the two frames every playlist builder below consumes.
# NOTE(review): connect / load_artists / load_artist_moods are not defined in this
# notebook; presumably they come from the %run notebooks above — confirm.
connection = connect()
artists_df = load_artists(connection)
artist_moods_df = load_artist_moods(connection)

## Playlist Builder Functions

In [None]:
import numpy as np
import pandas as pd

# Time-of-day key -> name of the per-time weight column.
# NOTE: build_playlist_artists_from_mapping defines its own local TIME_COL and
# STYLE_TARGETS with the same contents, so editing the values here does not
# change that function's behaviour.
TIME_COL = {
    "morning": "MorningWeight",
    "afternoon": "AfternoonWeight",
    "evening": "EveningWeight",
    "late": "LateWeight",
}

# Defaults that can be tweaked to taste
# Target Energy / Intimacy / Warmth point per time of day; read by the
# module-level _style_fit to measure how close an artist is to the target.
STYLE_TARGETS = {
    "morning":   {"Energy": 3.0, "Intimacy": 2.0, "Warmth": 3.0},
    "afternoon": {"Energy": 3.5, "Intimacy": 2.0, "Warmth": 3.0},
    "evening":   {"Energy": 2.5, "Intimacy": 3.5, "Warmth": 3.5},
    "late":      {"Energy": 1.8, "Intimacy": 4.2, "Warmth": 3.8},
}

def _style_fit(row: pd.Series, time_key: str,
               w_energy: float = 1.0, w_intimacy: float = 1.0, w_warmth: float = 1.0) -> float:
    """
    0..1 fit. Higher = closer to the target style for this time-of-day.

    Parameters
    ----------
    row : mapping with numeric "Energy", "Intimacy" and "Warmth" entries.
    time_key : key into the module-level STYLE_TARGETS; an unknown key raises
        KeyError.
    w_energy, w_intimacy, w_warmth : per-axis multipliers applied to both the
        distance and its normaliser.

    Returns
    -------
    float
        1.0 minus the weighted Euclidean distance to the target, divided by the
        largest distance the normaliser allows, clipped to [0, 1].
    """
    t = STYLE_TARGETS[time_key]
    de = (row["Energy"]   - t["Energy"])   * w_energy
    di = (row["Intimacy"] - t["Intimacy"]) * w_intimacy
    dw = (row["Warmth"]   - t["Warmth"])   * w_warmth

    dist = np.sqrt(de*de + di*di + dw*dw)
    # Normaliser treats 5 units as the widest possible gap on each axis.
    # NOTE(review): presumably the ratings live on a 0-5 or 1-5 scale — confirm.
    max_dist = np.sqrt((5*w_energy)**2 + (5*w_intimacy)**2 + (5*w_warmth)**2)

    # With every weight at 0 no axis contributes, so dist and max_dist are both
    # 0 and the division below would produce NaN. Treat that as a perfect fit.
    if max_dist == 0.0:
        return 1.0

    return float(np.clip(1.0 - dist / max_dist, 0.0, 1.0))

def build_playlist_artists_from_mapping(
    artists_df: pd.DataFrame,
    artist_moods_w_df: pd.DataFrame,
    time_key: str,
    n: int = 25,

    # scoring weights
    w_style: float = 0.55,
    w_mood: float = 0.45,

    # sequencing controls
    transition_penalty: float = 0.15,
    avoid_recent: int = 6,

    # randomness controls
    seed: int | None = None,     # Set None for fresh randomness each run
    top_k: int = 8,              # Pick from top_k each step
    temperature: float = 0.7,    # Lower = more "greedy", higher = more random
    random_jitter: float = 0.01,
):
    TIME_COL = {
        "morning": "MorningWeight",
        "afternoon": "AfternoonWeight",
        "evening": "EveningWeight",
        "late": "LateWeight",
    }

    STYLE_TARGETS = {
        "morning":   {"Energy": 3.0, "Intimacy": 2.0, "Warmth": 3.0},
        "afternoon": {"Energy": 3.5, "Intimacy": 2.0, "Warmth": 3.0},
        "evening":   {"Energy": 2.5, "Intimacy": 3.5, "Warmth": 3.5},
        "late":      {"Energy": 1.8, "Intimacy": 4.2, "Warmth": 3.8},
    }

    def _style_fit(row, time_key):
        t = STYLE_TARGETS[time_key]
        de = row["Energy"]   - t["Energy"]
        di = row["Intimacy"] - t["Intimacy"]
        dw = row["Warmth"]   - t["Warmth"]
        dist = np.sqrt(de*de + di*di + dw*dw)
        max_dist = np.sqrt(5**2 + 5**2 + 5**2)
        return float(np.clip(1.0 - dist/max_dist, 0.0, 1.0))

    time_key = time_key.lower().strip()
    if time_key not in TIME_COL:
        raise ValueError(f"time_key must be one of {list(TIME_COL.keys())}")

    wcol = TIME_COL[time_key]
    rng = np.random.default_rng(seed)  # seed=None => different each run

    # Aggregate mood score per artist
    mood_scores = (
        artist_moods_w_df
        .groupby("Artist_Id", as_index=False)[wcol]
        .sum()
        .rename(columns={wcol: "MoodScore"})
    )

    base = artists_df.merge(mood_scores, on="Artist_Id", how="left")
    base["MoodScore"] = base["MoodScore"].fillna(0.0)

    # Normalize mood score to 0..1
    mn, mx = base["MoodScore"].min(), base["MoodScore"].max()
    base["MoodScoreNorm"] = 0.0 if mx == mn else (base["MoodScore"] - mn) / (mx - mn)

    # Style fit + base score
    base["StyleFit"] = base.apply(lambda r: _style_fit(r, time_key), axis=1)
    base["BaseScore"] = (w_style * base["StyleFit"]) + (w_mood * base["MoodScoreNorm"])
    base["BaseScore"] = base["BaseScore"] + rng.normal(0.0, random_jitter, size=len(base))

    style_vecs = base.set_index("Artist_Id")[["Energy", "Intimacy", "Warmth"]]

    def transition_cost(prev_id, cand_id):
        if prev_id is None:
            return 0.0
        a = style_vecs.loc[prev_id].to_numpy(dtype=float)
        b = style_vecs.loc[cand_id].to_numpy(dtype=float)
        return float(np.linalg.norm(a - b) / np.linalg.norm(np.array([5, 5, 5], dtype=float)))

    chosen, recent_ids, prev_id = [], [], None
    remaining = base.copy()

    def softmax_probs(scores: np.ndarray, temp: float) -> np.ndarray:
        temp = max(temp, 1e-6)
        x = scores / temp
        x = x - np.max(x)  # stability
        p = np.exp(x)
        p = p / np.sum(p)
        return p

    while len(chosen) < min(n, len(remaining)):
        rem = remaining.copy()

        rem["RecentPenalty"] = rem["Artist_Id"].isin(recent_ids).astype(float)
        rem["TransitionPenalty"] = rem["Artist_Id"].apply(lambda x: transition_cost(prev_id, x))

        rem["StepScore"] = (
            rem["BaseScore"]
            - (transition_penalty * rem["TransitionPenalty"])
            - (0.50 * rem["RecentPenalty"])
        )

        # Take top_k candidates
        rem_sorted = rem.sort_values("StepScore", ascending=False)
        k = min(top_k, len(rem_sorted))
        pool = rem_sorted.head(k)

        # Weighted random pick from the pool based on StepScore
        scores = pool["StepScore"].to_numpy(dtype=float)
        probs = softmax_probs(scores, temperature)
        pick_idx = rng.choice(np.arange(k), p=probs)
        pick = pool.iloc[pick_idx]

        chosen.append(pick)
        prev_id = int(pick["Artist_Id"])
        recent_ids.append(prev_id)
        if len(recent_ids) > avoid_recent:
            recent_ids.pop(0)

        remaining = remaining[remaining["Artist_Id"] != prev_id]

    playlist_df = pd.DataFrame(chosen).reset_index(drop=True)
    cols = [
        "Artist_Id","Artist_Name",
        "StepScore","BaseScore","StyleFit",
        "MoodScore","MoodScoreNorm",
        "Energy","Intimacy","Warmth",
        "VocalPresence","EnsembleType"
    ]
    return playlist_df[[c for c in cols if c in playlist_df.columns]]

def build_playlist_tod(
    artists_df,
    artist_moods_w_df,
    time_key: str,
    n: int = 25,
    variety: float = 0.5,     # 0..1 (0 = safest, 1 = most varied)
    seed: int | None = None,  # None = different every run
):
    """
    Build a time-of-day playlist controlled by a single ``variety`` dial.

    A per-time-of-day baseline (pool size, softmax temperature) is scaled by
    ``variety``, clamped, and handed to build_playlist_artists_from_mapping
    together with a fixed avoid_recent of 8 and transition_penalty of 0.18.

    ``variety`` is not range-checked; values outside 0..1 only shift the
    scaled settings, which are then clamped to 3..15 (pool size) and
    0.35..1.15 (temperature).

    Raises ValueError when ``time_key`` (case/whitespace-insensitive) is not
    morning, afternoon, evening or late.
    """
    # (pool size, temperature) starting point for each time of day
    baselines = {
        "morning":   (10, 0.8),
        "afternoon": (10, 0.8),
        "evening":   (8, 0.7),
        "late":      (6, 0.6),
    }

    slot = time_key.lower().strip()
    if slot not in baselines:
        raise ValueError("time_key must be morning/afternoon/evening/late")

    start_k, start_temp = baselines[slot]

    # Pool size scales by 0.75x..1.5x and temperature by 0.70x..1.40x over variety 0..1
    scaled_k = int(round(start_k * (0.75 + 0.75 * variety)))
    scaled_temp = start_temp * (0.70 + 0.70 * variety)

    # Keep both within sensible bounds
    pool_size = max(3, min(scaled_k, 15))
    heat = float(np.clip(scaled_temp, 0.35, 1.15))

    return build_playlist_artists_from_mapping(
        artists_df=artists_df,
        artist_moods_w_df=artist_moods_w_df,
        time_key=slot,
        n=n,
        transition_penalty=0.18,
        avoid_recent=8,
        seed=seed,
        top_k=pool_size,
        temperature=heat,
    )
import numpy as np

def build_short_playlist(
    artists_df,
    artist_moods_w_df,
    time_key: str,
    n: int = 4,               # 3..5
    variety: float = 0.4,     # 0..1; keep this low for short lists
    seed: int | None = None,  # None => new each run
):
    """
    Build a short (3-5 entry) playlist with tighter sampling settings.

    Starts from a per-time-of-day preset (pool size, temperature, transition
    penalty), nudges it by ``variety``, clamps the results, clamps ``n`` to
    3..5, and delegates to build_playlist_artists_from_mapping with
    avoid_recent=10 and random_jitter=0.005.

    Raises ValueError when ``time_key`` (case/whitespace-insensitive) is not
    morning, afternoon, evening or late.
    """
    # (pool size, temperature, transition penalty) for 3-5 entry playlists
    tight = {
        "morning":   (4, 0.55, 0.22),
        "afternoon": (4, 0.55, 0.22),
        "evening":   (3, 0.45, 0.25),
        "late":      (2, 0.35, 0.28),
    }

    slot = time_key.lower().strip()
    if slot not in tight:
        raise ValueError("time_key must be morning/afternoon/evening/late")

    k0, temp0, penalty0 = tight[slot]

    # Gentle scaling: short lists should not swing wildly with variety
    pool_size = int(round(k0 + variety * 2))        # adds 0..2 over variety 0..1
    heat = temp0 * (1.0 + 0.5 * variety)            # up to 1.5x
    smoothness = penalty0 * (1.0 - 0.2 * variety)   # a little looser at high variety

    pool_size = max(2, min(pool_size, 6))
    heat = float(np.clip(heat, 0.30, 0.85))
    smoothness = float(np.clip(smoothness, 0.18, 0.35))

    length = int(np.clip(n, 3, 5))

    return build_playlist_artists_from_mapping(
        artists_df=artists_df,
        artist_moods_w_df=artist_moods_w_df,
        time_key=slot,
        n=length,
        transition_penalty=smoothness,
        avoid_recent=10,
        seed=seed,
        top_k=pool_size,
        temperature=heat,
        random_jitter=0.005,  # smaller jitter than the builder's 0.01 default
    )

# Variation check

In [None]:
def overlap(a, b):
    """
    Jaccard overlap of the artists in two playlists.

    ``a`` and ``b`` are anything indexable by "Artist_Id" that yields an
    iterable of ids (for example the frames returned by the playlist builders).

    Returns shared ids / all distinct ids, a float in 0..1. Two playlists that
    are both empty are treated as identical and return 1.0 rather than
    dividing by zero.
    """
    ids_a = set(a["Artist_Id"])
    ids_b = set(b["Artist_Id"])
    combined = ids_a | ids_b
    if not combined:
        return 1.0
    return len(ids_a & ids_b) / len(combined)

# Build the same evening playlist twice with seed=None (fresh randomness each
# time) and measure how many artists the two runs share.
p1 = build_playlist_tod(artists_df, artist_moods_df, "evening", n=3, variety=0.6, seed=None)
p2 = build_playlist_tod(artists_df, artist_moods_df, "evening", n=3, variety=0.6, seed=None)

# Last expression of the cell, so the overlap ratio is shown as the cell output.
overlap(p1, p2)

# Morning Playlist

In [None]:
# Three-artist morning playlist at medium variety; seed=None means a new draw per run.
morning_playlist = build_playlist_tod(artists_df, artist_moods_df, "morning", n=3, variety=0.5, seed=None)
display(morning_playlist)

# Afternoon Playlist

In [None]:
# Three-artist afternoon playlist at medium variety; seed=None means a new draw per run.
afternoon_playlist = build_playlist_tod(artists_df, artist_moods_df, "afternoon", n=3, variety=0.5, seed=None)
display(afternoon_playlist)

# Evening Playlist

In [None]:
# Evening uses the short-playlist builder (tighter presets) at low variety,
# unlike the morning/afternoon cells, which call build_playlist_tod.
evening_playlist = build_short_playlist(artists_df, artist_moods_df, "evening", n=3, variety=0.1, seed=None)
display(evening_playlist)

# Late Night Playlist

In [None]:
# Late-night set: short-playlist builder at low variety, new draw on every run.
late_playlist = build_short_playlist(
    artists_df,
    artist_moods_df,
    time_key="late",
    n=3,
    variety=0.1,
    seed=None,
)
display(late_playlist)