In [15]:
import logging

import pandas as pd

from setlist import (
    load_setlist_cache,
    derive_song_features,
    get_recent_setlists,
    extract_common_songs,
)


logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


setlist_cache = load_setlist_cache()
setlists = get_recent_setlists(setlist_cache, "zhu", api_key=None)  # Use cache only
songs_by_date = extract_common_songs(setlists)

df = derive_song_features(songs_by_date, decay_rate=0.9)

2024-11-16 12:35:45,329 - INFO - Using cache file /Users/dragos/Programming/python/autogigification/setlist_cache.json
2024-11-16 12:35:45,339 - INFO - Using cached setlist for zhu


In [16]:
# Find relative positions of songs in setlist
df["normalised_position"] = df["position"] / df["setlist_size"]

# Create position bins
position_bins = [0, 0.2, 0.8, 1]
position_labels = ["Start", "Middle", "End"]
df["position_bin"] = pd.cut(
    df["normalised_position"], bins=position_bins, labels=position_labels
)

# Create frequency table of positions scaled by recency
weighted_position_freq = (
    df.groupby(["position_bin", "name"], observed=True)["weight"].sum().unstack(fill_value=0)
)

weighted_position_freq

name,Automatic,Better Recognize,Bloodmoon Ritual,Cake,Came for the Low,Can You Feel the Rain,Carnival (feat. Rich The Kid & Playboi Carti),Changes,Chasing Marrakech,Cocaine Model,...,Sky Is Crying,Sky Is Crying / I want to know if you've ever seen the rain,"Stormy Love, NM.",Take My Soul,The Fall,The Wait,Unbothered,Veselka / Desert Woman,Working for It,Zhudio54
position_bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Start,0.270774,0.0,0.0,0.0,0.0,0.0,0.435027,0.0,0.535902,0.0,...,0.0,0.0,0.0,1.077721,0.535902,0.0,0.0,0.0,0.435027,0.0
Middle,0.265128,0.0,0.265128,0.270774,1.666436,0.262349,0.0,0.265128,0.0,0.703932,...,0.541819,0.265128,0.535902,0.0,0.0,0.270774,0.270774,0.435027,0.535902,0.859489
End,0.0,0.270774,0.0,0.0,0.270774,0.0,0.0,0.0,0.0,1.233278,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.535902


In [18]:
# Given a desired setlist length, find the most frequently played songs according to features

setlist_length = 12

all_songs = set()
setlist = []

# Find first and last songs
most_likely_first = df.loc[df["is_first"]].groupby("name")["weight"].sum().idxmax()
most_likely_last = df.loc[df["is_last"]].groupby("name")["weight"].sum().idxmax()

all_songs = {most_likely_first, most_likely_last}
setlist = [most_likely_first]

# Fill middle in order according to position bin
for i in range(2, setlist_length):
    current_bin = position_labels[i // setlist_length]
    remaining_songs_position_freq = weighted_position_freq.loc[
        current_bin,
        [song for song in weighted_position_freq.columns if song not in all_songs],
    ]  # type: ignore

    most_likely_song = remaining_songs_position_freq.idxmax()
    setlist.append(most_likely_song)
    all_songs.add(most_likely_song)

setlist.append(most_likely_last)

setlist

['Take My Soul',
 'In the Morning',
 'Rolling',
 'Chasing Marrakech',
 'The Fall',
 'Carnival (feat. Rich The Kid & Playboi Carti)',
 'Working for It',
 'Dreams',
 'Automatic',
 'Better Recognize',
 'Bloodmoon Ritual',
 'Faded']