In [1]:
# goal: we get a list of all 100 songs picked from the improved model
# we then want to order them to artificially create "peaks" within a playlist
# we want to maximize the total rating of the songs placed at the peak positions (the opening song and every 5th song) in order to create a more engaging playlist

# WHY? 
# This formulation places the highest-rated songs at the peak positions (position 1 and every multiple of 5),
# creating an engaging listening experience with regular high points.
# The objective only scores peak positions, so the order of the remaining songs is left unconstrained.

Math formulation for optimization model:

Sets:
- $S$: set of songs (100 songs selected from the improved model)
- $P$: set of positions in the playlist (1 to 100)
- $Peaks$: set of peak positions, $Peaks \subseteq P$: position 1 (to engage the listener from the first song) together with every multiple of 5, i.e. 1, 5, 10, 15, ..., 100

Parameters:
- $rating_i$: rating of song $i$, for all $i \in S$

Decision Variables:
- $X_{i,j}$: binary variable, equals 1 if song $i$ is placed in position $j$, 0 otherwise
for all $i \in S$, for all $j \in P$

Objective:
- Maximize the sum of ratings at peak positions: 

$\max \sum_{i \in S} \sum_{j \in Peaks} X_{i,j} \cdot rating_i$

Constraints:
- Each song must be placed exactly once in the playlist:

$\sum_{j \in P} X_{i,j} = 1$, for all $i \in S$

- Each position must contain exactly one song:

$\sum_{i \in S} X_{i,j} = 1$, for all $j \in P$

In [None]:
import pandas as pd
import numpy as np
from gurobipy import Model, GRB, quicksum

# Read the songs chosen by the improved model.
# The file name and column names below are placeholders from the example setup;
# swap in the real data-loading code if the selection is stored elsewhere.
df_selected_songs = pd.read_csv('selected_songs.csv')  # expected to hold the 100 selected songs

# Pull the three columns the model and the report need, as plain Python lists
ratings, track_names, artists = (
    df_selected_songs[column].to_list()
    for column in ('average_prediction', 'track_name', 'artist_name')
)

# Index sets: playlist positions are 1-based, one position per song
num_songs = len(ratings)  # Should be 100
positions = range(1, num_songs + 1)

# Peaks are the opening position plus every 5th position: 1, 5, 10, 15, ..., 100
peak_positions = [1, *range(5, num_songs + 1, 5)]

# Assemble the assignment model that decides which song goes in which position
m = Model("Playlist_Ordering")
song_ids = range(num_songs)

# X[i, j] is binary: 1 when song i occupies position j, 0 otherwise.
# Passing both index ranges makes addVars create one variable per (song, position) pair.
X = m.addVars(song_ids, positions, vtype=GRB.BINARY, name="X")

# Every song is used in exactly one position
for i in song_ids:
    slots_for_song = quicksum(X[i, j] for j in positions)
    m.addConstr(slots_for_song == 1, name=f"song_{i}_once")

# Every position is filled by exactly one song
for j in positions:
    songs_in_slot = quicksum(X[i, j] for i in song_ids)
    m.addConstr(songs_in_slot == 1, name=f"position_{j}_filled")

# Objective: total rating of the songs that land on peak positions
peak_rating_total = quicksum(
    ratings[i] * X[i, j] for i in song_ids for j in peak_positions
)
m.setObjective(peak_rating_total, GRB.MAXIMIZE)

# Solve the model
m.optimize()

# Report the optimized ordering; results are only read when the solve ended with an optimal status
if m.status == GRB.OPTIMAL:
    # Set lookup keeps the per-song peak check O(1) instead of scanning the list each time
    peak_lookup = set(peak_positions)

    # One slot per playlist position; slot j-1 holds the song assigned to position j
    ordered_playlist = [None] * num_songs

    # Walk every (song, position) variable once. Solution values are floats and may not be
    # exactly 0 or 1, so a variable counts as "selected" when its value exceeds 0.5.
    for (i, j), var in X.items():
        if var.X > 0.5:  # song i is assigned to position j
            ordered_playlist[j - 1] = {
                'position': j,
                'track_name': track_names[i],
                'artist_name': artists[i],
                'rating': ratings[i],
                'is_peak': j in peak_lookup
            }

    # Convert to a DataFrame for tabular display and summary statistics
    ordered_df = pd.DataFrame(ordered_playlist)

    # print(ordered_df) abbreviates frames longer than pandas' display.max_rows (60 by default),
    # which would hide most of a 100-song playlist; to_string() renders every row.
    print("Ordered Playlist:")
    print(ordered_df.to_string())

    # Split the ratings once by peak / non-peak position and reuse the split below
    is_peak = ordered_df['is_peak']
    peak_ratings = ordered_df.loc[is_peak, 'rating']
    nonpeak_ratings = ordered_df.loc[~is_peak, 'rating']

    # Total rating collected at peak positions (the quantity the model maximized)
    peak_rating_sum = peak_ratings.sum()
    print(f"\nTotal rating at peak positions: {peak_rating_sum:.2f}")

    # Average rating at peak vs. non-peak positions
    avg_peak_rating = peak_ratings.mean()
    avg_nonpeak_rating = nonpeak_ratings.mean()
    print(f"Average rating at peak positions: {avg_peak_rating:.2f}")
    print(f"Average rating at non-peak positions: {avg_nonpeak_rating:.2f}")
else:
    print("No optimal solution found.")
