In [1]:
import pandas as pd
import numpy as np

import gurobipy as gb
from gurobipy import *

In [2]:
# Replace non-ASCII with '_'
def sanitize_name(name):
    return ''.join([c if ord(c) < 128 else '_' for c in name]) 

In [7]:
df = pd.read_csv("spotify_data.csv")
df = df.iloc[:,1:]
df = df.sample(n=1000)
df

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre,album_label,artist_gender
6371,6t1wUup2mF3IpJhMfAwoDl,Disturbed,Just Best Covers,If I Ever Lose My Faith In You,0,274055,False,0.379,0.5620,10,...,0.0376,0.07410,0.000012,0.1370,0.1720,173.861,4,alternative,Fuck Your Life Records,Unknown
9247,7qvxyKLc49nd9kGXpT5S95,GAYLE,Indie Chill Out,god has a sense of humor,1,170771,False,0.494,0.4500,2,...,0.0687,0.62200,0.000000,0.2420,0.3680,65.252,4,alternative,X5 Music Group,female
2884,6quygE0BQuMUY861Crte1N,Sajjan Raj Vaidya,Mooskaan,Mooskaan,50,303529,False,0.623,0.2200,2,...,0.0423,0.81300,0.000058,0.1070,0.4750,136.007,4,indie,Unknown,Unknown
8492,5o5FzPpe33A6Aem8YpLsZn,Henrique & Juliano,Ao Vivo Em Brasília (Deluxe),Eu Me Enrosquei De Novo - Ao Vivo,45,181826,False,0.543,0.9150,0,...,0.0663,0.48200,0.000000,0.3230,0.5010,131.402,4,sertanejo,Slap,Unknown
7705,4s6LhHAV5SEsOV0lC2tjvJ,The Mamas & The Papas,If You Can Believe Your Eyes & Ears,California Dreamin' - Single Version,78,162373,False,0.552,0.6080,1,...,0.0345,0.35200,0.000000,0.0533,0.6370,112.367,4,folk,Universal Music Special Markets,Unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7853,0dUOr3rkkH19T12jRCxjUh,PianoDeuss,Hokage Funeral (Naruto Original Soundtrack),Hokage Funeral (Naruto Original Soundtrack),40,150674,False,0.306,0.0653,9,...,0.0354,0.96500,0.930000,0.1240,0.1090,65.195,4,anime,Aniplex,male
5442,1XOoxrHEwBrh6NxGEDYN7z,Sergei Lemeshev,Сергей Лемешев. 20 золотых песен,Я тебе ничего не скажу,0,188333,False,0.334,0.0405,5,...,0.0426,0.99100,0.000318,0.1760,0.0625,94.957,3,romance,Unknown,male
6593,0qYivybG83m6eGLqPx0dr3,Owl City,Pop Fun and Christmas Music 2022,"Kiss Me Babe, It's Christmas Time",0,229033,False,0.705,0.5670,0,...,0.0306,0.01990,0.000000,0.0937,0.5620,105.960,4,rock,LifeWay Worship,male
2690,4IiuExPFijOGZnVxGsKWcc,The White Stripes;The Glitch Mob,Seven Nation Army (The Glitch Mob Remix),Seven Nation Army - The Glitch Mob Remix,65,257021,False,0.701,0.8440,4,...,0.1330,0.15200,0.626000,0.1940,0.1980,120.040,4,garage,Legacy,Unknown


In [11]:
model = gb.Model("Spotify Recommendation")
# We ask Gurobi not to print too much on screen
model.Params.OutputFlag = 0

#Importing User preference
dfw = pd.read_csv("User_Music_Preferences.csv")
dfw = dfw.sample(n = 1)

#Importing data from the dataset
trackNames = df['track_name'].fillna('Unknown Track').astype(str)
popularity = df['popularity'].reset_index(drop=True)
duration = df['duration_ms'].reset_index(drop=True)
explicit = df['explicit'].reset_index(drop=True)
danceability = df['danceability'].reset_index(drop=True)
energy = df['energy'].reset_index(drop=True)
key = df['key'].reset_index(drop=True)
loudness = df['loudness'].reset_index(drop=True)
speechiness = df['speechiness'].reset_index(drop=True)
acousticness = df['acousticness'].reset_index(drop=True)
instrumentalness = df['instrumentalness'].reset_index(drop=True)
liveness = df['liveness'].reset_index(drop=True)
valence = df['valence'].reset_index(drop=True)
tempo = df['tempo'].reset_index(drop=True)
trackGenre = df['track_genre'].reset_index(drop=True)
albumLabel = df['album_label'].reset_index(drop=True)
artistGender = df['artist_gender'].reset_index(drop=True)
allGenres = trackGenre.unique()

n =len(trackNames)
ng = len(allGenres)
M = 100000
popularityCriteria = 1
proportionThreshold = 0.2
explicitValue = dfw['Explicit']
#Limiting the character names
shortenedNames = [sanitize_name(name[:150]) for name in trackNames]

#Main variables
t = model.addVars(n, vtype = GRB.BINARY, name = shortenedNames)
y = model.addVars(n, vtype = GRB.BINARY, name = "auxiliray variables for Popularity Constraint")
g = model.addVars(ng, vtype = GRB.BINARY, name = [genre for genre in allGenres])

#Weightages
w = []
w.append(int(dfw["totalDanceability"]))
w.append(int(dfw["totalEnergy"]))
w.append(int(dfw["totalSpeechiness"]))
w.append(int(dfw["totalAcousticness"]))
w.append(int(dfw["totalInstrumentalness"]))
w.append(int(dfw["totalLiveness"]))
w.append(int(dfw["totalValence"]))

#Objectives
totalPopularity = gb.quicksum(popularity[i] * t[i] for i in range(n))
totalDanceability = gb.quicksum(danceability[i] * t[i] for i in range(n))
totalEnergy = gb.quicksum(energy[i] * t[i] for i in range(n))
totalSpeechiness = gb.quicksum(speechiness[i] * t[i] for i in range(n))
totalAcousticness = gb.quicksum(acousticness[i] * t[i] for i in range(n))
totalInstrumentalness = gb.quicksum(instrumentalness[i] * t[i] for i in range(n))
totalLiveness = gb.quicksum(liveness[i] * t[i] for i in range(n))
totalValence = gb.quicksum(valence[i] * t[i] for i in range(n))
weightedTotal = w[0]*totalDanceability + w[1]*totalEnergy + w[2]*totalSpeechiness + w[3]*totalAcousticness + w[4]*totalInstrumentalness + w[5]*totalLiveness + w[6]*totalValence

#Objective Functions
model.setObjectiveN(totalPopularity,priority = 1, index = 1)
model.setObjectiveN(weightedTotal, priority = 1, index = 0)
model.ModelSense = GRB.MAXIMIZE

#Constraints
#Max Duration
model.addConstr(gb.quicksum(duration[i]*t[i] for i in range(n)) <= 1800000, name = "Max Duration")

#Popularity Threshold
explicitCriteria = 1
for i in range(n):    
    model.addConstr(popularity[i]*t[i] >=  popularityCriteria*y[i], name = f"Popularity Threshold for track {i+1}a")
    model.addConstr(t[i] <= y[i], name = f"Popularity Threshold for track {i+1}b")
#Explicit Content    
model.addConstr(gb.quicksum(explicit[i]*t[i] for i in range(n)) <= M*explicitCriteria, name = "Explicit Constraint")

#Mapping Genres
for i in range(n):
    # Get the index of the genre for the current track
    genre_index = list(allGenres).index(trackGenre[i])
    # If track `i` is selected, ensure that the corresponding genre variable `g` is set to 1
    model.addConstr(g[genre_index] == t[i], name=f"Genre Constraint for track {i+1}")

#Female Artists
female_sum = gb.quicksum(t[i] for i in range(n) if artistGender[i] == "female")
total_sum = gb.quicksum(t[i] for i in range(n))
model.addConstr(female_sum >= proportionThreshold * total_sum, name="Female Proportion Constraint")

model.optimize()

  w.append(int(dfw["totalDanceability"]))
  w.append(int(dfw["totalEnergy"]))
  w.append(int(dfw["totalSpeechiness"]))
  w.append(int(dfw["totalAcousticness"]))
  w.append(int(dfw["totalInstrumentalness"]))
  w.append(int(dfw["totalLiveness"]))
  w.append(int(dfw["totalValence"]))


In [12]:
# Collect results
selected_tracks = []
total_duration = 0
total_popularity_score = 0
weighted_score = 0
female_count = 0
total_selected_tracks = 0

print("\nSelected Tracks:")
print("---------------")
for i in range(n):
    if t[i].x > 0:  # If track is selected
        track_info = {
            "Name": shortenedNames[i],
            "Genre": trackGenre[i],
            "Popularity": popularity[i],
            "Duration (ms)": duration[i],
            "Danceability": danceability[i],
            "Energy": energy[i],
            "Speechiness": speechiness[i],
            "Acousticness": acousticness[i],
            "Instrumentalness": instrumentalness[i],
            "Liveness": liveness[i],
            "Valence": valence[i],
            "Artist Gender": artistGender[i]
        }
        selected_tracks.append(track_info)
        
        # Summing up the metrics for selected tracks
        total_duration += duration[i]
        total_popularity_score += popularity[i]
        weighted_score += (
            w[0] * danceability[i] +
            w[1] * energy[i] +
            w[2] * speechiness[i] +
            w[3] * acousticness[i] +
            w[4] * instrumentalness[i] +
            w[5] * liveness[i] +
            w[6] * valence[i]
        )

        # Count tracks by female artists
        if artistGender[i] == "female":
            female_count += 1
        
        # Count total selected tracks
        total_selected_tracks += 1

# Print selected tracks with details
for track in selected_tracks:
    print(f"Track: {track['Name']}")
    print(f"  Genre: {track['Genre']}")
    print(f"  Popularity: {track['Popularity']}")
    print(f"  Duration (ms): {track['Duration (ms)']}")
    print(f"  Attributes:")
    print(f"    Danceability: {track['Danceability']}, Energy: {track['Energy']}")
    print(f"    Speechiness: {track['Speechiness']}, Acousticness: {track['Acousticness']}")
    print(f"    Instrumentalness: {track['Instrumentalness']}, Liveness: {track['Liveness']}")
    print(f"    Valence: {track['Valence']}")
    print(f"  Artist Gender: {track['Artist Gender']}")
    print("")

# Print summary of the key metrics for the selected tracks
print("\nSummary of Selected Tracks:")
print("--------------------------")
print(f"Total Duration (ms): {total_duration}")
print(f"Total Popularity Score: {total_popularity_score}")
print(f"Weighted Score: {weighted_score:.2f}")

# Female artist constraint result
female_proportion = female_count / total_selected_tracks if total_selected_tracks > 0 else 0
print("\nFemale Artist Proportion:")
print("-------------------------")
print(f"Female Artists: {female_count} out of {total_selected_tracks} selected tracks")
print(f"Proportion of Female Artists: {female_proportion:.2%}")
print(f"Proportion Threshold Required: {proportionThreshold:.2%}")

# Optional: Printing auxiliary variable results for debugging
print("\nAuxiliary Variables:")
print("---------------------")
for var in y.values():
    if var.x > 0:
        print(f"{var.varName} = {var.x}")
print("---------------------")
for genre_var in g.values():
    if genre_var.x > 0:
        print(f"{genre_var.varName} = {genre_var.x}")


Selected Tracks:
---------------
Track: Confidencias De Amor - Piel Canela
  Genre: rock-n-roll
  Popularity: 33
  Duration (ms): 299983
  Attributes:
    Danceability: 0.548, Energy: 0.547
    Speechiness: 0.0495, Acousticness: 0.503
    Instrumentalness: 0.0, Liveness: 0.0943
    Valence: 0.694
  Artist Gender: female

Track: Blueberry Hill
  Genre: rock-n-roll
  Popularity: 61
  Duration (ms): 147880
  Attributes:
    Danceability: 0.489, Energy: 0.499
    Speechiness: 0.0271, Acousticness: 0.74
    Instrumentalness: 0.000236, Liveness: 0.156
    Valence: 0.829
  Artist Gender: male

Track: The Boatswain's Song - Demo Recording
  Genre: disney
  Popularity: 23
  Duration (ms): 186773
  Attributes:
    Danceability: 0.688, Energy: 0.325
    Speechiness: 0.0462, Acousticness: 0.981
    Instrumentalness: 0.65, Liveness: 0.725
    Valence: 0.812
  Artist Gender: male

Track: Little Black Rain Cloud
  Genre: disney
  Popularity: 20
  Duration (ms): 88910
  Attributes:
    Danceability: 