In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from scipy.spatial.distance import pdist, squareform

# Suppress indexing warnings
# This switches off pandas' chained-assignment warning for the whole notebook,
# so assignments into sliced frames further down will not be reported.
pd.options.mode.chained_assignment = None  # default='warn'
import sys
# Add the local project directory to the import path so `song_orderer` resolves.
# NOTE(review): this is an absolute, machine-specific path; on another machine
# it will most likely need changing (or the package needs installing instead).
sys.path.append('/Users/irvinc01/Innovation/spotify_optimisation')
from song_orderer import Playlist

In [2]:
# Load the playlist behind this Spotify share URL through the project's Playlist helper.
# NOTE(review): the cells below treat the result as a pandas DataFrame with one
# row per track ('Track Name', 'Artist Name', audio-feature columns) - confirm
# against song_orderer.Playlist.get_playlist_data.
playlist_df = Playlist('https://open.spotify.com/playlist/3dsT3A4RpUUwYXFxTjGmiQ?si=1f860b1c51674d19').get_playlist_data()

In [3]:
# Display the loaded playlist table as the cell output
playlist_df

Unnamed: 0,Track Name,Artist Name,Album Name,Release Date,Duration (ms),Popularity,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Valence,Tempo,Key,Mode,Time Signature
0,Habitual Drum Loop no.08,X CLUB.,300 TOOLS FOR SUCCESS,2022-09-16,412867,31,0.000092,0.670,0.985,0.794000,0.1150,-6.533,0.0535,0.0898,143.004,7,1,4
1,Hummer,Juicy Romance,Hummer,2022-08-19,234245,32,0.000136,0.564,0.805,0.443000,0.1020,-7.770,0.0334,0.2400,141.980,2,1,4
2,Made Ya,RYSO,Made Ya,2023-01-12,294626,5,0.000309,0.775,0.982,0.358000,0.0951,-6.264,0.0786,0.5100,145.000,6,1,4
3,Make Me,Elkka,Make Me,2024-02-14,224204,43,0.001860,0.760,0.855,0.842000,0.5940,-8.246,0.0490,0.2230,131.997,9,0,4
4,Black Eye,Allie X,Girl With No Face,2024-02-23,272796,54,0.001200,0.723,0.781,0.214000,0.0768,-4.451,0.0431,0.4930,132.021,9,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
418,Vroom Vroom,Charli XCX,Vroom Vroom EP,2016-02-26,193270,64,0.122000,0.836,0.684,0.000000,0.0854,-5.818,0.2410,0.5140,151.007,11,1,4
419,XS,Rina Sawayama,SAWAYAMA,2020-04-17,201060,61,0.085400,0.635,0.929,0.000001,0.0822,-3.256,0.1460,0.5900,117.039,2,0,4
420,Immaterial,SOPHIE,OIL OF EVERY PEARL'S UN-INSIDES,2018-06-15,232806,20,0.115000,0.776,0.717,0.007550,0.3270,-4.897,0.0836,0.8280,139.995,2,1,4
421,disco tits,Tove Lo,BLUE LIPS (lady wood phase II),2017-11-17,223794,63,0.025000,0.790,0.728,0.001060,0.1720,-6.523,0.0612,0.3330,110.059,7,1,4


In [4]:
# Concatenate the track name and the artist name to make a unique identifier
# of the form "<track> - <artist>", stored back in 'Track Name'.
# Note: re-running this cell appends the artist name again.
artist_suffix = ' - ' + playlist_df['Artist Name']
playlist_df['Track Name'] = playlist_df['Track Name'] + artist_suffix

In [5]:
# Keep only the first occurrence of each "<track> - <artist>" identifier,
# then renumber the rows 0..n-1 so the index has no gaps.
unique_tracks_df = playlist_df.drop_duplicates(subset=['Track Name'], keep='first')
playlist_df = unique_tracks_df.reset_index(drop=True)

In [6]:
# Keep the track identifier plus the numeric columns used to compare songs.
# .copy() makes clustering_df an independent frame: the next cell assigns into
# its columns, and without the copy that is an assignment into a slice of
# playlist_df (the pattern pandas' chained-assignment warning exists for).
clustering_df = playlist_df[[
    'Track Name',
    'Duration (ms)',
    'Popularity',
    'Danceability',
    'Energy',
    'Instrumentalness',
    'Liveness',
    'Loudness',
    'Speechiness',
    'Tempo',
    'Valence',
    'Mode',
]].copy()

In [7]:
# Work on the numeric columns only; 'Track Name' is the identifier, not a feature.
# drop() returns a new frame, so nothing below writes into clustering_df in place
# and re-running this cell gives the same result.
feature_df = clustering_df.drop(columns='Track Name')

# Remove the negative sign from the loudness column.
# MinMaxScaler accepts negative values, so this is not required for scaling; it
# only flips the direction of the scaled column. Pairwise distances are
# unaffected as long as every loudness value is <= 0.
feature_df['Loudness'] = feature_df['Loudness'].abs()

# Normalise every feature to the [0, 1] range. MinMaxScaler scales each column
# independently, so a single fit_transform over the whole frame yields the same
# numbers as fitting one column at a time.
scaler = MinMaxScaler()
scaled_df = pd.DataFrame(
    scaler.fit_transform(feature_df),
    columns=feature_df.columns,
    index=feature_df.index,
)

# Re-attach the identifier as the first column, followed by the scaled features.
clustering_df = pd.concat([clustering_df[['Track Name']], scaled_df], axis=1)

In [8]:
# Display the scaled feature table as the cell output
clustering_df

Unnamed: 0,Track Name,Duration (ms),Popularity,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Tempo,Valence,Mode
0,Habitual Drum Loop no.08 - X CLUB.,0.603215,0.369048,0.634640,0.981004,0.829676,0.094655,0.369249,0.044485,0.591822,0.059101,1.0
1,Hummer - Juicy Romance,0.294480,0.380952,0.500632,0.736771,0.462905,0.079776,0.440032,0.010996,0.582920,0.220208,1.0
2,Made Ya - RYSO,0.398844,0.059524,0.767383,0.976934,0.374086,0.071878,0.353857,0.086305,0.609173,0.509814,1.0
3,Make Me - Elkka,0.277125,0.511905,0.748420,0.804613,0.879833,0.642898,0.467269,0.036988,0.496140,0.201974,0.0
4,Black Eye - Allie X,0.361113,0.642857,0.701643,0.704206,0.223615,0.050933,0.250114,0.027158,0.496349,0.491580,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
415,Vroom Vroom - Charli XCX,0.223658,0.761905,0.844501,0.572592,0.000000,0.060776,0.328336,0.356881,0.661390,0.514105,1.0
416,XS - Rina Sawayama,0.237122,0.726190,0.590392,0.905020,0.000001,0.057113,0.181735,0.198600,0.366114,0.595624,0.0
417,Immaterial - SOPHIE,0.291993,0.238095,0.768647,0.617368,0.007889,0.337301,0.275635,0.094635,0.565665,0.850906,1.0
418,disco tits - Tove Lo,0.276416,0.750000,0.786346,0.632293,0.001108,0.159895,0.368677,0.057314,0.305438,0.319961,1.0


In [9]:
# Pairwise Euclidean distances between songs, computed on the scaled feature
# columns and labelled on both axes with the track identifier.
track_labels = clustering_df['Track Name']
feature_values = clustering_df.drop(columns='Track Name')
condensed_distances = pdist(feature_values, metric='euclidean')
distances_df = pd.DataFrame(
    squareform(condensed_distances),
    index=track_labels,
    columns=track_labels,
)

# Move the track names out of the index into a regular 'Track Name' column and
# clear the leftover label on the column axis.
distances_df = distances_df.reset_index()
distances_df.columns.name = None
distances_df

Unnamed: 0,Track Name,Habitual Drum Loop no.08 - X CLUB.,Hummer - Juicy Romance,Made Ya - RYSO,Make Me - Elkka,Black Eye - Allie X,Von dutch - Charli XCX,♡ StiCKy piCtURe SyNDroMe ʕ•́ᴥ•̀ʔっ♡ - 1999 WRITE THE FUTURE,Act Bad - Roza Terenzi Remix - BASHKKA,C.O.O.L. Party - Confidence Man,...,break - Aidan Rudd,pink diamond - Charli XCX,visions - Charli XCX,Paradise (feat. Hannah Diamond) - Charli XCX,Trophy - Charli XCX,Vroom Vroom - Charli XCX,XS - Rina Sawayama,Immaterial - SOPHIE,disco tits - Tove Lo,IPHONE - Rico Nasty
0,Habitual Drum Loop no.08 - X CLUB.,0.000000,0.583074,0.754135,1.230089,1.339612,1.286805,1.492875,0.458670,1.318372,...,1.048265,1.647426,1.001682,1.092538,1.533439,1.228774,1.535184,1.279580,1.112451,1.098448
1,Hummer - Juicy Romance,0.583074,0.000000,0.590468,1.258438,1.135807,0.864090,1.178029,0.660578,0.935318,...,1.177233,1.367661,0.587038,0.628222,1.261605,0.857104,1.289764,0.901089,0.736838,0.682205
2,Made Ya - RYSO,0.754135,0.590468,0.000000,1.397927,1.212768,1.030010,1.323887,0.670384,0.731886,...,1.277236,1.375930,0.797797,0.934901,1.308905,0.954013,1.325115,0.708215,0.941860,0.783032
3,Make Me - Elkka,1.230089,1.258438,1.397927,0.000000,0.973982,1.579003,1.198167,1.231252,1.659720,...,0.773515,1.332111,1.381668,1.585335,1.033898,1.577265,1.216280,1.558952,1.467547,1.530146
4,Black Eye - Allie X,1.339612,1.135807,1.212768,0.973982,0.000000,1.090177,0.677811,1.352249,1.199323,...,1.028557,0.634087,1.103323,1.229463,0.550426,1.124046,0.432340,1.203457,1.083166,1.133371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,Vroom Vroom - Charli XCX,1.228774,0.857104,0.954013,1.577265,1.124046,0.443515,1.260315,1.222498,0.794190,...,1.660201,1.087589,0.595517,0.720599,1.097160,0.000000,1.147575,0.746647,0.524841,0.425271
416,XS - Rina Sawayama,1.535184,1.289764,1.325115,1.216280,0.432340,1.093281,0.735888,1.543396,1.198530,...,1.319384,0.463669,1.165560,1.286843,0.502090,1.147575,0.000000,1.248579,1.122513,1.115733
417,Immaterial - SOPHIE,1.279580,0.901089,0.708215,1.558952,1.203457,0.737168,1.308558,1.196629,0.400369,...,1.665234,1.243838,0.703042,0.951616,1.171171,0.746647,1.248579,0.000000,0.808712,0.712630
418,disco tits - Tove Lo,1.112451,0.736838,0.941860,1.467547,1.083166,0.437622,1.312636,1.158953,0.828549,...,1.585142,1.239810,0.401065,0.782572,1.104923,0.524841,1.122513,0.808712,0.000000,0.599394


In [10]:
# One row per track, with an empty 'Recommendations' column that is filled in
# by the next cell.
recommendations_df = pd.DataFrame({'Track Name': playlist_df['Track Name']})
recommendations_df['Recommendations'] = None
recommendations_df

Unnamed: 0,Track Name,Recommendations
0,Habitual Drum Loop no.08 - X CLUB.,
1,Hummer - Juicy Romance,
2,Made Ya - RYSO,
3,Make Me - Elkka,
4,Black Eye - Allie X,
...,...,...
415,Vroom Vroom - Charli XCX,
416,XS - Rina Sawayama,
417,Immaterial - SOPHIE,
418,disco tits - Tove Lo,


In [11]:
# Number of nearest neighbours stored for each track.
N_RECOMMENDATIONS = 5

# Row labels of distances_df map to track names through this column.
track_names = distances_df['Track Name']

for index, track in recommendations_df['Track Name'].items():
    # Exclude the track's own row explicitly rather than assuming it sorts
    # first: another track at distance 0 (identical features) could otherwise
    # push the track into its own recommendations.
    other_tracks = track_names != track
    candidate_distances = distances_df.loc[other_tracks, track]

    # nsmallest returns the closest tracks in ascending distance order and
    # breaks ties by first occurrence, so the result is deterministic.
    closest_index = candidate_distances.nsmallest(N_RECOMMENDATIONS).index

    # Look the names up by label (.loc); closest_index holds index labels,
    # not positions.
    recommendations_list = track_names.loc[closest_index].tolist()

    # .at stores the whole list in a single cell of the object column.
    recommendations_df.at[index, 'Recommendations'] = recommendations_list

In [12]:
# Display the filled-in recommendations table as the cell output
recommendations_df

Unnamed: 0,Track Name,Recommendations
0,Habitual Drum Loop no.08 - X CLUB.,"[Whip The Bass - Rudosa, I Love U (Boss Mix) -..."
1,Hummer - Juicy Romance,"[Aydin feat. Kevin Parker - Discodeine, Buchla..."
2,Made Ya - RYSO,"[Floor Seats - Dagga, russian dressing - Fenne..."
3,Make Me - Elkka,"[50/50 - Extended Mix - Jockstrap, Venus Fly -..."
4,Black Eye - Allie X,"[Love at First Sight - Kylie Minogue, Fine Day..."
...,...,...
415,Vroom Vroom - Charli XCX,"[Katy on a Mission - Katy B, Blue Razz - Versa..."
416,XS - Rina Sawayama,"[Free Yourself - Jessie Ware, Make Me - Borai ..."
417,Immaterial - SOPHIE,"[Girls Night Out - Charli XCX, High Grade - To..."
418,disco tits - Tove Lo,"[Erotic Electronic - Slayyyter, BRAND NEW BITC..."


In [13]:
# Write the recommendations to 'recommendations.csv' in the current working
# directory, without the row index. Each 'Recommendations' cell holds a Python
# list, which is written as its string representation and needs parsing when
# the file is read back.
recommendations_df.to_csv('recommendations.csv', index=False)