In [93]:
# Standard library
import os
import sys

# Third-party
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# Suppress indexing warnings
pd.options.mode.chained_assignment = None  # default='warn'

# Directory containing the local `song_orderer` module. Set the
# SPOTIFY_OPTIMISATION_DIR environment variable to run this notebook on
# another machine; the default is the original hard-coded location.
PROJECT_DIR = os.environ.get(
    'SPOTIFY_OPTIMISATION_DIR',
    '/Users/irvinc01/Innovation/spotify_optimisation',
)
# Guarded so that re-running this cell does not keep appending the same path.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

# Local (resolved through the sys.path entry added above)
from song_orderer import Playlist

In [10]:
# Share link of the Spotify playlist to analyse
playlist_url = 'https://open.spotify.com/playlist/77x1p7WNnpYs9do1TiVleR?si=de75a27fcc8e45c3'

# Fetch the playlist's track data through the project's Playlist helper
playlist_df = Playlist(playlist_url).get_playlist_data()

In [11]:
# Preview the first five tracks (head() returns 5 rows by default)
playlist_df.head()

Unnamed: 0,Track Name,Artist Name,Album Name,Release Date,Duration (ms),Popularity,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Valence,Tempo,Key,Mode,Time Signature,Genres,Top Genre
0,El Apagón,Bad Bunny,Un Verano Sin Ti,2022-05-06,201816,73,0.0509,0.629,0.698,0.00166,0.0909,-4.485,0.308,0.599,117.765,8,1,4,"{urbano latino, trap latino, reggaeton}",urbano latino
1,Good PusS (feat. cupcakKe) - Remix,COBRAH,Good PusS (feat. cupcakKe) [Remix],2022-02-23,219951,50,0.204,0.845,0.693,0.299,0.186,-7.436,0.0558,0.322,125.009,5,1,4,"{escape room, trap queen, pink noise, chicago ...",escape room
2,4ÆM,Grimes,Miss Anthropocene (Deluxe Edition),2020-02-21,270982,68,0.00971,0.482,0.905,0.29,0.288,-4.371,0.0418,0.116,93.017,7,0,4,"{art pop, grave wave, metropopolis, neo-synthp...",art pop
3,Kill V. Maim,Grimes,Art Angels,2015-11-06,246006,67,0.00744,0.578,0.947,0.0816,0.105,-2.431,0.0359,0.456,134.033,11,0,4,"{art pop, grave wave, metropopolis, neo-synthp...",art pop
4,NDA,Billie Eilish,Happier Than Ever,2021-07-30,195776,76,0.341,0.765,0.373,0.525,0.112,-9.921,0.0713,0.554,85.016,8,1,4,"{pop, art pop}",pop


In [87]:
# Columns used to compare songs; 'Track Name' is carried along only as a label.
clustering_columns = [
    'Track Name', 'Duration (ms)', 'Popularity', 'Danceability', 'Energy',
    'Instrumentalness', 'Liveness', 'Loudness', 'Speechiness', 'Tempo',
    'Valence', 'Key', 'Mode', 'Top Genre',
]

# Take an explicit copy: later cells assign new values to columns of
# clustering_df, and doing that on a selection of playlist_df is the
# chained-assignment situation pandas warns about. With a copy, the
# transformations can never touch (or be confused with) playlist_df.
clustering_df = playlist_df[clustering_columns].copy()

In [88]:
# Remove the negative from the loudness column
clustering_df['Loudness'] = clustering_df['Loudness'].abs()

# Encode the Top Genre column as integer codes.
# NOTE(review): these codes carry no musical meaning, yet once scaled they are
# fed into a Euclidean distance below, so two genres count as "close" purely
# because their codes are close. Confirm this is intended; one-hot encoding is
# the usual alternative for a nominal feature.
genre_encoder = LabelEncoder().fit(clustering_df['Top Genre'])
clustering_df['Top Genre'] = genre_encoder.transform(clustering_df['Top Genre'])

# Normalise every feature column to the [0, 1] range.
# MinMaxScaler scales each column independently, so a single fit over all the
# feature columns yields the same values as fitting column by column, and it
# leaves `scaler` holding the min/max of every feature instead of only the
# last one processed.
feature_columns = clustering_df.columns.drop('Track Name').tolist()
scaler = MinMaxScaler()
clustering_df[feature_columns] = scaler.fit_transform(clustering_df[feature_columns])

In [89]:
# Display the prepared feature table (pandas truncates the middle rows)
clustering_df

Unnamed: 0,Track Name,Duration (ms),Popularity,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Tempo,Valence,Key,Mode,Top Genre
0,El Apagón,0.180452,0.776596,0.621795,0.630667,0.001755,0.075096,0.295703,0.70000,0.365332,0.604483,0.727273,1.0,0.933333
1,Good PusS (feat. cupcakKe) - Remix,0.225286,0.531915,0.898718,0.624000,0.316068,0.201274,0.515452,0.06950,0.427167,0.305960,0.454545,1.0,0.311111
2,4ÆM,0.351449,0.723404,0.433333,0.906667,0.306554,0.336606,0.287214,0.03450,0.154083,0.083953,0.636364,0.0,0.088889
3,Kill V. Maim,0.289702,0.712766,0.556410,0.962667,0.086258,0.093804,0.142751,0.01975,0.504195,0.450372,1.000000,0.0,0.088889
4,NDA,0.165519,0.808511,0.796154,0.197333,0.554968,0.103091,0.700499,0.10825,0.085787,0.555987,0.727273,1.0,0.777778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Player Of Games,0.288555,0.563830,0.507692,0.933333,0.001078,0.182699,0.318415,0.02075,0.477819,0.099041,0.818182,0.0,0.088889
96,Get Into It (Yuh),0.023405,0.829787,0.984615,0.573333,0.000036,0.073106,0.410678,0.33500,0.144907,0.813557,0.727273,0.0,0.777778
97,My Name is Dark - Art Mix,0.561828,0.000000,0.479487,0.822667,0.427061,0.084251,0.398913,0.03725,0.418750,0.446061,0.545455,1.0,0.088889
98,Voodoo?,0.313782,0.489362,0.932051,0.205333,0.149049,0.045907,0.762305,0.11800,0.281645,0.488091,0.545455,0.0,0.422222


In [130]:
# Build a square table of Euclidean distances between every pair of songs,
# labelled by track name on both axes.
song_labels = clustering_df['Track Name']
song_features = clustering_df.drop(columns='Track Name')

# pdist returns the condensed pairwise distances; squareform expands them
# into the full symmetric matrix.
pairwise_distances = squareform(pdist(song_features, metric='euclidean'))
distances_df = pd.DataFrame(pairwise_distances, index=song_labels, columns=song_labels)

# Turn the track-name index into an ordinary column and clear the leftover
# axis label on the columns.
distances_df = distances_df.reset_index()
distances_df.columns.name = None
distances_df

Unnamed: 0,Track Name,El Apagón,Good PusS (feat. cupcakKe) - Remix,4ÆM,Kill V. Maim,NDA,Oxytocin,GOLDWING,nihilist blues (feat. Grimes),Tesla,...,Generative Model,Cosmic Ratio,Glimmer,The Other Side Of Paradise,Left Hand Free,Player Of Games,Get Into It (Yuh),My Name is Dark - Art Mix,Voodoo?,Forever & Ever More
0,El Apagón,0.000000,1.119653,1.668815,1.566727,1.070034,1.070316,1.571610,1.403554,1.692861,...,1.834588,1.817296,1.453123,1.356412,1.205343,1.620041,1.193887,1.486815,1.507077,1.261478
1,Good PusS (feat. cupcakKe) - Remix,1.119653,0.000000,1.275576,1.359471,0.919951,0.834722,1.430295,1.208231,1.058406,...,0.993007,1.034705,1.248922,1.484735,1.005152,1.273330,1.402385,0.854048,1.173165,1.071892
2,4ÆM,1.668815,1.275576,0.000000,0.734747,1.636887,1.475702,1.502125,0.809623,1.307563,...,1.589144,1.598297,1.159387,1.414283,1.681026,0.540293,1.348378,1.369994,1.199065,1.401234
3,Kill V. Maim,1.566727,1.359471,0.734747,0.000000,1.715127,1.503399,1.585413,0.993410,1.624142,...,1.912326,1.833647,1.222730,1.240003,1.631050,0.478143,1.186810,1.415521,1.238370,1.258949
4,NDA,1.070034,0.919951,1.636887,1.715127,0.000000,0.690019,1.400164,1.574017,1.453073,...,1.470655,1.538476,1.239855,1.293345,1.377928,1.735514,1.308297,1.430159,1.231574,1.611748
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Player Of Games,1.620041,1.273330,0.540293,0.478143,1.735514,1.490094,1.422399,0.777077,1.534238,...,1.727859,1.698736,1.249597,1.298528,1.617092,0.000000,1.312085,1.342320,1.157016,1.228403
96,Get Into It (Yuh),1.193887,1.402385,1.348378,1.186810,1.308297,1.449618,1.259568,1.127597,1.985015,...,2.080956,2.096545,0.852625,0.886700,1.478122,1.312085,0.000000,1.812376,0.903423,1.776152
97,My Name is Dark - Art Mix,1.486815,0.854048,1.369994,1.415521,1.430159,1.316857,1.875092,1.437229,1.052248,...,0.935799,0.780641,1.578839,1.773469,1.004189,1.342320,1.812376,0.000000,1.495302,1.076822
98,Voodoo?,1.507077,1.173165,1.199065,1.238370,1.231574,1.382933,0.867713,0.985241,1.782846,...,1.638089,1.658353,0.862353,0.831585,1.502225,1.157016,0.903423,1.495302,0.000000,1.699873


In [101]:
# One row per track, with an empty 'Recommendations' column that is filled in
# by a later cell.
recommendations_df = playlist_df[['Track Name']].assign(Recommendations=None)
recommendations_df

Unnamed: 0,Track Name,Recommendations
0,El Apagón,
1,Good PusS (feat. cupcakKe) - Remix,
2,4ÆM,
3,Kill V. Maim,
4,NDA,
...,...,...
95,Player Of Games,
96,Get Into It (Yuh),
97,My Name is Dark - Art Mix,
98,Voodoo?,


In [166]:
# How many nearest songs to recommend for each track
N_RECOMMENDATIONS = 5

# Work by position rather than by track name: row i and column i of the
# distance matrix both describe the i-th song, so the lookup stays correct
# even if two tracks in the playlist share a name.
neighbour_names = distances_df['Track Name'].reset_index(drop=True)
distance_values = distances_df.drop(columns='Track Name').to_numpy()

# Both frames are derived from playlist_df in the same row order; fail loudly
# if that ever stops being true instead of producing misaligned results.
assert neighbour_names.tolist() == recommendations_df['Track Name'].tolist(), \
    'recommendations_df and distances_df are not in the same track order'

for position, index in enumerate(recommendations_df.index):
    # Exclude the song itself explicitly instead of assuming it sorts first:
    # a duplicate or identical track also sits at distance 0. The stable sort
    # keeps tied distances in playlist order so the result is deterministic.
    closest_positions = (
        pd.Series(distance_values[position])
        .drop(index=position)
        .sort_values(ascending=True, kind='mergesort')
        .index[:N_RECOMMENDATIONS]
    )

    # Store the names of the closest songs for this track
    recommendations_df.at[index, 'Recommendations'] = neighbour_names.iloc[closest_positions].tolist()

In [167]:
# Display the table now that each track's 'Recommendations' list is filled in
recommendations_df

Unnamed: 0,Track Name,Recommendations
0,El Apagón,"[Prey, I Didn't Change My Number, Je veux te v..."
1,Good PusS (feat. cupcakKe) - Remix,"[BRAND NEW BITCH, Open Your Mind, Tunnel, Snak..."
2,4ÆM,"[Player Of Games, My Love, Shinigami Eyes, Daf..."
3,Kill V. Maim,"[Player Of Games, My Love, Enter Sandman, Daff..."
4,NDA,"[Oxytocin, Therefore I Am, I Didn't Change My ..."
...,...,...
95,Player Of Games,"[My Love, Shinigami Eyes, Daffodil, Enter Sand..."
96,Get Into It (Yuh),"[Cinema, P FKN R, Billie Bossa Nova, Vegas (Fr..."
97,My Name is Dark - Art Mix,"[Snake, De Formule, Cosmic Ratio, Temperature ..."
98,Voodoo?,"[Billie Bossa Nova, The Look, P FKN R, Cinema,..."


In [168]:
# Write the recommendations table to disk, leaving out the row index
output_path = 'recommendations.csv'
recommendations_df.to_csv(output_path, index=False)