## Making a simple personal music recommendation system

In this file I will use sklearn's RandomForestCLassifier to learn on the audio features of the music I have listened to and have shown preference through the number of minutes I went on to listen to the song. I will them use this model to select the top 50 songs I am predicted to like from a dataset called "Top Spotify Songs 2023" created by NIDULA ELGIRIYEWITHANA which can be found at https://www.kaggle.com/datasets/nelgiriyewithana/top-spotify-songs-2023. Thank you Nidula.

In [1]:
import pandas as pd
import pandas
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import spotify_api_functions as spotify

{'display_name': 'grantdidway', 'external_urls': {'spotify': 'https://open.spotify.com/user/grantdidway12345'}, 'href': 'https://api.spotify.com/v1/users/grantdidway12345', 'id': 'grantdidway12345', 'images': [{'url': 'https://i.scdn.co/image/ab67757000003b8229a934e2ad6b3666ff845bbe', 'height': 64, 'width': 64}, {'url': 'https://i.scdn.co/image/ab6775700000ee8529a934e2ad6b3666ff845bbe', 'height': 300, 'width': 300}], 'type': 'user', 'uri': 'spotify:user:grantdidway12345', 'followers': {'href': None, 'total': 40}}


In [2]:
songs = pd.read_csv('stream_with_audio_features.csv')
top_hits = pd.read_csv('spotify-2023.csv', encoding='latin1')
top_hits

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
948,My Mind & Me,Selena Gomez,1,2022,11,3,953,0,91473363,61,...,144,A,Major,60,24,39,57,0,8,3
949,Bigger Than The Whole Sky,Taylor Swift,1,2022,10,21,1180,0,121871870,4,...,166,F#,Major,42,7,24,83,1,12,6
950,A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,...,92,C#,Major,80,81,67,4,0,8,6
951,En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,...,97,C#,Major,82,67,77,8,0,12,5


In [3]:
top_hits.rename(columns={'danceability_%': "danceability", 'valence_%': 'valence', 'energy_%':'energy',
       'acousticness_%':'acousticness', 'instrumentalness_%':'instrumentalness', 'liveness_%':'liveness', 'speechiness_%':'speechiness'}, inplace=True)

In [4]:
top_hits.columns

Index(['track_name', 'artist(s)_name', 'artist_count', 'released_year',
       'released_month', 'released_day', 'in_spotify_playlists',
       'in_spotify_charts', 'streams', 'in_apple_playlists', 'in_apple_charts',
       'in_deezer_playlists', 'in_deezer_charts', 'in_shazam_charts', 'bpm',
       'key', 'mode', 'danceability', 'valence', 'energy', 'acousticness',
       'instrumentalness', 'liveness', 'speechiness'],
      dtype='object')

In [5]:
common_features = ['danceability', 'energy', 'key', 'mode', 'speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence']

In [6]:
# Considering "liked" songs to be ones I have listened to more that 15 minutes of
songs['liked'] = songs['total_mins'] > 15  # Set an appropriate threshold


# Replacing "Major" with 1 and "Minor" with 0
top_hits['mode'] = songs['mode'].map({'Major': 1, 'Minor': 0})

common_features = ['danceability', 'energy', 'speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence']

songs['mode']


0       1.0
1       1.0
2       0.0
3       1.0
4       1.0
       ... 
3645    1.0
3646    1.0
3647    1.0
3648    1.0
3649    1.0
Name: mode, Length: 3650, dtype: float64

In [7]:

features = common_features  # The audio features
X = songs[features]
y = songs['liked']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
model = RandomForestClassifier(random_state=42)

# Fit the model to the training data. Songs I "liked" have a True value in the "liked colum"
# The likes were determined by a threshold of number of minutes played
model.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [9]:
# Test the model on the testing data set aside
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

Accuracy: 0.7082191780821918


In [10]:
top_hits_predictions = model.predict(top_hits[features])
top_hits['predicted_preference'] = top_hits_predictions

# Filter songs predicted to be liked
recommended_songs = top_hits[top_hits['predicted_preference'] == 1]

In [11]:
print(recommended_songs[['track_name', 'predicted_preference']])

print(len(recommended_songs))

                              track_name  predicted_preference
0    Seven (feat. Latto) (Explicit Ver.)                  True
1                                   LALA                  True
2                                vampire                  True
3                           Cruel Summer                  True
5                               Sprinter                  True
..                                   ...                   ...
947                        The Great War                  True
948                         My Mind & Me                  True
950                 A Veces (feat. Feid)                  True
951                        En La De Ella                  True
952                                Alone                  True

[866 rows x 2 columns]
866


In [12]:
user_id = spotify.get_user_id()  # Get user ID
playlist_name = "My Recommendations"  # Name of the new playlist
playlist_description = "Songs recommended based on my listening history."

# Create a new playlist
new_playlist = spotify.user_playlist_create(user_id, playlist_name, playlist_description)
playlist_id = new_playlist['id']


In [13]:
# Get URI of each recommended song
recommended_songs['spotify_uri'] = recommended_songs.head(50).apply(
    lambda row: spotify.get_spotify_uri(row['track_name'], row['artist(s)_name']), axis=1
)

# Filter out songs where the URI could not be found
recommended_songs_with_uri = recommended_songs.dropna(subset=['spotify_uri'])

print(recommended_songs_with_uri[['track_name', 'artist(s)_name', 'spotify_uri']])


                                          track_name  \
1                                               LALA   
2                                            vampire   
3                                       Cruel Summer   
5                                           Sprinter   
6                                    Ella Baila Sola   
7                                           Columbia   
8                                           fukumean   
11                                         Super Shy   
12                                           Flowers   
13                                          Daylight   
14                                         As It Was   
16                                 Cupid - Twin Ver.   
18                                        Classy 101   
19                                        Like Crazy   
23          Peso Pluma: Bzrp Music Sessions, Vol. 55   
25                                       SABOR FRESA   
27                                      MOJABI G

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_songs['spotify_uri'] = recommended_songs.head(50).apply(


In [14]:
recommended_songs_with_uri

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,mode,danceability,valence,energy,acousticness,instrumentalness,liveness,speechiness,predicted_preference,spotify_uri
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,,71,61,74,7,0,10,4,True,spotify:track:7ABLbnD53cQK00mhcaOUVG
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,,51,32,53,17,0,31,6,True,spotify:track:1kuGVB7EU95pJObxwvfwKS
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,,55,58,72,11,0,11,15,True,spotify:track:1BxfuPKGuaTgP7aM0Bbdwr
5,Sprinter,"Dave, Central Cee",2,2023,6,1,2186,91,183706234,67,...,,92,66,58,19,0,8,24,True,spotify:track:2FDTHlrBguDzQkp7PVj16Q
6,Ella Baila Sola,"Eslabon Armado, Peso Pluma",2,2023,3,16,3090,50,725980112,34,...,,67,83,76,48,0,8,3,True,spotify:track:3dnP0JxCgygwQH9Gm7q7nb
7,Columbia,Quevedo,1,2023,7,7,714,43,58149378,25,...,,67,26,71,37,0,11,4,True,spotify:track:6XbtvPmIpyCbjuT0e8cQtp
8,fukumean,Gunna,1,2023,5,15,1096,83,95217315,60,...,,85,22,62,12,0,28,9,True,spotify:track:4rXLjWdF2ZZpXCVTfWcshS
11,Super Shy,NewJeans,1,2023,7,7,422,55,58255150,37,...,,78,52,82,18,0,15,7,True,spotify:track:5sdQOyqq2IDhvmx2lHOpwd
12,Flowers,Miley Cyrus,1,2023,1,12,12211,115,1316855716,300,...,,71,65,68,6,0,3,7,True,spotify:track:7DSAEUvxU8FajXtRloy8M0
13,Daylight,David Kushner,1,2023,4,14,3528,98,387570742,80,...,,51,32,43,83,0,9,3,True,spotify:track:1odExI7RdWc4BT515LTAwj


In [15]:
# Get the list of song URIs from the recommended songs
song_uris = recommended_songs_with_uri['spotify_uri'].tolist() 

# Add songs to the playlist
spotify.user_playlist_add_tracks(user_id, playlist_id, song_uris)
