In [15]:
# I'll try to make a simple music recommender system using my own music data. I'll use the Spotify API to get the audio features of the one artist. I'll then use the KNN algorithm to find the songs that are most similar to the song I input. I'll also try to make a simple GUI for this. Now let's get started!

In [37]:
# Neccessary imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
from sklearn.neighbors import NearestNeighbors
import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
import webbrowser
import os
import sys

In [38]:
# Spotify API credentials

with open('credentials.txt') as f:
    credentials = f.readlines()
    credentials = [c.strip() for c in credentials]


client_id = credentials[0]
client_secret = credentials[1]
redirect_uri = credentials[2]


scope = 'user-library-read playlist-read-private playlist-modify-private playlist-modify-public playlist-read-collaborative user-read-recently-played user-top-read user-read-playback-position user-read-playback-state user-modify-playback-state user-read-currently-playing app-remote-control streaming user-read-email user-read-private'


sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri, scope=scope))


In [39]:
# Prepare data for KNN


def get_artist_id(artist_name):
    artist = sp.search(q='artist:' + artist_name, type='artist')
    artist_id = artist['artists']['items'][0]['id']
    return artist_id

def get_artist_data(artist_id):
    artist = sp.artist(artist_id)
    artist_name = artist['name']
    artist_genres = artist['genres']
    artist_popularity = artist['popularity']
    artist_followers = artist['followers']['total']
    artist_data = pd.DataFrame({'artist_name': [artist_name], 'artist_genres': [artist_genres], 'artist_popularity': [artist_popularity], 'artist_followers': [artist_followers]})
    return artist_data

def get_artist_albums(artist_id):
    albums = sp.artist_albums(artist_id)
    albums = albums['items']
    album_ids = []
    album_names = []
    album_release_dates = []
    album_total_tracks = []
    for album in albums:
        album_ids.append(album['id'])
        album_names.append(album['name'])
        album_release_dates.append(album['release_date'])
        album_total_tracks.append(album['total_tracks'])
    album_data = pd.DataFrame({'album_id': album_ids, 'album_name': album_names, 'album_release_date': album_release_dates, 'album_total_tracks': album_total_tracks})
    return album_data

def get_album_ID(artist_name, album_name):
    album = sp.search(q='album:' + album_name + ' artist:' + artist_name, type='album')
    album_id = album['albums']['items'][0]['id']
    return album_id

def get_album_data(album_id):
    album = sp.album(album_id)
    album_name = album['name']
    album_release_date = album['release_date']
    album_total_tracks = album['total_tracks']
    album_data = pd.DataFrame({'album_name': [album_name], 'album_release_date': [album_release_date], 'album_total_tracks': [album_total_tracks]})
    return album_data

def get_album_tracks_with_features(album_id):
    tracks = sp.album_tracks(album_id)['items']
    
    track_data = []

    for track in tracks:

        audio_features = sp.audio_features(track['uri'])[0]

        track_data.append({
            "name": track['name'],
            "id": track['id'],
            "uri": track['uri'],
            "danceability": audio_features['danceability'],
            "energy": audio_features['energy'],
            "key": audio_features['key'],
            "loudness": audio_features['loudness'],
            "mode": audio_features['mode'],
            "speechiness": audio_features['speechiness'],
            "acousticness": audio_features['acousticness'],
            "instrumentalness": audio_features['instrumentalness'],
            "liveness": audio_features['liveness'],
            "valence": audio_features['valence'],
            "tempo": audio_features['tempo'],
            "duration_ms": audio_features['duration_ms'],
        })

    return pd.DataFrame(track_data)


In [40]:
# Now let's get the data for the artist I want to use for this recommender system. I'll use the artist "Rihanna" for this. I'll get the data for the artist, the albums and the tracks. I'll also get the audio features for the tracks. I'll then merge all the data into one dataframe.

artist_id = get_artist_id('Rihanna')
artist_data = get_artist_data(artist_id)
album_data = get_artist_albums(artist_id)
track_data = pd.DataFrame()

for album_id in album_data['album_id']:
    track_data = track_data.append(get_album_tracks_with_features(album_id))

track_data = track_data.reset_index(drop=True)

track_data

  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data.append(get_album_tracks_with_features(album_id))
  track_data = track_data

Unnamed: 0,name,id,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,Lift Me Up - From Black Panther: Wakanda Forev...,6sCvvleqKbeyOkQDieBYgp,spotify:track:6sCvvleqKbeyOkQDieBYgp,0.247,0.299,9,-6.083,1,0.0315,0.899000,0.000000,0.1310,0.1720,177.115,196520
1,Love & Loyalty (Believe),6KsDoI8iySmXQ6UaSPuAx4,spotify:track:6KsDoI8iySmXQ6UaSPuAx4,0.816,0.726,1,-9.874,1,0.0688,0.233000,0.001830,0.0669,0.5960,111.990,380053
2,Alone,0AoBY2Y3qs6dtGgOD6c91N,spotify:track:0AoBY2Y3qs6dtGgOD6c91N,0.600,0.659,4,-7.264,0,0.0542,0.176000,0.000000,0.1110,0.3070,89.955,221747
3,No Woman No Cry,2yrtWT6W4KUMbfNtBcH8iN,spotify:track:2yrtWT6W4KUMbfNtBcH8iN,0.434,0.252,2,-9.082,1,0.0305,0.506000,0.001010,0.1030,0.1380,147.268,213267
4,Árboles Bajo El Mar,5UaiWpx39E60GAsoAtICoV,spotify:track:5UaiWpx39E60GAsoAtICoV,0.678,0.385,6,-8.343,0,0.0389,0.783000,0.001120,0.0982,0.0577,92.161,261787
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213,Consideration - Mangal Suvarnan Remix,6sDIx3jRwWrh7vHflBvhv1,spotify:track:6sDIx3jRwWrh7vHflBvhv1,0.669,0.659,10,-6.205,1,0.0374,0.248000,0.000294,0.1150,0.1860,127.987,206333
214,Consideration - James Carter Remix,1s0dzzFLZTbRGSJ2EL2ObD,spotify:track:1s0dzzFLZTbRGSJ2EL2ObD,0.674,0.872,5,-5.639,1,0.1290,0.024700,0.000310,0.1480,0.1610,145.110,157747
215,Consideration - Will Clarke Remix,73a0etz8TVhjViZLH2qctQ,spotify:track:73a0etz8TVhjViZLH2qctQ,0.775,0.605,0,-11.241,1,0.0468,0.044400,0.016200,0.1080,0.3370,126.002,224293
216,Consideration - Stafford Brothers Remix,3nXljZFHOuvlgqPzclKDyH,spotify:track:3nXljZFHOuvlgqPzclKDyH,0.707,0.867,7,-3.687,0,0.0559,0.002520,0.000451,0.1250,0.1990,125.009,203573


In [41]:
# We got 217 tracks for Rihanna. Now we can use track_data to find the songs that are most similar to the song we input. Using danceability, energy, loudness, speechiness, acousticness, liveness and tempo we can calculate the distance between the songs. We'll use the KNN algorithm to find the songs that are most similar to the song we input. We'll use the NearestNeighbors class from the skle learn library to do this. We'll also use the MinMaxScaler class to scale the data before we use the KNN algorithm.

# Prepare data for KNN

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

X = track_data[['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'liveness', 'tempo']]
X = scaler.fit_transform(X)

# KNN

knn = NearestNeighbors(n_neighbors=10, algorithm='auto')
knn.fit(X)


In [44]:
# Now we want to get all tracks from a playlist. We'll use the playlist ID to get the tracks. We'll then get the audio features for the tracks. We'll then merge all the data into one dataframe.

def get_playlist_tracks_with_features(url):
    playlist_id = url.split('/')[-1]
    playlist = sp.playlist(playlist_id)
    tracks = playlist['tracks']['items']
    
    track_data = []

    for track in tracks:

        audio_features = sp.audio_features(track['track']['uri'])[0]

        track_data.append({
            "name": track['track']['name'],
            "id": track['track']['id'],
            "uri": track['track']['uri'],
            "danceability": audio_features['danceability'],
            "energy": audio_features['energy'],
            "key": audio_features['key'],
            "loudness": audio_features['loudness'],
            "mode": audio_features['mode'],
            "speechiness": audio_features['speechiness'],
            "acousticness": audio_features['acousticness'],
            "instrumentalness": audio_features['instrumentalness'],
            "liveness": audio_features['liveness'],
            "valence": audio_features['valence'],
            "tempo": audio_features['tempo'],
            "duration_ms": audio_features['duration_ms'],
        })

    return pd.DataFrame(track_data)

get_playlist_tracks_with_features('https://open.spotify.com/playlist/1glmheZVEvBoG8nqPynX1p?si=257caa611c1d44e3')


Unnamed: 0,name,id,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,I Want to Die,6fcqTv2LVXVoaFKZWbRTbO,spotify:track:6fcqTv2LVXVoaFKZWbRTbO,0.408,0.858,5,-4.478,0,0.0351,0.000731,0.000000,0.0807,0.386,84.953,278817
1,Raindrops,2HEmAttobAQzfNJMqJqP2y,spotify:track:2HEmAttobAQzfNJMqJqP2y,0.466,0.609,4,-5.011,0,0.0296,0.004680,0.018800,0.1600,0.191,105.999,270293
2,To Leave,4FqSL5KXpydcBMfbDQvvPu,spotify:track:4FqSL5KXpydcBMfbDQvvPu,0.360,0.687,4,-5.014,0,0.0280,0.000034,0.194000,0.1970,0.358,160.020,267160
3,Years of Silence,5WSqNyypJ0hITVpvJMetqQ,spotify:track:5WSqNyypJ0hITVpvJMetqQ,0.260,0.738,1,-7.454,0,0.0378,0.283000,0.078200,0.0799,0.242,117.157,431293
4,To Blossom Blue,4h24QsCUd3UECOHGhfD6Xi,spotify:track:4h24QsCUd3UECOHGhfD6Xi,0.619,0.426,7,-10.100,1,0.0302,0.067800,0.037700,0.0838,0.145,119.526,495267
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,The Cold Solitude,20bozUFuZE1z0VTrVwx5WX,spotify:track:20bozUFuZE1z0VTrVwx5WX,0.379,0.701,2,-4.595,0,0.0395,0.020500,0.005470,0.1280,0.344,95.935,413280
96,A Secret Kiss,3QJWwKX9XiZo8RYyrU4U1x,spotify:track:3QJWwKX9XiZo8RYyrU4U1x,0.335,0.829,3,-5.657,0,0.0398,0.000005,0.906000,0.1080,0.399,160.061,382154
97,Let the Music Cry,7FuidtXSm7n0RtVhv3QpaF,spotify:track:7FuidtXSm7n0RtVhv3QpaF,0.497,0.725,2,-6.838,0,0.0311,0.005330,0.000369,0.0792,0.331,140.024,380143
98,Love You to Death,58RDwkonFMOkoytBtIQetc,spotify:track:58RDwkonFMOkoytBtIQetc,0.423,0.738,6,-7.329,1,0.0319,0.359000,0.084000,0.0801,0.276,111.414,428800


In [49]:
# Now we try to create a function that takes a song as input and returns the 10 most similar songs. We'll use the KNN algorithm to do this. We'll use the NearestNeighbors class from the skle learn library to do this. We'll also use the MinMaxScaler class to scale the data before we use the KNN algorithm.

def get_similar_songs_with_audio_features(song_name, artist_name):
    song = sp.search(q='track:' + song_name + ' artist:' + artist_name, type='track')
    song_id = song['tracks']['items'][0]['id']
    song_data = get_playlist_tracks_with_features('https://open.spotify.com/playlist/1glmheZVEvBoG8nqPynX1p?si=257caa611c1d44e3')
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
    song_data = song_data.reset_index(drop=True)
    song_data = song_data[['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'liveness', 'tempo']]
    song_data = scaler.fit_transform(song_data)
    song_data = pd.DataFrame(song_data, columns=['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'liveness', 'tempo'])
    song_data = song_data.iloc[-10:]
    distances, indices = knn.kneighbors(song_data)
    distances = distances.flatten()
    indices = indices.flatten()
    similar_songs = pd.DataFrame()
    for i in range(len(indices)):
        similar_songs = similar_songs.append(track_data.iloc[indices[i]])
    similar_songs = similar_songs.drop_duplicates(subset=['name'])
    similar_songs = similar_songs.sort_values(by=['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'liveness', 'tempo'], ascending=False)
    return similar_songs

get_similar_songs_with_audio_features('Umbrella', 'Rihanna')

  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, song_name)))
  song_data = song_data.append(get_album_tracks_with_features(get_album_ID(artist_name, son

Unnamed: 0,name,id,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
186,Break It Off,2osdbXkJFUwPBLiZ4gRaEv,spotify:track:2osdbXkJFUwPBLiZ4gRaEv,0.897,0.738,10,-5.314,0,0.228,0.0166,0.0,0.0573,0.798,133.966,213373
217,Lemon - Edit,2LKlHBPHxLG5ArAXfF148n,spotify:track:2LKlHBPHxLG5ArAXfF148n,0.805,0.71,1,-6.642,1,0.309,0.000881,2.6e-05,0.358,0.147,190.044,146907
120,Wait Your Turn - Chew Fu Can't Wait No More Fix,4dyosL2M4UsLfHwvOs4RrF,spotify:track:4dyosL2M4UsLfHwvOs4RrF,0.785,0.817,7,-4.926,0,0.276,0.0405,0.000104,0.273,0.824,127.91,309293
210,Lemon (feat. Drake) - Drake Remix,739vCwA3EpBSkk3uDsI2wB,spotify:track:739vCwA3EpBSkk3uDsI2wB,0.777,0.657,1,-6.434,1,0.237,0.000738,3e-06,0.0932,0.245,189.992,226107
123,Hard - Chew Fu Granite Fix,0SfYKR2DPK2IpJILbDIE1G,spotify:track:0SfYKR2DPK2IpJILbDIE1G,0.739,0.883,11,-5.611,0,0.229,0.00094,0.000774,0.0633,0.746,130.039,328013
173,Lemme Get That,1pKEXuXVJ9PCJj2BNrG4kX,spotify:track:1pKEXuXVJ9PCJj2BNrG4kX,0.704,0.894,8,-5.731,1,0.165,0.103,0.000221,0.075,0.723,175.842,221027
118,Russian Roulette - Chew Fu Black Russian Fix,081sl4BpdkXcLciOJktR9y,spotify:track:081sl4BpdkXcLciOJktR9y,0.666,0.755,6,-5.418,0,0.243,0.00447,0.284,0.147,0.543,130.036,355213
54,Jump,4IBihEaN04iwfX5I9sGFj9,spotify:track:4IBihEaN04iwfX5I9sGFj9,0.629,0.819,0,-6.181,1,0.25,0.229,0.00706,0.169,0.468,162.048,264453
155,Shut Up And Drive - The Wideboys,1XekHLeG9WMs4Iy4EbbvCR,spotify:track:1XekHLeG9WMs4Iy4EbbvCR,0.609,0.909,1,-3.837,1,0.335,0.000666,0.00925,0.343,0.571,128.05,219040


In [32]:
# Belirli bir şarkının özelliklerini girdi olarak alın
input_song_features = scaler.transform([[0.7, 0.6, -5.0, 0.1, 0.2, 0.1, 100.0]])

# En yakın komşuları ve uzaklıklarını bulun
distances, indices = knn.kneighbors(input_song_features, n_neighbors=10)

# Benzer şarkıları görüntüleyin
similar_songs = track_data.iloc[indices[0]]
print(similar_songs)


                     name                      id  \
24              Desperado  4mCf3vQf7z0Yseo0RxAi3V   
40              Desperado  6ELYUjIai7bjtyUocOLCRE   
23                   Work  72TFWvU3wUYdUuxejTTIzt   
39                   Work  5gZsEYaYvyi1sAVyuiXbDm   
58      Nobody's Business  6EAxaAk1eZx0aqqH4ff4rP   
75      Nobody's Business  5PpxwDAoIGOcAe3iVgcAwt   
13                Wake Up  11eM29CRUKqdlb6zHe13pP   
176    Good Girl Gone Bad  6g1B0hd6rlT3xP3mERUnz8   
170  Hate That I Love You  7iu0WYLdo4yksKf3seaxzI   
108                Fading  2P9zV8VmHIPEYK2U2s0lUi   

                                      uri  danceability  energy  key  \
24   spotify:track:4mCf3vQf7z0Yseo0RxAi3V         0.685   0.610    0   
40   spotify:track:6ELYUjIai7bjtyUocOLCRE         0.685   0.610    0   
23   spotify:track:72TFWvU3wUYdUuxejTTIzt         0.725   0.534   11   
39   spotify:track:5gZsEYaYvyi1sAVyuiXbDm         0.725   0.534   11   
58   spotify:track:6EAxaAk1eZx0aqqH4ff4rP         0.741 

