# Get Data Using Spotify API

In [None]:
import base64
import os

import requests

# To obtain a CLIENT ID and CLIENT SECRET, visit https://developer.spotify.com/dashboard
# Credentials are read from the environment when available so real secrets do not
# have to be saved inside the notebook; 'xxx' is kept as the placeholder fallback.
CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID', 'xxx')
CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET', 'xxx')

# The token endpoint expects "client_id:client_secret" base64-encoded in a Basic auth header.
client_credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

token_url = 'https://accounts.spotify.com/api/token'
headers = {
    'Authorization': f'Basic {client_credentials_base64.decode()}'
}
data = {
    'grant_type': 'client_credentials'
}
# A timeout keeps the cell from hanging indefinitely if the endpoint is unreachable.
response = requests.post(token_url, data=data, headers=headers, timeout=10)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully.")
else:
    # Raise instead of calling exit(): exit() ends the whole session, whereas an
    # exception stops execution here and reports why the request failed.
    raise RuntimeError(
        f"Error obtaining access token. (HTTP {response.status_code})"
    )

Access token obtained successfully.


In [None]:
import pandas as pd
import spotipy 
from spotipy.oauth2 import SpotifyOAuth

# Initialise OAuth.
# Settings are taken from the environment when present so secrets are not stored
# in the notebook; the original literals remain as fallbacks.
SPOTIPY_CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID', 'xxx')
SPOTIPY_CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET', 'xxx')
SPOTIPY_REDIRECT_URI = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:8888/callback')
SCOPE = 'playlist-read-private'

# Module-level client used by get_data_spotify below.
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
    redirect_uri=SPOTIPY_REDIRECT_URI,
    scope=SCOPE
))

def get_data_spotify(playlist_id, client=None):
    """Collect per-track metadata for a Spotify playlist into a DataFrame.

    For every track returned by a single ``playlist_tracks`` call, the album
    release date, the track details (popularity, duration, explicit flag,
    Spotify URL) and the audio features are looked up individually. Any lookup
    that raises ``SpotifyException`` leaves its columns as ``None`` instead of
    aborting the whole run.

    Args:
        playlist_id: Identifier of the playlist to read.
        client: Optional object exposing ``playlist_tracks``, ``audio_features``,
            ``album`` and ``track``. Defaults to the module-level ``sp`` client.

    Returns:
        pandas.DataFrame with one row per track. An empty DataFrame is returned
        when the playlist itself cannot be fetched.
    """
    api = sp if client is None else client

    try:
        playlist_tracks = api.playlist_tracks(
            playlist_id,
            fields='items(track(id, name, artists, album(id, name)))'
        )
    except spotipy.exceptions.SpotifyException as e:
        print(f"Error fetching playlist tracks: {e}")
        return pd.DataFrame()

    music_data = []

    for track_info in playlist_tracks['items']:
        track = track_info['track']
        if not track:
            # Entries without a track payload carry nothing to record.
            continue

        track_id = track['id']
        album_id = track['album']['id']

        # Every per-track lookup starts from a safe default so that a failed
        # request can never leave a variable undefined or carrying the value
        # from the previous iteration.
        audio_features = {}
        if track_id:
            try:
                features = api.audio_features(track_id)
                # The response may be empty or contain None for this track.
                audio_features = (features[0] if features else None) or {}
            except spotipy.exceptions.SpotifyException:
                audio_features = {}

        release_date = None
        if album_id:
            try:
                album_info = api.album(album_id)
                release_date = album_info.get('release_date') if album_info else None
            except spotipy.exceptions.SpotifyException:
                release_date = None

        track_details = {}
        if track_id:
            try:
                track_details = api.track(track_id) or {}
            except spotipy.exceptions.SpotifyException:
                track_details = {}

        music_data.append({
            'Track Name': track['name'],
            'Artists': ', '.join(artist['name'] for artist in track['artists']),
            'Album Name': track['album']['name'],
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': track_details.get('popularity'),
            'Release Date': release_date,
            'Duration (ms)': track_details.get('duration_ms'),
            'Explicit': track_details.get('explicit'),
            'External URLs': (track_details.get('external_urls') or {}).get('spotify'),
            # dict.get must be *called*; subscripting it raises TypeError.
            'Danceability': audio_features.get('danceability'),
            'Energy': audio_features.get('energy'),
            'Loudness': audio_features.get('loudness'),
            'speechiness': audio_features.get('speechiness'),
            'Tempo': audio_features.get('tempo'),
            'Valence': audio_features.get('valence'),
        })

    return pd.DataFrame(music_data)

# Replace with a valid playlist ID
playlist_id = 'xxx'
music_df = get_data_spotify(playlist_id)

# Show the resulting DataFrame
print(music_df)



In [5]:
# Render the DataFrame built from the playlist as the cell's output.
music_df

Unnamed: 0,Track Name,Artists,Album Name,Album ID,Track ID,Popularity,Release Date,Duration (ms),Explicit,External URLs,Danceability,Energy,Loudness,speechiness,Tempo,Valence
0,Young,"Frank Walker, Andrew Jackson",24,41YwJx4SJzTz7uhRfvtlFk,6RN1PeZxL6L4EzGcTw6n9y,15,2017-03-24,205783,False,https://open.spotify.com/track/6RN1PeZxL6L4EzG...,,,,,,
1,SummerThing!,"AFROJACK, Mike Taylor",SummerThing!,0MRRBtUQH6706AqLBYHkZI,1KcnJTiXJXAqW5bB7zpkBC,44,2015-06-21,235344,False,https://open.spotify.com/track/1KcnJTiXJXAqW5b...,,,,,,
2,Sunrise (feat. Jason Walker),"Kygo, Jason Walker",Kids in Love,47oyrKaeobswT4T8zyaxdM,4ODnJrz0C3AJ4DFv8YQhJ8,57,2017-11-03,214826,False,https://open.spotify.com/track/4ODnJrz0C3AJ4DF...,,,,,,
3,Takeaway,"The Chainsmokers, ILLENIUM, Lennon Stella",World War Joy,01GR4NL5O5CZM51k0aejKD,3g0mEQx3NTanacLseoP0Gw,67,2019-12-06,209880,False,https://open.spotify.com/track/3g0mEQx3NTanacL...,,,,,,
4,All That Really Matters,"ILLENIUM, Teddy Swims",ILLENIUM,0ValflNlWce21wm5PlwV60,6cLrGfKIIRHYQw7S0mCDhA,72,2023-04-28,244235,False,https://open.spotify.com/track/6cLrGfKIIRHYQw7...,,,,,,
5,Roses,"The Chainsmokers, ROZES",Roses,2GFflENKz28RcMoSuulPZC,3vv9phIu6Y1vX3jcqaGz5Z,71,2015-06-16,226738,False,https://open.spotify.com/track/3vv9phIu6Y1vX3j...,,,,,,
6,Jungle,"Alok, The Chainsmokers, Mae Stephens",Jungle,6YFH6hee6fTJjlRddtl8gN,0OvO2X2Q3i98dc5RcgEN3x,71,2023-09-22,175500,False,https://open.spotify.com/track/0OvO2X2Q3i98dc5...,,,,,,
7,First Time,"Kygo, Ellie Goulding",First Time,6wu7tiTr8zchRG7PWwTOK1,2Gl0FzuLxflY6nPifJp5Dr,61,2017-04-28,193511,False,https://open.spotify.com/track/2Gl0FzuLxflY6nP...,,,,,,
8,Higher Love,"Kygo, Whitney Houston",Golden Hour,7tcs1X9pzFvcLOPuhCstQJ,1kKYjjfNYxE0YYgLa7vgVY,74,2020-05-29,228267,False,https://open.spotify.com/track/1kKYjjfNYxE0YYg...,,,,,,
9,Scared to Be Lonely,"Martin Garrix, Dua Lipa",Scared to Be Lonely,2v9rQe4F8fVSh5v8bAq0jF,3ebXMykcMXOcLeJ9xZ17XH,78,2017-01-27,220883,False,https://open.spotify.com/track/3ebXMykcMXOcLeJ...,,,,,,


# Data Processing

Karena saat ini API Spotify tidak lagi dapat digunakan untuk mengambil Audio Features setiap lagu, kasus ini menggunakan dataset Spotify dari https://www.kaggle.com/datasets/nadintamer/top-tracks-of-2017 <br>

`Data Description: ` <br>
The dataset contains a single .csv file (featuresdf.csv), which includes:

- Spotify URI for the song
- Name of the song
- Artist(s) of the song
- Audio features for the song (such as danceability, tempo, key etc.)

In [7]:
import pandas as pd 
import warnings
warnings.filterwarnings("ignore")
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
# Load the audio-features CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('../dataset/spotify_audiofeatures.csv')

In [9]:
# Display the loaded DataFrame as the cell's output.
df

Unnamed: 0,id,name,artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,7qiZfU4dY1lWllzX7mPBI,Shape of You,Ed Sheeran,0.825,0.652,1.0,-3.183,0.0,0.0802,0.5810,0.000000,0.0931,0.9310,95.977,233713.0,4.0
1,5CtI0qwDJkDQGwXD1H1cL,Despacito - Remix,Luis Fonsi,0.694,0.815,2.0,-4.328,1.0,0.1200,0.2290,0.000000,0.0924,0.8130,88.931,228827.0,4.0
2,4aWmUDTfIPGksMNLV2rQP,Despacito (Featuring Daddy Yankee),Luis Fonsi,0.660,0.786,2.0,-4.757,1.0,0.1700,0.2090,0.000000,0.1120,0.8460,177.833,228200.0,4.0
3,6RUKPb4LETWmmr3iAEQkt,Something Just Like This,The Chainsmokers,0.617,0.635,11.0,-6.769,0.0,0.0317,0.0498,0.000014,0.1640,0.4460,103.019,247160.0,4.0
4,3DXncPQOG4VBw3QHh3S81,I'm the One,DJ Khaled,0.609,0.668,7.0,-4.284,1.0,0.0367,0.0552,0.000000,0.1670,0.8110,80.924,288600.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1PSBzsahR2AKwLJgx8ehB,Bad Things (with Camila Cabello),Machine Gun Kelly,0.675,0.690,2.0,-4.761,1.0,0.1320,0.2100,0.000000,0.2870,0.2720,137.817,239293.0,4.0
96,0QsvXIfqM0zZoerQfsI9l,Don't Let Me Down,The Chainsmokers,0.542,0.859,11.0,-5.651,1.0,0.1970,0.1600,0.004660,0.1370,0.4030,159.797,208053.0,4.0
97,7mldq42yDuxiUNn08nvzH,Body Like A Back Road,Sam Hunt,0.731,0.469,5.0,-7.226,1.0,0.0326,0.4630,0.000001,0.1030,0.6310,98.963,165387.0,4.0
98,7i2DJ88J7jQ8K7zqFX2fW,Now Or Never,Halsey,0.658,0.588,6.0,-4.902,0.0,0.0367,0.1050,0.000001,0.1250,0.4340,110.075,214802.0,4.0


In [10]:
# Print each column's non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id                100 non-null    object 
 1   name              100 non-null    object 
 2   artists           100 non-null    object 
 3   danceability      100 non-null    float64
 4   energy            100 non-null    float64
 5   key               100 non-null    float64
 6   loudness          100 non-null    float64
 7   mode              100 non-null    float64
 8   speechiness       100 non-null    float64
 9   acousticness      100 non-null    float64
 10  instrumentalness  100 non-null    float64
 11  liveness          100 non-null    float64
 12  valence           100 non-null    float64
 13  tempo             100 non-null    float64
 14  duration_ms       100 non-null    float64
 15  time_signature    100 non-null    float64
dtypes: float64(13), object(3)
memory usage: 12.6+

In [14]:
# Count missing values per column.
df.isna().sum()

id                  0
name                0
artists             0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
duration_ms         0
time_signature      0
dtype: int64

In [15]:
# Columns of `df` that serve as the numeric description of each song.
feature_columns = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
]

# Pull the selected columns out as a plain array, then fit the scaler on it and
# transform it in one step so every feature is rescaled to the 0-1 range.
scaler = MinMaxScaler()
df_music_features = df[feature_columns].to_numpy()
music_scaled = scaler.fit_transform(df_music_features)

**Code Info** <br>
<br>

`scaler = MinMaxScaler()` Digunakan MinMaxScaler untuk menormalisasikan data dalam rentang 0 - 1. <br>
`df_music_features` Digunakan untuk mengekstrak fitur-fitur yang menjadi acuan dari data yang akan dinormalisasikan <br>
`music_scaled` Menyimpan hasil normalisasi dari variabel df_music_features


In [None]:
def music_recommendation(input_song, num_recommendation=10):
    """Recommend the songs whose scaled features are most similar to a given song.

    The song is looked up case-insensitively in ``df['name']``; the first match
    is used. Similarity is the cosine similarity between that song's row of
    ``music_scaled`` and every row of ``music_scaled``.

    Args:
        input_song: Title of the song to base the recommendations on.
        num_recommendation: Maximum number of songs to return. Values below
            zero are treated as zero.

    Returns:
        DataFrame with the ``name`` and ``artists`` columns of the recommended
        songs, most similar first, or ``None`` (after printing a message) when
        the title is not present in the library.
    """
    input_song_lower = input_song.lower()  # compare titles case-insensitively

    # Row *positions* of matching titles. Positions (not index labels) are needed
    # because they are used to index both `music_scaled` and `df.iloc`.
    matches = np.flatnonzero((df['name'].str.lower() == input_song_lower).to_numpy())
    if matches.size == 0:
        print(f"'{input_song}' is not available in our library! Please enter a valid song name!")
        return

    input_index = int(matches[0])
    sim_score = cosine_similarity(music_scaled[input_index:input_index + 1], music_scaled)[0]

    # Rank all songs from most to least similar, then drop the query song
    # explicitly. Relying on it being ranked first is unsafe: another song with
    # an identical feature vector ties with it and may take that slot.
    ranked = np.argsort(-sim_score, kind='stable')
    ranked = ranked[ranked != input_index]
    sim_song = ranked[:max(int(num_recommendation), 0)]

    recommendations = df.iloc[sim_song][['name', 'artists']]
    return recommendations

In [None]:
input_song = "don't let me down"  # example input
# Ask for the 10 songs most similar to the input.
recommendations = music_recommendation(input_song, num_recommendation=10)
# music_recommendation returns None (and prints its own message) when the song
# is unknown, so only print the header and table when there is a result.
if recommendations is not None:
    print(f"Recommendation song for '{input_song}':")
    print(recommendations)

Recommendation song for 'don't let me down':
                                   name      artists
88                               Malibu  Miley Cyrus
92                           Ahora Dice  Chris Jeday
77                         Just Hold On   Steve Aoki
64      No Promises (feat. Demi Lovato)  Cheat Codes
52                      Let Me Love You     DJ Snake
61                            El Amante    Nicky Jam
31                              Starboy   The Weeknd
89                            All Night    The Vamps
15                      Congratulations  Post Malone
48  Bad and Boujee (feat. Lil Uzi Vert)        Migos
