## Collecting Data from Spotify 

In order to get as many random song records as possible the idea is to import a playlist from Spotify, get all the artists from this playlist and then get all the albums from that artist. Next get all the songs from each album and create a dataframe with the song title, artist and the musical features (for clustering later).

In [219]:
import config
import spotipy
import json
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials

<b> Input personal ID and passcode

In [216]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= config.client_id,
                                                           client_secret= config.client_secret))

### Functions to get the tracks, artists and albums

In [3]:
def get_playlist_tracks(username, playlist_id):
    results = sp.user_playlist_tracks(username, playlist_id, market="GB")
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [4]:
def get_artists_from_track(track):
    return [artist["name"] for artist in track["artists"]]

In [5]:
def get_artists_from_playlist(playlist_id):
    tracks_from_playlist = get_playlist_tracks("spotify", playlist_id)
    return list(set(artist for subset in [get_artists_from_track(track["track"]) for track in tracks_from_playlist] for artist in subset))

In [6]:
def get_artists_ids_from_playlist(playlist_id):
    tracks_from_playlist = get_playlist_tracks("spotify", playlist_id)
    return list(set(artist for subset in [get_artists_ids_from_track(track["track"]) for track in tracks_from_playlist] for artist in subset))

In [7]:
def get_artists_ids_from_track(track):
    return [artist["id"] for artist in track["artists"]]

In [8]:
def get_albums_from_artist(artist_id):
    results = sp.artist_albums(artist_id, limit = 50,country="GB")
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [9]:
def get_album_ids_from_artist(artist_id):
    results = sp.artist_albums(artist_id, limit = 50)
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return [track["id"] for track in tracks]

In [10]:
def get_track_ids_from_albums(album_ids):
    return list(set([i["id"] for j in album_ids for i in sp.album(j)["tracks"]["items"]]))

<b> Trial and error to see how much a playlist yields </b> (since the amount of songs grows exponentially, runtime was too high for too many songs)

In [218]:
# https://open.spotify.com/playlist/6yPiKpy7evrwvZodByKvM9?si=53bb53d618714446
# https://open.spotify.com/playlist/6RAweZ5A3KPS0bfsZq5ShA?si=7145448b003c4c06
# https://open.spotify.com/playlist/6iP3SkfA4C7fqVLT7jFxHr?si=da822e87b612422a
ten_thou_songs = '6yPiKpy7evrwvZodByKvM9'
twenty_songs = '6RAweZ5A3KPS0bfsZq5ShA'
two_songs = '6iP3SkfA4C7fqVLT7jFxHr'

In [12]:
len(get_playlist_tracks("spotify", two_songs))

2

### Get artist and album IDs

In [13]:
artists = get_artists_from_playlist(two_songs)
artists_ids = get_artists_ids_from_playlist(two_songs)

In [14]:
artists_ids

['2Mu5NfyYm8n5iTomuKAEHl', '3nnQpaTvKb5jCQabZefACI']

In [16]:
album_ids_all_artists = []
    
for x in artists_ids:
    album_ids_all_artists.append(get_album_ids_from_artist(x))

In [18]:
album_ids_all_artists

[['6IfrO26rrFYAEbKLPjYZF0',
  '5sHByOqrDlhVXmMamZN49L',
  '22tn8fUpD1lurSga9yuqhM',
  '1BZoqf8Zje5nGdwZhOjAtD',
  '18XFe4CPBgVezXkxZP6rTb',
  '4z6F5s3RVaOsekuaegbLfD',
  '5dLSIo9cqbNiSRZNw4C6am',
  '2qtMrGLdjgNbVJ1xKDiqAf',
  '7pCY6bsu2PBONONqn70xLR',
  '2s86Plfre03jrW2vNyMj7A',
  '0zicOVyILytA8uaml8B2p3',
  '4wpzhjoof946rOeBziHINy',
  '4RwFZ9Gm5HB8sKi2GY54Gp',
  '3rLdVazrZrdbt5hLrXvhaS',
  '3nkttvCGUlJnkaGGRV6VTs',
  '6FWAHAq0uOFSISh2MTKS5N',
  '3URjYGvaAfACsye5HvHwkg',
  '0EHnwxYO1kwrvo5GPtcBAG',
  '0Mfmqwdm4MfNyrrRS0FoeX',
  '3whjvbBxG7j2FfTImRdqBw',
  '4yLYm0MNabHKZRkF04GRFK',
  '35pCwz0jIV2to1ZdyQ3Cs7',
  '6JDj2vdw1qMCykUOWc29Ni',
  '2BZPvDqEfvWML8U9mCDLb4',
  '6tw1IBoDUNVBHbf3BnUV7u',
  '6DWkCWip6xRcFwP0MCBL1E',
  '63NEi7MbCcGlMM2uVWC9tt',
  '6CM5qhYBvpgYNek5kYwuOJ',
  '5EkfaYyrg1J1lwh0oanknr',
  '6IB1v8xGCHpRmj4IHGxI9F',
  '2Pl8EdIx4Le8zHZBND44WT',
  '3veMevJHT7RJfEhGshmOiS',
  '2YRIXvSDdR1wv0k1YCfWD7',
  '13heDPCGwoIdufMoHIyjmh',
  '4kkb2R4q9ojUaeQx6oaSk1',
  '04qBN1xNYeX7Hzyub

In [19]:
len(album_ids_all_artists)

2

In [22]:
all_track_ids = []

for x in album_ids_all_artists:
        all_track_ids.append(get_track_ids_from_albums(x))

In [23]:
all_track_ids

[['0JFx4l1JZJ6XtMAtlCMiqF',
  '5bUVHuzQh5mkvMPjUU074i',
  '1XqgxhR3FcJXzFiSG8m6oL',
  '5wXgj9h0Pc36Rcgwf2qd7A',
  '3abz5HTatP9iv5p8Nm5IT0',
  '2vW9PXPr7YFxSLmHcHQGYq',
  '1pTyDGecdNtlm1PWuA46ZP',
  '4tLHsVB3rzaJcRfwFoMq91',
  '3FpY4pxw80UGTsG5do1eP9',
  '3SNBX4awHPJFnFzVf3nD5N',
  '2xeaDPP0nselGoREJDWhZg',
  '0ExpOQPyqSPIBJM7hGYRjQ',
  '2TQ1NzA5o6W0Ahjh34iibC',
  '3DvbkRlIl1luUuSLOowoVX',
  '3CNSWn2mISh7Ll3yJQbVEw',
  '2Qr7v3TzxkQ7jyazBqzSa3',
  '4YqECYigqRPv1ISR2oVqo8',
  '1HkwtH3XipPmKJeI9CZE1x',
  '5zKx6A8xH976VZ3pXreAFQ',
  '4uOSSFBofjz2y952Xzeq5W',
  '4tzRyCTpbOMpPUmIdPS7pZ',
  '651TRXpDMisZqvXMwNYQPD',
  '6U2F6eV8KSAMpLrZsRaNzE',
  '45G4bxDSyGABPW9KIRiaaT',
  '2oxMXAOjd5qOtyguYqAToL',
  '5mV3sIgGn0uCqbcf0f5eg9',
  '34QTgJPSf9Nvpw3NrlX8pu',
  '5npJRtGZoRax9ZaF3RkHeq',
  '2MuCLFbtlwUX26ajzNQnOU',
  '4L4WTX2XNTtzAcyHQU9d0G',
  '0u9fY4rfyynBbIcS2xanLy',
  '0x4duIqDqYwbLXNGa1AyRY',
  '4y0mygDmFnEaAvWBRd3EMQ',
  '2rM1uEwuDtoEl4kOyxiW1M',
  '28DnXjTaKItymVP1Bl9KIm',
  '3HiOludgUKJtniNCr

In [24]:
len(all_track_ids)

2

### Get track names from the IDs

In [29]:
def get_track_names(all_track_ids):
    
    list_of_songs = []
    
    for x in all_track_ids:
        
        for i in x:
                  
            track = sp.track('spotify:track:' + i)
            list_of_songs.append(track['name'])
        
    return list_of_songs

In [39]:
def get_artist_names(all_track_ids):
    
    list_of_artists = []
    
    for x in all_track_ids:
        
        for i in x:
            
            track = sp.track('spotify:track:' + i)
            list_of_artists.append(track['artists'][0]['name'])
        
    return list_of_artists

In [36]:
list_of_songs = get_track_names(all_track_ids)

In [32]:
len(list_of_songs)

7053

In [40]:
list_of_artists = get_artist_names(all_track_ids)

In [41]:
len(list_of_artists)

7053

In [190]:
songs_df = pd.DataFrame({'song_title' : list_of_songs, 'artist' : list_of_artists})

In [191]:
songs_df

Unnamed: 0,song_title,artist
0,Hey Ya!,Outkast
1,"No Woman, No Cry",Fugees
2,Can You Feel It,The Jacksons
3,Foolish,Ashanti
4,Arigato Interlude,Ms. Lauryn Hill
...,...,...
7048,XO,John Mayer
7049,Un-Break My Heart,Toni Braxton
7050,God Is a DJ,Faithless
7051,I Wanna Be Adored - Remastered,The Stone Roses


### Get features of the songs

Need to split the list into chunks of size 100 in order to parse them

In [193]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
        
def chunks_list(lst, n):
    """List of successive n-sized chunks from lst."""
    return list(chunks(lst,n))

In [194]:
song_list_chunked1 = chunks_list(all_track_ids[0], 100)

In [195]:
song_list_chunked2 = chunks_list(all_track_ids[1], 100)

In [196]:
len(song_list_chunked1[0]) # song_list_chunked1 is a list of 37 lists of length 100

100

In [197]:
len(song_list_chunked2) # song_list_chunked2 is a list of 35 lists of length 100

35

In [198]:
audio_list1 = []
for x in song_list_chunked1:

    audio_list1.append(sp.audio_features(x))

In [199]:
audio_list2 = []
for x in song_list_chunked2:

    audio_list2.append(sp.audio_features(x))

We obtain a list of dictionaries holding the song features

In [200]:
audio_list1
audio_list2

[[{'danceability': 0.541,
   'energy': 0.952,
   'key': 2,
   'loudness': -4.687,
   'mode': 1,
   'speechiness': 0.047,
   'acousticness': 0.185,
   'instrumentalness': 0.454,
   'liveness': 0.641,
   'valence': 0.729,
   'tempo': 113.852,
   'type': 'audio_features',
   'id': '27BX9e4fHX89wRwSRZyXh7',
   'uri': 'spotify:track:27BX9e4fHX89wRwSRZyXh7',
   'track_href': 'https://api.spotify.com/v1/tracks/27BX9e4fHX89wRwSRZyXh7',
   'analysis_url': 'https://api.spotify.com/v1/audio-analysis/27BX9e4fHX89wRwSRZyXh7',
   'duration_ms': 215000,
   'time_signature': 4},
  {'danceability': 0.376,
   'energy': 0.942,
   'key': 8,
   'loudness': -8.925,
   'mode': 1,
   'speechiness': 0.0415,
   'acousticness': 0.00964,
   'instrumentalness': 0.38,
   'liveness': 0.133,
   'valence': 0.934,
   'tempo': 131.516,
   'type': 'audio_features',
   'id': '5UDAw7YUWBa8BxUCvSFzsH',
   'uri': 'spotify:track:5UDAw7YUWBa8BxUCvSFzsH',
   'track_href': 'https://api.spotify.com/v1/tracks/5UDAw7YUWBa8BxUCvSFzs

In [201]:
len(audio_list2)

35

### Create dataframe of the audio features

In [202]:
features_df1 = pd.DataFrame()

for x in audio_list1:
    for i in x:
        try:
            dicts = { key: [i[key]] for key in list(i.keys()) }
            features_df1 = features_df1.append(pd.DataFrame(dicts))
        except:
            print("Error! No features for this song.") #+ sp.track('spotify:track:' + i['id'])
            continue

features_df1

Error! No features for this song.
Error! No features for this song.
Error! No features for this song.
Error! No features for this song.
Error! No features for this song.
Error! No features for this song.


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.657,0.883,4,-6.778,0,0.0526,0.0349,0.000000,0.3060,0.9660,79.397,audio_features,0JFx4l1JZJ6XtMAtlCMiqF,spotify:track:0JFx4l1JZJ6XtMAtlCMiqF,https://api.spotify.com/v1/tracks/0JFx4l1JZJ6X...,https://api.spotify.com/v1/audio-analysis/0JFx...,228573,4
0,0.846,0.326,0,-13.909,1,0.1520,0.0134,0.000040,0.2800,0.3670,88.988,audio_features,5bUVHuzQh5mkvMPjUU074i,spotify:track:5bUVHuzQh5mkvMPjUU074i,https://api.spotify.com/v1/tracks/5bUVHuzQh5mk...,https://api.spotify.com/v1/audio-analysis/5bUV...,273093,4
0,0.892,0.638,6,-7.468,0,0.0488,0.0445,0.000476,0.2020,0.8920,125.359,audio_features,1XqgxhR3FcJXzFiSG8m6oL,spotify:track:1XqgxhR3FcJXzFiSG8m6oL,https://api.spotify.com/v1/tracks/1XqgxhR3FcJX...,https://api.spotify.com/v1/audio-analysis/1Xqg...,358373,4
0,0.620,0.701,0,-5.742,1,0.0624,0.3160,0.000000,0.1470,0.7710,90.059,audio_features,5wXgj9h0Pc36Rcgwf2qd7A,spotify:track:5wXgj9h0Pc36Rcgwf2qd7A,https://api.spotify.com/v1/tracks/5wXgj9h0Pc36...,https://api.spotify.com/v1/audio-analysis/5wXg...,229013,4
0,0.000,0.978,10,-9.615,1,0.0000,0.7990,0.002280,0.9510,0.0000,0.000,audio_features,3abz5HTatP9iv5p8Nm5IT0,spotify:track:3abz5HTatP9iv5p8Nm5IT0,https://api.spotify.com/v1/tracks/3abz5HTatP9i...,https://api.spotify.com/v1/audio-analysis/3abz...,34182,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.519,0.444,1,-8.295,1,0.3030,0.0295,0.000014,0.3160,0.0962,92.117,audio_features,26Dg5sAkFzfGR3LA7W4V9P,spotify:track:26Dg5sAkFzfGR3LA7W4V9P,https://api.spotify.com/v1/tracks/26Dg5sAkFzfG...,https://api.spotify.com/v1/audio-analysis/26Dg...,233160,4
0,0.664,0.665,6,-8.073,0,0.0256,0.0129,0.000000,0.2340,0.4880,99.019,audio_features,6rSQMB27y3n9DOSax1pTdE,spotify:track:6rSQMB27y3n9DOSax1pTdE,https://api.spotify.com/v1/tracks/6rSQMB27y3n9...,https://api.spotify.com/v1/audio-analysis/6rSQ...,213133,4
0,0.620,0.966,11,-7.884,0,0.0386,0.0512,0.000041,0.0767,0.9470,115.166,audio_features,0HFqT5VBOJdt7NpboUl7DU,spotify:track:0HFqT5VBOJdt7NpboUl7DU,https://api.spotify.com/v1/tracks/0HFqT5VBOJdt...,https://api.spotify.com/v1/audio-analysis/0HFq...,200840,4
0,0.661,0.726,9,-7.354,0,0.0541,0.0092,0.001510,0.2490,0.5310,95.924,audio_features,3JjppPC2rc9YtycDLYkSCH,spotify:track:3JjppPC2rc9YtycDLYkSCH,https://api.spotify.com/v1/tracks/3JjppPC2rc9Y...,https://api.spotify.com/v1/audio-analysis/3Jjp...,330467,4


In [203]:
features_df2 = pd.DataFrame()

for x in audio_list2:
    for i in x:
        try:
            dicts = { key: [i[key]] for key in list(i.keys()) }
            features_df2 = features_df2.append(pd.DataFrame(dicts))
        except:
            print("Error!") #+ sp.track('spotify:track:' + i['id'])
            continue

features_df2

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.541,0.952,2,-4.687,1,0.0470,0.18500,0.454000,0.6410,0.729,113.852,audio_features,27BX9e4fHX89wRwSRZyXh7,spotify:track:27BX9e4fHX89wRwSRZyXh7,https://api.spotify.com/v1/tracks/27BX9e4fHX89...,https://api.spotify.com/v1/audio-analysis/27BX...,215000,4
0,0.376,0.942,8,-8.925,1,0.0415,0.00964,0.380000,0.1330,0.934,131.516,audio_features,5UDAw7YUWBa8BxUCvSFzsH,spotify:track:5UDAw7YUWBa8BxUCvSFzsH,https://api.spotify.com/v1/tracks/5UDAw7YUWBa8...,https://api.spotify.com/v1/audio-analysis/5UDA...,183107,4
0,0.431,0.326,2,-11.791,1,0.0292,0.69800,0.000000,0.1800,0.375,173.631,audio_features,3tWkjdHRJXSIjsIPfKJiVC,spotify:track:3tWkjdHRJXSIjsIPfKJiVC,https://api.spotify.com/v1/tracks/3tWkjdHRJXSI...,https://api.spotify.com/v1/audio-analysis/3tWk...,214987,4
0,0.570,0.721,7,-7.871,0,0.0363,0.39100,0.000019,0.1630,0.722,74.886,audio_features,3PX3OcHkep7tZ5ExxRK4hk,spotify:track:3PX3OcHkep7tZ5ExxRK4hk,https://api.spotify.com/v1/tracks/3PX3OcHkep7t...,https://api.spotify.com/v1/audio-analysis/3PX3...,260827,4
0,0.637,0.468,0,-8.702,1,0.0346,0.40800,0.000041,0.5920,0.604,124.432,audio_features,2QtKGn6PoTDcroCFUNixCt,spotify:track:2QtKGn6PoTDcroCFUNixCt,https://api.spotify.com/v1/tracks/2QtKGn6PoTDc...,https://api.spotify.com/v1/audio-analysis/2QtK...,192280,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.431,0.377,2,-9.753,1,0.0288,0.74900,0.000000,0.2110,0.353,173.670,audio_features,0hm785gGfLmHvl9YesZcFP,spotify:track:0hm785gGfLmHvl9YesZcFP,https://api.spotify.com/v1/tracks/0hm785gGfLmH...,https://api.spotify.com/v1/audio-analysis/0hm7...,213627,4
0,0.620,0.417,11,-9.166,0,0.0286,0.41500,0.000000,0.0984,0.133,109.904,audio_features,5CYbKUBECU71XChJBP01KE,spotify:track:5CYbKUBECU71XChJBP01KE,https://api.spotify.com/v1/tracks/5CYbKUBECU71...,https://api.spotify.com/v1/audio-analysis/5CYb...,271467,4
0,0.604,0.893,10,-7.480,0,0.0453,0.17500,0.900000,0.1570,0.366,130.555,audio_features,76QhWAnwj7aSgAYPOPNXYN,spotify:track:76QhWAnwj7aSgAYPOPNXYN,https://api.spotify.com/v1/tracks/76QhWAnwj7aS...,https://api.spotify.com/v1/audio-analysis/76Qh...,207613,4
0,0.502,0.860,4,-9.251,0,0.0278,0.01230,0.240000,0.2160,0.559,112.585,audio_features,5vnMGVPld0mhqVVHMK89y3,spotify:track:5vnMGVPld0mhqVVHMK89y3,https://api.spotify.com/v1/tracks/5vnMGVPld0mh...,https://api.spotify.com/v1/audio-analysis/5vnM...,292413,4


In [204]:
df_all_features = pd.concat([features_df1, features_df2])

In [205]:
df_all_features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.657,0.883,4,-6.778,0,0.0526,0.0349,0.000000,0.3060,0.966,79.397,audio_features,0JFx4l1JZJ6XtMAtlCMiqF,spotify:track:0JFx4l1JZJ6XtMAtlCMiqF,https://api.spotify.com/v1/tracks/0JFx4l1JZJ6X...,https://api.spotify.com/v1/audio-analysis/0JFx...,228573,4
0,0.846,0.326,0,-13.909,1,0.1520,0.0134,0.000040,0.2800,0.367,88.988,audio_features,5bUVHuzQh5mkvMPjUU074i,spotify:track:5bUVHuzQh5mkvMPjUU074i,https://api.spotify.com/v1/tracks/5bUVHuzQh5mk...,https://api.spotify.com/v1/audio-analysis/5bUV...,273093,4
0,0.892,0.638,6,-7.468,0,0.0488,0.0445,0.000476,0.2020,0.892,125.359,audio_features,1XqgxhR3FcJXzFiSG8m6oL,spotify:track:1XqgxhR3FcJXzFiSG8m6oL,https://api.spotify.com/v1/tracks/1XqgxhR3FcJX...,https://api.spotify.com/v1/audio-analysis/1Xqg...,358373,4
0,0.620,0.701,0,-5.742,1,0.0624,0.3160,0.000000,0.1470,0.771,90.059,audio_features,5wXgj9h0Pc36Rcgwf2qd7A,spotify:track:5wXgj9h0Pc36Rcgwf2qd7A,https://api.spotify.com/v1/tracks/5wXgj9h0Pc36...,https://api.spotify.com/v1/audio-analysis/5wXg...,229013,4
0,0.000,0.978,10,-9.615,1,0.0000,0.7990,0.002280,0.9510,0.000,0.000,audio_features,3abz5HTatP9iv5p8Nm5IT0,spotify:track:3abz5HTatP9iv5p8Nm5IT0,https://api.spotify.com/v1/tracks/3abz5HTatP9i...,https://api.spotify.com/v1/audio-analysis/3abz...,34182,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.431,0.377,2,-9.753,1,0.0288,0.7490,0.000000,0.2110,0.353,173.670,audio_features,0hm785gGfLmHvl9YesZcFP,spotify:track:0hm785gGfLmHvl9YesZcFP,https://api.spotify.com/v1/tracks/0hm785gGfLmH...,https://api.spotify.com/v1/audio-analysis/0hm7...,213627,4
0,0.620,0.417,11,-9.166,0,0.0286,0.4150,0.000000,0.0984,0.133,109.904,audio_features,5CYbKUBECU71XChJBP01KE,spotify:track:5CYbKUBECU71XChJBP01KE,https://api.spotify.com/v1/tracks/5CYbKUBECU71...,https://api.spotify.com/v1/audio-analysis/5CYb...,271467,4
0,0.604,0.893,10,-7.480,0,0.0453,0.1750,0.900000,0.1570,0.366,130.555,audio_features,76QhWAnwj7aSgAYPOPNXYN,spotify:track:76QhWAnwj7aSgAYPOPNXYN,https://api.spotify.com/v1/tracks/76QhWAnwj7aS...,https://api.spotify.com/v1/audio-analysis/76Qh...,207613,4
0,0.502,0.860,4,-9.251,0,0.0278,0.0123,0.240000,0.2160,0.559,112.585,audio_features,5vnMGVPld0mhqVVHMK89y3,spotify:track:5vnMGVPld0mhqVVHMK89y3,https://api.spotify.com/v1/tracks/5vnMGVPld0mh...,https://api.spotify.com/v1/audio-analysis/5vnM...,292413,4


In [206]:
df_all_features.to_csv('features_data')

<b> Add ID column to original df so we can merge them

In [207]:
track_ids = []

for x in all_track_ids:

    for i in x:

        track_ids.append(i)

In [208]:
len(track_ids)

7053

In [209]:
songs_df['id'] = track_ids

In [210]:
songs_df

Unnamed: 0,song_title,artist,id
0,Hey Ya!,Outkast,0JFx4l1JZJ6XtMAtlCMiqF
1,"No Woman, No Cry",Fugees,5bUVHuzQh5mkvMPjUU074i
2,Can You Feel It,The Jacksons,1XqgxhR3FcJXzFiSG8m6oL
3,Foolish,Ashanti,5wXgj9h0Pc36Rcgwf2qd7A
4,Arigato Interlude,Ms. Lauryn Hill,3abz5HTatP9iv5p8Nm5IT0
...,...,...,...
7048,XO,John Mayer,0hm785gGfLmHvl9YesZcFP
7049,Un-Break My Heart,Toni Braxton,5CYbKUBECU71XChJBP01KE
7050,God Is a DJ,Faithless,76QhWAnwj7aSgAYPOPNXYN
7051,I Wanna Be Adored - Remastered,The Stone Roses,5vnMGVPld0mhqVVHMK89y3


In [211]:
df = pd.merge(songs_df, df_all_features, on='id')

In [212]:
df

Unnamed: 0,song_title,artist,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
0,Hey Ya!,Outkast,0JFx4l1JZJ6XtMAtlCMiqF,0.657,0.883,4,-6.778,0,0.0526,0.0349,0.000000,0.3060,0.966,79.397,audio_features,spotify:track:0JFx4l1JZJ6XtMAtlCMiqF,https://api.spotify.com/v1/tracks/0JFx4l1JZJ6X...,https://api.spotify.com/v1/audio-analysis/0JFx...,228573,4
1,"No Woman, No Cry",Fugees,5bUVHuzQh5mkvMPjUU074i,0.846,0.326,0,-13.909,1,0.1520,0.0134,0.000040,0.2800,0.367,88.988,audio_features,spotify:track:5bUVHuzQh5mkvMPjUU074i,https://api.spotify.com/v1/tracks/5bUVHuzQh5mk...,https://api.spotify.com/v1/audio-analysis/5bUV...,273093,4
2,Can You Feel It,The Jacksons,1XqgxhR3FcJXzFiSG8m6oL,0.892,0.638,6,-7.468,0,0.0488,0.0445,0.000476,0.2020,0.892,125.359,audio_features,spotify:track:1XqgxhR3FcJXzFiSG8m6oL,https://api.spotify.com/v1/tracks/1XqgxhR3FcJX...,https://api.spotify.com/v1/audio-analysis/1Xqg...,358373,4
3,Foolish,Ashanti,5wXgj9h0Pc36Rcgwf2qd7A,0.620,0.701,0,-5.742,1,0.0624,0.3160,0.000000,0.1470,0.771,90.059,audio_features,spotify:track:5wXgj9h0Pc36Rcgwf2qd7A,https://api.spotify.com/v1/tracks/5wXgj9h0Pc36...,https://api.spotify.com/v1/audio-analysis/5wXg...,229013,4
4,Arigato Interlude,Ms. Lauryn Hill,3abz5HTatP9iv5p8Nm5IT0,0.000,0.978,10,-9.615,1,0.0000,0.7990,0.002280,0.9510,0.000,0.000,audio_features,spotify:track:3abz5HTatP9iv5p8Nm5IT0,https://api.spotify.com/v1/tracks/3abz5HTatP9i...,https://api.spotify.com/v1/audio-analysis/3abz...,34182,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7312,XO,John Mayer,0hm785gGfLmHvl9YesZcFP,0.431,0.377,2,-9.753,1,0.0288,0.7490,0.000000,0.2110,0.353,173.670,audio_features,spotify:track:0hm785gGfLmHvl9YesZcFP,https://api.spotify.com/v1/tracks/0hm785gGfLmH...,https://api.spotify.com/v1/audio-analysis/0hm7...,213627,4
7313,Un-Break My Heart,Toni Braxton,5CYbKUBECU71XChJBP01KE,0.620,0.417,11,-9.166,0,0.0286,0.4150,0.000000,0.0984,0.133,109.904,audio_features,spotify:track:5CYbKUBECU71XChJBP01KE,https://api.spotify.com/v1/tracks/5CYbKUBECU71...,https://api.spotify.com/v1/audio-analysis/5CYb...,271467,4
7314,God Is a DJ,Faithless,76QhWAnwj7aSgAYPOPNXYN,0.604,0.893,10,-7.480,0,0.0453,0.1750,0.900000,0.1570,0.366,130.555,audio_features,spotify:track:76QhWAnwj7aSgAYPOPNXYN,https://api.spotify.com/v1/tracks/76QhWAnwj7aS...,https://api.spotify.com/v1/audio-analysis/76Qh...,207613,4
7315,I Wanna Be Adored - Remastered,The Stone Roses,5vnMGVPld0mhqVVHMK89y3,0.502,0.860,4,-9.251,0,0.0278,0.0123,0.240000,0.2160,0.559,112.585,audio_features,spotify:track:5vnMGVPld0mhqVVHMK89y3,https://api.spotify.com/v1/tracks/5vnMGVPld0mh...,https://api.spotify.com/v1/audio-analysis/5vnM...,292413,4


In [213]:
df.to_csv('all_music_data')