# Installing Spotipy

In [1]:
##!conda install -c conda-forge spotipy

## Loading credentials from the config file

In [2]:
import config

## Starting with Spotify API

In [None]:
import spotipy
import pandas as pd
import json
from spotipy.oauth2 import SpotifyClientCredentials


# Build the Spotipy client, authenticating with the id/secret read from the `config` module.
credentials_manager = SpotifyClientCredentials(
    client_id=config.client_id,
    client_secret=config.client_secret,
)
sp = spotipy.Spotify(auth_manager=credentials_manager)

# Search for "daddy cool", asking for at most five results in the GB market.
results = sp.search(q="daddy cool", limit=5, market="GB")
results


In [5]:
import pprint

In [16]:
pprint.pprint(results)

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=daddy+cool&type=track&market=GB&offset=0&limit=5',
            'items': [{'album': {'album_type': 'album',
                                 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/54R6Y0I7jGUCveDTtI21nb'},
                                              'href': 'https://api.spotify.com/v1/artists/54R6Y0I7jGUCveDTtI21nb',
                                              'id': '54R6Y0I7jGUCveDTtI21nb',
                                              'name': 'Boney M.',
                                              'type': 'artist',
                                              'uri': 'spotify:artist:54R6Y0I7jGUCveDTtI21nb'}],
                                 'external_urls': {'spotify': 'https://open.spotify.com/album/1KQUrny9y5zGpktF6hAGd4'},
                                 'href': 'https://api.spotify.com/v1/albums/1KQUrny9y5zGpktF6hAGd4',
                                 'id': '1KQUrny9y5zGpktF6h

In [7]:
pprint.pprint(results["tracks"]["items"][0]["id"])

'3WMbD1OyfKuwWDWMNbPQ4g'


# Understanding the json

In [8]:
# Walk through the search response one field at a time.
print("The json file has the following keys: ",list(results.keys())) # We can see that we only have tracks
print("The 'tracks' key has the following child keys: ",list(results["tracks"].keys())) # Let's check the values
print("The query we made is: ",results["tracks"]["href"]) # URL of the query we have sent
print("The song's info is contained in: ",results["tracks"]["items"]) # items (actual tracks)
print("The limit of the query we've made is: ",results["tracks"]["limit"]) # Page size we have chosen
print("The next page if any: ",results["tracks"]["next"]) # Link to the next page (the next `limit` tracks)
print("The offset of this page: ",results["tracks"]["offset"]) # Actual offset (index of the first result on this page)
print("The previous page if any: ",results["tracks"]["previous"]) # Counterpart of "next" for the preceding page
print("Total number of results: ",results["tracks"]["total"]) # Number of matches

The json file has the following keys:  ['tracks']
The 'tracks' key has the following child keys:  ['href', 'items', 'limit', 'next', 'offset', 'previous', 'total']
The query we made is:  https://api.spotify.com/v1/search?query=daddy+cool&type=track&market=GB&offset=0&limit=5
The song's info is contained in:  [{'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/54R6Y0I7jGUCveDTtI21nb'}, 'href': 'https://api.spotify.com/v1/artists/54R6Y0I7jGUCveDTtI21nb', 'id': '54R6Y0I7jGUCveDTtI21nb', 'name': 'Boney M.', 'type': 'artist', 'uri': 'spotify:artist:54R6Y0I7jGUCveDTtI21nb'}], 'external_urls': {'spotify': 'https://open.spotify.com/album/1KQUrny9y5zGpktF6hAGd4'}, 'href': 'https://api.spotify.com/v1/albums/1KQUrny9y5zGpktF6hAGd4', 'id': '1KQUrny9y5zGpktF6hAGd4', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/ab67616d0000b273dafd1cd6e9537ec8463ea691', 'width': 640}, {'height': 300, 'url': 'https://i.scdn.co/image/ab67616d00001

## Other Info

In [9]:
results["tracks"]["items"][0]["artists"] # Track artists
results["tracks"]["items"][0]["id"] # Track ID
results["tracks"]["items"][0]["name"] # Track name
results["tracks"]["items"][0]["popularity"] # Popularity index
results["tracks"]["items"][0]["uri"] # Basically ID

'spotify:track:3WMbD1OyfKuwWDWMNbPQ4g'

In [10]:
len(results['tracks']["items"])

5

## Getting the track_id

In [11]:
track_id=results["tracks"]["items"][0]["id"]
track_id

'3WMbD1OyfKuwWDWMNbPQ4g'

In [12]:
# List the title and Spotify id of every track in the search response.
line_template = "The name of song is: '{}' and the id is: {}"
for track in results['tracks']['items']:
    print(line_template.format(track['name'], track["id"]))

The name of song is: 'Daddy Cool' and the id is: 3WMbD1OyfKuwWDWMNbPQ4g
The name of song is: 'Rasputin' and the id is: 5lWSa1rmuSL6OBPOnkAqoa
The name of song is: 'Daddy Cool' and the id is: 50rcUhHimavzT1qq6QrgMG
The name of song is: 'Yes Sir, I Can Boogie' and the id is: 2LScqpywMqGcnum6nNaxXX
The name of song is: 'Daddy Cool' and the id is: 6blKbRwYDoXl5fFvQY2U75


## Embedded track player

In [13]:
from IPython.display import IFrame

# Show Spotify's embed player for one hard-coded track id.
track_id = '3WMbD1OyfKuwWDWMNbPQ4g'
embed_url = "https://open.spotify.com/embed/track/" + track_id
IFrame(
    src=embed_url,
    width="320",
    height="80",
    frameborder="0",
    allowtransparency="true",
    allow="encrypted-media",
)

In [14]:
def play_song(track_id):
    """Return an inline Spotify embed player for a single track.

    Parameters
    ----------
    track_id : str
        Either a bare track id (``'3WMbD1OyfKuwWDWMNbPQ4g'``) or a track URI
        (``'spotify:track:3WMbD1OyfKuwWDWMNbPQ4g'``).

    Returns
    -------
    IFrame
        A 320x80 iframe whose source is the track's embed page.
    """
    # The embed URL is built from the bare id, so keep only the last
    # colon-separated segment; a bare id has no colon and passes through as-is.
    bare_id = track_id.rsplit(":", 1)[-1]
    # NOTE(review): IPython's IFrame appears to forward extra keyword arguments
    # as URL query parameters rather than iframe attributes — confirm that
    # frameborder / allowtransparency / allow have the intended effect.
    return IFrame(src="https://open.spotify.com/embed/track/" + bare_id,
                  width="320",
                  height="80",
                  frameborder="0",
                  allowtransparency="true",
                  allow="encrypted-media",
                  )

In [15]:
play_song('3WMbD1OyfKuwWDWMNbPQ4g')

# Getting the Audio features of a song

In [17]:
id_ = results["tracks"]["items"][0]["id"]

In [18]:
sp.audio_features(id_ )

[{'danceability': 0.763,
  'energy': 0.777,
  'key': 5,
  'loudness': -6.932,
  'mode': 0,
  'speechiness': 0.0364,
  'acousticness': 0.281,
  'instrumentalness': 0.79,
  'liveness': 0.134,
  'valence': 0.929,
  'tempo': 124.431,
  'type': 'audio_features',
  'id': '3WMbD1OyfKuwWDWMNbPQ4g',
  'uri': 'spotify:track:3WMbD1OyfKuwWDWMNbPQ4g',
  'track_href': 'https://api.spotify.com/v1/tracks/3WMbD1OyfKuwWDWMNbPQ4g',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3WMbD1OyfKuwWDWMNbPQ4g',
  'duration_ms': 208600,
  'time_signature': 4}]

In [19]:
play_song('2TVxnKdb3tqe1nhQWwwZCO')

In [20]:
sp.audio_features("3WMbD1OyfKuwWDWMNbPQ4g")

[{'danceability': 0.763,
  'energy': 0.777,
  'key': 5,
  'loudness': -6.932,
  'mode': 0,
  'speechiness': 0.0364,
  'acousticness': 0.281,
  'instrumentalness': 0.79,
  'liveness': 0.134,
  'valence': 0.929,
  'tempo': 124.431,
  'type': 'audio_features',
  'id': '3WMbD1OyfKuwWDWMNbPQ4g',
  'uri': 'spotify:track:3WMbD1OyfKuwWDWMNbPQ4g',
  'track_href': 'https://api.spotify.com/v1/tracks/3WMbD1OyfKuwWDWMNbPQ4g',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3WMbD1OyfKuwWDWMNbPQ4g',
  'duration_ms': 208600,
  'time_signature': 4}]

## Building a Data frame of audio features

In [21]:
sp.audio_features(results["tracks"]["items"][0]["uri"])

[{'danceability': 0.763,
  'energy': 0.777,
  'key': 5,
  'loudness': -6.932,
  'mode': 0,
  'speechiness': 0.0364,
  'acousticness': 0.281,
  'instrumentalness': 0.79,
  'liveness': 0.134,
  'valence': 0.929,
  'tempo': 124.431,
  'type': 'audio_features',
  'id': '3WMbD1OyfKuwWDWMNbPQ4g',
  'uri': 'spotify:track:3WMbD1OyfKuwWDWMNbPQ4g',
  'track_href': 'https://api.spotify.com/v1/tracks/3WMbD1OyfKuwWDWMNbPQ4g',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3WMbD1OyfKuwWDWMNbPQ4g',
  'duration_ms': 208600,
  'time_signature': 4}]

In [22]:
results

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=daddy+cool&type=track&market=GB&offset=0&limit=5',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/54R6Y0I7jGUCveDTtI21nb'},
       'href': 'https://api.spotify.com/v1/artists/54R6Y0I7jGUCveDTtI21nb',
       'id': '54R6Y0I7jGUCveDTtI21nb',
       'name': 'Boney M.',
       'type': 'artist',
       'uri': 'spotify:artist:54R6Y0I7jGUCveDTtI21nb'}],
     'external_urls': {'spotify': 'https://open.spotify.com/album/1KQUrny9y5zGpktF6hAGd4'},
     'href': 'https://api.spotify.com/v1/albums/1KQUrny9y5zGpktF6hAGd4',
     'id': '1KQUrny9y5zGpktF6hAGd4',
     'images': [{'height': 640,
       'url': 'https://i.scdn.co/image/ab67616d0000b273dafd1cd6e9537ec8463ea691',
       'width': 640},
      {'height': 300,
       'url': 'https://i.scdn.co/image/ab67616d00001e02dafd1cd6e9537ec8463ea691',
       'width': 300},
      {'height': 64,
       'url': 'https://i.sc

In [23]:
# Collect the id of every track in the search response.
list_of_songs = [item["id"] for item in results["tracks"]["items"]]

# Request the audio features of all of those tracks in a single call.
song_features = sp.audio_features(list_of_songs)

# Keep the audio descriptors plus the track id and duration; drop the URL/type bookkeeping fields.
feature_columns = [
    "danceability", "energy", "loudness", "speechiness", "acousticness",
    "instrumentalness", "liveness", "valence", "tempo", "id", "duration_ms",
]
df = pd.DataFrame(song_features)[feature_columns]

df

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms
0,0.763,0.777,-6.932,0.0364,0.281,0.79,0.134,0.929,124.431,3WMbD1OyfKuwWDWMNbPQ4g,208600
1,0.69,0.752,-11.47,0.0455,0.463,0.00102,0.595,0.97,126.005,5lWSa1rmuSL6OBPOnkAqoa,220667
2,0.809,0.872,-4.729,0.0792,0.00581,0.545,0.0637,0.969,123.996,50rcUhHimavzT1qq6QrgMG,154859
3,0.454,0.769,-7.653,0.0814,0.127,3e-06,0.175,0.491,122.531,2LScqpywMqGcnum6nNaxXX,273867
4,0.586,0.82,-6.414,0.0323,1e-06,0.794,0.106,0.917,125.037,6blKbRwYDoXl5fFvQY2U75,201707


## Searching a playlist

In [24]:
# NOTE: this rebinds `results`, replacing the "daddy cool" search response
# that the cells above read from.
results = sp.search(q="down jay sean")
# Play the first track of the new search response.
play_song(results["tracks"]["items"][0]["id"])

In [25]:
playlist = sp.user_playlist_tracks("spotify", "4ebKOuGNfJ5g8RdtbEBHxe",market="GB")

In [26]:
playlist

{'href': 'https://api.spotify.com/v1/playlists/4ebKOuGNfJ5g8RdtbEBHxe/tracks?offset=0&limit=100&market=GB&additional_types=track',
 'items': [{'added_at': '2021-11-07T08:03:16Z',
   'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/20oxoq89bzf5r8uf5x54v3k8a'},
    'href': 'https://api.spotify.com/v1/users/20oxoq89bzf5r8uf5x54v3k8a',
    'id': '20oxoq89bzf5r8uf5x54v3k8a',
    'type': 'user',
    'uri': 'spotify:user:20oxoq89bzf5r8uf5x54v3k8a'},
   'is_local': False,
   'primary_color': None,
   'track': {'album': {'album_type': 'single',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2Pcw4hXyhUXI5lAxdjDvww'},
       'href': 'https://api.spotify.com/v1/artists/2Pcw4hXyhUXI5lAxdjDvww',
       'id': '2Pcw4hXyhUXI5lAxdjDvww',
       'name': 'Post Rock Community',
       'type': 'artist',
       'uri': 'spotify:artist:2Pcw4hXyhUXI5lAxdjDvww'}],
     'external_urls': {'spotify': 'https://open.spotify.com/album/4rfmp4tcSWXavXohMvBexj'},
 

In [None]:
type(playlist)

In [None]:
playlist.keys()

In [27]:
len(playlist["items"])

100

## Extracting a song from playlist

In [None]:
pprint.pprint(playlist)

In [None]:
playlist["items"][0]["track"]["id"]

In [None]:
play_song(playlist["items"][0]["track"]["id"])

In [None]:
# Top-level keys of the playlist response; "items" and "total" are the ones inspected below.
print(list(playlist.keys()))
# "total" is the number of songs the playlist reports.
print("Total number of songs in the playlist: ",playlist["total"])
# Number of items actually returned by this single request (100 in the run shown above),
# so longer playlists have to be fetched page by page.
len(playlist["items"])

## Extracting the songs of a playlist

Pagination using "next"
When you collect songs from a playlist using sp.playlist_tracks, you're limited by the limit parameter, which has a maximum (and default) value of 100. When the playlist has more than 100 songs, you have to collect them by navigating through the "pages" of the results.

The parameter offset allows you to retrieve results starting at a certain position. It is zero-based, so with a limit of 100 an offset of 100 skips the first 100 items and gives you the second "page" of results. An offset of 200 would give you the third page, and so on.

The function sp.next() does the same, but in a simpler way: it can be used on the results from any request to directly retrieve the results for the next page.

We can check whether there's a next page or not by accessing the key next on the results from any request.

In [None]:
# NOTE(review): `bob_dylan_id_list` is not assigned in any cell visible in this
# notebook, so this cell raises NameError on a fresh kernel — confirm where the
# list is supposed to come from, or remove the cell.
sp.audio_features(bob_dylan_id_list)

In [28]:
def get_playlist_tracks(username, playlist_id, market="GB"):
    """Collect every item of a playlist, following pagination to the last page.

    Parameters
    ----------
    username : str
        Playlist owner, passed through to ``sp.user_playlist_tracks``.
    playlist_id : str
        Spotify id of the playlist.
    market : str, optional
        Market code forwarded to the request. Defaults to ``"GB"``, the value
        that used to be hard-coded here.

    Returns
    -------
    list
        The ``'items'`` entries of every page, in the order they were returned.
    """
    page = sp.user_playlist_tracks(username, playlist_id, market=market)
    # Copy the first page so the response object itself is never mutated.
    tracks = list(page['items'])
    # 'next' is falsy on the last page, which ends the loop.
    while page['next']:
        page = sp.next(page)
        tracks.extend(page['items'])
    return tracks

In [None]:
spanish_tracks = get_playlist_tracks("Spotify","37i9dQZF1EQpj7X7UK8OOF")

# Skip entries that carry no track object or no track id, so that every value
# handed to audio_features is a usable id.
spanish_ids = [
    item['track']['id']
    for item in spanish_tracks
    if item.get('track') and item['track'].get('id')
]

af = sp.audio_features(spanish_ids)

In [None]:
len(spanish_tracks)

In [None]:
# Track ids of the playlist, leaving out entries with no track object or no id.
spanish_ids = [
    item['track']['id']
    for item in spanish_tracks
    if item.get('track') and item['track'].get('id')
]

In [None]:
len(spanish_ids)

In [None]:
spanish_ids

In [None]:
af = sp.audio_features(spanish_ids)

In [None]:
af

In [None]:
spanish_df = pd.DataFrame(af)
spanish_df.head()

### Chopping a big playlist into chunks

In [None]:
big_playlist_tracks = get_playlist_tracks("Spotify","2s1KwThY09NjEkmzeTsZWe")

In [None]:
# Track ids of the big playlist, leaving out entries with no track object or no id.
bp_ids = [
    item['track']['id']
    for item in big_playlist_tracks
    if item.get('track') and item['track'].get('id')
]

In [None]:
# Split the ids into consecutive batches of at most `chunk_size`. The range step
# must equal the slice width, otherwise changing `chunk_size` would skip or repeat ids.
chunk_size = 100
bp_ids_chopped = [bp_ids[i:i+chunk_size] for i in range(0, len(bp_ids), chunk_size)]


In [None]:
bp_ids_chopped

In [None]:
# Request the audio features batch by batch and flatten them into one list.
af_list = [
    features
    for batch in bp_ids_chopped
    for features in sp.audio_features(batch)
]
pd.DataFrame(af_list)

### Example with rock

In [None]:
rock = get_playlist_tracks('Spotify','37i9dQZF1DWXRqgorJj26U')

In [None]:
# Track ids of the rock playlist, leaving out entries with no track object or no id.
rock_ids = [
    item['track']['id']
    for item in rock
    if item.get('track') and item['track'].get('id')
]

In [None]:
len(rock_ids)

In [None]:
# Split the ids into consecutive batches of at most `chunk_size`. The range step
# must equal the slice width, otherwise changing `chunk_size` would skip or repeat ids.
chunk_size = 100
rock_ids_chopped = [rock_ids[i:i+chunk_size] for i in range(0, len(rock_ids), chunk_size)]

In [None]:
# Request the audio features batch by batch and flatten them into one list.
rock_af = [
    features
    for batch in rock_ids_chopped
    for features in sp.audio_features(batch)
]

In [None]:
rock_df = pd.DataFrame(rock_af)
rock_df

## Optional(Extra)

## Getting the artists of the playlist 

In [None]:
def get_artists_from_track(track):
    """Return the names of the artists listed under ``track["artists"]``, in order."""
    names = []
    for credited in track["artists"]:
        names.append(credited["name"])
    return names

In [None]:
def get_artists_from_playlist(playlist_id):
    """Return the distinct artist names that appear in a playlist.

    Parameters
    ----------
    playlist_id : str
        Spotify id of the playlist; it is fetched with
        ``get_playlist_tracks("spotify", playlist_id)``.

    Returns
    -------
    list of str
        Each artist name once, sorted so the result is the same on every run.
    """
    playlist_items = get_playlist_tracks("spotify", playlist_id)
    artist_names = set()
    for item in playlist_items:
        track = item.get("track")
        # Skip entries that carry no track object instead of failing on them.
        if not track:
            continue
        artist_names.update(get_artists_from_track(track))
    return sorted(artist_names)

In [None]:
get_artists_from_playlist("4rnleEAOdmFAbRcNCgZMpY")

# Getting albums 

In this section we will work with albums to extract information. We will start by extracting all the albums of an artist.

In [None]:
def get_albums_from_artist(artist_id, country="GB"):
    """Collect every album of an artist, following pagination to the last page.

    Parameters
    ----------
    artist_id : str
        Spotify id of the artist.
    country : str, optional
        Country code forwarded to ``sp.artist_albums``. Defaults to ``"GB"``,
        the value that used to be hard-coded here.

    Returns
    -------
    list
        The ``'items'`` entries (album objects) of every page, in the order returned.
    """
    page = sp.artist_albums(artist_id, limit=50, country=country)
    # Copy the first page so the response object itself is never mutated.
    albums = list(page['items'])
    # 'next' is falsy on the last page, which ends the loop.
    while page['next']:
        page = sp.next(page)
        albums.extend(page['items'])
    return albums


# Same for albums ids
def get_album_ids_from_artist(artist_id, country="GB"):
    """Return the ids of every album of an artist.

    Delegates to ``get_albums_from_artist`` so both functions always query the
    same set of albums (same page size and same country filter).

    Parameters
    ----------
    artist_id : str
        Spotify id of the artist.
    country : str, optional
        Country code forwarded to ``get_albums_from_artist``; defaults to ``"GB"``.

    Returns
    -------
    list of str
        One album id per album, in the order returned.
    """
    return [album["id"] for album in get_albums_from_artist(artist_id, country=country)]

Example: Coldplay

In [None]:
coldplay_id = "4gzpq5DPGxSnKTe4SA8HAU"
coldplay_albums = get_albums_from_artist(coldplay_id)
coldplay_album_ids = get_album_ids_from_artist(coldplay_id)

# Distinct artist names listed on any of those albums, i.e. who shares a release with Coldplay
{credit["name"] for album in coldplay_albums for credit in album["artists"]}

## Getting the songs of a given album

In [None]:
def get_track_ids_from_albums(album_ids):
    """Return the distinct track ids found on the given albums.

    Parameters
    ----------
    album_ids : iterable of str
        Spotify album ids; each one is fetched with ``sp.album``.

    Returns
    -------
    list of str
        Every track id once, in first-seen order (albums in the order given,
        tracks in the order returned).
    """
    seen = {}
    for album_id in album_ids:
        page = sp.album(album_id)["tracks"]
        # Follow 'next' so a track list that spans several pages is read in full.
        while page:
            for track in page["items"]:
                # A dict keeps insertion order, so it dedupes without shuffling.
                seen[track["id"]] = None
            page = sp.next(page) if page["next"] else None
    return list(seen)

In [None]:
coldplay_songs = get_track_ids_from_albums(coldplay_album_ids)

len(coldplay_songs)