# Lab | API wrappers - Create your collection of songs & audio features

To move forward with the project, you need to create a collection of songs with their audio features - as large as possible! An idea for collecting as many songs as possible is to start with all the songs of a big, diverse playlist and then go to every artist present in the playlist and grab every song of every album of that artist. The amount of songs you'll be collecting per playlist will grow exponentially!


Spotify objects are identified by either a "url", a "uri" or an "id".

- The id is an alphanumeric code, and it's the nuclear part of the identifier.

- The uri contains "spotify:track" before the id. An uri is useful because it can be searched manually in the Spotify app.

- The url is a link to the song on the Spotify web player.

In [None]:
#!pip install spotipy

### Authentification

In [3]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

#Initialize SpotiPy with user credentials
#sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id='<your client id here>',
#                                                           client_secret='<your client secret here>'))

### Storing Secrets

In [5]:
secrets_file = open("/Users/gau82/Ironhack/Week6/6.5 API wrappers, Spotipy/secrets.txt","r")

In [8]:
string = secrets_file.read()

In [9]:
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        print(line.split(':'))
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

['cid', ' bccae7255f7b494ca17d15667489a8c1']
['csecret', ' 35c0fbac88034b30bb324f686e32fc59']


Authentication with Secrets

In [10]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

#Initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['cid'],
                                                           client_secret=secrets_dict['csecret']))

# Handling large playlists

#### We will need to collect a "database" of songs. Playlists are a good way to access relatively large amounts of songs.

In [97]:
# we will need more songs for our clustering
playlist = sp.user_playlist_tracks("spotify", "1G8IpkZKobrIlXcVPoSIuf")

In [98]:
playlist["total"]

10000

In [99]:
len(playlist["items"])

100

### Function to extract all songs from a playlist

In [100]:
playlist['next']

'https://api.spotify.com/v1/playlists/1G8IpkZKobrIlXcVPoSIuf/tracks?offset=100&limit=100&additional_types=track'

In [101]:
sp.next(playlist)

{'href': 'https://api.spotify.com/v1/playlists/1G8IpkZKobrIlXcVPoSIuf/tracks?offset=100&limit=100&additional_types=track',
 'items': [{'added_at': '2020-11-29T15:02:07Z',
   'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/acclaimedmusic'},
    'href': 'https://api.spotify.com/v1/users/acclaimedmusic',
    'id': 'acclaimedmusic',
    'type': 'user',
    'uri': 'spotify:user:acclaimedmusic'},
   'is_local': False,
   'primary_color': None,
   'track': {'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/1nJvji2KIlWSseXRSlNYsC'},
       'href': 'https://api.spotify.com/v1/artists/1nJvji2KIlWSseXRSlNYsC',
       'id': '1nJvji2KIlWSseXRSlNYsC',
       'name': 'The Velvet Underground',
       'type': 'artist',
       'uri': 'spotify:artist:1nJvji2KIlWSseXRSlNYsC'},
      {'external_urls': {'spotify': 'https://open.spotify.com/artist/0IwlY33zbBXN7zlS9DP2Cj'},
       'href': 'https://api.spotify.com/v1/artist

In [102]:
from random import randint
from time import sleep

def get_playlist_tracks(playlist_id):
    results = sp.user_playlist_tracks("spotify",playlist_id)
    tracks = results['items']
    while results['next']!=None:
        results = sp.next(results)
        tracks = tracks + results['items']
        sleep(randint(1,3000)/1000) # respectful nap
    return tracks

In [103]:
#this now holds all 10,000 songs and their artists
#need to make it into a dataframe, once we've broken it down! 

all_tracks = get_playlist_tracks("1G8IpkZKobrIlXcVPoSIuf")
len(all_tracks)

10000

##### Method1: we ONLY need artist, name of song, and URI for the database

In [123]:
all_tracks

[{'added_at': '2020-11-29T15:02:07Z',
  'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/acclaimedmusic'},
   'href': 'https://api.spotify.com/v1/users/acclaimedmusic',
   'id': 'acclaimedmusic',
   'type': 'user',
   'uri': 'spotify:user:acclaimedmusic'},
  'is_local': False,
  'primary_color': None,
  'track': {'album': {'album_type': 'album',
    'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/74ASZWbe4lXaubB36ztrGX'},
      'href': 'https://api.spotify.com/v1/artists/74ASZWbe4lXaubB36ztrGX',
      'id': '74ASZWbe4lXaubB36ztrGX',
      'name': 'Bob Dylan',
      'type': 'artist',
      'uri': 'spotify:artist:74ASZWbe4lXaubB36ztrGX'}],
    'available_markets': ['AD',
     'AE',
     'AG',
     'AL',
     'AM',
     'AO',
     'AR',
     'AT',
     'AU',
     'AZ',
     'BA',
     'BB',
     'BD',
     'BE',
     'BF',
     'BG',
     'BH',
     'BI',
     'BJ',
     'BN',
     'BO',
     'BR',
     'BS',
     'BT',
     'BW',
     'B

In [127]:
all_tracks[0]['track']['artists'][0]['name']

'Bob Dylan'

In [128]:
all_tracks[0]['track']['name']

'Like a Rolling Stone'

In [129]:
all_tracks[0]['track']['uri']

'spotify:track:3AhXZa8sUQht0UEdBJgpGc'

### Method2: Getting the artist, song name, and URI from each song of my playlist

In [85]:
# #first we need to get a list of individual tracks from the playlist
# def get_name_artist_from_playlist_item(playlist_item):
#     return get_name_artists_from_track(playlist_item['track'])

In [86]:
# get_name_artist_from_playlist_item(results["items"][0])

[('Like a Rolling Stone', 'Bob Dylan')]

In [89]:
# def get_name_artists_from_playlist(input_playlist):
#     return list(map(get_name_artist_from_playlist_item,input_playlist["items"]))

In [90]:
# get_name_artists_from_playlist(playlist)

[[('Like a Rolling Stone', 'Bob Dylan')],
 [('Smells Like Teen Spirit', 'Nirvana')],
 [('A Day In The Life - Remastered', 'The Beatles')],
 [('Good Vibrations (Mono)', 'The Beach Boys')],
 [('Johnny B Goode', 'Chuck Berry')],
 [('Respect', 'Aretha Franklin')],
 [('Be My Baby', 'The Ronettes')],
 [("What's Going On", 'Marvin Gaye')],
 [('I Heard It Through The Grapevine', 'Marvin Gaye')],
 [("(I Can't Get No) Satisfaction - Mono Version / Remastered 2002",
   'The Rolling Stones')],
 [('Billie Jean', 'Michael Jackson')],
 [('Heartbreak Hotel', 'Elvis Presley')],
 [('Born to Run', 'Bruce Springsteen')],
 [('My Generation - Original Mono Version', 'The Who')],
 [('Love Will Tear Us Apart', 'Joy Division')],
 [('God Only Knows - Mono / 1997 Remastered', 'The Beach Boys')],
 [("(Sittin' On) the Dock of the Bay", 'Otis Redding')],
 [('Hey Ya! - Radio Mix / Club Mix', 'Outkast')],
 [('Anarchy In The UK - Remastered 2012', 'Sex Pistols')],
 [('Paper Planes', 'M.I.A.')],
 [('London Calling', 'T

In [91]:
# def get_name_artists_from_playlist(input_playlist):
#     return flatten(list(map(get_name_artist_from_playlist_item,input_playlist["items"])))

In [96]:
# df = get_name_artists_from_playlist(results)
# df

[('Like a Rolling Stone', 'Bob Dylan'),
 ('Smells Like Teen Spirit', 'Nirvana'),
 ('A Day In The Life - Remastered', 'The Beatles'),
 ('Good Vibrations (Mono)', 'The Beach Boys'),
 ('Johnny B Goode', 'Chuck Berry'),
 ('Respect', 'Aretha Franklin'),
 ('Be My Baby', 'The Ronettes'),
 ("What's Going On", 'Marvin Gaye'),
 ('I Heard It Through The Grapevine', 'Marvin Gaye'),
 ("(I Can't Get No) Satisfaction - Mono Version / Remastered 2002",
  'The Rolling Stones'),
 ('Billie Jean', 'Michael Jackson'),
 ('Heartbreak Hotel', 'Elvis Presley'),
 ('Born to Run', 'Bruce Springsteen'),
 ('My Generation - Original Mono Version', 'The Who'),
 ('Love Will Tear Us Apart', 'Joy Division'),
 ('God Only Knows - Mono / 1997 Remastered', 'The Beach Boys'),
 ("(Sittin' On) the Dock of the Bay", 'Otis Redding'),
 ('Hey Ya! - Radio Mix / Club Mix', 'Outkast'),
 ('Anarchy In The UK - Remastered 2012', 'Sex Pistols'),
 ('Paper Planes', 'M.I.A.'),
 ('London Calling', 'The Clash'),
 ('Sympathy For The Devil', 'T

#### Creating a Dataframe: 

In [None]:
#use a "for" loop to get the audio features, artists, and song names of all 10,000 songs in the playlist and
#put them inside the lists, making sure to ignore the Nans

In [143]:
#creating lists to hold song, artist, uri

song_name = []   
artist_name = []
track_uri = []


for i in range(len(all_tracks)):
    try:
        song = all_tracks[i]['track']['name']
        artist = all_tracks[i]["track"]["album"]['artists'][0]['name']
        uri = all_tracks[i]['track']['uri']
        
        artist_name.append(artist)
        song_name.append(song)
        track_uri.append(uri)
    except:
        artist_name.append('none')
        song_name.append('none')
        track_uri.append('none')

In [144]:
spotify_df=pd.DataFrame({'song': song_name,'artist': artist_name,'uri': track_uri})

In [155]:
spotify_df.head(5)

Unnamed: 0,song,artist,uri
0,Like a Rolling Stone,Bob Dylan,spotify:track:3AhXZa8sUQht0UEdBJgpGc
1,Smells Like Teen Spirit,Nirvana,spotify:track:3oTlkzk1OtrhH8wBAduVEi
2,A Day In The Life - Remastered,The Beatles,spotify:track:3ZFBeIyP41HhnALjxWy1pR
3,Good Vibrations (Mono),The Beach Boys,spotify:track:5Qt4Cc66g24QWwGP3YYV9y
4,Johnny B Goode,Chuck Berry,spotify:track:7MH2ZclofPlTrZOkPzZKhK


In [156]:
spotify_df.song.value_counts()
spotify_df
#can see there are 53 "nones"

none                    53
Summertime               6
Crazy                    4
Girlfriend               4
Fire                     4
                        ..
Young Adult Friction     1
The Pusher               1
If You Leave Me Now      1
Worried Life Blues       1
Come Dancing             1
Name: song, Length: 9639, dtype: int64

In [157]:
spotify_df.artist.value_counts()

Various Artists           684
The Beatles               110
Bob Dylan                  87
The Rolling Stones         80
Bruce Springsteen          57
                         ... 
Mystikal                    1
Gary Jules                  1
The Nerves                  1
Stick McGhee                1
The Mighty Lemon Drops      1
Name: artist, Length: 3401, dtype: int64

In [159]:
# spotify_df.isna().sum()
sum(spotify_df.duplicated())

52

# Audio features (Not completed)

In [None]:
# get the uri of a single song:
song_uri = playlist["items"][33]["track"]["uri"]
song_uri

In [147]:
# get the audio features for that song
sp.audio_features(song_uri)

[{'danceability': 0.63,
  'energy': 0.616,
  'key': 1,
  'loudness': -8.128,
  'mode': 0,
  'speechiness': 0.0309,
  'acousticness': 0.463,
  'instrumentalness': 0.0408,
  'liveness': 0.173,
  'valence': 0.509,
  'tempo': 118.65,
  'type': 'audio_features',
  'id': '1dv3ePjze9tPq2pk8eWJdR',
  'uri': 'spotify:track:1dv3ePjze9tPq2pk8eWJdR',
  'track_href': 'https://api.spotify.com/v1/tracks/1dv3ePjze9tPq2pk8eWJdR',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1dv3ePjze9tPq2pk8eWJdR',
  'duration_ms': 270773,
  'time_signature': 4}]

In [162]:
song_uri = playlist["items"][50]["track"]["uri"]
song_uri

'spotify:track:13KEdaWIc7T6ozEmnJjHy1'

In [163]:
# get the audio features for that song
sp.audio_features(song_uri)

[{'danceability': 0.78,
  'energy': 0.787,
  'key': 0,
  'loudness': -6.653,
  'mode': 1,
  'speechiness': 0.209,
  'acousticness': 0.533,
  'instrumentalness': 0.134,
  'liveness': 0.122,
  'valence': 0.668,
  'tempo': 177.835,
  'type': 'audio_features',
  'id': '13KEdaWIc7T6ozEmnJjHy1',
  'uri': 'spotify:track:13KEdaWIc7T6ozEmnJjHy1',
  'track_href': 'https://api.spotify.com/v1/tracks/13KEdaWIc7T6ozEmnJjHy1',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/13KEdaWIc7T6ozEmnJjHy1',
  'duration_ms': 237067,
  'time_signature': 4}]

In [160]:
#creating lists to hold each audio feature

danceability = []
energy = []
key = []
loudness = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []

In [172]:
# create blank dictionary to store audio features
feature_dict = {}

# convert track_uri column to an iterable list
track_uris = playlist(["items"][0]["track"]["uri"]).to_list()

# loop through track URIs and pull audio features using the API,
# store all these in a dictionary
for t_uri in track_uris:
    
    feature_dict[t_uri] = {'popularity': 0,
                           'danceability': 0,
                           'energy': 0,
                           'speechiness': 0,
                           'instrumentalness': 0,
                           'tempo': 0}
    
    r = requests.get(BASE_URL + 'tracks/' + t_uri, headers=headers)
    r = r.json()
    feature_dict[t_uri]['popularity'] = r['popularity']
    
    s = requests.get(BASE_URL + 'audio-features/' + t_uri, headers=headers)
    s = s.json()
    feature_dict[t_uri]['danceability'] = s['danceability']
    feature_dict[t_uri]['energy'] = s['energy']
    feature_dict[t_uri]['speechiness'] = s['speechiness']
    feature_dict[t_uri]['instrumentalness'] = s['instrumentalness']
    feature_dict[t_uri]['tempo'] = s['tempo']

TypeError: string indices must be integers

In [None]:
# convert dictionary into dataframe with track_uri as the first column
df_features = pd.DataFrame.from_dict(feature_dict, orient='index')
df_features.insert(0, 'track_uri', df_features.index)
df_features.reset_index(inplace=True, drop=True)

df_features.head()