![logo_ironhack_blue 7](https://user-images.githubusercontent.com/23629340/40541063-a07a0a8a-601a-11e8-91b5-2f13e4e6b441.png)

# Lab | API wrappers - Create your collection of songs & audio features


#### Instructions 


To move forward with the project, you need to create a collection of songs with their audio features - as large as possible! 

These are the songs that we will cluster. And, later, when the user inputs a song, we will find the cluster to which the song belongs and recommend a song from the same cluster.
The more songs you have, the more accurate and diverse recommendations you'll be able to give. Although... you might want to make sure the collected songs are "curated" in a certain way. Try to find playlists of songs that are diverse, but also that meet certain standards.

The process of sending hundreds or thousands of requests can take some time - it's normal if you have to wait a few minutes (or, if you're ambitious, even hours) to get all the data you need.

An idea for collecting as many songs as possible is to start with all the songs of a big, diverse playlist and then go to every artist present in the playlist and grab every song of every album of that artist. The number of songs you collect per playlist will grow very quickly!

<h1 style="color: #00BFFF;">00 | Libraries</h1>

In [2]:
# 📚 Basic libraries
import pandas as pd # dataframe management

# 🧩 New libraries
import spotipy # Spotify API for developers
from spotipy.oauth2 import SpotifyClientCredentials # auth manager for the app's client ID / client secret
import pprint # Makes dictionaries pretty to navigate when printed

<h1 style="color: #00BFFF;">01 | Data Extraction</h1>

<h3 style="color: #008080;">API access</h3>

In [3]:
# Credentials must never be hardcoded in a notebook. With no arguments,
# SpotifyClientCredentials reads them from the SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET environment variables, so export both before starting
# the kernel. Any key that was ever saved in a notebook should be revoked.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

<h3 style="color: #008080;">Playlist track</h3>

In [4]:
# Playlist "Humans": during the first COVID lockdown I asked 32 random friends
# for one artist and one song each. The idea is to surprise them by playing it
# at my future wedding :D
#
# playlist_items() is the current replacement for the deprecated
# user_playlist_tracks(); the playlist ID alone identifies the playlist, so no
# owner name is passed. additional_types=("track",) keeps the request limited
# to music tracks, matching the `additional_types=track` query of the old call.
playlist = sp.playlist_items("0Yf47MikpebEPxnZbAhEvn", additional_types=("track",))
pprint.pprint(playlist)

{'href': 'https://api.spotify.com/v1/playlists/0Yf47MikpebEPxnZbAhEvn/tracks?offset=0&limit=100&additional_types=track',
 'items': [{'added_at': '2020-03-13T18:58:12Z',
            'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/arattz'},
                         'href': 'https://api.spotify.com/v1/users/arattz',
                         'id': 'arattz',
                         'type': 'user',
                         'uri': 'spotify:user:arattz'},
            'is_local': False,
            'primary_color': None,
            'track': {'album': {'album_type': 'compilation',
                                'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/5EBH204cwRkvAWknwTAjCQ'},
                                             'href': 'https://api.spotify.com/v1/artists/5EBH204cwRkvAWknwTAjCQ',
                                             'id': '5EBH204cwRkvAWknwTAjCQ',
                                             'name': 'Marea',
         

<h3 style="color: #008080;">Getting artists from the playlist</h3>

In [5]:
# First-listed artist of every track currently loaded in `playlist`,
# de-duplicated in first-seen order (dict keys keep insertion order).
# Because repeats are dropped, this list can be shorter than the track list.
artists = list(dict.fromkeys(
    item["track"]["artists"][0]["name"] for item in playlist["items"]
))
artists

['Marea',
 'Passion Pit',
 'Fleetwood Mac',
 'Love of Lesbian',
 'Solar Fake',
 'Matt Simons',
 'Qntal',
 'Alice Cooper',
 'Matisyahu',
 'RAC',
 'Nickelback',
 'Franz Liszt',
 'Rammstein',
 'Red Hot Chili Peppers',
 'Backstreet Boys',
 'Radiohead',
 'Whitesnake',
 'Heroes Del Silencio',
 'The Sound',
 'Shawn Mendes',
 'Patrick Cassidy',
 'Fuel Fandango',
 'Eluveitie',
 'Michael Jackson',
 'Fools Garden',
 'The Dø',
 'ZOO',
 'Glen Hansard',
 'Road Ramos',
 'Robin Schulz',
 'Babi',
 'ONE OK ROCK']

<h3 style="color: #008080;">Getting albums from the playlist</h3>

In [6]:
# Album name of every track currently loaded in `playlist`, de-duplicated in
# first-seen order (dict keys keep insertion order). Because repeats are
# dropped, this list can be shorter than the track list.
albums = list(dict.fromkeys(
    item["track"]["album"]["name"] for item in playlist["items"]
))
albums

['Coces al Aire 1997-2007',
 'Gossamer',
 'Tango In the Night (Deluxe Edition)',
 'Manifiesto delirista',
 'You Win. Who Cares?',
 'After The Landslide (Remix)',
 'VI - Translucida',
 'Trash',
 'Spark Seeker',
 'Strangers',
 'Here and Now (Audio Only Version)',
 'Valentina Lisitsa Plays Liszt',
 'Liebe ist für alle da (Special Edition)',
 'Californication (Deluxe Edition)',
 'Millennium',
 'In Rainbows',
 'Whitesnake (2018 Remaster)',
 'El Espíritu Del Vino- Edición Especial',
 'Jeopardy',
 'Lost In Japan (Remix)',
 'Hannibal - Original Motion Picture Soundtrack',
 'Origen',
 'Slania',
 'Thriller 25 Super Deluxe Edition',
 'Dish Of The Day',
 'Shake, Shook, Shaken',
 'Tempestes Vénen del Sud',
 'Music From The Motion Picture Once',
 '¿Qué sueñan los perros?',
 'Sugar',
 'Colegas',
 'Eye of the Storm']

<h3 style="color: #008080;">Getting all songs from the playlist</h3>

In [7]:
# Title of every track currently loaded in `playlist`, in playlist order.
# Duplicates are kept, so there is exactly one entry per playlist item.
tracks_title = [item["track"]["name"] for item in playlist["items"]]

tracks_title

['Corazon de mimbre',
 'Take a Walk',
 'Everywhere - 2017 Remaster',
 'Manifiesto delirista',
 'A Bullet Left for You',
 'After The Landslide - Remix',
 'Passacaglia',
 'Poison',
 'Live Like A Warrior',
 'Let Go',
 'Trying Not to Love You',
 'Hungarian Rhapsody No. 12 in C sharp minor, S.244',
 'Liebe ist für alle da',
 'Otherside',
 'I Want It That Way',
 'Bodysnatchers',
 'Is This Love - 2018 Remaster',
 'La sirena varada',
 "I Can't Escape Myself",
 'Lost In Japan - Remix',
 'Vide Cor Meum',
 'Mi danza (feat. Dani de Morón)',
 'Inis Mona',
 'Billie Jean',
 'Lemon Tree',
 'Despair, Hangover & Ecstasy',
 'Estiu',
 'Falling Slowly',
 'El Día Que Quieras',
 'Sugar (feat. Francesco Yates)',
 'Colegas',
 'Letting Go']

<h3 style="color: #008080;">Getting all track IDs from the playlist (every page)</h3>

In [8]:
# Collect the items of every result page, starting from the page already
# held in `playlist`.
# - list(...) copies the first page so extending `tracks` does not mutate
#   playlist["items"] behind the back of earlier cells.
# - A separate `page` cursor walks the following pages, so `playlist` keeps
#   pointing at the first page and this cell gives the same result when re-run.
tracks = list(playlist["items"])

page = playlist
while page["next"]:
    page = sp.next(page)
    tracks.extend(page["items"])

# Spotify ID of each collected track, in playlist order.
track_ids = [item["track"]["id"] for item in tracks]

<h1 style="color: #00BFFF;">02 | Data Wrangling</h1>

In [9]:
# Request audio features for the first 100 track IDs only
# (NOTE(review): presumably capped at 100 because of the per-request ID limit
# of the audio-features endpoint - confirm against the Spotify Web API docs).
features = sp.audio_features(tracks=track_ids[:100])
# One row per returned feature dictionary.
features_df = pd.DataFrame(data=features)
features_df.head()  # head() previews the first 5 rows by default

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.521,0.928,9,-3.548,0,0.0837,0.0235,0.0,0.0961,0.323,90.555,audio_features,6ScI1xf3RszISXvLGmitY3,spotify:track:6ScI1xf3RszISXvLGmitY3,https://api.spotify.com/v1/tracks/6ScI1xf3RszI...,https://api.spotify.com/v1/audio-analysis/6ScI...,337666,4
1,0.566,0.755,11,-5.526,1,0.0368,0.0338,0.0,0.315,0.445,101.006,audio_features,4Sfa7hdVkqlM8UW5LsSY3F,spotify:track:4Sfa7hdVkqlM8UW5LsSY3F,https://api.spotify.com/v1/tracks/4Sfa7hdVkqlM...,https://api.spotify.com/v1/audio-analysis/4Sfa...,264493,4
2,0.73,0.487,4,-10.991,1,0.0303,0.258,0.01,0.0852,0.731,114.965,audio_features,254bXAqt3zP6P50BdQvEsq,spotify:track:254bXAqt3zP6P50BdQvEsq,https://api.spotify.com/v1/tracks/254bXAqt3zP6...,https://api.spotify.com/v1/audio-analysis/254b...,226653,4
3,0.62,0.785,0,-6.32,1,0.0328,0.374,1e-05,0.0932,0.395,110.032,audio_features,7A5yQ2gcJ3FA1wh5MNnibS,spotify:track:7A5yQ2gcJ3FA1wh5MNnibS,https://api.spotify.com/v1/tracks/7A5yQ2gcJ3FA...,https://api.spotify.com/v1/audio-analysis/7A5y...,333413,4
4,0.633,0.831,9,-5.943,1,0.0283,0.000675,0.00771,0.28,0.445,136.016,audio_features,4zV7p7ldp1KpTKuCeVbOex,spotify:track:4zV7p7ldp1KpTKuCeVbOex,https://api.spotify.com/v1/tracks/4zV7p7ldp1Kp...,https://api.spotify.com/v1/audio-analysis/4zV7...,329160,4


In [10]:
# Keep only the audio descriptors: drop identifier, URL and metadata columns.
# errors="ignore" makes this cell safe to re-run; without it a second execution
# raises KeyError because the columns have already been removed.
features_df = features_df.drop(
    columns=["type", "id", "uri", "track_href", "analysis_url", "duration_ms", "time_signature"],
    errors="ignore",
)

In [11]:
# Preview the first five rows of the trimmed feature table.
features_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0.521,0.928,9,-3.548,0,0.0837,0.0235,0.0,0.0961,0.323,90.555
1,0.566,0.755,11,-5.526,1,0.0368,0.0338,0.0,0.315,0.445,101.006
2,0.73,0.487,4,-10.991,1,0.0303,0.258,0.01,0.0852,0.731,114.965
3,0.62,0.785,0,-6.32,1,0.0328,0.374,1e-05,0.0932,0.395,110.032
4,0.633,0.831,9,-5.943,1,0.0283,0.000675,0.00771,0.28,0.445,136.016


<h3 style="color: #008080;">Dictionaries rule</h3>

In [12]:
# Build the identifying columns (artist, album, song) directly from the track
# objects, one row per track, keyed by the future column names.
# The `artists` and `albums` lists drop duplicates, so using them as columns
# would raise a length error, or silently pair songs with the wrong
# artist/album, as soon as two tracks share an artist or an album.
# Only the tracks that have a row in `features_df` are used, so both tables
# stay aligned row by row.
ids = pd.DataFrame(
    [
        {
            "artist": item["track"]["artists"][0]["name"],
            "album": item["track"]["album"]["name"],
            "song": item["track"]["name"],
        }
        for item in tracks[: len(features_df)]
    ],
    columns=["artist", "album", "song"],  # fixes column order, also when empty
)

<h1 style="color: #00BFFF;">03 | Reporting</h1>

<h3 style="color: #008080;">Pandas rule the world -> making it a dataframe</h3>

In [13]:
# Put the identifying columns and the audio features side by side;
# rows are matched on the index of both frames.
df = pd.concat(objs=[ids, features_df], axis="columns")

In [14]:
# Preview the first five rows of the combined table.
df.head()

Unnamed: 0,artist,album,song,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Marea,Coces al Aire 1997-2007,Corazon de mimbre,0.521,0.928,9,-3.548,0,0.0837,0.0235,0.0,0.0961,0.323,90.555
1,Passion Pit,Gossamer,Take a Walk,0.566,0.755,11,-5.526,1,0.0368,0.0338,0.0,0.315,0.445,101.006
2,Fleetwood Mac,Tango In the Night (Deluxe Edition),Everywhere - 2017 Remaster,0.73,0.487,4,-10.991,1,0.0303,0.258,0.01,0.0852,0.731,114.965
3,Love of Lesbian,Manifiesto delirista,Manifiesto delirista,0.62,0.785,0,-6.32,1,0.0328,0.374,1e-05,0.0932,0.395,110.032
4,Solar Fake,You Win. Who Cares?,A Bullet Left for You,0.633,0.831,9,-5.943,1,0.0283,0.000675,0.00771,0.28,0.445,136.016


In [15]:
# Save the combined table next to the notebook, without the index column.
df.to_csv(path_or_buf='playlist.csv', index=False)