#### Install Spotipy library to use Spotify Web API

In [1]:
pip install spotipy --upgrade

Requirement already up-to-date: spotipy in c:\users\adam\miniconda3\envs\ml\lib\site-packages (2.16.0)
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


#### Import OS functionality & load environment variables from the local `.env`

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

client_id = os.getenv('CLIENT_ID')
client_secret = os.getenv('CLIENT_SECRET')

#### Import spotipy library to use Spotify Web API
https://spotipy.readthedocs.io/

## Authentication

In [4]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

client_credentials_manager = SpotifyClientCredentials(client_id=client_id,
                                                      client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## List of playlists

In [5]:
import pandas as pd
USER_NAME = 'eargasmusic'
user = sp.user(USER_NAME)
playlist_dict = sp.user_playlists(USER_NAME)
playlist_items = playlist_dict['items']
all_playlists = pd.DataFrame(playlist_items,
                         columns=['id', 'name'])

# Create boolean filter series to mask named and unnamed playlists
named = all_playlists['name'].str.startswith('eargasm | ')
unnamed = all_playlists['name'].str.startswith('eargasm music ')

# Let's keep these playlists as unlabeled for later experiments
unnamed_playlists = all_playlists[unnamed.values]
# Extract all named playlists
named_playlists = all_playlists[named.values]


named_playlists

Unnamed: 0,id,name
0,5apHWYcigR3lSZpyzyGKEa,eargasm | breathe easy
1,3MXM4ca1b3bT198F7mG9ms,eargasm | city walk
2,2QdM3NBe7lkOzC7OqWXfNI,eargasm | curvatronik
3,1CwPTyGbQDSda6m7vTys1d,eargasm | decadency
4,6pGQQZ4PITmFnSC0rTnmXp,eargasm | deep water
5,7tOxW8H2qzxGmIgwAFSYrw,eargasm | departure
6,1YfxkylLN0ecX19mAquHGy,eargasm | dust settling
7,1zPa9pKaSsIZYVuuxZqAdv,eargasm | get moving
8,4aVYl96G4Xjfsc7IT1V8pj,eargasm | glide
9,5s9oeBauiz5PDbDHCRix0Y,eargasm | high frequency radio


## Basic song information

In [6]:
# Import timeit library to count the processing time
import timeit
start = timeit.default_timer()

track_id = []
track_artists = []
track_name = []
track_duration = []
track_popularity = []
track_releasedate = []
track_playlist = []

for _, (_id, _name) in named_playlists.iterrows():
    
    for i in range(0,10000,100):
        
        playlist_items = sp.playlist_items(_id, limit=100, offset=i)['items']        
        for item in playlist_items:
            track_id.append(item['track']['id'])
            track_artists.append(item['track']['artists'][0]['name'])
            track_name.append(item['track']['name'])
            track_duration.append(item['track']['duration_ms'])
            track_popularity.append(item['track']['popularity'])
            track_releasedate.append(item['track']['album']['release_date'])
            track_playlist.append(_name)        
    
labeled_songs = pd.DataFrame({'track_id': track_id,
                              'track_artists': track_artists,
                              'track_name': track_name,
                              'track_duration': track_duration,
                              'track_popularity': track_popularity,
                              'track_releasedate': track_releasedate,
                              'track_playlist': track_playlist})

stop = timeit.default_timer()
print('Runtime: {} seconds.'.format(stop-start))

Runtime: 312.19229459999997 seconds.


In [7]:
labeled_songs

Unnamed: 0,track_id,track_artists,track_name,track_duration,track_popularity,track_releasedate,track_playlist
0,1ua6hBq18qZLyprXjMcpyf,Virgil Howe,Someday,251266,43,2009-10-19,eargasm | breathe easy
1,42VpxSdGQgnV1UJkWeGYkA,Cass McCombs,Switch,254233,52,2016-08-26,eargasm | breathe easy
2,1g8A166soQjwl1ihqBWKGW,The Slow Revolt,Lean,207699,0,2016-09-09,eargasm | breathe easy
3,6cAVWcj8TQ5yR2T6BZjnOg,Dirty Nice,Zero Summer,212640,0,2017-06-09,eargasm | breathe easy
4,3YA509E9ki7a3Ic9cf25Vt,Alex Ebert,Broken Record,274800,47,2017-05-05,eargasm | breathe easy
...,...,...,...,...,...,...,...
3616,0hXzxTABL0Q85WhdzS62Fc,Sevdaliza,Amandine Insensible,253884,29,2015-11-24,eargasm | weightless
3617,2sudalI4ICuiFAHIqOdQlo,Oko Ebombo,Niggality,365042,19,2016,eargasm | weightless
3618,0Jx42ufEToqS6vzHFZz6L8,Inner Tongue,Dig Deeper,212800,34,2018-05-25,eargasm | weightless
3619,01CBH5z1p01xSEsiSSykSx,Pearl De Luna,London Lullaby,297752,19,2018-01-26,eargasm | weightless


## Audio features

In [28]:
def audio_features(id):
    all_features = sp.audio_features(id)
    columns_to_keep = ['id',
                       'danceability',
                       'energy',
                       'key',
                       'loudness',
                       'mode',
                       'speechiness',
                       'acousticness',
                       'instrumentalness',
                       'liveness',
                       'valence',
                       'tempo']
    selected_features = pd.DataFrame(all_features,
                                     columns=columns_to_keep)
    return selected_features

In [12]:
audio_analysis = sp.audio_analysis('1ua6hBq18qZLyprXjMcpyf')
audio_analysis.keys()

dict_keys(['meta', 'track', 'bars', 'beats', 'sections', 'segments', 'tatums'])

In [17]:
audio_analysis['track']['key']

9

## TO DO

- duplicate songs
- similarities
- audio features