In [97]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import os
from time import sleep
from tqdm import tqdm
from requests import get, post
import json
import base64
from IPython.display import clear_output

# Spotify app credentials, read from the environment so they never appear in the notebook.
# Either value is None when its environment variable is not set.
client_id = os.environ.get("CLIENT_ID")
client_secret = os.environ.get("SECRET")

# token handling functions
def get_token(id=client_id, secret=client_secret):
    """Request an access token from Spotify using the client-credentials grant.

    Args:
        id: Spotify application client ID. Defaults to the module-level
            ``client_id`` as it was when this function was defined.
        secret: Spotify application client secret. Defaults to the
            module-level ``client_secret`` as it was at definition time.

    Returns:
        The ``access_token`` string from the token endpoint's JSON response.

    Raises:
        ValueError: If either credential is missing or empty.
        requests.HTTPError: If the token endpoint answers with an error status.
    """
    # Fail early with a readable message instead of sending "None:None" to the API
    if not id or not secret:
        raise ValueError(
            'Spotify credentials are missing; set the CLIENT_ID and SECRET environment variables'
        )

    # HTTP Basic credentials: base64("<id>:<secret>")
    auth_string = f'{id}:{secret}'
    auth_bytes = auth_string.encode('utf-8')
    auth_base64 = str(base64.b64encode(auth_bytes), 'utf-8')

    url = 'https://accounts.spotify.com/api/token'

    headers = {
        'Authorization': f'Basic {auth_base64}',
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    data = {'grant_type': 'client_credentials'}

    # A timeout keeps the cell from hanging forever on a stalled connection
    result = post(url, headers=headers, data=data, timeout=10)

    # Surface rejected credentials / server errors as an HTTP error rather than
    # as a KeyError on the missing 'access_token' field below
    result.raise_for_status()

    return result.json()['access_token']

def get_auth_header(token):
    """Wrap an access token in an HTTP ``Authorization`` header mapping.

    Args:
        token: Access token to send as a bearer credential.

    Returns:
        A new dict with the single key ``'Authorization'``.
    """
    header = {}
    header['Authorization'] = f'Bearer {token}'
    return header

# Build the spotipy client used by the rest of the notebook.
# NOTE(review): both a raw client-credentials token (`auth`) and a user OAuth manager
# (`auth_manager`) are supplied. spotipy appears to give an explicit `auth` token
# precedence, in which case the OAuth manager and its 'user-library-read' scope would
# never be consulted -- confirm which of the two is actually intended.
access_token = get_token()
oauth_manager = SpotifyOAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri='https://localhost:1480',
    scope='user-library-read',
)
sp = spotipy.Spotify(auth=access_token, auth_manager=oauth_manager)

In [98]:
# Smoke-test the token helpers.
# Only the auth scheme and the credential's length are printed, so the live bearer
# token is never written into the notebook's saved output.
auth_header = get_auth_header(get_token())
scheme, credential = auth_header['Authorization'].split(' ', 1)
print({'Authorization': f'{scheme} <redacted, {len(credential)} chars>'})

{'Authorization': 'Bearer <redacted>'}


In [99]:
# Load the reference dataset; only its column names are used in this cell
cols = pd.read_csv('../Spotify_Youtube_wAPI.csv')
column_names = cols.columns.tolist()

# One empty list per reference column; the dict is later converted into a DataFrame
playlist_data = {name: [] for name in column_names}

# Extra fields to collect in addition to the reference columns
additional_cols = ['Followers', 'Release Date', 'Available Markets']
playlist_data.update({extra: [] for extra in additional_cols})

In [100]:
# Collect track, artist and audio-feature data for every track in the playlist, one page
# of 50 at a time (a blend playlist can only be 50 songs, so it works nicely with a max
# batch of 50).
# NOTE: this cell appends to `playlist_data`; re-create that dict before re-running this
# cell, otherwise every row is added a second time.
playlist_id = ''  # playlist to export (blank in the saved notebook; fill in before running)
playlist = sp.playlist_tracks(playlist_id=playlist_id, limit=50, offset=0)
completed = 0

# Audio-feature columns; each is read from the response under its lower-cased name
feature_columns = ['Danceability', 'Energy', 'Key', 'Loudness', 'Speechiness',
                   'Acousticness', 'Instrumentalness', 'Liveness', 'Valence', 'Tempo']

# Page through the playlist until there are no further pages
while playlist:

    # URIs gathered on this page for the follow-up artist / audio-feature requests
    track_uris = []
    artist_uris = []

    # Record everything the playlist response already contains
    for item in tqdm(playlist['items']):
        track = item.get('track')

        # Entries without a usable catalog track (missing track object, local files,
        # non-track items such as episodes) are skipped as a whole so that every
        # column keeps exactly one value per collected row
        if not track or track.get('is_local') or track.get('type', 'track') != 'track':
            continue

        lead_artist = track['artists'][0]
        album = track['album']

        track_uris.append(track['uri'])
        artist_uris.append(lead_artist['uri'])
        playlist_data['Artist'].append(lead_artist['name'])
        # NOTE(review): this stores the track's preview URL under 'Url_spotify' -- confirm
        # that is the value the reference dataset expects in this column
        playlist_data['Url_spotify'].append(track['preview_url'])
        playlist_data['Track'].append(track['name'])
        playlist_data['Album'].append(album['name'])
        playlist_data['Release Date'].append(album['release_date'])
        playlist_data['Album_type'].append(album['album_type'])
        playlist_data['Uri'].append(track['uri'])
        playlist_data['Duration_ms'].append(track['duration_ms'])
        playlist_data['Track Popularity'].append(track['popularity'])
        playlist_data['Available Markets'].append(track['available_markets'])

    # Nothing collectable on this page -> skip the follow-up requests entirely
    if track_uris:

        # Artist-level fields that the playlist response does not include
        artists = sp.artists(artists=artist_uris)

        for artist in artists['artists']:
            # A missing artist entry becomes a row of None values instead of a crash
            artist = artist or {}
            playlist_data['Artist Popularity'].append(artist.get('popularity'))
            playlist_data['Genres'].append(artist.get('genres'))
            playlist_data['Followers'].append((artist.get('followers') or {}).get('total'))

        # pause between the two follow-up requests
        sleep(5)

        # Audio features that the playlist response does not include
        features = sp.audio_features(tracks=track_uris)

        for track_features in tqdm(features):
            # Tracks without audio features come back as None; store None in each
            # feature column so the row stays aligned with the other columns
            track_features = track_features or {}
            for column in feature_columns:
                playlist_data[column].append(track_features.get(column.lower()))

    # Report the number of rows actually collected, including the final page
    completed += len(track_uris)
    print(f'retrieved data for {completed} rows')

    # use case complete
    if not playlist['next']:
        break

    # to ensure rate limit is not exceeded
    clear_output(wait=True)
    sleep(10)
    playlist = sp.next(playlist)
        

100%|██████████| 35/35 [00:00<00:00, 215883.29it/s]
100%|██████████| 35/35 [00:00<00:00, 317750.30it/s]


In [None]:
# drop columns we can't retrieve from API (like youtube data); those lists are still empty
playlist_data = {k: v for k, v in playlist_data.items() if v}

# show each column's length; pd.DataFrame below raises ValueError if they are not all equal
for key, values in playlist_data.items():
    print(f'{key} {len(values)}')

# transform the dict to a df
playlist_dataset = pd.DataFrame(data=playlist_data)

# Write to a file inside the working directory. The previous target './' is a
# directory, which to_csv cannot open for writing.
path = './playlist_dataset.csv'
# index=False: the default RangeIndex carries no information worth persisting
playlist_dataset.to_csv(path, index=False)

# Preview as the cell's last expression so the notebook actually renders it
playlist_dataset.head()
