In [None]:
# Spotify API credentials.
# Prefer the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables so real
# secrets never have to be saved inside the notebook; the "xxxx" placeholders are
# only used when those variables are not set.
import os

client_id = os.environ.get("SPOTIPY_CLIENT_ID", "xxxx")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET", "xxxx")

# import libraries
from __future__ import print_function 
import numpy as np
import pandas as pd
import sys
import json
import time
import spotipy
import string
from spotipy.oauth2 import SpotifyClientCredentials
from tqdm import tqdm_notebook

# Authenticate with the client-credentials manager built from the id/secret above
credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(auth_manager=credentials_manager)

# Artist to inspect.
# Biffy Clyro is used as the example: https://open.spotify.com/artist/1km0R7wy712AzLkA1WjKET
# To use another artist, search for them on the Spotify website and copy the
# characters that follow /artist/ in the page URL.
biffy_id = '1km0R7wy712AzLkA1WjKET'

# Fetch the artist's details (left as the last expression so the cell displays them)
sp.artist(biffy_id)

In [None]:
# I used the following website as a template for the next section:
# https://medium.com/@samlupton/spotipy-get-features-from-your-favourite-songs-in-python-6d71f0172df0

# I then customized the code to pull in extra fields I wanted, such as track popularity

# Get Tracks for Artist
def _paged_items(first_page):
    '''
    Return every item of a Spotify paging object, following its 'next' links
    so that results beyond the first page are not lost.
    '''
    items = []
    page = first_page
    while page:
        items.extend(page['items'])
        # sp.next() fetches the following page; stop as soon as there is none
        page = sp.next(page) if page.get('next') else None
    return items


def _album_track_rows(artist, album):
    '''
    Build one row per track on `album`: the descriptors followed by the values
    of that track's audio-features dict.
    '''
    import warnings

    # Albums can come back without artwork; use None instead of raising IndexError
    images = album.get('images') or []
    image_url = images[0]['url'] if images else None

    songs = _paged_items(sp.album_tracks(album['uri'], limit=50))

    rows = []
    # Track details and audio features are requested in batches rather than with
    # several calls per song. sp.tracks takes at most 50 ids per request, so 50
    # is used as the chunk size for both endpoints.
    for start in range(0, len(songs), 50):
        chunk = songs[start:start + 50]
        uris = [song['uri'] for song in chunk]
        details = sp.tracks(uris)['tracks']
        features = sp.audio_features(uris)

        for song, detail, feats in zip(chunk, details, features):
            # Either lookup may yield None for an individual track. Such a track
            # cannot produce a full-width row, so skip it loudly instead of crashing.
            if detail is None or feats is None:
                warnings.warn('Skipping track {!r} on album {!r}: no track details '
                              'or audio features returned'.format(song['name'], album['name']))
                continue
            rows.append([artist, album['name'], album['uri'], image_url, song['name'],
                         detail['popularity'], detail['track_number'],
                         album['release_date']] + list(feats.values()))
    return rows


def artist_tracks(artists):

    '''
    Takes a list of artist names, iterates through their Spotify albums, skips
    duplicate album listings, then collects all the tracks on those albums.

    Parameters
    ----------
    artists : iterable of str
        Artist names; each one is resolved with a Spotify artist search and the
        first match is used.

    Returns
    -------
    list of list
        One row per track:
        [artist, album name, album uri, album image url (None when the album has
        no images), track name, popularity, track number, release date]
        followed by the values of the track's audio-features dict.

    Raises
    ------
    IndexError
        If the search returns no artist for one of the names.
    '''

    # Each list in this list will be a track and its features
    tracks = []

    for artist in tqdm_notebook(artists):

        # Resolve the artist URI with an *artist* search. A track search can return
        # a track whose first listed artist is somebody else.
        matches = sp.search(q=artist, type='artist', limit=1)['artists']['items']
        if not matches:
            raise IndexError('No Spotify artist found for {!r}'.format(artist))
        artist_uri = matches[0]['uri']

        # album_type='album' discounts singles, compilations and collaborations.
        # Each page of albums is requested exactly once.
        albums = _paged_items(sp.artist_albums(artist_uri, album_type='album', limit=50))

        # Spotify lists many duplicate albums; remember (artists, name, date) of
        # every album already processed so it is only expanded once
        seen_albums = set()

        for album in tqdm_notebook(albums):
            album_key = (tuple(credit['name'] for credit in album['artists']),
                         album['name'],
                         album['release_date'])
            if album_key in seen_albums:
                continue
            seen_albums.add(album_key)

            tracks.extend(_album_track_rows(artist, album))

    return tracks

# Collect the track rows for every artist named in this list
artist = ['Biffy Clyro']
biffy_tracks = artist_tracks(artists=artist)


In [None]:
# Get Track Details for Artist Listed Above
def df_tracks(tracklist, feature_names=None, reference_track='7tr2za8SQg2CI8EDgrdtNl'):

    '''
    Takes the output of artist_tracks (i.e. a list of lists),
    puts it in a dataframe and formats it.

    Parameters
    ----------
    tracklist : list of list
        Rows holding eight descriptors (artist, album_name, album_uri,
        album_image, track, popularity, track_number, release_date) followed by
        the audio-feature values.
    feature_names : list of str, optional
        Names of the audio-feature values, in the order they appear in each row.
        When omitted they are read from the keys of the audio features Spotify
        returns for `reference_track` (this costs one API request).
    reference_track : str, optional
        Track id used for that lookup.

    Returns
    -------
    pandas.DataFrame
        One row per unique (artist, track, release_date), identifier columns
        first and auditory features last; the 'uri' feature is renamed to
        'song_uri'.

    Raises
    ------
    ValueError
        If `feature_names` is omitted and Spotify returns no audio features for
        `reference_track`.
    '''

    if feature_names is None:
        reference = sp.audio_features(reference_track)
        # Fail with a clear message instead of an AttributeError on None
        if not reference or reference[0] is None:
            raise ValueError('No audio features returned for reference track '
                             '{!r}; pass feature_names explicitly'.format(reference_track))
        feature_names = list(reference[0].keys())

    descriptor_cols = ['artist',
                       'album_name',
                       'album_uri',
                       'album_image',
                       'track',
                       'popularity',
                       'track_number',
                       'release_date']

    # Chained, non-mutating equivalents of the rename / drop_duplicates steps
    df = (
        pd.DataFrame(tracklist, columns=descriptor_cols + list(feature_names))
        .rename(columns={'uri': 'song_uri'})
        .drop_duplicates(subset=['artist', 'track', 'release_date'])
    )

    # Reorder the cols to have identifiers first, auditory features last
    cols = ['artist', 'album_name', 'album_uri', 'album_image', 'release_date', 'track', 'popularity', 'track_number', 'id',
            'song_uri', 'track_href', 'analysis_url', 'type', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
            'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']

    return df[cols]


# Build the formatted track table from the rows collected above
biffy_track_details = df_tracks(tracklist=biffy_tracks)

In [None]:
# This section reshapes the raw track table into a form that is easier to analyze and work with

# Folder that receives every CSV written below. The raw table used to be written to a
# file literally named 'file_location' while the cleaned tables went to
# 'file_location/...', so the writes could never all succeed; everything now goes
# into this one folder.
output_dir = r'file_location'

# Clean up Track Names
# Keep only the text before the first ' - ', '(L' or '(I' (presumably version
# suffixes such as '(Live ...' -- confirm against the data), then trim whitespace
# and normalize capitalization. A single regex replaces the old 'xxx' sentinel,
# which also truncated any title that happened to contain 'xxx'.
biffy_track_details['track'] = (
    biffy_track_details['track']
    .str.replace(r'(?s)(?: - |\(L|\(I).*', '', regex=True)
    .str.strip()
    .apply(string.capwords)
)

# Rename Deluxe Albums and Tracks
album_renames = {
    'Puzzle (Deluxe Bundle)': 'Puzzle',
    'Opposites (Deluxe)': 'Opposites',
    'Ellipsis (Deluxe)': 'Ellipsis',
    'Balance, Not Symmetry (Original Motion Picture Soundtrack)': 'Balance, Not Symmetry',
}
track_renames = {
    'Living Is A Problem': 'Living Is A Problem Because Everything Dies',
    "The Joke'S On Us": "The Joke's On Us",
    "The Jokes On Us": "The Joke's On Us",
    "The Girl And His Cat": "A Girl And His Cat",
    "Opposites": "Opposite",
    "All The Way Down": "All The Way Down: Prologue Chapter 1",
    "All The Way Down (prologue Chapter 1)": "All The Way Down: Prologue Chapter 1",
    "Toys Toys Toys Choke, Toys Toys Toys": 'Toys, Toys, Toys, Choke, Toys, Toys, Toys',
    "Liberate The Illiterate": "Liberate The Illiterate/a Mong Among Mingers",
    "Now The Action Is On Fire": "Now The Action Is On Fire!",
    "Pause It And Turn It Up (hidden Track)": "Pause It And Turn It Up",
    # Typographic apostrophe (U+2019). The previous key was a mis-decoded copy of
    # this character and could never match a title that had been through capwords.
    "Witch\u2019s Cup": "Witch's Cup",
    "All The Way Down Chapter 2": "All The Way Down: Chapter 2",
    "The Conversation Is ...": "The Conversation Is...",
}
# No replacement value is itself a key, so one mapping pass per column is
# equivalent to applying the renames one after another
biffy_track_details['album_name'] = biffy_track_details['album_name'].replace(album_renames)
biffy_track_details['track'] = biffy_track_details['track'].replace(track_renames)

# Save the cleaned-but-not-yet-deduplicated table
biffy_track_details.to_csv(output_dir + '/biffyClyroRaw.csv', index=False, header=True)

# Remove duplicates and pull most recent track
# (sorting ascending by release_date and keeping the last row per track/album)
column_keep = ['artist', 'album_name', 'album_uri', 'album_image', 'release_date', 'track', 'track_number', 'song_uri', 'track_href']
biff_recent = (
    biffy_track_details
    .sort_values(by=['artist', 'album_name', 'track', 'release_date'])
    .drop_duplicates(['track', 'album_name'], keep='last')[column_keep]
)

group_keys = ['artist', 'album_name', 'track']

# Aggregate Popularity (highest value across the duplicate releases of a track).
# .max() / .mean() keep single-level column names; .agg({np.max}) produced
# MultiIndex columns, and merging those with a flat frame is not supported.
popularity = ['popularity']
biffy_pop_agg = biffy_track_details.groupby(group_keys)[popularity].max()

# Aggregate Audio Features (mean across the duplicate releases of a track)
aud_feats = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness',
             'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']
biffy_feat_agg = biffy_track_details.groupby(group_keys)[aud_feats].mean()

# Left join Popularity and Audio Features to Deduped List
biffy_v1 = biff_recent.merge(biffy_pop_agg.reset_index(), on=group_keys, how='left')
biffy_clean = biffy_v1.merge(biffy_feat_agg.reset_index(), on=group_keys, how='left')

# The aggregates hold one row per group, so the joins must not add rows
assert len(biffy_clean) == len(biff_recent)

# Columns already carry their real names, so select them explicitly (instead of
# renaming by position) and sort
biffy_clean = biffy_clean[column_keep + popularity + aud_feats]
biffy_clean = biffy_clean.sort_values(by=['artist', 'album_name', 'track', 'track_number'])

# Transpose Data: every column not listed in `keys` becomes an
# (audio_feat, value) pair, giving one row per track and feature
keys = ['artist', 'album_name', 'album_uri', 'album_image', 'release_date', 'track', 'song_uri', 'track_href', 'popularity',
        'track_number', 'duration_ms', 'time_signature', 'tempo', 'key', 'mode', 'loudness']
biffy_clean_tp = biffy_clean.melt(id_vars=keys, var_name='audio_feat')

# Write to csv
biffy_clean.to_csv(output_dir + '/biffyClyro.csv', index=False, header=True)
biffy_clean_tp.to_csv(output_dir + '/biffyClyroTP.csv', index=False, header=True)
