# Using the Spotify API to get top tracks of an artist using the spotipy library

In [1]:
import os
import random
import time
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv

In [2]:
# Load environment variables before reading the Spotify credentials below
load_dotenv()

# Either value is None when the corresponding variable is not set
client_id = os.environ.get('SPOTIFY_CLIENT_ID')
client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')

# spotipy setup: one shared client, `sp`, authenticated with the app's
# client id / secret and reused by every API call in this notebook
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

#### Test with Led Zeppelin's URI

In [3]:
# lz_uri = 'spotify:artist:36QJpDe2go2KgaRleHCDTp'
# results = sp.artist_top_tracks(lz_uri)

# for track in results['tracks'][:10]:
#     print('track    : ' + track['name'])
#     print('cover art: ' + track['album']['images'][0]['url'])
#     print()

In [4]:
# Genres that each batch is randomly sampled from
genres = [
    'pop',
    'rock',
    'hip-hop',
    'classical',
    'jazz',
    'electronic',
    'metal',
    'country',
]

# Column order shared by the collected rows and the DataFrame built from them
columns = [
    'track_name',
    'artist',
    'album',
    'genres',
    'popularity',
    'explicit',
    'cover_url',
]

# Rows gathered for the batch currently being fetched
data = list()

# Path of the CSV file that receives every batch as a backup
backup_file = 'spotify_tracks_backup.csv'

# fetch and store data
def fetch_tracks_from_genre(genre, limit=25, market='US', offset=0):
    """Search Spotify for tracks in ``genre`` and append one row per track to ``data``.

    Each row follows the order of ``columns``: track name, comma-joined artist
    names, album name, comma-joined artist genres, popularity, explicit flag
    and album cover URL.

    Args:
        genre: Genre name placed in the ``genre:<name>`` search filter.
        limit: Maximum number of tracks requested by this search.
        market: Market code passed to the search (e.g. 'US').
        offset: Index of the first search result to return. Pass increasing
            values to page through a genre; with the default of 0 every call
            returns the same first page.

    Returns:
        None. Rows are appended to the module-level ``data`` list.

    Any exception is reported with ``print`` and ends the batch early; rows
    appended before the failure stay in ``data``.
    """
    try:
        results = sp.search(
            q=f'genre:{genre}',   # Restrict the search to the requested genre
            type='track',         # We are searching for tracks only
            limit=limit,          # Number of results in this page
            offset=offset,        # Where this page starts within the results
            market=market         # Limit to the specified market (e.g., 'US')
        )

        for track in results['tracks']['items']:
            artists = track['artists']
            album = track['album']

            # Genres live on the artist objects, not on the track.
            # dict.fromkeys drops duplicates while keeping first-seen order,
            # so the genres column is stable between runs (a set is not).
            artist_genres = list(dict.fromkeys(
                name
                for artist in artists
                for name in _get_artist_genres(artist['id'])
            ))

            data.append([
                track['name'],
                ', '.join(artist['name'] for artist in artists),
                album['name'],
                ', '.join(artist_genres),
                track['popularity'],
                track['explicit'],
                _pick_cover_url(album.get('images')),
            ])

    except Exception as e:
        print(f'Error fetching data for genre {genre}: {str(e)}')


# Genres already looked up, keyed by artist id, so an artist that appears on
# several tracks (or in several batches) costs only one API call.
_artist_genres_cache = {}


def _get_artist_genres(artist_id):
    """Return the genre list of one artist, querying the API once per artist id."""
    if artist_id not in _artist_genres_cache:
        _artist_genres_cache[artist_id] = sp.artist(artist_id)['genres']
    return _artist_genres_cache[artist_id]


def _pick_cover_url(images, preferred_index=1):
    """Return the URL of the preferred album image, or None if there are no images.

    Spotify lists album images widest first, so index 0 is the largest image
    and index 1 (the default, and what this notebook has always stored) is the
    next size down. Albums with fewer images fall back to the last one
    available instead of raising IndexError and aborting the whole batch.
    """
    if not images:
        return None
    return images[min(preferred_index, len(images) - 1)]['url']


num_tracks = 2000
batch_size = 25
num_batches = num_tracks // batch_size

# Next search offset per genre. Without this every draw of a genre would
# re-fetch the same first page and the backup would fill up with duplicates.
genre_offsets = {genre: 0 for genre in genres}

for batch in range(num_batches):
    print(f'Fetching batch {batch + 1} of {num_batches}...')

    random_genre = random.choice(genres) # fetch tracks from random genre
    fetch_tracks_from_genre(
        random_genre,
        limit=batch_size,
        offset=genre_offsets[random_genre],
    )
    # Always move on to the next page, even after an error, so a page that
    # fails deterministically cannot block its genre for the rest of the run.
    genre_offsets[random_genre] += batch_size

    if data:
        df_batch = pd.DataFrame(data, columns=columns)
        # Append to the backup; the header is written only when the file is new
        df_batch.to_csv(
            backup_file,
            mode='a',
            header=not os.path.exists(backup_file),
            index=False,
        )

    data = [] # clear data list for next batch

    # No need to wait once the last batch has been stored
    if batch + 1 < num_batches:
        print(f'Pausing for 30 seconds... (Batch {batch + 1} of {num_batches})')
        time.sleep(30)

print('Data collection completed.')

Fetching batch 1 of 80...
Pausing for 30 seconds... (Batch 1 of 80)
Fetching batch 2 of 80...
Error fetching data for genre electronic: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Pausing for 30 seconds... (Batch 2 of 80)
Fetching batch 3 of 80...
Pausing for 30 seconds... (Batch 3 of 80)
Fetching batch 4 of 80...
Pausing for 30 seconds... (Batch 4 of 80)
Fetching batch 5 of 80...
Pausing for 30 seconds... (Batch 5 of 80)
Fetching batch 6 of 80...
Pausing for 30 seconds... (Batch 6 of 80)
Fetching batch 7 of 80...
Pausing for 30 seconds... (Batch 7 of 80)
Fetching batch 8 of 80...
Pausing for 30 seconds... (Batch 8 of 80)
Fetching batch 9 of 80...
Pausing for 30 seconds... (Batch 9 of 80)
Fetching batch 10 of 80...
Pausing for 30 seconds... (Batch 10 of 80)
Fetching batch 11 of 80...
Pausing for 30 seconds... (Batch 11 of 80)
Fetching batch 12 of 80...
Pausing for 30 seconds... (Batch 12 of 80)
Fetching batch 13 of 80...
Pausing for 30 s