In [1]:
import requests
import base64
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time
import urllib3

In [2]:
client_id = '4bfcfc5810374447b13d593925b9fbd3'
client_secret = 'e6f4755f93f747c5acb311a4c78de8a8'

In [3]:
# Base64 encode the client ID and client secret
client_credentials = f"{client_id}:{client_secret}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

# Request the access token
token_url = 'https://accounts.spotify.com/api/token'
headers = {'Authorization': f'Basic {client_credentials_base64.decode()}'}
data = {'grant_type': 'client_credentials'}
response = requests.post(token_url, data=data, headers=headers)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully.")
else:
    print("Error obtaining access token.")
    exit()

Access token obtained successfully.


In [4]:
# Initialize Spotipy client with the access token
sp = spotipy.Spotify(auth=access_token)

# Track data extraction

## Extract songs and their data from playlists

In [6]:
def chunks(lst, chunk_size):
    """Yield successive chunk_size-sized chunks from lst."""
    for i in range(0, len(lst), chunk_size):
        yield lst[i:i + chunk_size]

In [7]:
def extract_playlist_tracks(playlist_ids):
    all_tracks = []
    processed_track_ids = set()  # Set to store track IDs that have already been processed
    
    # Iterate over chunks of playlist IDs
    for playlist_chunk in chunks(playlist_ids, 5):  # Use 5 instead of 100 to leave some room for audio features requests
        for playlist_id in playlist_chunk:
            print(f"Processing playlist with ID: {playlist_id}")
            # Fetch tracks from the playlist
            playlist_tracks = sp.playlist_tracks(playlist_id)
            for item in playlist_tracks['items']:
                try:
                    track = item['track']
                    track_id = track['id']
                    
                    # Fetch artist information to get genre
                    artist_id = track['artists'][0]['id']  # Assuming only one artist for simplicity
                    artist = sp.artist(artist_id)
                    
                    # Check if track ID has already been processed
                    if track_id in processed_track_ids:
                        continue
                    processed_track_ids.add(track_id)
                    
                    # Fetch audio features for the track with rate limiting
                    while True:
                        try:
                            audio_features = sp.audio_features(track_id)
                            break
                        except spotipy.SpotifyException as e:
                            if e.http_status == 429:
                                retry_after = int(e.headers.get('Retry-After', 5))  # Default to 5 seconds if Retry-After header is missing
                                print(f"Rate limited. Waiting for {retry_after} seconds...")
                                time.sleep(retry_after)
                            else:
                                raise
                                
                    track_info = {
                        'track_name': track['name'],
                        'track_id': track_id,
                        'artist_name': ', '.join([artist['name'] for artist in track['artists']]),
                        'album_name': track['album']['name'],
                        'release_date': track['album']['release_date'],
                        'artist_genre': artist['genres'],
                        'popularity': track['popularity'],
                        'Duration (ms)': track['duration_ms'],
                        'Acousticness': audio_features[0]['acousticness'] if audio_features else None,
                        'Danceability': audio_features[0]['danceability'] if audio_features else None,
                        'Energy': audio_features[0]['energy'] if audio_features else None,
                        'Instrumentalness': audio_features[0]['instrumentalness'] if audio_features else None,
                        'Key': audio_features[0]['key'] if audio_features else None,
                        'Liveness': audio_features[0]['liveness'] if audio_features else None,
                        'Loudness': audio_features[0]['loudness'] if audio_features else None,
                        'Mode': audio_features[0]['mode'] if audio_features else None,
                        'Speechiness': audio_features[0]['speechiness'] if audio_features else None,
                        'Tempo': audio_features[0]['tempo'] if audio_features else None,
                        'Time Signature': audio_features[0]['time_signature'] if audio_features else None,
                        'Valence': audio_features[0]['valence'] if audio_features else None
                    }
                    all_tracks.append(track_info)
                except TypeError:
                    pass  # Skip processing if item is not a track
    return pd.DataFrame(all_tracks)

In [8]:
# Desired playlist IDs
playlist_ids = ['37i9dQZF1DX4JAvHpjipBk', '37i9dQZF1E39XlANlAA96X', '37i9dQZF1E38u5gDMtyu8E', 
                '37i9dQZF1EIZobirLlpmBa', '0DZnrQLsdDXN4lGTFXZhgI', '37i9dQZF1DZ06evO3nMr04',
                '37i9dQZF1E8Rlw8xLHOZhl', '37i9dQZEVXbMDoHDwVN2tF', '14t09nUK5mFkvIIDhkRV7B', 
                '37i9dQZF1EQnqst5TRi17F', '37i9dQZF1DXbpmT3HUTsZm', '4kw9kdjzx1UmyWvpysl0y2',
                '37i9dQZF1E37Dn6wPX9ecO', '37i9dQZEVXcQNDD4Awnxhz', '37i9dQZEVXbt5m3VaHBeIu',
                '37i9dQZF1E8Rlw8xLHOZhl', '37i9dQZF1EpoOUxiVPoQtA', '37i9dQZF1EIUyZdbBpVQ7y', 
                '37i9dQZF1E8L9WcXJhUTKo', '37i9dQZF1DWUoqEG4WY6ce', '37i9dQZF1DX3asLxmR3A9e', 
                '37i9dQZF1E8GJRbsFHwpXF', '37i9dQZF1E8L8G4urdGGr9', '37i9dQZF1E8MKHBxFbDuoo',
                '37i9dQZF1E8SlAh0oV8cae', '37i9dQZF1E8MubKUjrQkqL', '37i9dQZF1E8NMu9akGoaMM',
                '37i9dQZF1DZ06evO4lAAFJ', '37i9dQZF1E4kbqsPktqjuv', '37i9dQZF1DZ06evO3Ec90s',
                '37i9dQZF1DX6drTZKzZwSo', '37i9dQZF1DX4o1oenSJRJd', '1zoyTmSQomGTNadjjOuaPu',
                '37i9dQZF1DWUa8ZRTfalHk', '37i9dQZF1DX4WYpdgoIcn6', '37i9dQZF1DX6aTaZa0K6VA']  

# Extract songs info
playlist_tracks_df = extract_playlist_tracks(playlist_ids)


Processing playlist with ID: 37i9dQZF1DX4JAvHpjipBk
Processing playlist with ID: 37i9dQZF1E39XlANlAA96X
Processing playlist with ID: 37i9dQZF1E38u5gDMtyu8E
Processing playlist with ID: 37i9dQZF1EIZobirLlpmBa
Processing playlist with ID: 0DZnrQLsdDXN4lGTFXZhgI
Processing playlist with ID: 37i9dQZF1DZ06evO3nMr04
Processing playlist with ID: 37i9dQZF1E8Rlw8xLHOZhl
Processing playlist with ID: 37i9dQZEVXbMDoHDwVN2tF
Processing playlist with ID: 14t09nUK5mFkvIIDhkRV7B
Processing playlist with ID: 37i9dQZF1EQnqst5TRi17F
Processing playlist with ID: 37i9dQZF1DXbpmT3HUTsZm
Processing playlist with ID: 4kw9kdjzx1UmyWvpysl0y2
Processing playlist with ID: 37i9dQZF1E37Dn6wPX9ecO
Processing playlist with ID: 37i9dQZEVXcQNDD4Awnxhz
Processing playlist with ID: 37i9dQZEVXbt5m3VaHBeIu
Processing playlist with ID: 37i9dQZF1E8Rlw8xLHOZhl
Processing playlist with ID: 37i9dQZF1EpoOUxiVPoQtA
Processing playlist with ID: 37i9dQZF1EIUyZdbBpVQ7y
Processing playlist with ID: 37i9dQZF1E8L9WcXJhUTKo
Processing p

In [9]:
playlist_tracks_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1802 entries, 0 to 1801
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   track_name        1802 non-null   object 
 1   track_id          1802 non-null   object 
 2   artist_name       1802 non-null   object 
 3   album_name        1802 non-null   object 
 4   release_date      1802 non-null   object 
 5   artist_genre      1802 non-null   object 
 6   popularity        1802 non-null   int64  
 7   Duration (ms)     1802 non-null   int64  
 8   Acousticness      1802 non-null   float64
 9   Danceability      1802 non-null   float64
 10  Energy            1802 non-null   float64
 11  Instrumentalness  1802 non-null   float64
 12  Key               1802 non-null   int64  
 13  Liveness          1802 non-null   float64
 14  Loudness          1802 non-null   float64
 15  Mode              1802 non-null   int64  
 16  Speechiness       1802 non-null   float64


In [10]:
# Assuming you have a DataFrame named 'df'
playlist_tracks_df.to_csv('tracks.csv', index=False)