# ***PROJECT-B3TA***
## **notebook 00 | Spotify API Access** - V2  

---

#### Imports.

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from dotenv import load_dotenv
import os
import time

#### Set up Spotify API credentials.

In [2]:
load_dotenv()
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

#### Define `DEFINESP()` function.

In [24]:
def DEFINESP(playlist_id, sp_location, sp_filename):
    # Authenticate with Spotify API
    client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    
    # Get playlist tracks
    offset = 0
    tracks = []
    while True:
        results = sp.playlist_tracks(playlist_id, offset=offset)
        tracks.extend(results['items'])
        offset += len(results['items'])
        if len(results['items']) == 0:
            break
    
    # Extract track information
    track_data = []
    for item in tracks:
        track = item['track']
        audio_features = sp.audio_features(track['id'])[0]
        
        # Extract genre information
        artists = track.get('artists', [])
        genre = 'unknown'
        if artists:
            artist_info = sp.artist(artists[0]['id'])
            if 'genres' in artist_info and artist_info['genres']:
                genre = artist_info['genres'][0]
        
        # Extract key and mode information
        key = audio_features.get('key', 'unknown')
        mode = audio_features.get('mode', 'unknown')
        
        track_data.append({
            'sound_profile': sp_location,
            'track_id': track['id'],
            'artist_name': artist_info['name'] if artists else 'unknown',
            'track_name': track['name'],
            'genre': genre,
            'key': key,
            'mode': mode,
            'duration_ms': audio_features['duration_ms'],
            'tempo': audio_features['tempo'],
            'loudness': audio_features['loudness'],
            'energy': audio_features['energy'],
            'valence': audio_features['valence'],
            'danceability': audio_features['danceability'],
            'speechiness': audio_features['speechiness'],
            'instrumentalness': audio_features['instrumentalness'],
            'acousticness': audio_features['acousticness'],
            'liveness': audio_features['liveness']
        })
    
    # Create DataFrame
    df = pd.DataFrame(track_data, columns=['sound_profile', 'track_id', 'artist_name', 'track_name', 'genre', 'key',
                                           'mode', 'duration_ms', 'tempo', 'loudness', 'energy', 'valence',
                                           'danceability', 'speechiness', 'instrumentalness', 'acousticness', 'liveness'])
    
    # Write to CSV
    df.to_csv(f'{sp_filename}.csv', index=False)

    # Print statement
    print(f"DEFINESP successful! {len(df)} tracks saved down.")


#### Define `DEFINESPv2()` function - chunked for more efficient requests.

In [25]:
import time

def DEFINESPv2(playlist_id, sp_location, sp_filename):
    # Authenticate with Spotify API
    client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    
    # Get playlist tracks
    offset = 0
    tracks = []
    while True:
        results = sp.playlist_tracks(playlist_id, offset=offset)
        tracks.extend(results['items'])
        offset += len(results['items'])
        if len(results['items']) == 0:
            break
    
    # Define chunk size
    chunk_size = 50  # Adjust as needed
    
    # Extract track information
    track_data = []
    track_ids = [item['track']['id'] for item in tracks]
    chunks = [track_ids[i:i + chunk_size] for i in range(0, len(track_ids), chunk_size)]
    
    for chunk in chunks:
        try:
            audio_features_chunk = sp.audio_features(chunk)

            for audio_features in audio_features_chunk:
                if audio_features and audio_features[0]:
                    af = audio_features[0]
                    track_id = af['id']
                    
                    # Extract genre information
                    artist_info = sp.artist(track['artists'][0]['id'])
                    genre = 'unknown'
                    if 'genres' in artist_info and artist_info['genres']:
                        genre = artist_info['genres'][0]

                    # Extract key and mode information
                    key = af.get('key', 'unknown')
                    mode = af.get('mode', 'unknown')

                    track_data.append({
                        'sound_profile': sp_location,
                        'track_id': track_id,
                        'artist_name': artist_info['name'] if artist_info else 'unknown',
                        'track_name': af['name'],
                        'genre': genre,
                        'key': key,
                        'mode': mode,
                        'duration_ms': af['duration_ms'],
                        'tempo': af['tempo'],
                        'loudness': af['loudness'],
                        'energy': af['energy'],
                        'valence': af['valence'],
                        'danceability': af['danceability'],
                        'speechiness': af['speechiness'],
                        'instrumentalness': af['instrumentalness'],
                        'acousticness': af['acousticness'],
                        'liveness': af['liveness']
                    })
                else:
                    track_data.append({
                        'sound_profile': sp_location,
                        'track_id': chunk[0],
                        'artist_name': 'unknown',
                        'track_name': 'unknown',
                        'genre': 'unknown',
                        'key': 'unknown',
                        'mode': 'unknown',
                        'duration_ms': 0,
                        'tempo': 0,
                        'loudness': 0,
                        'energy': 0,
                        'valence': 0,
                        'danceability': 0,
                        'speechiness': 0,
                        'instrumentalness': 0,
                        'acousticness': 0,
                        'liveness': 0
                    })
                
            time.sleep(1)

        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 429:
                retry_after = int(e.response.headers.get('Retry-After', 1))
                print(f"Rate limited. Retrying after {retry_after} seconds.")
                time.sleep(retry_after + 1)
                continue
            else:
                raise
        except Exception as e:
            print(f"Failed to get audio features: {e}")

    # Create DataFrame
    df = pd.DataFrame(track_data, columns=['sound_profile', 'track_id', 'artist_name', 'track_name', 'genre', 'key',
                                           'mode', 'duration_ms', 'tempo', 'loudness', 'energy', 'valence',
                                           'danceability', 'speechiness', 'instrumentalness', 'acousticness', 'liveness'])
    
    # Write to CSV
    df.to_csv(f'{sp_filename}.csv', index=False)

    # Print statement
    print(f"DEFINESP successful! {len(df)} tracks saved down.")


### 1. **Request for full 305 song Spotify playlist** - b3_v2_london
'5rOsTEi1hRy70nuxIhcSEd' 
- Spotify API only returns a maximum of 100 tracks per request. Access all tracks in the playlist by making request with an offset parameter to handle pagination fetch the remaining tracks above the 100 max.
- This code will fetch all tracks from the playlist by making multiple requests with different offset values until all tracks have been retrieved. It then constructs the DataFrame and writes it to a CSV file as before.

In [8]:
# Authenticate with Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Playlist ID
playlist_id = '5rOsTEi1hRy70nuxIhcSEd'

# Get playlist tracks
offset = 0
tracks = []
while True:
    results = sp.playlist_tracks(playlist_id, offset=offset)
    tracks.extend(results['items'])
    offset += len(results['items'])
    if len(results['items']) == 0:
        break

# Extract track information
track_data = []
for item in tracks:
    track = item['track']
    audio_features = sp.audio_features(track['id'])[0]
    
    # Extract genre information
    artists = track.get('artists', [])
    genre = 'unknown'
    if artists:
        artist_info = sp.artist(artists[0]['id'])
        if 'genres' in artist_info and artist_info['genres']:
            genre = artist_info['genres'][0]
    
    # Extract key and mode information
    key = audio_features.get('key', 'unknown')
    mode = audio_features.get('mode', 'unknown')
    
    track_data.append({
        'sound_profile': 'london',
        'track_id': track['id'],
        'artist_name': artist_info['name'] if artists else 'unknown',
        'track_name': track['name'],
        'genre': genre,
        'key': key,
        'mode': mode,
        'duration_ms': audio_features['duration_ms'],
        'tempo': audio_features['tempo'],
        'loudness': audio_features['loudness'],
        'energy': audio_features['energy'],
        'valence': audio_features['valence'],
        'danceability': audio_features['danceability'],
        'speechiness': audio_features['speechiness'],
        'instrumentalness': audio_features['instrumentalness'],
        'acousticness': audio_features['acousticness'],
        'liveness': audio_features['liveness']
    })

# Create DataFrame
df = pd.DataFrame(track_data, columns=['sound_profile', 'track_id', 'artist_name', 'track_name', 'genre', 'key',
                                       'mode', 'duration_ms', 'tempo', 'loudness', 'energy', 'valence',
                                       'danceability', 'speechiness', 'instrumentalness', 'acousticness', 'liveness'])

# Write to CSV
df.to_csv('b3-v2-london.csv', index=False)


In [9]:
len(df)

305

### 2. **As above 241 song Spotify playlist** - b3_v2_manchester
- '5ihId2i4NrqnyNtMWajKJT' - full inc. error tracks (241)
- '1yGLZFTcc6bXFgSlQDihk5' - full without error tracks (236)

In [26]:
DEFINESPv2('1yGLZFTcc6bXFgSlQDihk5', 'manchester', 'b3_v2_manchester')

Max Retries reached


NameError: name 'requests' is not defined

### 3. **As above 227 song Spotify playlist** - b3_v2_nyc
'2ranO2Nk3DVJXygB95phc5'

In [12]:
DEFINESP('2ranO2Nk3DVJXygB95phc5', 'nyc', 'b3_v2_nyc')

In [15]:
len(df)

305

### 2. **As above 241 song Spotify playlist** - b3-v2-manchester
'5ihId2i4NrqnyNtMWajKJT'

In [5]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from dotenv import load_dotenv
import os
import time

# Authenticate with Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Playlist ID
playlist_id = '5ihId2i4NrqnyNtMWajKJT'

# Get playlist tracks
offset = 0
tracks = []
while True:
    results = sp.playlist_tracks(playlist_id, offset=offset)
    tracks.extend(results['items'])
    offset += len(results['items'])
    if len(results['items']) == 0:
        break

# Extract track information
track_data = []
for item in tracks:
    track = item['track']
    audio_features = sp.audio_features(track['id'])
    
    # Check if audio features are available
    if audio_features is not None:
        audio_features = audio_features[0]  # Extract the first (and only) element
    
        # Extract genre information
        artists = track.get('artists', [])
        genre = 'unknown'
        if artists:
            artist_info = artists[0]
            if 'genres' in artist_info and artist_info['genres']:
                genre = artist_info['genres'][0]
        
        # Extract key and mode information
        key = audio_features.get('key', 'unknown')
        mode = audio_features.get('mode', 'unknown')
        
        track_data.append({
            'sound_profile': 'manchester',
            'track_id': track['id'],
            'artist_name': artist_info['name'] if artists else 'unknown',
            'track_name': track['name'],
            'genre': genre,
            'key': key,
            'mode': mode,
            'duration_ms': audio_features['duration_ms'],
            'tempo': audio_features['tempo'],
            'loudness': audio_features['loudness'],
            'energy': audio_features['energy'],
            'valence': audio_features['valence'],
            'danceability': audio_features['danceability'],
            'speechiness': audio_features['speechiness'],
            'instrumentalness': audio_features['instrumentalness'],
            'acousticness': audio_features['acousticness'],
            'liveness': audio_features['liveness']
        })

# Create DataFrame
df = pd.DataFrame(track_data, columns=['sound_profile', 'track_id', 'artist_name', 'track_name', 'genre', 'key',
                                       'mode', 'duration_ms', 'tempo', 'loudness', 'energy', 'valence',
                                       'danceability', 'speechiness', 'instrumentalness', 'acousticness', 'liveness'])

# Write to CSV
df.to_csv('b3-v2-manchester.csv', index=False)


TypeError: 'NoneType' object is not iterable

In [None]:
len(df)

### **Summary**
- All track information effectively accessed and written to csv. To be joined to the main dataset in the next notebook.



---

## /// ***APPENDIX*** ///

### 1. **Basic request for max 100 songs from specific Spotify playlist**
- Access the data from spotify API and download into a CSV
- Includes the following information for each track in the playlist in the following order.

['artist_name', 'track_name', 'genre', 'year', 'duration_ms', 'tempo',
       'time_signature', 'key', 'mode', 'loudness', 'popularity', 'liveness',
       'acousticness', 'speechiness', 'instrumentalness', 'energy', 'valence',
       'danceability', 'track_id']



In [None]:
'''
# Set up Spotify API credentials
client_id = 'num'
client_secret = 'num'

# Authenticate with Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Playlist ID
playlist_id = '5o0FvP9PHlZ7WXob6N19QS'

# Get playlist tracks
results = sp.playlist_tracks(playlist_id)

# Extract track information
track_data = []
for item in results['items']:
    track = item['track']
    artist = sp.artist(track['artists'][0]['id'])
    audio_features = sp.audio_features(track['id'])[0]
    track_data.append({
        'artist_name': artist['name'],
        'track_name': track['name'],
        'genre': artist['genres'][0] if artist['genres'] else None,
        'year': track['album']['release_date'][:4],
        'duration_ms': audio_features['duration_ms'],
        'tempo': audio_features['tempo'],
        'time_signature': audio_features['time_signature'],
        'key': audio_features['key'],
        'mode': audio_features['mode'],
        'loudness': audio_features['loudness'],
        'popularity': track['popularity'],
        'liveness': audio_features['liveness'],
        'acousticness': audio_features['acousticness'],
        'speechiness': audio_features['speechiness'],
        'instrumentalness': audio_features['instrumentalness'],
        'energy': audio_features['energy'],
        'valence': audio_features['valence'],
        'danceability': audio_features['danceability'],
        'track_id': track['id']
    })

# Create DataFrame
df = pd.DataFrame(track_data, columns=['artist_name', 'track_name', 'genre', 'year', 'duration_ms', 'tempo',
                                       'time_signature', 'key', 'mode', 'loudness', 'popularity', 'liveness',
                                       'acousticness', 'speechiness', 'instrumentalness', 'energy', 'valence',
                                       'danceability', 'track_id'])

# Write to CSV
df.to_csv('playlist_tracks.csv', index=False)
'''


### 2. Multiple page request for 100+ songs from specific Spotify playlist (keeps dfs seperate)


In [None]:
'''
# Authenticate with Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Playlist ID
playlist_id = '2pMw3LqX6Gcb1plV1CtnIt'

# Get playlist tracks
offset = 0
tracks = []
while True:
    results = sp.playlist_tracks(playlist_id, offset=offset)
    tracks.extend(results['items'])
    offset += len(results['items'])
    if len(results['items']) == 0:
        break

# Extract track information
track_data = []
for item in tracks:
    track = item['track']
    artist = sp.artist(track['artists'][0]['id'])
    audio_features = sp.audio_features(track['id'])[0]
    track_data.append({
        'artist_name': artist['name'],
        'track_name': track['name'],
        'genre': artist['genres'][0] if artist['genres'] else None,
        'year': track['album']['release_date'][:4],
        'duration_ms': audio_features['duration_ms'],
        'tempo': audio_features['tempo'],
        'time_signature': audio_features['time_signature'],
        'key': audio_features['key'],
        'mode': audio_features['mode'],
        'loudness': audio_features['loudness'],
        'popularity': track['popularity'],
        'liveness': audio_features['liveness'],
        'acousticness': audio_features['acousticness'],
        'speechiness': audio_features['speechiness'],
        'instrumentalness': audio_features['instrumentalness'],
        'energy': audio_features['energy'],
        'valence': audio_features['valence'],
        'danceability': audio_features['danceability'],
        'track_id': track['id']
    })

# Create DataFrame
df = pd.DataFrame(track_data, columns=['artist_name', 'track_name', 'genre', 'year', 'duration_ms', 'tempo',
                                       'time_signature', 'key', 'mode', 'loudness', 'popularity', 'liveness',
                                       'acousticness', 'speechiness', 'instrumentalness', 'energy', 'valence',
                                       'danceability', 'track_id'])

# Write to CSV
df.to_csv('playlist_tracks.csv', index=False)
'''

### 3. Multiple page request for 100+ songs from specific Spotify playlist - with rate limiting
- https://developer.spotify.com/documentation/web-api/concepts/rate-limits


In [None]:
'''
import time

# Set a delay between requests
request_delay = 0.5  # Adjust this as needed

# Authenticate with Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Playlist ID
playlist_id = '2pMw3LqX6Gcb1plV1CtnIt'

# Get playlist tracks
offset = 0
tracks = []
while True:
    results = sp.playlist_tracks(playlist_id, offset=offset)
    tracks.extend(results['items'])
    offset += len(results['items'])
    if len(results['items']) == 0:
        break

    # Introduce a delay before making the next request
    time.sleep(request_delay)

# Extract track information
track_data = []
for item in tracks:
    track = item['track']
    artist = sp.artist(track['artists'][0]['id'])
    audio_features = sp.audio_features(track['id'])[0]
    track_data.append({
        'artist_name': artist['name'],
        'track_name': track['name'],
        'genre': artist['genres'][0] if artist['genres'] else None,
        'year': track['album']['release_date'][:4],
        'duration_ms': audio_features['duration_ms'],
        'tempo': audio_features['tempo'],
        'time_signature': audio_features['time_signature'],
        'key': audio_features['key'],
        'mode': audio_features['mode'],
        'loudness': audio_features['loudness'],
        'popularity': track['popularity'],
        'liveness': audio_features['liveness'],
        'acousticness': audio_features['acousticness'],
        'speechiness': audio_features['speechiness'],
        'instrumentalness': audio_features['instrumentalness'],
        'energy': audio_features['energy'],
        'valence': audio_features['valence'],
        'danceability': audio_features['danceability'],
        'track_id': track['id']
    })

    # Introduce a delay before making the next request
    time.sleep(request_delay)

# Create DataFrame
df = pd.DataFrame(track_data, columns=['artist_name', 'track_name', 'genre', 'year', 'duration_ms', 'tempo',
                                       'time_signature', 'key', 'mode', 'loudness', 'popularity', 'liveness',
                                       'acousticness', 'speechiness', 'instrumentalness', 'energy', 'valence',
                                       'danceability', 'track_id'])

# Write to CSV
df.to_csv('playlist_tracks.csv', index=False)
'''

### 4. Multiple page request for 100+ songs from specific Spotify playlist - with rate limiting + further optimisation (batched fetching)
- Optimized version batching requests to fetch audio features for multiple tracks at once, reducing the number of API calls. 
- Additionally, audio features are fetched only once for each track ID and reuse the results as needed.
- Rate limiting is handled by introducing a delay between request. request_delay value can be adjusted as needed based on the Spotify API rate limits and use case.

In [None]:
'''
import time

# Set a delay between requests
request_delay = 0.5  # Adjust this as needed

# Authenticate with Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Playlist ID
playlist_id = '2pMw3LqX6Gcb1plV1CtnIt'

# Get playlist tracks
offset = 0
tracks = []

# Function to fetch audio features for a batch of track IDs                # <- batched up code section
def get_audio_features(track_ids):                                         # <- batched up code section
    audio_features = sp.audio_features(track_ids)                          # <- batched up code section
    return audio_features                                                  # <- batched up code section

while True:                                                                # <- also impacts remainder of code
    results = sp.playlist_tracks(playlist_id, offset=offset)               # <- ...
    playlist_tracks = results['items']
    
    # Extract track IDs for the batch
    track_ids = [track['track']['id'] for track in playlist_tracks]
    
    # Fetch audio features for the batch of track IDs
    audio_features = get_audio_features(track_ids)
    
    for i, item in enumerate(playlist_tracks):
        track = item['track']
        artist = sp.artist(track['artists'][0]['id'])
        audio_feature = audio_features[i]
        track_data.append({
            'artist_name': artist['name'],
            'track_name': track['name'],
            'genre': artist['genres'][0] if artist['genres'] else None,
            'year': track['album']['release_date'][:4],
            'duration_ms': audio_feature['duration_ms'],
            'tempo': audio_feature['tempo'],
            'time_signature': audio_feature['time_signature'],
            'key': audio_feature['key'],
            'mode': audio_feature['mode'],
            'loudness': audio_feature['loudness'],
            'popularity': track['popularity'],
            'liveness': audio_feature['liveness'],
            'acousticness': audio_feature['acousticness'],
            'speechiness': audio_feature['speechiness'],
            'instrumentalness': audio_feature['instrumentalness'],
            'energy': audio_feature['energy'],
            'valence': audio_feature['valence'],
            'danceability': audio_feature['danceability'],
            'track_id': track['id']
        })
        
    offset += len(results['items'])
    if len(results['items']) == 0:
        break

    # Introduce a delay before making the next request
    time.sleep(request_delay)

# Create DataFrame
df = pd.DataFrame(track_data, columns=['artist_name', 'track_name', 'genre', 'year', 'duration_ms', 'tempo',
                                       'time_signature', 'key', 'mode', 'loudness', 'popularity', 'liveness',
                                       'acousticness', 'speechiness', 'instrumentalness', 'energy', 'valence',
                                       'danceability', 'track_id'])

# Write to CSV
df.to_csv('playlist_tracks.csv', index=False)'''