In [None]:
import os
import time

import pandas as pd
import requests

In [None]:
# Add your spotify API credentials
# Prefer setting SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET in the environment so
# real secrets are never saved inside the notebook; the placeholder strings are
# only used when those variables are not set.
client_id = os.environ.get("SPOTIFY_CLIENT_ID", "add id here")
client_secret = os.environ.get("SPOTIFY_CLIENT_SECRET", "add secret here")

# GETTING ACCESS TOKEN
# The credentials are sent as a form-encoded POST body with the
# "client_credentials" grant type.
url = "https://accounts.spotify.com/api/token"
headers = {
    "Content-Type": "application/x-www-form-urlencoded"
}
data = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret
}

# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.post(url, headers=headers, data=data, timeout=30)

# Stop here with a readable message instead of carrying a None token into the
# cells below, where it would only show up as confusing failures.
if response.status_code != 200:
    raise RuntimeError(
        f"Token request failed ({response.status_code}): {response.text}"
    )

# ESTABLISHING ACCESS TOKEN VARIABLE TO USE LATER
access_token = response.json().get('access_token')
if not access_token:
    raise RuntimeError("Token response did not contain an 'access_token' field")

# The token (and the raw response that contains it) is deliberately not printed:
# cell output is saved with the notebook and would leak a usable credential.
print("Access token obtained.")

In [None]:
# ESTABLISHING DATA REQUEST FOR SEARCH USING QUERY
# Searches for playlists matching the query "2000s" and collects one record per
# returned playlist into all_playlist_list_info_df.

playlist_list_url = "https://api.spotify.com/v1/search?q=2000s&type=playlist"

playlist_list_headers = {
    "Authorization": f"Bearer {access_token}"
}

# A timeout keeps the cell from hanging forever if the server never answers.
playlist_list_response = requests.get(
    playlist_list_url, headers=playlist_list_headers, timeout=30
)

# Fail with the HTTP status rather than with a KeyError on the missing
# 'playlists' key further down.
if playlist_list_response.status_code != 200:
    raise RuntimeError(
        f"Playlist search failed ({playlist_list_response.status_code}): "
        f"{playlist_list_response.text[:200]}"
    )

playlist_list_data = playlist_list_response.json()

# `or {}` / `or []` also cover keys that are present but null.
playlist_items = (playlist_list_data.get('playlists') or {}).get('items') or []
print(f"Search returned {len(playlist_items)} playlist entries")

# GATHERING PLAYLIST INFORMATION
playlist_columns = [
    'playlist_name',
    'playlist_description',
    'playlist_tracks_href',
    'playlist_tracks',
    'playlist_id',
    'playlist_uri',
]
all_playlist_list_info = []

for item in playlist_items:
    if item is None:  # The items list can contain None entries; skip them
        continue

    # 'tracks' may be missing or null; fall back to an empty mapping either way.
    playlist_tracks_info = item.get('tracks') or {}

    all_playlist_list_info.append({
        'playlist_name': item.get('name', 'N/A'),
        'playlist_description': item.get('description', 'N/A'),
        'playlist_tracks_href': playlist_tracks_info.get('href', 'N/A'),
        'playlist_tracks': playlist_tracks_info.get('total', 0),
        'playlist_id': item.get('id', 'N/A'),
        'playlist_uri': item.get('uri', 'N/A')
    })

# TURNING IT INTO A DATAFRAME
# Passing the column list keeps the expected columns even when no playlists
# were collected, so later column lookups do not fail on an empty frame.
all_playlist_list_info_df = pd.DataFrame(all_playlist_list_info, columns=playlist_columns)
all_playlist_list_info_df.head(2)

In [None]:
# Prepare a list to store track info
# One record is stored per (track, artist) pair, so a track with several
# artists produces several rows.
all_tracks_info = []
request_count = 5  # Number of playlists (from the top of the frame) to fetch

track_columns = [
    'track_name',
    'track_id',
    'track_href',
    'track_uri',
    'artist_name',
    'artist_id',
    'artist_href',
    'artist_spotify_url',
]

# The auth header is identical for every request, so build it once.
playlist_headers = {
    "Authorization": f"Bearer {access_token}"
}

# Loop through the playlist track links
for link in all_playlist_list_info_df['playlist_tracks_href'].iloc[:request_count]:
    # Skip placeholder / missing hrefs: they are not URLs and cannot be requested.
    if not isinstance(link, str) or not link.startswith('http'):
        print(f"Skipping playlist without a usable tracks link: {link}")
        continue

    # Drop the trailing '/tracks' segment so the playlist itself is requested.
    playlist_url = link.split('/tracks')[0]

    # A timeout keeps the loop from hanging forever on one unresponsive request.
    playlist_response = requests.get(playlist_url, headers=playlist_headers, timeout=30)

    if playlist_response.status_code != 200:
        print(f"Failed to fetch: {playlist_url}")
        continue

    playlist_data = playlist_response.json()
    print(f"\nFetched playlist: {playlist_url}")

    # Now extract tracks from THIS playlist
    # NOTE(review): only the items embedded in this response are read; any
    # further pages of a long playlist are not followed.
    for item in (playlist_data.get('tracks') or {}).get('items') or []:
        track = (item or {}).get('track')
        if not track:
            continue

        track_name = track.get('name', 'N/A')
        track_id = track.get('id', 'N/A')
        track_href = track.get('href', 'N/A')
        track_uri = track.get('uri', 'N/A')

        # `or []` / `or {}` also cover keys that are present but null.
        for artist in track.get('artists') or []:
            all_tracks_info.append({
                'track_name': track_name,
                'track_id': track_id,
                'track_href': track_href,
                'track_uri': track_uri,
                'artist_name': artist.get('name', 'N/A'),
                'artist_id': artist.get('id', 'N/A'),
                'artist_href': artist.get('href', 'N/A'),
                'artist_spotify_url': (artist.get('external_urls') or {}).get('spotify', 'N/A')
            })

    time.sleep(0.1)  # Short pause between successful playlist requests

# A stray `print(info)` left over from a commented-out debug loop used to sit
# here inside the loop above; `info` is never defined in this notebook, so it
# has been removed.

# TURNING IT INTO A DATAFRAME
# Passing the column list keeps the expected columns even when no tracks were
# collected, so later column lookups do not fail on an empty frame.
all_tracks_info_df = pd.DataFrame(all_tracks_info, columns=track_columns)
all_tracks_info_df.head(5)


In [None]:
# Prepare a list to store artist info
# One record is stored per unique artist id found in all_tracks_info_df.
artist_info = []

# Drop duplicates and NaN
# Guard against a track frame without an 'artist_id' column (e.g. nothing was
# collected), and also drop the 'N/A' placeholder, which is not a real id.
if 'artist_id' in all_tracks_info_df.columns:
    artist_ids = all_tracks_info_df['artist_id'].dropna()
else:
    artist_ids = pd.Series([], dtype=object)
unique_artist_ids = artist_ids[artist_ids != 'N/A'].unique()

# The auth header is identical for every request, so build it once.
artist_headers = {
    "Authorization": f"Bearer {access_token}"
}

for link in unique_artist_ids:
    artist_url = f"https://api.spotify.com/v1/artists/{link}"

    # A timeout keeps the loop from hanging forever on one unresponsive request.
    artist_response = requests.get(artist_url, headers=artist_headers, timeout=30)

    if artist_response.status_code != 200:
        print(f"Failed to fetch artist {link}, skipping...")
        continue

    artist_data = artist_response.json()

    # `or []` / `or {}` also cover keys that are present but null.
    genres = artist_data.get('genres') or []
    genre_string = ", ".join(genres)

    artist_info.append({
        'id': artist_data.get('id'),
        'name': artist_data.get('name'),
        'genres': genre_string,  # now a single string
        'followers': (artist_data.get('followers') or {}).get('total', 0),
        'popularity': artist_data.get('popularity'),
        'external_urls': (artist_data.get('external_urls') or {}).get('spotify', ''),
        'href': artist_data.get('href'),
        'uri': artist_data.get('uri')
    })

    time.sleep(0.1)  # Respect the API rate limit
