<a href="https://colab.research.google.com/github/columose/Spotify-API/blob/main/functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
def playlist_to_df(playlist_url, spotipy_client_id, spotipy_client_secret):

  """
  Convert a Spotify playlist URL to a pandas dataframe with track details,
  track audio features, and the top genre associated with the first artist
  of each track.

  Only a single page of at most 100 playlist items is requested, so longer
  playlists are truncated to that page. Playlist items without track data
  (e.g. tracks that are no longer available) are skipped. Tracks for which
  Spotify returns no audio features keep their row, with missing (NaN)
  feature values.

  Parameters:
    playlist_url (string): URL of the playlist ('https://open.spotify.com/playlist/...')
    spotipy_client_id (string): Spotify client ID associated with your personal Spotify Web App
    spotipy_client_secret (string): Spotify client secret associated with your personal Spotify Web App

  If you don't have a Spotify Web App, get started by following this link: https://developer.spotify.com/documentation/web-api

  Returns:
    pandas.DataFrame: One row per track with the columns 'artist_name', 'song_name',
                      'popularity', 'genres' (first genre listed for the artist, or an
                      empty string if none is listed), followed by the audio features
                      of the track such as tempo, duration in ms, acousticness etc.
  """

  import pandas as pd
  import spotipy
  from spotipy.oauth2 import SpotifyClientCredentials

  # Only 50 artists can be requested at a time
  artist_batch_size = 50

  # Input user credentials
  spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
      client_id=spotipy_client_id, client_secret=spotipy_client_secret))

  ## Step 1: Operate on the track details ##
  playlist_results = spotify.playlist_items(playlist_url, limit=100, additional_types=('track',))

  # Items whose 'track' entry is missing/null cannot be described, so skip them
  # here; every later step works from this same list, which keeps rows aligned.
  tracks = [item['track'] for item in playlist_results['items'] if item.get('track')]

  # Extract info about the artist name, song name and popularity from the results
  playlist_details = [
      {
          'artist_name': track['artists'][0]['name'],
          'song_name': track['name'],
          'popularity': track['popularity'],
      }
      for track in tracks
  ]

  # Extract track and artist uris for further analyses
  artist_uris = [track['artists'][0]['uri'] for track in tracks]
  track_uris = [track['uri'] for track in tracks]

  # Convert track details to pandas dataframe
  df_playlist_track_details = pd.DataFrame(playlist_details)

  ## Step 2: Operate on the artist_uris to obtain their genres ##

  # Request artist details in consecutive slices of at most 50 uris. Stepping
  # over the list covers every artist, including a final partial batch (e.g.
  # 75 artists -> one batch of 50 and one batch of 25).
  artist_details = []
  for start in range(0, len(artist_uris), artist_batch_size):
    batch = artist_uris[start:start + artist_batch_size]
    artist_request = spotify.artists(artists=batch)
    artist_details.extend(artist_request['artists'])

  # Keep the first listed genre per artist; empty string when none is listed
  genres = [artist['genres'][0] if artist['genres'] else '' for artist in artist_details]
  df_genres = pd.DataFrame({'genres': genres})

  ## Step 3: Operate on the track uris ##

  # Extract audio features from track uris (no request needed without tracks)
  track_features = spotify.audio_features(tracks=track_uris) if track_uris else []
  features_to_remove = {'type', 'id', 'uri', 'track_href', 'analysis_url'}

  # An entry may be None when no features are available for a track; use an
  # empty mapping so the track keeps its row (with NaN features) instead of
  # raising or shifting the following rows.
  new_features = [
      {key: value for key, value in (features or {}).items()
       if key not in features_to_remove}
      for features in track_features
  ]

  df_features = pd.DataFrame(new_features)

  ## Step 4: Concatenate dataframes ##

  df = pd.concat([df_playlist_track_details, df_genres, df_features], axis=1, join='inner')

  return df