# Lab | Extending the internal databases with audio features

At this point, you have the **hot_songs** and the **not_hot_songs** databases. However, you don't have any acoustic information about the songs. 
The purpose of this lab is to use Spotify's API to extend both databases with this information to use it later.

## Instructions

* Create a function to search a given **single** song in the Spotify API: **search_song(title, artist, limit)**. 

First importing the necessary libraries.

In [1]:
import pandas as pd
import numpy as np

import sys

# getting the spotify credentials out of this file
from config import *
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))

In [3]:
# importing the previous created data frames with top 100 hot songs and top 2000 2023 "not hot" songs:

hot_songs = pd.read_csv('../../../Day_1/Morning/lab-web-scraping-single-page/billboard_top100.csv')
not_hot_songs = pd.read_csv('../../../Day_1/Afternoon/lab-not-hot-songs/not_hot_songs.csv')

In [4]:
hot_songs.drop(columns='Unnamed: 0', inplace=True)
not_hot_songs.drop(columns='Unnamed: 0', inplace=True)

In [5]:
hot_songs.head(3)

Unnamed: 0,artist,title
0,Brenda Lee,Rockin' Around The Christmas Tree
1,Mariah Carey,All I Want For Christmas Is You
2,Bobby Helms,Jingle Bell Rock


In [6]:
hot_songs['title'][0]

"Rockin' Around The Christmas Tree"

In [29]:
hot_songs['title'][0]+" "+hot_songs['artist'][0]

"Rockin' Around The Christmas Tree Brenda Lee"

In [8]:
results = sp.search(q=hot_songs['title'][0]+" "+hot_songs['artist'][0],limit=1)
results

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Rockin%27+Around+The+Christmas+Tree+Brenda+Lee&type=track&offset=0&limit=1',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4cPHsZM98sKzmV26wlwD2W'},
       'href': 'https://api.spotify.com/v1/artists/4cPHsZM98sKzmV26wlwD2W',
       'id': '4cPHsZM98sKzmV26wlwD2W',
       'name': 'Brenda Lee',
       'type': 'artist',
       'uri': 'spotify:artist:4cPHsZM98sKzmV26wlwD2W'}],
     'available_markets': ['AR',
      'AU',
      'AT',
      'BE',
      'BO',
      'BR',
      'BG',
      'CA',
      'CL',
      'CO',
      'CR',
      'CY',
      'CZ',
      'DK',
      'DO',
      'DE',
      'EC',
      'EE',
      'SV',
      'FI',
      'FR',
      'GR',
      'GT',
      'HN',
      'HK',
      'HU',
      'IS',
      'IE',
      'IT',
      'LV',
      'LT',
      'LU',
      'MY',
      'MT',
      'MX',
      'NL',
      'NZ',
      'NI',
      '

In [9]:
json_results = json.dumps(results, ensure_ascii=True)
json_results

'{"tracks": {"href": "https://api.spotify.com/v1/search?query=Rockin%27+Around+The+Christmas+Tree+Brenda+Lee&type=track&offset=0&limit=1", "items": [{"album": {"album_type": "album", "artists": [{"external_urls": {"spotify": "https://open.spotify.com/artist/4cPHsZM98sKzmV26wlwD2W"}, "href": "https://api.spotify.com/v1/artists/4cPHsZM98sKzmV26wlwD2W", "id": "4cPHsZM98sKzmV26wlwD2W", "name": "Brenda Lee", "type": "artist", "uri": "spotify:artist:4cPHsZM98sKzmV26wlwD2W"}], "available_markets": ["AR", "AU", "AT", "BE", "BO", "BR", "BG", "CA", "CL", "CO", "CR", "CY", "CZ", "DK", "DO", "DE", "EC", "EE", "SV", "FI", "FR", "GR", "GT", "HN", "HK", "HU", "IS", "IE", "IT", "LV", "LT", "LU", "MY", "MT", "MX", "NL", "NZ", "NI", "NO", "PA", "PY", "PE", "PH", "PL", "PT", "SG", "SK", "ES", "SE", "CH", "TW", "TR", "UY", "US", "GB", "AD", "LI", "MC", "ID", "JP", "TH", "VN", "RO", "IL", "ZA", "SA", "AE", "BH", "QA", "OM", "KW", "EG", "MA", "DZ", "TN", "LB", "JO", "PS", "IN", "BY", "KZ", "MD", "UA", "AL",

In [10]:
results['tracks']['items'][0]['id']

'2EjXfH91m7f8HiJN1yQg97'

In [11]:
def get_song_ids(df: pd.DataFrame):
    """
    Using spotipy.search to get IDs of songs stored in the df.
    
    The input df should contain a column with song 'title' and song 'artist'.
    
    Returns a list with the song IDs.
    """
    import time
    
    list_of_ids = []
    
    # define a chunk size
    chunk_size = 50
    
    for start in range(0, len(df), chunk_size):
        chunk = df[start:start+chunk_size]
        
        for index, row in chunk.iterrows():
            try:
                search_song = sp.search(q=row['title']+" "+row['artist'],limit=1)
                song_id = search_song['tracks']['items'][0]['id']
                list_of_ids.append(song_id)
            
            except:
                print("Song not found!")
                list_of_ids.append("")
                
        print(f"Processed {start+chunk_size} songs. Now sleeping a bit.")
        time.sleep(10)
        
    return list_of_ids

In [12]:
hot_song_id_list = get_song_ids(hot_songs)

Processed 50 songs. Now sleeping a bit.
Processed 100 songs. Now sleeping a bit.


In [13]:
hot_song_id_list

['2EjXfH91m7f8HiJN1yQg97',
 '7iKRL1F3m4t4dkMponnD0P',
 '7vQbuQcyTflfCIOu3Uzzya',
 '2FRnf9qhLbvw8fu4IBXx78',
 '77khP2fIVhSW23NwxrRluh',
 '5hslUAKq9I9CG2bAulFkHN',
 '2uFaJJtFpPDc5Pa95XzTvg',
 '0oPdaY4dXtc3ZsaG17V972',
 '4xhsWYTOGcal8zt0J161CU',
 '5ASM6Qjiav2xPe7gRkQMsQ',
 '0lizgQ7Qw35od7CYaoMBZb',
 '4PS1e8f2LvuTFgUs1Cn3ON',
 '3YZE5qDV7u1ZD1gZc47ZeR',
 '5mM1jHHXhKc0ZYi0R8EOLn',
 '46pF1zFimM582ss1PrMy68',
 '4HEOgBHRCExyYVeTyrXsnL',
 '1TH5fhztFZmUGWaCXmZ6ie',
 '3QiAAp20rPC3dcAtKtMaqQ',
 '1BxfuPKGuaTgP7aM0Bbdwr',
 '1foCxQtxBweJtZmdxhEHVO',
 '25leEEaz1gIpp7o21Fqyjo',
 '2pnPe4pJtq7689i5ydzvJJ',
 '5a1iz510sv2W9Dt1MvFd5R',
 '3rUGC1vUpkDG9CZFHMur1t',
 '75dfH68JDisE8dDaD4KlVY',
 '1SV1fxF65n9NhRHp3KlBuu',
 '2pXpURmn6zC5ZYDMms6fwa',
 '3QIoEi8Enr9uHffwInGIsC',
 '2IGMVunIBsBLtEQyoI1Mu7',
 '2QpN1ZVw8eJO5f7WcvUA1k',
 '4KULAymBBJcPRpk1yO4dOG',
 '5aIVCx5tnk0ntmdiinnYvw',
 '4qP2V09IpTct5A1ZSnr1zh',
 '38xhBO2AKrJnjdjVnhJES6',
 '7dJYggqjKo71KI9sLzqCs8',
 '59uQI0PADDKeE6UZDTJEe8',
 '7xapw9Oy21WpfEcib2ErSA',
 

Once the desired song is located, **the function should return the href/id/uri of the song to the code** (not to the user) to get the audio features.

* Create a function **get_audio_features(list_of_song_ids)** to obtain the audio features of a given list of songs (the content of list_of_songs can be the href/id/uri or a list with a single song IDs). 

In [14]:
#testing the function audio_features
sp.audio_features(['0cVyQfDyRnMJ0V3rjjdlU3'])

[{'danceability': 0.85,
  'energy': 0.699,
  'key': 0,
  'loudness': -3.292,
  'mode': 1,
  'speechiness': 0.0776,
  'acousticness': 0.152,
  'instrumentalness': 0,
  'liveness': 0.32,
  'valence': 0.915,
  'tempo': 114.481,
  'type': 'audio_features',
  'id': '0cVyQfDyRnMJ0V3rjjdlU3',
  'uri': 'spotify:track:0cVyQfDyRnMJ0V3rjjdlU3',
  'track_href': 'https://api.spotify.com/v1/tracks/0cVyQfDyRnMJ0V3rjjdlU3',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0cVyQfDyRnMJ0V3rjjdlU3',
  'duration_ms': 114234,
  'time_signature': 4}]

In [15]:
def get_audio_features(list_of_song_ids: list):
    """
    Using the song IDs stored in a list to get the audio features out of the Spotify Database.
    Performs a bulk request of 50 IDs at once to retrieve the audio features.
    Returns a dataframe with the corresponding audio features.
    """
    
    import time 
    
    #feature_list = []
    feature_df = pd.DataFrame()

    # define a chunk size
    chunk_size = 50
    
    for start in range(0, len(list_of_song_ids), chunk_size):
        #for start in range(0,100,50) -> chunk_size is the increment. second loop will start from chunk_size
        try:
            features = sp.audio_features(tracks=list_of_song_ids[start:start+chunk_size])
            #features = sp.audio_features(tracks='id1, id2, ..., id49')
        
            for f in features:
                df_temp = pd.DataFrame([f])
                feature_df = pd.concat([feature_df, df_temp], ignore_index=True)
        
        except:
            print(f"Error processing tracks in {start:start+chunk_size}")

    print(f"Processed {start+chunk_size} songs. Now sleeping a bit.")
    time.sleep(10)
    
    return feature_df

In [None]:
#def get_audio_features(list_of_song_ids: list):
#    """
#    Using the song IDs to get the audio features out of the Spotify Database.
   """
    
    import time 
    
    feature_list = []
    
    # First, we are creating chunks:
    chunk_size = 50
    
    for start in range(0, len(list_of_song_ids), chunk_size):
        chunk = list_of_song_ids[start:start+chunk_size]
        
        for i in chunk:
            try:
                my_dict = sp.audio_features([i])[0]
                #my_dict_new = {key : [my_dict[key]] for key in my_dict.keys()}
                feature_list.append(my_dict)
                
            except:
                print("Error retrieving features for song:", i)
    
        print("Sleeping a bit before getting the next ids")
        time.sleep(30)
        
    feature_df = pd.DataFrame(feature_list)
    
    return feature_df

In [16]:
hot_song_feature_df = get_audio_features(hot_song_id_list)

Processed 100 songs. Now sleeping a bit.


In [17]:
hot_song_feature_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.589,0.472,8,-8.749,1,0.0502,0.61400,0.000000,0.5050,0.898,67.196,audio_features,2EjXfH91m7f8HiJN1yQg97,spotify:track:2EjXfH91m7f8HiJN1yQg97,https://api.spotify.com/v1/tracks/2EjXfH91m7f8...,https://api.spotify.com/v1/audio-analysis/2EjX...,126267,4
1,0.548,0.871,7,-4.253,1,0.0708,0.00113,0.000000,0.7650,0.640,128.136,audio_features,7iKRL1F3m4t4dkMponnD0P,spotify:track:7iKRL1F3m4t4dkMponnD0P,https://api.spotify.com/v1/tracks/7iKRL1F3m4t4...,https://api.spotify.com/v1/audio-analysis/7iKR...,215653,4
2,0.754,0.424,2,-8.463,1,0.0363,0.64300,0.000000,0.0652,0.806,119.705,audio_features,7vQbuQcyTflfCIOu3Uzzya,spotify:track:7vQbuQcyTflfCIOu3Uzzya,https://api.spotify.com/v1/tracks/7vQbuQcyTflf...,https://api.spotify.com/v1/audio-analysis/7vQb...,130973,4
3,0.735,0.478,2,-12.472,1,0.0293,0.18900,0.000002,0.3550,0.947,107.682,audio_features,2FRnf9qhLbvw8fu4IBXx78,spotify:track:2FRnf9qhLbvw8fu4IBXx78,https://api.spotify.com/v1/tracks/2FRnf9qhLbvw...,https://api.spotify.com/v1/audio-analysis/2FRn...,262960,4
4,0.683,0.375,0,-13.056,1,0.0303,0.57900,0.000000,0.0760,0.888,140.467,audio_features,77khP2fIVhSW23NwxrRluh,spotify:track:77khP2fIVhSW23NwxrRluh,https://api.spotify.com/v1/tracks/77khP2fIVhSW...,https://api.spotify.com/v1/audio-analysis/77kh...,135533,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.616,0.834,9,-3.069,1,0.0664,0.06000,0.000000,0.3380,0.746,151.701,audio_features,0O3U5iwTbiXCREMkvotJuN,spotify:track:0O3U5iwTbiXCREMkvotJuN,https://api.spotify.com/v1/tracks/0O3U5iwTbiXC...,https://api.spotify.com/v1/audio-analysis/0O3U...,165848,4
96,0.711,0.809,2,-4.389,0,0.0955,0.04470,0.000000,0.3390,0.816,106.017,audio_features,2KslE17cAJNHTsI2MI0jb2,spotify:track:2KslE17cAJNHTsI2MI0jb2,https://api.spotify.com/v1/tracks/2KslE17cAJNH...,https://api.spotify.com/v1/audio-analysis/2Ksl...,206020,4
97,0.498,0.764,4,-5.006,1,0.0309,0.12300,0.000031,0.1190,0.489,147.984,audio_features,73zawW1ttszLRgT9By826D,spotify:track:73zawW1ttszLRgT9By826D,https://api.spotify.com/v1/tracks/73zawW1ttszL...,https://api.spotify.com/v1/audio-analysis/73za...,191231,4
98,0.844,0.637,10,-6.072,0,0.0325,0.22000,0.000000,0.0459,0.960,125.012,audio_features,2Sy3dDqPwjnTO3PnommJPe,spotify:track:2Sy3dDqPwjnTO3PnommJPe,https://api.spotify.com/v1/tracks/2Sy3dDqPwjnT...,https://api.spotify.com/v1/audio-analysis/2Sy3...,189426,3


* Once the previous function has been created, create another function **add_audio_features(df, audio_features_df)** to concat a given dataframe with the audio features dataframe and return the extended data frame.

In [18]:
def add_audio_features(df, audio_features_df):
    """
    Concats a given dataframe with the audio features dataframe and return the extended data frame. 
    """
    
    final_df = pd.concat([df, audio_features_df], axis=1)
    
    return final_df

In [19]:
final_hot_songs = add_audio_features(hot_songs, hot_song_feature_df)

In [20]:
final_hot_songs.head()

Unnamed: 0,artist,title,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,Brenda Lee,Rockin' Around The Christmas Tree,0.589,0.472,8,-8.749,1,0.0502,0.614,0.0,0.505,0.898,67.196,audio_features,2EjXfH91m7f8HiJN1yQg97,spotify:track:2EjXfH91m7f8HiJN1yQg97,https://api.spotify.com/v1/tracks/2EjXfH91m7f8...,https://api.spotify.com/v1/audio-analysis/2EjX...,126267,4
1,Mariah Carey,All I Want For Christmas Is You,0.548,0.871,7,-4.253,1,0.0708,0.00113,0.0,0.765,0.64,128.136,audio_features,7iKRL1F3m4t4dkMponnD0P,spotify:track:7iKRL1F3m4t4dkMponnD0P,https://api.spotify.com/v1/tracks/7iKRL1F3m4t4...,https://api.spotify.com/v1/audio-analysis/7iKR...,215653,4
2,Bobby Helms,Jingle Bell Rock,0.754,0.424,2,-8.463,1,0.0363,0.643,0.0,0.0652,0.806,119.705,audio_features,7vQbuQcyTflfCIOu3Uzzya,spotify:track:7vQbuQcyTflfCIOu3Uzzya,https://api.spotify.com/v1/tracks/7vQbuQcyTflf...,https://api.spotify.com/v1/audio-analysis/7vQb...,130973,4
3,Wham!,Last Christmas,0.735,0.478,2,-12.472,1,0.0293,0.189,2e-06,0.355,0.947,107.682,audio_features,2FRnf9qhLbvw8fu4IBXx78,spotify:track:2FRnf9qhLbvw8fu4IBXx78,https://api.spotify.com/v1/tracks/2FRnf9qhLbvw...,https://api.spotify.com/v1/audio-analysis/2FRn...,262960,4
4,Burl Ives,A Holly Jolly Christmas,0.683,0.375,0,-13.056,1,0.0303,0.579,0.0,0.076,0.888,140.467,audio_features,77khP2fIVhSW23NwxrRluh,spotify:track:77khP2fIVhSW23NwxrRluh,https://api.spotify.com/v1/tracks/77khP2fIVhSW...,https://api.spotify.com/v1/audio-analysis/77kh...,135533,4


In [21]:
not_hot_song_id_list = get_song_ids(not_hot_songs)

Processed 50 songs. Now sleeping a bit.
Processed 100 songs. Now sleeping a bit.
Processed 150 songs. Now sleeping a bit.
Processed 200 songs. Now sleeping a bit.
Processed 250 songs. Now sleeping a bit.
Processed 300 songs. Now sleeping a bit.
Processed 350 songs. Now sleeping a bit.
Processed 400 songs. Now sleeping a bit.
Processed 450 songs. Now sleeping a bit.
Processed 500 songs. Now sleeping a bit.
Processed 550 songs. Now sleeping a bit.
Processed 600 songs. Now sleeping a bit.
Processed 650 songs. Now sleeping a bit.
Processed 700 songs. Now sleeping a bit.
Processed 750 songs. Now sleeping a bit.
Processed 800 songs. Now sleeping a bit.
Processed 850 songs. Now sleeping a bit.
Processed 900 songs. Now sleeping a bit.
Processed 950 songs. Now sleeping a bit.
Processed 1000 songs. Now sleeping a bit.
Processed 1050 songs. Now sleeping a bit.
Processed 1100 songs. Now sleeping a bit.
Processed 1150 songs. Now sleeping a bit.
Processed 1200 songs. Now sleeping a bit.
Processed 12

In [22]:
len(not_hot_song_id_list)

1995

In [24]:
not_hot_song_feature_df = get_audio_features(not_hot_song_id_list)

Processed 2000 songs. Now sleeping a bit.


In [25]:
not_hot_song_feature_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.397,0.386,0,-10.405,0,0.0503,0.27100,0.000000,0.1880,0.210,144.242,audio_features,6l8GvAyoUZwWDgF1e4822w,spotify:track:6l8GvAyoUZwWDgF1e4822w,https://api.spotify.com/v1/tracks/6l8GvAyoUZwW...,https://api.spotify.com/v1/audio-analysis/6l8G...,355400,4
1,0.401,0.383,9,-10.048,1,0.0279,0.51000,0.007800,0.1210,0.285,96.957,audio_features,5B5YKjgne3TZzNpMsN9aj1,spotify:track:5B5YKjgne3TZzNpMsN9aj1,https://api.spotify.com/v1/tracks/5B5YKjgne3TZ...,https://api.spotify.com/v1/audio-analysis/5B5Y...,269986,4
2,0.579,0.508,2,-9.484,1,0.0270,0.00574,0.000494,0.0575,0.609,147.125,audio_features,40riOy7x9W7GXjyGp4pjAv,spotify:track:40riOy7x9W7GXjyGp4pjAv,https://api.spotify.com/v1/tracks/40riOy7x9W7G...,https://api.spotify.com/v1/audio-analysis/40ri...,391376,4
3,0.331,0.550,0,-6.483,1,0.0272,0.60500,0.000004,0.1920,0.429,177.734,audio_features,70C4NyhjD5OZUMzvWZ3njJ,spotify:track:70C4NyhjD5OZUMzvWZ3njJ,https://api.spotify.com/v1/tracks/70C4NyhjD5OZ...,https://api.spotify.com/v1/audio-analysis/70C4...,339000,3
4,0.209,0.417,3,-8.740,1,0.0338,0.16400,0.001960,0.1130,0.124,138.178,audio_features,7LVHVU3tWfcxj5aiPFEW4Q,spotify:track:7LVHVU3tWfcxj5aiPFEW4Q,https://api.spotify.com/v1/tracks/7LVHVU3tWfcx...,https://api.spotify.com/v1/audio-analysis/7LVH...,295533,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1990,0.877,0.422,10,-14.933,0,0.0546,0.04000,0.000406,0.1820,0.970,131.103,audio_features,5EAgXGJ8Kw5QAfhQkZXYqT,spotify:track:5EAgXGJ8Kw5QAfhQkZXYqT,https://api.spotify.com/v1/tracks/5EAgXGJ8Kw5Q...,https://api.spotify.com/v1/audio-analysis/5EAg...,307933,4
1991,0.581,0.654,0,-7.428,1,0.0249,0.18800,0.000461,0.2690,0.620,104.996,audio_features,66RRhWLXx7OvJidJvhs7AE,spotify:track:66RRhWLXx7OvJidJvhs7AE,https://api.spotify.com/v1/tracks/66RRhWLXx7Ov...,https://api.spotify.com/v1/audio-analysis/66RR...,173400,4
1992,0.419,0.383,4,-11.782,1,0.0256,0.07240,0.017600,0.0896,0.350,87.568,audio_features,2a1iMaoWQ5MnvLFBDv4qkf,spotify:track:2a1iMaoWQ5MnvLFBDv4qkf,https://api.spotify.com/v1/tracks/2a1iMaoWQ5Mn...,https://api.spotify.com/v1/audio-analysis/2a1i...,257480,4
1993,0.870,0.706,1,-5.769,1,0.0417,0.56000,0.000000,0.1040,0.953,122.980,audio_features,7nuBU1HCcOwG5f1orN4ByW,spotify:track:7nuBU1HCcOwG5f1orN4ByW,https://api.spotify.com/v1/tracks/7nuBU1HCcOwG...,https://api.spotify.com/v1/audio-analysis/7nuB...,225932,4


In [26]:
final_not_hot_songs = add_audio_features(not_hot_songs, not_hot_song_feature_df)

In [27]:
final_hot_songs.to_csv('hot_songs.csv')
final_not_hot_songs.to_csv('not_hot_songs.csv')

In [28]:
function_file = 'my_function.py'  
with open(function_file, 'a') as file:
    file.write(
    '''
    def get_song_ids(df: pd.DataFrame):
    """
    Using spotipy.search to get IDs of songs stored in the df.
    
    The input df should contain a column with song 'title' and song 'artist'.
    
    Returns a list with the song IDs.
    """
    import time
    
    list_of_ids = []
    
    # define a chunk size
    chunk_size = 50
    
    for start in range(0, len(df), chunk_size):
        chunk = df[start:start+chunk_size]
        
        for index, row in chunk.iterrows():
            try:
                search_song = sp.search(q=row['title']+" "+row['artist'],limit=1)
                song_id = search_song['tracks']['items'][0]['id']
                list_of_ids.append(song_id)
            
            except:
                print("Song not found!")
                list_of_ids.append("")
                
        print(f"Processed {start+chunk_size} songs. Now sleeping a bit.")
        time.sleep(10)
        
    return list_of_ids
    
    
    
    def get_audio_features(list_of_song_ids: list):
    """
    Using the song IDs stored in a list to get the audio features out of the Spotify Database.
    Performs a bulk request of 50 IDs at once to retrieve the audio features.
    Returns a dataframe with the corresponding audio features.
    """
    
    import time 
    
    #feature_list = []
    feature_df = pd.DataFrame()

    # define a chunk size
    chunk_size = 50
    
    for start in range(0, len(list_of_song_ids), chunk_size):
        #for start in range(0,100,50) -> chunk_size is the increment. second loop will start from chunk_size
        try:
            features = sp.audio_features(tracks=list_of_song_ids[start:start+chunk_size])
            #features = sp.audio_features(tracks='id1, id2, ..., id49')
        
            for f in features:
                df_temp = pd.DataFrame([f])
                feature_df = pd.concat([feature_df, df_temp], ignore_index=True)
        
        except:
            print(f"Error processing tracks in {start:start+chunk_size}")

    print(f"Processed {start+chunk_size} songs. Now sleeping a bit.")
    time.sleep(10)
    
    return feature_df
    
    
    
    def add_audio_features(df, audio_features_df):
    """
    Concats a given dataframe with the audio features dataframe and return the extended data frame. 
    """
    
    final_df = pd.concat([df, audio_features_df], axis=1)
    
    return final_df
    ''')