In [3]:
import pandas as pd
import numpy as np
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

In [5]:
#input credentials
from config import *

In [179]:
def search_song(title:str, artist:str, limit=1) -> str:
    """
    Function takes two values: title and artist and then searches for it in Spotify
    It returns the number of results set by `limit`. 

    If the limit is great than 1, the user must select which item to pick.

    The function returns the Spotify id number.

    Input
    title: string of song title
    artist: string of artist name
    limit: the number of results to return

    Output
    string with Spotify song id number
    """
    
    #Initialize SpotiPy with user credentias #
    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=CLIENT_ID,
                                                           client_secret=CLIENT_SECRET))

    try:
        #clean artist name
        artist = (artist.replace('Featuring', 'feat.')
                        .replace('X', '')
                        .replace('&', '')
                        .replace('x', ''))
        
        #search for song
        results = sp.search(q="artist:" + artist + " track:" + title, limit=limit)
    
        #if len(results['tracks']['items']) > 1:
        if limit > 1:
            select_dict = {}
            for i in range(len(results['tracks']['items'])):
                #create a dataframe with the results
                id = results['tracks']['items'][i]['id']
                select_dict[id] = [results["tracks"]["items"][i]['name'],
                                   results['tracks']['items'][i]['album']['artists'][0]['name'],
                                   results['tracks']['items'][i]['album']['name']]
                
            df_results = (pd.DataFrame.from_dict(select_dict,
                                                 orient='index',
                                                 columns = ['title', 'artist', 'album'])
                          .reset_index(names = 'id',
                                       drop=False)
                          .drop_duplicates(subset = ['title', 'artist', 'album'],
                                           keep='first')
                         )
            
            display(df_results[['title', 'artist', 'album']])
                
            row_number = int(input('Please enter the number of the song you are looking for'))
    
            id_number = df_results.iloc[row_number]['id']
    
            return id_number
    
        else:
            return results['tracks']['items'][0]['id']
        
    except IndexError:
        return 'Not found'

In [180]:
search_song('Bad Blood', 'Taylor Swift')

'273dCMFseLcVsoSWx59IoE'

In [181]:
search_song('All My Life', 'Lil Durk Featuring J. Cole')

'6HgWWaMu31KdOpEG5l28BG'

In [124]:
def get_audio_features(list_of_ids)->pd.DataFrame:
    """
    This function takes a list of id values and gets the spotify audio features for this song.
    The function returns a pandas DataFrame with the id number and the 

    Input
    list_of_ids: a list with id values as strings

    Output
    pandas DataFrame with the title of the song, name of the artist and all features
    """
    
    #Initialize SpotiPy with user credentias #
    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=CLIENT_ID,
                                                           client_secret=CLIENT_SECRET))
    full_dict={}
    for id in list_of_ids:
        
        my_dict = sp.audio_features(id)[0]

        #print(my_dict)

        my_dict_new = { key : [my_dict[key]] for key in list(my_dict.keys()) }
        #print(my_dict_new)
        
        full_dict[id] = my_dict_new
        
        #add time pause here
    
    #print(full_dict)
        #my_dict_new['titles'] = [results["tracks"]["items"][0]['name']]
        #my_dict_new['artists']= [results['tracks']['items'][0]['album']['artists'][0]['name']]
        
        
    features_df = pd.DataFrame.from_dict(full_dict,orient='index').reset_index(drop = False)
    features_df = features_df.rename(columns={'index':'id'})

    return features_df
    

In [126]:
get_audio_features(test_list)

Unnamed: 0,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id.1,uri,track_href,analysis_url,duration_ms,time_signature
0,3k79jB4aGmMDUQzEwa46Rz,[0.511],[0.532],[5],[-5.745],[1],[0.056],[0.169],[0],[0.311],[0.322],[137.827],[audio_features],[3k79jB4aGmMDUQzEwa46Rz],[spotify:track:3k79jB4aGmMDUQzEwa46Rz],[https://api.spotify.com/v1/tracks/3k79jB4aGmM...,[https://api.spotify.com/v1/audio-analysis/3k7...,[219724],[4]
1,273dCMFseLcVsoSWx59IoE,[0.65],[0.802],[7],[-6.114],[1],[0.181],[0.0871],[6.54e-06],[0.148],[0.295],[170.146],[audio_features],[273dCMFseLcVsoSWx59IoE],[spotify:track:273dCMFseLcVsoSWx59IoE],[https://api.spotify.com/v1/tracks/273dCMFseLc...,[https://api.spotify.com/v1/audio-analysis/273...,[211933],[4]


In [165]:
#read in the file with hot songs
hot = pd.read_csv('data/popular.csv')

In [166]:
hot.shape

(100, 2)

In [167]:
chunk_num = 2
chunks = np.split(hot, chunk_num)

In [168]:
chunks[0].head()

Unnamed: 0,titles,artists
0,Vampire,Olivia Rodrigo
1,Last Night,Morgan Wallen
2,Fast Car,Luke Combs
3,Calm Down,Rema & Selena Gomez
4,Flowers,Miley Cyrus


In [169]:
import time

list_of_ids = []

for chunk in chunks:
    #make a tuple of titles and artists
    artist_name = list(chunk.artists)
    song_title = list(chunk.titles)
    tup = list(zip(artist_name, song_title))
    
    for i, t in enumerate(tup):
        list_of_ids.append(search_song(tup[i][1],tup[i][0]))
        time.sleep(10)

['3k79jB4aGmMDUQzEwa46Rz']
['3k79jB4aGmMDUQzEwa46Rz', '59uQI0PADDKeE6UZDTJEe8']
['3k79jB4aGmMDUQzEwa46Rz', '59uQI0PADDKeE6UZDTJEe8', '1Lo0QY9cvc8sUB2vnIOxDT']
['3k79jB4aGmMDUQzEwa46Rz', '59uQI0PADDKeE6UZDTJEe8', '1Lo0QY9cvc8sUB2vnIOxDT', '1s7oOCT8vauUh01PbJD6ps']
['3k79jB4aGmMDUQzEwa46Rz', '59uQI0PADDKeE6UZDTJEe8', '1Lo0QY9cvc8sUB2vnIOxDT', '1s7oOCT8vauUh01PbJD6ps', '0yLdNVWF3Srea0uzk55zFn']
['3k79jB4aGmMDUQzEwa46Rz', '59uQI0PADDKeE6UZDTJEe8', '1Lo0QY9cvc8sUB2vnIOxDT', '1s7oOCT8vauUh01PbJD6ps', '0yLdNVWF3Srea0uzk55zFn', 'Not found']
['3k79jB4aGmMDUQzEwa46Rz', '59uQI0PADDKeE6UZDTJEe8', '1Lo0QY9cvc8sUB2vnIOxDT', '1s7oOCT8vauUh01PbJD6ps', '0yLdNVWF3Srea0uzk55zFn', 'Not found', '1BxfuPKGuaTgP7aM0Bbdwr']
['3k79jB4aGmMDUQzEwa46Rz', '59uQI0PADDKeE6UZDTJEe8', '1Lo0QY9cvc8sUB2vnIOxDT', '1s7oOCT8vauUh01PbJD6ps', '0yLdNVWF3Srea0uzk55zFn', 'Not found', '1BxfuPKGuaTgP7aM0Bbdwr', '4rXLjWdF2ZZpXCVTfWcshS']
['3k79jB4aGmMDUQzEwa46Rz', '59uQI0PADDKeE6UZDTJEe8', '1Lo0QY9cvc8sUB2vnIOxDT', '1s7oOCT8vauUh01

In [170]:
hot['id'] = list_of_ids
display(hot)

Unnamed: 0,titles,artists,id
0,Vampire,Olivia Rodrigo,3k79jB4aGmMDUQzEwa46Rz
1,Last Night,Morgan Wallen,59uQI0PADDKeE6UZDTJEe8
2,Fast Car,Luke Combs,1Lo0QY9cvc8sUB2vnIOxDT
3,Calm Down,Rema & Selena Gomez,1s7oOCT8vauUh01PbJD6ps
4,Flowers,Miley Cyrus,0yLdNVWF3Srea0uzk55zFn
...,...,...,...
95,Ain't That Some,Morgan Wallen,2YVlDlk2zRceefo6XlC6DX
96,Lagunas,Peso Pluma & Jasiel Nunez,Not found
97,Dial Drunk,Noah Kahan,0caJ2wkqp4UmXBwdR2JvB5
98,Save Me,Jelly Roll With Lainey Wilson,4Bee3PcNdfeqO1B80dDgFn


In [177]:
# hot_not_found = hot[hot['id']=='Not found']


# hot_not_found['artists'] = (hot_not_found['artists'].str.replace('Featuring', 'feat.')
#                                                     .str.replace('X', '')
#                                                        .str.replace('&', '')
#                                                        .str.replace('x', '')
#                           )
# hot_not_found

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hot_not_found['artists'] = (hot_not_found['artists'].str.replace('Featuring', 'feat.')


Unnamed: 0,titles,artists,id
5,All My Life,Lil Durk feat. J. Cole,Not found
8,Karma,Taylor Swift feat. Ice Spice,Not found
14,Ella Baila Sola,Eslabon Armado Peso Pluma,Not found
19,Endless Fashion,Lil Uzi Vert feat. Nicki Minaj,Not found
21,Die For You,The Weeknd Ariana Grande,Not found
25,La Bebe,Yng Lvcas Peso Pluma,Not found
30,Aye,Lil Uzi Vert feat. Travis Scott,Not found
34,Barbie World,Nicki Minaj Ice Spice With Aqua,Not found
40,Put It On Da Floor Again,Latto feat. Cardi B,Not found
42,Tulum,Peso Pluma Grupo Frontera,Not found


In [178]:
# new_list_ids=[]

# #make a tuple of titles and artists
# artist_name = list(hot_not_found.artists)
# song_title = list(hot_not_found.titles)
# tup = list(zip(artist_name, song_title))
    
# for i, t in enumerate(tup):
#     new_list_ids.append(search_song(tup[i][1],tup[i][0]))
#     print(new_list_ids)
#     time.sleep(5)

['6HgWWaMu31KdOpEG5l28BG']
['6HgWWaMu31KdOpEG5l28BG', '4i6cwNY6oIUU2XZxPIw82Y']
['6HgWWaMu31KdOpEG5l28BG', '4i6cwNY6oIUU2XZxPIw82Y', '3dnP0JxCgygwQH9Gm7q7nb']
['6HgWWaMu31KdOpEG5l28BG', '4i6cwNY6oIUU2XZxPIw82Y', '3dnP0JxCgygwQH9Gm7q7nb', '41bmnQZoDMQdDh5zyomtW7']
['6HgWWaMu31KdOpEG5l28BG', '4i6cwNY6oIUU2XZxPIw82Y', '3dnP0JxCgygwQH9Gm7q7nb', '41bmnQZoDMQdDh5zyomtW7', '7oDd86yk8itslrA9HRP2ki']
['6HgWWaMu31KdOpEG5l28BG', '4i6cwNY6oIUU2XZxPIw82Y', '3dnP0JxCgygwQH9Gm7q7nb', '41bmnQZoDMQdDh5zyomtW7', '7oDd86yk8itslrA9HRP2ki', '2UW7JaomAMuX9pZrjVpHAU']
['6HgWWaMu31KdOpEG5l28BG', '4i6cwNY6oIUU2XZxPIw82Y', '3dnP0JxCgygwQH9Gm7q7nb', '41bmnQZoDMQdDh5zyomtW7', '7oDd86yk8itslrA9HRP2ki', '2UW7JaomAMuX9pZrjVpHAU', '2Hc1LaV6bzFil3dE71eORA']
['6HgWWaMu31KdOpEG5l28BG', '4i6cwNY6oIUU2XZxPIw82Y', '3dnP0JxCgygwQH9Gm7q7nb', '41bmnQZoDMQdDh5zyomtW7', '7oDd86yk8itslrA9HRP2ki', '2UW7JaomAMuX9pZrjVpHAU', '2Hc1LaV6bzFil3dE71eORA', '741UUVE2kuITl0c6zuqqbO']
['6HgWWaMu31KdOpEG5l28BG', '4i6cwNY6oIUU2XZxPIw82Y', '3d