In [1]:
import pandas as pd
import numpy as np
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import time

In [2]:
from config import *

ModuleNotFoundError: No module named 'config'

In [None]:
def _clean_artist_name(artist: str) -> str:
    """Normalise a chart-style artist credit for use in a Spotify search query."""
    # 'Featuring' becomes 'feat.' and '&' is treated as a plain separator.
    tokens = artist.replace('Featuring', 'feat.').replace('&', ' ').split()
    last_pos = len(tokens) - 1
    # Only a lone 'x'/'X' sitting BETWEEN two names is a collaboration marker.
    # Letters inside a name ('Sexyy Red') and a leading/trailing token
    # ('Lil Nas X', 'X Ambassadors') are part of the name and must be kept.
    kept = [token for pos, token in enumerate(tokens)
            if token.lower() != 'x' or pos in (0, last_pos)]
    return ' '.join(kept)


def search_song(title: str, artist: str, limit: int = 1) -> str:
    """
    Search Spotify for a song by title and artist and return its Spotify id.

    With `limit` of 1 (or less) the id of the first hit is returned directly.
    With `limit` greater than 1 the distinct hits are displayed as a table and
    the user is asked to type the row number of the song they want.

    Input
    title: string of song title (used in the query unchanged)
    artist: string of artist name (cleaned before searching: 'Featuring' ->
            'feat.', '&' and standalone 'x'/'X' separators are removed)
    limit: the maximum number of results to request from Spotify

    Output
    string with the Spotify song id, or 'Not found' when the search has no
    hits or the chosen row number does not exist

    Raises
    ValueError if the user types something that is not an integer
    """

    # Initialize SpotiPy with the client credentials
    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=CLIENT_ID,
                                                               client_secret=CLIENT_SECRET))

    try:
        query = "artist:" + _clean_artist_name(artist) + " track:" + title
        items = sp.search(q=query, limit=limit)['tracks']['items']

        if limit <= 1:
            # An empty result list raises IndexError -> 'Not found' below.
            return items[0]['id']

        if not items:
            # Nothing to choose from, so do not bother the user with a prompt.
            return 'Not found'

        candidates = pd.DataFrame(
            [{'id': item['id'],
              'title': item['name'],
              'artist': item['album']['artists'][0]['name'],
              'album': item['album']['name']}
             for item in items],
            columns=['id', 'title', 'artist', 'album'])

        # Re-number the rows after removing duplicates so that the labels the
        # user sees are exactly the positions used by .iloc further down.
        candidates = (candidates.drop_duplicates(subset=['title', 'artist', 'album'],
                                                 keep='first')
                                .reset_index(drop=True))

        display(candidates[['title', 'artist', 'album']])

        row_number = int(input('Please enter the number of the song you are looking for'))

        # An out-of-range row number raises IndexError -> 'Not found' below.
        return candidates.iloc[row_number]['id']

    except IndexError:
        return 'Not found'

In [None]:
# Smoke test: look up one track with the default limit of 1, which returns the
# first hit's Spotify id directly (no interactive prompt).
search_song('Bad Blood', 'Taylor Swift')

In [None]:
# Second smoke test with a title that carries a '(feat. ... & ...)' suffix.
# Only the artist argument is cleaned inside search_song; the title is put
# into the Spotify query exactly as written here.
search_song('Pound Town 2 (feat. Nicki Minaj & Tay Keith)', 'Sexyy Red')

In [None]:
def get_audio_features(list_of_ids, batch_size: int = 100) -> pd.DataFrame:
    """
    Fetch the Spotify audio features for a collection of track ids.

    Ids are de-duplicated (first occurrence wins) and requested in batches
    rather than one call per id. Ids for which Spotify returns no features
    are skipped instead of breaking the DataFrame construction.

    Input
    list_of_ids: an iterable with Spotify track ids as strings
    batch_size: how many ids to send per request; clamped to 1..100, the
                maximum the Spotify audio-features endpoint accepts

    Output
    pandas DataFrame with one row per id that has features. The first column,
    'id_to_drop', holds the requested id; the remaining columns are whatever
    fields the feature records contain (they are expected to include their
    own 'id' field, which add_audio_features merges on). For empty input, or
    when no id has features, an empty frame with the columns 'id_to_drop' and
    'id' is returned so that a later merge on 'id' still works.
    """

    # Initialize SpotiPy with the client credentials
    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=CLIENT_ID,
                                                               client_secret=CLIENT_SECRET))

    batch_size = max(1, min(int(batch_size), 100))

    # dict.fromkeys keeps the first occurrence of each id in its original order.
    unique_ids = list(dict.fromkeys(list_of_ids))

    features_by_id = {}
    for start in range(0, len(unique_ids), batch_size):
        if start:
            # Pause between requests only; there is nothing to wait for
            # before the first batch or after the last one.
            time.sleep(1)

        batch = unique_ids[start:start + batch_size]
        # Results come back positionally aligned with the requested ids;
        # a None entry means Spotify has no features for that id.
        for track_id, features in zip(batch, sp.audio_features(batch) or []):
            if features is not None:
                features_by_id[track_id] = features

    if not features_by_id:
        return pd.DataFrame(columns=['id_to_drop', 'id'])

    features_df = pd.DataFrame.from_dict(features_by_id, orient='index').reset_index(drop=False)
    return features_df.rename(columns={'index': 'id_to_drop'})

In [None]:
def add_audio_features(df, audio_features_df):
    """
    Attach Spotify audio features to a table of songs.

    The two frames are inner-joined on their shared 'id' column, so only
    songs that appear in both inputs are kept.

    Input
    df: dataframe with song title, artist name, id
    audio_features_df: dataframe with id and features

    Output
    pandas dataframe with the columns of both inputs for every matching id
    """
    # Join copies so the frames passed in by the caller are left untouched.
    songs, features = df.copy(), audio_features_df.copy()
    return pd.merge(songs, features, how='inner', on='id')


In [None]:
# Get features for the hot dataset

def get_hot_features(file_name, new_file_name, chunk_num=2, pause_seconds=30):
    """
    Read a CSV of songs, look up each song on Spotify, fetch its audio
    features and write the combined table to a new CSV.

    Input
    file_name: path of the input CSV; it must have 'titles' and 'artists' columns
    new_file_name: path of the CSV to write
    chunk_num: number of chunks the lookups are split into (default 2)
    pause_seconds: seconds to wait between two chunks of lookups (default 30)

    Output
    1 once the output file has been written. Songs whose lookup returned
    'Not found' are left out of the file. If no song was found at all, the
    file contains only the header of the input columns plus 'id'.
    """
    # read in the file with hot songs
    hot = pd.read_csv(file_name)

    # Split row POSITIONS rather than the frame itself: array_split copes with
    # row counts that are not a multiple of chunk_num (np.split raises there).
    position_chunks = [positions
                       for positions in np.array_split(np.arange(len(hot)), chunk_num)
                       if len(positions)]

    # get ids, pausing between chunks (not after the last one)
    list_of_ids = []
    for chunk_no, positions in enumerate(position_chunks):
        if chunk_no:
            time.sleep(pause_seconds)

        chunk = hot.iloc[positions]
        for title, artist in zip(chunk['titles'], chunk['artists']):
            list_of_ids.append(search_song(title, artist))

    # create new df, assign ids to the id column and drop ids 'Not found'
    hot_with_ids = hot.copy()
    hot_with_ids['id'] = list_of_ids
    hot_with_ids = hot_with_ids[hot_with_ids['id'] != 'Not found']

    if hot_with_ids.empty:
        # Nothing to enrich: skip the feature lookup and the merge.
        hot_with_ids.to_csv(new_file_name, index=False)
        return 1

    # use the remaining ids to get a dataframe of features
    features = get_audio_features(list(hot_with_ids['id']))

    # merge the two tables and save the result
    complete = add_audio_features(hot_with_ids, features)
    complete.to_csv(new_file_name, index=False)

    return 1

In [None]:
# Build the feature-enriched CSV from data/popular.csv. This runs one Spotify
# search per row plus the audio-feature lookups and contains deliberate
# time.sleep pauses, so expect it to take a while.
get_hot_features('data/popular.csv', 'data/popular_with_features.csv')