In [1]:
from config import *

In [10]:
import spotipy
import json
import numpy as np
import pandas as pd
from time import sleep
from tqdm.notebook import tqdm
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
# Build the shared Spotify client using the client-credentials auth manager.
# `client_id` / `client_secret` are presumably provided by `from config import *` — confirm.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

In [4]:
# Create a function to search a given single song in the Spotify API
def search_song(artist_name, track_title, limit=10):
    """Search Spotify for a track by artist and title and tabulate the matches.

    Uses the module-level ``sp`` client. Every match is printed as it is
    read; when the search returns no items a "No results found" message is
    printed instead.

    Parameters
    ----------
    artist_name : str
        Artist name placed in the ``artist:`` field of the search query.
    track_title : str
        Track title placed in the ``track:`` field of the search query.
    limit : int, default 10
        Maximum number of tracks requested from the search call.

    Returns
    -------
    pandas.DataFrame
        One row per match with the columns ``Title``, ``Artist`` and
        ``Album``; empty (same columns) when nothing was found.
    """
    search_query = f"artist:{artist_name} track:{track_title}"
    results = sp.search(q=search_query, type='track', limit=limit)
    items = results['tracks']['items']

    track_names_list = []
    artist_list = []
    album_name_list = []

    if not items:
        print(f"No results found for '{track_title}' by '{artist_name}'.")

    for track in items:
        # Only the fields that end up in the result are read; the previous
        # uri/id/href lookups were unused and could raise KeyError for nothing.
        track_name = track['name']
        artists = ", ".join(artist['name'] for artist in track['artists'])
        album_name = track['album']['name']
        print(f"Track Name: {track_name}\nArtists: {artists}\nAlbum: {album_name}")
        track_names_list.append(track_name)
        artist_list.append(artists)
        album_name_list.append(album_name)

    return pd.DataFrame({"Title": track_names_list, "Artist": artist_list, "Album": album_name_list})

In [8]:
# Read the chart data from the CSV next to the notebook and preview the first rows.
billboard_top_100 = pd.read_csv(filepath_or_buffer='./hot100.csv')
billboard_top_100.head(n=5)

100


Unnamed: 0,Artist,Title
0,Doja Cat,Paint The Town Red
1,SZA,Snooze
2,Taylor Swift,Cruel Summer
3,Luke Combs,Fast Car
4,Jung Kook & Jack Harlow,3D


In [13]:
# Create function to search for the ID's of a list of songs
def songs_ids(df, chunk_size=2, pause_seconds=2):
    """Look up a Spotify track id for every row of ``df`` and store it in ``df['ids']``.

    Rows are processed in chunks of ``chunk_size``; the function sleeps
    ``pause_seconds`` between chunks (presumably to throttle requests to the
    API). Each row triggers one ``sp.search`` call (module-level client) and
    the id of the first returned track is kept. When the lookup fails for
    any reason, ``numpy.nan`` is stored and a message is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Title`` and ``Artist``. It is modified in
        place: an ``ids`` column is added (or overwritten).
    chunk_size : int, default 2
        Number of rows searched before pausing.
    pause_seconds : float, default 2
        Seconds to sleep between two consecutive chunks.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the ``ids`` column set.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    id_song = []
    n_rows = len(df)

    # tqdm treats its first positional argument as an iterable, so the row
    # count has to be passed as ``total=`` for the bar to show real progress.
    # The context manager closes the bar even if the loop is interrupted.
    with tqdm(total=n_rows) as pbar:
        for start in range(0, n_rows, chunk_size):
            chunk = df.iloc[start:start + chunk_size]

            for _, row in chunk.iterrows():
                title = row["Title"]
                artist = row["Artist"]
                # A space now separates the title from "artist:"; before, the
                # last word of the title was glued to it ("...Redartist: ...").
                # NOTE(review): this stays a loose keyword query (space after
                # the colon); search_song uses the strict "artist:X track:Y"
                # filter form — confirm which one is intended here.
                query = f"track: {title} artist: {artist}"

                try:
                    results = sp.search(q=query, limit=1)
                    song_id = results["tracks"]["items"][0]["id"]
                except Exception:
                    # Best-effort lookup: keep going on any failure, but do
                    # not swallow KeyboardInterrupt/SystemExit as a bare
                    # ``except:`` would.
                    song_id = np.nan
                    print(f"ID not found for {title} by {artist}")

                id_song.append(song_id)
                pbar.update(1)

            # No need to wait once the last chunk has been processed.
            if start + chunk_size < n_rows:
                sleep(pause_seconds)

    df['ids'] = id_song

    return df

In [14]:
# Attach a Spotify track id to every chart row, then show the resulting frame.
billboard_top_100 = songs_ids(df=billboard_top_100)
billboard_top_100

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Unnamed: 0,Artist,Title,ids
0,Doja Cat,Paint The Town Red,56y1jOTK0XSvJzVv9vHQBK
1,SZA,Snooze,4iZ4pt7kvcaH6Yo8UoZ4s2
2,Taylor Swift,Cruel Summer,2EGaDf0cPX789H3LNeB03D
3,Luke Combs,Fast Car,1Lo0QY9cvc8sUB2vnIOxDT
4,Jung Kook & Jack Harlow,3D,2pLZ6tUBapqlngc9lnYz4M
...,...,...,...
95,Rod Wave,Long Journey,59ksHIFa69pi0az2bKdu6s
96,Luke Bryan,But I Got A Beer In My Hand,3jhV8I9sIPXsjPs2ZOnSaf
97,Peso Pluma,Rubicon,0xGZjxYqcNQ8WqxyeZ4eSN
98,Zach Bryan,East Side Of Sorrow,00syWkRGIVQvYsg2OwfBUw


In [15]:
# Count missing values per column (a missing `ids` entry means the lookup failed).
billboard_top_100.isna().sum()

Artist    0
Title     0
ids       0
dtype: int64

In [17]:
# Create function to obtain the audio features of a given list of songs
def get_audio_features(list_of_songs_ids):
    """Fetch the audio features of each song id and return them as one DataFrame.

    One ``sp.audio_features`` call (module-level client) is made per id and
    the first element of its response becomes one output row. The output has
    exactly one row per input id, in input order, so it can be placed next
    to the frame the ids came from.

    Ids that are not non-empty strings (for example the ``numpy.nan``
    placeholder of a failed lookup) are not sent to the API. They, and ids
    for which the API returns nothing, yield a row of missing values instead
    of raising.

    Parameters
    ----------
    list_of_songs_ids : iterable
        Spotify track ids.

    Returns
    -------
    pandas.DataFrame
        One row per input id with a default ``0..n-1`` index; columns are the
        keys of the feature dictionaries returned by the API.
    """
    records = []

    for song_id in list_of_songs_ids:
        features = None
        if isinstance(song_id, str) and song_id:
            response = sp.audio_features(song_id)
            if response:
                features = response[0]
        # An empty record keeps the row position (all values missing) so the
        # output stays aligned with the input ids.
        records.append(features if features is not None else {})

    # Build the frame once instead of concatenating inside the loop, which
    # re-copied the accumulated rows on every iteration.
    return pd.DataFrame(records)

In [21]:
# Fetch the audio features for every chart id and give the result a fresh
# 0..n-1 index so it lines up row by row with `billboard_top_100`.
audio_features_df = (
    get_audio_features(list(billboard_top_100['ids']))
    .reset_index(drop=True)
)
audio_features_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.864,0.556,2,-7.683,0,0.194,0.255,4e-06,0.112,0.726,99.974,audio_features,56y1jOTK0XSvJzVv9vHQBK,spotify:track:56y1jOTK0XSvJzVv9vHQBK,https://api.spotify.com/v1/tracks/56y1jOTK0XSv...,https://api.spotify.com/v1/audio-analysis/56y1...,230480,4
1,0.559,0.551,5,-7.231,1,0.132,0.141,0.0,0.11,0.392,143.008,audio_features,4iZ4pt7kvcaH6Yo8UoZ4s2,spotify:track:4iZ4pt7kvcaH6Yo8UoZ4s2,https://api.spotify.com/v1/tracks/4iZ4pt7kvcaH...,https://api.spotify.com/v1/audio-analysis/4iZ4...,201800,4
2,0.666,0.871,1,-6.017,0,0.0272,0.27,0.0208,0.263,0.936,108.501,audio_features,2EGaDf0cPX789H3LNeB03D,spotify:track:2EGaDf0cPX789H3LNeB03D,https://api.spotify.com/v1/tracks/2EGaDf0cPX78...,https://api.spotify.com/v1/audio-analysis/2EGa...,215326,4
3,0.712,0.603,8,-5.52,1,0.0262,0.186,0.0,0.115,0.67,97.994,audio_features,1Lo0QY9cvc8sUB2vnIOxDT,spotify:track:1Lo0QY9cvc8sUB2vnIOxDT,https://api.spotify.com/v1/tracks/1Lo0QY9cvc8s...,https://api.spotify.com/v1/audio-analysis/1Lo0...,265493,4
4,0.865,0.785,1,-3.313,1,0.0483,0.0221,0.0,0.286,0.836,108.031,audio_features,2pLZ6tUBapqlngc9lnYz4M,spotify:track:2pLZ6tUBapqlngc9lnYz4M,https://api.spotify.com/v1/tracks/2pLZ6tUBapql...,https://api.spotify.com/v1/audio-analysis/2pLZ...,162545,4


Concatenate the chart dataframe with the audio features dataframe

In [22]:
def add_audio_features(df, audio_features_df):
    """Return ``df`` and ``audio_features_df`` joined side by side.

    The two frames are concatenated column-wise, which matches rows by index
    label; neither input is modified.
    """
    return pd.concat((df, audio_features_df), axis="columns")

In [23]:
# Put the audio features next to the chart rows and list the resulting columns.
# (A bare expression in the middle of a cell is not rendered by default, so
# only the explicit display() call is kept.)
billboard_top_100_features = add_audio_features(billboard_top_100, audio_features_df)
display(billboard_top_100_features.columns)

Index(['Artist', 'Title', 'ids', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'type', 'id', 'uri', 'track_href', 'analysis_url',
       'duration_ms', 'time_signature'],
      dtype='object')

In [24]:
# Count missing values per column of the combined frame.
billboard_top_100_features.isna().sum()

Artist              0
Title               0
ids                 0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
type                0
id                  0
uri                 0
track_href          0
analysis_url        0
duration_ms         0
time_signature      0
dtype: int64

In [25]:
# Save the combined frame to disk without writing the index column.
billboard_top_100_features.to_csv(path_or_buf='top_100_extended.csv', index=False)