In [1]:
import pandas as pd
import pickle
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
# ! pip install spotipy

In [2]:
songs = pd.read_pickle('songs.pkl')

In [3]:
#only grabs rows where title doesn't contain the word 'Album'
songs = songs.loc[~(songs.Title.str.contains('Album', regex = False))].copy() 

In [4]:
#only grabs rows where title doesn't contain the word 'Sountrack'
songs = songs.loc[~(songs.Artist.str.contains('Soundtrack', regex = False))].copy()

In [7]:
songs.reset_index(inplace = True)

In [8]:
songs.drop(columns = ['index'], inplace = True)

In [9]:
songs['SpotifyID'] = None

In [10]:
songs.head()

Unnamed: 0,Title,Artist,SpotifyID
0,Fine Line,Harry Styles,
1,Christmas,Michael Buble,
2,Please Excuse Me For Being Antisocial,Roddy Ricch,
3,Merry Christmas,Mariah Carey,
4,"When We All Fall Asleep, Where Do We Go?",Billie Eilish,


In [15]:
cid = ""
secret = ""

#instantiating spotify client
client_credentials_manager = SpotifyClientCredentials(client_id = cid, client_secret = secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [38]:
#getSongID takes track's name, track's Artist, and track results
#iterate through the track results to get the name in what spotify returned and
# test those results against what song and artist we have in the data frame
def getSongID(trackName, trackArtist, trackResults):
    for track in trackResults['tracks']['items']:
        if (track['name'].lower() == trackName.lower()) & (track['artists'][0]['name'].lower() == trackArtist.lower()):
            return track['id']
    return None

#getAlbumID takes album's name, album's Artist, and album results
#iterate through the search results to get the name in what spotify returned and
# test those results against what album and artist we have in the data frame
def getAlbumID(albumName, albumArtist, searchResult):
    for album in searchResult['albums']['items']:
        if (album['name'].lower() == albumName.lower()) & (album['artists'][0]['name'].lower() == albumArtist.lower()):
            return album['id']
    return None

#getAlbumSongs takes in album's artist and the album track results (which is a list of )
#it gets the songs in the album and returns it
def getAlbumSongs(albumArtist, albumTracksResults):
    albumSongDF = pd.DataFrame(columns=["Title","Artist", "SpotifyID"])
    for songs in albumTracksResults['items']:
        newRow = pd.Series(data={"Title": songs['name'],"Artist": albumArtist, "SpotifyID": songs['id']})
        albumSongDF = albumSongDF.append(newRow, ignore_index=True)
    return albumSongDF

In [49]:
#create two df
songsToDeleteDF = pd.DataFrame(columns=["Title","Artist", "SpotifyID"])
albumSongToAddDF = pd.DataFrame(columns=["Title","Artist", "SpotifyID"])

#iterate through all song rows and find SpotifyID
for index, row in songs.iterrows(): 
    #get songID from Spotify Search API
    trackResults = sp.search(q=row['Title'], type='track', market= 'US', limit=10,offset=0)
    songID = getSongID(row['Title'], row['Artist'], trackResults)
    #If songID is not found, we assume it is an album and search for its albumID
    if  songID == None:
        #Delete the row from songs DF that does not have spotifyID since they are an album
        songsToDeleteDF = songsToDeleteDF.append(row, ignore_index=True)
        searchResult = sp.search(q=row['Title'], type='album', market= 'US', limit=10,offset=0)
        albumID = getAlbumID(row['Title'], row['Artist'], searchResult)
        #If we find an albumID, we get all the songs from that album and add it to the DF
        if albumID != None:
            albumTracksResults = sp.album_tracks(albumID)
            albumSongToAddDF = pd.concat([albumSongToAddDF,getAlbumSongs(row['Artist'],albumTracksResults)], ignore_index=True)
    else:
        #Add the spotifyID to an existing song
        songs.SpotifyID.iloc[index] = songID
        
        
songs = pd.concat([songs,albumSongToAddDF],ignore_index=True) #add the album songs into the song DF
songs = songs[~songs.Title.isin(songsToDeleteDF.Title)].copy() #remove album or song without spotifyID from songs DF
songs.head()

Unnamed: 0,Title,Artist,SpotifyID
0,Fine Line,Harry Styles,6VzcQuzTNTMFnJ6rBSaLH9
5,Hollywood's Bleeding,Post Malone,7sWRlDoTDX8geTR8zzr2vt
9,Lover,Taylor Swift,1dGr1c8CrMLDpV6mPbImSI
14,Rudolph The Red-Nosed Reindeer,Burl Ives,47otoIkhx3fkdivEXL5OB6
16,What You See Is What You Get,Luke Combs,273TiTHLlHSRe5zrzs7wvD


In [62]:
#songs.reset_index(inplace = True)
#songs.drop(columns = ['index'], inplace = True)
#songs.head()
#songs.to_pickle('songsWithSpotifyID.pkl')
#songs = pd.read_pickle('songsWithSpotifyID.pkl')