In [1]:
# Import packages, load Genius API token from local .env file
import os
import requests
import json
%load_ext dotenv
%dotenv

In [2]:
# Set parameters to call the Genius API
genius_token = os.getenv("GENIUS_TOKEN")
if not genius_token:
    # Fail here with a clear message instead of a TypeError from the
    # string concatenation below when the variable is missing.
    raise RuntimeError("GENIUS_TOKEN is not set; add it to your local .env file")
base_url = "https://api.genius.com"  # HTTPS so the bearer token is not sent in clear text
value = 'Bearer ' + genius_token
headers = {'Authorization': value}

In [None]:
# Search Genius for the artist by name
search_url = base_url + "/search"
artist_name = "Taylor Swift"
params = {'q': artist_name}
# Bound the wait so a stalled connection cannot hang the notebook
response = requests.get(search_url, params=params, headers=headers, timeout=10)
# Surface auth/HTTP errors here rather than as a KeyError on the JSON further down
response.raise_for_status()
response_data = response.json()
response_data

This is similar to typing "Taylor Swift" into the search bar on Genius. What's the top hit that shows up?

In [None]:
# Display the first entry in the list of search hits
search_hits = response_data['response']['hits']
search_hits[0]

Looks like it's "Look What You Made Me Do". Assuming the top hit will show the correct artist (which, in this case, is a good assumption), grab the artist ID from this top hit so we can get all of Taylor Swift's songs. 

In [None]:
# Take the artist ID from the top search hit and request the first page of that artist's songs
taylor_swift_id = response_data['response']['hits'][0]['result']['primary_artist']['id']
artist_songs_url = base_url + '/artists/' + str(taylor_swift_id) + '/songs'
params = {'sort': 'title',
          'per_page': 50}  # results per page can only be a max of 50
# Bound the wait so a stalled connection cannot hang the notebook
response = requests.get(artist_songs_url, params=params, headers=headers, timeout=10)
# Surface auth/HTTP errors here instead of displaying an error payload as if it were data
response.raise_for_status()
response.json()

Taylor Swift has a ton of songs (more than 50), so we need a loop to go through all the pages. We'll know we've reached the last page when 'next_page' is None in the JSON response.

In [None]:
# Walk every page of the artist's song list, collecting (and printing) the
# title and URL of each song whose primary artist is Taylor Swift.
song_title = []
song_url = []
page = 1
next_page = 1
while next_page is not None:
    params = {'sort': 'title',
              'per_page': 50,
              'page': page}
    # Bound the wait so a stalled connection cannot hang the loop
    response = requests.get(artist_songs_url, params=params, headers=headers, timeout=10)
    # Stop on an HTTP error rather than failing on a missing key below
    response.raise_for_status()
    response_data = response.json()
    next_page = response_data['response']['next_page']
    page += 1  # Will keep going until there aren't any more pages
    for song in response_data['response']['songs']:
        if song['primary_artist']['id'] == taylor_swift_id:  # Only include songs by Taylor Swift
            # Keep the results so later cells can use them, not just read the printout
            song_title.append(song['title'])
            song_url.append(song['url'])
            print(song['title'] + " " + song['url'])

She has SO MANY things that show up through the Genius API (covers / interviews / setlists / IG posts / etc.). I'm trying to figure out how to narrow the list down to just the songs on her 6 albums, but nothing in the metadata gives the album name for each of these songs. One option is to scrape the lyrics page for every single record that shows up above, but that would take forever and still might not give good results, because she performs a lot of covers on tour.

Maybe we can use the Spotify API to get a list of the tracks on an album, and all we would have to input is the album names we care about? 

# Spotify

In [62]:
import sys
import spotipy
import spotipy.util as util
import spotipy.oauth2 as oauth2

# Spotify app credentials, read from the environment
spotify_client_id = os.getenv("SPOTIFY_CLIENT_ID")
spotify_secret_id = os.getenv("SPOTIFY_CLIENT_SECRET")

# Build the credentials manager and hand it to the Spotify client used by the cells below
client_credentials = oauth2.SpotifyClientCredentials(
    client_id=spotify_client_id,
    client_secret=spotify_secret_id,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials)

In [84]:
def get_artist(name):
    """Search Spotify for an artist by name.

    Args:
        name: Artist name; it is sent as the query ``'artist:' + name``.

    Returns:
        The first item of the search result's artist list, or ``None``
        when that list is empty.
    """
    search_results = sp.search(q='artist:' + name, type='artist')
    matches = search_results['artists']['items']
    return matches[0] if matches else None
    
def show_album_tracks(album):
    """Print every track of an album: its name, a blank line, then the raw track object.

    Args:
        album: Mapping with an ``'id'`` key identifying the album.

    All result pages are fetched (following ``'next'``) before anything is printed.
    """
    page = sp.album_tracks(album['id'])
    collected = list(page['items'])
    # Keep requesting pages for as long as the current one points to a next page
    while page['next']:
        page = sp.next(page)
        collected += page['items']
    for entry in collected:
        print('  ', entry['name'])
        print()
        print(entry)
        
def show_artist_albums(id):
    """Print and return the distinct album names Spotify lists for an artist.

    Args:
        id: Either a Spotify artist ID, or an artist object (a dict with an
            ``'id'`` key, such as the value returned by ``get_artist``).
            The parameter name shadows the built-in ``id``; it is kept so
            existing callers are unaffected.

    Returns:
        set: The unique album names. Each name is printed the first time
        it is encountered.

    Raises:
        TypeError: If ``id`` is ``None`` (for example when the artist
            lookup found nothing).
    """
    if id is None:
        raise TypeError("show_artist_albums() needs an artist or artist ID, got None")
    # Resolve the ID from the argument itself so the function works for
    # whichever artist is passed in, not only a notebook-global `artist`.
    artist_id = id['id'] if isinstance(id, dict) else id

    results = sp.artist_albums(artist_id, album_type='album')
    albums = list(results['items'])
    # Follow the pagination links until there is no next page
    while results['next']:
        results = sp.next(results)
        albums.extend(results['items'])

    unique = set()  # skip duplicate albums
    for album in albums:
        name = album['name']
        if name not in unique:
            print(name)
            unique.add(name)
    return unique
        
        
# Look the artist up on Spotify, then print and collect her album names
name = 'Taylor Swift'
artist = get_artist(name)  # first search match, or None when nothing matched
all_albums = show_artist_albums(artist)

reputation
reputation Stadium Tour Surprise Song Playlist
reputation (Big Machine Radio Release Special)
Taylor Swift Karaoke: 1989 (Deluxe)
1989
Taylor Swift Karaoke: 1989 (Deluxe Edition)
Taylor Swift Karaoke: 1989
1989 (Deluxe)
1989 (Deluxe Edition)
1989 (Big Machine Radio Release Special)
Red (Deluxe Edition)
Red
Red (Karaoke Version)
Taylor Swift Karaoke: Red
Red (Big Machine Radio Release Special)
Speak Now (Deluxe Edition)
Speak Now
Speak Now (US Version)
Speak Now (Karaoke Version)
Taylor Swift Karaoke: Speak Now
Speak Now (Japanese Version)
Speak Now World Tour Live
Speak Now (Big Machine Radio Release Special)
Speak Now (Deluxe Package)
Fearless
Fearless (International Version)
Fearless (Karaoke Version)
Fearless Karaoke
Fearless (Platinum Edition)
Fearless Platinum Edition
Fearless (Big Machine Radio Release Special)
Taylor Swift
Taylor Swift Karaoke
Taylor Swift (Big Machine Radio Release Special)


Something will have to be manual, whether it's removing specific albums or including only the primary ones. I'm just going to manually select the albums I want instead of trying to write clever code.

In [85]:
# Hand-picked album names to keep
selected_albums = ['1989', 'reputation', 'Fearless', 'Taylor Swift', 'Red', 'Speak Now']

# Print the collected album names that are on the hand-picked list,
# in the order `all_albums` yields them
chosen = (title for title in all_albums if title in selected_albums)
for title in chosen:
    print(title)

reputation
Red
Fearless
Taylor Swift
1989
Speak Now
