Using the artists schema, this script iterates over artist IDs, fetches the list of song IDs for each artist, and appends every resulting (artistID, songID) pair to the song_ids.txt file.

In [8]:
# functions _get and get_artist_songs() in this code 
# are courtesy of GitHub user imdkm:
# https://gist.github.com/imdkm/a60247b59ff1881fa4bb8846a9b44c96

In [9]:
import requests, json
from time import sleep
import sys
import pandas as pd
import os

In [21]:
# Secret token: read the API access token from a local file.
# The file is opened in a context manager so the handle is closed right away,
# and surrounding whitespace is stripped so that a trailing newline in the
# file does not end up inside the Authorization header value.
with open("ACCESS_TOKEN.txt", "r") as token_file:
    token = token_file.read().strip()

In [22]:
# constant values.
BASE_URL = "https://api.genius.com"  # root URL that _get() joins with each request path
CLIENT_ACCESS_TOKEN = token  # sent by _get() as the "Bearer" Authorization header
QUERY_SIZE = 100  # how many entries of artist_ids are processed in one run

In [28]:
# send request and get response in json format.
def _get(path, params=None, headers=None, timeout=30):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        path: Path relative to ``BASE_URL`` (joined with a single "/").
        params: Optional dict of query-string parameters.
        headers: Optional dict of extra request headers. It is copied, so
            the caller's dict is never modified.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        requests.exceptions.HTTPError: on a 4xx/5xx status
            (via ``raise_for_status``).
        requests.exceptions.RequestException: on connection problems or
            when the timeout expires.
    """
    # generate request URL
    requrl = '/'.join([BASE_URL, path])

    # Work on a copy: adding the Authorization entry to the caller's own
    # dict would leak the token into whatever else that dict is used for.
    request_headers = dict(headers) if headers else {}
    request_headers['Authorization'] = "Bearer {}".format(CLIENT_ACCESS_TOKEN)

    # Without a timeout a stalled connection would block the run forever.
    response = requests.get(url=requrl, params=params,
                            headers=request_headers, timeout=timeout)
    response.raise_for_status()

    return response.json()

def get_artist_songs(artist_id):
    """Return the ids of all songs whose primary artist is ``artist_id``.

    Pages through ``artists/<artist_id>/songs/`` via ``_get`` starting at
    page 1 and stops at the first page with no songs. Songs on which the
    artist is not the primary artist are dropped.

    Args:
        artist_id: The artist id, as an int or a numeric string.

    Returns:
        A list of song ids, in the order the pages returned them.

    Raises:
        ValueError: if ``artist_id`` cannot be converted to ``int``.
        Any exception raised by ``_get`` propagates unchanged.
    """
    # The ids in the JSON payload are compared with ``==`` below; a string
    # such as "13" would never equal 13 and the function would silently
    # return an empty list, so normalise the id once up front.
    artist_id = int(artist_id)

    path = "artists/{}/songs/".format(artist_id)
    song_ids = []
    current_page = 1

    # main loop
    while True:
        data = _get(path=path, params={'page': current_page})
        response = data['response']
        page_songs = response['songs']

        # an empty page means we ran past the last one: quit.
        if not page_songs:
            break

        # Keep only the ids we need from this page instead of holding every
        # full song record in memory until the end.
        song_ids.extend(song["id"] for song in page_songs
                        if song["primary_artist"]["id"] == artist_id)

        # NOTE(review): the API appears to report the following page number
        # in "next_page" (null on the last page) - confirm against the API
        # docs. When the field is present and null we skip the extra request
        # for an empty page; when it is absent the empty-page check above
        # still terminates the loop.
        if 'next_page' in response and response['next_page'] is None:
            break

        current_page += 1

    return song_ids

In [29]:
def get_from(artist_id):
    """Return the lines of artists.txt that follow the entry for artist_id.

    The first comma-separated field of each line is compared with
    ``str(artist_id)``. Everything after the first matching line is
    returned (newlines included). If no line matches, or the match is the
    last line, the result is an empty list.
    """
    wanted = str(artist_id)

    with open("artists.txt","r") as handle:
        rows = handle.readlines()

    for position, row in enumerate(rows):
        if row.split(',')[0] == wanted:
            # resume right after the matching entry
            return rows[position + 1:]

    # nothing matched: there is nothing "after" the requested artist
    return []

In [38]:
# read in list of artist names
#
# If song_ids.txt already has rows, resume after the artist on its last
# non-blank row; otherwise start from the top of artists.txt. The file is
# read with plain Python instead of the `!wc` / `!tail` shell magics so the
# cell also runs outside IPython and on systems without those tools.

import os.path

last_artist = None
if os.path.isfile('song_ids.txt'):
    with open('song_ids.txt', 'r') as f:
        for line in f:
            if line.strip():
                # first field of every row is the artist id
                last_artist = line.strip().split(',')[0]

if last_artist is not None:
    artists = get_from(last_artist)
else:
    # No previous progress: `artists` must still be defined, so fall back
    # to the complete artist list.
    with open('artists.txt', 'r') as f:
        artists = f.readlines()

# keep only the id column, dropping entries whose id is the literal 'None'
artist_ids = []
for line in artists:
    first_field = line.strip().split(',')[0]
    if first_field != 'None':
        artist_ids.append(first_field)
print(str(len(artists))+" artists total")

1493 artists total


In [39]:
# Collect the artist ids that already appear in song_ids.txt so the query
# loop can skip them.
artist_set = set()

# On a first run the file does not exist yet; that simply means no artist
# has been queried so far.
if os.path.isfile("song_ids.txt"):
    with open("song_ids.txt", "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                # tolerate blank lines instead of failing on the split
                continue
            # only the first field is needed; limiting the split keeps a
            # row with extra commas from raising
            artist_set.add(line.split(',', 1)[0])

print("done reading")

done reading


In [40]:
# number of distinct artist ids currently held in artist_set
len(artist_set)

279

In [41]:
# query only a subset of artists at a time to not overwork server:
# each run handles the first QUERY_SIZE entries of artist_ids

artists_chunk = artist_ids[0:QUERY_SIZE]

# populate song ids using artist ids
for artist_id in artists_chunk:
    print(artist_id)

    # check if we've already queried this artist:
    if artist_id in artist_set:
        print(artist_id + " already queried, skipping")
        continue
    artist_set.add(artist_id)

    # get all song ids and make a list.
    # A failed lookup keeps the [None] placeholder, so one
    # "<artist_id>,None" row is still written for that artist.
    song_ids = [None]
    try:
        song_ids = get_artist_songs(int(artist_id))
        print("-> " + str(len(song_ids))+" results")
    except Exception as err:
        # Best effort: one bad artist must not abort the batch. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit stop the
        # run, and the error is shown so that network or rate-limit
        # failures are not mistaken for a missing artist.
        print("NOT FOUND: " + repr(err))

    # append after every artist so progress survives an interrupted run
    with open("song_ids.txt", "a") as f:
        for song_id in song_ids:
            f.write(str(artist_id)+","+str(song_id)+"\n")

    # small pause between artists to go easy on the server
    sleep(.5)

# audible notification when the batch is finished; runs the `say` command
# through the shell and evaluates to its exit status
os.system('say "Done"')

102568
-> 32 results
482418
-> 173 results
13
-> 1133 results
68428
-> 69 results
453
-> 152 results
2532
-> 66 results
29788
-> 12 results
49350
-> 127 results
12787
-> 43 results
26092
-> 81 results
48669
-> 111 results
12519
-> 94 results
231573
-> 83 results
21569
-> 76 results
805
-> 243 results
1630
-> 174 results
369364
-> 31 results
53293
-> 184 results
1230717
-> 13 results
27570
-> 145 results
5407
-> 139 results
266191
-> 30 results
6974
-> 235 results
555561
-> 104 results
289976
-> 193 results
33996
-> 196 results
124
-> 71 results
47192
-> 37 results
15639
-> 35 results
17941
-> 82 results
1040665
-> 76 results
24823
-> 246 results
266918
-> 2 results
32515
-> 26 results
25561
-> 242 results
481225
-> 72 results
212456
-> 81 results
152874
-> 6 results
12524
-> 90 results
354971
-> 42 results
221650
-> 166 results
53023
-> 72 results
5841
-> 118 results
1583
-> 342 results
25516
-> 81 results
156
-> 193 results
1170
-> 123 results
23355
-> 35 results
196985
-> 104 results

0