In [1]:
import json
import sys
import time
import urllib
import urllib.parse
import urllib.request

import pandas as pd

In [2]:
# Secret token: the Genius API access token is kept in a local file rather
# than in the notebook itself.
# The context manager closes the file handle, and strip() removes surrounding
# whitespace -- a trailing newline would otherwise end up inside the
# "Authorization" header value, which http.client rejects as invalid.
with open("ACCESS_TOKEN.txt", "r") as token_file:
    token = token_file.read().strip()

In [3]:
# Build an authenticated Genius API search request for one search term.
search_term = 'Ayla Celik'
_URL_API = "https://api.genius.com/"
_URL_SEARCH = "search?q="

# Percent-encode the term so spaces and special characters are URL-safe.
querystring = "".join([_URL_API, _URL_SEARCH, urllib.parse.quote(search_term)])

# Passing `headers` is equivalent to calling add_header() for each entry, in order.
# The empty User-Agent replaces the default one urllib would otherwise send.
request = urllib.request.Request(
    querystring,
    headers={
        "Authorization": "Bearer " + token,
        "User-Agent": "",
    },
)

In [4]:
def get_songs():
    """Return the songs whose annotations still have to be scraped.

    Reads the full song list from 'song_ids_1.csv'. If 'annotations.tsv'
    already holds output from an earlier run, every song up to and including
    the last song id recorded there is dropped, so scraping can resume
    where it stopped.

    Returns:
        pandas.DataFrame: all rows of 'song_ids_1.csv' when there is no
        previous output (file missing or empty), otherwise only the rows
        that follow the last annotated song.

    Raises:
        IndexError: if the last song id in 'annotations.tsv' does not occur
            in the 'song' column of 'song_ids_1.csv'.
        Other pandas read errors (e.g. a malformed 'annotations.tsv')
        propagate instead of silently restarting from the first song.
    """
    annotation_header = ['song_id', 'lyric', 'annotation']
    songs = pd.read_csv('song_ids_1.csv', sep=',')

    try:
        annotations = pd.read_csv('annotations.tsv', names=annotation_header, sep='\t')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No previous output yet: start from the top of the song list.
        return songs
    if annotations.empty:
        return songs

    # Compare ids numerically. Depending on the data and the pandas version,
    # the 'song' column may be read as strings (when it contains 'None'
    # placeholders), as integers, or as floats with NaN, so comparing against
    # the string form of the id is not reliable.
    last_id = pd.to_numeric(annotations.song_id.values[-1], errors='coerce')
    song_numbers = pd.to_numeric(songs.song, errors='coerce')
    positions = (song_numbers == last_id).values.nonzero()[0]
    if len(positions) == 0:
        raise IndexError(
            'last annotated song id %r not found in song_ids_1.csv'
            % (annotations.song_id.values[-1],)
        )

    # Resume with the row right after the first occurrence of the last id.
    return songs.iloc[positions[0] + 1:]

In [5]:
# Recursively walk the annotation DOM and collect its text, replacing
# blockquotes with a [BLOCKQUOTE] marker and skipping formatting, images, etc.

def recurse(children):
    """Flatten a list of annotation DOM nodes into plain text.

    Args:
        children: iterable of nodes. A node is either a str (text), a dict
            describing an element (with a 'tag' key and, optionally, a
            'children' list), or a nested list of nodes.

    Returns:
        str: the text of every string node reachable through 'p' and 'a'
        elements and nested lists, each followed by one space. Each
        'blockquote' element contributes the marker '[BLOCKQUOTE]' instead
        of its content. All other elements, and any node of another type,
        are skipped together with whatever they contain.
    """
    parts = []
    for child in children:
        if isinstance(child, dict) and 'tag' in child:
            tag = child['tag']
            if tag == 'p' or tag == 'a':
                # An element may have no 'children' entry (or a null one);
                # treat it as empty rather than failing the whole annotation.
                parts.append(recurse(child.get('children') or []))
            elif tag == 'blockquote':
                parts.append('[BLOCKQUOTE]')
        elif isinstance(child, list):
            parts.append(recurse(child))
        elif isinstance(child, str):
            parts.append(child + ' ')
    return ''.join(parts)

In [15]:
def _tsv_field(text):
    """Replace tabs and line breaks with spaces so `text` fits in one TSV field."""
    return text.replace('\t', ' ').replace('\r', ' ').replace('\n', ' ')


def _fetch_referents(song_id):
    """Return the referents the Genius API reports for `song_id` (first page, up to 50)."""
    # The 'referents' endpoint returns, for a song id, the annotated lyric
    # fragments together with their annotations.
    querystring = "https://api.genius.com/referents?song_id=" + str(song_id) + "&per_page=50"
    request = urllib.request.Request(querystring)
    # strip() guards against a trailing newline read from the token file.
    request.add_header("Authorization", "Bearer " + token.strip())
    request.add_header("User-Agent", "")
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(request, timeout=10) as response:
        payload = json.loads(response.read().decode('utf-8'))
    return payload['response']['referents']


def append_annotations(songdf, batchsize):
    """Scrape annotations for a batch of songs and append them to 'annotations.tsv'.

    For each of the first `batchsize` rows of `songdf` that has a numeric
    song id, the Genius 'referents' endpoint is queried (only the first page
    of up to 50 referents). For every referent whose first annotation can be
    parsed, one line ``song_id<TAB>lyric<TAB>annotation`` is appended to
    'annotations.tsv'. Progress is written to stdout.

    Uses the module-level `token` (API access token) and `recurse`
    (annotation DOM -> text).

    Args:
        songdf: DataFrame with a 'song' column holding song ids; rows whose
            id is not numeric (e.g. the 'None' placeholder) are skipped.
        batchsize: number of leading rows of `songdf` to process.

    Returns:
        None. Songs whose request or response parsing fails are reported on
        stdout and skipped.
    """
    # Get a chunk of song ids. Rows without a numeric id are dropped; this
    # covers the 'None' placeholder both as a string and as NaN (how pandas
    # may already have parsed it when reading the CSV).
    batch = songdf.head(batchsize)
    song_ids = pd.to_numeric(batch.song, errors='coerce').dropna().astype(int)

    # Write UTF-8 explicitly: pandas reads the file back as UTF-8 by default,
    # whereas the platform default encoding may differ.
    with open('annotations.tsv', 'a', encoding='utf-8') as f:

        for i, song_id in enumerate(song_ids):
            sys.stdout.write('\r' + str(i))

            try:
                referents = _fetch_referents(song_id)
            except Exception as err:
                # Best effort: any network, HTTP, decoding or payload-shape
                # problem skips this song. `Exception` rather than a bare
                # except, so an interrupt (Ctrl-C / kernel interrupt) still
                # stops the loop.
                print('***Failed on song id: ' + str(song_id)
                      + ' (' + type(err).__name__ + ': ' + str(err) + ')')
                referents = []

            # Many songs return zero annotations; the loop is then a no-op.
            for ref in referents:
                lyric = _tsv_field(ref['fragment'])
                try:
                    annotation = _tsv_field(
                        recurse(ref['annotations'][0]['body']['dom']['children'])
                    )
                except (LookupError, TypeError):
                    # Missing keys, an empty annotation list or a null DOM.
                    print('***Empty annotation on song id: ' + str(song_id))
                    continue

                sys.stdout.write('\r' + str(i) + " " + lyric)
                f.write(str(song_id) + '\t' + lyric + '\t' + annotation + '\n')

            # Pause between requests, including after a failed one, so the
            # API is not hit back-to-back.
            time.sleep(0.5)

In [19]:
# Pick up after the last song already present in annotations.tsv (if any),
# then scrape annotations for the next batch of up to 1000 rows.
songdf = get_songs()
append_annotations(songdf=songdf, batchsize=1000)

143 I stare at the lawn, it's Wednesday morning  It needs a cut but I leave it growing  All different sizes and all shades of green  Slashing it down just seems kind of meanhy can't we just talk nice?nna see    So take what you want from me  Don't ask me what I really mean  I am just a reflection  Of what you really wanna see rebuildin'  If you've got a spare half a million  You could knock it down and start rebuildin'  If you've got a spare half a million  You could knock it down and start rebuildin'***Failed on song id: 693803
538 Everybody in here just bounce  Just bounce  Just bounce  Just bounce  Uh uh uh uh  I like, ok let's go  Uh Uh  Yeah yeah yeah yeah  Uh Uh Uh  Yeah yeah yeah yeah  Uh Uh Uh  Big pimpin'  Spendin' cheese we doin'  Big pimpin' on BLADES  We doin'  Big Pimpin' up in NYC  It's just that Jigga man Roots band on MTVtop was shorter than leprechauns Y'all can't fuck with Hov', what type of X y'all on? I got great lawyers for cops so dress warm Charges don't stick to

805 Retro act, I'm just bringing it back like Jordan Packs***Empty annotation on song id: 182949urn down the homieally the same storiesy,that or y'all gon' make me  Put the double Desert Eagles in your life, nigga  (Some people hate) I think...  (I think they've lost their minds) Think y'all lost your mind  Y'all got me on my shit you live with the fear of just being me  Living in the shadow feels like the safe place to be  No harm for them, no harm for me  But life is short, and it's time to be free  Love who you love, because life isn't guaranteed  SmileKeef Out of spite, I just might flood these streets Hear the freedom in my speech Got an onion from Universal, read it and weep Would've brought the Nets to Brooklyn for free Except I made millions off it, you fuckin' dweeb I still own the building, I'm still keeping my seat Y'all buy that bullshit, you'd better keep y'all receipt Obama said "chill, you gonna get me impeached" But you don't need this shit anyway Chill with me on the b