In [1]:
import json
import sys
import time
import urllib
import urllib.parse
import urllib.request

import pandas as pd

In [2]:
# Secret token: read it once, close the file promptly, and strip surrounding
# whitespace so a trailing newline in the file cannot end up inside the
# "Authorization" header value.
with open("../ACCESS_TOKEN.txt", "r") as token_file:
    token = token_file.read().strip()

In [3]:
# Build (without sending) an authenticated search request for the Genius API.
search_term = 'Ayla Celik'
_URL_API = "https://api.genius.com/"
_URL_SEARCH = "search?q="
querystring = "".join([_URL_API, _URL_SEARCH, urllib.parse.quote(search_term)])
# Passing the headers to the constructor registers them the same way
# successive add_header() calls would.
request = urllib.request.Request(
    querystring,
    headers={"Authorization": "Bearer " + token, "User-Agent": ""},
)

In [4]:
def _last_annotated_song_id(path='annotations.tsv'):
    """Return the first field of the last non-blank line of `path`, or None.

    None is returned when the file does not exist or contains no records.
    The file is read as plain tab-separated text (no quote handling), which
    matches how the rows are written: raw fields joined by tabs.
    """
    last_id = None
    try:
        # newline='\n' stops a stray '\r' inside a field from being treated as
        # a record break; errors='replace' is safe because only the leading
        # id field is used.
        with open(path, 'r', encoding='utf-8', errors='replace', newline='\n') as f:
            for line in f:
                if line.strip():
                    last_id = line.split('\t', 1)[0].strip()
    except FileNotFoundError:
        return None
    return last_id


def get_songs():
    """Return the rows of 'song_ids_1.csv' that still need to be processed.

    If 'annotations.tsv' is missing or empty, every row is returned.
    Otherwise the song id of the last record in 'annotations.tsv' is located
    in the `song` column and only the rows after its first occurrence are
    returned.

    Returns:
        pandas.DataFrame: the remaining rows, with the CSV's own columns.

    Raises:
        IndexError: if the last annotated song id does not appear in
            'song_ids_1.csv'.
    """
    songs = pd.read_csv('song_ids_1.csv', sep=',')

    last_id = _last_annotated_song_id('annotations.tsv')
    if last_id is None:
        return songs

    # Compare numerically so the match works whether pandas parsed the `song`
    # column as integers, as floats (missing ids read as NaN), or as strings
    # mixed with a 'None' placeholder. Non-numeric entries become NaN and
    # never match.
    song_numbers = pd.to_numeric(songs.song, errors='coerce')
    target = pd.to_numeric(last_id, errors='coerce')
    positions = (song_numbers == target).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(
            'last annotated song id ' + repr(last_id) + ' not found in song_ids_1.csv'
        )

    # Resume just after the first row holding the last annotated id.
    return songs.iloc[positions[0] + 1:]

In [5]:
# Function to recursively search the annotation dict
# and get the text, skipping blockquotes, formatting, images, etc.

def recurse(children):
    """Flatten a list of annotation DOM nodes into plain text.

    Args:
        children: iterable of nodes. Each node may be a str (text), a list of
            nodes, or a dict with a 'tag' key and optionally a 'children' key.

    Returns:
        str: every text node followed by a single space. 'p' and 'a' nodes are
        descended into, a 'blockquote' node is replaced by the literal marker
        '[BLOCKQUOTE]', and all other tags (and non-str/list/dict nodes) are
        skipped.
    """
    parts = []
    for child in children:
        if isinstance(child, dict):
            tag = child.get('tag')
            if tag in ('p', 'a'):
                # A 'p'/'a' node without children contributes nothing instead
                # of raising KeyError and discarding the whole annotation.
                parts.append(recurse(child.get('children') or ()))
            elif tag == 'blockquote':
                parts.append('[BLOCKQUOTE]')
        elif isinstance(child, list):
            parts.append(recurse(child))
        elif isinstance(child, str):
            parts.append(child + ' ')
    return ''.join(parts)

In [6]:
def _fetch_referents(song_id):
    """Return the referents for one song from the Genius 'referents' endpoint.

    Only the first page is requested (per_page=50). Network, HTTP, decoding
    and response-shape errors propagate to the caller.
    """
    querystring = "https://api.genius.com/referents?song_id=" + str(song_id) + "&per_page=50"
    request = urllib.request.Request(querystring)
    request.add_header("Authorization", "Bearer " + token)
    request.add_header("User-Agent", "")
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(request, timeout=10) as response:
        payload = json.loads(response.read().decode('utf-8'))
    return payload['response']['referents']


def _tsv_field(text):
    """Replace tabs and line breaks with spaces so `text` fits in one TSV field."""
    return text.replace('\t', ' ').replace('\r', ' ').replace('\n', ' ')


def append_annotations(songdf, batchsize):
    """Fetch annotations for a batch of songs and append them to 'annotations.tsv'.

    Args:
        songdf: DataFrame with a `song` column of song ids. Entries that are
            not numeric (e.g. a 'None' placeholder or NaN) are skipped.
        batchsize: number of leading rows of `songdf` to process.

    For each song, one line 'song_id<TAB>lyric<TAB>annotation' is appended per
    referent, using the first annotation of that referent. Songs whose request
    fails and referents whose annotation cannot be extracted are reported on
    stdout and skipped. The function pauses 0.5 s after every song, including
    failed ones, so repeated failures do not hammer the API.
    """
    # Take a chunk of song ids, dropping anything that is not a number.
    batch = songdf.head(batchsize)
    song_ids = pd.to_numeric(batch.song, errors='coerce').dropna().astype(int)

    # Explicit UTF-8 so non-ASCII lyrics are written the same on every platform.
    with open('annotations.tsv', 'a', encoding='utf-8') as f:

        for i, song_id in enumerate(song_ids):
            sys.stdout.write('\r'+str(i))

            # `except Exception` keeps the scrape best-effort while still
            # letting KeyboardInterrupt stop a long run.
            try:
                referents = _fetch_referents(song_id) or []
            except Exception:
                print('***Failed on song id: ' + str(song_id))
                referents = []

            # Many songs return zero referents; the loop is then a no-op.
            for ref in referents:
                lyric = _tsv_field(ref['fragment'])
                try:
                    annotation = _tsv_field(
                        recurse(ref['annotations'][0]['body']['dom']['children'])
                    )
                except Exception:
                    print('***Empty annotation on song id: ' + str(song_id))
                    continue

                sys.stdout.write('\r'+str(i)+" "+lyric)
                f.write(str(song_id) + '\t' + lyric + '\t' + annotation + '\n')

            # Rate-limit every iteration, successful or not.
            time.sleep(0.5)

In [7]:
# Number of songs to process in this run; lower it for a quick trial.
BATCH_SIZE = 1000

# Resume from where the previous run stopped and append the next batch.
songdf = get_songs()
append_annotations(songdf, BATCH_SIZE)

205 White high school's wasn't laughing at the black jokes***Empty annotation on song id: 371866 your only son that walk and talkr breaking up your happy homey as the day my mama had me, see shitse hoes won't hold me back  These hoes won't hold me back  These hoes won't hold me backanna bang, wanna slang?  Get with the programmy all in  If I'm gonna fall in, I'm gonna fall my all in get knocked back  Just gonna get knocked back  Just gonna get knocked back  Back, back, back, back  Back, back, back, back...up in your touch Feel so enamored, hold me tight within your clutch How do you do it? You got me losing every breath What did you give me to make my heart bleed out my chest?
239 They let the monkey out the cage, he got a gun  He got a book, he got a brain, you better run  Was the one that they shun  Baptized in the tears of the slaves as a young***Empty annotation on song id: 371863ith the sharks since the kiddy poolt  Is it real? need to get your shit together  Because a mí no me va