In [56]:
import json
import sys
import time
import urllib
# `import urllib` alone does not load the submodules; import the ones used below explicitly.
import urllib.parse
import urllib.request

import pandas as pd

In [2]:
# Secret token
# Read the Genius API access token from a local file that is kept out of the notebook.
# The file is closed as soon as it has been read, and surrounding whitespace is
# stripped: a trailing newline would otherwise end up inside the
# "Authorization: Bearer ..." header value and be rejected as an illegal header.
with open("ACCESS_TOKEN.txt", "r") as _token_file:
    token = _token_file.read().strip()

In [3]:
# Build an authenticated request against the Genius API search endpoint
search_term = 'Ayla Celik'
_URL_API = "https://api.genius.com/"
_URL_SEARCH = "search?q="

# The search term is percent-encoded so that spaces and non-ASCII characters are URL-safe
querystring = "{}{}{}".format(_URL_API, _URL_SEARCH, urllib.parse.quote(search_term))

# Same two headers as before, supplied up front instead of via add_header()
request = urllib.request.Request(
    querystring,
    headers={
        "Authorization": "Bearer " + token,
        "User-Agent": "",
    },
)

In [52]:
def get_songs():
    """Return the songs from ``song_ids_2.csv`` that still need to be scraped.

    ``song_ids_2.csv`` is read as two unnamed columns, labelled ``artist`` and
    ``song``. If ``annotations.tsv`` already exists, the last ``song_id``
    written to it is looked up in the ``song`` column and only the rows after
    that song are returned, so an interrupted scrape can be resumed.

    Returns:
        pandas.DataFrame with columns ``artist`` and ``song``. The whole song
        list is returned when ``annotations.tsv`` is missing or empty.

    Raises:
        FileNotFoundError: if ``song_ids_2.csv`` does not exist.
        IndexError: if the last annotated song id is not present in
            ``song_ids_2.csv``.
    """
    annotation_header = ['song_id', 'lyric', 'annotation']
    songs_header = ['artist', 'song']

    # Always load the song list with the *song* header so that every return
    # path yields a frame with a `song` column.
    songs = pd.read_csv('song_ids_2.csv', names=songs_header, sep=',')

    try:
        annotations = pd.read_csv('annotations.tsv', names=annotation_header, sep='\t')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Nothing has been scraped yet: start from the beginning.
        return songs

    if annotations.empty:
        return songs

    # Resume right after the last song that produced an annotation row.
    last_id = annotations.song_id.values[-1]
    matches = songs.index[songs.song == last_id]
    if len(matches) == 0:
        raise IndexError(
            'last annotated song id ' + str(last_id) + ' not found in song_ids_2.csv'
        )
    return songs[matches[0] + 1:]

In [51]:
def recurse(children):
    """Flatten an annotation DOM fragment into plain text.

    Walks ``children`` recursively and collects the text, skipping
    formatting, images and any other tag that is not handled below.

    Args:
        children: iterable of DOM nodes. Each node is a ``str`` (text),
            a ``list`` (nested nodes) or a ``dict`` with a ``'tag'`` key and,
            optionally, a ``'children'`` list.

    Returns:
        str: every text node followed by a single space, in document order.
        ``p`` and ``a`` nodes contribute the text of their children; a
        ``blockquote`` node contributes the literal marker ``[BLOCKQUOTE]``;
        all other nodes contribute nothing.
    """
    parts = []
    for child in children:
        if isinstance(child, dict):
            tag = child.get('tag')
            if tag == 'p' or tag == 'a':
                # A node may carry no 'children' key (or a null one); treat
                # that as an empty node instead of raising KeyError.
                parts.append(recurse(child.get('children') or []))
            elif tag == 'blockquote':
                parts.append('[BLOCKQUOTE]')
        elif isinstance(child, list):
            parts.append(recurse(child))
        elif isinstance(child, str):
            parts.append(child + ' ')
    return ''.join(parts)

In [60]:
def _tsv_field(text):
    """Replace newlines, tabs and carriage returns with spaces so ``text`` fits in one TSV cell."""
    return text.replace('\n', ' ').replace('\t', ' ').replace('\r', ' ')


def append_annotations(songdf, batchsize):
    """Fetch annotations for a batch of songs and append them to ``annotations.tsv``.

    For each of the first ``batchsize`` rows of ``songdf`` the Genius
    ``referents`` endpoint is queried with the song id, and one
    ``song_id<TAB>lyric<TAB>annotation`` line is appended per referent.
    Only the first annotation of each referent is used. Songs whose request
    or response parsing fails are reported and skipped; referents whose
    annotation cannot be extracted are reported and skipped.

    Args:
        songdf: DataFrame with a ``song`` column holding song ids
            (convertible to ``int``).
        batchsize: number of rows to take from the top of ``songdf``.

    Returns:
        None. Output is appended to ``annotations.tsv`` and progress is printed.
    """
    # Get a chunk of song ids
    song_ids = songdf.head(batchsize).song.astype(int)

    # Explicit UTF-8 so non-ASCII lyrics are written the same way on every platform.
    with open('annotations.tsv', 'a', encoding='utf-8') as f:

        for song_id in song_ids:

            # Format the search query using the 'referents' api
            # Given a song id, returns all of its referents (lines that have been annotated)
            # and the corresponding annotations.
            # NOTE(review): only one page of up to 50 referents is requested; songs with
            # more referents are presumably truncated -- confirm against the API's paging.
            querystring = "https://api.genius.com/referents?song_id=" + str(song_id) + "&per_page=50"
            request = urllib.request.Request(querystring)
            request.add_header("Authorization", "Bearer " + token)
            request.add_header("User-Agent", "")

            # Send the request to Genius, and parse the response.
            # `except Exception` keeps the scrape best-effort but, unlike a bare
            # `except:`, lets KeyboardInterrupt stop a long-running batch.
            try:
                with urllib.request.urlopen(request, timeout=10) as response:
                    json_obj = json.loads(response.read().decode('utf-8'))
                # The data we want can be found here
                referents = json_obj['response']['referents']
            except Exception:
                print('***Failed on song id: ' + str(song_id))
                # Fall through with nothing to write so the pause below still applies.
                referents = []

            # Many songs return zero annotations. If we get annotations, parse through them
            for ref in referents:
                lyric = _tsv_field(ref['fragment'])
                try:
                    annotation = _tsv_field(
                        recurse(ref['annotations'][0]['body']['dom']['children'])
                    )
                except (KeyError, IndexError, TypeError):
                    print('***Empty annotation on song id: ' + str(song_id))
                    continue

                print(str(song_id) + " " + lyric)
                f.write(str(song_id) + '\t' + lyric + '\t' + annotation + '\n')

            # Pause between songs, including after a failed request.
            time.sleep(0.5)

In [None]:
# Pick up the songs that still need scraping, then fetch annotations for the next 200 of them
songdf = get_songs()
append_annotations(songdf, batchsize=200)

Using the Guardian as a shield  To cover my thighs against the rain  I do not mind about my hair
The unfamiliar is right below our eyes
The beauty memories of all the places  We've captured with our camera  We've seen the pyramids  We've seen the Louvre  We've seen the Orion upside down  Total eclipses and moonlight shadows  We've seen dolphins jumping waves  We've skied the mountains and we swam in the rivers  And let the sunlight dry our skins
Show a view to someone  Who chose to live his whole life in cave  He'll raise his arms to protect his eyes from learning  And the blindness to which he belongs
This time it's me, it's me
Don't look for what we know
The unfamiliar is right below our eyes
Cascades of chances I'll just let them be
Of two soft voices blended in perfection From the reels of this record that I've found
But I realized that the one you were before  Had changed into somebody for whom  I wouldn't mind to put the kettle on
Still I don't know what I can save you from  I do