In [3]:
import json
import os
import sys
import time
import traceback
import urllib
import urllib.parse
import urllib.request

import pandas as pd

In [6]:
# Secret token
# Read the Genius API bearer token from a file kept one directory above the
# notebook. The context manager closes the file handle right away, and strip()
# removes surrounding whitespace (e.g. the trailing newline most editors add),
# which would otherwise be sent as part of the Authorization header value.
with open("../ACCESS_TOKEN.txt", "r") as token_file:
    token = token_file.read().strip()

In [7]:
# Build an authenticated search request for the Genius API.
# The search term is percent-encoded before being appended to the endpoint URL.
search_term = 'Ayla Celik'
_URL_API = "https://api.genius.com/"
_URL_SEARCH = "search?q="
querystring = "".join([_URL_API, _URL_SEARCH, urllib.parse.quote(search_term)])
# Passing the headers to the constructor registers them exactly as
# successive add_header() calls would.
request = urllib.request.Request(
    querystring,
    headers={
        "Authorization": "Bearer " + token,
        "User-Agent": "",
    },
)

In [42]:
def get_songs(annotations_file, song_ids_file):
    """Return the songs that come after the last song already annotated.

    Parameters
    ----------
    annotations_file : str
        Tab-separated file, without a header row, whose columns are read as
        ``song_id``, ``lyric`` and ``annotation``.
    song_ids_file : str
        Comma-separated file, without a header row, whose columns are read as
        ``artist`` and ``song``. The ``song`` column is compared against the
        annotation file's ``song_id`` values.

    Returns
    -------
    pandas.DataFrame
        Every row of ``song_ids_file`` when the annotations file does not
        exist or holds no rows; otherwise the rows that follow the first row
        whose ``song`` equals the last ``song_id`` in the annotations file.

    Raises
    ------
    IndexError
        If the last annotated song id does not occur in ``song_ids_file``.
    """
    annotation_header = ['song_id', 'lyric', 'annotation']
    song_header = ['artist', 'song']

    # The song list is needed on every path, so read it exactly once.
    songs = pd.read_csv(song_ids_file, names=song_header, sep=',')

    try:
        annotations = pd.read_csv(annotations_file, names=annotation_header, sep='\t')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Nothing has been scraped yet: start from the first song. Any other
        # error (e.g. a malformed file) is left to propagate, because silently
        # starting over would append duplicate annotations.
        return songs

    if annotations.empty:
        return songs

    # Compare as strings because the song column may hold non-numeric
    # placeholders alongside ids.
    last_id = str(annotations.song_id.values[-1])
    matches = songs.index[songs.song.astype(str) == last_id]
    if len(matches) == 0:
        raise IndexError(
            'last annotated song id ' + last_id + ' not found in ' + str(song_ids_file)
        )
    return songs[matches[0] + 1:]

In [43]:
# Recursively walk the annotation DOM and collect its text. Only 'p' and 'a'
# nodes are descended into; blockquotes become a '[BLOCKQUOTE]' marker and
# all other tags (formatting, images, etc.) are skipped.

def recurse(children):
    """Flatten a list of annotation DOM nodes into a single text string.

    Parameters
    ----------
    children : iterable
        DOM nodes. A node may be a ``str`` (text), a ``list`` of further
        nodes, or a ``dict`` carrying a ``'tag'`` key and, usually, a
        ``'children'`` list.

    Returns
    -------
    str
        Each text node followed by one space, in document order. Only ``'p'``
        and ``'a'`` elements are descended into; a ``'blockquote'`` element
        contributes the literal marker ``'[BLOCKQUOTE]'``; every other tag
        and every other node type contributes nothing.
    """
    pieces = []
    for child in children:
        if isinstance(child, dict) and 'tag' in child:
            tag = child['tag']
            if tag == 'p' or tag == 'a':
                # A node without (or with null) children simply has no text;
                # it must not abort extraction of the whole annotation.
                pieces.append(recurse(child.get('children') or []))
            elif tag == 'blockquote':
                pieces.append('[BLOCKQUOTE]')
        elif isinstance(child, list):
            pieces.append(recurse(child))
        elif isinstance(child, str):
            pieces.append(child + ' ')
    return ''.join(pieces)

In [44]:
def _tsv_field(text):
    """Make text safe for one TSV cell by turning tabs and line breaks into spaces."""
    for separator in ('\t', '\r', '\n'):
        text = text.replace(separator, ' ')
    return text


def append_annotations(songdf, annotations_file, batchsize, delay=0.5):
    """Fetch lyric annotations for a batch of songs and append them to a TSV file.

    For each song id in the first ``batchsize`` rows of ``songdf`` (rows whose
    ``song`` value is the string ``'None'`` are dropped), one request is sent
    to the Genius ``referents`` endpoint with ``per_page=50``; no further
    pages are requested. Every referent that has an annotation is written as
    one line: ``song_id <TAB> lyric <TAB> annotation``.

    Relies on the module-level ``token`` for authorization and on ``recurse``
    to flatten the annotation DOM into text.

    Parameters
    ----------
    songdf : pandas.DataFrame
        Must have a ``song`` column holding song ids.
    annotations_file : str
        Path of the output file, opened in append mode as UTF-8.
    batchsize : int
        Number of leading rows of ``songdf`` to process.
    delay : float, optional
        Seconds to wait after each successfully fetched song (default 0.5).

    Notes
    -----
    A failed request, or a referent with no usable annotation, is reported on
    stdout and skipped; it does not stop the batch.
    """
    # Get a chunk of song ids
    batch = songdf.head(batchsize)
    song_ids = batch[batch.song.astype(str) != 'None'].song.astype(int)

    # Explicit UTF-8 so non-ASCII lyrics are written the same way on every platform.
    with open(annotations_file, 'a', encoding='utf-8') as f:

        for i, song_id in enumerate(song_ids):
            sys.stdout.write('\r' + str(i))

            # Format the search query using the 'referents' api
            # Given a song id, returns all of its referents (lines that have been annotated)
            # and the corresponding annotations.
            querystring = "https://api.genius.com/referents?song_id=" + str(song_id) + "&per_page=50"
            request = urllib.request.Request(querystring)
            request.add_header("Authorization", "Bearer " + token)
            request.add_header("User-Agent", "")

            # Send the request to Genius and parse the response. Network,
            # decoding and payload-shape errors are all treated as "skip this
            # song"; catching Exception (not a bare except) keeps Ctrl-C working.
            try:
                with urllib.request.urlopen(request, timeout=10) as response:
                    payload = response.read().decode('utf-8')
                referents = json.loads(payload)['response']['referents']
            except Exception:
                print('***Failed on song id: ' + str(song_id))
                continue

            # Many songs return zero annotations; the loop is then a no-op.
            for ref in referents:
                lyric = _tsv_field(ref['fragment'])
                try:
                    annotation = _tsv_field(
                        recurse(ref['annotations'][0]['body']['dom']['children'])
                    )
                except (LookupError, TypeError):
                    # Missing key, empty annotation list, or a null node on the path.
                    print('***Empty annotation on song id: ' + str(song_id))
                    continue

                sys.stdout.write('\r' + str(i) + " " + lyric)
                f.write(str(song_id) + '\t' + lyric + '\t' + annotation + '\n')

            # Pause between songs to keep the request rate down.
            time.sleep(delay)

In [45]:
def main(song_ids_file, annotations_file, batch_size=1000):
    """Scrape one batch of annotations, resuming after the last annotated song.

    Parameters
    ----------
    song_ids_file : str
        CSV of songs, passed to ``get_songs``.
    annotations_file : str
        TSV that annotations are appended to; also read by ``get_songs`` to
        decide which songs are still pending.
    batch_size : int, optional
        Number of pending songs to process in this run (default 1000).

    Notes
    -----
    Outcome is announced through the ``say`` shell command (presumably the
    macOS text-to-speech utility). "batch done" is announced whether or not
    the batch failed.
    """
    songdf = get_songs(annotations_file, song_ids_file)
    try:
        append_annotations(songdf, annotations_file, batch_size)
    except Exception:
        # Print the traceback so the cause of the failure is visible instead of
        # being discarded; KeyboardInterrupt is deliberately not caught here.
        traceback.print_exc()
        os.system('say "failure"')

    os.system('say "batch done"')

In [46]:
# Process the next 5 pending songs from the id list
main(
    song_ids_file='song_ids_2.csv',
    annotations_file='annotations2.tsv',
    batch_size=5,
)

4 Here come da sunsh like Kool & The Gang Sikh ke adava sharmile nain ho gaye  Tera ki kasoor je nashili nain ho gaye  Sikh ke adava sharmile nain ho gaye