In [73]:
import sys
import re
import requests
import json
from bs4 import BeautifulSoup

import urllib2
import socket
import time

In [8]:
def load_credentials():
    lines = [line.rstrip('\n') for line in open('credentials.ini')]
    chars_to_strip = " \'\""
    for line in lines:
        if "client_id" in line:
            client_id = re.findall(r'[\"\']([^\"\']*)[\"\']', line)[0]
        if "client_secret" in line:
            client_secret = re.findall(r'[\"\']([^\"\']*)[\"\']', line)[0]
        #Currently only need access token to run, the other two perhaps for future implementation
        if "client_access_token" in line:
            client_access_token = re.findall(r'[\"\']([^\"\']*)[\"\']', line)[0]

    return client_id, client_secret, client_access_token

In [9]:
# Genius API credentials (available globally)
client_id, client_secret, client_access_token = load_credentials()
genius_url = "http://api.genius.com"
headers = {'Authorization': 'Bearer ' + client_access_token}

In [10]:
def search_genius(search_term):             
    querystring = "http://api.genius.com/search?q=" + urllib2.quote(search_term) + "&page=" + str(1)        
    request = urllib2.Request(querystring)
    request.add_header("Authorization", "Bearer " + client_access_token)   
    request.add_header("User-Agent", "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") #Must include user agent of some sort, otherwise 403 returned
    while True:
        try:
            response = urllib2.urlopen(request, timeout=4) #timeout set to 4 seconds; automatically retries if times out
            raw = response.read()
        except socket.timeout:
            print("Timeout raised and caught")
            continue
        break    

    return json.loads(raw)

In [11]:
def get_song_and_artist_ids(song_title, artist_name):  
    json_obj = search_genius(song_title + " " + artist_name)
    body = json_obj["response"]["hits"]
    body = body[0] # Just keep the first hit (for now)
    song_api   = body['result']['api_path']
    artist_api = body['result']['primary_artist']['api_path']
    
    return song_api, artist_api

**Download all song lyrics for a given artist**

In [18]:
# It's dumb, but you have to use an artist's song to get their artist ID
artist_name ='Mos Def'
obj = search_genius(artist_name)
song_title = obj['response']['hits'][0]['result']['title']
song_id, artist_id = get_song_and_artist_ids(song_title,artist_name)
id_num = int(artist_id.split('/')[-1])

# Okay, we have the artist API id, let's get a list of all of their songs on Genius
all_song_ids = []
# for page in range(1,10):
page = 1
while True:
    request = urllib2.Request(genius_url + artist_id + '/songs' + '?page=%d' % page)
    request.add_header("Authorization", "Bearer " + client_access_token)
    request.add_header("User-Agent", "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")
    response = urllib2.urlopen(request, timeout=4)
    raw = response.read()
    json_obj = json.loads(raw)      
    songs = json_obj['response']['songs']
    
    # Keep track of song API paths if the primary artist is correct
    [all_song_ids.append(song['api_path']) for song in songs if song['primary_artist']['id'] == id_num]            

    num_songs = len(songs)    
    if json_obj['response']['next_page']==None:
        if page==1 & num_songs == 0:
            print("No results for: " + search_term)
        break      
    print("Page {0} -- all songs {1}:".format(page, num_songs))    
    page += 1
    
print('Total songs found: {0}'.format(len(all_song_ids)))

Page 1 -- all songs 19:
Page 2 -- all songs 20:
Page 3 -- all songs 20:
Page 4 -- all songs 20:
Page 5 -- all songs 20:
Page 6 -- all songs 20:
Page 7 -- all songs 20:
Page 8 -- all songs 20:
Page 9 -- all songs 20:
Page 10 -- all songs 19:
Page 11 -- all songs 20:
Page 12 -- all songs 20:
Page 13 -- all songs 20:
Page 14 -- all songs 20:
Page 15 -- all songs 19:
Page 16 -- all songs 20:
Total songs found: 187


In [19]:
def lyrics_from_song_api_path(song_api_path):
    # Use BeautifulSoup to scrape lyrics off of a Genius song URL
    querystring = genius_url + song_api_path      
    request = urllib2.Request(querystring)
    request.add_header("Authorization", "Bearer " + client_access_token)   
    request.add_header("User-Agent", "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") #Must include user agent of some sort, otherwise 403 returned
    while True:
        try:
            response = urllib2.urlopen(request, timeout=4) #timeout set to 4 seconds; automatically retries if times out
            raw = response.read()
        except socket.timeout:
            print("Timeout raised and caught")
            continue
        break

    json_obj = json.loads(raw)  

    # Get the URL to the song lyrics
    path = json_obj['response']['song']['path']
    page_url = "http://genius.com" + path
    page = requests.get(page_url)
    html = BeautifulSoup(page.text, "html.parser")    
    [h.extract() for h in html('script')]        
    lyrics = html.find("div", class_="lyrics").get_text().encode('ascii','ignore').decode('ascii')
    lyrics = re.sub('\[.*\]','',lyrics) # Remove [Verse] and [Bridge] stuff
    lyrics = re.sub('\n{2}','',lyrics)  # Remove gaps between verses
    return str(lyrics)

In [20]:
# This is annoying, Genius doesn't seem to differentiate between interviews and actual songs
# It just calls them all songs. You'd think there'd be a distinction in the metadata.
lyrics = lyrics_from_song_api_path(all_song_ids[4])
print(lyrics)


Peace, this is Yasiin
No more parties in SA
Please, tell 'em no more parties in SA
Ain't home arrest, I don't need to stay
I'll leave and I'll stay away
I committed no crime any place
Why these police up in my face?
Why they raiding my place?
Why I don't feel safe?
This is not an expression of fear
This is just to make things clear
My intentions are pure in coming here
And that's for everything I love or hold dear
Umi's in the building
So is my wife and my children
I committed no crime
Why is the state wasting my time?
They must be out of their minds
I forgive 'em, that's the spirit of divine
I just wanna go where I'm wanted
Where I'm loved, stop frontin'
Where I live is my choice, you cannot mute my voice
Thank you, Kanye West, for being a real friend, a real friend
A real friend
No more parties in SA
Please, no more parties in SA
I heard your choice the first time, I'll go away
And when I leave, that's exactly where I'll stay
This is not an expression of fear
This is just to make th

In [21]:
def write_lyrics_to_file(lyrics,artist=''):   
    if artist!='':
        filename = 'Lyrics_{0}.txt'.format(artist.replace(' ',''))
    else:
        filename = "Lyrics.txt"
    with open(filename, "a") as text_file:
        text_file.write('\n' + lyrics)

In [22]:
# Output text file containing the lyrics of every identified song
for i in range(len(all_song_ids)):
    lyrics = lyrics_from_song_api_path(all_song_ids[i])
    write_lyrics_to_file(lyrics,artist_name)

**Get song lyrics by searching song and artist name**

In [17]:
song_title = 'Mos Def'
artist_name = 'UMI Says'
song_api_path, artist_api_path = get_song_and_artist_ids(song_title, artist_name)   
lyrics = lyrics_from_song_api_path(song_api_path)   
print(lyrics)


I don't wanna write this down
I wanna tell you how I feel right now
I don't wanna take no time to write this down
I wanna tell you how I feel right now, hey (World premiere)Tomorrow may never come
For you or me, life is not promised
Tomorrow may never show up
For you and me, this life is not promisedI ain't no perfect man
I'm trying to do, the best that I can
With what it is I have
I ain't no perfect man
I'm trying to do, the best that I can
With what it is I have
Put my heart and soul into this song
I hope you feel me
From where I am, to wherever you are
I mean that sincerely
Tomorrow may never come
For you and me, life is not promised
Tomorrow may never appear
You better hold this very moment very close to you
Very close to you, so close to you
So close to you, don't be afraid, to let it shine
My Umi said shine your light on the world
Shine your light for the world to see
My Abi said shine your light on the world
Shine your light for the world to see
My Umi said shine your light on 