In [1]:
import sys
import re
import requests
import json
from bs4 import BeautifulSoup
import urllib2
import socket
import time

## Genius song and artist classes ##

* Actually, it probably makes more sense to make a file called GeniusAPI.py that contains Song and Artist classes and then the functions for using the Genius API. So you'd do something like:

```python
# Here's how I envision the final form of the GeniusAPI.py file and the song and artist classes
import GeniusAPI as Genius

song1 = Genius.search_song('Yesterday','The Beatles') # Song object
song2 = Genius.search_song('Prom Night','Chance the Rapper')
artist = Genius.search_artist('Michael Jackson') # Artist object

# Hmm, what other functions would be a part of the GeniusAPI class?

```

### TODO ###
* These methods are generally fairly slow. I think it would help to minimize the number of requests made through urllib2. Right now my methods (probably unnecessarily) make multiple calls to the Genius API, or to song page URLs, to collect information that is spread across several JSON objects. Much of that information could probably be pulled in one fell swoop and then extracted locally when needed.

In [2]:
class _GeniusAPI(object):
    # This is a superclass that Genius() inherits from. Not sure if this makes any sense, but it
    # seemed like a good idea to have this class (more removed from user) handle the lower-level
    # interaction with the Genius API, and then Genius() has the more user-friendly search
    # functions
    """Interface with the Genius.com API
    
    Attributes:
        base_url: (str) Top-most URL to access the Genius.com API with
        
    Methods:
        _load_credentials()
            OUTPUT: client_id, client_secret, client_access_token
        _make_api_url_request()
            INPUT:  
            OUTPUT: 
        _search_genius_api()
            INPUT:  
            OUTPUT: 
                                
    """    
    
    # Genius API constant
    _base_url = "http://api.genius.com"    
    
    def __init__(self):
        self._client_access_token = self._load_credentials()
        self._header_authorization = 'Bearer ' + self._client_access_token        
        
    def _load_credentials(self):
        """Load the Genius.com API authorization information from the 'credentials.ini' file"""
        lines = [line.rstrip('\n') for line in open('credentials.ini')]
        chars_to_strip = " \'\""
        for line in lines:
            if "client_id" in line:
                client_id = re.findall(r'[\"\']([^\"\']*)[\"\']', line)[0]
            if "client_secret" in line:
                client_secret = re.findall(r'[\"\']([^\"\']*)[\"\']', line)[0]
            #Currently only need access token to run, the other two perhaps for future implementation
            if "client_access_token" in line:
                client_access_token = re.findall(r'[\"\']([^\"\']*)[\"\']', line)[0]
                
        return client_access_token
                    
    def _make_api_url_request(self, querystring):
        """Send a URL request to the Genius API with the designated search term, returning a json object"""
        request = urllib2.Request(querystring)        
        request.add_header("Authorization",self._header_authorization)
        request.add_header("User-Agent","curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpnSSL 0.9.6b) (ipv6 enabled)")
        while True:
            try:
                response = urllib2.urlopen(request, timeout=5) #timeout set to 4 seconds; automatically retries if times out
                raw = response.read()
            except socket.timeout:
                print("Timeout raised and caught")
                continue
            break

        return json.loads(raw)

    def _search_genius_api(self, search_term, is_api_path):
        if is_api_path == True:
            querystring = self._base_url + search_term
        else:
            querystring = self._base_url + "/search?q=" + urllib2.quote(search_term)
        return self._make_api_url_request(querystring)
                    

In [3]:
class Genius(_GeniusAPI):
    """User-level interface with the Genius.com API. User can search for songs (getting lyrics) and artists (getting songs)

    Methods:
        search_song(song_name, artist_name)
            INPUT:  (str) Name of song, (str) Name of artist
            OUTPUT: (Song) Song object for the requested song

        search_artist(artist_name)
            INPUT:  (str) Name of artist
            OUTPUT: (Artist) Artist object containing (optional?) all artist songs
            NOTE: listed here as planned; it is not defined in this class yet.

    """

    def _get_api_paths(self, song_name, artist_name):
        """Search for "<song_name> <artist_name>" and return the API paths of the first hit.

        Returns:
            (song_api_path, artist_api_path) taken from the first search hit.

        Raises:
            IndexError: if the search returned no hits.
        """
        # Where does this method really belong?
        json_obj = self._search_genius_api(song_name + " " + artist_name, False)
        hits = json_obj["response"]["hits"]
        if not hits:
            # Same exception type as indexing an empty list, but with a useful message
            raise IndexError("Genius returned no search hits for %r by %r" % (song_name, artist_name))
        result = hits[0]['result']  # Just keep the first hit (for now)
        song_api = result['api_path']
        artist_api = result['primary_artist']['api_path']

        return song_api, artist_api

    def _clean_lyrics(self, raw_lyrics):
        """Reduce scraped lyrics text to plain ASCII lines without section tags."""
        lyrics = raw_lyrics.encode('ascii', 'ignore').decode('ascii')
        # Remove [Verse] and [Bridge] stuff; each tag is matched on its own so
        # lyric text sitting between two tags on one line is kept
        lyrics = re.sub(r'\[[^\]\n]*\]', '', lyrics)
        # Remove gaps between verses: collapse every run of blank lines into a
        # single newline so neighbouring lines are never glued together
        lyrics = re.sub(r'\n{2,}', '\n', lyrics)
        return str(lyrics).strip('\n')

    def _extract_lyrics(self, html):
        """Pull the cleaned lyrics out of a parsed Genius song page.

        Raises:
            ValueError: if the page has no <div class="lyrics"> element.
        """
        lyrics_div = html.find("div", class_="lyrics")
        if lyrics_div is None:
            raise ValueError('No <div class="lyrics"> element found on the song page')
        # Drop embedded <script> tags so their source does not end up in the text
        for script in lyrics_div('script'):
            script.extract()
        return self._clean_lyrics(lyrics_div.get_text())

    def _scrape_lyrics_from_song_api_path(self, song_api_path):
        """Use BeautifulSoup to scrape lyrics off of the Genius page for a song API path"""
        # Get the URL to the song lyrics
        page_url = self._get_url_from_api_path(song_api_path)
        page = requests.get(page_url)
        html = BeautifulSoup(page.text, "html.parser")
        return self._extract_lyrics(html)

    def _get_url_from_api_path(self, song_api_path):
        """Access the Genius API at the designated song path and return the song page URL"""
        json_obj = self._search_genius_api(song_api_path, True)
        path = json_obj['response']['song']['path']  # Get the URL to the song page

        return "http://genius.com" + path

    def _scrape_song_info_from_api_path(self, page_url):
        """Use BeautifulSoup to scrape song info off of a Genius song URL

        Only the lyrics are scraped so far; title, artist, album and year are
        placeholder values.

        Returns:
            (dict) with keys 'title', 'artist', 'lyrics', 'album' and 'year'.
        """
        page = requests.get(page_url)
        html = BeautifulSoup(page.text, "html.parser")

        # Song info (scraped from HTML)
        song_info = {}
        song_info['title'] = 'TITLE'
        song_info['artist'] = 'ARTIST'
        song_info['lyrics'] = self._extract_lyrics(html)
        song_info['album'] = 'ALBUM'
        song_info['year'] = None

        return song_info

    def search_song(self, song_title, artist_name):
        """Allow user to search for a song on the Genius.com database by supplying song and artist name"""
        # TODO - The Genius search engine pretty much takes care of this, but it'd be kind of
        # cool to use NLTK to make sure the song title and artist name were spelled correctly

        # Get the API path Genius.com uses to refer to the song
        song_api_path, artist_api_path = self._get_api_paths(song_title, artist_name)

        # Get the URL to the song page from the API path
        song_url = self._get_url_from_api_path(song_api_path)

        # Scrape the HTML on the API song page
        song_info = self._scrape_song_info_from_api_path(song_url)

        # Create the Song object
        song = Song(song_info['title'], song_info['artist'], song_info['lyrics'], song_info['album'], song_info['year'])

        return song
                    

In [4]:
class Song(object):
    """A song from the Genius.com database.

    Attributes:
        title:    (str) Title of the song.
        artist:   (str) Primary artist on the song.
        lyrics:   (str) Full set of song lyrics.
        album:    (str) Name of the album the song is on.
        year:     (int) Year the song was released.
        api_info: (dict) Extra API details; this should contain api_path, web URL, etc.
    """

    def __init__(self, title, artist, lyrics, album='', year=None, api_info=None):
        """Return a Song object whose title is *title*, artist is *artist*, and so on."""
        self.title = title
        self.artist = artist
        self.lyrics = lyrics
        self.album = album
        self.year = year
        # A fresh dict per instance: a shared mutable default would leak
        # api_info entries from one Song into every other Song
        self.api_info = {} if api_info is None else api_info

    def __str__(self):
        """Return a string representation of the Song object.

        Lyrics longer than 100 characters are cut to the first 100 followed by "...".
        """
        excerpt = self.lyrics[:100]
        if len(self.lyrics) > 100:
            excerpt += "..."
        return '{0}, by {1}, recorded in {2}:\n"{3}"'.format(self.title, self.artist, self.year, excerpt)

    def __repr__(self):
        return repr(self.title)

    def __cmp__(self, other):
        """Order songs by title, then artist, then lyrics.

        Returns a negative number, zero or a positive number. Zero means all
        three fields are equal; combining cmp() results with `and` would
        report zero as soon as the titles alone matched.
        """
        mine = (self.title, self.artist, self.lyrics)
        theirs = (other.title, other.artist, other.lyrics)
        # Tuples compare field by field, so the first differing field decides
        return (mine > theirs) - (mine < theirs)

    def __list__(self):
        # How do I do this?
        # NOTE: Python has no __list__ hook, so list(song) never calls this
        # method; supporting list(song) would need __iter__ instead.
        return


In [5]:
class Artist(object):
    """An artist from the Genius.com database.

    Attributes:
        name:      (str) Artist name.
        num_songs: (int) Total number of songs listed on Genius.com
        songs:     (list) Song objects added to this artist so far.
        api_info:  (dict) Extra API details; this should contain api_path, web URL, etc.

    """
    def __init__(self, name, num_songs=0, song=None, api_info=None):
        """Return an Artist object whose name is *name*, etc.

        Args:
            name: (str) Artist name.
            num_songs: (int) Stored as given; it is not derived from *song*.
            song: Initial song(s). None gives an empty list, a list or tuple
                is copied, and any other value is stored as a single song.
            api_info: (dict or None) Extra API details; None gives a new empty dict.
        """
        self.name = name
        self.num_songs = num_songs
        # Always keep a real list so add_song() can append to it
        if song is None:
            self.songs = []
        elif isinstance(song, (list, tuple)):
            self.songs = list(song)
        else:
            self.songs = [song]
        # A fresh dict per instance instead of one shared mutable default
        self.api_info = {} if api_info is None else api_info

    def add_song(self, song):
        """Add a Song object to the Artist object and bump num_songs by one"""
        self.songs.append(song)
        self.num_songs += 1

    def remove_song(self, song):
        """Do I need this ability? Placeholder: not implemented, calling it changes nothing."""
        

In [6]:
# Would it make any sense to have a Lyrics class? To store attributes and stuff?
class Lyrics(unicode):
    """A unicode string of song lyrics that also carries annotations.

    Attributes:
        text:        The lyrics text passed to the constructor.
        annotations: (dict) Annotations attached to the lyrics.
    """
    # These methods come from the TextBlob Word() class -- don't really know what I'm doing here
    def __new__(cls, text, annotations=None):
        """Return a new instance of the class. It is necessary to override
        this method because the unicode constructor must only receive the
        text, while the extra annotations argument is handled by __init__.
        """
        return super(Lyrics, cls).__new__(cls, text)

    def __init__(self, text, annotations=None):
        self.text = text
        # A fresh dict per instance instead of one shared mutable default
        self.annotations = {} if annotations is None else annotations

    def __str__(self):
        """Return the first 100 characters of the text, followed by "..." if it was cut."""
        excerpt = self.text[:100]
        if len(self.text) > 100:
            excerpt += "..."
        return excerpt

# Usage:
# lyrics = Lyrics(song.lyrics)

### TODO - find how stuff like title, artist name, year, etc. is stored in the HTML ###

In [7]:
# Run the song lookup step by step for one song so the raw page HTML can be inspected
interface = Genius()
# API paths of the first search hit: one for the song, one for its primary artist
song_api, artist_api = interface._get_api_paths('Yesterday','The Beatles')
# Resolve the song's API path to the URL of its genius.com page
song_url = interface._get_url_from_api_path(song_api)
# Download the page and parse it; `song_url` and `html` are printed in the cells below
page = requests.get(song_url)    
html = BeautifulSoup(page.text, "html.parser")

In [8]:
# Show the song page URL resolved from the API path in the previous cell
print(song_url)

http://genius.com/The-beatles-yesterday-lyrics


In [9]:
# Dump the whole parsed page to look for where title, artist, year, etc. are stored
print(html)


<!DOCTYPE html>

<html class="snarly bagon_song_page--enabled gastly--disabled" lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
<head>
<base href="//genius.com/" target="_top"/>
<script type="text/javascript">
//<![CDATA[
var _sf_startpt=(new Date()).getTime();
//]]>
</script>
<title>The Beatles – Yesterday Lyrics | Genius Lyrics</title>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<meta content="width=device-width,initial-scale=1" name="viewport"/>
<link href="https://chrome.google.com/webstore/detail/ccaokncpmmjiakalbcfdbfmpcaiddjdn" rel="chrome-webstore-item"/>
<meta content="app-id=709482991" name="apple-itunes-app"/>
<link href="https://assets.genius.com/images/apple-touch-icon.png?1499885101" rel="apple-touch-icon-precomposed">
<link href="https://assets.genius.com/images/apple-touch-icon.png?1499885101" rel="apple-touch-icon-precomposed"/>
<!-- Mobile IE allows us to activate ClearType technology

## Example usage of the Genius class ##
### Search for songs, get Song objects in return ###

In [10]:
# Each search_song() call searches the Genius API, resolves the song page URL and scrapes the lyrics
interface = Genius()
song1 = interface.search_song('Take off your sunglasses','Ezra Furman')
print(song1)  # Song.__str__ shows title, artist, year and at most the first 100 characters of lyrics

song2 = interface.search_song('English Tea','Paul McCartney')
print('\n')  # visual gap between the two songs
print(song2)


TITLE, by ARTIST, recorded in None:
"My baby went out with her family to a ski resort in Colorado
Well she put on her skis and she slid d..."


TITLE, by ARTIST, recorded in None:
"Would you care to sit with me
For a cup of English tea
Very twee, very me
Any sunny morning
What a p..."
