In [2]:
import sys
import re
import requests
import json
from bs4 import BeautifulSoup
import urllib2
import socket
import time

## Genius song and artist classes ##

* Actually, it probably makes more sense to make a file called GeniusAPI.py that contains Song and Artist classes and then the functions for using the Genius API. So you'd do something like:

```python
# Here's how I envision the final form of the GeniusAPI.py file and the song and artist classes
import GeniusAPI as Genius

song1 = Genius.search_song('Yesterday','The Beatles') # Song object
song2 = Genius.search_song('Prom Night','Chance the Rapper')
artist = Genius.search_artist('Michael Jackson') # Artist object

# Hmm, what other functions would be a part of the GeniusAPI class?

```

### TODO ###
* These methods are generally fairly slow. I think it would help if I minimized the calls to urllib2. Right now my methods (probably unnecessarily) make multiple calls to the Genius API or URLs to get information and multiple json objects. Probably a lot of the information could be pulled in one fell swoop and then extracted locally when needed.
  * I have greatly improved the speed by writing classes for Genius json dicts (Song, Artist, etc.) and avoiding URL calls. Woo hoo!
* Would it make more sense for these data classes to all inherit from __dict__ ?
* Add many more fields from the Genius API json_dicts into my Song, Artist, Search classes

### GeniusAPI data classes ###

In [80]:
# TODO - Obviously these classes (Song/SongData, Artist/ArtistData, etc.) are redundant, I'll consolidate them
class GeniusData(object):
    """Wrapper class for the json objects returned from searching the Genius.com database"""
                
    def __init__(self, json_dict, data_type=None):
        self._json_dict = json_dict
        
        if 'response' in json_dict: # Standard usage
            self._data_type = json_dict['response'].keys()[0] # See _GeniusAPI._API_REQUEST_TYPES        
            self._body      = json_dict['response'][self._data_type]
        else: # Allow user to supply a Song or Artist object from within another API returned object
            assert (data_type!=None), "json_dict is missing 'response' key, therefore must supply data_type"
            self._data_type = data_type
            self._body      = json_dict
        self._url      = str(self._body['url'])
        self._api_path = str(self._body['api_path'])
        self._id       = str(self._body['id'])
                                                    
class SongData(GeniusData):
    """Subclass of GeniusData for Song data objects. Returned from _make_api_request()"""
                                                        
    @property
    def title(self):
        return str(self._body['title'])
    
    @property
    def album(self):
        try: return str(self._body['album']['name'])
        except:
            return ''
        
    @property
    def artist(self):
        return str(self._body['primary_artist']['name'])
    
    @property
    def year(self):
        return str(self._body['release_date'])
                
class ArtistData(GeniusData):
    """Subclass of GeniusData for Artist data objects. Returned from _make_api_request()"""                
                    
    @property
    def name(self):
        return str(self._body['name'])
                    
    @property
    def image_url(self):
        return str(self._body['image_url'])
    
class SearchData(GeniusData):
    """Subclass of GeniusData for Search data objects. Returned from _make_api_request()"""
    
    def __init__(self, json):
        # TODO - Search results are unique, have multiple fields, must deal with this
        self._json = json
        self._data_type = json['response'].keys()[0] # See _GeniusAPI._API_REQUEST_TYPES        
        self._body      = json['response'][self._data_type][0]['result']
        self._url      = str(self._body['url'])
        self._api_path = str(self._body['api_path'])
        self._id       = str(self._body['id'])
        
    @property
    def stats(self):
        return self._body['stats']
    
    @property
    def artist_id(self):        
        return str(self._body['primary_artist']['id'])

# Would it make any sense to have a Lyrics class? To store attributes and stuff?

class Song(object):
    # Would it make more sense for these data classes to all inherit from __dict__ ?
    """A song from the Genius.com database.
    
    Attributes:
        title:  (str) Title of the song.
        artist: (str) Primary artist on the song.
        lyrcis: (str) Full set of song lyrics.
        album:  (str) Name of the album the song is on.
        year:   (int) Year the song was released.        
    """
    
    def __init__(self, title, artist, lyrics, album='',year=None, api_info={}):
        """Return a Song object whose title is *title*, artist is *artist*, and so on."""    
        self.title  = title
        self.artist = artist
        self.lyrics = lyrics
        self.album  = album
        self.year   = year
        self.api_info = api_info # This should contain api_path, web URL, etc. This feels like bad python.
                    
    def __str__(self):
        """Return a string representation of the Song object."""
        if len(self.lyrics) > 100:
            lyr = self.lyrics[:100] + "..."
        else: lyr = self.lyrics[:100]            
        return '{0}, by {1}, recorded in {2} on the album {3}:\n"{4}"'.format(self.title,self.artist,self.year,self.album,lyr)
    
    def __repr__(self):
        return repr((self.title, self.artist))
    
    def __cmp__(self, other):                        
        return cmp(self.title, other.title) and cmp(self.artist, other.artist) and cmp(self.lyrics, other.lyrics)
    
    def __list__(self):
        # How do I do this?
        return 

class Artist(object):
    """An artist from the Genius.com database.
    
    Attributes:
        name: (str) Artist name.
        num_songs: (int) Total number of songs listed on Genius.com
    
    """
    def __init__(self, name, num_songs=0, songs=[], api_info={}):
        """Return an Artist object whose name is *name*, etc."""
        self.name = name
        self.num_songs = num_songs
        self.songs = songs
        self.api_info = api_info # This should contain api_path, web URL, etc.
                
    def add_song(self, song):
        """Add a Song object to the Artist object"""
        self.songs.append(song)
        self.num_songs += 1
        
    def remove_song(self, song):
        """Do I need this ability?"""
        
    def __str__(self):
        """Return a string representation of the Artist object."""                        
        return '{0}, {1} songs'.format(self.name,self.num_songs)
    
    def __repr__(self):
        return repr((self.name, '{0} songs'.format(self.num_songs)))

## Making Genius API requests ##
The Genius API lets you make different sorts of requests to the API database. The ones I'm most interested in are:
  * Songs
  * Artists
  * Search
    * This API request is just like typing a search term into the search box on Genius.com
  
Each type of API request (Song, Artist, etc.) has its own URL format that gets fed into the API. The process of accessing the API database is independent of request type, so we just have to make sure we format the URL correctly for whatever type of request we're making at the time. Here are the formats:
  * Songs: api.genius.com/songs/[song_api_id]
    * The *song_api_id* is Genius's method of identifying songs and artists, (e.g. 2236 = Yesterday by The Beatles)
  * Artists: api.genius.com/artists/[artist_api_id]
    * To get all songs from a given artist: api.genius.com/artists/songs
  * Search: api.genius.com/search?q=[search_term]
    * Use urllib2.quote(search_term) to make sure the URL is properly formatted (e.g. a space is %20)
  


In [81]:
class _GeniusAPI(object):
    # This is a superclass that Genius() inherits from. Not sure if this makes any sense, but it
    # seemed like a good idea to have this class (more removed from user) handle the lower-level
    # interaction with the Genius API, and then Genius() has the more user-friendly search
    # functions
    """Interface with the Genius.com API
    
    Attributes:
        base_url: (str) Top-most URL to access the Genius.com API with
        
    Methods:
        _load_credentials()
            OUTPUT: client_id, client_secret, client_access_token
        _make_api_request()
            INPUT:  
            OUTPUT:                                 
    """    
    
    # Genius API constants
    _API_URL = "https://api.genius.com/"    
    _API_REQUEST_TYPES = {'song': 'songs/', 'artist': 'artists/', 'artist-songs': 'artists/songs/','search': 'search?q='}
    
    def __init__(self):
        self._CLIENT_ACCESS_TOKEN = self._load_credentials()
        self._HEADER_AUTHORIZATION = 'Bearer ' + self._CLIENT_ACCESS_TOKEN        
        
    def _load_credentials(self):
        """Load the Genius.com API authorization information from the 'credentials.ini' file"""
        lines = [line.rstrip('\n') for line in open('credentials.ini')]
        chars_to_strip = " \'\""
        for line in lines:
            if "client_id" in line:
                client_id = re.findall(r'[\"\']([^\"\']*)[\"\']', line)[0]
            if "client_secret" in line:
                client_secret = re.findall(r'[\"\']([^\"\']*)[\"\']', line)[0]
            #Currently only need access token to run, the other two perhaps for future implementation
            if "client_access_token" in line:
                client_access_token = re.findall(r'[\"\']([^\"\']*)[\"\']', line)[0]
                
        return client_access_token
    
    def _make_api_request(self, request_term_and_type, page=1):
        """Send a request (song, artist, or search) to the Genius API, returning a json object
        
        INPUT:
            request_term_and_type: (tuple) (request_term, request_type)
        
        *request term* is a string. If *request_type* is 'search', then *request_term* is just
        what you'd type into the search box on Genius.com. If you have an song ID or an artist ID,
        you'd do this: self._make_api_request('2236','song')
        
        Returns a json object.
        """        
        
        #The API request URL must be formatted according to the desired request type"""
        formatted_request = self._format_request_type(request_term_and_type,page=page)                
        
        # Add the necessary headers to the request
        request = urllib2.Request(formatted_request)        
        request.add_header("Authorization",self._HEADER_AUTHORIZATION)
        request.add_header("User-Agent","curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpnSSL 0.9.6b) (ipv6 enabled)")
        while True:
            try:
                response = urllib2.urlopen(request, timeout=4) #timeout set to 4 seconds; automatically retries if times out
                raw = response.read()
            except socket.timeout:
                print("Timeout raised and caught")
                continue
            break

        return json.loads(raw)
        
    def _format_request_type(self, term_and_type,page=1):
        """Format the request URL depending on the type of request"""            
        request_term, request_type = term_and_type[0], term_and_type[1]                
        assert (request_type in self._API_REQUEST_TYPES), "Unknown API request type"
        
        # TODO - Clean this up (might not need separate returns)
        if request_type=='artist-songs':                        
            return self._API_URL + 'artists/' + urllib2.quote(request_term) + '/songs?per_page=50&page=' + str(page)
        else:        
            return self._API_URL + self._API_REQUEST_TYPES[request_type] + urllib2.quote(request_term)
    
    def _scrape_song_lyrics_from_url(self, URL):
        """Use BeautifulSoup to scrape song info off of a Genius song URL"""                                
        page = requests.get(URL)    
        html = BeautifulSoup(page.text, "html.parser")
        
        # Song info (scraped from HTML)
        
        # *** But remember! A lot of this info already comes from the API in the json object, use it! ***
        lyrics = html.find("div", class_="lyrics").get_text().encode('ascii','ignore').decode('ascii')
        lyrics = re.sub('\[.*\]','',lyrics) # Remove [Verse] and [Bridge] stuff
        lyrics = re.sub('\n{2}','',lyrics)  # Remove gaps between verses        
        lyrics = str(lyrics).strip('\n')
        
        return lyrics    
        

In [86]:
class Genius(_GeniusAPI):
    """User-level interface with the Genius.com API. User can search for songs (getting lyrics) and artists (getting songs)"""    
    
    def search_song(self, song_title, artist_name):
        """Search Genius.com for *song_title* by *artist_name*"""
        
        print('\nSearching for {0} by {1}...'.format(song_title,artist_name)),
    
        # Perform a Genius API search for the song
        search_term = song_title + ' ' + artist_name
        json_dict = self._make_api_request((search_term,'search'))
        search_results = SearchData(json_dict)
        
        # Access the api_path found by searching
        data = SongData(self._make_api_request((search_results._id, 'song')))
    
        # Scrape the song URL (html) for lyrics
        lyrics = self._scrape_song_lyrics_from_url(data._url)        
        
        # Create the Song object
        # TODO - Obviously Song and SongData are redundant, I'll need to consolidate
        song = Song(data.title, data.artist, lyrics, data.album, data.year)
        
        print(' Done.\n')        
        return song
    
    
    def search_artist(self, artist_name):
        """Allow user to search for an artist on the Genius.com database by supplying an artist name.
        Returns an Artist() object containing all songs for that particular artist."""
                                
        print('\nSearching for songs by {0}...\n'.format(artist_name))
    
        # Perform a Genius API search for the artist        
        json_dict = self._make_api_request((artist_name,'search'))        
        search_results = SearchData(json_dict)
        
        # Access the api_path found by searching        
        artist_search_results = self._make_api_request((search_results.artist_id, 'artist-songs'))['response']        
        
        # Get a list of all song IDs for the artist
        song_ids = [];
        while True:
            [song_ids.append(song['id']) for song in artist_search_results['songs']]            
            next_page = artist_search_results['next_page']
            if next_page == None:
                break
            else:
                artist_search_results =\
                    self._make_api_request((search_results.artist_id, 'artist-songs'), page=next_page)['response']            
                        
        # Download each song from the list of song IDs                
        artist = Artist(artist_name); n=0        
        for song_id in song_ids:    
            n += 1            
            # Access the api_path found by searching
            data = SongData(self._make_api_request((str(song_id), 'song')),data_type='song')
            print('Song {0} out of {1}: "{2}"'.format(n,len(song_ids),data.title))

            # Scrape the song URL (html) for lyrics
            lyrics = self._scrape_song_lyrics_from_url(data._url)            

            # Create the Song object
            # TODO - Obviously Song and SongData are redundant, I'll need to consolidate
            song = Song(data.title, data.artist, lyrics, data.album, data.year)
            
            # Add the current song to the Artist object
            artist.add_song(song)
                                        
        print('Done.\n')        
        return artist                    
    

## Example usage of the song and artist search functions ##

In [83]:
interface = Genius()
artist = interface.search_artist('Ezra Furman')
artist


Searching for songs by Ezra Furman...

Song 1 out of 5: "40 days in Kansas"
Song 2 out of 5: "American Soil"
Song 3 out of 5: "And Maybe God Is a Train"
Song 4 out of 5: "Anything Can Happen"
Song 5 out of 5: "Are You Gonna Break My Heart?"
Done.



('Ezra Furman', '5 songs')

In [84]:
print(artist.name) # Artist name
artist.num_songs   # Number of songs by the artist

# Look at an individual song contained within the Artist object
song = artist.songs[1]
print(song)
print('\n'+song.lyrics)
song.lyrics

Ezra Furman
American Soil, by Ezra Furman, recorded in 2013-01-01 on the album The Year of No Returning :
"There's something in the water, something sick in the blood and the oil
See the white chicken grinni..."

There's something in the water, something sick in the blood and the oil
See the white chicken grinning, see the serpent in a horrible coil
Don't be scared by the stares of the peasants on the subway car
Because the way things seem in a dream is just the way that they are
When I can feel God taking his eyes off you
You were born for American soilEvery race has its place, every nation fights for species survival
I'm a Jew through and through and I'm about to write you a Bible
Now let me take your hand and show you through the twisted hallways of the house of song
We got the magazine, wall paper and the million dollar bills on the lawn
Oh and I can feel God taking his eyes off us
We were born for American soilYa don't have to be clever to detect a big American sadness
When the po

"There's something in the water, something sick in the blood and the oil\nSee the white chicken grinning, see the serpent in a horrible coil\nDon't be scared by the stares of the peasants on the subway car\nBecause the way things seem in a dream is just the way that they are\nWhen I can feel God taking his eyes off you\nYou were born for American soilEvery race has its place, every nation fights for species survival\nI'm a Jew through and through and I'm about to write you a Bible\nNow let me take your hand and show you through the twisted hallways of the house of song\nWe got the magazine, wall paper and the million dollar bills on the lawn\nOh and I can feel God taking his eyes off us\nWe were born for American soilYa don't have to be clever to detect a big American sadness\nWhen the population rules, every man has his hand in the madness\nWell there's a dead grove a' trees, you can visit on the outside of town\nAnd there's  digital dust inside the cemetery settling down\nWoo and I c