In [15]:
## decorator intro

## take a number, add 1 to it
def add_one(x):
    """Return ``x`` incremented by one."""
    incremented = x + 1
    return incremented
    
# Quick check of add_one; the recorded cell output for this line is "add_one: 6".
print 'add_one:', add_one(5)


## inner function adds one to number and returns, outer function returns inner function
def outer(x):
    """Build a zero-argument closure over ``x``.

    The returned function, when called, gives back ``x + 1``.
    """
    def inner():
        # x is captured from the enclosing call to outer.
        plus_one = x + 1
        return plus_one

    return inner

# outer(3) returns the inner closure with x bound to 3; calling it yields 3 + 1.
# The recorded cell output for the print below is "outer: 4".
print_three_add_one = outer(3)
print 'outer:', print_three_add_one()


## takes a function, takes a number, applies the function to the number and adds one, returns the output
def add_one_dec(func):
    """Decorator that adds one to whatever ``func`` returns.

    Args:
        func: callable whose return value supports ``+ 1``.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns ``func(...) + 1``.
    """
    # Imported locally so this block does not rely on a notebook-level import.
    from functools import wraps

    # wraps copies func's __name__/__doc__ onto the wrapper, so a decorated
    # function still reports its own identity. The wrapper is deliberately not
    # called add_one, to avoid shadowing the top-level add_one function.
    @wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs) + 1

    return wrapper

# Manual decoration: wrap poly by calling add_one_dec directly.
# NOTE(review): within this cell, poly is only defined further down, so on a fresh
# kernel this line would presumably raise NameError. The recorded output (38 rather
# than 6**2 + 1 = 37) suggests poly was already defined and decorated by an earlier
# run of this cell -- confirm, and consider defining the undecorated function first.
something = add_one_dec(poly)
print something(6)


## applies function as decorator, wraps around function
@add_one_dec
def poly(x):
    """Square ``x``; the add_one_dec decorator then adds one to the result."""
    squared = x ** 2
    return squared

# poly is the decorated function here: 10**2 + 1. Recorded cell output: 101.
print poly(10)

add_one: 6
outer: 4
38
101


In [16]:
"""
msd/ -- code for using the million song dataset
static/ -- CSS and code that are manually changed
templates/ -- HTML base ("views")
lyrics.csv -- CSV of lyrics
lyrics_classifier.py -- class of classifier and other functions
schema.sql -- creates database table for storing results
"""

'\nmsd/ -- code for using the million song dataset\nstatic/ -- CSS and code that are manually changed\ntemplates/ -- HTML base ("views")\nlyrics.csv -- CSV of lyrics\nlyrics_classifier.py -- class of classifier and other functions\nschema.sql -- creates database table for storing results\n'

In [17]:
"""Lyrics classifier for Flask application"""

# Our familiar imports  
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# A new import
import pickle

# Local imports for the Million Song Dataset stemming algorithm
from msd.stem import transformLyrics

class LyricsClf():
    """A MultinomialNB classifier for predicting artists from lyrics.

    Offers train, save, and load routines for offline and startup
    purposes. Offers predictArtist for online use.

    Attributes are documented in __init__.
    """

    def __init__(self, picklefile=None):
        """Create an empty classifier, optionally restoring it from disk.

        Args:
            picklefile: optional path to a file written by ``save``. When
                given, the label mapping, vectorizer and classifier are
                loaded from it.
        """
        # Maps artist name -> integer class label.
        self.artistLabels = dict()
        # Set by train() (CountVectorizer) or load().
        self.vectorizer = None
        # Set by train() (MultinomialNB) or load().
        self.clf = None
        if picklefile:
            self.load(picklefile)

    def makeArtistLabels(self, artistList):
        """Assign each artist in ``artistList`` its position as a label.

        Existing entries in ``self.artistLabels`` are kept; an artist that
        is already present gets its label overwritten.
        """
        for label, artist in enumerate(artistList):
            self.artistLabels[artist] = label

    def getLabel(self, artist):
        """Return the integer label for ``artist``, or -1 if unknown."""
        return self.artistLabels.get(artist, -1)

    def predictArtist(self, lyrics):
        """Return the artist name predicted for the given song lyrics.

        The lyrics are pre-processed with ``transformLyrics``, vectorized
        with ``self.vectorizer`` and classified with ``self.clf``.

        Returns:
            The artist whose label matches the prediction, or an
            "Artist not found ..." message if no artist has that label.
        """
        transformed_lyrics = transformLyrics(lyrics)
        # The vectorizer takes an iterable of documents; we have exactly one.
        X = self.vectorizer.transform([transformed_lyrics])
        # Use the instance's classifier (not a global named clf). predict
        # returns one label per input row, so take the single element rather
        # than comparing/formatting the whole result array.
        predicted = self.clf.predict(X)[0]
        for artist, label in self.artistLabels.items():
            if label == predicted:
                return artist
        return "Artist not found (predicted label: %d)" % predicted

    def train(self, csvfile):
        """Train the classifier from a CSV of lyrics.

        The CSV is read with pandas and must provide ``artist`` and
        ``lyrics`` columns. Steps:
        - Turn artist names into class labels
        - Build a CountVectorizer
        - Define the classifier's training inputs and outputs
        - Instantiate the classifier
        - Train the classifier
        """
        # Read the input file
        df = pd.read_csv(csvfile)

        # Create a mapping of artist (string) to label (integer)
        self.makeArtistLabels(df['artist'].unique())

        # Create a new column, label, which will be the model's output label
        df['label'] = df['artist'].apply(self.getLabel)

        # Create the input and output for training the classifier
        self.vectorizer = CountVectorizer()
        X = self.vectorizer.fit_transform(df['lyrics'])
        y = df['label']

        # Instantiate and train the classifier
        self.clf = MultinomialNB()
        self.clf.fit(X, y)

    def save(self, picklefile):
        """Save this LyricsClf object to disk as ``picklefile``."""
        # The with-block closes the file even if pickling fails.
        with open(picklefile, 'wb') as handle:
            pickle.dump(self, handle)

    def load(self, picklefile):
        """Restore state from a LyricsClf pickled at ``picklefile``.

        Copies the label mapping, vectorizer and classifier of the pickled
        object onto this instance and returns ``self``.
        """
        # NOTE: pickle.load can execute arbitrary code from the file; only
        # load pickles that come from a trusted source.
        with open(picklefile, 'rb') as handle:
            loaded = pickle.load(handle)
        self.artistLabels = loaded.artistLabels
        self.vectorizer = loaded.vectorizer
        self.clf = loaded.clf
        return self

# end of LyricsClf class

ImportError: No module named msd.stem

In [22]:
# enumerate yields (index, item) pairs -- the same pattern makeArtistLabels uses
# to assign integer labels. Recorded cell output: "0 a", "1 b", "2 c".
artistList = ['a', 'b', 'c']

for e, artist in enumerate(artistList):
    print e, artist

0 a
1 b
2 c
