In [1]:
# Import local bentoml repository
import sys
import os
# Add the directory two levels above the current working directory to the
# module search path. append() places it last, so a bentoml found earlier on
# sys.path (e.g. an installed copy) would still be imported in preference.
sys.path.append(os.path.join(os.getcwd(), "..", ".."))
import bentoml

# Show which bentoml/__init__.py was actually imported.
print(bentoml.__file__)

/Users/chaoyuyang/workspace/BentoML/examples/sklearn-sentiment-clf/../../bentoml/__init__.py


# sentiment_analysis_twitter_model

Based on https://github.com/crawles/sentiment_analysis_twitter_model/blob/master/build-sentiment-classifier.ipynb

Using dataset from http://help.sentiment140.com/for-students/

In [None]:
%%bash

# Download the dataset archive; -q suppresses wget's progress output.
wget -q http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip
# -n: never overwrite files that already exist in the working directory.
unzip -n trainingandtestdata.zip
# Delete the downloaded archive afterwards.
rm trainingandtestdata.zip

In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.pipeline import Pipeline

# Read Data

In [3]:
# Column names applied to both CSV files, which are read without a header row.
columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']

# Both files are parsed with the same options: no header line, ISO-8859-1 text.
_read_opts = {'header': None, 'encoding': 'ISO-8859-1'}
dftrain = pd.read_csv('training.1600000.processed.noemoticon.csv', **_read_opts)
dftest = pd.read_csv('testdata.manual.2009.06.14.csv', **_read_opts)

dftrain.columns = columns
dftest.columns = columns

# Text Pre-processing

In [4]:
class RegexPreprocess(object):
    """Regex-based text normalizer exposing ``fit``/``transform``.

    ``transform`` applies three substitutions, in order, to every string:

    1) replace @-mentions, e.g., @crawles -> USERNAME
    2) replace http(s)/www links -> URL
    3) collapse any character repeated three or more times in a row down to
       two occurrences, e.g., "soooo" -> "soo"
    """

    # Raw strings keep the regex escapes (\., \/, \s, \1) literal instead of
    # relying on Python passing unrecognised string escapes through unchanged.
    # The resulting pattern text is identical to the non-raw originals.
    user_pat = r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9]+)'
    http_pat = r'(https?:\/\/(?:www\.|(?!www))[^\s\.]+\.[^\s]{2,}|www\.[^\s]+\.[^\s]{2,})'
    repeat_pat, repeat_repl = r"(.)\1\1+", r'\1\1'

    def __init__(self):
        pass

    def transform(self, X):
        """Return a normalized copy of the input texts.

        Parameters
        ----------
        X : pandas.Series or any input accepted by ``pd.Series(...)``
            The strings to normalize. A Series is used as-is (and is not
            modified); anything else is wrapped in a new Series.

        Returns
        -------
        pandas.Series
            The texts after the username, URL and repeated-character
            substitutions have been applied.
        """
        pp_text = X if isinstance(X, pd.Series) else pd.Series(X)
        # regex=True is passed explicitly: newer pandas versions default
        # str.replace to literal matching, which would disable every pattern.
        pp_text = pp_text.str.replace(pat=self.user_pat, repl='USERNAME', regex=True)
        pp_text = pp_text.str.replace(pat=self.http_pat, repl='URL', regex=True)
        # str.replace returns a new Series, so the result must be assigned;
        # without the assignment the repeated-letter step is silently dropped.
        pp_text = pp_text.str.replace(pat=self.repeat_pat,
                                      repl=self.repeat_repl,
                                      regex=True)
        return pp_text

    def fit(self, X, y=None):
        """No-op fit: there is nothing to learn. Returns ``self``."""
        return self

# Train the model

In [5]:
# Pipeline stages, in execution order: regex clean-up, bag-of-words counts,
# then a logistic-regression classifier.
_pipeline_steps = [
    ('regex_preprocess', RegexPreprocess()),
    ('count_vect', CountVectorizer(min_df=100,
                                   ngram_range=(1, 1),
                                   stop_words='english')),
    ('lr', LogisticRegression()),
]
sentiment_lr = Pipeline(_pipeline_steps)

# Fit on the tweet text with polarity as the label; kept as the last
# expression so the fitted pipeline is echoed as the cell output.
sentiment_lr.fit(dftrain.text, dftrain.polarity)



Pipeline(memory=None,
     steps=[('regex_preprocess', <__main__.RegexPreprocess object at 0x103f54a20>), ('count_vect', CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=10...penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False))])

In [6]:
# Keep only test rows whose polarity is not 2, then report per-class metrics.
_keep_rows = dftest.polarity != 2
Xtest = dftest.text[_keep_rows]
ytest = dftest.polarity[_keep_rows]
_predicted = sentiment_lr.predict(Xtest)
print(classification_report(ytest, _predicted))

              precision    recall  f1-score   support

           0       0.86      0.81      0.83       177
           4       0.82      0.87      0.85       182

   micro avg       0.84      0.84      0.84       359
   macro avg       0.84      0.84      0.84       359
weighted avg       0.84      0.84      0.84       359



In [7]:
# Spot-check the trained pipeline on two one-word inputs.
sentiment_lr.predict(["stupid", "awesome"])

array([0, 4])

# Export model for production

In [10]:
%%writefile sentiment_lr_model.py
import pandas as pd
import bentoml
from bentoml.artifacts import PickleArtifact

class SentimentLRModel(bentoml.BentoModel):
    """
    BentoML packaging for the `sentiment_lr` model artifact.
    """
    _model_package_version = "1.0.2"

    def config(self, artifacts, env):
        # A single pickled artifact, registered under the name 'sentiment_lr'.
        pickled_pipeline = PickleArtifact('sentiment_lr')
        artifacts.add(pickled_pipeline)

        # Conda packages declared for the packaged model's environment.
        conda_packages = ["scikit-learn", "pandas"]
        env.add_conda_dependencies(conda_packages)

    def predict(self, df):
        """
        Hand `df` unchanged to the `sentiment_lr` artifact's `predict` and
        return its result. Elsewhere in this notebook it is called with a
        plain list of strings.
        """
        classifier = self.artifacts.sentiment_lr
        return classifier.predict(df)

Overwriting sentiment_lr_model.py


In [11]:
from sentiment_lr_model import SentimentLRModel

# Initialize bentoML model with artifacts; the keyword name matches the
# 'sentiment_lr' artifact name declared in SentimentLRModel.config
bento_model = SentimentLRModel(sentiment_lr=sentiment_lr)

# Save bentoML model to directory
saved_path = bento_model.save("/tmp/bento")

# Print the directory containing the exported model archive. In the recorded
# run this was /tmp/bento/<model class name>/<version string>.
print(saved_path)

/tmp/bento/SentimentLRModel/2019_04_02_7fb1af6d


# Load model from archive

In [12]:
import bentoml

# Load exported bentoML model archive from path.
# This rebinds `bento_model`, which previously held the in-memory instance.
bento_model = bentoml.load(saved_path)

# Call predict on the restored sklearn model
bento_model.predict(["hello", "hi"])

array([4, 4])

##### * For demo purposes, copy the generated model to the ./model folder

In [13]:
import os
import shutil
# Remove any ./model left over from a previous run; ignore_errors=True makes
# this a no-op when the directory does not exist. By default shutil.copytree
# requires that its destination not already exist.
shutil.rmtree('./model', ignore_errors=True)
# Copy the saved archive directory to ./model.
shutil.copytree(saved_path, './model')

'./model'

# Install exported model as a python package

In [14]:
# Install the ./model directory as a pip package. `!pip` runs whichever pip is
# first on the shell PATH, which may not be the kernel's environment.
!pip install ./model

Processing ./model
Building wheels for collected packages: SentimentLRModel
  Building wheel for SentimentLRModel (setup.py) ... [?25ldone
[?25h  Stored in directory: /private/var/folders/ns/vc9qhmqx5dx_9fws7d869lqh0000gn/T/pip-ephem-wheel-cache-o69d8vi4/wheels/8d/e0/07/401c7995bf36b95e36efe3a2dc251c46fe8f2d34b7e50b813e
Successfully built SentimentLRModel
Installing collected packages: SentimentLRModel
  Found existing installation: SentimentLRModel 1.0.0
    Uninstalling SentimentLRModel-1.0.0:
      Successfully uninstalled SentimentLRModel-1.0.0
Successfully installed SentimentLRModel-1.0.0


In [15]:
# The BentoML model class name is used as the name of the installed package.
# NOTE: this import rebinds SentimentLRModel, which was previously imported
# from the local sentiment_lr_model module.
from SentimentLRModel import SentimentLRModel

ms = SentimentLRModel().load() # call load to ensure all artifacts are loaded
ms.predict(["stupid", "awesome"])

array([0, 4])

# Build API server docker image

In [16]:
# Build a Docker image using ./model as the build context and tag it
# atalaya/sentiment-lr-model.
!cd "./model" && docker build -t atalaya/sentiment-lr-model .

Sending build context to Docker daemon  5.995MB
Step 1/7 : FROM continuumio/miniconda3
 ---> ae46c364060f
Step 2/7 : COPY . /model
 ---> 73e561e1203b
Step 3/7 : RUN conda env create -f /model/environment.yml
 ---> Running in a67661c66a61
Solving environment: ...working... done
[91m

  current version: 4.5.12
  latest version: 4.6.9

Please update conda by running

    $ conda update -n base -c defaults conda


scipy-1.2.1          | 17.7 MB   | ########## | 100% [0m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[9

mkl-2019.3           | 203.3 MB  | #######4   |  75% [0m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[9

mkl-2019.3           | 203.3 MB  | ########## | 100% [0m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[91m[9

Collecting Werkzeug>=0.14 (from flask->bentoml==0.0.5->-r /model/condaenv.y1svo2gw.requirements.txt (line 1))
  Downloading https://files.pythonhosted.org/packages/18/79/84f02539cc181cdbf5ff5a41b9f52cae870b6f632767e43ba6ac70132e92/Werkzeug-0.15.2-py2.py3-none-any.whl (328kB)
Collecting MarkupSafe>=0.23 (from Jinja2>=2.10->flask->bentoml==0.0.5->-r /model/condaenv.y1svo2gw.requirements.txt (line 1))
  Downloading https://files.pythonhosted.org/packages/98/7b/ff284bd8c80654e471b769062a9b43cc5d03e7a615048d96f4619df8d420/MarkupSafe-1.1.1-cp37-cp37m-manylinux1_x86_64.whl
Building wheels for collected packages: python-json-logger, prometheus-client, dill
  Building wheel for python-json-logger (setup.py): started
  Building wheel for python-json-logger (setup.py): finished with status 'done'
  Stored in directory: /root/.cache/pip/wheels/97/f7/a1/752e22bb30c1cfe38194ea0070a5c66e76ef4d06ad0c7dc401
  Building wheel for prometheus-client (setup.py): started
  Building wheel for prometheus-clien

In [None]:
# Run the `bentoml serve` CLI against the archive in ./model (presumably
# starts the API server locally -- this cell has no recorded output; confirm).
!bentoml serve --model-path=./model