In [2]:
# Put the directory two levels above the working directory (the local bentoml
# repository) at the front of sys.path so that `import bentoml` searches it first.
import os
import sys

repo_root = os.path.join(os.getcwd(), "..", "..")
sys.path.insert(0, repo_root)

import bentoml

# Show which bentoml package file was actually imported.
print(bentoml.__file__)

/Users/bozhaoyu/src/bento/examples/deploy-with-serverless/../../bentoml/__init__.py


# Sentiment Analysis with Scikit-learn

Based on https://github.com/crawles/sentiment_analysis_twitter_model/blob/master/build-sentiment-classifier.ipynb

Using dataset from http://help.sentiment140.com/for-students/

In [3]:
%%bash
# Fetch and unpack the Sentiment140 archive.
# Download and extraction are guarded separately: if the zip is already on
# disk but the CSV files are missing (e.g. a previous run was interrupted
# before unzip finished), the data is still extracted.
set -e  # stop at the first failing command instead of continuing silently

if [ ! -f ./trainingandtestdata.zip ]; then
    wget -q http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip
fi

if [ ! -f ./training.1600000.processed.noemoticon.csv ] || [ ! -f ./testdata.manual.2009.06.14.csv ]; then
    # -n: never overwrite a file that already exists
    unzip -n trainingandtestdata.zip
fi

Archive:  trainingandtestdata.zip
  inflating: testdata.manual.2009.06.14.csv  
  inflating: training.1600000.processed.noemoticon.csv  


In [4]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.pipeline import Pipeline

# Read Data

In [5]:
# Column names applied to both files; the CSVs are read with header=None,
# so the names are assigned after loading.
columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']

# Both files are read with the same parser settings.
csv_options = dict(header=None, encoding='ISO-8859-1')

dftrain = pd.read_csv('training.1600000.processed.noemoticon.csv', **csv_options)
dftest = pd.read_csv('testdata.manual.2009.06.14.csv', **csv_options)

dftrain.columns = columns
dftest.columns = columns

# Train the model

In [6]:
# Bag-of-words features: unigrams only, English stop words removed, and
# min_df=100 passed as the document-frequency cutoff.
count_vect = CountVectorizer(min_df=100,
                             ngram_range=(1, 1),
                             stop_words='english')

# Logistic regression with the library's default settings.
lr = LogisticRegression()

sentiment_lr = Pipeline([('count_vect', count_vect), ('lr', lr)])

# Fit on the raw tweet text against the polarity labels; as the last
# expression of the cell, the fitted pipeline is also displayed.
sentiment_lr.fit(dftrain.text, dftrain.polarity)



Pipeline(memory=None,
     steps=[('count_vect', CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=100,
        ngram_range=(1, 1), preprocessor=None, stop_words='english',
...penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False))])

In [7]:
# Keep only test rows whose polarity is not 2, then report precision/recall
# of the fitted pipeline on what remains.
labeled = dftest.polarity != 2
Xtest = dftest.text[labeled]
ytest = dftest.polarity[labeled]

predictions = sentiment_lr.predict(Xtest)
print(classification_report(ytest, predictions))

              precision    recall  f1-score   support

           0       0.85      0.80      0.83       177
           4       0.82      0.86      0.84       182

   micro avg       0.83      0.83      0.83       359
   macro avg       0.83      0.83      0.83       359
weighted avg       0.83      0.83      0.83       359



In [8]:
# Sanity-check the fitted pipeline on the first remaining test tweet.
# `.iloc[0]` selects by position; `Xtest[0]` would look up the index *label* 0,
# which only exists if the first row of dftest survived the polarity filter.
sentiment_lr.predict([Xtest.iloc[0]])

array([4])

# Export model for production

In [11]:
%%writefile sentiment_lr_model.py
import pandas as pd
import bentoml
from bentoml.artifact import PickleArtifact
from bentoml.handlers import DataframeHandler

@bentoml.artifacts([PickleArtifact('sentiment_lr')])
@bentoml.env(conda_dependencies=["scikit-learn", "pandas"])
class SentimentLRModel(bentoml.BentoService):
    """BentoService exposing the pickled ``sentiment_lr`` artifact through a ``predict`` API."""

    @bentoml.api(DataframeHandler, typ='series')
    def predict(self, series):
        """
        Run the packed ``sentiment_lr`` artifact on a pandas.Series
        and return whatever its ``predict`` method produces.
        """
        classifier = self.artifacts.sentiment_lr
        return classifier.predict(series)

Overwriting sentiment_lr_model.py


In [12]:
from sentiment_lr_model import SentimentLRModel

# Bundle the trained pipeline into the service under the `sentiment_lr`
# artifact name.
bento_model = SentimentLRModel.pack(sentiment_lr=sentiment_lr)

# Write the archive under /tmp/bento. save() returns the directory holding the
# exported archive (prefixed with model name and version); print it so the
# location is visible in the notebook output.
saved_path = bento_model.save("/tmp/bento")
print(saved_path)

/tmp/bento/SentimentLRModel/2019_05_17_dffadc05


# Load model from archive

In [13]:
import bentoml

# Restore the service from the archive directory returned by save().
bento_model = bentoml.load(saved_path)

# Run the restored sklearn model on two sample strings; as the last
# expression of the cell, the predictions are displayed.
sample_texts = pd.Series(["hello", "hi"])
bento_model.predict(sample_texts)

array([4, 4])

##### * For demo purposes, copy the generated model to the ./model folder

In [14]:
import os
import shutil
# Start from a clean ./model directory (a missing one is not an error), then
# copy the saved archive into it.
demo_model_dir = './model'
shutil.rmtree(demo_model_dir, ignore_errors=True)
shutil.copytree(saved_path, demo_model_dir)

'./model'

# Deploy to AWS lambda

```bash
bentoml deploy ARCHIVE_PATH --platform=PLATFORM --region=REGION --stage=STAGE
```
### Arguments:
* archive_path: The file path or S3 location that contains the BentoML bundle.

### Options:
* platform: REQUIRED. The platform that you want to deploy the BentoML bundle to. For serverless, we support aws-lambda, aws-lambda-py2 and gcp-function.
* region: OPTIONAL. The cloud provider's region you want to deploy in.
* stage: OPTIONAL. A helper value that identifies the position in the workflow process. The DEFAULT value is dev.


In [18]:
# Deploy the bundle copied to ./model to AWS Lambda in the us-west-2 region (no --stage is passed).
!bentoml deploy ./model --platform aws-lambda --region us-west-2

BentoML: [32mDeploy to aws-lambda complete![0m
BentoML: [32mDeployment archive is saved at /Users/bozhaoyu/.bentoml/deployment-snapshots/aws-lambda/SentimentLRModel/2019_05_17_dffadc05/2019-05-17T16:46:09.185027[0m


# Check deployment status

```
bentoml check-deployment-status ARCHIVE_PATH --platform=PLATFORM --region=REGION --stage=STAGE
```

### Arguments:
* archive_path: The file path or S3 location that contains the BentoML bundle.

### Options:
* platform: REQUIRED. The platform that the BentoML bundle was deployed to. For serverless, we support aws-lambda, aws-lambda-py2 and gcp-function.
* region: OPTIONAL. The cloud provider's region the bundle was deployed in.
* stage: OPTIONAL. A helper value that identifies the position in the workflow process. The DEFAULT value is dev.

In [16]:
# Query the status of the aws-lambda deployment of ./model in us-west-2.
!bentoml check-deployment-status ./model --platform=aws-lambda --region=us-west-2

# Delete serverless Deployment


```bash
bentoml delete-deployment ARCHIVE_PATH --platform=PLATFORM --region=REGION --stage=STAGE
```
### Arguments:
* archive_path: The file path or S3 location that contains the BentoML bundle.

### Options:
* platform: REQUIRED. The platform that the BentoML bundle was deployed to. For serverless, we support aws-lambda, aws-lambda-py2 and gcp-function.
* region: OPTIONAL. The cloud provider's region the bundle was deployed in.
* stage: OPTIONAL. A helper value that identifies the position in the workflow process. The DEFAULT value is dev.



In [17]:
# Remove the aws-lambda deployment of ./model from the us-west-2 region.
!bentoml delete-deployment ./model --platform aws-lambda --region us-west-2

BentoML: [32mDelete aws-lambda deployment successful[0m
