# BentoML Example: Sentiment Analysis with Scikit-learn

Based on https://github.com/crawles/sentiment_analysis_twitter_model/blob/master/build-sentiment-classifier.ipynb

Using dataset from http://help.sentiment140.com/for-students/

In [None]:
!pip install -I bentoml
!pip install sklearn pandas numpy

In [None]:
%%bash

# Fetch the dataset archive only when it is not already in the working directory.
if [ ! -f ./trainingandtestdata.zip ]; then
    wget -q http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip
fi

# Extract on every run: -n never overwrites existing files, so this is a no-op
# once the CSVs are present, and it repairs a run where the archive was
# downloaded but extraction did not complete.
unzip -n trainingandtestdata.zip

In [None]:
import sys
import os

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.pipeline import Pipeline

import bentoml

# Read Data

In [None]:
# Neither CSV has a header row, so column names are assigned after loading
columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']

# Both files are parsed with the same options
csv_options = {'header': None, 'encoding': 'ISO-8859-1'}

dftrain = pd.read_csv('training.1600000.processed.noemoticon.csv', **csv_options)
dftest = pd.read_csv('testdata.manual.2009.06.14.csv', **csv_options)

dftrain.columns = columns
dftest.columns = columns

# Train the model

In [None]:
# Text features: unigram counts with English stop words removed and min_df=100
count_vect = CountVectorizer(min_df=100, ngram_range=(1, 1), stop_words='english')

# Two-step pipeline: vectorize the raw tweet text, then logistic regression
sentiment_lr = Pipeline([
    ('count_vect', count_vect),
    ('lr', LogisticRegression()),
])

# Fit on the tweet text against the polarity labels
sentiment_lr.fit(dftrain.text, dftrain.polarity)

In [None]:
# Exclude test rows whose polarity is 2 before scoring
# (presumably the neutral class — confirm against the dataset documentation)
keep = dftest.polarity != 2
Xtest = dftest.text[keep]
ytest = dftest.polarity[keep]

# Report per-class metrics for the pipeline's predictions on the kept rows
predicted = sentiment_lr.predict(Xtest)
print(classification_report(ytest, predicted))

In [None]:
# Xtest is a filtered Series that keeps dftest's original index labels, so
# Xtest[0] is a label lookup that raises KeyError when row 0 was filtered out.
# .iloc[0] always takes the first remaining row by position.
sentiment_lr.predict([Xtest.iloc[0]])

# Export model for production

In [None]:
%%writefile sentiment_lr_model.py
import pandas as pd
import bentoml
from bentoml.artifact import PickleArtifact
from bentoml.handlers import DataframeHandler

@bentoml.artifacts([PickleArtifact('sentiment_lr')])
@bentoml.env(conda_dependencies=["scikit-learn", "pandas"])
class SentimentLRModel(bentoml.BentoService):
    """BentoService exposing the pickled ``sentiment_lr`` artifact."""

    @bentoml.api(DataframeHandler, typ='series')
    def predict(self, series):
        """Return the ``sentiment_lr`` artifact's predictions for ``series``.

        ``series`` is expected to be a ``pandas.Series``; the API is declared
        with ``DataframeHandler`` and ``typ='series'``.
        """
        model = self.artifacts.sentiment_lr
        return model.predict(series)

In [None]:
from sentiment_lr_model import SentimentLRModel

# Pack the trained pipeline into the service as its 'sentiment_lr' artifact
bento_model = SentimentLRModel.pack(sentiment_lr=sentiment_lr)

# Save the packed service under /tmp/bento and keep the path that save() returns
saved_path = bento_model.save("/tmp/bento")

# Show the directory holding the exported model archive
# (prefixed with model name and version)
print(saved_path)

# Load model from archive

In [None]:
import bentoml

# Restore the service from the archive directory returned by save()
bento_model = bentoml.load(saved_path)

# Run predict on the restored service with two sample strings
greeting_texts = pd.Series(["hello", "hi"])
bento_model.predict(greeting_texts)

##### * For demo purposes, copy the generated model to the ./model folder

In [None]:
import shutil
# Remove any existing ./model directory (errors such as "not found" are
# ignored), then copy the saved archive into its place
model_dir = './model'
shutil.rmtree(model_dir, ignore_errors=True)
shutil.copytree(saved_path, model_dir)

# Install exported model as a python package

In [None]:
# Install the archive in ./model into the current environment with pip
!pip install ./model

In [None]:
# The installed package takes its name from the BentoService class
import SentimentLRModel

# Call load() first so that all artifacts are loaded
ms = SentimentLRModel.load()

# Predict on two sample strings
review_texts = pd.Series(["stupid", "awesome"])
ms.predict(review_texts)

# CLI access

* `pip install ./model` also installs a CLI tool for accessing the BentoML service

In [None]:
# Run the `info` subcommand of the installed SentimentLRModel CLI
!SentimentLRModel info

In [None]:
# Show the top-level help of the SentimentLRModel CLI
!SentimentLRModel --help

In [None]:
# Show the help for the CLI's `predict` subcommand
!SentimentLRModel predict --help

In [None]:
# Call `predict` from the CLI, passing the input texts as a JSON array via --input
!SentimentLRModel predict --input='["some new text, sweet noodles", "happy time", "sad day"]'

# Run REST API server

In [None]:
# Start the BentoML REST API server for the archive in ./model
!bentoml serve ./model

*Run the following command in a terminal to make an HTTP request to the REST API server:*
```bash
curl -i \
--header "Content-Type: application/json" \
--request POST \
--data '["some new text, sweet noodles", "happy time", "sad day"]' \
localhost:5000/predict
```

You can also view all available API endpoints at [localhost:5000](http://localhost:5000), or look at Prometheus metrics at [localhost:5000/metrics](http://localhost:5000/metrics) in a browser.

# Run REST API server with Docker

**_Note: `docker` is not available when running in Google Colaboratory._**

### 1) build docker image with BentoArchive and tag it (e.g. sentiment-lr-model)

In [None]:
# Build a Docker image tagged sentiment-lr-model, using ./model as the build context
!cd "./model" && docker build -t sentiment-lr-model .

### 2) run docker image and expose port 5000

In [None]:
# Run the sentiment-lr-model image, publishing container port 5000 on host port 5000
!docker run -p 5000:5000 sentiment-lr-model

### 3) Similarly use the following command to query the REST server in Docker
```bash
curl -i \
--header "Content-Type: application/json" \
--request POST \
--data '["some new text, sweet noodles", "happy time", "sad day"]' \
localhost:5000/predict
```