# BentoML

Deploy word2vec+tfidf model as REST API to google cloud 

In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from gensim.models import Word2Vec

In [2]:
import sys
sys.path.append('../')
from data_preprocess import Posts
from word_embedding_vectorizer import WordEmbeddingVectorizer

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/gillianchiang/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/gillianchiang/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


## Prepare Dataset

In [3]:
# path = '/content/Insight_Stress_Analysis/data/' 
path = '../../data/'
train = pd.read_csv(path + 'dreaddit-train.csv', encoding = "ISO-8859-1")
test = pd.read_csv(path + 'dreaddit-test.csv', encoding = "ISO-8859-1")

In [4]:
train_text = Posts(train.text)
test_text = Posts(test.text)

## Create Model for Production

In [5]:
train_text = train_text.preprocess()
test_text = test_text.preprocess()

  from pandas import Panel
100%|██████████| 2838/2838 [00:00<00:00, 32826.92it/s]
100%|██████████| 2838/2838 [00:32<00:00, 88.26it/s] 
100%|██████████| 2838/2838 [00:01<00:00, 1511.64it/s]
100%|██████████| 715/715 [00:00<00:00, 36565.60it/s]
100%|██████████| 715/715 [00:07<00:00, 94.46it/s] 
100%|██████████| 715/715 [00:00<00:00, 5712.00it/s]


In [6]:
word2vec = Word2Vec(train_text, size=300, window=10, min_count=2, workers=10, iter=100)
word_vectorizer = WordEmbeddingVectorizer(word2vec)

In [7]:
X_train = word_vectorizer.fit(train_text).transform(train_text)
X_test = word_vectorizer.fit(test_text).transform(test_text)

In [8]:
y_train = train.label
y_test = test.label

In [9]:
# random forest
word_embedding_rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, y_train) 
y_pred = word_embedding_rf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.76      0.55      0.64       346
           1       0.67      0.84      0.74       369

    accuracy                           0.70       715
   macro avg       0.72      0.70      0.69       715
weighted avg       0.71      0.70      0.70       715



In [10]:
print(accuracy_score(y_test, y_pred))

0.7020979020979021


In [11]:
print(confusion_matrix(y_test, y_pred))

[[192 154]
 [ 59 310]]


## BentoService for model serving

In [12]:
%%writefile "../word_embedding_model.py"
import pandas as pd
import bentoml
from bentoml.artifact import PickleArtifact
from bentoml.handlers import DataframeHandler
from data_preprocess import Posts
from word_embedding_vectorizer import WordEmbeddingVectorizer
from gensim.models import Word2Vec

@bentoml.artifacts([PickleArtifact('word_vectorizer'),
                    PickleArtifact('word_embedding_rf')]) 

@bentoml.env(pip_dependencies=["pandas", "numpy", "gensim", "scikit-learn", "nltk"])

class WordEmbeddingModel(bentoml.BentoService):
        
    @bentoml.api(DataframeHandler, typ='series')
    def preprocess(self, series):
        preprocess_series = Posts(series).preprocess()
        input_matrix = self.artifacts.word_vectorizer.fit(preprocess_series).transform(preprocess_series)
        return input_matrix
    
    @bentoml.api(DataframeHandler, typ='series')
    def predict(self, series):
        input_matrix = self.preprocess(series)
        pred_labels = self.artifacts.word_embedding_rf.predict(input_matrix)
        pred_proba = self.artifacts.word_embedding_rf.predict_proba(input_matrix)
        confidence_score = [prob[1] for prob in pred_proba]
        output = pd.DataFrame({'text': series, 'confidence_score': confidence_score, 'labels': pred_labels})
        output['labels'] = output['labels'].map({1: 'stress', 0: 'non-stress'})
        
        return output

Overwriting ../word_embedding_model.py


In [13]:
sys.path.append('../')
from word_embedding_model import WordEmbeddingModel
# Initialize bentoML model with artifacts

bento_model = WordEmbeddingModel()
bento_model.pack('word_vectorizer', word_vectorizer)
bento_model.pack('word_embedding_rf', word_embedding_rf)

# Save bentoML model to directory
saved_path = bento_model.save()

[2020-02-06 15:09:55,468] INFO - BentoService bundle 'WordEmbeddingModel:20200206150926_DCA9FA' created at: /private/var/folders/xb/6z8y2h493gqb0zxz171jkflw0000gn/T/bentoml-temp-sp8km8dn
[2020-02-06 15:09:55,656] INFO - BentoService bundle 'WordEmbeddingModel:20200206150926_DCA9FA' created at: /Users/gillianchiang/bentoml/repository/WordEmbeddingModel/20200206150926_DCA9FA


In [14]:
# print the directory containing exported model archive (prefixed with model name and version)
print(saved_path)

/Users/gillianchiang/bentoml/repository/WordEmbeddingModel/20200206150926_DCA9FA


## Load BentoService from saved bundle

In [15]:
import bentoml

# Load exported bentoML model archive from path
bento_model = bentoml.load(saved_path)

# Call predict on the restored sklearn model
series = test.text.iloc[:10]
bento_model.predict(series)



  from pandas import Panel
100%|██████████| 10/10 [00:00<00:00, 6977.71it/s]
100%|██████████| 10/10 [00:00<00:00, 62.64it/s]
100%|██████████| 10/10 [00:00<00:00, 3146.75it/s]


Unnamed: 0,text,confidence_score,labels
0,"Its like that, if you want or not. ME: I have ...",0.68,stress
1,I man the front desk and my title is HR Custom...,0.55,stress
2,We'd be saving so much money with this new hou...,0.675,stress
3,"My ex used to shoot back with ""Do you want me ...",0.58,stress
4,I havent said anything to him yet because Im n...,0.78,stress
5,Thanks. Edit 1 - Fuel Receipt As Requested. <u...,0.47,non-stress
6,"When moving into their tiny house, they would ...",0.62,stress
7,"More specifically, for example, I live with ro...",0.8,stress
8,Long story short my family in NE Ohio is abusi...,0.75,stress
9,"This new ""roommate"" lived 3 hours away in an p...",0.33,non-stress


In [16]:
bento_tag = '{name}:{version}'.format(name=bento_model.name, version=bento_model.version)
bento_tag

'WordEmbeddingModel:20200206150926_DCA9FA'

## Deploy BentoService with Google Cloud Run
- <b>Link to API: https://sentiment-ghxotopljq-uw.a.run.app</b>


- Tutorial: https://github.com/bentoml/BentoML/blob/master/guides/deployment/deploy-with-google-cloud-run/deploy-with-google-cloud-run.ipynb