# BentoML

Deploy word2vec+tfidf model as REST API to google cloud 

In [50]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from gensim.models import Word2Vec

In [51]:
import sys
sys.path.append('../')
from data_preprocess import Posts
from word_embedding_vectorizer import WordEmbeddingVectorizer

## Prepare Dataset

In [52]:
# path = '/content/Insight_Stress_Analysis/data/' 
path = '../data/'
train = pd.read_csv(path + 'dreaddit-train.csv', encoding = "ISO-8859-1")
test = pd.read_csv(path + 'dreaddit-test.csv', encoding = "ISO-8859-1")

In [53]:
train_text = Posts(train.text)
test_text = Posts(test.text)

## Create Model for Production

In [54]:
train_text = train_text.preprocess()
test_text = test_text.preprocess()

  0%|          | 0/2838 [00:00<?, ?it/s]

100%|██████████| 2838/2838 [00:00<00:00, 28518.94it/s]
100%|██████████| 2838/2838 [01:30<00:00, 31.20it/s]
100%|██████████| 2838/2838 [00:00<00:00, 4217.48it/s]
100%|██████████| 715/715 [00:00<00:00, 20292.78it/s]
100%|██████████| 715/715 [00:26<00:00, 27.05it/s]
100%|██████████| 715/715 [00:00<00:00, 2161.31it/s]


In [55]:
word2vec = Word2Vec(train_text, vector_size=300, window=10, min_count=2, workers=10, epochs=100)
word_vectorizer = WordEmbeddingVectorizer(word2vec)

In [56]:
X_train = word_vectorizer.fit(train_text).transform(train_text)
X_test = word_vectorizer.fit(test_text).transform(test_text)

In [57]:
y_train = train.label
y_test = test.label

In [58]:
# random forest
word_embedding_rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, y_train) 
y_pred = word_embedding_rf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.80      0.56      0.66       346
           1       0.68      0.87      0.76       369

    accuracy                           0.72       715
   macro avg       0.74      0.71      0.71       715
weighted avg       0.74      0.72      0.71       715



In [59]:
print(accuracy_score(y_test, y_pred))

0.7188811188811188


In [60]:
print(confusion_matrix(y_test, y_pred))

[[194 152]
 [ 49 320]]


## BentoService for model serving

In [61]:
# %%writefile "../word_embedding_model.py"
import pandas as pd
import bentoml
from bentoml.artifact import PickleArtifact
from bentoml.handlers import DataframeHandler
from data_preprocess import Posts
from word_embedding_vectorizer import WordEmbeddingVectorizer
from gensim.models import Word2Vec

@bentoml.artifacts([PickleArtifact('word_vectorizer'),
                    PickleArtifact('word_embedding_rf')]) 

@bentoml.env(pip_dependencies=["pandas", "numpy", "gensim", "scikit-learn", "nltk"])

class WordEmbeddingModel(bentoml.BentoService):
        
    @bentoml.api(DataframeHandler, typ='series')
    def preprocess(self, series):
        preprocess_series = Posts(series).preprocess()
        input_matrix = self.artifacts.word_vectorizer.fit(preprocess_series).transform(preprocess_series)
        return input_matrix
    
    @bentoml.api(DataframeHandler, typ='series')
    def predict(self, series):
        input_matrix = self.preprocess(series)
        pred_labels = self.artifacts.word_embedding_rf.predict(input_matrix)
        pred_proba = self.artifacts.word_embedding_rf.predict_proba(input_matrix)
        confidence_score = [prob[1] for prob in pred_proba]
        output = pd.DataFrame({'text': series, 'confidence_score': confidence_score, 'labels': pred_labels})
        output['labels'] = output['labels'].map({1: 'stress', 0: 'non-stress'})
        
        return output

In [62]:

# Initialize bentoML model with artifacts

bento_model = WordEmbeddingModel()
bento_model.pack('word_vectorizer', word_vectorizer)
bento_model.pack('word_embedding_rf', word_embedding_rf)

series = test.text.iloc[:10]
bento_model.predict(series)

100%|██████████| 10/10 [00:00<00:00, 6224.85it/s]
100%|██████████| 10/10 [00:00<00:00, 23.19it/s]
100%|██████████| 10/10 [00:00<00:00, 1803.15it/s]


Unnamed: 0,text,confidence_score,labels
0,"Its like that, if you want or not. ME: I have ...",0.66,stress
1,I man the front desk and my title is HR Custom...,0.44,non-stress
2,We'd be saving so much money with this new hou...,0.67,stress
3,"My ex used to shoot back with ""Do you want me ...",0.73,stress
4,I havent said anything to him yet because Im n...,0.85,stress
5,Thanks. Edit 1 - Fuel Receipt As Requested. <u...,0.37,non-stress
6,"When moving into their tiny house, they would ...",0.5,non-stress
7,"More specifically, for example, I live with ro...",0.83,stress
8,Long story short my family in NE Ohio is abusi...,0.73,stress
9,"This new ""roommate"" lived 3 hours away in an p...",0.21,non-stress


## Load BentoService from saved bundle

In [64]:
bento_tag = '{name}:{version}'.format(name=bento_model.name, version=bento_model.version)
bento_tag

'WordEmbeddingModel:20230806031905_9407F7'

## Deploy BentoService with Google Cloud Run
- <b>Link to API: https://sentiment-ghxotopljq-uw.a.run.app</b>


- Tutorial: https://github.com/bentoml/BentoML/blob/master/guides/deployment/deploy-with-google-cloud-run/deploy-with-google-cloud-run.ipynb

In [1]:
#import requests
#response = requests.post("http://127.0.0.1:5000/predict", json=["I like you", "I feel stressful"])
#print(response.text)