
Commit

Update more examples to new API format (#32)
* update local-dependencies example

* formatting nit

* simplify sklearn sentiment analysis example

* tf-keras-text-classification example
parano committed Apr 10, 2019
1 parent a27060f commit 1765a7f
Showing 5 changed files with 167 additions and 522 deletions.
2 changes: 2 additions & 0 deletions bentoml/utils/py_module_utils.py
@@ -109,10 +109,12 @@ def copy_used_py_modules(target_module, destination):
             # For modules within current top level package, module_file here should be a
             # relative path to the src file
             target_file = os.path.join(destination, module_file)
+
         elif os.path.split(module_file)[1] == '__init__.py':
             # for module a.b.c in 'some_path/a/b/c/__init__.py', copy file to
             # 'destination/a/b/c/__init__.py'
             target_file = os.path.join(destination, module_name.replace('.', os.sep), '__init__.py')
+
         else:
             # for module a.b.c in 'some_path/a/b/c.py', copy file to 'destination/a/b/c.py'
             target_file = os.path.join(destination, module_name.replace('.', os.sep) + '.py')
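For reference, the two path mappings this hunk distinguishes work out as below; a standalone sketch with placeholder destination and module names, not code from the commit.

import os

destination = '/tmp/bento_archive'  # placeholder target directory
module_name = 'a.b.c'

# package module at 'some_path/a/b/c/__init__.py'
print(os.path.join(destination, module_name.replace('.', os.sep), '__init__.py'))
# -> /tmp/bento_archive/a/b/c/__init__.py (on POSIX; os.sep is platform-specific)

# plain module at 'some_path/a/b/c.py'
print(os.path.join(destination, module_name.replace('.', os.sep) + '.py'))
# -> /tmp/bento_archive/a/b/c.py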
27 changes: 15 additions & 12 deletions examples/local-dependencies/mymodule/scripts/main.py
@@ -1,28 +1,30 @@
 import os
 import sys
+import tempfile
 from sklearn import svm
 from sklearn import datasets
 
 # Use local bentoml code
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../..')))
-from bentoml import BentoModel, api, load
-from bentoml.artifacts import PickleArtifact
+from bentoml import BentoService, load, api, env, artifacts
+from bentoml.artifact import PickleArtifact
+from bentoml.handlers import JsonHandler
 
 # Simulating when user manually add project path to sys.path, and invoke
 # script as `python ./mymodule/scripts/main.py`
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
 from mymodule import method_in_mymodule
 from mymodule.submodule import method_in_submodule
 from mymodule.submodule1 import method_in_submodule1
 from mymodule.submodule.submodule2 import method_in_submodule2
 
-class IrisClassifier(BentoModel):
+@artifacts([PickleArtifact('clf')])
+@env(conda_dependencies=["scikit-learn"])
+class IrisClassifier(BentoService):
     """
     Iris SVM Classifier
     """
 
-    def config(self, artifacts, env):
-        artifacts.add(PickleArtifact('clf'))
-        env.add_conda_dependencies(["scikit-learn"])
-
     @api(JsonHandler)
     def predict(self, parsed_json):
@@ -32,17 +34,18 @@ def predict(self, parsed_json):
         data = method_in_submodule2(data)
         return self.artifacts.clf.predict(data)
 
+
 if __name__ == "__main__":
     clf = svm.SVC(gamma='scale')
     iris = datasets.load_iris()
     X, y = iris.data, iris.target
     clf.fit(X, y)
 
-    model = IrisClassifier(clf=clf)
+    iris_clf_service = IrisClassifier.pack(clf=clf)
 
-    saved_path = model.save("./model")
-    print("Saving new bento model archive to: '{}'".format(saved_path))
+    saved_path = iris_clf_service.save(tempfile.mkdtemp())
+    print("Saving new bento service archive to: '{}'".format(saved_path))
 
-    loaded_model = load(saved_path)
-    print(loaded_model.predict(X[0:1]))
+    loaded_service = load(saved_path)
+    print(loaded_service.predict(X[0:1]))
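Once saved, the archive can also be served over HTTP. A hedged client sketch, assuming the JsonHandler api is exposed as POST /predict on the dev server's default port 5000 (the notebook below starts it with `bentoml serve --model-path=...`); the payload shape is a guess, since predict() runs the JSON through the mymodule helpers before it reaches the classifier.

import requests

# Hypothetical client call; the /predict route and port 5000 are assumptions,
# not shown anywhere in this commit.
response = requests.post(
    'http://127.0.0.1:5000/predict',
    json=[[5.1, 3.5, 1.4, 0.2]],  # one iris feature row (assumed payload shape)
)
print(response.text)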

53 changes: 2 additions & 51 deletions examples/sklearn-sentiment-clf/sklearn-sentiment-clf.ipynb
@@ -19,7 +19,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# sentiment_analysis_twitter_model\n",
+    "# Sentiment Analysis with Scikit-learn\n",
     "\n",
     "Based on https://github.com/crawles/sentiment_analysis_twitter_model/blob/master/build-sentiment-classifier.ipynb\n",
     "\n",
@@ -79,48 +79,6 @@
    "dftest.columns = columns"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Text Pre-processing"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class RegexPreprocess(object):\n",
-    "    \"\"\"Create a preprocessing module for a tweet or data structure of tweets.\n",
-    "    1) replace username, e.g., @crawles -> USERNAME\n",
-    "    2) replace http links -> URL\n",
-    "    3) replace repeated letters to two letters\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    user_pat = '(?<=^|(?<=[^a-zA-Z0-9-_\\.]))@([A-Za-z]+[A-Za-z0-9]+)'\n",
-    "    http_pat = '(https?:\\/\\/(?:www\\.|(?!www))[^\\s\\.]+\\.[^\\s]{2,}|www\\.[^\\s]+\\.[^\\s]{2,})'\n",
-    "    repeat_pat, repeat_repl = \"(.)\\\\1\\\\1+\",'\\\\1\\\\1'\n",
-    "    \n",
-    "    def __init__(self):\n",
-    "        pass\n",
-    "    \n",
-    "    def transform(self, X):\n",
-    "        is_pd_series = isinstance(X, pd.core.frame.Series)\n",
-    "        if not is_pd_series:\n",
-    "            pp_text = pd.Series(X)\n",
-    "        else:\n",
-    "            pp_text = X\n",
-    "        pp_text = pp_text.str.replace(pat = self.user_pat, repl = 'USERNAME')\n",
-    "        pp_text = pp_text.str.replace(pat = self.http_pat, repl = 'URL')\n",
-    "        pp_text.str.replace(pat = self.repeat_pat, repl = self.repeat_repl)\n",
-    "        return pp_text\n",
-    "    \n",
-    "    def fit(self, X, y=None):\n",
-    "        return self"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -136,7 +94,7 @@
   },
   "outputs": [],
   "source": [
-    "sentiment_lr = Pipeline([('regex_preprocess', RegexPreprocess()),\n",
+    "sentiment_lr = Pipeline([\n",
     "                         ('count_vect', CountVectorizer(min_df = 100,\n",
     "                                                        ngram_range = (1,1),\n",
     "                                                        stop_words = 'english')), \n",
@@ -318,13 +276,6 @@
    "!bentoml serve --model-path=./model"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": null,
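With RegexPreprocess dropped, the simplified notebook pipeline reduces to vectorizer plus classifier. A hedged reconstruction of the full cell, since the hunk above is truncated after the CountVectorizer step; the LogisticRegression stage and the commented training call are assumptions carried over from the upstream notebook this example is based on.

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Simplified two-step pipeline (no regex preprocessing step).
sentiment_lr = Pipeline([
    ('count_vect', CountVectorizer(min_df=100,
                                   ngram_range=(1, 1),
                                   stop_words='english')),
    ('lr', LogisticRegression()),  # assumed final estimator, not shown in the hunk
])

# sentiment_lr.fit(X_train, y_train)  # hypothetical training split names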
@@ -0,0 +1,35 @@
+import pandas as pd
+import numpy as np
+from tensorflow import keras
+from bentoml import api, env, BentoService, artifacts
+from bentoml.artifact import TfKerasModelArtifact, PickleArtifact
+from bentoml.handlers import JsonHandler
+
+@artifacts([
+    TfKerasModelArtifact('model'),
+    PickleArtifact('word_index')
+])
+@env(conda_dependencies=['tensorflow', 'numpy', 'pandas'])
+class TextClassificationService(BentoService):
+
+    def word_to_index(self, word):
+        if word in self.artifacts.word_index:
+            return self.artifacts.word_index[word]
+        else:
+            return self.artifacts.word_index["<UNK>"]
+
+    @api(JsonHandler)
+    def predict(self, parsed_json):
+        """
+        """
+        text = parsed_json['text']
+
+        sequence = keras.preprocessing.text.hashing_trick(
+            text,
+            256,
+            hash_function=self.word_to_index,
+            filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
+            lower=True,
+            split=' ')
+
+        return self.artifacts.model.predict(np.expand_dims(sequence, 0))
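A hedged usage sketch for the new service above: pack a trained tf.keras model together with its word_index lookup, then call the JSON api directly. The tiny stand-in model and two-word vocabulary below are illustrations, not the example's real training artifacts; only the pack() and predict() usage mirrors what this commit's other examples show.

from tensorflow import keras

# Stand-in artifacts; the real example would train a proper text classifier.
model = keras.Sequential([
    keras.layers.Embedding(256, 8),   # matches the 256-bucket hashing_trick above
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy')

word_index = {'<UNK>': 1, 'great': 2}  # stand-in vocabulary

svc = TextClassificationService.pack(model=model, word_index=word_index)
print(svc.predict({'text': 'great movie'}))  # JsonHandler passes this dict as parsed_json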
