
Commit

Update more examples to new API format (#32)
* update local-dependencies example

* formatting nit

* simplify sklearn sentiment analysis example

* tf-keras-text-classification example
parano committed Apr 10, 2019
1 parent a27060f commit 1765a7f
Showing 5 changed files with 167 additions and 522 deletions.
2 changes: 2 additions & 0 deletions bentoml/utils/py_module_utils.py
@@ -109,10 +109,12 @@ def copy_used_py_modules(target_module, destination):
             # For modules within current top level package, module_file here should be a
             # relative path to the src file
             target_file = os.path.join(destination, module_file)
+
         elif os.path.split(module_file)[1] == '__init__.py':
             # for module a.b.c in 'some_path/a/b/c/__init__.py', copy file to
             # 'destination/a/b/c/__init__.py'
             target_file = os.path.join(destination, module_name.replace('.', os.sep), '__init__.py')
+
         else:
             # for module a.b.c in 'some_path/a/b/c.py', copy file to 'destination/a/b/c.py'
             target_file = os.path.join(destination, module_name.replace('.', os.sep) + '.py')
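For reference, the two path mappings this hunk distinguishes work out as below; a standalone sketch with placeholder destination and module names, not code from the commit.

import os

destination = '/tmp/bento_archive'  # placeholder target directory
module_name = 'a.b.c'

# package module at 'some_path/a/b/c/__init__.py'
print(os.path.join(destination, module_name.replace('.', os.sep), '__init__.py'))
# -> /tmp/bento_archive/a/b/c/__init__.py (on POSIX; os.sep is platform-specific)

# plain module at 'some_path/a/b/c.py'
print(os.path.join(destination, module_name.replace('.', os.sep) + '.py'))
# -> /tmp/bento_archive/a/b/c.py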
27 changes: 15 additions & 12 deletions examples/local-dependencies/mymodule/scripts/main.py
@@ -1,28 +1,30 @@
 import os
 import sys
+import tempfile
 from sklearn import svm
 from sklearn import datasets
 
 # Use local bentoml code
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../..')))
-from bentoml import BentoModel, api, load
-from bentoml.artifacts import PickleArtifact
+from bentoml import BentoService, load, api, env, artifacts
+from bentoml.artifact import PickleArtifact
+from bentoml.handlers import JsonHandler
 
 # Simulating when user manually add project path to sys.path, and invoke
 # script as `python ./mymodule/scripts/main.py`
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
 from mymodule import method_in_mymodule
 from mymodule.submodule import method_in_submodule
 from mymodule.submodule1 import method_in_submodule1
 from mymodule.submodule.submodule2 import method_in_submodule2
 
-class IrisClassifier(BentoModel):
+@artifacts([PickleArtifact('clf')])
+@env(conda_dependencies=["scikit-learn"])
+class IrisClassifier(BentoService):
     """
     Iris SVM Classifier
     """
 
-    def config(self, artifacts, env):
-        artifacts.add(PickleArtifact('clf'))
-        env.add_conda_dependencies(["scikit-learn"])
-
     @api(JsonHandler)
     def predict(self, parsed_json):
@@ -32,17 +34,18 @@ def predict(self, parsed_json):
         data = method_in_submodule2(data)
         return self.artifacts.clf.predict(data)
 
+
 if __name__ == "__main__":
     clf = svm.SVC(gamma='scale')
     iris = datasets.load_iris()
     X, y = iris.data, iris.target
     clf.fit(X, y)
 
-    model = IrisClassifier(clf=clf)
+    iris_clf_service = IrisClassifier.pack(clf=clf)
 
-    saved_path = model.save("./model")
-    print("Saving new bento model archive to: '{}'".format(saved_path))
+    saved_path = iris_clf_service.save(tempfile.mkdtemp())
+    print("Saving new bento service archive to: '{}'".format(saved_path))
 
-    loaded_model = load(saved_path)
-    print(loaded_model.predict(X[0:1]))
+    loaded_service = load(saved_path)
+    print(loaded_service.predict(X[0:1]))
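Once saved, the archive can also be served over HTTP. A hedged client sketch, assuming the JsonHandler api is exposed as POST /predict on the dev server's default port 5000 (the notebook below starts it with `bentoml serve --model-path=...`); the payload shape is a guess, since predict() runs the JSON through the mymodule helpers before it reaches the classifier.

import requests

# Hypothetical client call; the /predict route and port 5000 are assumptions,
# not shown anywhere in this commit.
response = requests.post(
    'http://127.0.0.1:5000/predict',
    json=[[5.1, 3.5, 1.4, 0.2]],  # one iris feature row (assumed payload shape)
)
print(response.text)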

53 changes: 2 additions & 51 deletions examples/sklearn-sentiment-clf/sklearn-sentiment-clf.ipynb
@@ -19,7 +19,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# sentiment_analysis_twitter_model\n",
+    "# Sentiment Analysis with Scikit-learn\n",
     "\n",
     "Based on https://github.com/crawles/sentiment_analysis_twitter_model/blob/master/build-sentiment-classifier.ipynb\n",
     "\n",
@@ -79,48 +79,6 @@
    "dftest.columns = columns"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Text Pre-processing"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class RegexPreprocess(object):\n",
-    "    \"\"\"Create a preprocessing module for a tweet or data structure of tweets.\n",
-    "    1) replace username, e.g., @crawles -> USERNAME\n",
-    "    2) replace http links -> URL\n",
-    "    3) replace repeated letters to two letters\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    user_pat = '(?<=^|(?<=[^a-zA-Z0-9-_\\.]))@([A-Za-z]+[A-Za-z0-9]+)'\n",
-    "    http_pat = '(https?:\\/\\/(?:www\\.|(?!www))[^\\s\\.]+\\.[^\\s]{2,}|www\\.[^\\s]+\\.[^\\s]{2,})'\n",
-    "    repeat_pat, repeat_repl = \"(.)\\\\1\\\\1+\",'\\\\1\\\\1'\n",
-    "    \n",
-    "    def __init__(self):\n",
-    "        pass\n",
-    "    \n",
-    "    def transform(self, X):\n",
-    "        is_pd_series = isinstance(X, pd.core.frame.Series)\n",
-    "        if not is_pd_series:\n",
-    "            pp_text = pd.Series(X)\n",
-    "        else:\n",
-    "            pp_text = X\n",
-    "        pp_text = pp_text.str.replace(pat = self.user_pat, repl = 'USERNAME')\n",
-    "        pp_text = pp_text.str.replace(pat = self.http_pat, repl = 'URL')\n",
-    "        pp_text.str.replace(pat = self.repeat_pat, repl = self.repeat_repl)\n",
-    "        return pp_text\n",
-    "    \n",
-    "    def fit(self, X, y=None):\n",
-    "        return self"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -136,7 +94,7 @@
   },
   "outputs": [],
   "source": [
-    "sentiment_lr = Pipeline([('regex_preprocess', RegexPreprocess()),\n",
+    "sentiment_lr = Pipeline([\n",
     "                         ('count_vect', CountVectorizer(min_df = 100,\n",
     "                                                        ngram_range = (1,1),\n",
     "                                                        stop_words = 'english')), \n",
@@ -318,13 +276,6 @@
    "!bentoml serve --model-path=./model"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": null,
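With RegexPreprocess dropped, the simplified notebook pipeline reduces to vectorizer plus classifier. A hedged reconstruction of the full cell, since the hunk above is truncated after the CountVectorizer step; the LogisticRegression stage and the commented training call are assumptions carried over from the upstream notebook this example is based on.

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Simplified two-step pipeline (no regex preprocessing step).
sentiment_lr = Pipeline([
    ('count_vect', CountVectorizer(min_df=100,
                                   ngram_range=(1, 1),
                                   stop_words='english')),
    ('lr', LogisticRegression()),  # assumed final estimator, not shown in the hunk
])

# sentiment_lr.fit(X_train, y_train)  # hypothetical training split names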
@@ -0,0 +1,35 @@
+import pandas as pd
+import numpy as np
+from tensorflow import keras
+from bentoml import api, env, BentoService, artifacts
+from bentoml.artifact import TfKerasModelArtifact, PickleArtifact
+from bentoml.handlers import JsonHandler
+
+@artifacts([
+    TfKerasModelArtifact('model'),
+    PickleArtifact('word_index')
+])
+@env(conda_dependencies=['tensorflow', 'numpy', 'pandas'])
+class TextClassificationService(BentoService):
+
+    def word_to_index(self, word):
+        if word in self.artifacts.word_index:
+            return self.artifacts.word_index[word]
+        else:
+            return self.artifacts.word_index["<UNK>"]
+
+    @api(JsonHandler)
+    def predict(self, parsed_json):
+        """
+        """
+        text = parsed_json['text']
+
+        sequence = keras.preprocessing.text.hashing_trick(
+            text,
+            256,
+            hash_function=self.word_to_index,
+            filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
+            lower=True,
+            split=' ')
+
+        return self.artifacts.model.predict(np.expand_dims(sequence, 0))
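A hedged usage sketch for the new service above: pack a trained tf.keras model together with its word_index lookup, then call the JSON api directly. The tiny stand-in model and two-word vocabulary below are illustrations, not the example's real training artifacts; only the pack() and predict() usage mirrors what this commit's other examples show.

from tensorflow import keras

# Stand-in artifacts; the real example would train a proper text classifier.
model = keras.Sequential([
    keras.layers.Embedding(256, 8),   # matches the 256-bucket hashing_trick above
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy')

word_index = {'<UNK>': 1, 'great': 2}  # stand-in vocabulary

svc = TextClassificationService.pack(model=model, word_index=word_index)
print(svc.predict({'text': 'great movie'}))  # JsonHandler passes this dict as parsed_json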
