From 5f3df042e18a866f5774062e9c49423c08b21c90 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 1 Dec 2020 16:34:03 -0800 Subject: [PATCH 01/36] Update examples --- .gitbook.yaml | 6 +- docs/aws/install.md | 2 +- docs/deployments/batch-api.md | 2 +- docs/deployments/batch-api/deployment.md | 2 +- docs/deployments/batch-api/predictors.md | 2 +- docs/deployments/realtime-api.md | 2 +- docs/deployments/realtime-api/deployment.md | 2 +- docs/deployments/realtime-api/models.md | 3 - docs/deployments/realtime-api/predictors.md | 58 - .../realtime-api/traffic-splitter.md | 4 +- docs/guides/multi-model.md | 5 +- docs/guides/single-node-deployment.md | 4 +- docs/summary.md | 4 +- examples/README.md | 4 - .../{image-classifier => python}/README.md | 0 .../{image-classifier => python}/cortex.yaml | 0 .../{image-classifier => python}/predictor.py | 0 .../requirements.txt | 0 .../{image-classifier => python}/sample.json | 0 .../python}/README.md | 0 .../python}/cortex.yaml | 0 .../python}/cortex_gpu.yaml | 0 .../python}/cortex_inf.yaml | 0 .../python}/generate_resnet50_models.ipynb | 0 .../python}/predictor.py | 0 .../python}/sample.json | 0 .../tensorflow}/README.md | 0 .../tensorflow}/cortex.yaml | 0 .../tensorflow}/cortex_gpu.yaml | 0 .../cortex_gpu_server_side_batching.yaml | 0 .../tensorflow}/cortex_inf.yaml | 0 .../cortex_inf_server_side_batching.yaml | 0 .../generate_gpu_resnet50_model.ipynb | 0 .../generate_resnet50_models.ipynb | 0 .../tensorflow}/predictor.py | 0 .../tensorflow}/requirements.txt | 0 .../tensorflow}/sample.bin | Bin .../tensorflow}/sample.json | 0 .../onnx}/README.md | 2 +- .../onnx}/cortex.yaml | 0 .../onnx}/predictor.py | 0 .../onnx}/sample.json | 0 .../onnx}/xgboost.ipynb | 0 .../python}/README.md | 0 .../python}/deploy.ipynb | 0 .../python}/predictor.py | 0 .../python}/requirements.txt | 0 .../tensorflow}/README.md | 2 +- .../tensorflow}/cortex.yaml | 0 .../tensorflow}/predictor.py | 0 .../tensorflow}/sample.json | 0 
.../tensorflow}/tensorflow.ipynb | 0 examples/keras/document-denoiser/README.md | 46 - examples/keras/document-denoiser/cortex.yaml | 12 - examples/keras/document-denoiser/predictor.py | 86 -- .../keras/document-denoiser/requirements.txt | 5 - examples/keras/document-denoiser/sample.json | 3 - .../keras/document-denoiser/trainer.ipynb | 620 ---------- examples/live-reloading/onnx/README.md | 4 +- examples/live-reloading/tensorflow/README.md | 8 +- .../onnx}/README.md | 0 .../onnx}/cortex.yaml | 0 .../onnx}/predictor.py | 0 .../onnx}/requirements.txt | 0 .../onnx}/sample.json | 0 .../python}/README.md | 0 .../python}/cortex.yaml | 0 .../python}/predictor.py | 0 .../python}/requirements.txt | 0 .../python}/sample-sentiment.json | 0 .../python}/sample-summarizer.json | 0 .../tensorflow}/README.md | 0 .../tensorflow}/cortex.yaml | 4 - .../tensorflow}/predictor.py | 0 .../tensorflow}/requirements.txt | 0 .../tensorflow}/sample-image.json | 0 examples/onnx/iris-classifier/README.md | 3 - examples/onnx/yolov5-youtube/README.md | 61 - .../onnx/yolov5-youtube/conda-packages.txt | 3 - examples/onnx/yolov5-youtube/cortex.yaml | 13 - examples/onnx/yolov5-youtube/labels.json | 82 -- examples/onnx/yolov5-youtube/predictor.py | 65 -- examples/onnx/yolov5-youtube/requirements.txt | 3 - examples/onnx/yolov5-youtube/sample.json | 3 - examples/onnx/yolov5-youtube/utils.py | 130 --- examples/pytorch/answer-generator/README.md | 3 - examples/pytorch/answer-generator/cortex.yaml | 11 - .../pytorch/answer-generator/generator.py | 44 - .../pytorch/answer-generator/predictor.py | 36 - .../pytorch/answer-generator/requirements.txt | 3 - examples/pytorch/answer-generator/sample.json | 3 - .../image-classifier-alexnet/cortex.yaml | 11 - .../image-classifier-alexnet/predictor.py | 39 - .../image-classifier-alexnet/requirements.txt | 2 - .../image-classifier-alexnet/sample.json | 3 - examples/pytorch/iris-classifier/cortex.yaml | 11 - examples/pytorch/iris-classifier/predictor.py | 50 - 
.../pytorch/iris-classifier/requirements.txt | 2 - examples/pytorch/iris-classifier/sample.json | 6 - .../pytorch/language-identifier/README.md | 3 - .../pytorch/language-identifier/cortex.yaml | 9 - .../pytorch/language-identifier/predictor.py | 18 - .../language-identifier/requirements.txt | 2 - .../pytorch/language-identifier/sample.json | 3 - examples/pytorch/object-detector/README.md | 3 - .../pytorch/object-detector/coco_labels.txt | 91 -- examples/pytorch/object-detector/cortex.yaml | 11 - examples/pytorch/object-detector/predictor.py | 49 - .../pytorch/object-detector/requirements.txt | 2 - examples/pytorch/object-detector/sample.json | 4 - .../pytorch/question-generator/cortex.yaml | 10 - .../question-generator/dependencies.sh | 4 - .../pytorch/question-generator/predictor.py | 36 - .../question-generator/requirements.txt | 4 - .../pytorch/question-generator/sample.json | 4 - .../pytorch/reading-comprehender/README.md | 3 - .../pytorch/reading-comprehender/cortex.yaml | 11 - .../pytorch/reading-comprehender/predictor.py | 25 - .../reading-comprehender/requirements.txt | 1 - .../pytorch/reading-comprehender/sample.json | 4 - examples/pytorch/search-completer/README.md | 3 - examples/pytorch/search-completer/cortex.yaml | 11 - .../pytorch/search-completer/predictor.py | 20 - .../pytorch/search-completer/requirements.txt | 5 - examples/pytorch/search-completer/sample.json | 3 - examples/pytorch/sentiment-analyzer/README.md | 3 - .../pytorch/sentiment-analyzer/cortex.yaml | 10 - .../pytorch/sentiment-analyzer/predictor.py | 15 - .../sentiment-analyzer/requirements.txt | 2 - .../pytorch/sentiment-analyzer/sample.json | 3 - examples/pytorch/text-summarizer/README.md | 5 - examples/pytorch/text-summarizer/cortex.yaml | 11 - examples/pytorch/text-summarizer/predictor.py | 18 - .../pytorch/text-summarizer/requirements.txt | 2 - examples/pytorch/text-summarizer/sample.json | 3 - examples/sklearn/iris-classifier/README.md | 3 - 
examples/sklearn/iris-classifier/cortex.yaml | 15 - examples/sklearn/iris-classifier/predictor.py | 31 - .../sklearn/iris-classifier/requirements.txt | 2 - examples/sklearn/iris-classifier/sample.json | 6 - examples/sklearn/iris-classifier/trainer.py | 25 - examples/sklearn/mpg-estimator/README.md | 3 - examples/sklearn/mpg-estimator/cortex.yaml | 11 - examples/sklearn/mpg-estimator/predictor.py | 41 - .../sklearn/mpg-estimator/requirements.txt | 4 - examples/sklearn/mpg-estimator/sample.json | 7 - examples/sklearn/mpg-estimator/trainer.py | 25 - examples/spacy/entity-recognizer/README.md | 3 - examples/spacy/entity-recognizer/cortex.yaml | 10 - examples/spacy/entity-recognizer/predictor.py | 22 - .../spacy/entity-recognizer/requirements.txt | 1 - examples/spacy/entity-recognizer/sample.json | 3 - .../image-classifier-inception/README.md | 3 - .../image-classifier-inception/cortex.yaml | 13 - .../cortex_server_side_batching.yaml | 17 - .../inception.ipynb | 211 ---- .../image-classifier-inception/predictor.py | 21 - .../image-classifier-inception/sample.json | 3 - examples/tensorflow/iris-classifier/README.md | 3 - .../tensorflow/license-plate-reader/README.md | 175 --- .../license-plate-reader/config.json | 8 - .../license-plate-reader/cortex_full.yaml | 35 - .../license-plate-reader/cortex_lite.yaml | 14 - .../license-plate-reader/predictor_crnn.py | 44 - .../license-plate-reader/predictor_lite.py | 120 -- .../license-plate-reader/predictor_yolo.py | 46 - .../license-plate-reader/requirements.txt | 5 - .../license-plate-reader/sample_inference.py | 100 -- .../license-plate-reader/utils/__init__.py | 1 - .../license-plate-reader/utils/bbox.py | 111 -- .../license-plate-reader/utils/colors.py | 100 -- .../license-plate-reader/utils/preprocess.py | 59 - .../license-plate-reader/utils/utils.py | 160 --- .../multi-model-classifier/requirements.txt | 1 - .../multi-model-classifier/sample-iris.json | 8 - .../tensorflow/sentiment-analyzer/README.md | 3 - 
.../tensorflow/sentiment-analyzer/bert.ipynb | 1007 ----------------- .../tensorflow/sentiment-analyzer/cortex.yaml | 13 - .../sentiment-analyzer/predictor.py | 29 - .../sentiment-analyzer/requirements.txt | 5 - .../tensorflow/sentiment-analyzer/sample.json | 3 - examples/tensorflow/text-generator/README.md | 3 - .../tensorflow/text-generator/cortex.yaml | 11 - examples/tensorflow/text-generator/encoder.py | 118 -- .../tensorflow/text-generator/gpt-2.ipynb | 383 ------- .../tensorflow/text-generator/predictor.py | 24 - .../text-generator/requirements.txt | 2 - .../tensorflow/text-generator/sample.json | 3 - examples/traffic-splitter/model.py | 59 - .../README.md | 0 .../cortex.yaml | 0 .../model.py | 0 .../onnx_predictor.py | 0 .../pytorch_predictor.py | 0 .../sample.json | 0 195 files changed, 20 insertions(+), 5003 deletions(-) rename examples/batch/{image-classifier => python}/README.md (100%) rename examples/batch/{image-classifier => python}/cortex.yaml (100%) rename examples/batch/{image-classifier => python}/predictor.py (100%) rename examples/batch/{image-classifier => python}/requirements.txt (100%) rename examples/batch/{image-classifier => python}/sample.json (100%) rename examples/{pytorch/image-classifier-resnet50 => compute/python}/README.md (100%) rename examples/{pytorch/image-classifier-resnet50 => compute/python}/cortex.yaml (100%) rename examples/{pytorch/image-classifier-resnet50 => compute/python}/cortex_gpu.yaml (100%) rename examples/{pytorch/image-classifier-resnet50 => compute/python}/cortex_inf.yaml (100%) rename examples/{pytorch/image-classifier-resnet50 => compute/python}/generate_resnet50_models.ipynb (100%) rename examples/{pytorch/image-classifier-resnet50 => compute/python}/predictor.py (100%) rename examples/{onnx/multi-model-classifier => compute/python}/sample.json (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/README.md (100%) rename examples/{tensorflow/image-classifier-resnet50 => 
compute/tensorflow}/cortex.yaml (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/cortex_gpu.yaml (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/cortex_gpu_server_side_batching.yaml (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/cortex_inf.yaml (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/cortex_inf_server_side_batching.yaml (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/generate_gpu_resnet50_model.ipynb (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/generate_resnet50_models.ipynb (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/predictor.py (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/requirements.txt (100%) rename examples/{tensorflow/image-classifier-resnet50 => compute/tensorflow}/sample.bin (100%) rename examples/{pytorch/image-classifier-resnet50 => compute/tensorflow}/sample.json (100%) rename examples/{pytorch/iris-classifier => hello-world/onnx}/README.md (64%) rename examples/{onnx/iris-classifier => hello-world/onnx}/cortex.yaml (100%) rename examples/{onnx/iris-classifier => hello-world/onnx}/predictor.py (100%) rename examples/{onnx/iris-classifier => hello-world/onnx}/sample.json (100%) rename examples/{onnx/iris-classifier => hello-world/onnx}/xgboost.ipynb (100%) rename examples/{pytorch/text-generator => hello-world/python}/README.md (100%) rename examples/{pytorch/text-generator => hello-world/python}/deploy.ipynb (100%) rename examples/{pytorch/text-generator => hello-world/python}/predictor.py (100%) rename examples/{pytorch/text-generator => hello-world/python}/requirements.txt (100%) rename examples/{pytorch/image-classifier-alexnet => hello-world/tensorflow}/README.md (64%) rename examples/{tensorflow/iris-classifier => 
hello-world/tensorflow}/cortex.yaml (100%) rename examples/{tensorflow/iris-classifier => hello-world/tensorflow}/predictor.py (100%) rename examples/{tensorflow/iris-classifier => hello-world/tensorflow}/sample.json (100%) rename examples/{tensorflow/iris-classifier => hello-world/tensorflow}/tensorflow.ipynb (100%) delete mode 100644 examples/keras/document-denoiser/README.md delete mode 100644 examples/keras/document-denoiser/cortex.yaml delete mode 100644 examples/keras/document-denoiser/predictor.py delete mode 100644 examples/keras/document-denoiser/requirements.txt delete mode 100644 examples/keras/document-denoiser/sample.json delete mode 100644 examples/keras/document-denoiser/trainer.ipynb rename examples/{onnx/multi-model-classifier => multi-model/onnx}/README.md (100%) rename examples/{onnx/multi-model-classifier => multi-model/onnx}/cortex.yaml (100%) rename examples/{onnx/multi-model-classifier => multi-model/onnx}/predictor.py (100%) rename examples/{onnx/multi-model-classifier => multi-model/onnx}/requirements.txt (100%) rename examples/{tensorflow/image-classifier-resnet50 => multi-model/onnx}/sample.json (100%) rename examples/{pytorch/multi-model-text-analyzer => multi-model/python}/README.md (100%) rename examples/{pytorch/multi-model-text-analyzer => multi-model/python}/cortex.yaml (100%) rename examples/{pytorch/multi-model-text-analyzer => multi-model/python}/predictor.py (100%) rename examples/{pytorch/multi-model-text-analyzer => multi-model/python}/requirements.txt (100%) rename examples/{pytorch/multi-model-text-analyzer => multi-model/python}/sample-sentiment.json (100%) rename examples/{pytorch/multi-model-text-analyzer => multi-model/python}/sample-summarizer.json (100%) rename examples/{tensorflow/multi-model-classifier => multi-model/tensorflow}/README.md (100%) rename examples/{tensorflow/multi-model-classifier => multi-model/tensorflow}/cortex.yaml (84%) rename examples/{tensorflow/multi-model-classifier => 
multi-model/tensorflow}/predictor.py (100%) rename examples/{tensorflow/image-classifier-inception => multi-model/tensorflow}/requirements.txt (100%) rename examples/{tensorflow/multi-model-classifier => multi-model/tensorflow}/sample-image.json (100%) delete mode 100644 examples/onnx/iris-classifier/README.md delete mode 100644 examples/onnx/yolov5-youtube/README.md delete mode 100644 examples/onnx/yolov5-youtube/conda-packages.txt delete mode 100644 examples/onnx/yolov5-youtube/cortex.yaml delete mode 100644 examples/onnx/yolov5-youtube/labels.json delete mode 100644 examples/onnx/yolov5-youtube/predictor.py delete mode 100644 examples/onnx/yolov5-youtube/requirements.txt delete mode 100644 examples/onnx/yolov5-youtube/sample.json delete mode 100644 examples/onnx/yolov5-youtube/utils.py delete mode 100644 examples/pytorch/answer-generator/README.md delete mode 100644 examples/pytorch/answer-generator/cortex.yaml delete mode 100644 examples/pytorch/answer-generator/generator.py delete mode 100644 examples/pytorch/answer-generator/predictor.py delete mode 100644 examples/pytorch/answer-generator/requirements.txt delete mode 100644 examples/pytorch/answer-generator/sample.json delete mode 100644 examples/pytorch/image-classifier-alexnet/cortex.yaml delete mode 100644 examples/pytorch/image-classifier-alexnet/predictor.py delete mode 100644 examples/pytorch/image-classifier-alexnet/requirements.txt delete mode 100644 examples/pytorch/image-classifier-alexnet/sample.json delete mode 100644 examples/pytorch/iris-classifier/cortex.yaml delete mode 100644 examples/pytorch/iris-classifier/predictor.py delete mode 100644 examples/pytorch/iris-classifier/requirements.txt delete mode 100644 examples/pytorch/iris-classifier/sample.json delete mode 100644 examples/pytorch/language-identifier/README.md delete mode 100644 examples/pytorch/language-identifier/cortex.yaml delete mode 100644 examples/pytorch/language-identifier/predictor.py delete mode 100644 
examples/pytorch/language-identifier/requirements.txt delete mode 100644 examples/pytorch/language-identifier/sample.json delete mode 100644 examples/pytorch/object-detector/README.md delete mode 100644 examples/pytorch/object-detector/coco_labels.txt delete mode 100644 examples/pytorch/object-detector/cortex.yaml delete mode 100644 examples/pytorch/object-detector/predictor.py delete mode 100644 examples/pytorch/object-detector/requirements.txt delete mode 100644 examples/pytorch/object-detector/sample.json delete mode 100644 examples/pytorch/question-generator/cortex.yaml delete mode 100644 examples/pytorch/question-generator/dependencies.sh delete mode 100644 examples/pytorch/question-generator/predictor.py delete mode 100644 examples/pytorch/question-generator/requirements.txt delete mode 100644 examples/pytorch/question-generator/sample.json delete mode 100644 examples/pytorch/reading-comprehender/README.md delete mode 100644 examples/pytorch/reading-comprehender/cortex.yaml delete mode 100644 examples/pytorch/reading-comprehender/predictor.py delete mode 100644 examples/pytorch/reading-comprehender/requirements.txt delete mode 100644 examples/pytorch/reading-comprehender/sample.json delete mode 100644 examples/pytorch/search-completer/README.md delete mode 100644 examples/pytorch/search-completer/cortex.yaml delete mode 100644 examples/pytorch/search-completer/predictor.py delete mode 100644 examples/pytorch/search-completer/requirements.txt delete mode 100644 examples/pytorch/search-completer/sample.json delete mode 100644 examples/pytorch/sentiment-analyzer/README.md delete mode 100644 examples/pytorch/sentiment-analyzer/cortex.yaml delete mode 100644 examples/pytorch/sentiment-analyzer/predictor.py delete mode 100644 examples/pytorch/sentiment-analyzer/requirements.txt delete mode 100644 examples/pytorch/sentiment-analyzer/sample.json delete mode 100644 examples/pytorch/text-summarizer/README.md delete mode 100644 
examples/pytorch/text-summarizer/cortex.yaml delete mode 100644 examples/pytorch/text-summarizer/predictor.py delete mode 100644 examples/pytorch/text-summarizer/requirements.txt delete mode 100644 examples/pytorch/text-summarizer/sample.json delete mode 100644 examples/sklearn/iris-classifier/README.md delete mode 100644 examples/sklearn/iris-classifier/cortex.yaml delete mode 100644 examples/sklearn/iris-classifier/predictor.py delete mode 100644 examples/sklearn/iris-classifier/requirements.txt delete mode 100644 examples/sklearn/iris-classifier/sample.json delete mode 100644 examples/sklearn/iris-classifier/trainer.py delete mode 100644 examples/sklearn/mpg-estimator/README.md delete mode 100644 examples/sklearn/mpg-estimator/cortex.yaml delete mode 100644 examples/sklearn/mpg-estimator/predictor.py delete mode 100644 examples/sklearn/mpg-estimator/requirements.txt delete mode 100644 examples/sklearn/mpg-estimator/sample.json delete mode 100644 examples/sklearn/mpg-estimator/trainer.py delete mode 100644 examples/spacy/entity-recognizer/README.md delete mode 100644 examples/spacy/entity-recognizer/cortex.yaml delete mode 100644 examples/spacy/entity-recognizer/predictor.py delete mode 100644 examples/spacy/entity-recognizer/requirements.txt delete mode 100644 examples/spacy/entity-recognizer/sample.json delete mode 100644 examples/tensorflow/image-classifier-inception/README.md delete mode 100644 examples/tensorflow/image-classifier-inception/cortex.yaml delete mode 100644 examples/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml delete mode 100644 examples/tensorflow/image-classifier-inception/inception.ipynb delete mode 100644 examples/tensorflow/image-classifier-inception/predictor.py delete mode 100644 examples/tensorflow/image-classifier-inception/sample.json delete mode 100644 examples/tensorflow/iris-classifier/README.md delete mode 100644 examples/tensorflow/license-plate-reader/README.md delete mode 100644 
examples/tensorflow/license-plate-reader/config.json delete mode 100644 examples/tensorflow/license-plate-reader/cortex_full.yaml delete mode 100644 examples/tensorflow/license-plate-reader/cortex_lite.yaml delete mode 100644 examples/tensorflow/license-plate-reader/predictor_crnn.py delete mode 100644 examples/tensorflow/license-plate-reader/predictor_lite.py delete mode 100644 examples/tensorflow/license-plate-reader/predictor_yolo.py delete mode 100644 examples/tensorflow/license-plate-reader/requirements.txt delete mode 100644 examples/tensorflow/license-plate-reader/sample_inference.py delete mode 100644 examples/tensorflow/license-plate-reader/utils/__init__.py delete mode 100644 examples/tensorflow/license-plate-reader/utils/bbox.py delete mode 100644 examples/tensorflow/license-plate-reader/utils/colors.py delete mode 100644 examples/tensorflow/license-plate-reader/utils/preprocess.py delete mode 100644 examples/tensorflow/license-plate-reader/utils/utils.py delete mode 100644 examples/tensorflow/multi-model-classifier/requirements.txt delete mode 100644 examples/tensorflow/multi-model-classifier/sample-iris.json delete mode 100644 examples/tensorflow/sentiment-analyzer/README.md delete mode 100644 examples/tensorflow/sentiment-analyzer/bert.ipynb delete mode 100644 examples/tensorflow/sentiment-analyzer/cortex.yaml delete mode 100644 examples/tensorflow/sentiment-analyzer/predictor.py delete mode 100644 examples/tensorflow/sentiment-analyzer/requirements.txt delete mode 100644 examples/tensorflow/sentiment-analyzer/sample.json delete mode 100644 examples/tensorflow/text-generator/README.md delete mode 100644 examples/tensorflow/text-generator/cortex.yaml delete mode 100644 examples/tensorflow/text-generator/encoder.py delete mode 100644 examples/tensorflow/text-generator/gpt-2.ipynb delete mode 100644 examples/tensorflow/text-generator/predictor.py delete mode 100644 examples/tensorflow/text-generator/requirements.txt delete mode 100644 
examples/tensorflow/text-generator/sample.json delete mode 100644 examples/traffic-splitter/model.py rename examples/{traffic-splitter => traffic-splitting}/README.md (100%) rename examples/{traffic-splitter => traffic-splitting}/cortex.yaml (100%) rename examples/{pytorch/iris-classifier => traffic-splitting}/model.py (100%) rename examples/{traffic-splitter => traffic-splitting}/onnx_predictor.py (100%) rename examples/{traffic-splitter => traffic-splitting}/pytorch_predictor.py (100%) rename examples/{traffic-splitter => traffic-splitting}/sample.json (100%) diff --git a/.gitbook.yaml b/.gitbook.yaml index 8e0ed9a10b..09f320911a 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -5,9 +5,9 @@ structure: summary: summary.md redirects: - tutorial: ../examples/pytorch/text-generator/README.md - tutorial/realtime: ../examples/pytorch/text-generator/README.md - tutorial/batch: ../examples/batch/image-classifier/README.md + tutorial: ../examples/hello-world/python/README.md + tutorial/realtime: ../examples/hello-world/python/README.md + tutorial/batch: ../examples/batch/python/README.md install: ./aws/install.md uninstall: ./aws/uninstall.md update: ./aws/update.md diff --git a/docs/aws/install.md b/docs/aws/install.md index 67bda7563b..44a40b4aba 100644 --- a/docs/aws/install.md +++ b/docs/aws/install.md @@ -20,7 +20,7 @@ cortex env default aws ``` -Try the [tutorial](../../examples/pytorch/text-generator/README.md) or deploy one of our [examples](https://github.com/cortexlabs/cortex/tree/master/examples). +Try the [tutorial](../../examples/hello-world/python/README.md) or deploy one of our [examples](https://github.com/cortexlabs/cortex/tree/master/examples). 
## Configure Cortex diff --git a/docs/deployments/batch-api.md b/docs/deployments/batch-api.md index a9b368be15..9710290a6c 100644 --- a/docs/deployments/batch-api.md +++ b/docs/deployments/batch-api.md @@ -37,7 +37,7 @@ At any point, you can use the Job ID that was provided upon job submission to ma ## Next steps -* Try the [tutorial](../../examples/batch/image-classifier/README.md) to deploy a Batch API on your Cortex cluster. +* Try the [tutorial](../../examples/batch/python/README.md) to deploy a Batch API on your Cortex cluster. * See our [exporting guide](../guides/exporting.md) for how to export your model to use in a Batch API. * See the [Predictor docs](batch-api/predictors.md) for how to implement a Predictor class. * See the [API configuration docs](batch-api/api-configuration.md) for a full list of features that can be used to deploy your Batch API. diff --git a/docs/deployments/batch-api/deployment.md b/docs/deployments/batch-api/deployment.md index 81168fac42..9608e927cb 100644 --- a/docs/deployments/batch-api/deployment.md +++ b/docs/deployments/batch-api/deployment.md @@ -122,6 +122,6 @@ deleting my-api ## Additional resources -* [Tutorial](../../../examples/batch/image-classifier/README.md) provides a step-by-step walkthrough of deploying an image classification batch API +* [Tutorial](../../../examples/batch/python/README.md) provides a step-by-step walkthrough of deploying an image classification batch API * [CLI documentation](../../miscellaneous/cli.md) lists all CLI commands * [Examples](https://github.com/cortexlabs/cortex/tree/master/examples/batch) demonstrate how to deploy models from common ML libraries diff --git a/docs/deployments/batch-api/predictors.md b/docs/deployments/batch-api/predictors.md index 426a64ddd2..e66681b6a8 100644 --- a/docs/deployments/batch-api/predictors.md +++ b/docs/deployments/batch-api/predictors.md @@ -97,7 +97,7 @@ For proper separation of concerns, it is recommended to use the constructor's `c ### Examples 
-You can find an example of a BatchAPI using a PythonPredictor in [examples/batch/image-classifier](https://github.com/cortexlabs/cortex/tree/master/examples/batch/image-classifier). +You can find an example of a BatchAPI using a PythonPredictor in [examples/batch/python](https://github.com/cortexlabs/cortex/tree/master/examples/batch/python). ### Pre-installed packages diff --git a/docs/deployments/realtime-api.md b/docs/deployments/realtime-api.md index f90110690f..3bdba221ea 100644 --- a/docs/deployments/realtime-api.md +++ b/docs/deployments/realtime-api.md @@ -40,7 +40,7 @@ The Cortex Cluster will automatically scale based on the incoming traffic and th ## Next steps -* Try the [tutorial](../../examples/pytorch/text-generator/README.md) to deploy a Realtime API locally or on AWS. +* Try the [tutorial](../../examples/hello-world/python/README.md) to deploy a Realtime API locally or on AWS. * See our [exporting guide](../guides/exporting.md) for how to export your model to use in a Realtime API. * See the [Predictor docs](realtime-api/predictors.md) for how to implement a Predictor class. * See the [API configuration docs](realtime-api/api-configuration.md) for a full list of features that can be used to deploy your Realtime API. 
diff --git a/docs/deployments/realtime-api/deployment.md b/docs/deployments/realtime-api/deployment.md index a7c0a09a4c..b2bf5dccc1 100644 --- a/docs/deployments/realtime-api/deployment.md +++ b/docs/deployments/realtime-api/deployment.md @@ -63,6 +63,6 @@ deleting my-api ## Additional resources -* [Tutorial](../../../examples/pytorch/text-generator/README.md) provides a step-by-step walkthrough of deploying a text generation API +* [Tutorial](../../../examples/hello-world/python/README.md) provides a step-by-step walkthrough of deploying a text generation API * [CLI documentation](../../miscellaneous/cli.md) lists all CLI commands * [Examples](https://github.com/cortexlabs/cortex/tree/master/examples) demonstrate how to deploy models from common ML libraries diff --git a/docs/deployments/realtime-api/models.md b/docs/deployments/realtime-api/models.md index 07fdc2ce1a..114d30236e 100644 --- a/docs/deployments/realtime-api/models.md +++ b/docs/deployments/realtime-api/models.md @@ -182,9 +182,6 @@ The following is a list of events that will trigger the API to update its model( * A model changes its directory structure. * A file in the model directory is updated in-place. - -Examples can be seen in [examples/live-reloading](https://github.com/cortexlabs/cortex/tree/master/examples/live-reloading). - Usage varies based on the predictor type: ### Python diff --git a/docs/deployments/realtime-api/predictors.md b/docs/deployments/realtime-api/predictors.md index 9bc05823b5..0ff5b9951d 100644 --- a/docs/deployments/realtime-api/predictors.md +++ b/docs/deployments/realtime-api/predictors.md @@ -134,64 +134,6 @@ Your API can accept requests with different types of payloads such as `JSON`-par Your `predictor` method can return different types of objects such as `JSON`-parseable, `string`, and `bytes` objects. 
Navigate to the [API responses](#api-responses) section to learn about how to configure your `predictor` method to respond with different response codes and content-types. -### Examples - - -Many of the [examples](https://github.com/cortexlabs/cortex/tree/master/examples) use the Python Predictor, including all of the PyTorch examples. - - -Here is the Predictor for [examples/pytorch/text-generator](https://github.com/cortexlabs/cortex/tree/master/examples/pytorch/text-generator): - -```python -import torch -from transformers import GPT2Tokenizer, GPT2LMHeadModel - - -class PythonPredictor: - def __init__(self, config): - self.device = "cuda" if torch.cuda.is_available() else "cpu" - print(f"using device: {self.device}") - self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2") - self.model = GPT2LMHeadModel.from_pretrained("gpt2").to(self.device) - - def predict(self, payload): - input_length = len(payload["text"].split()) - tokens = self.tokenizer.encode(payload["text"], return_tensors="pt").to(self.device) - prediction = self.model.generate(tokens, max_length=input_length + 20, do_sample=True) - return self.tokenizer.decode(prediction[0]) -``` - - -Here is the Predictor for [examples/live-reloading/python/mpg-estimator](https://github.com/cortexlabs/cortex/tree/feature/master/examples/live-reloading/python/mpg-estimator): - -```python -import mlflow.sklearn -import numpy as np - - -class PythonPredictor: - def __init__(self, config, python_client): - self.client = python_client - - def load_model(self, model_path): - return mlflow.sklearn.load_model(model_path) - - def predict(self, payload, query_params): - model_version = query_params.get("version") - - model = self.client.get_model(model_version=model_version) - model_input = [ - payload["cylinders"], - payload["displacement"], - payload["horsepower"], - payload["weight"], - payload["acceleration"], - ] - result = model.predict([model_input]).item() - - return {"prediction": result, "model": {"version": 
model_version}} -``` - ### Pre-installed packages The following Python packages are pre-installed in Python Predictors and can be used in your implementations: diff --git a/docs/deployments/realtime-api/traffic-splitter.md b/docs/deployments/realtime-api/traffic-splitter.md index 3a8a004da1..90726aa173 100644 --- a/docs/deployments/realtime-api/traffic-splitter.md +++ b/docs/deployments/realtime-api/traffic-splitter.md @@ -76,6 +76,6 @@ Note that this will not delete the Realtime APIs targeted by the Traffic Splitte ## Additional resources -* [Traffic Splitter Tutorial](../../../examples/traffic-splitter/README.md) provides a step-by-step walkthrough for deploying an Traffic Splitter -* [Realtime API Tutorial](../../../examples/pytorch/text-generator/README.md) provides a step-by-step walkthrough of deploying a realtime API for text generation +* [Traffic Splitter Tutorial](../../../examples/traffic-splitting/README.md) provides a step-by-step walkthrough for deploying a Traffic Splitter +* [Realtime API Tutorial](../../../examples/hello-world/python/README.md) provides a step-by-step walkthrough of deploying a realtime API for text generation * [CLI documentation](../../miscellaneous/cli.md) lists all CLI commands diff --git a/docs/guides/multi-model.md b/docs/guides/multi-model.md index 79ed0507ab..e8380d7631 100644 --- a/docs/guides/multi-model.md +++ b/docs/guides/multi-model.md @@ -9,9 +9,6 @@ It is possible to serve multiple models in the same Cortex API using any type of ### Specifying models in API config - -The following template is based on the [live-reloading/python/mpg-estimator](https://github.com/cortexlabs/cortex/tree/master/examples/live-reloading/python/mpg-estimator) example. - #### `cortex.yaml` Even though it looks as if there's only a single model served, there are actually 4 different versions saved in `s3://cortex-examples/sklearn/mpg-estimator/linreg/`. 
@@ -158,7 +155,7 @@ Machine learning is the study of algorithms and statistical models that computer For the TensorFlow Predictor, a multi-model API is configured by placing the list of models in the Predictor's `models` field (each model will specify its own unique name). The `predict()` method of the `tensorflow_client` object expects a second argument that represents the name of the model that will be used for inference. -The following template is based on the [tensorflow/multi-model-classifier](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/multi-model-classifier) example. +The following template is based on the [multi-model/tensorflow](https://github.com/cortexlabs/cortex/tree/master/examples/multi-model/tensorflow) example. ### `cortex.yaml` diff --git a/docs/guides/single-node-deployment.md b/docs/guides/single-node-deployment.md index 7a21e560fe..1ec54a0003 100644 --- a/docs/guides/single-node-deployment.md +++ b/docs/guides/single-node-deployment.md @@ -101,7 +101,7 @@ $ sudo groupadd docker; sudo gpasswd -a $USER docker $ logout ``` -If you have installed Docker correctly, you should be able to run docker commands such as `docker run hello-world` without running into permission issues or needing `sudo`. +If you have installed Docker correctly, you should be able to run docker commands such as `docker run hello-world` without running into permission issues or needing `sudo`. 
### Step 12 @@ -120,7 +120,7 @@ You can now use Cortex to deploy your model: ```bash $ git clone -b master https://github.com/cortexlabs/cortex.git -$ cd cortex/examples/pytorch/text-generator +$ cd cortex/examples/hello-world/python $ cortex deploy diff --git a/docs/summary.md b/docs/summary.md index e92d69a771..d1891fc600 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -32,14 +32,14 @@ * [Autoscaling](deployments/realtime-api/autoscaling.md) * [Prediction monitoring](deployments/realtime-api/prediction-monitoring.md) * [Traffic Splitter](deployments/realtime-api/traffic-splitter.md) - * [Realtime API tutorial](../examples/pytorch/text-generator/README.md) + * [Realtime API tutorial](../examples/hello-world/python/README.md) * [Batch API](deployments/batch-api.md) * [Predictor implementation](deployments/batch-api/predictors.md) * [API configuration](deployments/batch-api/api-configuration.md) * [API deployment](deployments/batch-api/deployment.md) * [Endpoints](deployments/batch-api/endpoints.md) * [Job statuses](deployments/batch-api/statuses.md) - * [Batch API tutorial](../examples/batch/image-classifier/README.md) + * [Batch API tutorial](../examples/batch/python/README.md) ## Advanced diff --git a/examples/README.md b/examples/README.md index 1eb711f57d..a9b4f3ed15 100644 --- a/examples/README.md +++ b/examples/README.md @@ -2,8 +2,6 @@ ## TensorFlow -- [Iris classification](tensorflow/iris-classifier): deploy a model to classify iris flowers. - - [Text generation](tensorflow/text-generator): deploy OpenAI's GPT-2 to generate text. - [Sentiment analysis](tensorflow/sentiment-analyzer): deploy a BERT model for sentiment analysis. @@ -50,8 +48,6 @@ ## ONNX -- [Iris classification](onnx/iris-classifier): deploy an XGBoost model (exported in ONNX) to classify iris flowers. - - [YOLOv5 YouTube detection](onnx/yolov5-youtube): deploy a YOLOv5 model trained on COCO val2017 dataset. 
- [Multi-model classification](onnx/multi-model-classifier): deploy 3 models (ResNet50, MobileNet, ShuffleNet) in a single API. diff --git a/examples/batch/image-classifier/README.md b/examples/batch/python/README.md similarity index 100% rename from examples/batch/image-classifier/README.md rename to examples/batch/python/README.md diff --git a/examples/batch/image-classifier/cortex.yaml b/examples/batch/python/cortex.yaml similarity index 100% rename from examples/batch/image-classifier/cortex.yaml rename to examples/batch/python/cortex.yaml diff --git a/examples/batch/image-classifier/predictor.py b/examples/batch/python/predictor.py similarity index 100% rename from examples/batch/image-classifier/predictor.py rename to examples/batch/python/predictor.py diff --git a/examples/batch/image-classifier/requirements.txt b/examples/batch/python/requirements.txt similarity index 100% rename from examples/batch/image-classifier/requirements.txt rename to examples/batch/python/requirements.txt diff --git a/examples/batch/image-classifier/sample.json b/examples/batch/python/sample.json similarity index 100% rename from examples/batch/image-classifier/sample.json rename to examples/batch/python/sample.json diff --git a/examples/pytorch/image-classifier-resnet50/README.md b/examples/compute/python/README.md similarity index 100% rename from examples/pytorch/image-classifier-resnet50/README.md rename to examples/compute/python/README.md diff --git a/examples/pytorch/image-classifier-resnet50/cortex.yaml b/examples/compute/python/cortex.yaml similarity index 100% rename from examples/pytorch/image-classifier-resnet50/cortex.yaml rename to examples/compute/python/cortex.yaml diff --git a/examples/pytorch/image-classifier-resnet50/cortex_gpu.yaml b/examples/compute/python/cortex_gpu.yaml similarity index 100% rename from examples/pytorch/image-classifier-resnet50/cortex_gpu.yaml rename to examples/compute/python/cortex_gpu.yaml diff --git 
a/examples/pytorch/image-classifier-resnet50/cortex_inf.yaml b/examples/compute/python/cortex_inf.yaml similarity index 100% rename from examples/pytorch/image-classifier-resnet50/cortex_inf.yaml rename to examples/compute/python/cortex_inf.yaml diff --git a/examples/pytorch/image-classifier-resnet50/generate_resnet50_models.ipynb b/examples/compute/python/generate_resnet50_models.ipynb similarity index 100% rename from examples/pytorch/image-classifier-resnet50/generate_resnet50_models.ipynb rename to examples/compute/python/generate_resnet50_models.ipynb diff --git a/examples/pytorch/image-classifier-resnet50/predictor.py b/examples/compute/python/predictor.py similarity index 100% rename from examples/pytorch/image-classifier-resnet50/predictor.py rename to examples/compute/python/predictor.py diff --git a/examples/onnx/multi-model-classifier/sample.json b/examples/compute/python/sample.json similarity index 100% rename from examples/onnx/multi-model-classifier/sample.json rename to examples/compute/python/sample.json diff --git a/examples/tensorflow/image-classifier-resnet50/README.md b/examples/compute/tensorflow/README.md similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/README.md rename to examples/compute/tensorflow/README.md diff --git a/examples/tensorflow/image-classifier-resnet50/cortex.yaml b/examples/compute/tensorflow/cortex.yaml similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/cortex.yaml rename to examples/compute/tensorflow/cortex.yaml diff --git a/examples/tensorflow/image-classifier-resnet50/cortex_gpu.yaml b/examples/compute/tensorflow/cortex_gpu.yaml similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/cortex_gpu.yaml rename to examples/compute/tensorflow/cortex_gpu.yaml diff --git a/examples/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml b/examples/compute/tensorflow/cortex_gpu_server_side_batching.yaml similarity index 100% 
rename from examples/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml rename to examples/compute/tensorflow/cortex_gpu_server_side_batching.yaml diff --git a/examples/tensorflow/image-classifier-resnet50/cortex_inf.yaml b/examples/compute/tensorflow/cortex_inf.yaml similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/cortex_inf.yaml rename to examples/compute/tensorflow/cortex_inf.yaml diff --git a/examples/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml b/examples/compute/tensorflow/cortex_inf_server_side_batching.yaml similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml rename to examples/compute/tensorflow/cortex_inf_server_side_batching.yaml diff --git a/examples/tensorflow/image-classifier-resnet50/generate_gpu_resnet50_model.ipynb b/examples/compute/tensorflow/generate_gpu_resnet50_model.ipynb similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/generate_gpu_resnet50_model.ipynb rename to examples/compute/tensorflow/generate_gpu_resnet50_model.ipynb diff --git a/examples/tensorflow/image-classifier-resnet50/generate_resnet50_models.ipynb b/examples/compute/tensorflow/generate_resnet50_models.ipynb similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/generate_resnet50_models.ipynb rename to examples/compute/tensorflow/generate_resnet50_models.ipynb diff --git a/examples/tensorflow/image-classifier-resnet50/predictor.py b/examples/compute/tensorflow/predictor.py similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/predictor.py rename to examples/compute/tensorflow/predictor.py diff --git a/examples/tensorflow/image-classifier-resnet50/requirements.txt b/examples/compute/tensorflow/requirements.txt similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/requirements.txt rename to 
examples/compute/tensorflow/requirements.txt diff --git a/examples/tensorflow/image-classifier-resnet50/sample.bin b/examples/compute/tensorflow/sample.bin similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/sample.bin rename to examples/compute/tensorflow/sample.bin diff --git a/examples/pytorch/image-classifier-resnet50/sample.json b/examples/compute/tensorflow/sample.json similarity index 100% rename from examples/pytorch/image-classifier-resnet50/sample.json rename to examples/compute/tensorflow/sample.json diff --git a/examples/pytorch/iris-classifier/README.md b/examples/hello-world/onnx/README.md similarity index 64% rename from examples/pytorch/iris-classifier/README.md rename to examples/hello-world/onnx/README.md index 41a04891b3..a45b69db8f 100644 --- a/examples/pytorch/iris-classifier/README.md +++ b/examples/hello-world/onnx/README.md @@ -1,3 +1,3 @@ _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. +Please refer to the [tutorial](https://docs.cortex.dev/tutorial) to see how to deploy an example with Cortex. 
diff --git a/examples/onnx/iris-classifier/cortex.yaml b/examples/hello-world/onnx/cortex.yaml similarity index 100% rename from examples/onnx/iris-classifier/cortex.yaml rename to examples/hello-world/onnx/cortex.yaml diff --git a/examples/onnx/iris-classifier/predictor.py b/examples/hello-world/onnx/predictor.py similarity index 100% rename from examples/onnx/iris-classifier/predictor.py rename to examples/hello-world/onnx/predictor.py diff --git a/examples/onnx/iris-classifier/sample.json b/examples/hello-world/onnx/sample.json similarity index 100% rename from examples/onnx/iris-classifier/sample.json rename to examples/hello-world/onnx/sample.json diff --git a/examples/onnx/iris-classifier/xgboost.ipynb b/examples/hello-world/onnx/xgboost.ipynb similarity index 100% rename from examples/onnx/iris-classifier/xgboost.ipynb rename to examples/hello-world/onnx/xgboost.ipynb diff --git a/examples/pytorch/text-generator/README.md b/examples/hello-world/python/README.md similarity index 100% rename from examples/pytorch/text-generator/README.md rename to examples/hello-world/python/README.md diff --git a/examples/pytorch/text-generator/deploy.ipynb b/examples/hello-world/python/deploy.ipynb similarity index 100% rename from examples/pytorch/text-generator/deploy.ipynb rename to examples/hello-world/python/deploy.ipynb diff --git a/examples/pytorch/text-generator/predictor.py b/examples/hello-world/python/predictor.py similarity index 100% rename from examples/pytorch/text-generator/predictor.py rename to examples/hello-world/python/predictor.py diff --git a/examples/pytorch/text-generator/requirements.txt b/examples/hello-world/python/requirements.txt similarity index 100% rename from examples/pytorch/text-generator/requirements.txt rename to examples/hello-world/python/requirements.txt diff --git a/examples/pytorch/image-classifier-alexnet/README.md b/examples/hello-world/tensorflow/README.md similarity index 64% rename from 
examples/pytorch/image-classifier-alexnet/README.md rename to examples/hello-world/tensorflow/README.md index 41a04891b3..a45b69db8f 100644 --- a/examples/pytorch/image-classifier-alexnet/README.md +++ b/examples/hello-world/tensorflow/README.md @@ -1,3 +1,3 @@ _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. +Please refer to the [tutorial](https://docs.cortex.dev/tutorial) to see how to deploy an example with Cortex. diff --git a/examples/tensorflow/iris-classifier/cortex.yaml b/examples/hello-world/tensorflow/cortex.yaml similarity index 100% rename from examples/tensorflow/iris-classifier/cortex.yaml rename to examples/hello-world/tensorflow/cortex.yaml diff --git a/examples/tensorflow/iris-classifier/predictor.py b/examples/hello-world/tensorflow/predictor.py similarity index 100% rename from examples/tensorflow/iris-classifier/predictor.py rename to examples/hello-world/tensorflow/predictor.py diff --git a/examples/tensorflow/iris-classifier/sample.json b/examples/hello-world/tensorflow/sample.json similarity index 100% rename from examples/tensorflow/iris-classifier/sample.json rename to examples/hello-world/tensorflow/sample.json diff --git a/examples/tensorflow/iris-classifier/tensorflow.ipynb b/examples/hello-world/tensorflow/tensorflow.ipynb similarity index 100% rename from examples/tensorflow/iris-classifier/tensorflow.ipynb rename to examples/hello-world/tensorflow/tensorflow.ipynb diff --git a/examples/keras/document-denoiser/README.md b/examples/keras/document-denoiser/README.md deleted file mode 100644 index 05f90b9bef..0000000000 --- a/examples/keras/document-denoiser/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# Clean Dirty Documents w/ Autoencoders - -_WARNING: you are on 
the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example model cleans text documents of anything that isn't text (aka noise): coffee stains, old wear artifacts, etc. You can inspect the notebook that has been used to train the model [here](trainer.ipynb). - -Here's a collage of input texts and predictions. - -![Imgur](https://i.imgur.com/M4Mjz2l.jpg) - -*Figure 1 - The dirty documents are on the left side and the cleaned ones are on the right* - -## Sample Prediction - -Once this model is deployed, get the API endpoint by running `cortex get document-denoiser`. - -Now let's take a sample image like this one. - -![Imgur](https://i.imgur.com/JJLfFxB.png) - -Export the endpoint & the image's URL by running -```bash -export ENDPOINT= -export IMAGE_URL=https://i.imgur.com/JJLfFxB.png -``` - -Then run the following piped commands -```bash -curl "${ENDPOINT}" -X POST -H "Content-Type: application/json" -d '{"url":"'${IMAGE_URL}'"}' | -sed 's/"//g' | -base64 -d > prediction.png -``` - -Once this has run, we'll see a `prediction.png` file saved to the disk. This is the result. - -![Imgur](https://i.imgur.com/PRB2oS8.png) - -As it can be seen, the text document has been cleaned of any noise. Success! - ---- - -Here's a short list of URLs of other text documents in image format that can be cleaned using this model. Export these links to `IMAGE_URL` variable: - -* https://i.imgur.com/6COQ46f.png -* https://i.imgur.com/alLI83b.png -* https://i.imgur.com/QVoSTuu.png diff --git a/examples/keras/document-denoiser/cortex.yaml b/examples/keras/document-denoiser/cortex.yaml deleted file mode 100644 index b616a0ff0f..0000000000 --- a/examples/keras/document-denoiser/cortex.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: document-denoiser - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - config: - model: s3://cortex-examples/keras/document-denoiser/model.h5 - resize_shape: [540, 260] - compute: - cpu: 1 diff --git a/examples/keras/document-denoiser/predictor.py b/examples/keras/document-denoiser/predictor.py deleted file mode 100644 index 2554560388..0000000000 --- a/examples/keras/document-denoiser/predictor.py +++ /dev/null @@ -1,86 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import boto3, base64, cv2, re, os, requests -from botocore import UNSIGNED -from botocore.client import Config -import numpy as np -from tensorflow.keras.models import load_model - - -def get_url_image(url_image): - """ - Get numpy image from URL image. - """ - resp = requests.get(url_image, stream=True).raw - image = np.asarray(bytearray(resp.read()), dtype="uint8") - image = cv2.imdecode(image, cv2.IMREAD_GRAYSCALE) - return image - - -def image_to_png_nparray(image): - """ - Convert numpy image to jpeg numpy vector. - """ - is_success, im_buf_arr = cv2.imencode(".png", image) - return im_buf_arr - - -def image_to_png_bytes(image): - """ - Convert numpy image to bytes-encoded png image. 
- """ - buf = image_to_png_nparray(image) - byte_im = buf.tobytes() - return byte_im - - -class PythonPredictor: - def __init__(self, config): - # download the model - bucket, key = re.match("s3://(.+?)/(.+)", config["model"]).groups() - - if os.environ.get("AWS_ACCESS_KEY_ID"): - s3 = boto3.client("s3") # client will use your credentials if available - else: - s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client - - model_path = os.path.join("/tmp/model.h5") - s3.download_file(bucket, key, model_path) - - # load the model - self.model = load_model(model_path) - - # resize shape (width, height) - self.resize_shape = tuple(config["resize_shape"]) - - def predict(self, payload): - # download image - img_url = payload["url"] - image = get_url_image(img_url) - resized = cv2.resize(image, self.resize_shape) - - # prediction - pred = self.make_prediction(resized) - - # image represented in bytes - byte_im = image_to_png_bytes(pred) - - # encode image - image_enc = base64.b64encode(byte_im).decode("utf-8") - - return image_enc - - def make_prediction(self, img): - """ - Make prediction on image. 
- """ - processed = img / 255.0 - processed = np.expand_dims(processed, 0) - processed = np.expand_dims(processed, 3) - pred = self.model.predict(processed) - pred = np.squeeze(pred, 3) - pred = np.squeeze(pred, 0) - out_img = pred * 255 - out_img[out_img > 255.0] = 255.0 - out_img = out_img.astype(np.uint8) - return out_img diff --git a/examples/keras/document-denoiser/requirements.txt b/examples/keras/document-denoiser/requirements.txt deleted file mode 100644 index 77eb59dc52..0000000000 --- a/examples/keras/document-denoiser/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -numpy==1.18.0 -requests==2.22.0 -opencv-python==4.1.2.30 -keras==2.3.1 -h5py==2.10.0 diff --git a/examples/keras/document-denoiser/sample.json b/examples/keras/document-denoiser/sample.json deleted file mode 100644 index 651595f4fb..0000000000 --- a/examples/keras/document-denoiser/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "url": "https://i.imgur.com/JJLfFxB.png" -} diff --git a/examples/keras/document-denoiser/trainer.ipynb b/examples/keras/document-denoiser/trainer.ipynb deleted file mode 100644 index c8b0799b1b..0000000000 --- a/examples/keras/document-denoiser/trainer.ipynb +++ /dev/null @@ -1,620 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training a Document Denoiser Model with AutoEncoders" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [], - "source": [ - "# _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", - "\n", - "\n", - "import keras\n", - "import cv2\n", - "import numpy as np\n", - "import pandas as pd\n", - "import seaborn as sns\n", - "import os\n", - "import ntpath\n", - "from glob import glob\n", - "from matplotlib.pyplot import imshow\n", - "from sklearn.model_selection import train_test_split\n", - "from keras.preprocessing.image import ImageDataGenerator\n", - "from keras.models import Sequential, Model, load_model\n", - "from keras.layers import Activation, Flatten, Dropout, SpatialDropout2D, Conv2D, UpSampling2D, MaxPooling2D, add, concatenate, Input, BatchNormalization\n", - "from keras.backend import set_image_data_format\n", - "from keras.utils import plot_model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download Dataset\n", - "\n", - "Download the dataset from [kaggle (denoising dirty documents)](https://www.kaggle.com/c/denoising-dirty-documents/data). You will need to be logged in to be able to download the data.\n", - "\n", - "Once downloaded run the following commands" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!unzip denoising-dirty-documents.zip && rm denoising-dirty-documents.zip\n", - "!mv denoising-dirty-documents/*.zip . && rm -rf denoising-dirty-documents\n", - "!unzip '*.zip' > /dev/null && rm *.zip" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define the Data Generator\n", - "\n", - "Include data augmentation because the dataset is rather small." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "x_dirty = sorted(glob(\"train/*.png\"))\n", - "x_cleaned = sorted(glob(\"train_cleaned/*.png\"))\n", - "x_test = sorted(glob(\"test/*.png\"))\n", - "input_shape = (260, 540)\n", - "height = input_shape[0]\n", - "width = input_shape[1]" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "x_train, x_valid, y_train, y_valid = train_test_split(x_dirty, x_cleaned, test_size=0.20)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "set_image_data_format(\"channels_last\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "def model_train_generator(x_train, y_train, epochs, batch_size, resize_shape):\n", - " white_fill = 1.0\n", - " datagen = ImageDataGenerator(\n", - " rotation_range=180,\n", - " width_shift_range=0.2,\n", - " height_shift_range=0.2,\n", - " zoom_range=0.3,\n", - " fill_mode=\"constant\",\n", - " cval=white_fill,\n", - " horizontal_flip=True,\n", - " vertical_flip=True,\n", - " )\n", - " \n", - " for _ in range(epochs):\n", - " for x_file, y_file in zip(x_train, y_train):\n", - " x_img = cv2.imread(x_file, cv2.IMREAD_GRAYSCALE) / 255.0\n", - " y_img = cv2.imread(y_file, cv2.IMREAD_GRAYSCALE) / 255.0\n", - " \n", - " xs = []\n", - " ys = []\n", - " for i in range(batch_size):\n", - " if i == 0:\n", - " x = x_img\n", - " y = y_img\n", - " else:\n", - " params = datagen.get_random_transform(img_shape=x_img.shape)\n", - " x = datagen.apply_transform(np.expand_dims(x_img, 2), params)\n", - " y = datagen.apply_transform(np.expand_dims(y_img, 2), params)\n", - " x = cv2.resize(x, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", - " y = cv2.resize(y, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", - " x = np.expand_dims(x, 2)\n", - " y = np.expand_dims(y, 2)\n", 
- " xs.append(x)\n", - " ys.append(y)\n", - " xs_imgs = np.array(xs)\n", - " ys_imgs = np.array(ys)\n", - " yield (xs_imgs, ys_imgs)\n", - "\n", - "def model_valid_generator(x_valid, y_valid, epochs, resize_shape):\n", - " xs = []\n", - " ys = []\n", - " for x_file, y_file in zip(x_valid, y_valid):\n", - " x_img = cv2.imread(x_file, cv2.IMREAD_GRAYSCALE) / 255.0\n", - " y_img = cv2.imread(y_file, cv2.IMREAD_GRAYSCALE) / 255.0\n", - " x = cv2.resize(x_img, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", - " y = cv2.resize(y_img, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", - " x = np.expand_dims(x, 2)\n", - " x = np.expand_dims(x, 0)\n", - " y = np.expand_dims(y, 2)\n", - " y = np.expand_dims(y, 0)\n", - " xs.append(x)\n", - " ys.append(y)\n", - " \n", - " for _ in range(epochs):\n", - " for xs_img, ys_img in zip(xs, ys):\n", - " yield (xs_img, ys_img)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create the Model" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def create_encoder(input_shape):\n", - " inp = Input(shape=input_shape)\n", - " x = Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), \n", - " input_shape=input_shape, activation=\"relu\", padding=\"same\")(inp)\n", - " x = BatchNormalization()(x)\n", - " x = MaxPooling2D(pool_size=(2,2))(x)\n", - " \n", - " x = Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), \n", - " activation=\"relu\", padding=\"same\")(x)\n", - " x = BatchNormalization()(x)\n", - "\n", - " return inp, x" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def create_decoder(inp):\n", - " x = Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), activation=\"relu\",\n", - " padding=\"same\")(inp)\n", - " x = BatchNormalization()(x)\n", - " x = UpSampling2D(size=(2,2))(x)\n", - " \n", - " x = Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), \n", - " 
activation=\"relu\", padding=\"same\")(x)\n", - " x = BatchNormalization()(x)\n", - " \n", - " x = Conv2D(filters=1, kernel_size=(1,1), strides=(1,1), \n", - " activation=\"sigmoid\", padding=\"same\")(x)\n", - " x = BatchNormalization()(x)\n", - " \n", - " return inp, x" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def create_autoencoder(input_shape):\n", - " enc_inp, encoder = create_encoder(input_shape)\n", - " dec_inp, autoencoder = create_decoder(encoder)\n", - " model = Model(inputs=[enc_inp], outputs=[autoencoder], name='AutoEncoder')\n", - " \n", - " return model" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From C:\\Users\\OboTh\\Anaconda3\\envs\\lightweight-gpu-python\\lib\\site-packages\\tensorflow_core\\python\\ops\\resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "If using Keras pass *_constraint arguments to layers.\n", - "WARNING:tensorflow:From C:\\Users\\OboTh\\Anaconda3\\envs\\lightweight-gpu-python\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:4070: The name tf.nn.max_pool is deprecated. 
Please use tf.nn.max_pool2d instead.\n", - "\n", - "Model: \"AutoEncoder\"\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "input_1 (InputLayer) (None, 260, 540, 1) 0 \n", - "_________________________________________________________________\n", - "conv2d_1 (Conv2D) (None, 260, 540, 64) 640 \n", - "_________________________________________________________________\n", - "batch_normalization_1 (Batch (None, 260, 540, 64) 256 \n", - "_________________________________________________________________\n", - "max_pooling2d_1 (MaxPooling2 (None, 130, 270, 64) 0 \n", - "_________________________________________________________________\n", - "conv2d_2 (Conv2D) (None, 130, 270, 32) 18464 \n", - "_________________________________________________________________\n", - "batch_normalization_2 (Batch (None, 130, 270, 32) 128 \n", - "_________________________________________________________________\n", - "conv2d_3 (Conv2D) (None, 130, 270, 32) 9248 \n", - "_________________________________________________________________\n", - "batch_normalization_3 (Batch (None, 130, 270, 32) 128 \n", - "_________________________________________________________________\n", - "up_sampling2d_1 (UpSampling2 (None, 260, 540, 32) 0 \n", - "_________________________________________________________________\n", - "conv2d_4 (Conv2D) (None, 260, 540, 64) 18496 \n", - "_________________________________________________________________\n", - "batch_normalization_4 (Batch (None, 260, 540, 64) 256 \n", - "_________________________________________________________________\n", - "conv2d_5 (Conv2D) (None, 260, 540, 1) 65 \n", - "_________________________________________________________________\n", - "batch_normalization_5 (Batch (None, 260, 540, 1) 4 \n", - "=================================================================\n", - "Total params: 47,685\n", - "Trainable 
params: 47,299\n", - "Non-trainable params: 386\n", - "_________________________________________________________________\n" - ] - } - ], - "source": [ - "model = create_autoencoder((height, width, 1))\n", - "model.summary()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(optimizer='adam', loss='mse')\n", - "epochs = 20\n", - "batch_size = 8\n", - "samples = len(x_train)\n", - "validation_samples = len(x_valid)\n", - "train_generator = model_train_generator(x_train, y_train, epochs=epochs, batch_size=batch_size, resize_shape=(height, width))\n", - "valid_generator = model_valid_generator(x_valid, y_valid, epochs=epochs, resize_shape=(height, width))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train the AutoEncoder Model" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From C:\\Users\\OboTh\\Anaconda3\\envs\\lightweight-gpu-python\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:422: The name tf.global_variables is deprecated. 
Please use tf.compat.v1.global_variables instead.\n", - "\n", - "Epoch 1/20\n", - "115/115 [==============================] - 49s 429ms/step - loss: 1.2062 - val_loss: 0.1817\n", - "Epoch 2/20\n", - "115/115 [==============================] - 43s 373ms/step - loss: 0.5792 - val_loss: 0.1720\n", - "Epoch 3/20\n", - "115/115 [==============================] - 43s 373ms/step - loss: 0.4297 - val_loss: 0.1399\n", - "Epoch 4/20\n", - "115/115 [==============================] - 43s 375ms/step - loss: 0.3160 - val_loss: 0.1023\n", - "Epoch 5/20\n", - "115/115 [==============================] - 44s 385ms/step - loss: 0.2276 - val_loss: 0.0609\n", - "Epoch 6/20\n", - "115/115 [==============================] - 44s 379ms/step - loss: 0.1599 - val_loss: 0.0292\n", - "Epoch 7/20\n", - "115/115 [==============================] - 43s 376ms/step - loss: 0.1091 - val_loss: 0.0112\n", - "Epoch 8/20\n", - "115/115 [==============================] - 43s 376ms/step - loss: 0.0730 - val_loss: 0.0074\n", - "Epoch 9/20\n", - "115/115 [==============================] - 44s 381ms/step - loss: 0.0473 - val_loss: 0.0055\n", - "Epoch 10/20\n", - "115/115 [==============================] - 45s 393ms/step - loss: 0.0301 - val_loss: 0.0047\n", - "Epoch 11/20\n", - "115/115 [==============================] - 45s 387ms/step - loss: 0.0189 - val_loss: 0.0041\n", - "Epoch 12/20\n", - "115/115 [==============================] - 43s 376ms/step - loss: 0.0118 - val_loss: 0.0042\n", - "Epoch 13/20\n", - "115/115 [==============================] - 44s 380ms/step - loss: 0.0075 - val_loss: 0.0061\n", - "Epoch 14/20\n", - "115/115 [==============================] - 43s 377ms/step - loss: 0.0051 - val_loss: 0.0048\n", - "Epoch 15/20\n", - "115/115 [==============================] - 43s 378ms/step - loss: 0.0037 - val_loss: 0.0045\n", - "Epoch 16/20\n", - "115/115 [==============================] - 43s 373ms/step - loss: 0.0029 - val_loss: 0.0045\n", - "Epoch 17/20\n", - "115/115 
[==============================] - 44s 378ms/step - loss: 0.0025 - val_loss: 0.0048\n", - "Epoch 18/20\n", - "115/115 [==============================] - 43s 375ms/step - loss: 0.0023 - val_loss: 0.0047\n", - "Epoch 19/20\n", - "115/115 [==============================] - 43s 376ms/step - loss: 0.0022 - val_loss: 0.0043\n", - "Epoch 20/20\n", - "115/115 [==============================] - 44s 380ms/step - loss: 0.0021 - val_loss: 0.0042\n" - ] - } - ], - "source": [ - "hist_obj = model.fit_generator(train_generator, validation_data=valid_generator, validation_steps=validation_samples, steps_per_epoch=samples, epochs=epochs, shuffle=True) " - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "hist_pd = pd.DataFrame(hist_obj.history, index=np.arange(1, len(hist_obj.history['loss'])+1))\n", - "hist_pd.index.name = 'epoch'\n", - "sns.lineplot(data=hist_pd)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"model.h5\"" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "model.save(model_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "# model = load_model(model_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Testing Accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "def test_generator(x_test, resize_shape):\n", - " for sample in x_test:\n", - " img = cv2.imread(sample, cv2.IMREAD_GRAYSCALE) / 255.0\n", - " res_img = cv2.resize(img, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", - " res_img = np.expand_dims(res_img, 0)\n", - " res_img = np.expand_dims(res_img, 3)\n", - " np_img = np.array(res_img)\n", - " yield (np_img, np_img)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MSE Loss: 0.07084273546934128\n" - ] - } - ], - "source": [ - "steps = len(x_test)\n", - "test_gen = test_generator(x_test, input_shape)\n", - "loss = model.evaluate_generator(test_gen, steps=steps)\n", - "print(\"MSE Loss:\", loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Sample Prediction" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "img = cv2.imread(x_test[0], cv2.IMREAD_GRAYSCALE)\n", - "img = cv2.resize(img, input_shape[::-1], 
interpolation=cv2.INTER_AREA)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "imshow(img, cmap='gray')" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [], - "source": [ - "def make_prediction(img):\n", - " processed = img / 255.0\n", - " processed = np.expand_dims(processed, 0)\n", - " processed = np.expand_dims(processed, 3)\n", - " pred = model.predict(processed)\n", - " pred = np.squeeze(pred, 3)\n", - " pred = np.squeeze(pred, 0)\n", - " out_img = pred * 255\n", - " out_img[out_img > 255.0] = 255.0\n", - " out_img = out_img.astype(np.uint8)\n", - " return out_img\n", - "\n", - "def path_leaf(path):\n", - " head, tail = ntpath.split(path)\n", - " return tail or ntpath.basename(head)" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "pred = make_prediction(img)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "imshow(pred, cmap='gray')" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [], - "source": [ - "output_dir = 'test_preds'\n", - "if not os.path.exists(output_dir):\n", - " os.makedirs(output_dir)\n", - "for x_test_file in x_test:\n", - " img = cv2.imread(x_test_file, cv2.IMREAD_GRAYSCALE)\n", - " img = cv2.resize(img, input_shape[::-1], interpolation=cv2.INTER_AREA)\n", - " pred = make_prediction(img)\n", - " filename = path_leaf(x_test_file)\n", - " filepath = os.path.join(output_dir, filename)\n", - " cv2.imwrite(filepath, pred)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "lightweight-gpu-kernel", - "language": "python", - "name": "lightweight-gpu-kernel" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/live-reloading/onnx/README.md b/examples/live-reloading/onnx/README.md index 77456896ee..e8ec367b01 100644 --- a/examples/live-reloading/onnx/README.md +++ b/examples/live-reloading/onnx/README.md @@ -2,6 +2,4 @@ _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ -The model live-reloading feature is automatically enabled for the ONNX predictors. This means that any ONNX examples found in the [examples](../..) directory will already have this running. - -The live-reloading is a feature that reloads models at run-time from (a) specified S3 bucket(s) in the `cortex.yaml` config of each API. 
Models are added/removed from the API when the said models are added/removed from the S3 bucket(s) or reloaded when the models are edited. More on this in the [docs](insert-link). +Model live-reloading is automatically enabled for ONNX predictors. diff --git a/examples/live-reloading/tensorflow/README.md b/examples/live-reloading/tensorflow/README.md index 2444484b77..46f4111a4f 100644 --- a/examples/live-reloading/tensorflow/README.md +++ b/examples/live-reloading/tensorflow/README.md @@ -2,10 +2,4 @@ _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ -The model live-reloading feature is automatically enabled 1 for the TensorFlow predictors. This means that any TensorFLow examples found in the [examples](../..) directory will already have this running. - -The live-reloading is a feature that reloads models at run-time from (a) specified S3 bucket(s) in the `cortex.yaml` config of each API. Models are added/removed from the API when the said models are added/removed from the S3 bucket(s) or reloaded when the models are edited. More on this in the [docs](insert-link). - ---- - -*1: The live-reloading feature for the TensorFlow predictor is disabled when Inferentia resources (`compute.inf`) are added to the API and `processes_per_replica` > 1.* +Model live-reloading is automatically enabled for TensorFlow predictors unless using Inferentia resources (`compute.inf`) and `processes_per_replica` > 1. 
diff --git a/examples/onnx/multi-model-classifier/README.md b/examples/multi-model/onnx/README.md similarity index 100% rename from examples/onnx/multi-model-classifier/README.md rename to examples/multi-model/onnx/README.md diff --git a/examples/onnx/multi-model-classifier/cortex.yaml b/examples/multi-model/onnx/cortex.yaml similarity index 100% rename from examples/onnx/multi-model-classifier/cortex.yaml rename to examples/multi-model/onnx/cortex.yaml diff --git a/examples/onnx/multi-model-classifier/predictor.py b/examples/multi-model/onnx/predictor.py similarity index 100% rename from examples/onnx/multi-model-classifier/predictor.py rename to examples/multi-model/onnx/predictor.py diff --git a/examples/onnx/multi-model-classifier/requirements.txt b/examples/multi-model/onnx/requirements.txt similarity index 100% rename from examples/onnx/multi-model-classifier/requirements.txt rename to examples/multi-model/onnx/requirements.txt diff --git a/examples/tensorflow/image-classifier-resnet50/sample.json b/examples/multi-model/onnx/sample.json similarity index 100% rename from examples/tensorflow/image-classifier-resnet50/sample.json rename to examples/multi-model/onnx/sample.json diff --git a/examples/pytorch/multi-model-text-analyzer/README.md b/examples/multi-model/python/README.md similarity index 100% rename from examples/pytorch/multi-model-text-analyzer/README.md rename to examples/multi-model/python/README.md diff --git a/examples/pytorch/multi-model-text-analyzer/cortex.yaml b/examples/multi-model/python/cortex.yaml similarity index 100% rename from examples/pytorch/multi-model-text-analyzer/cortex.yaml rename to examples/multi-model/python/cortex.yaml diff --git a/examples/pytorch/multi-model-text-analyzer/predictor.py b/examples/multi-model/python/predictor.py similarity index 100% rename from examples/pytorch/multi-model-text-analyzer/predictor.py rename to examples/multi-model/python/predictor.py diff --git 
a/examples/pytorch/multi-model-text-analyzer/requirements.txt b/examples/multi-model/python/requirements.txt similarity index 100% rename from examples/pytorch/multi-model-text-analyzer/requirements.txt rename to examples/multi-model/python/requirements.txt diff --git a/examples/pytorch/multi-model-text-analyzer/sample-sentiment.json b/examples/multi-model/python/sample-sentiment.json similarity index 100% rename from examples/pytorch/multi-model-text-analyzer/sample-sentiment.json rename to examples/multi-model/python/sample-sentiment.json diff --git a/examples/pytorch/multi-model-text-analyzer/sample-summarizer.json b/examples/multi-model/python/sample-summarizer.json similarity index 100% rename from examples/pytorch/multi-model-text-analyzer/sample-summarizer.json rename to examples/multi-model/python/sample-summarizer.json diff --git a/examples/tensorflow/multi-model-classifier/README.md b/examples/multi-model/tensorflow/README.md similarity index 100% rename from examples/tensorflow/multi-model-classifier/README.md rename to examples/multi-model/tensorflow/README.md diff --git a/examples/tensorflow/multi-model-classifier/cortex.yaml b/examples/multi-model/tensorflow/cortex.yaml similarity index 84% rename from examples/tensorflow/multi-model-classifier/cortex.yaml rename to examples/multi-model/tensorflow/cortex.yaml index ef99bc941e..2c0e39bea8 100644 --- a/examples/tensorflow/multi-model-classifier/cortex.yaml +++ b/examples/multi-model/tensorflow/cortex.yaml @@ -9,14 +9,10 @@ paths: - name: inception model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ - - name: iris - model_path: s3://cortex-examples/tensorflow/iris-classifier/nn/ - name: resnet50 model_path: s3://cortex-examples/tensorflow/resnet50/ config: models: - iris: - labels: ["setosa", "versicolor", "virginica"] resnet50: input_shape: [224, 224] input_key: input diff --git a/examples/tensorflow/multi-model-classifier/predictor.py b/examples/multi-model/tensorflow/predictor.py 
similarity index 100% rename from examples/tensorflow/multi-model-classifier/predictor.py rename to examples/multi-model/tensorflow/predictor.py diff --git a/examples/tensorflow/image-classifier-inception/requirements.txt b/examples/multi-model/tensorflow/requirements.txt similarity index 100% rename from examples/tensorflow/image-classifier-inception/requirements.txt rename to examples/multi-model/tensorflow/requirements.txt diff --git a/examples/tensorflow/multi-model-classifier/sample-image.json b/examples/multi-model/tensorflow/sample-image.json similarity index 100% rename from examples/tensorflow/multi-model-classifier/sample-image.json rename to examples/multi-model/tensorflow/sample-image.json diff --git a/examples/onnx/iris-classifier/README.md b/examples/onnx/iris-classifier/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/onnx/iris-classifier/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/examples/onnx/yolov5-youtube/README.md b/examples/onnx/yolov5-youtube/README.md deleted file mode 100644 index f7822449bb..0000000000 --- a/examples/onnx/yolov5-youtube/README.md +++ /dev/null @@ -1,61 +0,0 @@ -# YOLOv5 Detection model - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example deploys a detection model trained using [ultralytics' yolo repo](https://github.com/ultralytics/yolov5) using ONNX. -We'll use the `yolov5s` model as an example here. 
-In can be used to run inference on youtube videos and returns the annotated video with bounding boxes. - -The example can be run on both CPU and on GPU hardware. - -## Sample Prediction - -Deploy the model by running: - -```bash -cortex deploy -``` - -And wait for it to become live by tracking its status with `cortex get --watch`. - -Once the API has been successfully deployed, export the API's endpoint for convenience. You can get the API's endpoint by running `cortex get yolov5-youtube`. - -```bash -export ENDPOINT=your-api-endpoint -``` - -When making a prediction with [sample.json](sample.json), [this](https://www.youtube.com/watch?v=aUdKzb4LGJI) youtube video will be used. - -To make a request to the model: - -```bash -curl "${ENDPOINT}" -X POST -H "Content-Type: application/json" -d @sample.json --output video.mp4 -``` - -After a few seconds, `curl` will save the resulting video `video.mp4` in the current working directory. The following is a sample of what should be exported: - -![yolov5](https://user-images.githubusercontent.com/26958764/86545098-e0dce900-bf34-11ea-83a7-8fd544afa11c.gif) - - -## Exporting ONNX - -To export a custom model from the repo, use the [`model/export.py`](https://github.com/ultralytics/yolov5/blob/master/models/export.py) script. -The only change we need to make is to change the line - -```bash -model.model[-1].export = True # set Detect() layer export=True -``` - -to - -```bash -model.model[-1].export = False -``` - -Originally, the ultralytics repo does not export postprocessing steps of the model, e.g. the conversion from the raw CNN outputs to bounding boxes. -With newer ONNX versions, these can be exported as part of the model making the deployment much easier. 
- -With this modified script, the ONNX graph used for this example has been exported using -```bash -python models/export.py --weights weights/yolov5s.pt --img 416 --batch 1 -``` diff --git a/examples/onnx/yolov5-youtube/conda-packages.txt b/examples/onnx/yolov5-youtube/conda-packages.txt deleted file mode 100644 index 131fce12b5..0000000000 --- a/examples/onnx/yolov5-youtube/conda-packages.txt +++ /dev/null @@ -1,3 +0,0 @@ -conda-forge::ffmpeg=4.2.3 -conda-forge::youtube-dl -conda-forge::matplotlib diff --git a/examples/onnx/yolov5-youtube/cortex.yaml b/examples/onnx/yolov5-youtube/cortex.yaml deleted file mode 100644 index 80d0393308..0000000000 --- a/examples/onnx/yolov5-youtube/cortex.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: yolov5-youtube - kind: RealtimeAPI - predictor: - type: onnx - path: predictor.py - model_path: s3://cortex-examples/onnx/yolov5-youtube/ - config: - iou_threshold: 0.5 - confidence_threshold: 0.6 - compute: - gpu: 1 # this is optional, since the api can also run on cpu diff --git a/examples/onnx/yolov5-youtube/labels.json b/examples/onnx/yolov5-youtube/labels.json deleted file mode 100644 index c86f2f812a..0000000000 --- a/examples/onnx/yolov5-youtube/labels.json +++ /dev/null @@ -1,82 +0,0 @@ -[ - "person", - "bicycle", - "car", - "motorcycle", - "airplane", - "bus", - "train", - "truck", - "boat", - "traffic light", - "fire hydrant", - "stop sign", - "parking meter", - "bench", - "bird", - "cat", - "dog", - "horse", - "sheep", - "cow", - "elephant", - "bear", - "zebra", - "giraffe", - "backpack", - "umbrella", - "handbag", - "tie", - "suitcase", - "frisbee", - "skis", - "snowboard", - "sports ball", - "kite", - "baseball bat", - "baseball glove", - "skateboard", - "surfboard", - "tennis racket", - "bottle", - "wine 
glass", - "cup", - "fork", - "knife", - "spoon", - "bowl", - "banana", - "apple", - "sandwich", - "orange", - "broccoli", - "carrot", - "hot dog", - "pizza", - "donut", - "cake", - "chair", - "couch", - "potted plant", - "bed", - "dining table", - "toilet", - "tv", - "laptop", - "mouse", - "remote", - "keyboard", - "cell phone", - "microwave", - "oven", - "toaster", - "sink", - "refrigerator", - "book", - "clock", - "vase", - "scissors", - "teddy bear", - "hair drier", - "toothbrush" -] diff --git a/examples/onnx/yolov5-youtube/predictor.py b/examples/onnx/yolov5-youtube/predictor.py deleted file mode 100644 index b99d29d911..0000000000 --- a/examples/onnx/yolov5-youtube/predictor.py +++ /dev/null @@ -1,65 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import json -import os -import io -import uuid -import utils - -import numpy as np -from matplotlib import pyplot as plt - -from starlette.responses import StreamingResponse - - -class ONNXPredictor: - def __init__(self, onnx_client, config): - self.client = onnx_client - # Get the input shape from the ONNX runtime - (signature,) = onnx_client.get_model()["input_signatures"].values() - _, _, height, width = signature["shape"] - self.input_size = (width, height) - self.config = config - with open("labels.json") as buf: - self.labels = json.load(buf) - color_map = plt.cm.tab20(np.linspace(0, 20, len(self.labels))) - self.color_map = [tuple(map(int, colors)) for colors in 255 * color_map] - - def postprocess(self, output): - boxes, obj_score, class_scores = np.split(output[0], [4, 5], axis=1) - boxes = utils.boxes_yolo_to_xyxy(boxes) - - # get the class-prediction & class confidences - class_id = class_scores.argmax(axis=1) - cls_score = class_scores[np.arange(len(class_scores)), class_id] - - confidence = obj_score.squeeze(axis=1) * cls_score 
- sel = confidence > self.config["confidence_threshold"] - boxes, class_id, confidence = boxes[sel], class_id[sel], confidence[sel] - sel = utils.nms(boxes, confidence, self.config["iou_threshold"]) - boxes, class_id, confidence = boxes[sel], class_id[sel], confidence[sel] - return boxes, class_id, confidence - - def predict(self, payload): - # download YT video - in_path = utils.download_from_youtube(payload["url"], self.input_size[1]) - out_path = f"{uuid.uuid1()}.mp4" - - # run predictions - with utils.FrameWriter(out_path, size=self.input_size) as writer: - for frame in utils.frame_reader(in_path, size=self.input_size): - x = (frame.astype(np.float32) / 255).transpose(2, 0, 1) - # 4 output tensors, the last three are intermediate values and - # not necessary for detection - output, *_ = self.client.predict(x[None]) - boxes, class_ids, confidence = self.postprocess(output) - utils.overlay_boxes(frame, boxes, class_ids, self.labels, self.color_map) - writer.write(frame) - - with open(out_path, "rb") as f: - output_buf = io.BytesIO(f.read()) - - os.remove(in_path) - os.remove(out_path) - - return StreamingResponse(output_buf, media_type="video/mp4") diff --git a/examples/onnx/yolov5-youtube/requirements.txt b/examples/onnx/yolov5-youtube/requirements.txt deleted file mode 100644 index 2c779ca7f1..0000000000 --- a/examples/onnx/yolov5-youtube/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -ffmpeg-python -aiofiles -opencv-python-headless diff --git a/examples/onnx/yolov5-youtube/sample.json b/examples/onnx/yolov5-youtube/sample.json deleted file mode 100644 index 8421278f58..0000000000 --- a/examples/onnx/yolov5-youtube/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "url": "https://www.youtube.com/watch?v=aUdKzb4LGJI" -} diff --git a/examples/onnx/yolov5-youtube/utils.py b/examples/onnx/yolov5-youtube/utils.py deleted file mode 100644 index c9bbeb73fe..0000000000 --- a/examples/onnx/yolov5-youtube/utils.py +++ /dev/null @@ -1,130 +0,0 @@ -# WARNING: you are on the 
master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import youtube_dl -import ffmpeg -import numpy as np -import cv2 -import uuid - -from pathlib import Path -from typing import Iterable, Tuple - - -def download_from_youtube(url: str, min_height: int) -> Path: - target = f"{uuid.uuid1()}.mp4" - ydl_opts = { - "outtmpl": target, - "format": f"worstvideo[vcodec=vp9][height>={min_height}]", - } - with youtube_dl.YoutubeDL(ydl_opts) as ydl: - ydl.download([url]) - # we need to glob in case youtube-dl adds suffix - (path,) = Path().absolute().glob(f"{target}*") - return path - - -def frame_reader(path: Path, size: Tuple[int, int]) -> Iterable[np.ndarray]: - width, height = size - # letterbox frames to fixed size - process = ( - ffmpeg.input(path) - .filter("scale", size=f"{width}:{height}", force_original_aspect_ratio="decrease") - # Negative values for x and y center the padded video - .filter("pad", height=height, width=width, x=-1, y=-1) - .output("pipe:", format="rawvideo", pix_fmt="rgb24") - .run_async(pipe_stdout=True) - ) - - while True: - in_bytes = process.stdout.read(height * width * 3) - if not in_bytes: - process.wait() - break - frame = np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3]) - yield frame - - -class FrameWriter: - def __init__(self, path: Path, size: Tuple[int, int]): - width, height = size - self.process = ( - ffmpeg.input("pipe:", format="rawvideo", pix_fmt="rgb24", s=f"{width}x{height}") - .output(path, pix_fmt="yuv420p") - .overwrite_output() - .run_async(pipe_stdin=True) - ) - - def write(self, frame: np.ndarray): - self.process.stdin.write(frame.astype(np.uint8).tobytes()) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.__del__() - - def __del__(self): - self.process.stdin.close() - self.process.wait() - - -def nms(dets: np.ndarray, 
scores: np.ndarray, thresh: float) -> np.ndarray: - x1 = dets[:, 0] - y1 = dets[:, 1] - x2 = dets[:, 2] - y2 = dets[:, 3] - - areas = (x2 - x1 + 1) * (y2 - y1 + 1) - order = scores.argsort()[::-1] # get boxes with more ious first - - keep = [] - while order.size > 0: - i = order[0] # pick maxmum iou box - keep.append(i) - xx1 = np.maximum(x1[i], x1[order[1:]]) - yy1 = np.maximum(y1[i], y1[order[1:]]) - xx2 = np.minimum(x2[i], x2[order[1:]]) - yy2 = np.minimum(y2[i], y2[order[1:]]) - - w = np.maximum(0.0, xx2 - xx1 + 1) # maximum width - h = np.maximum(0.0, yy2 - yy1 + 1) # maxiumum height - inter = w * h - ovr = inter / (areas[i] + areas[order[1:]] - inter) - - inds = np.where(ovr <= thresh)[0] - order = order[inds + 1] - - return np.array(keep).astype(np.int) - - -def boxes_yolo_to_xyxy(boxes: np.ndarray): - boxes[:, 0] -= boxes[:, 2] / 2 - boxes[:, 1] -= boxes[:, 3] / 2 - boxes[:, 2] = boxes[:, 2] + boxes[:, 0] - boxes[:, 3] = boxes[:, 3] + boxes[:, 1] - return boxes - - -def overlay_boxes(frame, boxes, class_ids, label_map, color_map, line_thickness=None): - tl = ( - line_thickness or round(0.0005 * (frame.shape[0] + frame.shape[1]) / 2) + 1 - ) # line/font thickness - - for class_id, (x1, y1, x2, y2) in zip(class_ids, boxes.astype(np.int)): - color = color_map[class_id] - label = label_map[class_id] - cv2.rectangle(frame, (x1, y1), (x2, y2), color, tl, cv2.LINE_AA) - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - x3, y3 = x1 + t_size[0], y1 - t_size[1] - 3 - cv2.rectangle(frame, (x1, y1), (x3, y3), color, -1, cv2.LINE_AA) # filled - cv2.putText( - frame, - label, - (x1, y1 - 2), - 0, - tl / 3, - [225, 255, 255], - thickness=tf, - lineType=cv2.LINE_AA, - ) diff --git a/examples/pytorch/answer-generator/README.md b/examples/pytorch/answer-generator/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/pytorch/answer-generator/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: 
you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/examples/pytorch/answer-generator/cortex.yaml b/examples/pytorch/answer-generator/cortex.yaml deleted file mode 100644 index b336f257dd..0000000000 --- a/examples/pytorch/answer-generator/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: answer-generator - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - gpu: 1 - mem: 5G diff --git a/examples/pytorch/answer-generator/generator.py b/examples/pytorch/answer-generator/generator.py deleted file mode 100644 index 4a9aba613e..0000000000 --- a/examples/pytorch/answer-generator/generator.py +++ /dev/null @@ -1,44 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -# This file includes code which was modified from https://colab.research.google.com/drive/1KTLqiAOdKM_3RnBWfqgrvOQLqumUyOdA - -import torch -import torch.nn.functional as F - - -END_OF_TEXT = 50256 - - -def generate(model, conditioned_tokens, device): - generated_tokens = [] - while True: - result = recalc(model, conditioned_tokens, generated_tokens, device) - if result == END_OF_TEXT: - return generated_tokens[:-1] - - -def recalc(model, conditioned_tokens, generated_tokens, device): - indexed_tokens = conditioned_tokens + generated_tokens - tokens_tensor = torch.tensor([indexed_tokens]) - tokens_tensor = tokens_tensor.to(device) - with torch.no_grad(): - outputs = model(tokens_tensor) - predictions = outputs[0] - logits = predictions[0, -1, :] - filtered_logits = top_p_filtering(logits) - probabilities = F.softmax(filtered_logits, dim=-1) - next_token = torch.multinomial(probabilities, 1) - generated_tokens.append(next_token.item()) - return next_token.item() - - -def top_p_filtering(logits, top_p=0.9, filter_value=-float("Inf")): - assert logits.dim() == 1 - sorted_logits, sorted_indices = torch.sort(logits, descending=True) - cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) - sorted_indices_to_remove = cumulative_probs > top_p - sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() - sorted_indices_to_remove[..., 0] = 0 - indices_to_remove = sorted_indices[sorted_indices_to_remove] - logits[indices_to_remove] = filter_value - return logits diff --git a/examples/pytorch/answer-generator/predictor.py b/examples/pytorch/answer-generator/predictor.py deleted file mode 100644 index 38c6622bf3..0000000000 --- a/examples/pytorch/answer-generator/predictor.py +++ /dev/null @@ -1,36 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import wget -import torch -from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config -import generator - - -class PythonPredictor: - def __init__(self, config): - medium_config = GPT2Config(n_embd=1024, n_layer=24, n_head=16) - model = GPT2LMHeadModel(medium_config) - wget.download( - "https://convaisharables.blob.core.windows.net/lsp/multiref/medium_ft.pkl", - "/tmp/medium_ft.pkl", - ) - - weights = torch.load("/tmp/medium_ft.pkl") - weights["lm_head.weight"] = weights["lm_head.decoder.weight"] - weights.pop("lm_head.decoder.weight", None) - - model.load_state_dict(weights) - - device = "cuda" if torch.cuda.is_available() else "cpu" - print(f"using device: {device}") - model.to(device) - model.eval() - - self.device = device - self.model = model - self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2") - - def predict(self, payload): - conditioned_tokens = self.tokenizer.encode(payload["text"]) + [generator.END_OF_TEXT] - prediction = generator.generate(self.model, conditioned_tokens, self.device) - return self.tokenizer.decode(prediction) diff --git a/examples/pytorch/answer-generator/requirements.txt b/examples/pytorch/answer-generator/requirements.txt deleted file mode 100644 index effba0ef1b..0000000000 --- a/examples/pytorch/answer-generator/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -torch -transformers==2.3.* -wget==3.* diff --git a/examples/pytorch/answer-generator/sample.json b/examples/pytorch/answer-generator/sample.json deleted file mode 100644 index aa91c9d2eb..0000000000 --- a/examples/pytorch/answer-generator/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "What is machine learning?" 
-} diff --git a/examples/pytorch/image-classifier-alexnet/cortex.yaml b/examples/pytorch/image-classifier-alexnet/cortex.yaml deleted file mode 100644 index 74c463c0b0..0000000000 --- a/examples/pytorch/image-classifier-alexnet/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-alexnet - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - gpu: 1 - mem: 4G diff --git a/examples/pytorch/image-classifier-alexnet/predictor.py b/examples/pytorch/image-classifier-alexnet/predictor.py deleted file mode 100644 index a739ddbb8a..0000000000 --- a/examples/pytorch/image-classifier-alexnet/predictor.py +++ /dev/null @@ -1,39 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import requests -import torch -import torchvision -from torchvision import transforms -from PIL import Image -from io import BytesIO - - -class PythonPredictor: - def __init__(self, config): - device = "cuda" if torch.cuda.is_available() else "cpu" - print(f"using device: {device}") - - model = torchvision.models.alexnet(pretrained=True).to(device) - model.eval() - # https://github.com/pytorch/examples/blob/447974f6337543d4de6b888e244a964d3c9b71f6/imagenet/main.py#L198-L199 - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - - self.preprocess = transforms.Compose( - [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] - ) - self.labels = requests.get( - "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" - ).text.split("\n")[1:] - self.model = model - self.device = device - - def predict(self, payload): - image = requests.get(payload["url"]).content - img_pil = Image.open(BytesIO(image)) - img_tensor = self.preprocess(img_pil) - img_tensor.unsqueeze_(0) - img_tensor = img_tensor.to(self.device) - with torch.no_grad(): - prediction = self.model(img_tensor) - _, index = prediction[0].max(0) - return self.labels[index] diff --git a/examples/pytorch/image-classifier-alexnet/requirements.txt b/examples/pytorch/image-classifier-alexnet/requirements.txt deleted file mode 100644 index ac988bdf84..0000000000 --- a/examples/pytorch/image-classifier-alexnet/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -torchvision diff --git a/examples/pytorch/image-classifier-alexnet/sample.json b/examples/pytorch/image-classifier-alexnet/sample.json deleted file mode 100644 index eb72ddb869..0000000000 --- a/examples/pytorch/image-classifier-alexnet/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "url": "https://i.imgur.com/PzXprwl.jpg" -} diff --git 
a/examples/pytorch/iris-classifier/cortex.yaml b/examples/pytorch/iris-classifier/cortex.yaml deleted file mode 100644 index a8b590882d..0000000000 --- a/examples/pytorch/iris-classifier/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: iris-classifier - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - config: - model: s3://cortex-examples/pytorch/iris-classifier/weights.pth - monitoring: - model_type: classification diff --git a/examples/pytorch/iris-classifier/predictor.py b/examples/pytorch/iris-classifier/predictor.py deleted file mode 100644 index 71994bb9ae..0000000000 --- a/examples/pytorch/iris-classifier/predictor.py +++ /dev/null @@ -1,50 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import re -import torch -import os -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -from model import IrisNet - -labels = ["setosa", "versicolor", "virginica"] - - -class PythonPredictor: - def __init__(self, config): - # download the model - bucket, key = re.match("s3://(.+?)/(.+)", config["model"]).groups() - - if os.environ.get("AWS_ACCESS_KEY_ID"): - s3 = boto3.client("s3") # client will use your credentials if available - else: - s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client - - s3.download_file(bucket, key, "/tmp/model.pth") - - # initialize the model - model = IrisNet() - model.load_state_dict(torch.load("/tmp/model.pth")) - model.eval() - - self.model = model - - def predict(self, payload): - # Convert the request to a tensor and pass it into the model - input_tensor = torch.FloatTensor( - [ - [ - payload["sepal_length"], - payload["sepal_width"], - payload["petal_length"], - payload["petal_width"], - ] - ] - ) - - # Run the prediction - output = self.model(input_tensor) - - # Translate the model output to the corresponding label string - return labels[torch.argmax(output[0])] diff --git a/examples/pytorch/iris-classifier/requirements.txt b/examples/pytorch/iris-classifier/requirements.txt deleted file mode 100644 index f2f30b7ef9..0000000000 --- a/examples/pytorch/iris-classifier/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -scikit-learn diff --git a/examples/pytorch/iris-classifier/sample.json b/examples/pytorch/iris-classifier/sample.json deleted file mode 100644 index 0bc6836266..0000000000 --- a/examples/pytorch/iris-classifier/sample.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "sepal_length": 2.2, - "sepal_width": 3.6, - "petal_length": 1.4, - "petal_width": 3.3 -} diff --git a/examples/pytorch/language-identifier/README.md b/examples/pytorch/language-identifier/README.md deleted file 
mode 100644 index 41a04891b3..0000000000 --- a/examples/pytorch/language-identifier/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/examples/pytorch/language-identifier/cortex.yaml b/examples/pytorch/language-identifier/cortex.yaml deleted file mode 100644 index e8243a58fa..0000000000 --- a/examples/pytorch/language-identifier/cortex.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: language-identifier - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - monitoring: - model_type: classification diff --git a/examples/pytorch/language-identifier/predictor.py b/examples/pytorch/language-identifier/predictor.py deleted file mode 100644 index e59ebe5012..0000000000 --- a/examples/pytorch/language-identifier/predictor.py +++ /dev/null @@ -1,18 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import wget -import fasttext - - -class PythonPredictor: - def __init__(self, config): - wget.download( - "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin", "/tmp/model" - ) - - self.model = fasttext.load_model("/tmp/model") - - def predict(self, payload): - prediction = self.model.predict(payload["text"]) - language = prediction[0][0][-2:] - return language diff --git a/examples/pytorch/language-identifier/requirements.txt b/examples/pytorch/language-identifier/requirements.txt deleted file mode 100644 index a342ff2914..0000000000 --- a/examples/pytorch/language-identifier/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -wget==3.* -fasttext==0.9.* diff --git a/examples/pytorch/language-identifier/sample.json b/examples/pytorch/language-identifier/sample.json deleted file mode 100644 index 225c357392..0000000000 --- a/examples/pytorch/language-identifier/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "build machine learning apis" -} diff --git a/examples/pytorch/object-detector/README.md b/examples/pytorch/object-detector/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/pytorch/object-detector/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. 
diff --git a/examples/pytorch/object-detector/coco_labels.txt b/examples/pytorch/object-detector/coco_labels.txt deleted file mode 100644 index 8d950d95da..0000000000 --- a/examples/pytorch/object-detector/coco_labels.txt +++ /dev/null @@ -1,91 +0,0 @@ -__background__ -person -bicycle -car -motorcycle -airplane -bus -train -truck -boat -traffic light -fire hydrant -N/A -stop sign -parking meter -bench -bird -cat -dog -horse -sheep -cow -elephant -bear -zebra -giraffe -N/A -backpack -umbrella -N/A -N/A -handbag -tie -suitcase -frisbee -skis -snowboard -sports ball -kite -baseball bat -baseball glove -skateboard -surfboard -tennis racket -bottle -N/A -wine glass -cup -fork -knife -spoon -bowl -banana -apple -sandwich -orange -broccoli -carrot -hot dog -pizza -donut -cake -chair -couch -potted plant -bed -N/A -dining table -N/A -N/A -toilet -N/A -tv -laptop -mouse -remote -keyboard -cell phone -microwave -oven -toaster -sink -refrigerator -N/A -book -clock -vase -scissors -teddy bear -hair drier -toothbrush diff --git a/examples/pytorch/object-detector/cortex.yaml b/examples/pytorch/object-detector/cortex.yaml deleted file mode 100644 index 9b06d29e9e..0000000000 --- a/examples/pytorch/object-detector/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: object-detector - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - gpu: 1 - mem: 4G diff --git a/examples/pytorch/object-detector/predictor.py b/examples/pytorch/object-detector/predictor.py deleted file mode 100644 index 52aa593774..0000000000 --- a/examples/pytorch/object-detector/predictor.py +++ /dev/null @@ -1,49 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -from io import BytesIO - -import requests -import torch -from PIL import Image -from torchvision import models -from torchvision import transforms - - -class PythonPredictor: - def __init__(self, config): - self.device = "cuda" if torch.cuda.is_available() else "cpu" - print(f"using device: {self.device}") - - model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(self.device) - model.eval() - - self.preprocess = transforms.Compose([transforms.ToTensor()]) - - with open("/mnt/project/coco_labels.txt") as f: - self.coco_labels = f.read().splitlines() - - self.model = model - - def predict(self, payload): - threshold = float(payload["threshold"]) - image = requests.get(payload["url"]).content - img_pil = Image.open(BytesIO(image)) - img_tensor = self.preprocess(img_pil).to(self.device) - img_tensor.unsqueeze_(0) - - with torch.no_grad(): - pred = self.model(img_tensor) - - predicted_class = [self.coco_labels[i] for i in pred[0]["labels"].cpu().tolist()] - predicted_boxes = [ - [(i[0], i[1]), (i[2], i[3])] for i in pred[0]["boxes"].detach().cpu().tolist() - ] - predicted_score = pred[0]["scores"].detach().cpu().tolist() - predicted_t = [predicted_score.index(x) for x in predicted_score if x > threshold] - if len(predicted_t) == 0: - return [], [] - - predicted_t = predicted_t[-1] - predicted_boxes = predicted_boxes[: predicted_t + 1] - predicted_class = predicted_class[: predicted_t + 1] - return predicted_boxes, predicted_class diff --git a/examples/pytorch/object-detector/requirements.txt b/examples/pytorch/object-detector/requirements.txt deleted file mode 100644 index ac988bdf84..0000000000 --- a/examples/pytorch/object-detector/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -torchvision diff --git a/examples/pytorch/object-detector/sample.json b/examples/pytorch/object-detector/sample.json deleted file mode 100644 index 5005f13bad..0000000000 --- 
a/examples/pytorch/object-detector/sample.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "url": "https://i.imgur.com/PzXprwl.jpg", - "threshold": "0.8" -} diff --git a/examples/pytorch/question-generator/cortex.yaml b/examples/pytorch/question-generator/cortex.yaml deleted file mode 100644 index a944303edb..0000000000 --- a/examples/pytorch/question-generator/cortex.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: question-generator - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - mem: 6G diff --git a/examples/pytorch/question-generator/dependencies.sh b/examples/pytorch/question-generator/dependencies.sh deleted file mode 100644 index 5040da2342..0000000000 --- a/examples/pytorch/question-generator/dependencies.sh +++ /dev/null @@ -1,4 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -# torchvision isn’t required for this example, and pip was throwing warnings with it installed -pip uninstall torchvision -y diff --git a/examples/pytorch/question-generator/predictor.py b/examples/pytorch/question-generator/predictor.py deleted file mode 100644 index 0b7692890c..0000000000 --- a/examples/pytorch/question-generator/predictor.py +++ /dev/null @@ -1,36 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -from transformers import AutoModelWithLMHead, AutoTokenizer -import spacy -import subprocess -import json - - -class PythonPredictor: - def __init__(self, config): - subprocess.call("python -m spacy download en_core_web_sm".split(" ")) - import en_core_web_sm - - self.tokenizer = AutoTokenizer.from_pretrained( - "mrm8488/t5-base-finetuned-question-generation-ap" - ) - self.model = AutoModelWithLMHead.from_pretrained( - "mrm8488/t5-base-finetuned-question-generation-ap" - ) - self.nlp = en_core_web_sm.load() - - def predict(self, payload): - context = payload["context"] - answer = payload["answer"] - max_length = int(payload.get("max_length", 64)) - - input_text = "answer: {} context: {} ".format(answer, context) - features = self.tokenizer([input_text], return_tensors="pt") - - output = self.model.generate( - input_ids=features["input_ids"], - attention_mask=features["attention_mask"], - max_length=max_length, - ) - - return {"result": self.tokenizer.decode(output[0])} diff --git a/examples/pytorch/question-generator/requirements.txt b/examples/pytorch/question-generator/requirements.txt deleted file mode 100644 index d7b5db27a0..0000000000 --- a/examples/pytorch/question-generator/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -spacy==2.1.8 --e git+https://github.com/huggingface/transformers.git#egg=transformers ---find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.6.0+cpu diff --git a/examples/pytorch/question-generator/sample.json b/examples/pytorch/question-generator/sample.json deleted file mode 100644 index 88c9fb0c92..0000000000 --- a/examples/pytorch/question-generator/sample.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "context": "Sarah works as a software engineer in London", - "answer": "London" -} diff --git a/examples/pytorch/reading-comprehender/README.md b/examples/pytorch/reading-comprehender/README.md deleted file mode 100644 index 
41a04891b3..0000000000 --- a/examples/pytorch/reading-comprehender/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/examples/pytorch/reading-comprehender/cortex.yaml b/examples/pytorch/reading-comprehender/cortex.yaml deleted file mode 100644 index ba89862c78..0000000000 --- a/examples/pytorch/reading-comprehender/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: reading-comprehender - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - gpu: 1 - mem: 4G diff --git a/examples/pytorch/reading-comprehender/predictor.py b/examples/pytorch/reading-comprehender/predictor.py deleted file mode 100644 index 7b86ac4770..0000000000 --- a/examples/pytorch/reading-comprehender/predictor.py +++ /dev/null @@ -1,25 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import torch -from allennlp.predictors.predictor import Predictor as AllenNLPPredictor - - -class PythonPredictor: - def __init__(self, config): - self.device = "cuda" if torch.cuda.is_available() else "cpu" - print(f"using device: {self.device}") - - cuda_device = -1 - if self.device == "cuda": - cuda_device = 0 - - self.predictor = AllenNLPPredictor.from_path( - "https://storage.googleapis.com/allennlp-public-models/bidaf-elmo-model-2018.11.30-charpad.tar.gz", - cuda_device=cuda_device, - ) - - def predict(self, payload): - prediction = self.predictor.predict( - passage=payload["passage"], question=payload["question"] - ) - return prediction["best_span_str"] diff --git a/examples/pytorch/reading-comprehender/requirements.txt b/examples/pytorch/reading-comprehender/requirements.txt deleted file mode 100644 index 13dd5fbdba..0000000000 --- a/examples/pytorch/reading-comprehender/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -allennlp==0.9.* diff --git a/examples/pytorch/reading-comprehender/sample.json b/examples/pytorch/reading-comprehender/sample.json deleted file mode 100644 index 14f60455bc..0000000000 --- a/examples/pytorch/reading-comprehender/sample.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "passage": "Cortex Labs is building machine learning infrastructure for deploying models in production", - "question": "What does Cortex Labs do?" -} diff --git a/examples/pytorch/search-completer/README.md b/examples/pytorch/search-completer/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/pytorch/search-completer/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/examples/pytorch/search-completer/cortex.yaml b/examples/pytorch/search-completer/cortex.yaml deleted file mode 100644 index cd73458149..0000000000 --- a/examples/pytorch/search-completer/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: search-completer - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - gpu: 1 - mem: 4G diff --git a/examples/pytorch/search-completer/predictor.py b/examples/pytorch/search-completer/predictor.py deleted file mode 100644 index 58d03ccc2c..0000000000 --- a/examples/pytorch/search-completer/predictor.py +++ /dev/null @@ -1,20 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import torch -import regex -import tqdm - - -class PythonPredictor: - def __init__(self, config): - roberta = torch.hub.load("pytorch/fairseq", "roberta.large", force_reload=True) - roberta.eval() - device = "cuda" if torch.cuda.is_available() else "cpu" - print(f"using device: {device}") - roberta.to(device) - - self.model = roberta - - def predict(self, payload): - predictions = self.model.fill_mask(payload["text"] + " ", topk=5) - return [prediction[0] for prediction in predictions] diff --git a/examples/pytorch/search-completer/requirements.txt b/examples/pytorch/search-completer/requirements.txt deleted file mode 100644 index 16b9215d31..0000000000 --- a/examples/pytorch/search-completer/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -torch -regex -tqdm -dataclasses -hydra-core diff --git a/examples/pytorch/search-completer/sample.json b/examples/pytorch/search-completer/sample.json deleted file mode 100644 index dfd2a2f433..0000000000 --- a/examples/pytorch/search-completer/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "machine learning is" -} diff --git a/examples/pytorch/sentiment-analyzer/README.md b/examples/pytorch/sentiment-analyzer/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/pytorch/sentiment-analyzer/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. 
diff --git a/examples/pytorch/sentiment-analyzer/cortex.yaml b/examples/pytorch/sentiment-analyzer/cortex.yaml deleted file mode 100644 index 1ed6c45bbf..0000000000 --- a/examples/pytorch/sentiment-analyzer/cortex.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: sentiment-analyzer - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - # gpu: 1 # this is optional, since the api can also run on cpu diff --git a/examples/pytorch/sentiment-analyzer/predictor.py b/examples/pytorch/sentiment-analyzer/predictor.py deleted file mode 100644 index 03b796d199..0000000000 --- a/examples/pytorch/sentiment-analyzer/predictor.py +++ /dev/null @@ -1,15 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import torch -from transformers import pipeline - - -class PythonPredictor: - def __init__(self, config): - device = 0 if torch.cuda.is_available() else -1 - print(f"using device: {'cuda' if device == 0 else 'cpu'}") - - self.analyzer = pipeline(task="sentiment-analysis", device=device) - - def predict(self, payload): - return self.analyzer(payload["text"])[0] diff --git a/examples/pytorch/sentiment-analyzer/requirements.txt b/examples/pytorch/sentiment-analyzer/requirements.txt deleted file mode 100644 index 3f565d80e4..0000000000 --- a/examples/pytorch/sentiment-analyzer/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -transformers==2.9.* diff --git a/examples/pytorch/sentiment-analyzer/sample.json b/examples/pytorch/sentiment-analyzer/sample.json deleted file mode 100644 index 7622d16ae0..0000000000 --- a/examples/pytorch/sentiment-analyzer/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "best day ever" -} diff --git a/examples/pytorch/text-summarizer/README.md b/examples/pytorch/text-summarizer/README.md deleted file mode 100644 index 4323c6e133..0000000000 --- a/examples/pytorch/text-summarizer/README.md +++ /dev/null @@ -1,5 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. - -Please refer [here](https://sshleifer.github.io/blog_v2/jupyter/2020/03/12/bart.html) to learn more about BART. 
diff --git a/examples/pytorch/text-summarizer/cortex.yaml b/examples/pytorch/text-summarizer/cortex.yaml deleted file mode 100644 index 9f7b620ca9..0000000000 --- a/examples/pytorch/text-summarizer/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: text-summarizer - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - gpu: 1 # this is optional, since the api can also run on cpu - mem: 6G diff --git a/examples/pytorch/text-summarizer/predictor.py b/examples/pytorch/text-summarizer/predictor.py deleted file mode 100644 index 05652afd17..0000000000 --- a/examples/pytorch/text-summarizer/predictor.py +++ /dev/null @@ -1,18 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import torch -from transformers import pipeline - - -class PythonPredictor: - def __init__(self, config): - device = 0 if torch.cuda.is_available() else -1 - print(f"using device: {'cuda' if device == 0 else 'cpu'}") - - self.summarizer = pipeline(task="summarization", device=device) - - def predict(self, payload): - summary = self.summarizer( - payload["text"], num_beams=4, length_penalty=2.0, max_length=142, no_repeat_ngram_size=3 - ) - return summary[0]["summary_text"] diff --git a/examples/pytorch/text-summarizer/requirements.txt b/examples/pytorch/text-summarizer/requirements.txt deleted file mode 100644 index 5afceb377e..0000000000 --- a/examples/pytorch/text-summarizer/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -transformers==2.9.* -torch diff --git a/examples/pytorch/text-summarizer/sample.json b/examples/pytorch/text-summarizer/sample.json deleted file mode 100644 index e54b77f18c..0000000000 --- a/examples/pytorch/text-summarizer/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "Machine learning (ML) is the scientific study of algorithms and statistical models that computer systems use to perform a specific task without using explicit instructions, relying on patterns and inference instead. It is seen as a subset of artificial intelligence. Machine learning algorithms build a mathematical model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to perform the task. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision, where it is difficult or infeasible to develop a conventional algorithm for effectively performing the task. Machine learning is closely related to computational statistics, which focuses on making predictions using computers. 
The study of mathematical optimization delivers methods, theory and application domains to the field of machine learning. Data mining is a field of study within machine learning, and focuses on exploratory data analysis through unsupervised learning. In its application across business problems, machine learning is also referred to as predictive analytics." -} diff --git a/examples/sklearn/iris-classifier/README.md b/examples/sklearn/iris-classifier/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/sklearn/iris-classifier/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/examples/sklearn/iris-classifier/cortex.yaml b/examples/sklearn/iris-classifier/cortex.yaml deleted file mode 100644 index 1f05c85eca..0000000000 --- a/examples/sklearn/iris-classifier/cortex.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: iris-classifier - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - config: - bucket: cortex-examples - key: sklearn/iris-classifier/model.pkl - monitoring: - model_type: classification - compute: - cpu: 0.2 - mem: 200M diff --git a/examples/sklearn/iris-classifier/predictor.py b/examples/sklearn/iris-classifier/predictor.py deleted file mode 100644 index 46edab0ad2..0000000000 --- a/examples/sklearn/iris-classifier/predictor.py +++ /dev/null @@ -1,31 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import os -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -import pickle - -labels = ["setosa", "versicolor", "virginica"] - - -class PythonPredictor: - def __init__(self, config): - if os.environ.get("AWS_ACCESS_KEY_ID"): - s3 = boto3.client("s3") # client will use your credentials if available - else: - s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client - - s3.download_file(config["bucket"], config["key"], "/tmp/model.pkl") - self.model = pickle.load(open("/tmp/model.pkl", "rb")) - - def predict(self, payload): - measurements = [ - payload["sepal_length"], - payload["sepal_width"], - payload["petal_length"], - payload["petal_width"], - ] - - label_id = self.model.predict([measurements])[0] - return labels[label_id] diff --git a/examples/sklearn/iris-classifier/requirements.txt b/examples/sklearn/iris-classifier/requirements.txt deleted file mode 100644 index bbc213cf3e..0000000000 --- a/examples/sklearn/iris-classifier/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -boto3 -scikit-learn==0.21.3 diff --git a/examples/sklearn/iris-classifier/sample.json b/examples/sklearn/iris-classifier/sample.json deleted file 
mode 100644 index 9e792863cd..0000000000 --- a/examples/sklearn/iris-classifier/sample.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "sepal_length": 5.2, - "sepal_width": 3.6, - "petal_length": 1.5, - "petal_width": 0.3 -} diff --git a/examples/sklearn/iris-classifier/trainer.py b/examples/sklearn/iris-classifier/trainer.py deleted file mode 100644 index db1b047938..0000000000 --- a/examples/sklearn/iris-classifier/trainer.py +++ /dev/null @@ -1,25 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import boto3 -import pickle - -from sklearn.datasets import load_iris -from sklearn.model_selection import train_test_split -from sklearn.linear_model import LogisticRegression - -# Train the model - -iris = load_iris() -data, labels = iris.data, iris.target -training_data, test_data, training_labels, test_labels = train_test_split(data, labels) - -model = LogisticRegression(solver="lbfgs", multi_class="multinomial") -model.fit(training_data, training_labels) -accuracy = model.score(test_data, test_labels) -print("accuracy: {:.2f}".format(accuracy)) - -# Upload the model - -pickle.dump(model, open("model.pkl", "wb")) -s3 = boto3.client("s3") -s3.upload_file("model.pkl", "cortex-examples", "sklearn/iris-classifier/model.pkl") diff --git a/examples/sklearn/mpg-estimator/README.md b/examples/sklearn/mpg-estimator/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/sklearn/mpg-estimator/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. 
diff --git a/examples/sklearn/mpg-estimator/cortex.yaml b/examples/sklearn/mpg-estimator/cortex.yaml deleted file mode 100644 index e6ffc969ee..0000000000 --- a/examples/sklearn/mpg-estimator/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: mpg-estimator - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - config: - model: s3://cortex-examples/sklearn/mpg-estimator/linreg/ - monitoring: - model_type: regression diff --git a/examples/sklearn/mpg-estimator/predictor.py b/examples/sklearn/mpg-estimator/predictor.py deleted file mode 100644 index bb1c2ed19a..0000000000 --- a/examples/sklearn/mpg-estimator/predictor.py +++ /dev/null @@ -1,41 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -import mlflow.sklearn -import numpy as np -import re -import os - - -class PythonPredictor: - def __init__(self, config): - model_path = "/tmp/model" - os.makedirs(model_path, exist_ok=True) - - if os.environ.get("AWS_ACCESS_KEY_ID"): - s3 = boto3.client("s3") # client will use your credentials if available - else: - s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client - - # download mlflow model folder from S3 - bucket, prefix = re.match("s3://(.+?)/(.+)", config["model"]).groups() - response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix) - for s3_obj in response["Contents"]: - obj_key = s3_obj["Key"] - s3.download_file(bucket, obj_key, os.path.join(model_path, os.path.basename(obj_key))) - - self.model = mlflow.sklearn.load_model(model_path) - - def predict(self, payload): - model_input = [ - payload["cylinders"], - payload["displacement"], - payload["horsepower"], - payload["weight"], - payload["acceleration"], - ] - - result = self.model.predict([model_input]) - return np.asscalar(result) diff --git a/examples/sklearn/mpg-estimator/requirements.txt b/examples/sklearn/mpg-estimator/requirements.txt deleted file mode 100644 index cbcad6b321..0000000000 --- a/examples/sklearn/mpg-estimator/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -mlflow -pandas -numpy -scikit-learn==0.21.3 diff --git a/examples/sklearn/mpg-estimator/sample.json b/examples/sklearn/mpg-estimator/sample.json deleted file mode 100644 index 2dbbca46dd..0000000000 --- a/examples/sklearn/mpg-estimator/sample.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "cylinders": 4, - "displacement": 135, - "horsepower": 84, - "weight": 2490, - "acceleration": 15.7 -} diff --git a/examples/sklearn/mpg-estimator/trainer.py b/examples/sklearn/mpg-estimator/trainer.py deleted file mode 100644 index 
f17b7d9c05..0000000000 --- a/examples/sklearn/mpg-estimator/trainer.py +++ /dev/null @@ -1,25 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import mlflow.sklearn -import pandas as pd -import numpy as np -from sklearn.linear_model import LinearRegression -from sklearn.model_selection import train_test_split - - -df = pd.read_csv( - "https://www.uio.no/studier/emner/sv/oekonomi/ECON4150/v16/statacourse/datafiles/auto.csv" -) -df = df.replace("?", np.nan) -df = df.dropna() -df = df.drop(["name", "origin", "year"], axis=1) # drop categorical variables for simplicity -data = df.drop("mpg", axis=1) -labels = df[["mpg"]] - -training_data, test_data, training_labels, test_labels = train_test_split(data, labels) -model = LinearRegression() -model.fit(training_data, training_labels) -accuracy = model.score(test_data, test_labels) -print("accuracy: {:.2f}".format(accuracy)) - -mlflow.sklearn.save_model(model, "linreg") diff --git a/examples/spacy/entity-recognizer/README.md b/examples/spacy/entity-recognizer/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/spacy/entity-recognizer/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. 
diff --git a/examples/spacy/entity-recognizer/cortex.yaml b/examples/spacy/entity-recognizer/cortex.yaml deleted file mode 100644 index cc4dbbba38..0000000000 --- a/examples/spacy/entity-recognizer/cortex.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: entity-recognizer - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - mem: 1G diff --git a/examples/spacy/entity-recognizer/predictor.py b/examples/spacy/entity-recognizer/predictor.py deleted file mode 100644 index 9d42a9de4c..0000000000 --- a/examples/spacy/entity-recognizer/predictor.py +++ /dev/null @@ -1,22 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import spacy -import subprocess - - -class PythonPredictor: - """ - Class to perform NER (named entity recognition) - """ - - def __init__(self, config): - subprocess.call("python -m spacy download en_core_web_md".split(" ")) - import en_core_web_md - - self.nlp = en_core_web_md.load() - - def predict(self, payload): - doc = self.nlp(payload["text"]) - proc = lambda ent: {"label": ent.label_, "start": ent.start, "end": ent.end} - out = {ent.text: proc(ent) for ent in doc.ents} - return out diff --git a/examples/spacy/entity-recognizer/requirements.txt b/examples/spacy/entity-recognizer/requirements.txt deleted file mode 100644 index 568e4fc634..0000000000 --- a/examples/spacy/entity-recognizer/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -spacy diff --git a/examples/spacy/entity-recognizer/sample.json b/examples/spacy/entity-recognizer/sample.json deleted file mode 100644 index ae0f0f4120..0000000000 --- 
a/examples/spacy/entity-recognizer/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "Lilium, a Munich-based startup that is designing and building vertical take-off and landing (VTOL) aircraft with speeds of up to 100 km/h that it plans eventually to run in its own taxi fleet, has closed a funding round of over $240 million — money that it plans to use to keep developing its aircraft, and to start building manufacturing facilities to produce more of them, for an expected launch date of 2025." -} diff --git a/examples/tensorflow/image-classifier-inception/README.md b/examples/tensorflow/image-classifier-inception/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/tensorflow/image-classifier-inception/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/examples/tensorflow/image-classifier-inception/cortex.yaml b/examples/tensorflow/image-classifier-inception/cortex.yaml deleted file mode 100644 index e5177788ba..0000000000 --- a/examples/tensorflow/image-classifier-inception/cortex.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-inception - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ - monitoring: - model_type: classification - compute: - cpu: 1 - gpu: 1 diff --git a/examples/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml b/examples/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml deleted file mode 100644 index 919870651c..0000000000 --- a/examples/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-inception - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ - server_side_batching: - max_batch_size: 2 - batch_interval: 0.2s - threads_per_process: 2 - monitoring: - model_type: classification - compute: - cpu: 1 - gpu: 1 diff --git a/examples/tensorflow/image-classifier-inception/inception.ipynb b/examples/tensorflow/image-classifier-inception/inception.ipynb deleted file mode 100644 index 46956e0e48..0000000000 --- a/examples/tensorflow/image-classifier-inception/inception.ipynb +++ /dev/null @@ -1,211 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "inception.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - 
"cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "n8CwINQcEBKz", - "colab_type": "text" - }, - "source": [ - "# Exporting ImageNet Inception\n", - "\n", - "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", - "\n", - "In this notebook, we'll show how to export the [pre-trained Imagenet Inception model](https://tfhub.dev/google/imagenet/inception_v3/classification/3) for serving." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3221z3P69fgf", - "colab_type": "text" - }, - "source": [ - "First, we'll install the required packages:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "_SdQpq7g9LiI", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!pip install tensorflow==1.14.* tensorflow-hub==0.6.* boto3==1.*" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "I-k0gUpxDGkU", - "colab_type": "text" - }, - "source": [ - "Next, we'll download the model from TensorFlow Hub and export it for serving:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "z6QLCzB4BKMe", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import time\n", - "import tensorflow as tf\n", - "import tensorflow_hub as hub\n", - "from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def\n", - "\n", - "export_dir = \"export/\" + str(time.time()).split('.')[0]\n", - "builder = tf.saved_model.builder.SavedModelBuilder(export_dir)\n", - "\n", - "with tf.Session(graph=tf.Graph()) as sess:\n", - " module = hub.Module(\"https://tfhub.dev/google/imagenet/inception_v3/classification/3\")\n", - "\n", - " input_params = module.get_input_info_dict()\n", - " image_input = tf.placeholder(\n", - " name=\"images\", dtype=input_params[\"images\"].dtype, 
shape=input_params[\"images\"].get_shape()\n", - " )\n", - " \n", - " sess.run([tf.global_variables_initializer(), tf.tables_initializer()])\n", - "\n", - " classes = module(image_input)\n", - " signature = predict_signature_def(inputs={\"images\": image_input}, outputs={\"classes\": classes})\n", - "\n", - " builder.add_meta_graph_and_variables(\n", - " sess, [\"serve\"], signature_def_map={\"predict\": signature}, strip_default_attrs=True\n", - " )\n", - "\n", - "builder.save()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aGtJiyEnBgwl", - "colab_type": "text" - }, - "source": [ - "## Upload the model to AWS\n", - "\n", - "Cortex loads models from AWS, so we need to upload the exported model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fTkjvSKBBmUB", - "colab_type": "text" - }, - "source": [ - "Set these variables to configure your AWS credentials and model upload path:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "4xcDWxqCBPre", - "colab_type": "code", - "cellView": "form", - "colab": {} - }, - "source": [ - "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", - "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", - "S3_UPLOAD_PATH = \"s3://my-bucket/image-classifier/inception\" #@param {type:\"string\"}\n", - "\n", - "import sys\n", - "import re\n", - "\n", - "if AWS_ACCESS_KEY_ID == \"\":\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", - "\n", - "elif AWS_SECRET_ACCESS_KEY == \"\":\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", - "\n", - "else:\n", - " try:\n", - " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", - " except:\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" - ], - "execution_count": 0, - 
"outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "czZkjb1IBr-f", - "colab_type": "text" - }, - "source": [ - "Upload the model to S3:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "M0b0IbyaBsim", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import os\n", - "import boto3\n", - "\n", - "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", - "\n", - "for dirpath, _, filenames in os.walk(\"export\"):\n", - " for filename in filenames:\n", - " filepath = os.path.join(dirpath, filename)\n", - " filekey = os.path.join(key, filepath[len(\"export/\"):])\n", - " print(\"Uploading s3://{}/{}...\".format(bucket, filekey), end = '')\n", - " s3.upload_file(filepath, bucket, filekey)\n", - " print(\" ✓\")\n", - "\n", - "print(\"\\nUploaded model export directory to \" + S3_UPLOAD_PATH)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pZQWoeZbE7Wc", - "colab_type": "text" - }, - "source": [ - "\n", - "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/image-classifier-inception) for how to deploy the model as an API." - ] - } - ] -} diff --git a/examples/tensorflow/image-classifier-inception/predictor.py b/examples/tensorflow/image-classifier-inception/predictor.py deleted file mode 100644 index c2afb63c0c..0000000000 --- a/examples/tensorflow/image-classifier-inception/predictor.py +++ /dev/null @@ -1,21 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import requests -import numpy as np -from PIL import Image -from io import BytesIO - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - self.client = tensorflow_client - self.labels = requests.get( - "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" - ).text.split("\n") - - def predict(self, payload): - image = requests.get(payload["url"]).content - decoded_image = np.asarray(Image.open(BytesIO(image)), dtype=np.float32) / 255 - model_input = {"images": np.expand_dims(decoded_image, axis=0)} - prediction = self.client.predict(model_input) - return self.labels[np.argmax(prediction["classes"])] diff --git a/examples/tensorflow/image-classifier-inception/sample.json b/examples/tensorflow/image-classifier-inception/sample.json deleted file mode 100644 index 667652007a..0000000000 --- a/examples/tensorflow/image-classifier-inception/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "url": "https://i.imgur.com/PzXprwl.jpg" -} diff --git a/examples/tensorflow/iris-classifier/README.md b/examples/tensorflow/iris-classifier/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/tensorflow/iris-classifier/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. 
diff --git a/examples/tensorflow/license-plate-reader/README.md b/examples/tensorflow/license-plate-reader/README.md deleted file mode 100644 index 009286a4e1..0000000000 --- a/examples/tensorflow/license-plate-reader/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Real-Time License Plate Identification System - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This project implements a license plate identification system. On resource-constrained systems, running inferences may prove to be too computationally expensive. One solution is to run the ML in the cloud and have the local (embedded) system act as a client of these services. - -![Demo GIF](https://i.imgur.com/jgkJB59.gif) - -*Figure 1 - GIF taken from this real-time recording [video](https://www.youtube.com/watch?v=gsYEZtecXlA) of predictions* - -![Raspberry Pi client with 4G access and onboard GPS that connects to cortex's APIs for inference](https://i.imgur.com/MvDAXWU.jpg) - -*Figure 2 - Raspberry Pi-powered client with 4G access and onboard GPS that connects to cortex's APIs for inference. More on that [here](https://github.com/RobertLucian/cortex-license-plate-reader-client).* - -In our example, we assume we have a dashcam mounted on a car and we want to detect and recognize all license plates in the video stream in real-time. We can use an embedded computer system to record the video, then stream and infer frame-by-frame using a web service, reassemble the stream with the licence plate annotations, and finally display the annotated stream on a screen. The web service in our case is a set of 2 web APIs deployed using cortex. - -## Used Models - -The identification of license plates is done in three steps: - -1. Detecting the bounding boxes of each license plate using *YOLOv3* model. -1. 
Detecting the very specific region of each word inside each bounding box with high accuracy using a pretrained *CRAFT* text detector. -1. Recognizing the text inside the previously detected boxes using a pretrained *CRNN* model. - -Out of these three models (*YOLOv3*, *CRAFT* and *CRNN*) only *YOLOv3* has been fine-tuned with a rather small dataset to better work with license plates. This dataset can be found [here](https://github.com/RobertLucian/license-plate-dataset). This *YOLOv3* model has in turn been trained using [this](https://github.com/experiencor/keras-yolo3) GitHub project. To get more details about our fine-tuned model, check the project's description page. - -The other two models, *CRAFT* and *CRNN*, can be found in [keras-ocr](https://github.com/faustomorales/keras-ocr). - -## Deployment - Lite Version - -A lite version of the deployment is available with `cortex_lite.yaml`. The lite version accepts an image as input and returns an image with the recognized license plates overlayed on top. A single GPU is required for this deployment (i.e. `g4dn.xlarge`). - -Once the cortex cluster is created, run - -```bash -cortex deploy cortex_lite.yaml -``` - -And monitor the API with - -```bash -cortex get --watch -``` - -To run an inference on the lite version, the only 3 tools you need are `curl`, `sed` and `base64`. This API expects an URL pointing to an image onto which the inferencing is done. This includes the detection of license plates with *YOLOv3* and the recognition part with *CRAFT* + *CRNN* models. 
- -Export the endpoint & the image's URL by running - -```bash -export ENDPOINT=your-api-endpoint -export IMAGE_URL=https://i.imgur.com/r8xdI7P.png -``` - -Then run the following piped commands - -```bash -curl "${ENDPOINT}" -X POST -H "Content-Type: application/json" -d '{"url":"'${IMAGE_URL}'"}' | -sed 's/"//g' | -base64 -d > prediction.jpg -``` - -The resulting image is the same as the one in [Verifying the Deployed APIs](#verifying-the-deployed-apis). - -For another prediction, let's use a generic image from the web. Export [this image's URL link](https://i.imgur.com/mYuvMOs.jpg) and re-run the prediction. This is what we get. - -![annotated sample image](https://i.imgur.com/tg1PE1E.jpg) - -*The above prediction has the bounding boxes colored differently to distinguish them from the cars' red bodies* - -## Deployment - Full Version - -The recommended number of instances to run this smoothly on a video stream is about 12 GPU instances (2 GPU instances for *YOLOv3* and 10 for *CRNN* + *CRAFT*). `cortex_full.yaml` is already set up to use these 12 instances. Note: this is the optimal number of instances when using the `g4dn.xlarge` instance type. For the client to work smoothly, the number of processes per replica can be adjusted, especially for `p3` or `g4` instances, where the GPU has a lot of compute capacity. - -If you don't have access to this many GPU-equipped instances, you could just lower the number and expect dropped frames. It will still prove the point, albeit at a much lower framerate and with higher latency. More on that [here](https://github.com/RobertLucian/cortex-license-plate-reader-client). - -Then after the cortex cluster is created, run - -```bash -cortex deploy cortex_full.yaml -``` - -And monitor the APIs with - -```bash -cortex get --watch -``` - -We can run the inference on a sample image to verify that both APIs are working as expected before we move on to running the client. 
Here is an example image: - -![sample image](https://i.imgur.com/r8xdI7P.png) - -On your local machine run: - -``` -pip install requests click opencv-contrib-python numpy -``` - -and run the following script with Python >= `3.6.x`. The application expects the argument to be a link to an image. The following link is for the above sample image. - - -```bash -export YOLOV3_ENDPOINT=api_endpoint_for_yolov3 -export CRNN_ENDPOINT=api_endpoint_for_crnn -python sample_inference.py "https://i.imgur.com/r8xdI7P.png" -``` - -If all goes well, then a prediction will be saved as a JPEG image to disk. By default, it's saved to `prediction.jpg`. Here is the output for the image above: - -![annotated sample image](https://i.imgur.com/JaD4A05.jpg) - -You can use `python sample_inference.py --help` to find out more. Keep in mind that any detected license plates with a confidence score lower than 80% are discarded. - -If this verification works, then we can move on and run the main client. - -### Running the Client - -Once the APIs are up and running, launch the streaming client by following the instructions at [robertlucian/cortex-license-plate-reader-client](https://github.com/RobertLucian/cortex-license-plate-reader-client). - -*Note: The client is kept in a separate repository to maintain the cortex project clean and focused. Keeping some of the projects that are more complex out of this repository can reduce the confusion.* - -## Customization/Optimization - -### Uploading the Model to S3 - -The only model to upload to an S3 bucket (for Cortex to deploy) is the *YOLOv3* model. The other two models are downloaded automatically upon deploying the service. - -If you would like to host the model from your own bucket, or if you want to fine tune the model for your needs, here's what you can do. 
- -#### Lite Version - -Download the *Keras* model: - -```bash -wget -O license_plate.h5 "https://www.dropbox.com/s/vsvgoyricooksyv/license_plate.h5?dl=0" -``` - -And then upload it to your bucket (also make sure [cortex_lite.yaml](cortex_lite.yaml) points to this bucket): - -```bash -BUCKET=my-bucket -YOLO3_PATH=examples/tensorflow/license-plate-reader/yolov3_keras -aws s3 cp license_plate.h5 "s3://$BUCKET/$YOLO3_PATH/model.h5" -``` - -#### Full Version - -Download the *SavedModel*: - -```bash -wget -O yolov3.zip "https://www.dropbox.com/sh/4ltffycnzfeul01/AAB7Xdmmi59w0EPOwhQ1nkvua/yolov3?dl=0" -``` - -Unzip it: - -```bash -unzip yolov3.zip -d yolov3 -``` - -And then upload it to your bucket (also make sure [cortex_full.yaml](cortex_full.yaml) points to this bucket): - -```bash -BUCKET=my-bucket -YOLO3_PATH=examples/tensorflow/license-plate-reader/yolov3_tf -aws s3 cp yolov3/ "s3://$BUCKET/$YOLO3_PATH" --recursive -``` - -### Configuring YOLOv3 Predictor - -The `yolov3` API predictor requires a [config.json](config.json) file to configure the input size of the image (dependent on the model's architecture), the anchor boxes, the object threshold, and the IoU threshold. All of these are already set appropriately so no other change is required. - -The configuration file's content is based on [this](https://github.com/experiencor/keras-yolo3/blob/bf37c87561caeccc4f1b879e313d4a3fec1b987e/zoo/config_license_plates.json#L2-L7). - -### Opportunities for performance improvements - -One way to reduce the inference time is to convert the models to use FP16/BFP16 (in mixed mode or not) and then choose the accelerator that gives the best performance in half precision mode - i.e. T4/V100. A speedup of an order of magnitude can be expected. 
diff --git a/examples/tensorflow/license-plate-reader/config.json b/examples/tensorflow/license-plate-reader/config.json deleted file mode 100644 index 0ff64d0a98..0000000000 --- a/examples/tensorflow/license-plate-reader/config.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "labels": ["license-plate"], - "net_h" : 416, - "net_w" : 416, - "anchors" : [15,6, 18,8, 22,9, 27,11, 32,13, 41,17, 54,21, 66,27, 82,33], - "obj_thresh" : 0.8, - "nms_thresh" : 0.01 -} diff --git a/examples/tensorflow/license-plate-reader/cortex_full.yaml b/examples/tensorflow/license-plate-reader/cortex_full.yaml deleted file mode 100644 index f16f6ab934..0000000000 --- a/examples/tensorflow/license-plate-reader/cortex_full.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: yolov3 - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor_yolo.py - model_path: s3://cortex-examples/tensorflow/license-plate-reader/yolov3_tf/ - processes_per_replica: 4 - threads_per_process: 3 - signature_key: serving_default - config: - model_config: config.json - compute: - cpu: 1 - gpu: 1 - mem: 8G - autoscaling: - min_replicas: 2 - max_replicas: 2 - -- name: crnn - kind: RealtimeAPI - predictor: - type: python - path: predictor_crnn.py - processes_per_replica: 1 - threads_per_process: 1 - compute: - cpu: 1 - gpu: 1 - mem: 8G - autoscaling: - min_replicas: 10 - max_replicas: 10 diff --git a/examples/tensorflow/license-plate-reader/cortex_lite.yaml b/examples/tensorflow/license-plate-reader/cortex_lite.yaml deleted file mode 100644 index 8e07cd8280..0000000000 --- a/examples/tensorflow/license-plate-reader/cortex_lite.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: license-plate-reader - kind: RealtimeAPI - predictor: - type: python - path: predictor_lite.py - config: - yolov3: s3://cortex-examples/tensorflow/license-plate-reader/yolov3_keras/model.h5 - yolov3_model_config: config.json - compute: - cpu: 1 - gpu: 1 - mem: 4G diff --git a/examples/tensorflow/license-plate-reader/predictor_crnn.py b/examples/tensorflow/license-plate-reader/predictor_crnn.py deleted file mode 100644 index aa543f45cf..0000000000 --- a/examples/tensorflow/license-plate-reader/predictor_crnn.py +++ /dev/null @@ -1,44 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import cv2 -import numpy as np -import keras_ocr -import base64 -import pickle -import tensorflow as tf - - -class PythonPredictor: - def __init__(self, config): - # limit memory usage on each process - for gpu in tf.config.list_physical_devices("GPU"): - tf.config.experimental.set_memory_growth(gpu, True) - - # keras-ocr will automatically download pretrained - # weights for the detector and recognizer. 
- self.pipeline = keras_ocr.pipeline.Pipeline() - - def predict(self, payload): - # preprocess the images w/ license plates (LPs) - imgs = payload["imgs"] - imgs = base64.b64decode(imgs.encode("utf-8")) - jpgs_as_np = pickle.loads(imgs) - images = [cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR) for jpg_as_np in jpgs_as_np] - - # run batch inference - try: - prediction_groups = self.pipeline.recognize(images) - except ValueError: - # exception can occur when the images are too small - prediction_groups = [] - - image_list = [] - for img_predictions in prediction_groups: - boxes_per_image = [] - for predictions in img_predictions: - boxes_per_image.append([predictions[0], predictions[1].tolist()]) - image_list.append(boxes_per_image) - - lps = {"license-plates": image_list} - - return lps diff --git a/examples/tensorflow/license-plate-reader/predictor_lite.py b/examples/tensorflow/license-plate-reader/predictor_lite.py deleted file mode 100644 index 0a71b775fa..0000000000 --- a/examples/tensorflow/license-plate-reader/predictor_lite.py +++ /dev/null @@ -1,120 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import boto3, base64, cv2, re, os, requests, json -import keras_ocr - -from botocore import UNSIGNED -from botocore.client import Config -from tensorflow.keras.models import load_model -import utils.utils as utils -import utils.bbox as bbox_utils -import utils.preprocess as preprocess_utils - - -class PythonPredictor: - def __init__(self, config): - # download yolov3 model - bucket, key = re.match("s3://(.+?)/(.+)", config["yolov3"]).groups() - - if os.environ.get("AWS_ACCESS_KEY_ID"): - s3 = boto3.client("s3") # client will use your credentials if available - else: - s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client - - model_path = "/tmp/model.h5" - s3.download_file(bucket, key, model_path) - - # load yolov3 model - self.yolov3_model = load_model(model_path) - - # get configuration for yolov3 model - with open(config["yolov3_model_config"]) as json_file: - data = json.load(json_file) - for key in data: - setattr(self, key, data[key]) - self.box_confidence_score = 0.8 - - # keras-ocr automatically downloads the pretrained - # weights for the detector and recognizer - self.recognition_model_pipeline = keras_ocr.pipeline.Pipeline() - - def predict(self, payload): - # download image - img_url = payload["url"] - image = preprocess_utils.get_url_image(img_url) - - # detect the bounding boxes - boxes = utils.get_yolo_boxes( - self.yolov3_model, - image, - self.net_h, - self.net_w, - self.anchors, - self.obj_thresh, - self.nms_thresh, - len(self.labels), - tensorflow_model=False, - ) - - # purge bounding boxes with a low confidence score - aux = [] - for b in boxes: - label = -1 - for i in range(len(b.classes)): - if b.classes[i] > self.box_confidence_score: - label = i - if label >= 0: - aux.append(b) - boxes = aux - del aux - - # if bounding boxes have been detected - dec_words = [] - if len(boxes) > 0: - # create set of images of the detected 
license plates - lps = [] - for b in boxes: - lp = image[b.ymin : b.ymax, b.xmin : b.xmax] - lps.append(lp) - - # run batch inference - try: - prediction_groups = self.recognition_model_pipeline.recognize(lps) - except ValueError: - # exception can occur when the images are too small - prediction_groups = [] - - # process pipeline output - image_list = [] - for img_predictions in prediction_groups: - boxes_per_image = [] - for predictions in img_predictions: - boxes_per_image.append([predictions[0], predictions[1].tolist()]) - image_list.append(boxes_per_image) - - # reorder text within detected LPs based on horizontal position - dec_lps = preprocess_utils.reorder_recognized_words(image_list) - for dec_lp in dec_lps: - dec_words.append([word[0] for word in dec_lp]) - - # if there are no recognized LPs, then don't draw them - if len(dec_words) == 0: - dec_words = [[] for i in range(len(boxes))] - - # draw predictions as overlays on the source image - draw_image = bbox_utils.draw_boxes( - image, - boxes, - overlay_text=dec_words, - labels=["LP"], - obj_thresh=self.box_confidence_score, - ) - - # image represented in bytes - byte_im = preprocess_utils.image_to_jpeg_bytes(draw_image) - - # encode image - image_enc = base64.b64encode(byte_im).decode("utf-8") - - # image with draw boxes overlayed - return image_enc diff --git a/examples/tensorflow/license-plate-reader/predictor_yolo.py b/examples/tensorflow/license-plate-reader/predictor_yolo.py deleted file mode 100644 index 7648b66960..0000000000 --- a/examples/tensorflow/license-plate-reader/predictor_yolo.py +++ /dev/null @@ -1,46 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import json -import base64 -import numpy as np -import cv2 -import pickle -import utils.utils as utils - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - self.client = tensorflow_client - - with open(config["model_config"]) as json_file: - data = json.load(json_file) - for key in data: - setattr(self, key, data[key]) - - def predict(self, payload): - # decode the payload - img = payload["img"] - img = base64.b64decode(img) - jpg_as_np = np.frombuffer(img, dtype=np.uint8) - image = cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR) - - # detect the bounding boxes - boxes = utils.get_yolo_boxes( - self.client, - image, - self.net_h, - self.net_w, - self.anchors, - self.obj_thresh, - self.nms_thresh, - len(self.labels), - ) - - # package the response - response = {"boxes": []} - for box in boxes: - response["boxes"].append( - [box.xmin, box.ymin, box.xmax, box.ymax, float(box.c), box.classes.tolist()] - ) - - return response diff --git a/examples/tensorflow/license-plate-reader/requirements.txt b/examples/tensorflow/license-plate-reader/requirements.txt deleted file mode 100644 index 0fb87fcf23..0000000000 --- a/examples/tensorflow/license-plate-reader/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -keras-ocr==0.8.5 -keras==2.3.1 -tensorflow==2.3.0 -scipy==1.4.1 -numpy==1.18.* diff --git a/examples/tensorflow/license-plate-reader/sample_inference.py b/examples/tensorflow/license-plate-reader/sample_inference.py deleted file mode 100644 index 11e217ec78..0000000000 --- a/examples/tensorflow/license-plate-reader/sample_inference.py +++ /dev/null @@ -1,100 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import click, cv2, requests, pickle, base64, json -import numpy as np -import utils.bbox as bbox_utils -import utils.preprocess as preprocess_utils - - -@click.command( - help=( - "Identify license plates in a given image" - " while outsourcing the predictions using the REST API endpoints." - " Both API endpoints have to be exported as environment variables." - ) -) -@click.argument("img_url_src", type=str) -@click.argument("yolov3_endpoint", envvar="YOLOV3_ENDPOINT") -@click.argument("crnn_endpoint", envvar="CRNN_ENDPOINT") -@click.option( - "--output", - "-o", - type=str, - default="prediction.jpg", - show_default=True, - help="File to save the prediction to.", -) -def main(img_url_src, yolov3_endpoint, crnn_endpoint, output): - - # get the image in bytes representation - image = preprocess_utils.get_url_image(img_url_src) - image_bytes = preprocess_utils.image_to_jpeg_bytes(image) - - # encode image - image_enc = base64.b64encode(image_bytes).decode("utf-8") - image_dump = json.dumps({"img": image_enc}) - - # make yolov3 api request - resp = requests.post( - yolov3_endpoint, data=image_dump, headers={"content-type": "application/json"} - ) - - # parse response - boxes_raw = resp.json()["boxes"] - boxes = [] - for b in boxes_raw: - box = bbox_utils.BoundBox(*b) - boxes.append(box) - - # purge bounding boxes with a low confidence score - confidence_score = 0.8 - aux = [] - for b in boxes: - label = -1 - for i in range(len(b.classes)): - if b.classes[i] > confidence_score: - label = i - if label >= 0: - aux.append(b) - boxes = aux - del aux - - dec_words = [] - if len(boxes) > 0: - # create set of images of the detected license plates - lps = [] - for b in boxes: - lp = image[b.ymin : b.ymax, b.xmin : b.xmax] - jpeg = preprocess_utils.image_to_jpeg_nparray(lp) - lps.append(jpeg) - - # encode the cropped license plates - lps = pickle.dumps(lps, protocol=0) - lps_enc = 
base64.b64encode(lps).decode("utf-8") - lps_dump = json.dumps({"imgs": lps_enc}) - - # make crnn api request - resp = requests.post( - crnn_endpoint, data=lps_dump, headers={"content-type": "application/json"} - ) - - # parse the response - dec_lps = resp.json()["license-plates"] - dec_lps = preprocess_utils.reorder_recognized_words(dec_lps) - for dec_lp in dec_lps: - dec_words.append([word[0] for word in dec_lp]) - - if len(dec_words) == 0: - dec_words = [[] for i in range(len(boxes))] - - # draw predictions as overlays on the source image - draw_image = bbox_utils.draw_boxes( - image, boxes, overlay_text=dec_words, labels=["LP"], obj_thresh=confidence_score - ) - - # and save it to disk - cv2.imwrite(output, draw_image) - - -if __name__ == "__main__": - main() diff --git a/examples/tensorflow/license-plate-reader/utils/__init__.py b/examples/tensorflow/license-plate-reader/utils/__init__.py deleted file mode 100644 index 5f47d63e43..0000000000 --- a/examples/tensorflow/license-plate-reader/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) diff --git a/examples/tensorflow/license-plate-reader/utils/bbox.py b/examples/tensorflow/license-plate-reader/utils/bbox.py deleted file mode 100644 index de9c7ef8c0..0000000000 --- a/examples/tensorflow/license-plate-reader/utils/bbox.py +++ /dev/null @@ -1,111 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import numpy as np -import cv2 -from .colors import get_color - - -class BoundBox: - def __init__(self, xmin, ymin, xmax, ymax, c=None, classes=None): - self.xmin = xmin - self.ymin = ymin - self.xmax = xmax - self.ymax = ymax - - self.c = c - self.classes = classes - - self.label = -1 - self.score = -1 - - def get_label(self): - if self.label == -1: - self.label = np.argmax(self.classes) - - return self.label - - def get_score(self): - if self.score == -1: - self.score = self.classes[self.get_label()] - - return self.score - - -def _interval_overlap(interval_a, interval_b): - x1, x2 = interval_a - x3, x4 = interval_b - - if x3 < x1: - if x4 < x1: - return 0 - else: - return min(x2, x4) - x1 - else: - if x2 < x3: - return 0 - else: - return min(x2, x4) - x3 - - -def bbox_iou(box1, box2): - intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) - intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) - - intersect = intersect_w * intersect_h - - w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin - w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin - - union = w1 * h1 + w2 * h2 - intersect - - return float(intersect) / union - - -def draw_boxes(image, boxes, overlay_text, labels, obj_thresh, quiet=True): - for box, overlay in zip(boxes, overlay_text): - label_str = "" - label = -1 - - for i in range(len(labels)): - if box.classes[i] > obj_thresh: - if label_str != "": - label_str += ", " - label_str += labels[i] + " " + str(round(box.get_score() * 100, 2)) + "%" - label = i - if not quiet: - print(label_str) - - if label >= 0: - if len(overlay) > 0: - text = label_str + ": [" + " ".join(overlay) + "]" - else: - text = label_str - text = text.upper() - text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-3 * image.shape[0], 5) - width, height = text_size[0][0], text_size[0][1] - region = np.array( - [ - 
[box.xmin - 3, box.ymin], - [box.xmin - 3, box.ymin - height - 26], - [box.xmin + width + 13, box.ymin - height - 26], - [box.xmin + width + 13, box.ymin], - ], - dtype="int32", - ) - - # cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=5) - rec = (box.xmin, box.ymin, box.xmax - box.xmin, box.ymax - box.ymin) - rec = tuple(int(i) for i in rec) - cv2.rectangle(img=image, rec=rec, color=get_color(label), thickness=3) - cv2.fillPoly(img=image, pts=[region], color=get_color(label)) - cv2.putText( - img=image, - text=text, - org=(box.xmin + 13, box.ymin - 13), - fontFace=cv2.FONT_HERSHEY_SIMPLEX, - fontScale=1e-3 * image.shape[0], - color=(0, 0, 0), - thickness=1, - ) - - return image diff --git a/examples/tensorflow/license-plate-reader/utils/colors.py b/examples/tensorflow/license-plate-reader/utils/colors.py deleted file mode 100644 index 2902c4e5aa..0000000000 --- a/examples/tensorflow/license-plate-reader/utils/colors.py +++ /dev/null @@ -1,100 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - - -def get_color(label): - """Return a color from a set of predefined colors. Contains 80 colors in total. - code originally from https://github.com/fizyr/keras-retinanet/ - Args - label: The label to get the color for. - Returns - A list of three values representing a RGB color. 
- """ - if label < len(colors): - return colors[label] - else: - print("Label {} has no color, returning default.".format(label)) - return (0, 255, 0) - - -colors = [ - [31, 0, 255], - [0, 159, 255], - [255, 95, 0], - [255, 19, 0], - [255, 0, 0], - [255, 38, 0], - [0, 255, 25], - [255, 0, 133], - [255, 172, 0], - [108, 0, 255], - [0, 82, 255], - [0, 255, 6], - [255, 0, 152], - [223, 0, 255], - [12, 0, 255], - [0, 255, 178], - [108, 255, 0], - [184, 0, 255], - [255, 0, 76], - [146, 255, 0], - [51, 0, 255], - [0, 197, 255], - [255, 248, 0], - [255, 0, 19], - [255, 0, 38], - [89, 255, 0], - [127, 255, 0], - [255, 153, 0], - [0, 255, 255], - [0, 255, 216], - [0, 255, 121], - [255, 0, 248], - [70, 0, 255], - [0, 255, 159], - [0, 216, 255], - [0, 6, 255], - [0, 63, 255], - [31, 255, 0], - [255, 57, 0], - [255, 0, 210], - [0, 255, 102], - [242, 255, 0], - [255, 191, 0], - [0, 255, 63], - [255, 0, 95], - [146, 0, 255], - [184, 255, 0], - [255, 114, 0], - [0, 255, 235], - [255, 229, 0], - [0, 178, 255], - [255, 0, 114], - [255, 0, 57], - [0, 140, 255], - [0, 121, 255], - [12, 255, 0], - [255, 210, 0], - [0, 255, 44], - [165, 255, 0], - [0, 25, 255], - [0, 255, 140], - [0, 101, 255], - [0, 255, 82], - [223, 255, 0], - [242, 0, 255], - [89, 0, 255], - [165, 0, 255], - [70, 255, 0], - [255, 0, 172], - [255, 76, 0], - [203, 255, 0], - [204, 0, 255], - [255, 0, 229], - [255, 133, 0], - [127, 0, 255], - [0, 235, 255], - [0, 255, 197], - [255, 0, 191], - [0, 44, 255], - [50, 255, 0], -] diff --git a/examples/tensorflow/license-plate-reader/utils/preprocess.py b/examples/tensorflow/license-plate-reader/utils/preprocess.py deleted file mode 100644 index 5e40a35719..0000000000 --- a/examples/tensorflow/license-plate-reader/utils/preprocess.py +++ /dev/null @@ -1,59 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import numpy as np -import cv2, requests -from statistics import mean - - -def get_url_image(url_image): - """ - Get numpy image from URL image. - """ - resp = requests.get(url_image, stream=True).raw - image = np.asarray(bytearray(resp.read()), dtype="uint8") - image = cv2.imdecode(image, cv2.IMREAD_COLOR) - return image - - -def image_to_jpeg_nparray(image, quality=[int(cv2.IMWRITE_JPEG_QUALITY), 95]): - """ - Convert numpy image to jpeg numpy vector. - """ - is_success, im_buf_arr = cv2.imencode(".jpg", image, quality) - return im_buf_arr - - -def image_to_jpeg_bytes(image, quality=[int(cv2.IMWRITE_JPEG_QUALITY), 95]): - """ - Convert numpy image to bytes-encoded jpeg image. - """ - buf = image_to_jpeg_nparray(image, quality) - byte_im = buf.tobytes() - return byte_im - - -def reorder_recognized_words(detected_images): - """ - Reorder the detected words in each image based on the average horizontal position of each word. - Sorting them in ascending order. 
- """ - - reordered_images = [] - for detected_image in detected_images: - - # computing the mean average position for each word - mean_horizontal_positions = [] - for words in detected_image: - box = words[1] - y_positions = [point[0] for point in box] - mean_y_position = mean(y_positions) - mean_horizontal_positions.append(mean_y_position) - indexes = np.argsort(mean_horizontal_positions) - - # and reordering them - reordered = [] - for index, words in zip(indexes, detected_image): - reordered.append(detected_image[index]) - reordered_images.append(reordered) - - return reordered_images diff --git a/examples/tensorflow/license-plate-reader/utils/utils.py b/examples/tensorflow/license-plate-reader/utils/utils.py deleted file mode 100644 index 9d07b289e0..0000000000 --- a/examples/tensorflow/license-plate-reader/utils/utils.py +++ /dev/null @@ -1,160 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import cv2 -import numpy as np -import math -from .bbox import BoundBox, bbox_iou -from scipy.special import expit - - -def _sigmoid(x): - return expit(x) - - -def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w): - if (float(net_w) / image_w) < (float(net_h) / image_h): - new_w = net_w - new_h = (image_h * net_w) / image_w - else: - new_h = net_w - new_w = (image_w * net_h) / image_h - - for i in range(len(boxes)): - x_offset, x_scale = (net_w - new_w) / 2.0 / net_w, float(new_w) / net_w - y_offset, y_scale = (net_h - new_h) / 2.0 / net_h, float(new_h) / net_h - - boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w) - boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w) - boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h) - boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h) - - -def do_nms(boxes, nms_thresh): - if len(boxes) 
> 0: - nb_class = len(boxes[0].classes) - else: - return - - for c in range(nb_class): - sorted_indices = np.argsort([-box.classes[c] for box in boxes]) - - for i in range(len(sorted_indices)): - index_i = sorted_indices[i] - - if boxes[index_i].classes[c] == 0: - continue - - for j in range(i + 1, len(sorted_indices)): - index_j = sorted_indices[j] - - if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: - boxes[index_j].classes[c] = 0 - - -def decode_netout(netout, anchors, obj_thresh, net_h, net_w): - grid_h, grid_w = netout.shape[:2] - nb_box = 3 - netout = netout.reshape((grid_h, grid_w, nb_box, -1)) - nb_class = netout.shape[-1] - 5 - - boxes = [] - - netout[..., :2] = _sigmoid(netout[..., :2]) - netout[..., 4] = _sigmoid(netout[..., 4]) - netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) - netout[..., 5:] *= netout[..., 5:] > obj_thresh - - for i in range(grid_h * grid_w): - row = i // grid_w - col = i % grid_w - - for b in range(nb_box): - # 4th element is objectness score - objectness = netout[row, col, b, 4] - - if objectness <= obj_thresh: - continue - - # first 4 elements are x, y, w, and h - x, y, w, h = netout[row, col, b, :4] - - x = (col + x) / grid_w # center position, unit: image width - y = (row + y) / grid_h # center position, unit: image height - w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width - h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height - - # last elements are class probabilities - classes = netout[row, col, b, 5:] - - box = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, objectness, classes) - - boxes.append(box) - - return boxes - - -def preprocess_input(image, net_h, net_w): - new_h, new_w, _ = image.shape - - # determine the new size of the image - if (float(net_w) / new_w) < (float(net_h) / new_h): - new_h = (new_h * net_w) // new_w - new_w = net_w - else: - new_w = (new_w * net_h) // new_h - new_h = net_h - - # resize the image to the new size - resized = 
cv2.resize(image[:, :, ::-1] / 255.0, (new_w, new_h)) - - # embed the image into the standard letter box - new_image = np.ones((net_h, net_w, 3)) * 0.5 - new_image[ - (net_h - new_h) // 2 : (net_h + new_h) // 2, (net_w - new_w) // 2 : (net_w + new_w) // 2, : - ] = resized - new_image = np.expand_dims(new_image, 0) - - return new_image - - -def get_yolo_boxes( - model, image, net_h, net_w, anchors, obj_thresh, nms_thresh, classes, tensorflow_model=True -): - # preprocess the input - image_h, image_w, _ = image.shape - batch_input = np.zeros((1, net_h, net_w, 3)) - batch_input[0] = preprocess_input(image, net_h, net_w) - - # run the prediction - if tensorflow_model: - output = model.predict({"input_1": batch_input}) - yolos = [output["conv_81"], output["conv_93"], output["conv_105"]] - filters = 3 * (5 + classes) - for i in range(len(yolos)): - length = len(yolos[i]) - box_size = int(math.sqrt(length / filters)) - yolos[i] = np.array(yolos[i]).reshape((box_size, box_size, filters)) - else: - output = model.predict_on_batch(batch_input) - yolos = [output[0][0], output[1][0], output[2][0]] - - boxes = [] - # decode the output of the network - for j in range(len(yolos)): - yolo_anchors = anchors[(2 - j) * 6 : (3 - j) * 6] # config['model']['anchors'] - boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w) - - # correct the sizes of the bounding boxes - correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w) - - # suppress non-maximal boxes - do_nms(boxes, nms_thresh) - - return boxes - - -def _softmax(x, axis=-1): - x = x - np.amax(x, axis, keepdims=True) - e_x = np.exp(x) - - return e_x / e_x.sum(axis, keepdims=True) diff --git a/examples/tensorflow/multi-model-classifier/requirements.txt b/examples/tensorflow/multi-model-classifier/requirements.txt deleted file mode 100644 index 7e2fba5e6c..0000000000 --- a/examples/tensorflow/multi-model-classifier/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -Pillow diff --git 
a/examples/tensorflow/multi-model-classifier/sample-iris.json b/examples/tensorflow/multi-model-classifier/sample-iris.json deleted file mode 100644 index 67c03827f2..0000000000 --- a/examples/tensorflow/multi-model-classifier/sample-iris.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "input": { - "sepal_length": 5.2, - "sepal_width": 3.6, - "petal_length": 1.4, - "petal_width": 0.3 - } -} diff --git a/examples/tensorflow/sentiment-analyzer/README.md b/examples/tensorflow/sentiment-analyzer/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/tensorflow/sentiment-analyzer/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/examples/tensorflow/sentiment-analyzer/bert.ipynb b/examples/tensorflow/sentiment-analyzer/bert.ipynb deleted file mode 100644 index 27ca8c67b1..0000000000 --- a/examples/tensorflow/sentiment-analyzer/bert.ipynb +++ /dev/null @@ -1,1007 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "bert.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "accelerator": "GPU" - }, - "cells": [ - { - "cell_type": "code", - "metadata": { - "id": "j0a4mTk9o1Qg", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Modified source from https://colab.research.google.com/github/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb\n", - "\n", - "# Copyright 2019 Google Inc.\n", - "\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy 
of the License at\n", - "\n", - "# http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dCpvgG0vwXAZ", - "colab_type": "text" - }, - "source": [ - "#Predicting Movie Review Sentiment with BERT on TF Hub", - "\n", - "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xiYrZKaHwV81", - "colab_type": "text" - }, - "source": [ - "If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.\n", - "\n", - "Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing TensorFlow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMO and GloVE. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.\n", - "\n", - "Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in TensorFlow with tf hub. 
Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "chM4UttbMIqq", - "colab_type": "text" - }, - "source": [ - "First, we'll install the required packages:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jviywGyWyKsA", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!pip install bert-tensorflow==1.0.* tensorflow-gpu==1.13.* scikit-learn==0.21.* pandas==0.24.* tensorflow-hub==0.6.* boto3==1.*" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "hsZvic2YxnTz", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from datetime import datetime\n", - "\n", - "from sklearn.model_selection import train_test_split\n", - "import pandas as pd\n", - "import tensorflow as tf\n", - "import tensorflow_hub as hub\n", - "\n", - "import bert\n", - "from bert import run_classifier\n", - "from bert import optimization\n", - "from bert import tokenization" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KVB3eOcjxxm1", - "colab_type": "text" - }, - "source": [ - "Below, we'll set an output location to store our model output, checkpoints, and export in a local directory. Note: if you're running on Google Colab, local directories don't persist after the session ends." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "US_EAnICvP7f", - "colab_type": "code", - "colab": {} - }, - "source": [ - "OUTPUT_DIR = \"bert\"\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pmFYvkylMwXn", - "colab_type": "text" - }, - "source": [ - "#Data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MC_w8SRqN0fr", - "colab_type": "text" - }, - "source": [ - "First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this TensorFlow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub)." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "fom_ff20gyy6", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from tensorflow import keras\n", - "import os\n", - "import re\n", - "\n", - "# Load all files from a directory in a DataFrame.\n", - "def load_directory_data(directory):\n", - " data = {}\n", - " data[\"sentence\"] = []\n", - " data[\"sentiment\"] = []\n", - " for file_path in os.listdir(directory):\n", - " with tf.gfile.GFile(os.path.join(directory, file_path), \"r\") as f:\n", - " data[\"sentence\"].append(f.read())\n", - " data[\"sentiment\"].append(re.match(\"\\d+_(\\d+)\\.txt\", file_path).group(1))\n", - " return pd.DataFrame.from_dict(data)\n", - "\n", - "# Merge positive and negative examples, add a polarity column and shuffle.\n", - "def load_dataset(directory):\n", - " pos_df = load_directory_data(os.path.join(directory, \"pos\"))\n", - " neg_df = load_directory_data(os.path.join(directory, \"neg\"))\n", - " pos_df[\"polarity\"] = 1\n", - " neg_df[\"polarity\"] = 0\n", - " return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)\n", - "\n", - "# Download and process the dataset files.\n", - "def download_and_load_datasets(force_download=False):\n", - " dataset = tf.keras.utils.get_file(\n", - 
" fname=\"aclImdb.tar.gz\", \n", - " origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\", \n", - " extract=True)\n", - " \n", - " train_df = load_dataset(os.path.join(os.path.dirname(dataset), \n", - " \"aclImdb\", \"train\"))\n", - " test_df = load_dataset(os.path.join(os.path.dirname(dataset), \n", - " \"aclImdb\", \"test\"))\n", - " \n", - " return train_df, test_df\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "2abfwdn-g135", - "colab_type": "code", - "colab": {} - }, - "source": [ - "train, test = download_and_load_datasets()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XA8WHJgzhIZf", - "colab_type": "text" - }, - "source": [ - "To keep training fast, we'll take a sample of 5000 train and test examples, respectively." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "lw_F488eixTV", - "colab_type": "code", - "colab": {} - }, - "source": [ - "train = train.sample(5000)\n", - "test = test.sample(5000)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "prRQM8pDi8xI", - "colab_type": "code", - "colab": {} - }, - "source": [ - "train.columns" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sfRnHSz3iSXz", - "colab_type": "text" - }, - "source": [ - "For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respecitvely)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "IuMOGwFui4it", - "colab_type": "code", - "colab": {} - }, - "source": [ - "DATA_COLUMN = 'sentence'\n", - "LABEL_COLUMN = 'polarity'\n", - "# label_list is the list of labels, i.e. 
True, False or 0, 1 or 'dog', 'cat'\n", - "label_list = [0, 1]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "V399W0rqNJ-Z", - "colab_type": "text" - }, - "source": [ - "#Data Preprocessing\n", - "We'll need to transform our data into a format BERT understands. This involves two steps. First, we create `InputExample`'s using the constructor provided in the BERT library.\n", - "\n", - "- `text_a` is the text we want to classify, which in this case, is the `Request` field in our Dataframe. \n", - "- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`? Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.\n", - "- `label` is the label for our example, i.e. True, False" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "p9gEt5SmM6i6", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Use the InputExample class from BERT's run_classifier code to create examples from the data\n", - "train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example\n", - " text_a = x[DATA_COLUMN], \n", - " text_b = None, \n", - " label = x[LABEL_COLUMN]), axis = 1)\n", - "\n", - "test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, \n", - " text_a = x[DATA_COLUMN], \n", - " text_b = None, \n", - " label = x[LABEL_COLUMN]), axis = 1)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SCZWZtKxObjh", - "colab_type": "text" - }, - "source": [ - "Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):\n", - "\n", - "\n", - "1. 
Lowercase our text (if we're using a BERT lowercase model)\n", - "2. Tokenize it (i.e. \"sally says hi\" -> [\"sally\", \"says\", \"hi\"])\n", - "3. Break words into WordPieces (i.e. \"calling\" -> [\"call\", \"##ing\"])\n", - "4. Map our words to indexes using a vocab file that BERT provides\n", - "5. Add special \"CLS\" and \"SEP\" tokens (see the [readme](https://github.com/google-research/bert))\n", - "6. Append \"index\" and \"segment\" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))\n", - "\n", - "Happily, we don't have to worry about most of these details.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qMWiDtpyQSoU", - "colab_type": "text" - }, - "source": [ - "To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "IhJSe0QHNG7U", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# This is a path to an uncased (all lowercase) version of BERT\n", - "BERT_MODEL_HUB = \"https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1\"\n", - "\n", - "def create_tokenizer_from_hub_module():\n", - " \"\"\"Get the vocab file and casing info from the Hub module.\"\"\"\n", - " with tf.Graph().as_default():\n", - " bert_module = hub.Module(BERT_MODEL_HUB)\n", - " tokenization_info = bert_module(signature=\"tokenization_info\", as_dict=True)\n", - " with tf.Session() as sess:\n", - " vocab_file, do_lower_case = sess.run([tokenization_info[\"vocab_file\"],\n", - " tokenization_info[\"do_lower_case\"]])\n", - " \n", - " return bert.tokenization.FullTokenizer(\n", - " vocab_file=vocab_file, do_lower_case=do_lower_case)\n", - "\n", - "tokenizer = create_tokenizer_from_hub_module()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "z4oFkhpZBDKm", - "colab_type": "text" - }, - "source": [ - "Great--we just learned that the BERT 
model we're using expects lowercase data (that's what stored in tokenization_info[\"do_lower_case\"]) and we also loaded BERT's vocab file. We also created a tokenizer, which breaks words into word pieces:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "dsBo6RCtQmwx", - "colab_type": "code", - "colab": {} - }, - "source": [ - "tokenizer.tokenize(\"This here's an example of using the BERT tokenizer\")" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0OEzfFIt6GIc", - "colab_type": "text" - }, - "source": [ - "Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "LL5W8gEGRTAf", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# We'll set sequences to be at most 128 tokens long.\n", - "MAX_SEQ_LENGTH = 128\n", - "# Convert our train and test features to InputFeatures that BERT understands.\n", - "train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)\n", - "test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ccp5trMwRtmr", - "colab_type": "text" - }, - "source": [ - "#Creating a model\n", - "\n", - "Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning)." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6o2a5ZIvRcJq", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,\n", - " num_labels):\n", - " \"\"\"Creates a classification model.\"\"\"\n", - "\n", - " bert_module = hub.Module(\n", - " BERT_MODEL_HUB,\n", - " trainable=True)\n", - " bert_inputs = dict(\n", - " input_ids=input_ids,\n", - " input_mask=input_mask,\n", - " segment_ids=segment_ids)\n", - " bert_outputs = bert_module(\n", - " inputs=bert_inputs,\n", - " signature=\"tokens\",\n", - " as_dict=True)\n", - "\n", - " # Use \"pooled_output\" for classification tasks on an entire sentence.\n", - " # Use \"sequence_outputs\" for token-level output.\n", - " output_layer = bert_outputs[\"pooled_output\"]\n", - "\n", - " hidden_size = output_layer.shape[-1].value\n", - "\n", - " # Create our own layer to tune for politeness data.\n", - " output_weights = tf.get_variable(\n", - " \"output_weights\", [num_labels, hidden_size],\n", - " initializer=tf.truncated_normal_initializer(stddev=0.02))\n", - "\n", - " output_bias = tf.get_variable(\n", - " \"output_bias\", [num_labels], initializer=tf.zeros_initializer())\n", - "\n", - " with tf.variable_scope(\"loss\"):\n", - "\n", - " # Dropout helps prevent overfitting\n", - " output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)\n", - "\n", - " logits = tf.matmul(output_layer, output_weights, transpose_b=True)\n", - " logits = tf.nn.bias_add(logits, output_bias)\n", - " log_probs = tf.nn.log_softmax(logits, axis=-1)\n", - "\n", - " # Convert labels into one-hot encoding\n", - " one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)\n", - "\n", - " predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))\n", - " # If we're predicting, we want predicted labels and the probabiltiies.\n", - " if is_predicting:\n", - " return (predicted_labels, log_probs)\n", - "\n", - " # If we're 
train/eval, compute loss between predicted and actual label\n", - " per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)\n", - " loss = tf.reduce_mean(per_example_loss)\n", - " return (loss, predicted_labels, log_probs)\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qpE0ZIDOCQzE", - "colab_type": "text" - }, - "source": [ - "Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "FnH-AnOQ9KKW", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# model_fn_builder actually creates our model function\n", - "# using the passed parameters for num_labels, learning_rate, etc.\n", - "def model_fn_builder(num_labels, learning_rate, num_train_steps,\n", - " num_warmup_steps):\n", - " \"\"\"Returns `model_fn` closure for TPUEstimator.\"\"\"\n", - " def model_fn(features, labels, mode, params): # pylint: disable=unused-argument\n", - " \"\"\"The `model_fn` for TPUEstimator.\"\"\"\n", - "\n", - " input_ids = features[\"input_ids\"]\n", - " input_mask = features[\"input_mask\"]\n", - " segment_ids = features[\"segment_ids\"]\n", - " label_ids = features[\"label_ids\"]\n", - "\n", - " is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)\n", - " \n", - " # TRAIN and EVAL\n", - " if not is_predicting:\n", - "\n", - " (loss, predicted_labels, log_probs) = create_model(\n", - " is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)\n", - "\n", - " train_op = bert.optimization.create_optimizer(\n", - " loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)\n", - "\n", - " # Calculate evaluation metrics. 
\n", - " def metric_fn(label_ids, predicted_labels):\n", - " accuracy = tf.metrics.accuracy(label_ids, predicted_labels)\n", - " f1_score = tf.contrib.metrics.f1_score(\n", - " label_ids,\n", - " predicted_labels)\n", - " auc = tf.metrics.auc(\n", - " label_ids,\n", - " predicted_labels)\n", - " recall = tf.metrics.recall(\n", - " label_ids,\n", - " predicted_labels)\n", - " precision = tf.metrics.precision(\n", - " label_ids,\n", - " predicted_labels) \n", - " true_pos = tf.metrics.true_positives(\n", - " label_ids,\n", - " predicted_labels)\n", - " true_neg = tf.metrics.true_negatives(\n", - " label_ids,\n", - " predicted_labels) \n", - " false_pos = tf.metrics.false_positives(\n", - " label_ids,\n", - " predicted_labels) \n", - " false_neg = tf.metrics.false_negatives(\n", - " label_ids,\n", - " predicted_labels)\n", - " return {\n", - " \"eval_accuracy\": accuracy,\n", - " \"f1_score\": f1_score,\n", - " \"auc\": auc,\n", - " \"precision\": precision,\n", - " \"recall\": recall,\n", - " \"true_positives\": true_pos,\n", - " \"true_negatives\": true_neg,\n", - " \"false_positives\": false_pos,\n", - " \"false_negatives\": false_neg\n", - " }\n", - "\n", - " eval_metrics = metric_fn(label_ids, predicted_labels)\n", - "\n", - " if mode == tf.estimator.ModeKeys.TRAIN:\n", - " return tf.estimator.EstimatorSpec(mode=mode,\n", - " loss=loss,\n", - " train_op=train_op)\n", - " else:\n", - " return tf.estimator.EstimatorSpec(mode=mode,\n", - " loss=loss,\n", - " eval_metric_ops=eval_metrics)\n", - " else:\n", - " (predicted_labels, log_probs) = create_model(\n", - " is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)\n", - "\n", - " predictions = {\n", - " 'probabilities': log_probs,\n", - " 'labels': predicted_labels\n", - " }\n", - " return tf.estimator.EstimatorSpec(mode, predictions=predictions)\n", - "\n", - " # Return the actual model function in the closure\n", - " return model_fn\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - 
"cell_type": "code", - "metadata": { - "id": "OjwJ4bTeWXD8", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Compute train and warmup steps from batch size\n", - "# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)\n", - "BATCH_SIZE = 32\n", - "LEARNING_RATE = 2e-5\n", - "NUM_TRAIN_EPOCHS = 3.0\n", - "# Warmup is a period of time where hte learning rate \n", - "# is small and gradually increases--usually helps training.\n", - "WARMUP_PROPORTION = 0.1\n", - "# Model configs\n", - "SAVE_CHECKPOINTS_STEPS = 500\n", - "SAVE_SUMMARY_STEPS = 100" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "emHf9GhfWBZ_", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Compute # train and warmup steps from batch size\n", - "num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)\n", - "num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "oEJldMr3WYZa", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Specify outpit directory and number of checkpoint steps to save\n", - "run_config = tf.estimator.RunConfig(\n", - " model_dir=OUTPUT_DIR,\n", - " save_summary_steps=SAVE_SUMMARY_STEPS,\n", - " save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "q_WebpS1X97v", - "colab_type": "code", - "colab": {} - }, - "source": [ - "model_fn = model_fn_builder(\n", - " num_labels=len(label_list),\n", - " learning_rate=LEARNING_RATE,\n", - " num_train_steps=num_train_steps,\n", - " num_warmup_steps=num_warmup_steps)\n", - "\n", - "estimator = tf.estimator.Estimator(\n", - " model_fn=model_fn,\n", - " config=run_config,\n", - " params={\"batch_size\": 
BATCH_SIZE})\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NOO3RfG1DYLo", - "colab_type": "text" - }, - "source": [ - "Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with TensorFlow [Estimators](https://www.tensorflow.org/guide/estimators)." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "1Pv2bAlOX_-K", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Create an input function for training. drop_remainder = True for using TPUs.\n", - "train_input_fn = bert.run_classifier.input_fn_builder(\n", - " features=train_features,\n", - " seq_length=MAX_SEQ_LENGTH,\n", - " is_training=True,\n", - " drop_remainder=False)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "t6Nukby2EB6-", - "colab_type": "text" - }, - "source": [ - "Now we train our model! For me, using a Colab notebook running on Google's GPUs, training time is typically 8-14 minutes." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nucD4gluYJmK", - "colab_type": "code", - "colab": {} - }, - "source": [ - "print(f'Beginning Training!')\n", - "current_time = datetime.now()\n", - "estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)\n", - "print(\"Training took time \", datetime.now() - current_time)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CmbLTVniARy3", - "colab_type": "text" - }, - "source": [ - "Now let's use our test data to see how well our model did:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "JIhejfpyJ8Bx", - "colab_type": "code", - "colab": {} - }, - "source": [ - "test_input_fn = run_classifier.input_fn_builder(\n", - " features=test_features,\n", - " seq_length=MAX_SEQ_LENGTH,\n", - " is_training=False,\n", - " drop_remainder=False)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "PPVEXhNjYXC-", - "colab_type": "code", - "colab": {} - }, - "source": [ - "estimator.evaluate(input_fn=test_input_fn, steps=None)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ueKsULteiz1B", - "colab_type": "text" - }, - "source": [ - "Now let's write code to make predictions on new sentences:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "OsrbTD2EJTVl", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def getPrediction(in_sentences):\n", - " labels = [\"Negative\", \"Positive\"]\n", - " input_examples = [run_classifier.InputExample(guid=\"\", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, \"\" is just a dummy label\n", - " input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)\n", - " predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)\n", 
- " predictions = estimator.predict(predict_input_fn)\n", - " return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "-thbodgih_VJ", - "colab_type": "code", - "colab": {} - }, - "source": [ - "pred_sentences = [\n", - " \"That movie was absolutely awful\",\n", - " \"The acting was a bit lacking\",\n", - " \"The film was creative and surprising\",\n", - " \"Absolutely fantastic!\"\n", - "]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "QrZmvZySKQTm", - "colab_type": "code", - "colab": {} - }, - "source": [ - "predictions = getPrediction(pred_sentences)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MXkRiEBUqN3n", - "colab_type": "text" - }, - "source": [ - "Voila! We have a sentiment classifier!" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ERkTE8-7oQLZ", - "colab_type": "code", - "colab": {} - }, - "source": [ - "predictions" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "P3Tg7c47vfLE", - "colab_type": "text" - }, - "source": [ - "# Export the model\n", - "\n", - "We are now ready to export the model. The following code defines the serving input function and exports the model to `OUTPUT_DIR`." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "NfXsdV4qtlpW", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def serving_input_fn():\n", - " reciever_tensors = {\n", - " \"input_ids\": tf.placeholder(dtype=tf.int32,\n", - " shape=[1, MAX_SEQ_LENGTH])\n", - " }\n", - " features = {\n", - " \"input_ids\": reciever_tensors['input_ids'],\n", - " \"input_mask\": 1 - tf.cast(tf.equal(reciever_tensors['input_ids'], 0), dtype=tf.int32),\n", - " \"segment_ids\": tf.zeros(dtype=tf.int32, shape=[1, MAX_SEQ_LENGTH]),\n", - " \"label_ids\": tf.placeholder(tf.int32, [None], name='label_ids')\n", - " }\n", - " return tf.estimator.export.ServingInputReceiver(features, reciever_tensors)\n", - " \n", - "estimator._export_to_tpu = False\n", - "estimator.export_saved_model(OUTPUT_DIR+\"/export\", serving_input_fn)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tIFTmUbcwI0w", - "colab_type": "text" - }, - "source": [ - "# Upload the model to AWS\n", - "\n", - "Cortex loads models from AWS, so we need to upload the exported model." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gByRzrnR_OBX", - "colab_type": "text" - }, - "source": [ - "Set these variables to configure your AWS credentials and model upload path:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "1bdCOb3z0_Gh", - "colab_type": "code", - "cellView": "form", - "colab": {} - }, - "source": [ - "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", - "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", - "S3_UPLOAD_PATH = \"s3://my-bucket/sentiment-analyzer/bert\" #@param {type:\"string\"}\n", - "\n", - "import sys\n", - "import re\n", - "\n", - "if AWS_ACCESS_KEY_ID == \"\":\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", - "\n", - "elif AWS_SECRET_ACCESS_KEY == \"\":\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", - "\n", - "else:\n", - " try:\n", - " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", - " except:\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WLT09hZr_bhm", - "colab_type": "text" - }, - "source": [ - "Upload to S3:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jCN3BINl2sKN", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import os\n", - "import boto3\n", - "\n", - "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", - "\n", - "for dirpath, _, filenames in os.walk(OUTPUT_DIR+\"/export\"):\n", - " for filename in filenames:\n", - " filepath = os.path.join(dirpath, filename)\n", - " filekey = os.path.join(key, filepath[len(OUTPUT_DIR+\"/export/\"):])\n", - " print(\"Uploading s3://{}/{} ...\".format(bucket, filekey), end = '')\n", - " 
s3.upload_file(filepath, bucket, filekey)\n", - " print(\" ✓\")\n", - "\n", - "print(\"\\nUploaded model export directory to \" + S3_UPLOAD_PATH)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7XPKSHzf_d7M", - "colab_type": "text" - }, - "source": [ - "\n", - "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/sentiment-analyzer) for how to deploy the model as an API." - ] - } - ] -} diff --git a/examples/tensorflow/sentiment-analyzer/cortex.yaml b/examples/tensorflow/sentiment-analyzer/cortex.yaml deleted file mode 100644 index 3e6447053e..0000000000 --- a/examples/tensorflow/sentiment-analyzer/cortex.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: sentiment-analyzer - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/sentiment-analyzer/bert/ - monitoring: - model_type: classification - compute: - cpu: 1 - gpu: 1 diff --git a/examples/tensorflow/sentiment-analyzer/predictor.py b/examples/tensorflow/sentiment-analyzer/predictor.py deleted file mode 100644 index 901f2bf349..0000000000 --- a/examples/tensorflow/sentiment-analyzer/predictor.py +++ /dev/null @@ -1,29 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import tensorflow as tf -import tensorflow_hub as hub -from bert import tokenization, run_classifier - -labels = ["negative", "positive"] - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - with tf.Graph().as_default(): - bert_module = hub.Module("https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1") - info = bert_module(signature="tokenization_info", as_dict=True) - with tf.Session() as sess: - vocab_file, do_lower_case = sess.run([info["vocab_file"], info["do_lower_case"]]) - self._tokenizer = tokenization.FullTokenizer( - vocab_file=vocab_file, do_lower_case=do_lower_case - ) - self.client = tensorflow_client - - def predict(self, payload): - input_example = run_classifier.InputExample(guid="", text_a=payload["review"], label=0) - input_feature = run_classifier.convert_single_example( - 0, input_example, [0, 1], 128, self._tokenizer - ) - model_input = {"input_ids": [input_feature.input_ids]} - prediction = self.client.predict(model_input) - return labels[prediction["labels"][0]] diff --git a/examples/tensorflow/sentiment-analyzer/requirements.txt b/examples/tensorflow/sentiment-analyzer/requirements.txt deleted file mode 100644 index 273614922e..0000000000 --- a/examples/tensorflow/sentiment-analyzer/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -bert-tensorflow==1.0.1 -tensorflow-hub==0.7.0 -tensorflow==1.15.* -tensorflow-serving-api==1.15.* -numpy==1.16.* diff --git a/examples/tensorflow/sentiment-analyzer/sample.json b/examples/tensorflow/sentiment-analyzer/sample.json deleted file mode 100644 index c433e33216..0000000000 --- a/examples/tensorflow/sentiment-analyzer/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "review": "the movie was amazing!" 
-} diff --git a/examples/tensorflow/text-generator/README.md b/examples/tensorflow/text-generator/README.md deleted file mode 100644 index 41a04891b3..0000000000 --- a/examples/tensorflow/text-generator/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/examples/tensorflow/text-generator/cortex.yaml b/examples/tensorflow/text-generator/cortex.yaml deleted file mode 100644 index d0e54b527d..0000000000 --- a/examples/tensorflow/text-generator/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: text-generator - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/text-generator/gpt-2/124M/ - compute: - cpu: 1 - gpu: 1 diff --git a/examples/tensorflow/text-generator/encoder.py b/examples/tensorflow/text-generator/encoder.py deleted file mode 100644 index 2f73dd509b..0000000000 --- a/examples/tensorflow/text-generator/encoder.py +++ /dev/null @@ -1,118 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -# This file includes code which was modified from https://github.com/openai/gpt-2 - -import json -import regex -from functools import lru_cache - - -@lru_cache() -def bytes_to_unicode(): - bs = ( - list(range(ord("!"), ord("~") + 1)) - + list(range(ord("¡"), ord("¬") + 1)) - + list(range(ord("®"), ord("ÿ") + 1)) - ) - cs = bs[:] - n = 0 - for b in range(2 ** 8): - if b not in bs: - bs.append(b) - cs.append(2 ** 8 + n) - n += 1 - cs = [chr(n) for n in cs] - return dict(zip(bs, cs)) - - -def get_pairs(word): - pairs = set() - prev_char = word[0] - for char in word[1:]: - pairs.add((prev_char, char)) - prev_char = char - return pairs - - -class Encoder: - def __init__(self, encoder, bpe_merges, errors="replace"): - self.encoder = encoder - self.decoder = {v: k for k, v in self.encoder.items()} - self.errors = errors - self.byte_encoder = bytes_to_unicode() - self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} - self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges)))) - self.cache = {} - self.pat = regex.compile( - r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""" - ) - - def bpe(self, token): - if token in self.cache: - return self.cache[token] - word = tuple(token) - pairs = get_pairs(word) - - if not pairs: - return token - - while True: - bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf"))) - if bigram not in self.bpe_ranks: - break - first, second = bigram - new_word = [] - i = 0 - while i < len(word): - try: - j = word.index(first, i) - new_word.extend(word[i:j]) - i = j - except: - new_word.extend(word[i:]) - break - - if word[i] == first and i < len(word) - 1 and word[i + 1] == second: - new_word.append(first + second) - i += 2 - else: - new_word.append(word[i]) - i += 1 - new_word = tuple(new_word) - word = new_word - if len(word) == 1: - break - else: - pairs = get_pairs(word) - word = " 
".join(word) - self.cache[token] = word - return word - - def encode(self, text): - bpe_tokens = [] - for token in regex.findall(self.pat, text): - token = "".join(self.byte_encoder[b] for b in token.encode("utf-8")) - bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(" ")) - return bpe_tokens - - def decode(self, tokens): - text = "".join([self.decoder[token] for token in tokens]) - text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors) - return text - - -def get_encoder(s3_client): - encoder = json.load( - s3_client.get_object( - Bucket="cortex-examples", Key="tensorflow/text-generator/gpt-2/encoder.json" - )["Body"] - ) - bpe_data = ( - s3_client.get_object( - Bucket="cortex-examples", Key="tensorflow/text-generator/gpt-2/vocab.bpe" - )["Body"] - .read() - .decode("utf-8") - ) - bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split("\n")[1:-1]] - return Encoder(encoder=encoder, bpe_merges=bpe_merges) diff --git a/examples/tensorflow/text-generator/gpt-2.ipynb b/examples/tensorflow/text-generator/gpt-2.ipynb deleted file mode 100644 index 1597816fcd..0000000000 --- a/examples/tensorflow/text-generator/gpt-2.ipynb +++ /dev/null @@ -1,383 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "gpt-2.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "kc5cIgeEmv8o", - "colab_type": "text" - }, - "source": [ - "# Exporting GPT-2\n", - "\n", - "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", - "\n", - "In this notebook, we'll show how to export [OpenAI's GPT-2 text generation model](https://github.com/openai/gpt-2) for serving." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RAWs29lAktOK", - "colab_type": "text" - }, - "source": [ - "First, we'll download the GPT-2 code repository:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "gHs3aaFaLUXq", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!git clone --no-checkout https://github.com/openai/gpt-2.git\n", - "!cd gpt-2 && git reset --hard ac5d52295f8a1c3856ea24fb239087cc1a3d1131" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A4al4P14nmni", - "colab_type": "text" - }, - "source": [ - "Next we'll specify the model size (choose one of 124M, 355M, or 774M):" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "3Y4bt6hkfuxY", - "colab_type": "code", - "colab": {}, - "cellView": "form" - }, - "source": [ - "import sys\n", - "\n", - "MODEL_SIZE = \"124M\" #@param {type:\"string\"}\n", - "\n", - "if MODEL_SIZE not in {\"124M\", \"355M\", \"774M\"}:\n", - " print(\"\\033[91m{}\\033[00m\".format('ERROR: MODEL_SIZE must be \"124M\", \"355M\", or \"774M\"'), file=sys.stderr)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "C6xRx0Monh_j", - "colab_type": "text" - }, - "source": [ - "We can use `download_model.py` to download the model:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Kb50Z6NjbJBN", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!python3 ./gpt-2/download_model.py $MODEL_SIZE" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zz2ioOcpoPjV", - "colab_type": "text" - }, - "source": [ - "Next, we'll install the required packages:" - ] - }, - { - "cell_type": "code", 
- "metadata": { - "id": "Vk4Q2RR-UZQm", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!pip install tensorflow==1.14.* numpy==1.* boto3==1.*" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "KkVf5FmuUMrl", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import sys\n", - "import os\n", - "import time\n", - "import json\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6Ay7qiQFoWRn", - "colab_type": "text" - }, - "source": [ - "Now we can export the model for serving:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "GdnYXr1IKaF0", - "colab_type": "code", - "colab": {} - }, - "source": [ - "sys.path.append(os.path.join(os.getcwd(), 'gpt-2/src'))\n", - "import model, sample\n", - "\n", - "def export_for_serving(\n", - " model_name='124M',\n", - " seed=None,\n", - " batch_size=1,\n", - " length=None,\n", - " temperature=1,\n", - " top_k=0,\n", - " models_dir='models'\n", - "):\n", - " \"\"\"\n", - " Export the model for TF Serving\n", - " :model_name=124M : String, which model to use\n", - " :seed=None : Integer seed for random number generators, fix seed to reproduce\n", - " results\n", - " :length=None : Number of tokens in generated text, if None (default), is\n", - " determined by model hyperparameters\n", - " :temperature=1 : Float value controlling randomness in boltzmann\n", - " distribution. Lower temperature results in less random completions. As the\n", - " temperature approaches zero, the model will become deterministic and\n", - " repetitive. Higher temperature results in more random completions.\n", - " :top_k=0 : Integer value controlling diversity. 
1 means only 1 word is\n", - " considered for each step (token), resulting in deterministic completions,\n", - " while 40 means 40 words are considered at each step. 0 (default) is a\n", - " special setting meaning no restrictions. 40 generally is a good value.\n", - " :models_dir : path to parent folder containing model subfolders\n", - " (i.e. contains the folder)\n", - " \"\"\"\n", - " models_dir = os.path.expanduser(os.path.expandvars(models_dir))\n", - "\n", - " hparams = model.default_hparams()\n", - " with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:\n", - " hparams.override_from_dict(json.load(f))\n", - "\n", - " if length is None:\n", - " length = hparams.n_ctx\n", - " elif length > hparams.n_ctx:\n", - " raise ValueError(\"Can't get samples longer than window size: %s\" % hparams.n_ctx)\n", - "\n", - " with tf.Session(graph=tf.Graph()) as sess:\n", - " context = tf.placeholder(tf.int32, [batch_size, None])\n", - " np.random.seed(seed)\n", - " tf.set_random_seed(seed)\n", - "\n", - " output = sample.sample_sequence(\n", - " hparams=hparams, length=length,\n", - " context=context,\n", - " batch_size=batch_size,\n", - " temperature=temperature, top_k=top_k\n", - " )\n", - "\n", - " saver = tf.train.Saver()\n", - " ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))\n", - " saver.restore(sess, ckpt)\n", - "\n", - " export_dir=os.path.join(models_dir, model_name, \"export\", str(time.time()).split('.')[0])\n", - " if not os.path.isdir(export_dir):\n", - " os.makedirs(export_dir)\n", - "\n", - " builder = tf.saved_model.builder.SavedModelBuilder(export_dir)\n", - " signature = predict_signature_def(inputs={'context': context},\n", - " outputs={'sample': output})\n", - "\n", - " builder.add_meta_graph_and_variables(sess,\n", - " [tf.saved_model.SERVING],\n", - " signature_def_map={\"predict\": signature},\n", - " strip_default_attrs=True)\n", - " builder.save()\n", - "\n", - "\n", - "export_for_serving(top_k=40, 
length=256, model_name=MODEL_SIZE)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hGfSohMrowmg", - "colab_type": "text" - }, - "source": [ - "## Upload the model to AWS\n", - "\n", - "Cortex loads models from AWS, so we need to upload the exported model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BfB5QZ82ozj9", - "colab_type": "text" - }, - "source": [ - "Set these variables to configure your AWS credentials and model upload path:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "B2RNuNk7o1c5", - "colab_type": "code", - "colab": {}, - "cellView": "form" - }, - "source": [ - "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", - "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", - "S3_UPLOAD_PATH = \"s3://my-bucket/text-generator/gpt-2\" #@param {type:\"string\"}\n", - "\n", - "import sys\n", - "import re\n", - "\n", - "if AWS_ACCESS_KEY_ID == \"\":\n", - " print(\"\\033[91m {}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", - "\n", - "elif AWS_SECRET_ACCESS_KEY == \"\":\n", - " print(\"\\033[91m {}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", - "\n", - "else:\n", - " try:\n", - " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", - " except:\n", - " print(\"\\033[91m {}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ics0omsrpS8V", - "colab_type": "text" - }, - "source": [ - "Upload the model to S3:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "BnKncToppUhN", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import os\n", - "import boto3\n", - "\n", - "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", - "\n", 
- "for dirpath, _, filenames in os.walk(\"models/{}/export\".format(MODEL_SIZE)):\n", - " for filename in filenames:\n", - " filepath = os.path.join(dirpath, filename)\n", - " filekey = os.path.join(key, MODEL_SIZE, filepath[len(\"models/{}/export/\".format(MODEL_SIZE)):])\n", - " print(\"Uploading s3://{}/{} ...\".format(bucket, filekey), end = '')\n", - " s3.upload_file(filepath, bucket, filekey)\n", - " print(\" ✓\")\n", - "\n", - "print(\"\\nUploaded model export directory to {}/{}\".format(S3_UPLOAD_PATH, MODEL_SIZE))" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IIMVPhe2qkU4", - "colab_type": "text" - }, - "source": [ - "\n", - "We also need to upload `vocab.bpe` and `encoder.json`, so that the [encoder](https://github.com/cortexlabs/cortex/blob/master/examples/tensorflow/text-generator/encoder.py) in the [Predictor](https://github.com/cortexlabs/cortex/blob/master/examples/tensorflow/text-generator/predictor.py) can encode the input text before making a request to the model." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "YdN8MtZxsO9V", - "colab_type": "code", - "colab": {} - }, - "source": [ - "print(\"Uploading s3://{}/{}/vocab.bpe ...\".format(bucket, key), end = '')\n", - "s3.upload_file(os.path.join(\"models\", MODEL_SIZE, \"vocab.bpe\"), bucket, os.path.join(key, \"vocab.bpe\"))\n", - "print(\" ✓\")\n", - "\n", - "print(\"Uploading s3://{}/{}/encoder.json ...\".format(bucket, key), end = '')\n", - "s3.upload_file(os.path.join(\"models\", MODEL_SIZE, \"encoder.json\"), bucket, os.path.join(key, \"encoder.json\"))\n", - "print(\" ✓\")" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MsoxwahIpnTO", - "colab_type": "text" - }, - "source": [ - "\n", - "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/text-generator) for how to deploy the model as an API." 
- ] - } - ] -} diff --git a/examples/tensorflow/text-generator/predictor.py b/examples/tensorflow/text-generator/predictor.py deleted file mode 100644 index 3cbc45e1d7..0000000000 --- a/examples/tensorflow/text-generator/predictor.py +++ /dev/null @@ -1,24 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import os -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -from encoder import get_encoder - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - self.client = tensorflow_client - - if os.environ.get("AWS_ACCESS_KEY_ID"): - s3 = boto3.client("s3") # client will use your credentials if available - else: - s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client - - self.encoder = get_encoder(s3) - - def predict(self, payload): - model_input = {"context": [self.encoder.encode(payload["text"])]} - prediction = self.client.predict(model_input) - return self.encoder.decode(prediction["sample"]) diff --git a/examples/tensorflow/text-generator/requirements.txt b/examples/tensorflow/text-generator/requirements.txt deleted file mode 100644 index f064e1eb7e..0000000000 --- a/examples/tensorflow/text-generator/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -requests -regex diff --git a/examples/tensorflow/text-generator/sample.json b/examples/tensorflow/text-generator/sample.json deleted file mode 100644 index dfd2a2f433..0000000000 --- a/examples/tensorflow/text-generator/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "machine learning is" -} diff --git a/examples/traffic-splitter/model.py b/examples/traffic-splitter/model.py deleted file mode 100644 index fe29ff7b6d..0000000000 --- a/examples/traffic-splitter/model.py +++ /dev/null @@ -1,59 +0,0 @@ -# WARNING: you are on the master branch; please refer 
to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable -from sklearn.datasets import load_iris -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score - - -class IrisNet(nn.Module): - def __init__(self): - super(IrisNet, self).__init__() - self.fc1 = nn.Linear(4, 100) - self.fc2 = nn.Linear(100, 100) - self.fc3 = nn.Linear(100, 3) - self.softmax = nn.Softmax(dim=1) - - def forward(self, X): - X = F.relu(self.fc1(X)) - X = self.fc2(X) - X = self.fc3(X) - X = self.softmax(X) - return X - - -if __name__ == "__main__": - iris = load_iris() - X, y = iris.data, iris.target - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) - - train_X = Variable(torch.Tensor(X_train).float()) - test_X = Variable(torch.Tensor(X_test).float()) - train_y = Variable(torch.Tensor(y_train).long()) - test_y = Variable(torch.Tensor(y_test).long()) - - model = IrisNet() - - criterion = nn.CrossEntropyLoss() - - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - - for epoch in range(1000): - optimizer.zero_grad() - out = model(train_X) - loss = criterion(out, train_y) - loss.backward() - optimizer.step() - - if epoch % 100 == 0: - print("number of epoch {} loss {}".format(epoch, loss)) - - predict_out = model(test_X) - _, predict_y = torch.max(predict_out, 1) - - print("prediction accuracy {}".format(accuracy_score(test_y.data, predict_y.data))) - - torch.save(model.state_dict(), "weights.pth") diff --git a/examples/traffic-splitter/README.md b/examples/traffic-splitting/README.md similarity index 100% rename from examples/traffic-splitter/README.md rename to examples/traffic-splitting/README.md diff --git a/examples/traffic-splitter/cortex.yaml b/examples/traffic-splitting/cortex.yaml similarity 
index 100% rename from examples/traffic-splitter/cortex.yaml rename to examples/traffic-splitting/cortex.yaml diff --git a/examples/pytorch/iris-classifier/model.py b/examples/traffic-splitting/model.py similarity index 100% rename from examples/pytorch/iris-classifier/model.py rename to examples/traffic-splitting/model.py diff --git a/examples/traffic-splitter/onnx_predictor.py b/examples/traffic-splitting/onnx_predictor.py similarity index 100% rename from examples/traffic-splitter/onnx_predictor.py rename to examples/traffic-splitting/onnx_predictor.py diff --git a/examples/traffic-splitter/pytorch_predictor.py b/examples/traffic-splitting/pytorch_predictor.py similarity index 100% rename from examples/traffic-splitter/pytorch_predictor.py rename to examples/traffic-splitting/pytorch_predictor.py diff --git a/examples/traffic-splitter/sample.json b/examples/traffic-splitting/sample.json similarity index 100% rename from examples/traffic-splitter/sample.json rename to examples/traffic-splitting/sample.json From f97ba2d169c48d55435bf99db5c270dda2c24e3b Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 1 Dec 2020 16:37:37 -0800 Subject: [PATCH 02/36] Delete README.md --- examples/README.md | 63 ---------------------------------------------- 1 file changed, 63 deletions(-) delete mode 100644 examples/README.md diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index a9b4f3ed15..0000000000 --- a/examples/README.md +++ /dev/null @@ -1,63 +0,0 @@ -# Examples - -## TensorFlow - -- [Text generation](tensorflow/text-generator): deploy OpenAI's GPT-2 to generate text. - -- [Sentiment analysis](tensorflow/sentiment-analyzer): deploy a BERT model for sentiment analysis. - -- [Image classification](tensorflow/image-classifier-inception): deploy an Inception model to classify images. - -- [Image classification](tensorflow/image-classifier-resnet50): deploy a ResNet50 model to classify images. 
- -- [License plate reader](tensorflow/license-plate-reader): deploy a YOLOv3 model (and others) to identify license plates in real time. - -- [Multi-model classification](tensorflow/multi-model-classifier): deploy 3 models (ResNet50, Iris, Inception) in a single API. - -## Keras - -- [Denoisify text documents](keras/document-denoiser): deploy an Autoencoder model to clean text document images of noise. - -## PyTorch - -- [Iris classification](pytorch/iris-classifier): deploy a model to classify iris flowers. - -- [Text generation](pytorch/text-generator): deploy Hugging Face's GPT-2 model to generate text. - -- [Sentiment analysis](pytorch/sentiment-analyzer): deploy a Hugging Face transformers model for sentiment analysis. - -- [Search completion](pytorch/search-completer): deploy a Facebook's RoBERTa model to complete search terms. - -- [Answer generation](pytorch/answer-generator): deploy Microsoft's DialoGPT model to answer questions. - -- [Text summarization](pytorch/text-summarizer): deploy a BART model (from Hugging Face's transformers library) to summarize text. - -- [Reading comprehension](pytorch/reading-comprehender): deploy an AllenNLP model for reading comprehension. - -- [Language identification](pytorch/language-identifier): deploy a fastText model to identify languages. - -- [Multi-model text analysis](pytorch/multi-model-text-analyzer): deploy 2 models (Sentiment and Summarization analyzers) in a single API. - -- [Image classification](pytorch/image-classifier-alexnet): deploy an AlexNet model from TorchVision to classify images. - -- [Image classification](pytorch/image-classifier-resnet50): deploy a ResNet50 model from TorchVision to classify images. - -- [Object detection](pytorch/object-detector): deploy a Faster R-CNN model from TorchVision to detect objects in images. - -- [Question generator](pytorch/question-generator): deploy a transformers model to generate questions given text and the correct answer. 
- -## ONNX - -- [YOLOv5 YouTube detection](onnx/yolov5-youtube): deploy a YOLOv5 model trained on COCO val2017 dataset. - -- [Multi-model classification](onnx/multi-model-classifier): deploy 3 models (ResNet50, MobileNet, ShuffleNet) in a single API. - -## scikit-learn - -- [Iris classification](sklearn/iris-classifier): deploy a model to classify iris flowers. - -- [MPG estimation](sklearn/mpg-estimator): deploy a linear regression model to estimate MPG. - -## spacy - -- [Entity recognizer](spacy/entity-recognizer): deploy a spacy model for named entity recognition. From 780e8341f61f972531355bb581e44f1602e5acd8 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Thu, 3 Dec 2020 15:11:16 -0800 Subject: [PATCH 03/36] Rename examples to tutorials --- .dockerignore | 1 - .gitbook.yaml | 6 +- build/lint.sh | 1 - build/test-examples.sh | 2 +- docs/aws/install.md | 3 - docs/deployments/batch-api.md | 7 - docs/deployments/batch-api/deployment.md | 7 - docs/deployments/batch-api/predictors.md | 15 - docs/deployments/inferentia.md | 6 +- docs/deployments/realtime-api.md | 7 - docs/deployments/realtime-api/deployment.md | 7 - docs/deployments/realtime-api/models.md | 6 - docs/deployments/realtime-api/parallelism.md | 3 - docs/deployments/realtime-api/predictors.md | 46 --- .../realtime-api/traffic-splitter.md | 6 - docs/guides/exporting.md | 31 +- docs/guides/multi-model.md | 9 - docs/guides/single-node-deployment.md | 2 +- docs/summary.md | 4 +- .../python => docs/tutorials/batch}/README.md | 8 - .../tutorials/batch}/cortex.yaml | 0 .../tutorials/batch}/predictor.py | 0 .../tutorials/batch}/requirements.txt | 0 .../tutorials/batch}/sample.json | 0 .../tutorials/compute}/README.md | 0 .../tutorials/compute}/cortex.yaml | 0 .../tutorials/compute}/cortex_gpu.yaml | 0 .../cortex_gpu_server_side_batching.yaml | 0 .../tutorials/compute}/cortex_inf.yaml | 0 .../cortex_inf_server_side_batching.yaml | 0 .../generate_gpu_resnet50_model.ipynb | 0 
.../compute}/generate_resnet50_models.ipynb | 0 .../tutorials/compute}/predictor.py | 0 .../tutorials/compute}/requirements.txt | 0 .../tutorials/compute}/sample.bin | Bin .../tutorials/compute}/sample.json | 0 .../tutorials/multi-model}/README.md | 0 .../tutorials/multi-model}/cortex.yaml | 0 .../tutorials/multi-model}/predictor.py | 0 .../tutorials/multi-model}/requirements.txt | 0 .../tutorials/multi-model}/sample.json | 0 .../tutorials/realtime}/README.md | 0 .../tutorials/realtime}/deploy.ipynb | 0 .../tutorials/realtime}/predictor.py | 0 .../tutorials/realtime}/requirements.txt | 0 .../tutorials}/traffic-splitting/README.md | 0 .../tutorials}/traffic-splitting/cortex.yaml | 0 .../tutorials}/traffic-splitting/model.py | 0 .../traffic-splitting/onnx_predictor.py | 0 .../traffic-splitting/pytorch_predictor.py | 0 .../tutorials}/traffic-splitting/sample.json | 0 {examples => docs/tutorials}/utils/README.md | 0 .../tutorials}/utils/throughput_test.py | 0 examples/batch/onnx/README.md | 6 - examples/batch/onnx/cortex.yaml | 10 - examples/batch/onnx/predictor.py | 64 ---- examples/batch/onnx/requirements.txt | 3 - examples/batch/tensorflow/README.md | 6 - examples/batch/tensorflow/cortex.yaml | 10 - examples/batch/tensorflow/predictor.py | 60 ---- examples/batch/tensorflow/requirements.txt | 1 - examples/compute/python/README.md | 59 ---- examples/compute/python/cortex.yaml | 15 - examples/compute/python/cortex_gpu.yaml | 16 - examples/compute/python/cortex_inf.yaml | 16 - .../python/generate_resnet50_models.ipynb | 121 ------- examples/compute/python/predictor.py | 93 ------ examples/hello-world/onnx/README.md | 3 - examples/hello-world/onnx/cortex.yaml | 10 - examples/hello-world/onnx/predictor.py | 20 -- examples/hello-world/onnx/sample.json | 6 - examples/hello-world/onnx/xgboost.ipynb | 244 --------------- examples/hello-world/tensorflow/README.md | 3 - examples/hello-world/tensorflow/cortex.yaml | 10 - examples/hello-world/tensorflow/predictor.py | 13 - 
examples/hello-world/tensorflow/sample.json | 6 - .../hello-world/tensorflow/tensorflow.ipynb | 296 ------------------ examples/live-reloading/onnx/README.md | 5 - .../python/mpg-estimator/cortex.yaml | 8 - .../python/mpg-estimator/predictor.py | 27 -- .../python/mpg-estimator/requirements.txt | 4 - .../python/mpg-estimator/sample.json | 7 - examples/live-reloading/tensorflow/README.md | 5 - .../onnx/multi-model-classifier/README.md | 77 ----- .../onnx/multi-model-classifier/cortex.yaml | 22 -- .../onnx/multi-model-classifier/predictor.py | 99 ------ .../onnx/multi-model-classifier/sample.json | 3 - .../python/mpg-estimator/README.md | 75 ----- .../python/mpg-estimator/cortex.yaml | 13 - .../python/mpg-estimator/predictor.py | 28 -- .../python/mpg-estimator/requirements.txt | 4 - .../python/mpg-estimator/sample.json | 7 - .../multi-model-classifier/README.md | 77 ----- .../multi-model-classifier/cortex.yaml | 32 -- .../multi-model-classifier/predictor.py | 63 ---- .../multi-model-classifier/requirements.txt | 1 - .../multi-model-classifier/sample-image.json | 3 - .../multi-model-classifier/sample-iris.json | 8 - examples/multi-model/onnx/requirements.txt | 2 - examples/multi-model/onnx/sample.json | 3 - examples/multi-model/python/README.md | 51 --- examples/multi-model/python/cortex.yaml | 11 - examples/multi-model/python/predictor.py | 25 -- examples/multi-model/python/requirements.txt | 2 - .../multi-model/python/sample-sentiment.json | 3 - .../multi-model/python/sample-summarizer.json | 3 - examples/multi-model/tensorflow/README.md | 69 ---- examples/multi-model/tensorflow/cortex.yaml | 26 -- examples/multi-model/tensorflow/predictor.py | 62 ---- .../multi-model/tensorflow/requirements.txt | 1 - .../multi-model/tensorflow/sample-image.json | 3 - 111 files changed, 14 insertions(+), 2083 deletions(-) rename {examples/batch/python => docs/tutorials/batch}/README.md (97%) rename {examples/batch/python => docs/tutorials/batch}/cortex.yaml (100%) rename 
{examples/batch/python => docs/tutorials/batch}/predictor.py (100%) rename {examples/batch/python => docs/tutorials/batch}/requirements.txt (100%) rename {examples/batch/python => docs/tutorials/batch}/sample.json (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/README.md (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/cortex.yaml (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/cortex_gpu.yaml (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/cortex_gpu_server_side_batching.yaml (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/cortex_inf.yaml (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/cortex_inf_server_side_batching.yaml (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/generate_gpu_resnet50_model.ipynb (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/generate_resnet50_models.ipynb (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/predictor.py (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/requirements.txt (100%) rename {examples/compute/tensorflow => docs/tutorials/compute}/sample.bin (100%) rename {examples/compute/python => docs/tutorials/compute}/sample.json (100%) rename {examples/multi-model/onnx => docs/tutorials/multi-model}/README.md (100%) rename {examples/multi-model/onnx => docs/tutorials/multi-model}/cortex.yaml (100%) rename {examples/multi-model/onnx => docs/tutorials/multi-model}/predictor.py (100%) rename {examples/model-caching/onnx/multi-model-classifier => docs/tutorials/multi-model}/requirements.txt (100%) rename {examples/compute/tensorflow => docs/tutorials/multi-model}/sample.json (100%) rename {examples/hello-world/python => docs/tutorials/realtime}/README.md (100%) rename {examples/hello-world/python => docs/tutorials/realtime}/deploy.ipynb (100%) rename {examples/hello-world/python => docs/tutorials/realtime}/predictor.py 
(100%) rename {examples/hello-world/python => docs/tutorials/realtime}/requirements.txt (100%) rename {examples => docs/tutorials}/traffic-splitting/README.md (100%) rename {examples => docs/tutorials}/traffic-splitting/cortex.yaml (100%) rename {examples => docs/tutorials}/traffic-splitting/model.py (100%) rename {examples => docs/tutorials}/traffic-splitting/onnx_predictor.py (100%) rename {examples => docs/tutorials}/traffic-splitting/pytorch_predictor.py (100%) rename {examples => docs/tutorials}/traffic-splitting/sample.json (100%) rename {examples => docs/tutorials}/utils/README.md (100%) rename {examples => docs/tutorials}/utils/throughput_test.py (100%) delete mode 100644 examples/batch/onnx/README.md delete mode 100644 examples/batch/onnx/cortex.yaml delete mode 100644 examples/batch/onnx/predictor.py delete mode 100644 examples/batch/onnx/requirements.txt delete mode 100644 examples/batch/tensorflow/README.md delete mode 100644 examples/batch/tensorflow/cortex.yaml delete mode 100644 examples/batch/tensorflow/predictor.py delete mode 100644 examples/batch/tensorflow/requirements.txt delete mode 100644 examples/compute/python/README.md delete mode 100644 examples/compute/python/cortex.yaml delete mode 100644 examples/compute/python/cortex_gpu.yaml delete mode 100644 examples/compute/python/cortex_inf.yaml delete mode 100644 examples/compute/python/generate_resnet50_models.ipynb delete mode 100644 examples/compute/python/predictor.py delete mode 100644 examples/hello-world/onnx/README.md delete mode 100644 examples/hello-world/onnx/cortex.yaml delete mode 100644 examples/hello-world/onnx/predictor.py delete mode 100644 examples/hello-world/onnx/sample.json delete mode 100644 examples/hello-world/onnx/xgboost.ipynb delete mode 100644 examples/hello-world/tensorflow/README.md delete mode 100644 examples/hello-world/tensorflow/cortex.yaml delete mode 100644 examples/hello-world/tensorflow/predictor.py delete mode 100644 
examples/hello-world/tensorflow/sample.json delete mode 100644 examples/hello-world/tensorflow/tensorflow.ipynb delete mode 100644 examples/live-reloading/onnx/README.md delete mode 100644 examples/live-reloading/python/mpg-estimator/cortex.yaml delete mode 100644 examples/live-reloading/python/mpg-estimator/predictor.py delete mode 100644 examples/live-reloading/python/mpg-estimator/requirements.txt delete mode 100644 examples/live-reloading/python/mpg-estimator/sample.json delete mode 100644 examples/live-reloading/tensorflow/README.md delete mode 100644 examples/model-caching/onnx/multi-model-classifier/README.md delete mode 100644 examples/model-caching/onnx/multi-model-classifier/cortex.yaml delete mode 100644 examples/model-caching/onnx/multi-model-classifier/predictor.py delete mode 100644 examples/model-caching/onnx/multi-model-classifier/sample.json delete mode 100644 examples/model-caching/python/mpg-estimator/README.md delete mode 100644 examples/model-caching/python/mpg-estimator/cortex.yaml delete mode 100644 examples/model-caching/python/mpg-estimator/predictor.py delete mode 100644 examples/model-caching/python/mpg-estimator/requirements.txt delete mode 100644 examples/model-caching/python/mpg-estimator/sample.json delete mode 100644 examples/model-caching/tensorflow/multi-model-classifier/README.md delete mode 100644 examples/model-caching/tensorflow/multi-model-classifier/cortex.yaml delete mode 100644 examples/model-caching/tensorflow/multi-model-classifier/predictor.py delete mode 100644 examples/model-caching/tensorflow/multi-model-classifier/requirements.txt delete mode 100644 examples/model-caching/tensorflow/multi-model-classifier/sample-image.json delete mode 100644 examples/model-caching/tensorflow/multi-model-classifier/sample-iris.json delete mode 100644 examples/multi-model/onnx/requirements.txt delete mode 100644 examples/multi-model/onnx/sample.json delete mode 100644 examples/multi-model/python/README.md delete mode 100644 
examples/multi-model/python/cortex.yaml delete mode 100644 examples/multi-model/python/predictor.py delete mode 100644 examples/multi-model/python/requirements.txt delete mode 100644 examples/multi-model/python/sample-sentiment.json delete mode 100644 examples/multi-model/python/sample-summarizer.json delete mode 100644 examples/multi-model/tensorflow/README.md delete mode 100644 examples/multi-model/tensorflow/cortex.yaml delete mode 100644 examples/multi-model/tensorflow/predictor.py delete mode 100644 examples/multi-model/tensorflow/requirements.txt delete mode 100644 examples/multi-model/tensorflow/sample-image.json diff --git a/.dockerignore b/.dockerignore index 1b0561f446..ee2e048e67 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,7 +2,6 @@ /bin/ /dev/ /docs/ -/examples/ **/.* **/*.md diff --git a/.gitbook.yaml b/.gitbook.yaml index 09f320911a..8b207447a3 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -5,9 +5,9 @@ structure: summary: summary.md redirects: - tutorial: ../examples/hello-world/python/README.md - tutorial/realtime: ../examples/hello-world/python/README.md - tutorial/batch: ../examples/batch/python/README.md + tutorial: ./tutorials/hello-world/python/README.md + tutorial/realtime: ./tutorials/hello-world/python/README.md + tutorial/batch: ./tutorials/batch/python/README.md install: ./aws/install.md uninstall: ./aws/uninstall.md update: ./aws/update.md diff --git a/build/lint.sh b/build/lint.sh index 656f4c4455..9243106188 100755 --- a/build/lint.sh +++ b/build/lint.sh @@ -72,7 +72,6 @@ output=$(cd "$ROOT" && find . -type f \ ! -path "./vendor/*" \ ! -path "**/.vscode/*" \ ! -path "**/__pycache__/*" \ -! -path "./examples/*" \ ! -path "./dev/config/*" \ ! -path "./bin/*" \ ! -path "./.circleci/*" \ diff --git a/build/test-examples.sh b/build/test-examples.sh index 33b562f095..a886a63587 100755 --- a/build/test-examples.sh +++ b/build/test-examples.sh @@ -19,7 +19,7 @@ set -eou pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. 
>/dev/null && pwd)" CORTEX="$ROOT/bin/cortex" -for example in $ROOT/examples/*/cortex.yaml; do +for example in $ROOT/docs/tutorials/*/cortex.yaml; do timer=1200 example_base_dir=$(dirname "${example}") retry="false" diff --git a/docs/aws/install.md b/docs/aws/install.md index 44a40b4aba..55dc61d1a8 100644 --- a/docs/aws/install.md +++ b/docs/aws/install.md @@ -19,9 +19,6 @@ cortex cluster up # or: cortex cluster up --config cluster.yaml (see configurat cortex env default aws ``` - -Try the [tutorial](../../examples/hello-world/python/README.md) or deploy one of our [examples](https://github.com/cortexlabs/cortex/tree/master/examples). - ## Configure Cortex diff --git a/docs/deployments/batch-api.md b/docs/deployments/batch-api.md index 9710290a6c..57f994d70c 100644 --- a/docs/deployments/batch-api.md +++ b/docs/deployments/batch-api.md @@ -34,10 +34,3 @@ Once you've implemented your predictor and defined your API configuration, you c A job submission typically consists of an input dataset or the location of your input dataset, the number of workers for your job, and the batch size. When a job is submitted to your Batch API endpoint, you will immediately receive a Job ID that you can use to get the job's status and logs, and stop the job if necessary. Behind the scenes, your Batch API will break down the dataset into batches and push them onto a queue. Once all of the batches have been enqueued, the Cortex Cluster will spin up the requested number of workers and initialize them with your predictor implementation. Each worker will take one batch at a time from the queue and run your Predictor implementation. After all batches have been processed, the `on_job_complete` hook in your predictor implementation (if provided) will be executed by one of the workers. At any point, you can use the Job ID that was provided upon job submission to make requests to the Batch API endpoint to get job status, progress metrics, and worker statuses. 
Logs for each job are aggregated and are accessible via the Cortex CLI or in your AWS console. - -## Next steps - -* Try the [tutorial](../../examples/batch/python/README.md) to deploy a Batch API on your Cortex cluster. -* See our [exporting guide](../guides/exporting.md) for how to export your model to use in a Batch API. -* See the [Predictor docs](batch-api/predictors.md) for how to implement a Predictor class. -* See the [API configuration docs](batch-api/api-configuration.md) for a full list of features that can be used to deploy your Batch API. diff --git a/docs/deployments/batch-api/deployment.md b/docs/deployments/batch-api/deployment.md index 9608e927cb..27b94f82bf 100644 --- a/docs/deployments/batch-api/deployment.md +++ b/docs/deployments/batch-api/deployment.md @@ -118,10 +118,3 @@ $ cortex delete my-api deleting my-api ``` - -## Additional resources - - -* [Tutorial](../../../examples/batch/python/README.md) provides a step-by-step walkthrough of deploying an image classification batch API -* [CLI documentation](../../miscellaneous/cli.md) lists all CLI commands -* [Examples](https://github.com/cortexlabs/cortex/tree/master/examples/batch) demonstrate how to deploy models from common ML libraries diff --git a/docs/deployments/batch-api/predictors.md b/docs/deployments/batch-api/predictors.md index e66681b6a8..beca48fc17 100644 --- a/docs/deployments/batch-api/predictors.md +++ b/docs/deployments/batch-api/predictors.md @@ -94,11 +94,6 @@ class PythonPredictor: For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](api-configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. 
-### Examples - - -You can find an example of a BatchAPI using a PythonPredictor in [examples/batch/python](https://github.com/cortexlabs/cortex/tree/master/examples/batch/python). - ### Pre-installed packages The following Python packages are pre-installed in Python Predictors and can be used in your implementations: @@ -231,11 +226,6 @@ When multiple models are defined using the Predictor's `models` field, the `tens For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](api-configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. -### Examples - - -You can find an example of a BatchAPI using a TensorFlowPredictor in [examples/batch/tensorflow](https://github.com/cortexlabs/cortex/tree/master/examples/batch/tensorflow). - ### Pre-installed packages The following Python packages are pre-installed in TensorFlow Predictors and can be used in your implementations: @@ -318,11 +308,6 @@ When multiple models are defined using the Predictor's `models` field, the `onnx For proper separation of concerns, it is recommended to use the constructor's `config` parameter for information such as from where to download the model and initialization files, or any configurable model parameters. You define `config` in your [API configuration](api-configuration.md), and it is passed through to your Predictor's constructor. The `config` parameters in the `API configuration` can be overridden by providing `config` in the job submission requests. -### Examples - - -You can find an example of a BatchAPI using an ONNXPredictor in [examples/batch/onnx](https://github.com/cortexlabs/cortex/tree/master/examples/batch/onnx). 
- ### Pre-installed packages The following Python packages are pre-installed in ONNX Predictors and can be used in your implementations: diff --git a/docs/deployments/inferentia.md b/docs/deployments/inferentia.md index 34390f3a57..cfd56efaa8 100644 --- a/docs/deployments/inferentia.md +++ b/docs/deployments/inferentia.md @@ -66,11 +66,7 @@ model_neuron.save(compiled_model) The versions of `tensorflow-neuron` and `torch-neuron` that are used by Cortex are found in the [Realtime API pre-installed packages list](realtime-api/predictors.md#inferentia-equipped-apis) and [Batch API pre-installed packages list](batch-api/predictors.md#inferentia-equipped-apis). When installing these packages with `pip` to compile models of your own, use the extra index URL `--extra-index-url=https://pip.repos.neuron.amazonaws.com`. -A list of model compilation examples for Inferentia can be found on the [`aws/aws-neuron-sdk`](https://github.com/aws/aws-neuron-sdk) repo for [TensorFlow](https://github.com/aws/aws-neuron-sdk/blob/master/docs/tensorflow-neuron/) and for [PyTorch](https://github.com/aws/aws-neuron-sdk/blob/master/docs/pytorch-neuron/README.md). Here are 2 examples implemented with Cortex: - - -1. [ResNet50 in TensorFlow](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/image-classifier-resnet50) -1. [ResNet50 in PyTorch](https://github.com/cortexlabs/cortex/tree/master/examples/pytorch/image-classifier-resnet50) +A list of model compilation examples for Inferentia can be found on the [`aws/aws-neuron-sdk`](https://github.com/aws/aws-neuron-sdk) repo for [TensorFlow](https://github.com/aws/aws-neuron-sdk/blob/master/docs/tensorflow-neuron/) and for [PyTorch](https://github.com/aws/aws-neuron-sdk/blob/master/docs/pytorch-neuron/README.md). 
### Improving performance diff --git a/docs/deployments/realtime-api.md b/docs/deployments/realtime-api.md index 3bdba221ea..687fb270b0 100644 --- a/docs/deployments/realtime-api.md +++ b/docs/deployments/realtime-api.md @@ -37,10 +37,3 @@ Once you've implemented your predictor and defined your API configuration, you c When a request is made to the HTTP endpoint, it gets routed to one your API's replicas (at random). The replica receives the request, parses the payload and executes the inference code you've defined in your predictor implementation and sends a response. The Cortex Cluster will automatically scale based on the incoming traffic and the autoscaling configuration you've defined. You can safely update your model or your code and use the Cortex CLI to deploy without experiencing downtime because updates to your API will be rolled out automatically. Request metrics and logs will automatically be aggregated and be accessible via the Cortex CLI or on your AWS console. - -## Next steps - -* Try the [tutorial](../../examples/hello-world/python/README.md) to deploy a Realtime API locally or on AWS. -* See our [exporting guide](../guides/exporting.md) for how to export your model to use in a Realtime API. -* See the [Predictor docs](realtime-api/predictors.md) for how to implement a Predictor class. -* See the [API configuration docs](realtime-api/api-configuration.md) for a full list of features that can be used to deploy your Realtime API. 
diff --git a/docs/deployments/realtime-api/deployment.md b/docs/deployments/realtime-api/deployment.md index b2bf5dccc1..f068ff463d 100644 --- a/docs/deployments/realtime-api/deployment.md +++ b/docs/deployments/realtime-api/deployment.md @@ -59,10 +59,3 @@ $ cortex delete my-api deleting my-api ``` - -## Additional resources - - -* [Tutorial](../../../examples/hello-world/python/README.md) provides a step-by-step walkthrough of deploying a text generation API -* [CLI documentation](../../miscellaneous/cli.md) lists all CLI commands -* [Examples](https://github.com/cortexlabs/cortex/tree/master/examples) demonstrate how to deploy models from common ML libraries diff --git a/docs/deployments/realtime-api/models.md b/docs/deployments/realtime-api/models.md index 114d30236e..ab88460a78 100644 --- a/docs/deployments/realtime-api/models.md +++ b/docs/deployments/realtime-api/models.md @@ -168,9 +168,6 @@ When using the `models.dir` field, the directory provided may contain multiple s In this case, there are two models in the directory, one of which is named "text-generator", and the other is named "sentiment-analyzer". - -Additional examples can be seen in the [multi model guide](../../guides/multi-model.md) and in [examples/model-caching](https://github.com/cortexlabs/cortex/tree/master/examples/model-caching) (remove the `cache_size` and `disk_cache_size` configurations in `cortex.yaml` to disable [multi model caching](#multi-model-caching)). - ## Live model reloading Live model reloading is a mechanism that periodically checks for updated models in the model path(s) provided in `predictor.model_path` or `predictor.models`. It is automatically enabled for all predictor types, including the Python predictor type (as long as model paths are specified via `model_path` or `models` in the `predictor` configuration). 
@@ -390,9 +387,6 @@ The model cache is a two-layer cache, configured by the following parameters in Both of these fields must be specified, in addition to either the `dir` or `paths` field (which specifies the model paths, see above for documentation). Multi model caching is only supported if `predictor.processes_per_replica` is set to 1 (the default value). - -See [examples/model-caching](https://github.com/cortexlabs/cortex/tree/master/examples/model-caching) for examples. - ### Caveats Cortex periodically runs a background script (every 10 seconds) that counts the number of models in memory and on disk, and evicts the least recently used models if the count exceeds `cache_size` / `disk_cache_size`. diff --git a/docs/deployments/realtime-api/parallelism.md b/docs/deployments/realtime-api/parallelism.md index ad44641ff8..3ec3ca7854 100644 --- a/docs/deployments/realtime-api/parallelism.md +++ b/docs/deployments/realtime-api/parallelism.md @@ -47,6 +47,3 @@ When optimizing for maximum throughput, a good rule of thumb is to follow these 1. Multiply the maximum throughput from step 1 by the `batch_interval` from step 2. The result is a number which you can assign to `max_batch_size`. 1. Run the load test again. If the inference fails with that batch size (e.g. due to running out of GPU or RAM memory), then reduce `max_batch_size` to a level that works (reduce `batch_interval` by the same factor). 1. Use the load test to determine the peak throughput of the API replica. Multiply the observed throughput by the `batch_interval` to calculate the average batch size. If the average batch size coincides with `max_batch_size`, then it might mean that the throughput could still be further increased by increasing `max_batch_size`. If it's lower, then it means that `batch_interval` is triggering the inference before `max_batch_size` requests have been aggregated. 
If modifying both `max_batch_size` and `batch_interval` doesn't improve the throughput, then the service may be bottlenecked by something else (e.g. CPU, network IO, `processes_per_replica`, `threads_per_process`, etc). - - -An example of server-side batching for the TensorFlow Predictor that has been benchmarked is found in [ResNet50 in TensorFlow](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/image-classifier-resnet50#throughput-test). diff --git a/docs/deployments/realtime-api/predictors.md b/docs/deployments/realtime-api/predictors.md index 0ff5b9951d..d4958a69b9 100644 --- a/docs/deployments/realtime-api/predictors.md +++ b/docs/deployments/realtime-api/predictors.md @@ -275,27 +275,6 @@ Your API can accept requests with different types of payloads such as `JSON`-par Your `predictor` method can return different types of objects such as `JSON`-parseable, `string`, and `bytes` objects. Navigate to the [API responses](#api-responses) section to learn about how to configure your `predictor` method to respond with different response codes and content-types. -### Examples - - -Most of the examples in [examples/tensorflow](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow) use the TensorFlow Predictor. 
- - -Here is the Predictor for [examples/tensorflow/iris-classifier](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/iris-classifier): - -```python -labels = ["setosa", "versicolor", "virginica"] - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - self.client = tensorflow_client - - def predict(self, payload): - prediction = self.client.predict(payload) - predicted_class_id = int(prediction["class_ids"][0]) - return labels[predicted_class_id] -``` - ### Pre-installed packages The following Python packages are pre-installed in TensorFlow Predictors and can be used in your implementations: @@ -387,31 +366,6 @@ Your API can accept requests with different types of payloads such as `JSON`-par Your `predictor` method can return different types of objects such as `JSON`-parseable, `string`, and `bytes` objects. Navigate to the [API responses](#api-responses) section to learn about how to configure your `predictor` method to respond with different response codes and content-types. 
-### Examples - - -[examples/onnx/iris-classifier](https://github.com/cortexlabs/cortex/tree/master/examples/onnx/iris-classifier) uses the ONNX Predictor: - -```python -labels = ["setosa", "versicolor", "virginica"] - -class ONNXPredictor: - def __init__(self, onnx_client, config): - self.client = onnx_client - - def predict(self, payload): - model_input = [ - payload["sepal_length"], - payload["sepal_width"], - payload["petal_length"], - payload["petal_width"], - ] - - prediction = self.client.predict(model_input) - predicted_class_id = prediction[0][0] - return labels[predicted_class_id] -``` - ### Pre-installed packages The following Python packages are pre-installed in ONNX Predictors and can be used in your implementations: diff --git a/docs/deployments/realtime-api/traffic-splitter.md b/docs/deployments/realtime-api/traffic-splitter.md index 90726aa173..adfee17215 100644 --- a/docs/deployments/realtime-api/traffic-splitter.md +++ b/docs/deployments/realtime-api/traffic-splitter.md @@ -73,9 +73,3 @@ deleted traffic-splitter ``` Note that this will not delete the Realtime APIs targeted by the Traffic Splitter. - -## Additional resources - -* [Traffic Splitter Tutorial](../../../examples/traffic-splitting/README.md) provides a step-by-step walkthrough for deploying an Traffic Splitter -* [Realtime API Tutorial](../../../examples/hello-world/python/README.md) provides a step-by-step walkthrough of deploying a realtime API for text generation -* [CLI documentation](../../miscellaneous/cli.md) lists all CLI commands diff --git a/docs/guides/exporting.md b/docs/guides/exporting.md index 05823382e9..b34e6c5b82 100644 --- a/docs/guides/exporting.md +++ b/docs/guides/exporting.md @@ -10,10 +10,7 @@ Here are examples for some common ML libraries: ### `torch.save()` -The recommended approach is export your PyTorch model with [torch.save()](https://pytorch.org/docs/stable/torch.html?highlight=save#torch.save). 
Here is PyTorch's documentation on [saving and loading models](https://pytorch.org/tutorials/beginner/saving_loading_models.html). - - -[examples/pytorch/iris-classifier](https://github.com/cortexlabs/cortex/blob/master/examples/pytorch/iris-classifier) exports its trained model like this: +The recommended approach is export your PyTorch model with [torch.save()](https://pytorch.org/docs/stable/torch.html?highlight=save#torch.save). Here is PyTorch's documentation on [saving and loading models](https://pytorch.org/tutorials/beginner/saving_loading_models.html). For example: ```python torch.save(model.state_dict(), "weights.pth") @@ -23,10 +20,7 @@ For Inferentia-equipped instances, check the [Inferentia instructions](inferenti ### ONNX -It may also be possible to export your PyTorch model into the ONNX format using [torch.onnx.export()](https://pytorch.org/docs/stable/onnx.html#torch.onnx.export). - - -For example, if [examples/pytorch/iris-classifier](https://github.com/cortexlabs/cortex/blob/master/examples/pytorch/iris-classifier) were to export the model to ONNX, it would look like this: +It may also be possible to export your PyTorch model into the ONNX format using [torch.onnx.export()](https://pytorch.org/docs/stable/onnx.html#torch.onnx.export). For example: ```python placeholder = torch.randn(1, 4) @@ -63,8 +57,7 @@ A TensorFlow `SavedModel` directory should have this structure: └── variables.data-00002-of-... ``` - -Most of the TensorFlow examples use this approach. 
Here is the relevant code from [examples/tensorflow/sentiment-analyzer](https://github.com/cortexlabs/cortex/blob/master/examples/tensorflow/sentiment-analyzer): +For example: ```python import tensorflow as tf @@ -101,24 +94,15 @@ zip -r bert.zip 1568244606 aws s3 cp bert.zip s3://my-bucket/bert.zip ``` - -[examples/tensorflow/iris-classifier](https://github.com/cortexlabs/cortex/blob/master/examples/tensorflow/iris-classifier) also use the `SavedModel` approach, and includes a Python notebook demonstrating how it was exported. - ### Other model formats There are other ways to export Keras or TensorFlow models, and as long as they can be loaded and used to make predictions in Python, they will be supported by Cortex. - -For example, the `crnn` API in [examples/tensorflow/license-plate-reader](https://github.com/cortexlabs/cortex/blob/master/examples/tensorflow/license-plate-reader) uses this approach. - ## Scikit-learn ### `pickle` -Scikit-learn models are typically exported using `pickle`. Here is [Scikit-learn's documentation](https://scikit-learn.org/stable/modules/model_persistence.html). - - -[examples/sklearn/iris-classifier](https://github.com/cortexlabs/cortex/blob/master/examples/sklearn/iris-classifier) uses this approach. Here is the relevant code: +Scikit-learn models are typically exported using `pickle`. Here is [Scikit-learn's documentation](https://scikit-learn.org/stable/modules/model_persistence.html). For example: ```python pickle.dump(model, open("model.pkl", "wb")) @@ -126,7 +110,7 @@ pickle.dump(model, open("model.pkl", "wb")) ### ONNX -It is also possible to export a scikit-learn model to the ONNX format using [onnxmltools](https://github.com/onnx/onnxmltools). Here is an example: +It is also possible to export a scikit-learn model to the ONNX format using [onnxmltools](https://github.com/onnx/onnxmltools). 
For example: ```python from sklearn.linear_model import LogisticRegression @@ -168,10 +152,7 @@ model.save_model("model.bin") ### ONNX -It is also possible to export an XGBoost model to the ONNX format using [onnxmltools](https://github.com/onnx/onnxmltools). - - -[examples/onnx/iris-classifier](https://github.com/cortexlabs/cortex/blob/master/examples/onnx/iris-classifier) uses this approach. Here is the relevant code: +It is also possible to export an XGBoost model to the ONNX format using [onnxmltools](https://github.com/onnx/onnxmltools). For example: ```python from onnxmltools.convert import convert_xgboost diff --git a/docs/guides/multi-model.md b/docs/guides/multi-model.md index e8380d7631..7ea950b1f7 100644 --- a/docs/guides/multi-model.md +++ b/docs/guides/multi-model.md @@ -76,9 +76,6 @@ $ curl "${api_endpoint}?version=2" -X POST -H "Content-Type: application/json" - For the Python Predictor, the API configuration for a multi-model API is similar to single-model APIs. The Predictor's `config` field can be used to customize the behavior of the `predictor.py` implementation. - -The following template is based on the [pytorch/multi-model-text-analyzer](https://github.com/cortexlabs/cortex/tree/master/examples/pytorch/multi-model-text-analyzer) example. - #### `cortex.yaml` ```yaml @@ -154,9 +151,6 @@ Machine learning is the study of algorithms and statistical models that computer For the TensorFlow Predictor, a multi-model API is configured by placing the list of models in the Predictor's `models` field (each model will specify its own unique name). The `predict()` method of the `tensorflow_client` object expects a second argument that represents the name of the model that will be used for inference. - -The following template is based on the [multi-model/tensorflow](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/multi-model-classifier) example. 
- ### `cortex.yaml` ```yaml @@ -238,9 +232,6 @@ $ curl "${ENDPOINT}?model=inception" -X POST -H "Content-Type: application/json" For the ONNX Predictor, a multi-model API is configured by placing the list of models in the Predictor's `models` field (each model will specify its own unique name). The `predict()` method of the `onnx_client` object expects a second argument that represents the name of the model that will be used for inference. - -The following template is based on the [onnx/multi-model-classifier](https://github.com/cortexlabs/cortex/tree/master/examples/onnx/multi-model-classifier) example. - ### `cortex.yaml` ```yaml diff --git a/docs/guides/single-node-deployment.md b/docs/guides/single-node-deployment.md index 1ec54a0003..c9973ebe68 100644 --- a/docs/guides/single-node-deployment.md +++ b/docs/guides/single-node-deployment.md @@ -120,7 +120,7 @@ You can now use Cortex to deploy your model: ```bash $ git clone -b master https://github.com/cortexlabs/cortex.git -$ cd cortex/examples/hello-world/python +$ cd cortex/docs/tutorials/realtime $ cortex deploy diff --git a/docs/summary.md b/docs/summary.md index d1891fc600..b21c1caed4 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -32,14 +32,14 @@ * [Autoscaling](deployments/realtime-api/autoscaling.md) * [Prediction monitoring](deployments/realtime-api/prediction-monitoring.md) * [Traffic Splitter](deployments/realtime-api/traffic-splitter.md) - * [Realtime API tutorial](../examples/hello-world/python/README.md) + * [Realtime API tutorial](tutorials/realtime/README.md) * [Batch API](deployments/batch-api.md) * [Predictor implementation](deployments/batch-api/predictors.md) * [API configuration](deployments/batch-api/api-configuration.md) * [API deployment](deployments/batch-api/deployment.md) * [Endpoints](deployments/batch-api/endpoints.md) * [Job statuses](deployments/batch-api/statuses.md) - * [Batch API tutorial](../examples/batch/python/README.md) + * [Batch API 
tutorial](tutorials/batch/README.md) ## Advanced diff --git a/examples/batch/python/README.md b/docs/tutorials/batch/README.md similarity index 97% rename from examples/batch/python/README.md rename to docs/tutorials/batch/README.md index 03cc827d35..a37cb8f966 100644 --- a/examples/batch/python/README.md +++ b/docs/tutorials/batch/README.md @@ -570,11 +570,3 @@ deleting image-classifier ``` Running `cortex delete` will stop all in progress jobs for the API and will delete job history for that API. It will not spin down your cluster. - -## Next steps - - -* Deploy another one of our [batch examples](https://github.com/cortexlabs/cortex/tree/master/examples/batch). -* See our [exporting guide](../../../docs/guides/exporting.md) for how to export your model to use in an API. -* Try the [realtime API tutorial](../../pytorch/text-generator/README.md) to learn how to deploy realtime APIs in Cortex. -* See [uninstall](../../../docs/aws/uninstall.md) if you'd like to spin down your cluster. diff --git a/examples/batch/python/cortex.yaml b/docs/tutorials/batch/cortex.yaml similarity index 100% rename from examples/batch/python/cortex.yaml rename to docs/tutorials/batch/cortex.yaml diff --git a/examples/batch/python/predictor.py b/docs/tutorials/batch/predictor.py similarity index 100% rename from examples/batch/python/predictor.py rename to docs/tutorials/batch/predictor.py diff --git a/examples/batch/python/requirements.txt b/docs/tutorials/batch/requirements.txt similarity index 100% rename from examples/batch/python/requirements.txt rename to docs/tutorials/batch/requirements.txt diff --git a/examples/batch/python/sample.json b/docs/tutorials/batch/sample.json similarity index 100% rename from examples/batch/python/sample.json rename to docs/tutorials/batch/sample.json diff --git a/examples/compute/tensorflow/README.md b/docs/tutorials/compute/README.md similarity index 100% rename from examples/compute/tensorflow/README.md rename to docs/tutorials/compute/README.md 
diff --git a/examples/compute/tensorflow/cortex.yaml b/docs/tutorials/compute/cortex.yaml similarity index 100% rename from examples/compute/tensorflow/cortex.yaml rename to docs/tutorials/compute/cortex.yaml diff --git a/examples/compute/tensorflow/cortex_gpu.yaml b/docs/tutorials/compute/cortex_gpu.yaml similarity index 100% rename from examples/compute/tensorflow/cortex_gpu.yaml rename to docs/tutorials/compute/cortex_gpu.yaml diff --git a/examples/compute/tensorflow/cortex_gpu_server_side_batching.yaml b/docs/tutorials/compute/cortex_gpu_server_side_batching.yaml similarity index 100% rename from examples/compute/tensorflow/cortex_gpu_server_side_batching.yaml rename to docs/tutorials/compute/cortex_gpu_server_side_batching.yaml diff --git a/examples/compute/tensorflow/cortex_inf.yaml b/docs/tutorials/compute/cortex_inf.yaml similarity index 100% rename from examples/compute/tensorflow/cortex_inf.yaml rename to docs/tutorials/compute/cortex_inf.yaml diff --git a/examples/compute/tensorflow/cortex_inf_server_side_batching.yaml b/docs/tutorials/compute/cortex_inf_server_side_batching.yaml similarity index 100% rename from examples/compute/tensorflow/cortex_inf_server_side_batching.yaml rename to docs/tutorials/compute/cortex_inf_server_side_batching.yaml diff --git a/examples/compute/tensorflow/generate_gpu_resnet50_model.ipynb b/docs/tutorials/compute/generate_gpu_resnet50_model.ipynb similarity index 100% rename from examples/compute/tensorflow/generate_gpu_resnet50_model.ipynb rename to docs/tutorials/compute/generate_gpu_resnet50_model.ipynb diff --git a/examples/compute/tensorflow/generate_resnet50_models.ipynb b/docs/tutorials/compute/generate_resnet50_models.ipynb similarity index 100% rename from examples/compute/tensorflow/generate_resnet50_models.ipynb rename to docs/tutorials/compute/generate_resnet50_models.ipynb diff --git a/examples/compute/tensorflow/predictor.py b/docs/tutorials/compute/predictor.py similarity index 100% rename from 
examples/compute/tensorflow/predictor.py rename to docs/tutorials/compute/predictor.py diff --git a/examples/compute/tensorflow/requirements.txt b/docs/tutorials/compute/requirements.txt similarity index 100% rename from examples/compute/tensorflow/requirements.txt rename to docs/tutorials/compute/requirements.txt diff --git a/examples/compute/tensorflow/sample.bin b/docs/tutorials/compute/sample.bin similarity index 100% rename from examples/compute/tensorflow/sample.bin rename to docs/tutorials/compute/sample.bin diff --git a/examples/compute/python/sample.json b/docs/tutorials/compute/sample.json similarity index 100% rename from examples/compute/python/sample.json rename to docs/tutorials/compute/sample.json diff --git a/examples/multi-model/onnx/README.md b/docs/tutorials/multi-model/README.md similarity index 100% rename from examples/multi-model/onnx/README.md rename to docs/tutorials/multi-model/README.md diff --git a/examples/multi-model/onnx/cortex.yaml b/docs/tutorials/multi-model/cortex.yaml similarity index 100% rename from examples/multi-model/onnx/cortex.yaml rename to docs/tutorials/multi-model/cortex.yaml diff --git a/examples/multi-model/onnx/predictor.py b/docs/tutorials/multi-model/predictor.py similarity index 100% rename from examples/multi-model/onnx/predictor.py rename to docs/tutorials/multi-model/predictor.py diff --git a/examples/model-caching/onnx/multi-model-classifier/requirements.txt b/docs/tutorials/multi-model/requirements.txt similarity index 100% rename from examples/model-caching/onnx/multi-model-classifier/requirements.txt rename to docs/tutorials/multi-model/requirements.txt diff --git a/examples/compute/tensorflow/sample.json b/docs/tutorials/multi-model/sample.json similarity index 100% rename from examples/compute/tensorflow/sample.json rename to docs/tutorials/multi-model/sample.json diff --git a/examples/hello-world/python/README.md b/docs/tutorials/realtime/README.md similarity index 100% rename from 
examples/hello-world/python/README.md rename to docs/tutorials/realtime/README.md diff --git a/examples/hello-world/python/deploy.ipynb b/docs/tutorials/realtime/deploy.ipynb similarity index 100% rename from examples/hello-world/python/deploy.ipynb rename to docs/tutorials/realtime/deploy.ipynb diff --git a/examples/hello-world/python/predictor.py b/docs/tutorials/realtime/predictor.py similarity index 100% rename from examples/hello-world/python/predictor.py rename to docs/tutorials/realtime/predictor.py diff --git a/examples/hello-world/python/requirements.txt b/docs/tutorials/realtime/requirements.txt similarity index 100% rename from examples/hello-world/python/requirements.txt rename to docs/tutorials/realtime/requirements.txt diff --git a/examples/traffic-splitting/README.md b/docs/tutorials/traffic-splitting/README.md similarity index 100% rename from examples/traffic-splitting/README.md rename to docs/tutorials/traffic-splitting/README.md diff --git a/examples/traffic-splitting/cortex.yaml b/docs/tutorials/traffic-splitting/cortex.yaml similarity index 100% rename from examples/traffic-splitting/cortex.yaml rename to docs/tutorials/traffic-splitting/cortex.yaml diff --git a/examples/traffic-splitting/model.py b/docs/tutorials/traffic-splitting/model.py similarity index 100% rename from examples/traffic-splitting/model.py rename to docs/tutorials/traffic-splitting/model.py diff --git a/examples/traffic-splitting/onnx_predictor.py b/docs/tutorials/traffic-splitting/onnx_predictor.py similarity index 100% rename from examples/traffic-splitting/onnx_predictor.py rename to docs/tutorials/traffic-splitting/onnx_predictor.py diff --git a/examples/traffic-splitting/pytorch_predictor.py b/docs/tutorials/traffic-splitting/pytorch_predictor.py similarity index 100% rename from examples/traffic-splitting/pytorch_predictor.py rename to docs/tutorials/traffic-splitting/pytorch_predictor.py diff --git a/examples/traffic-splitting/sample.json 
b/docs/tutorials/traffic-splitting/sample.json similarity index 100% rename from examples/traffic-splitting/sample.json rename to docs/tutorials/traffic-splitting/sample.json diff --git a/examples/utils/README.md b/docs/tutorials/utils/README.md similarity index 100% rename from examples/utils/README.md rename to docs/tutorials/utils/README.md diff --git a/examples/utils/throughput_test.py b/docs/tutorials/utils/throughput_test.py similarity index 100% rename from examples/utils/throughput_test.py rename to docs/tutorials/utils/throughput_test.py diff --git a/examples/batch/onnx/README.md b/examples/batch/onnx/README.md deleted file mode 100644 index b3091bb133..0000000000 --- a/examples/batch/onnx/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Batch Image Classifier in ONNX - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - - -Please refer to the [tutorial](https://docs.cortex.dev/v/master/batch-api/image-classifier#deploy-your-batch-api) to see how to deploy a Batch API with Cortex. diff --git a/examples/batch/onnx/cortex.yaml b/examples/batch/onnx/cortex.yaml deleted file mode 100644 index 4bdf7080e1..0000000000 --- a/examples/batch/onnx/cortex.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier - kind: BatchAPI - predictor: - type: onnx - path: predictor.py - model_path: s3://cortex-examples/image-classifier/alexnet_batch/ - compute: - cpu: 1 diff --git a/examples/batch/onnx/predictor.py b/examples/batch/onnx/predictor.py deleted file mode 100644 index 7f005a0b72..0000000000 --- a/examples/batch/onnx/predictor.py +++ /dev/null @@ -1,64 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import requests -import numpy as np -import base64 -from PIL import Image -from io import BytesIO -from torchvision import transforms -import boto3 -import json -import re -import os - - -class ONNXPredictor: - def __init__(self, onnx_client, config, job_spec): - self.client = onnx_client - - self.labels = requests.get( - "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" - ).text.split("\n")[1:] - - # https://github.com/pytorch/examples/blob/447974f6337543d4de6b888e244a964d3c9b71f6/imagenet/main.py#L198-L199 - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - self.preprocess = transforms.Compose( - [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] - ) - - if len(config.get("dest_s3_dir", "")) == 0: - raise Exception("'dest_s3_dir' field was not provided in job submission") - - self.s3 = boto3.client("s3") - - self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() - self.key = os.path.join(self.key, job_spec["job_id"]) - - def predict(self, payload, batch_id): - arr_list = [] - - # download and preprocess each image - for image_url in payload: - if image_url.startswith("s3://"): - bucket, image_key = re.match("s3://(.+?)/(.+)", image_url).groups() - 
image_bytes = self.s3.get_object(Bucket=bucket, Key=image_key)["Body"].read() - else: - image_bytes = requests.get(image_url).content - - img_pil = Image.open(BytesIO(image_bytes)) - arr_list.append(self.preprocess(img_pil).numpy()) - - # classify the batch of images - imgs_arr = np.stack(arr_list, axis=0) - result = self.client.predict(imgs_arr) - - # extract predicted classes - predicted_classes = np.argmax(result[0], axis=1) - results = [ - {"url": payload[i], "class": self.labels[class_idx]} - for i, class_idx in enumerate(predicted_classes) - ] - - # save results - json_output = json.dumps(results) - self.s3.put_object(Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", Body=json_output) diff --git a/examples/batch/onnx/requirements.txt b/examples/batch/onnx/requirements.txt deleted file mode 100644 index 5a2cde2a12..0000000000 --- a/examples/batch/onnx/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -torchvision -boto3 -pillow diff --git a/examples/batch/tensorflow/README.md b/examples/batch/tensorflow/README.md deleted file mode 100644 index 163fe34968..0000000000 --- a/examples/batch/tensorflow/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Batch Image Classifier in TensorFlow - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - - -Please refer to the [tutorial](https://docs.cortex.dev/v/master/batch-api/image-classifier#deploy-your-batch-api) to see how to deploy a Batch API with Cortex. diff --git a/examples/batch/tensorflow/cortex.yaml b/examples/batch/tensorflow/cortex.yaml deleted file mode 100644 index 189e1a9b0e..0000000000 --- a/examples/batch/tensorflow/cortex.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier - kind: BatchAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ - compute: - cpu: 1 diff --git a/examples/batch/tensorflow/predictor.py b/examples/batch/tensorflow/predictor.py deleted file mode 100644 index da4bb39ec3..0000000000 --- a/examples/batch/tensorflow/predictor.py +++ /dev/null @@ -1,60 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import requests -import numpy as np -from PIL import Image -from io import BytesIO -import json -import os -import re -import boto3 -import tensorflow as tf - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config, job_spec): - self.client = tensorflow_client - self.labels = requests.get( - "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" - ).text.split("\n")[1:] - - if len(config.get("dest_s3_dir", "")) == 0: - raise Exception("'dest_s3_dir' field was not provided in job submission") - - self.s3 = boto3.client("s3") - - self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() - self.key = os.path.join(self.key, job_spec["job_id"]) - - def predict(self, payload, batch_id): - arr_list = [] - - # download and preprocess each image - for image_url in payload: - if image_url.startswith("s3://"): - bucket, image_key = re.match("s3://(.+?)/(.+)", image_url).groups() - image_bytes = self.s3.get_object(Bucket=bucket, Key=image_key)["Body"].read() - else: - image_bytes = requests.get(image_url).content - - decoded_image = np.asarray(Image.open(BytesIO(image_bytes)), dtype=np.float32) / 255 - resized_image = tf.image.resize( - decoded_image, [224, 224], 
method=tf.image.ResizeMethod.BILINEAR - ) - arr_list.append(resized_image) - - # classify the batch of images - model_input = {"images": np.stack(arr_list, axis=0)} - predictions = self.client.predict(model_input) - - # extract predicted classes - reshaped_predictions = np.reshape(np.array(predictions["classes"]), [-1, len(self.labels)]) - predicted_classes = np.argmax(reshaped_predictions, axis=1) - results = [ - {"url": payload[i], "class": self.labels[class_idx]} - for i, class_idx in enumerate(predicted_classes) - ] - - # save results - json_output = json.dumps(results) - self.s3.put_object(Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", Body=json_output) diff --git a/examples/batch/tensorflow/requirements.txt b/examples/batch/tensorflow/requirements.txt deleted file mode 100644 index 7e2fba5e6c..0000000000 --- a/examples/batch/tensorflow/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -Pillow diff --git a/examples/compute/python/README.md b/examples/compute/python/README.md deleted file mode 100644 index f13020d874..0000000000 --- a/examples/compute/python/README.md +++ /dev/null @@ -1,59 +0,0 @@ -# Image Classifier with ResNet50 - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example implements an image recognition system using ResNet50, which allows for the recognition of up to 1000 classes. - -## Deploying - -There are 3 Cortex APIs available in this example: - -1. [cortex.yaml](cortex.yaml) - can be used with any instances. -1. [cortex_inf.yaml](cortex_inf.yaml) - to be used with `inf1` instances. -1. [cortex_gpu.yaml](cortex_gpu.yaml) - to be used with GPU instances. - -To deploy an API, run: - -```bash -cortex deploy -``` - -E.g. 
- -```bash -cortex deploy cortex_gpu.yaml -``` - -## Verifying your API - -Check that your API is live by running `cortex get image-classifier-resnet50`, and copy the example `curl` command that's shown. After the API is live, run the `curl` command, e.g. - -```bash -$ curl -X POST -H "Content-Type: application/json" -d @sample.json - -["tabby", "Egyptian_cat", "tiger_cat", "tiger", "plastic_bag"] -``` - -The following image is embedded in [sample.json](sample.json): - -![image](https://i.imgur.com/213xcvs.jpg) - -## Exporting SavedModels - -This example deploys models that we have built and uploaded to a public S3 bucket. If you want to build the models yourself, follow these instructions. - -Run the following command to install the dependencies required for the [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook: - -```bash -pip install --extra-index-url=https://pip.repos.neuron.amazonaws.com \ - neuron-cc==1.0.9410.0+6008239556 \ - torch-neuron==1.0.825.0 -``` - -Also, `torchvision` has to be installed, but without any dependencies: - -```bash -pip install torchvision==0.4.2 --no-deps -``` - -The [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook will generate 2 torch models. One is saved as `resnet50.pt` which can be run on GPU or CPU, and another is saved as `resnet50_neuron.pt`, which can only be run on `inf1` instances. diff --git a/examples/compute/python/cortex.yaml b/examples/compute/python/cortex.yaml deleted file mode 100644 index d6c1cb64c9..0000000000 --- a/examples/compute/python/cortex.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-resnet50 - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - config: - model_path: s3://cortex-examples/pytorch/image-classifier-resnet50 - model_name: resnet50.pt - device: cpu - classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - input_shape: [224, 224] - compute: - cpu: 1 diff --git a/examples/compute/python/cortex_gpu.yaml b/examples/compute/python/cortex_gpu.yaml deleted file mode 100644 index 7f06603504..0000000000 --- a/examples/compute/python/cortex_gpu.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-resnet50 - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - config: - model_path: s3://cortex-examples/pytorch/image-classifier-resnet50 - model_name: resnet50.pt - device: gpu - classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - input_shape: [224, 224] - compute: - gpu: 1 - cpu: 1 diff --git a/examples/compute/python/cortex_inf.yaml b/examples/compute/python/cortex_inf.yaml deleted file mode 100644 index 55ce4ff793..0000000000 --- a/examples/compute/python/cortex_inf.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-resnet50 - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - config: - model_path: s3://cortex-examples/pytorch/image-classifier-resnet50 - model_name: resnet50_neuron.pt - device: inf - classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - input_shape: [224, 224] - compute: - inf: 1 - cpu: 1 diff --git a/examples/compute/python/generate_resnet50_models.ipynb b/examples/compute/python/generate_resnet50_models.ipynb deleted file mode 100644 index e4e1343d85..0000000000 --- a/examples/compute/python/generate_resnet50_models.ipynb +++ /dev/null @@ -1,121 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Generate Resnet50 Models\n", - "\n", - "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import numpy as np\n", - "import os\n", - "import torch_neuron\n", - "from torchvision import models" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load Resnet50 model" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "model = models.resnet50(pretrained=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Compile model for Inferentia. Should have worked with 1 NeuronCores, but it appears that setting it to a minimum of 2 is required." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:Neuron:compiling module ResNet with neuron-cc\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Compiler args type is value is ['--num-neuroncores', '2']\n" - ] - } - ], - "source": [ - "model.eval()\n", - "batch_size = 1\n", - "image = torch.zeros([batch_size, 3, 224, 224], dtype=torch.float32)\n", - "model_neuron = torch.neuron.trace(model, example_inputs=[image], compiler_args=[\"--num-neuroncores\", \"2\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save both models to disk" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "model_neuron.save(\"resnet50_neuron.pt\")\n", - "torch.save(model.state_dict(), \"resnet50.pt\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/compute/python/predictor.py b/examples/compute/python/predictor.py deleted file mode 100644 index 8059c4078c..0000000000 --- a/examples/compute/python/predictor.py +++ /dev/null @@ -1,93 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import os -import torch -import cv2 -import numpy as np -import requests -import re -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -from torchvision import models, transforms, datasets - - -def get_url_image(url_image): - """ - Get numpy image from URL image. - """ - resp = requests.get(url_image, stream=True).raw - image = np.asarray(bytearray(resp.read()), dtype="uint8") - image = cv2.imdecode(image, cv2.IMREAD_COLOR) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - return image - - -class PythonPredictor: - def __init__(self, config): - # load classes - classes = requests.get(config["classes"]).json() - self.idx2label = [classes[str(k)][1] for k in range(len(classes))] - - # create s3 client - if os.environ.get("AWS_ACCESS_KEY_ID"): - s3 = boto3.client("s3") # client will use your credentials if available - else: - s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client - - # download the model - model_path = config["model_path"] - model_name = config["model_name"] - bucket, key = re.match("s3://(.+?)/(.+)", model_path).groups() - s3.download_file(bucket, os.path.join(key, model_name), model_name) - - # load the model - self.device = None - if config["device"] == "gpu": - self.device = torch.device("cuda") - self.model = models.resnet50() - self.model.load_state_dict(torch.load(model_name, map_location="cuda:0")) - self.model.eval() - self.model = self.model.to(self.device) - elif config["device"] == "cpu": - self.model = models.resnet50() - self.model.load_state_dict(torch.load(model_name)) - self.model.eval() - elif config["device"] == "inf": - import torch_neuron - - self.model = torch.jit.load(model_name) - else: - raise RuntimeError("invalid predictor: config: must be cpu, gpu, or inf") - - # save normalization transform for later use - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 
std=[0.229, 0.224, 0.225]) - self.transform = transforms.Compose( - [ - transforms.ToPILImage(), - transforms.Resize(config["input_shape"]), - transforms.ToTensor(), - normalize, - ] - ) - - def predict(self, payload): - # preprocess image - image = get_url_image(payload["url"]) - image = self.transform(image) - image = torch.tensor(image.numpy()[np.newaxis, ...]) - - # predict - if self.device: - results = self.model(image.to(self.device)) - else: - results = self.model(image) - - # Get the top 5 results - top5_idx = results[0].sort()[1][-5:] - - # Lookup and print the top 5 labels - top5_labels = [self.idx2label[idx] for idx in top5_idx] - top5_labels = top5_labels[::-1] - - return top5_labels diff --git a/examples/hello-world/onnx/README.md b/examples/hello-world/onnx/README.md deleted file mode 100644 index a45b69db8f..0000000000 --- a/examples/hello-world/onnx/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/tutorial) to see how to deploy an example with Cortex. diff --git a/examples/hello-world/onnx/cortex.yaml b/examples/hello-world/onnx/cortex.yaml deleted file mode 100644 index 00b8a61112..0000000000 --- a/examples/hello-world/onnx/cortex.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: iris-classifier - kind: RealtimeAPI - predictor: - type: onnx - path: predictor.py - model_path: s3://cortex-examples/onnx/iris-classifier/ - monitoring: - model_type: classification diff --git a/examples/hello-world/onnx/predictor.py b/examples/hello-world/onnx/predictor.py deleted file mode 100644 index b135129e14..0000000000 --- a/examples/hello-world/onnx/predictor.py +++ /dev/null @@ -1,20 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -labels = ["setosa", "versicolor", "virginica"] - - -class ONNXPredictor: - def __init__(self, onnx_client, config): - self.client = onnx_client - - def predict(self, payload): - model_input = [ - payload["sepal_length"], - payload["sepal_width"], - payload["petal_length"], - payload["petal_width"], - ] - - prediction = self.client.predict(model_input) - predicted_class_id = prediction[0][0] - return labels[predicted_class_id] diff --git a/examples/hello-world/onnx/sample.json b/examples/hello-world/onnx/sample.json deleted file mode 100644 index 252c666b3a..0000000000 --- a/examples/hello-world/onnx/sample.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "sepal_length": 5.2, - "sepal_width": 3.6, - "petal_length": 1.4, - "petal_width": 0.3 -} diff --git a/examples/hello-world/onnx/xgboost.ipynb b/examples/hello-world/onnx/xgboost.ipynb deleted file mode 100644 index d4e1497360..0000000000 --- a/examples/hello-world/onnx/xgboost.ipynb +++ /dev/null @@ -1,244 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "iris_xgboost.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "file_extension": ".py", - 
"mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "IiTxCwB7t6Ef", - "colab_type": "text" - }, - "source": [ - "# Training an Iris classifier using XGBoost\n", - "\n", - "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", - "\n", - "In this notebook, we'll show how to train a classifier trained on the [iris data set](https://archive.ics.uci.edu/ml/datasets/iris) using XGBoost." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j6QdLAUpuW7r", - "colab_type": "text" - }, - "source": [ - "## Install Dependencies\n", - "First, we'll install our dependencies:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "BQE5z_kHj9jV", - "colab_type": "code", - "colab": {} - }, - "source": [ - "pip install xgboost==0.90 scikit-learn==0.21.* onnxmltools==1.5.* boto3==1.*" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yEVK-sLnumqn", - "colab_type": "text" - }, - "source": [ - "## Load the data\n", - "We can use scikit-learn to load the Iris dataset:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "tx9Xw0x0lfbl", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from sklearn.datasets import load_iris\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "iris = load_iris()\n", - "X, y = iris.data, iris.target\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "obGdgMm3urb2", - "colab_type": "text" - }, - "source": [ - "## Train the model\n", - "We'll use XGBoost's 
[`XGBClassifier`](https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.XGBClassifier) to train the model:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jjYp8TaflhW0", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import xgboost as xgb\n", - "\n", - "xgb_model = xgb.XGBClassifier()\n", - "xgb_model = xgb_model.fit(X_train, y_train)\n", - "\n", - "print(\"Test data accuracy of the xgb classifier is {:.2f}\".format(xgb_model.score(X_test, y_test))) # Accuracy should be > 90%" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Hdwu-wzJvJLb", - "colab_type": "text" - }, - "source": [ - "## Export the model\n", - "Now we can export the model in the ONNX format:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "AVgs2mkdllRn", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from onnxmltools.convert import convert_xgboost\n", - "from onnxconverter_common.data_types import FloatTensorType\n", - "\n", - "onnx_model = convert_xgboost(xgb_model, initial_types=[(\"input\", FloatTensorType([1, 4]))])\n", - "\n", - "with open(\"gbtree.onnx\", \"wb\") as f:\n", - " f.write(onnx_model.SerializeToString())" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ipVlP4yPxFxw", - "colab_type": "text" - }, - "source": [ - "## Upload the model to AWS\n", - "\n", - "Cortex loads models from AWS, so we need to upload the exported model." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3IqsfyylxLhy", - "colab_type": "text" - }, - "source": [ - "Set these variables to configure your AWS credentials and model upload path:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "lc9LBH1uHT_h", - "colab_type": "code", - "cellView": "form", - "colab": {} - }, - "source": [ - "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", - "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", - "S3_UPLOAD_PATH = \"s3://my-bucket/iris-classifier/gbtree.onnx\" #@param {type:\"string\"}\n", - "\n", - "import sys\n", - "import re\n", - "\n", - "if AWS_ACCESS_KEY_ID == \"\":\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", - "\n", - "elif AWS_SECRET_ACCESS_KEY == \"\":\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", - "\n", - "else:\n", - " try:\n", - " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", - " except:\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NXeuZsaQxUc8", - "colab_type": "text" - }, - "source": [ - "Upload the model to S3:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "YLmnWTEVsu55", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import boto3\n", - "\n", - "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", - "print(\"Uploading {} ...\".format(S3_UPLOAD_PATH), end = '')\n", - "s3.upload_file(\"gbtree.onnx\", bucket, key)\n", - "print(\" ✓\")" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aR-mmcUzyCV3", - "colab_type": "text" - }, - "source": [ - "\n", - "That's it! 
See the [example](https://github.com/cortexlabs/cortex/tree/master/examples/onnx/iris-classifier) for how to deploy the model as an API." - ] - } - ] -} diff --git a/examples/hello-world/tensorflow/README.md b/examples/hello-world/tensorflow/README.md deleted file mode 100644 index a45b69db8f..0000000000 --- a/examples/hello-world/tensorflow/README.md +++ /dev/null @@ -1,3 +0,0 @@ -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Please refer to the [tutorial](https://docs.cortex.dev/tutorial) to see how to deploy an example with Cortex. diff --git a/examples/hello-world/tensorflow/cortex.yaml b/examples/hello-world/tensorflow/cortex.yaml deleted file mode 100644 index 2a11090b83..0000000000 --- a/examples/hello-world/tensorflow/cortex.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: iris-classifier - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/iris-classifier/nn/ - monitoring: - model_type: classification diff --git a/examples/hello-world/tensorflow/predictor.py b/examples/hello-world/tensorflow/predictor.py deleted file mode 100644 index 6267256aaf..0000000000 --- a/examples/hello-world/tensorflow/predictor.py +++ /dev/null @@ -1,13 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -labels = ["setosa", "versicolor", "virginica"] - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - self.client = tensorflow_client - - def predict(self, payload): - prediction = self.client.predict(payload) - predicted_class_id = int(prediction["class_ids"][0]) - return labels[predicted_class_id] diff --git a/examples/hello-world/tensorflow/sample.json b/examples/hello-world/tensorflow/sample.json deleted file mode 100644 index 252c666b3a..0000000000 --- a/examples/hello-world/tensorflow/sample.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "sepal_length": 5.2, - "sepal_width": 3.6, - "petal_length": 1.4, - "petal_width": 0.3 -} diff --git a/examples/hello-world/tensorflow/tensorflow.ipynb b/examples/hello-world/tensorflow/tensorflow.ipynb deleted file mode 100644 index 2981ba77ef..0000000000 --- a/examples/hello-world/tensorflow/tensorflow.ipynb +++ /dev/null @@ -1,296 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "iris_tensorflow.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "IiTxCwB7t6Ef", - "colab_type": "text" - }, - "source": [ - "# Training an Iris classifier using TensorFlow\n", - "\n", - "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", - "\n", - "In this notebook, we'll show how to train a classifier trained on the [iris data set](https://archive.ics.uci.edu/ml/datasets/iris) using TensorFlow." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j6QdLAUpuW7r", - "colab_type": "text" - }, - "source": [ - "## Install Dependencies\n", - "First, we'll install our dependencies:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "BQE5z_kHj9jV", - "colab_type": "code", - "colab": {} - }, - "source": [ - "pip install tensorflow==1.14.* scikit-learn==0.21.* boto3==1.*" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yEVK-sLnumqn", - "colab_type": "text" - }, - "source": [ - "## Load the data\n", - "We can use scikit-learn to load the Iris dataset:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "tx9Xw0x0lfbl", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from sklearn.datasets import load_iris\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "iris = load_iris()\n", - "X, y = iris.data, iris.target\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "obGdgMm3urb2", - "colab_type": "text" - }, - "source": [ - "## Train the model\n", - "We'll use TensorFlow's [`DNNClassifier`](https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/estimator/DNNClassifier) to train the model:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jjYp8TaflhW0", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import tensorflow as tf\n", - "\n", - "feature_names = [\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\"]\n", - "\n", - "\n", - "def train_input_fn(features, labels, batch_size):\n", - " irises = {}\n", - 
"\n", - " for i, feature_name in enumerate(feature_names):\n", - " irises[feature_name] = features[:, i]\n", - " \n", - " dataset = tf.data.Dataset.from_tensor_slices((irises, labels))\n", - " dataset = dataset.shuffle(1000).repeat().batch(batch_size)\n", - "\n", - " return dataset\n", - "\n", - "\n", - "def eval_input_fn(features, labels, batch_size):\n", - " irises = {}\n", - " for i, feature_name in enumerate(feature_names):\n", - " irises[feature_name] = features[:, i]\n", - "\n", - " if labels is None:\n", - " inputs = irises\n", - " else:\n", - " inputs = (irises, labels)\n", - "\n", - " dataset = tf.data.Dataset.from_tensor_slices(inputs)\n", - " dataset = dataset.batch(batch_size)\n", - "\n", - " return dataset\n", - "\n", - "\n", - "feature_columns = [tf.feature_column.numeric_column(feature_name) for feature_name in feature_names]\n", - "\n", - "classifier = tf.estimator.DNNClassifier(\n", - " feature_columns=feature_columns,\n", - " hidden_units=[10, 10],\n", - " n_classes=3,\n", - ")\n", - "\n", - "classifier.train(input_fn=lambda: train_input_fn(X_train, y_train, 100), steps=1000)\n", - "\n", - "eval_result = classifier.evaluate(input_fn=lambda: eval_input_fn(X_test, y_test, 100))\n", - "\n", - "print(\"\\nTest set accuracy: {accuracy:0.3f}\\n\".format(**eval_result)) # Accuracy should be > 90%" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Hdwu-wzJvJLb", - "colab_type": "text" - }, - "source": [ - "## Export the model\n", - "Now we can export the model using [`Estimator.export_saved_model`](https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/estimator/Estimator#export_saved_model):" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "AVgs2mkdllRn", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def json_serving_input_fn():\n", - " placeholders = {}\n", - " features = {}\n", - " for feature_name in feature_names:\n", - " placeholders[feature_name] = 
tf.placeholder(shape=[None], dtype=tf.float64, name=feature_name)\n", - " features[feature_name] = tf.expand_dims(placeholders[feature_name], -1)\n", - " \n", - " return tf.estimator.export.ServingInputReceiver(features, receiver_tensors=placeholders)\n", - "\n", - "\n", - "classifier.export_saved_model(\"export\", json_serving_input_fn)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ipVlP4yPxFxw", - "colab_type": "text" - }, - "source": [ - "## Upload the model to AWS\n", - "\n", - "Cortex loads models from AWS, so we need to upload the exported model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3IqsfyylxLhy", - "colab_type": "text" - }, - "source": [ - "Set these variables to configure your AWS credentials and model upload path:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "lc9LBH1uHT_h", - "colab_type": "code", - "cellView": "form", - "colab": {} - }, - "source": [ - "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", - "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", - "S3_UPLOAD_PATH = \"s3://my-bucket/iris-classifier/tensorflow\" #@param {type:\"string\"}\n", - "\n", - "import sys\n", - "import re\n", - "\n", - "if AWS_ACCESS_KEY_ID == \"\":\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", - "\n", - "elif AWS_SECRET_ACCESS_KEY == \"\":\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", - "\n", - "else:\n", - " try:\n", - " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", - " except:\n", - " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NXeuZsaQxUc8", - "colab_type": "text" - }, - "source": [ - "Upload the model 
to S3:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "YLmnWTEVsu55", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import os\n", - "import boto3\n", - "\n", - "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", - "\n", - "for dirpath, _, filenames in os.walk(\"export\"):\n", - " for filename in filenames:\n", - " filepath = os.path.join(dirpath, filename)\n", - " filekey = os.path.join(key, filepath[len(\"export/\"):])\n", - " print(\"Uploading s3://{}/{}...\".format(bucket, filekey), end = '')\n", - " s3.upload_file(filepath, bucket, filekey)\n", - " print(\" ✓\")", - "\n", - "print(\"\\nUploaded model export directory to \" + S3_UPLOAD_PATH)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aR-mmcUzyCV3", - "colab_type": "text" - }, - "source": [ - "\n", - "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/iris-classifier) for how to deploy the model as an API." - ] - } - ] -} diff --git a/examples/live-reloading/onnx/README.md b/examples/live-reloading/onnx/README.md deleted file mode 100644 index e8ec367b01..0000000000 --- a/examples/live-reloading/onnx/README.md +++ /dev/null @@ -1,5 +0,0 @@ -## Live-reloading model APIs - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Model live-reloading is automatically enabled for ONNX predictors. 
diff --git a/examples/live-reloading/python/mpg-estimator/cortex.yaml b/examples/live-reloading/python/mpg-estimator/cortex.yaml deleted file mode 100644 index 4c243b5032..0000000000 --- a/examples/live-reloading/python/mpg-estimator/cortex.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: mpg-estimator - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - model_path: s3://cortex-examples/sklearn/mpg-estimator/linreg/ diff --git a/examples/live-reloading/python/mpg-estimator/predictor.py b/examples/live-reloading/python/mpg-estimator/predictor.py deleted file mode 100644 index 104b9a5c0a..0000000000 --- a/examples/live-reloading/python/mpg-estimator/predictor.py +++ /dev/null @@ -1,27 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import mlflow.sklearn -import numpy as np - - -class PythonPredictor: - def __init__(self, config, python_client): - self.client = python_client - - def load_model(self, model_path): - return mlflow.sklearn.load_model(model_path) - - def predict(self, payload, query_params): - model_version = query_params.get("version") - - model = self.client.get_model(model_version=model_version) - model_input = [ - payload["cylinders"], - payload["displacement"], - payload["horsepower"], - payload["weight"], - payload["acceleration"], - ] - result = model.predict([model_input]).item() - - return {"prediction": result, "model": {"version": model_version}} diff --git a/examples/live-reloading/python/mpg-estimator/requirements.txt b/examples/live-reloading/python/mpg-estimator/requirements.txt deleted file mode 100644 index cbcad6b321..0000000000 --- a/examples/live-reloading/python/mpg-estimator/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -mlflow -pandas -numpy -scikit-learn==0.21.3 diff --git a/examples/live-reloading/python/mpg-estimator/sample.json b/examples/live-reloading/python/mpg-estimator/sample.json deleted file mode 100644 index 2dbbca46dd..0000000000 --- a/examples/live-reloading/python/mpg-estimator/sample.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "cylinders": 4, - "displacement": 135, - "horsepower": 84, - "weight": 2490, - "acceleration": 15.7 -} diff --git a/examples/live-reloading/tensorflow/README.md b/examples/live-reloading/tensorflow/README.md deleted file mode 100644 index 46f4111a4f..0000000000 --- a/examples/live-reloading/tensorflow/README.md +++ /dev/null @@ -1,5 +0,0 @@ -## Live-reloading model APIs - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -Model live-reloading is automatically enabled for TensorFlow predictors unless using Inferentia resources (`compute.inf`) and `processes_per_replica` > 1. diff --git a/examples/model-caching/onnx/multi-model-classifier/README.md b/examples/model-caching/onnx/multi-model-classifier/README.md deleted file mode 100644 index bf5fc906cb..0000000000 --- a/examples/model-caching/onnx/multi-model-classifier/README.md +++ /dev/null @@ -1,77 +0,0 @@ -# Multi-Model Classifier API - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example deploys ResNet50, MobileNet and ShuffleNet models in one API. Query parameters are used for selecting the model and the version. - -Since model caching is enabled, there can only be 2 models loaded into memory - loading a 3rd one will lead to the removal of the least recently used one. To witness the adding/removal process of models, check the logs of the API by running `cortex logs multi-model-classifier` once the API is up. - -The example can be run on both CPU and on GPU hardware. - -## Sample Prediction - -Deploy the model by running: - -```bash -cortex deploy -``` - -And wait for it to become live by tracking its status with `cortex get --watch`. - -Once the API has been successfully deployed, export the API's endpoint for convenience. You can get the API's endpoint by running `cortex get multi-model-classifier`. 
- -```bash -export ENDPOINT=your-api-endpoint -``` - -When making a prediction with [sample.json](sample.json), the following image will be used: - -![cat](https://i.imgur.com/213xcvs.jpg) - -### ResNet50 Classifier - -Make a request to the ResNet50 model: - -```bash -curl "${ENDPOINT}?model=resnet50" -X POST -H "Content-Type: application/json" -d @sample.json -``` - -The expected response is: - -```json -{"label": "tabby", "model": {"name": "resnet50", "version": "latest"}} -``` - -### MobileNet Classifier - -Make a request to the MobileNet model: - -```bash -curl "${ENDPOINT}?model=mobilenet" -X POST -H "Content-Type: application/json" -d @sample.json -``` - -The expected response is: - -```json -{"label": "tabby", "model": {"name": "mobilenet", "version": "latest"}} -``` - -### ShuffleNet Classifier - -At this point, there are 2 models loaded into memory (as specified by `cache_size`). Loading `ShuffleNet` as well will lead to the removal of the least recently used model - in this case, it will be the ResNet50 model that will get evicted. Since the `disk_cache_size` is set to 3, no model will be removed from disk. - -Make a request to the ShuffleNet model: - -```bash -curl "${ENDPOINT}?model=shufflenet" -X POST -H "Content-Type: application/json" -d @sample.json -``` - -The expected response is: - -```json -{"label": "Egyptian_cat", "model": {"name": "shufflenet", "version": "latest"}} -``` - ---- - -Now, inspect `cortex get multi-model-classifier` to see when and which models were removed in this process of making requests to different versions of the same model. 
diff --git a/examples/model-caching/onnx/multi-model-classifier/cortex.yaml b/examples/model-caching/onnx/multi-model-classifier/cortex.yaml deleted file mode 100644 index f074721fd3..0000000000 --- a/examples/model-caching/onnx/multi-model-classifier/cortex.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: multi-model-classifier - kind: RealtimeAPI - predictor: - type: onnx - path: predictor.py - models: - paths: - - name: resnet50 - model_path: s3://cortex-examples/onnx/resnet50/ - - name: mobilenet - model_path: s3://cortex-examples/onnx/mobilenet/ - - name: shufflenet - model_path: s3://cortex-examples/onnx/shufflenet/ - cache_size: 2 - disk_cache_size: 3 - config: - image-classifier-classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - image-resize: 224 - compute: - mem: 2G diff --git a/examples/model-caching/onnx/multi-model-classifier/predictor.py b/examples/model-caching/onnx/multi-model-classifier/predictor.py deleted file mode 100644 index 6ab949a24c..0000000000 --- a/examples/model-caching/onnx/multi-model-classifier/predictor.py +++ /dev/null @@ -1,99 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import numpy as np -import cv2, requests -from scipy.special import softmax - - -def get_url_image(url_image): - """ - Get numpy image from URL image. - """ - resp = requests.get(url_image, stream=True).raw - image = np.asarray(bytearray(resp.read()), dtype="uint8") - image = cv2.imdecode(image, cv2.IMREAD_COLOR) - return image - - -def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA): - """ - Resize a numpy image. 
- """ - dim = None - (h, w) = image.shape[:2] - - if width is None and height is None: - return image - - if width is None: - # calculate the ratio of the height and construct the dimensions - r = height / float(h) - dim = (int(w * r), height) - else: - # calculate the ratio of the width and construct the dimensions - r = width / float(w) - dim = (width, int(h * r)) - - resized = cv2.resize(image, dim, interpolation=inter) - - return resized - - -def preprocess(img_data): - """ - Normalize input for inference. - """ - # move pixel color dimension to position 0 - img = np.moveaxis(img_data, 2, 0) - - mean_vec = np.array([0.485, 0.456, 0.406]) - stddev_vec = np.array([0.229, 0.224, 0.225]) - norm_img_data = np.zeros(img.shape).astype("float32") - for i in range(img.shape[0]): - # for each pixel in each channel, divide the value by 255 to get value between [0, 1] and then normalize - norm_img_data[i, :, :] = (img[i, :, :] / 255 - mean_vec[i]) / stddev_vec[i] - - # extend to batch size of 1 - norm_img_data = norm_img_data[np.newaxis, ...] - return norm_img_data - - -def postprocess(results): - """ - Eliminates all dimensions of size 1, softmaxes the input and then returns the index of the element with the highest value. 
- """ - squeezed = np.squeeze(results) - maxed = softmax(squeezed) - result = np.argmax(maxed) - return result - - -class ONNXPredictor: - def __init__(self, onnx_client, config): - # onnx client - self.client = onnx_client - - # for image classifiers - classes = requests.get(config["image-classifier-classes"]).json() - self.image_classes = [classes[str(k)][1] for k in range(len(classes))] - self.resize_value = config["image-resize"] - - def predict(self, payload, query_params): - # get request params - model_name = query_params["model"] - model_version = query_params.get("version", "latest") - img_url = payload["url"] - - # process the input - img = get_url_image(img_url) - img = image_resize(img, height=self.resize_value) - img = preprocess(img) - - # predict - results = self.client.predict(img, model_name, model_version)[0] - - # interpret result - result = postprocess(results) - predicted_label = self.image_classes[result] - - return {"label": predicted_label, "model": {"name": model_name, "version": model_version}} diff --git a/examples/model-caching/onnx/multi-model-classifier/sample.json b/examples/model-caching/onnx/multi-model-classifier/sample.json deleted file mode 100644 index 4ee3aa45df..0000000000 --- a/examples/model-caching/onnx/multi-model-classifier/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "url": "https://i.imgur.com/213xcvs.jpg" -} diff --git a/examples/model-caching/python/mpg-estimator/README.md b/examples/model-caching/python/mpg-estimator/README.md deleted file mode 100644 index e120ac8204..0000000000 --- a/examples/model-caching/python/mpg-estimator/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# MPG Estimator API - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example deploys an MPG estimator model of multiple versions in one API. 
Query parameters are used for selecting the model and the version. - -Since model caching is enabled, there can only be 2 models loaded into memory (counting the versioned models as well) - loading a 3rd one will lead to the removal of the least recently used one. To witness the adding/removal process of models, check the logs of the API by running `cortex logs mpg-estimator` once the API is up. - -The example can be run on both CPU and on GPU hardware. - -## Sample Prediction - -Deploy the model by running: - -```bash -cortex deploy -``` - -And wait for it to become live by tracking its status with `cortex get --watch`. - -Once the API has been successfully deployed, export the API's endpoint for convenience. You can get the API's endpoint by running `cortex get mpg-estimator`. - -```bash -export ENDPOINT=your-api-endpoint -``` - -### Version 1 - -Make a request version `1` of the `mpg-estimator` model: - -```bash -curl "${ENDPOINT}?model=resnet50&version=1" -X POST -H "Content-Type: application/json" -d @sample.json -``` - -The expected response is: - -```json -{"prediction": 26.929889872154185, "model": {"name": "mpg-estimator", "version": "1"}} -``` - -### Version 2 - -At this point, there is one model loaded into memory (as specified by `cache_size`). Loading another versioned model as well will lead to the removal of the least recently used model - in this case, it will be version 1 that will get evicted. Since the `disk_cache_size` is set to 2, no model will be removed from disk. - -Make a request version `2` of the `mpg-estimator` model: - -```bash -curl "${ENDPOINT}?model=mobilenet" -X POST -H "Content-Type: application/json" -d @sample.json -``` - -The expected response is: - -```json -{"prediction": 26.929889872154185, "model": {"name": "mpg-estimator", "version": "1"}} -``` - -### Version 3 - -With the following request, version 2 of the model will have to be evicted from the memory. 
Since `disk_cache_size` is set to 2, this time, version 1 of the model will get removed from the disk. - -Make a request version `3` of the `mpg-estimator` model: - -```bash -curl "${ENDPOINT}?model=shufflenet" -X POST -H "Content-Type: application/json" -d @sample.json -``` - -The expected response is: - -```json -{"prediction": 26.929889872154185, "model": {"name": "mpg-estimator", "version": "1"}} -``` - ---- - -Now, inspect `cortex get mpg-estimator` to see when and which models were removed in this process of making requests to different versions of the same model. The same algorithm is applied to different models as well, not just for the versions of a specific model. diff --git a/examples/model-caching/python/mpg-estimator/cortex.yaml b/examples/model-caching/python/mpg-estimator/cortex.yaml deleted file mode 100644 index 1d26879aaa..0000000000 --- a/examples/model-caching/python/mpg-estimator/cortex.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: mpg-estimator - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - models: - paths: - - name: mpg-estimator - model_path: s3://cortex-examples/sklearn/mpg-estimator/linreg/ - cache_size: 1 - disk_cache_size: 2 diff --git a/examples/model-caching/python/mpg-estimator/predictor.py b/examples/model-caching/python/mpg-estimator/predictor.py deleted file mode 100644 index 84aa206f41..0000000000 --- a/examples/model-caching/python/mpg-estimator/predictor.py +++ /dev/null @@ -1,28 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import mlflow.sklearn -import numpy as np - - -class PythonPredictor: - def __init__(self, config, python_client): - self.client = python_client - - def load_model(self, model_path): - return mlflow.sklearn.load_model(model_path) - - def predict(self, payload, query_params): - model_name = query_params["model"] - model_version = query_params.get("version", "latest") - - model = self.client.get_model(model_name, model_version) - model_input = [ - payload["cylinders"], - payload["displacement"], - payload["horsepower"], - payload["weight"], - payload["acceleration"], - ] - result = model.predict([model_input]).item() - - return {"prediction": result, "model": {"name": model_name, "version": model_version}} diff --git a/examples/model-caching/python/mpg-estimator/requirements.txt b/examples/model-caching/python/mpg-estimator/requirements.txt deleted file mode 100644 index cbcad6b321..0000000000 --- a/examples/model-caching/python/mpg-estimator/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -mlflow -pandas -numpy -scikit-learn==0.21.3 diff --git a/examples/model-caching/python/mpg-estimator/sample.json b/examples/model-caching/python/mpg-estimator/sample.json deleted file mode 100644 index 2dbbca46dd..0000000000 --- a/examples/model-caching/python/mpg-estimator/sample.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "cylinders": 4, - "displacement": 135, - "horsepower": 84, - "weight": 2490, - "acceleration": 15.7 -} diff --git a/examples/model-caching/tensorflow/multi-model-classifier/README.md b/examples/model-caching/tensorflow/multi-model-classifier/README.md deleted file mode 100644 index 9fd921884b..0000000000 --- a/examples/model-caching/tensorflow/multi-model-classifier/README.md +++ /dev/null @@ -1,77 +0,0 @@ -# Multi-Model Classifier API - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example deploys Iris, ResNet50 and Inception models in one API. Query parameters are used for selecting the model. - -Since model caching is enabled, there can only be 2 models loaded into memory - loading a 3rd one will lead to the removal of the least recently used one. To witness the adding/removal process of models, check the logs of the API by running `cortex logs multi-model-classifier` once the API is up. - -The example can be run on both CPU and on GPU hardware. - -## Sample Prediction - -Deploy the model by running: - -```bash -cortex deploy -``` - -And wait for it to become live by tracking its status with `cortex get --watch`. - -Once the API has been successfully deployed, export the APIs endpoint. You can get the API's endpoint by running `cortex get multi-model-classifier`. - -```bash -export ENDPOINT=your-api-endpoint -``` - -When making a prediction with [sample-image.json](sample-image.json), the following image will be used: - -![sports car](https://i.imgur.com/zovGIKD.png) - -### ResNet50 Classifier - -Make a request to the ResNet50 model: - -```bash -curl "${ENDPOINT}?model=resnet50" -X POST -H "Content-Type: application/json" -d @sample-image.json -``` - -The expected response is: - -```json -{"label": "sports_car"} -``` - -### Inception Classifier - -Make a request to the Inception model: - -```bash -curl "${ENDPOINT}?model=inception" -X POST -H "Content-Type: application/json" -d @sample-image.json -``` - -The expected response is: - -```json -{"label": "sports_car"} -``` - -### Iris Classifier - -At this point, there are 2 models loaded into memory (as specified by `cache_size`). Loading the `iris` classifier will lead to the removal of the least recently used model - in this case, it will be the ResNet50 model that will get evicted. Since the `disk_cache_size` is set to 3, no model will be removed from disk. 
- -Make a request to the Iris model: - -```bash -curl "${ENDPOINT}?model=iris" -X POST -H "Content-Type: application/json" -d @sample-iris.json -``` - -The expected response is: - -```json -{"label": "setosa"} -``` - ---- - -Now, inspect `cortex get multi-model-classifier` to see when and which models were removed in this process of making requests to different versions of the same model. diff --git a/examples/model-caching/tensorflow/multi-model-classifier/cortex.yaml b/examples/model-caching/tensorflow/multi-model-classifier/cortex.yaml deleted file mode 100644 index 4a165d177d..0000000000 --- a/examples/model-caching/tensorflow/multi-model-classifier/cortex.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: multi-model-classifier - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - models: - paths: - - name: inception - model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ - - name: iris - model_path: s3://cortex-examples/tensorflow/iris-classifier/nn/ - - name: resnet50 - model_path: s3://cortex-examples/tensorflow/resnet50/ - cache_size: 2 - disk_cache_size: 3 - config: - models: - iris: - labels: ["setosa", "versicolor", "virginica"] - resnet50: - input_shape: [224, 224] - input_key: input - output_key: output - inception: - input_shape: [224, 224] - input_key: images - output_key: classes - image-classifier-classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - compute: - mem: 2G diff --git a/examples/model-caching/tensorflow/multi-model-classifier/predictor.py b/examples/model-caching/tensorflow/multi-model-classifier/predictor.py deleted file mode 100644 index d0914b8411..0000000000 --- a/examples/model-caching/tensorflow/multi-model-classifier/predictor.py +++ 
/dev/null @@ -1,63 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import requests -import numpy as np -import cv2 - - -def get_url_image(url_image): - """ - Get numpy image from URL image. - """ - resp = requests.get(url_image, stream=True).raw - image = np.asarray(bytearray(resp.read()), dtype="uint8") - image = cv2.imdecode(image, cv2.IMREAD_COLOR) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - return image - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - self.client = tensorflow_client - - # for image classifiers - classes = requests.get(config["image-classifier-classes"]).json() - self.image_classes = [classes[str(k)][1] for k in range(len(classes))] - - # assign "models"' key value to self.config for ease of use - self.config = config["models"] - - # for iris classifier - self.iris_labels = self.config["iris"]["labels"] - - def predict(self, payload, query_params): - model_name = query_params["model"] - model_version = query_params.get("version", "latest") - predicted_label = None - - if model_name == "iris": - prediction = self.client.predict(payload["input"], model_name, model_version) - predicted_class_id = int(prediction["class_ids"][0]) - predicted_label = self.iris_labels[predicted_class_id] - - elif model_name in ["resnet50", "inception"]: - predicted_label = self.predict_image_classifier(model_name, payload["url"]) - - return {"label": predicted_label, "model": {"model": model_name, "version": model_version}} - - def predict_image_classifier(self, model, img_url): - img = get_url_image(img_url) - img = cv2.resize( - img, tuple(self.config[model]["input_shape"]), interpolation=cv2.INTER_NEAREST - ) - if model == "inception": - img = img.astype("float32") / 255 - img = {self.config[model]["input_key"]: img[np.newaxis, ...]} - - results = 
self.client.predict(img, model)[self.config[model]["output_key"]] - result = np.argmax(results) - if model == "inception": - result -= 1 - predicted_label = self.image_classes[result] - - return predicted_label diff --git a/examples/model-caching/tensorflow/multi-model-classifier/requirements.txt b/examples/model-caching/tensorflow/multi-model-classifier/requirements.txt deleted file mode 100644 index 7e2fba5e6c..0000000000 --- a/examples/model-caching/tensorflow/multi-model-classifier/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -Pillow diff --git a/examples/model-caching/tensorflow/multi-model-classifier/sample-image.json b/examples/model-caching/tensorflow/multi-model-classifier/sample-image.json deleted file mode 100644 index 95200916c7..0000000000 --- a/examples/model-caching/tensorflow/multi-model-classifier/sample-image.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "url": "https://i.imgur.com/zovGIKD.png" -} diff --git a/examples/model-caching/tensorflow/multi-model-classifier/sample-iris.json b/examples/model-caching/tensorflow/multi-model-classifier/sample-iris.json deleted file mode 100644 index 67c03827f2..0000000000 --- a/examples/model-caching/tensorflow/multi-model-classifier/sample-iris.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "input": { - "sepal_length": 5.2, - "sepal_width": 3.6, - "petal_length": 1.4, - "petal_width": 0.3 - } -} diff --git a/examples/multi-model/onnx/requirements.txt b/examples/multi-model/onnx/requirements.txt deleted file mode 100644 index 212d089934..0000000000 --- a/examples/multi-model/onnx/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -opencv-python==4.2.0.34 -scipy==1.4.1 diff --git a/examples/multi-model/onnx/sample.json b/examples/multi-model/onnx/sample.json deleted file mode 100644 index 4ee3aa45df..0000000000 --- a/examples/multi-model/onnx/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "url": "https://i.imgur.com/213xcvs.jpg" -} diff --git a/examples/multi-model/python/README.md b/examples/multi-model/python/README.md 
deleted file mode 100644 index 0fbca390cd..0000000000 --- a/examples/multi-model/python/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# Multi-Model Analyzer API - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example deploys a sentiment analyzer and a text summarizer in one API. Query parameters are used for selecting the model. - -The example can be run on both CPU and on GPU hardware. - -## Sample Prediction - -Deploy the model by running: - -```bash -cortex deploy -``` - -And wait for it to become live by tracking its status with `cortex get --watch`. - -Once the API has been successfully deployed, export the APIs endpoint. You can get the API's endpoint by running `cortex get text-analyzer`. - -```bash -export ENDPOINT=your-api-endpoint -``` - -### Sentiment Analyzer Classifier - -Make a request to the sentiment analyzer model: - -```bash -curl "${ENDPOINT}?model=sentiment" -X POST -H "Content-Type: application/json" -d @sample-sentiment.json -``` - -The expected response is: - -```json -{"label": "POSITIVE", "score": 0.9998506903648376} -``` - -### Text Summarizer - -Make a request to the text summarizer model: - -```bash -curl "${ENDPOINT}?model=summarizer" -X POST -H "Content-Type: application/json" -d @sample-summarizer.json -``` - -The expected response is: - -```text -Machine learning is the study of algorithms and statistical models that computer systems use to perform a specific task. It is seen as a subset of artificial intelligence. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision. In its application across business problems, machine learning is also referred to as predictive analytics. 
-``` diff --git a/examples/multi-model/python/cortex.yaml b/examples/multi-model/python/cortex.yaml deleted file mode 100644 index b2ece6bab9..0000000000 --- a/examples/multi-model/python/cortex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: multi-model-text-analyzer - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 - gpu: 1 - mem: 6G diff --git a/examples/multi-model/python/predictor.py b/examples/multi-model/python/predictor.py deleted file mode 100644 index 03a8b03fbb..0000000000 --- a/examples/multi-model/python/predictor.py +++ /dev/null @@ -1,25 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import torch -from transformers import pipeline -from starlette.responses import JSONResponse - - -class PythonPredictor: - def __init__(self, config): - device = 0 if torch.cuda.is_available() else -1 - print(f"using device: {'cuda' if device == 0 else 'cpu'}") - - self.analyzer = pipeline(task="sentiment-analysis", device=device) - self.summarizer = pipeline(task="summarization", device=device) - - def predict(self, query_params, payload): - model_name = query_params.get("model") - - if model_name == "sentiment": - return self.analyzer(payload["text"])[0] - elif model_name == "summarizer": - summary = self.summarizer(payload["text"]) - return summary[0]["summary_text"] - else: - return JSONResponse({"error": f"unknown model: {model_name}"}, status_code=400) diff --git a/examples/multi-model/python/requirements.txt b/examples/multi-model/python/requirements.txt deleted file mode 100644 index 3f565d80e4..0000000000 --- 
a/examples/multi-model/python/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -transformers==2.9.* diff --git a/examples/multi-model/python/sample-sentiment.json b/examples/multi-model/python/sample-sentiment.json deleted file mode 100644 index de3a18a92a..0000000000 --- a/examples/multi-model/python/sample-sentiment.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "best day ever" -} diff --git a/examples/multi-model/python/sample-summarizer.json b/examples/multi-model/python/sample-summarizer.json deleted file mode 100644 index b19a1406d4..0000000000 --- a/examples/multi-model/python/sample-summarizer.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "Machine learning (ML) is the scientific study of algorithms and statistical models that computer systems use to perform a specific task without using explicit instructions, relying on patterns and inference instead. It is seen as a subset of artificial intelligence. Machine learning algorithms build a mathematical model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to perform the task. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision, where it is difficult or infeasible to develop a conventional algorithm for effectively performing the task. Machine learning is closely related to computational statistics, which focuses on making predictions using computers. The study of mathematical optimization delivers methods, theory and application domains to the field of machine learning. Data mining is a field of study within machine learning, and focuses on exploratory data analysis through unsupervised learning. In its application across business problems, machine learning is also referred to as predictive analytics." 
-} diff --git a/examples/multi-model/tensorflow/README.md b/examples/multi-model/tensorflow/README.md deleted file mode 100644 index 631f800179..0000000000 --- a/examples/multi-model/tensorflow/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# Multi-Model Classifier API - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example deploys Iris, ResNet50 and Inception models in one API. Query parameters are used for selecting the model. - -The example can be run on both CPU and on GPU hardware. - -## Sample Prediction - -Deploy the model by running: - -```bash -cortex deploy -``` - -And wait for it to become live by tracking its status with `cortex get --watch`. - -Once the API has been successfully deployed, export the APIs endpoint. You can get the API's endpoint by running `cortex get multi-model-classifier`. - -```bash -export ENDPOINT=your-api-endpoint -``` - -When making a prediction with [sample-image.json](sample-image.json), the following image will be used: - -![sports car](https://i.imgur.com/zovGIKD.png) - -### ResNet50 Classifier - -Make a request to the ResNet50 model: - -```bash -curl "${ENDPOINT}?model=resnet50" -X POST -H "Content-Type: application/json" -d @sample-image.json -``` - -The expected response is: - -```json -{"label": "sports_car"} -``` - -### Inception Classifier - -Make a request to the Inception model: - -```bash -curl "${ENDPOINT}?model=inception" -X POST -H "Content-Type: application/json" -d @sample-image.json -``` - -The expected response is: - -```json -{"label": "sports_car"} -``` - -### Iris Classifier - -Make a request to the Iris model: - -```bash -curl "${ENDPOINT}?model=iris" -X POST -H "Content-Type: application/json" -d @sample-iris.json -``` - -The expected response is: - -```json -{"label": "setosa"} -``` diff --git 
a/examples/multi-model/tensorflow/cortex.yaml b/examples/multi-model/tensorflow/cortex.yaml deleted file mode 100644 index 2c0e39bea8..0000000000 --- a/examples/multi-model/tensorflow/cortex.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: multi-model-classifier - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - models: - paths: - - name: inception - model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ - - name: resnet50 - model_path: s3://cortex-examples/tensorflow/resnet50/ - config: - models: - resnet50: - input_shape: [224, 224] - input_key: input - output_key: output - inception: - input_shape: [224, 224] - input_key: images - output_key: classes - image-classifier-classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - compute: - mem: 2G diff --git a/examples/multi-model/tensorflow/predictor.py b/examples/multi-model/tensorflow/predictor.py deleted file mode 100644 index 6577777037..0000000000 --- a/examples/multi-model/tensorflow/predictor.py +++ /dev/null @@ -1,62 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import requests -import numpy as np -import cv2 - - -def get_url_image(url_image): - """ - Get numpy image from URL image. 
- """ - resp = requests.get(url_image, stream=True).raw - image = np.asarray(bytearray(resp.read()), dtype="uint8") - image = cv2.imdecode(image, cv2.IMREAD_COLOR) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - return image - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - self.client = tensorflow_client - - # for image classifiers - classes = requests.get(config["image-classifier-classes"]).json() - self.image_classes = [classes[str(k)][1] for k in range(len(classes))] - - # assign "models"' key value to self.config for ease of use - self.config = config["models"] - - # for iris classifier - self.iris_labels = self.config["iris"]["labels"] - - def predict(self, payload, query_params): - model_name = query_params["model"] - predicted_label = None - - if model_name == "iris": - prediction = self.client.predict(payload["input"], model_name) - predicted_class_id = int(prediction["class_ids"][0]) - predicted_label = self.iris_labels[predicted_class_id] - - elif model_name in ["resnet50", "inception"]: - predicted_label = self.predict_image_classifier(model_name, payload["url"]) - - return {"label": predicted_label} - - def predict_image_classifier(self, model, img_url): - img = get_url_image(img_url) - img = cv2.resize( - img, tuple(self.config[model]["input_shape"]), interpolation=cv2.INTER_NEAREST - ) - if model == "inception": - img = img.astype("float32") / 255 - img = {self.config[model]["input_key"]: img[np.newaxis, ...]} - - results = self.client.predict(img, model)[self.config[model]["output_key"]] - result = np.argmax(results) - if model == "inception": - result -= 1 - predicted_label = self.image_classes[result] - - return predicted_label diff --git a/examples/multi-model/tensorflow/requirements.txt b/examples/multi-model/tensorflow/requirements.txt deleted file mode 100644 index 7e2fba5e6c..0000000000 --- a/examples/multi-model/tensorflow/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -Pillow diff --git 
a/examples/multi-model/tensorflow/sample-image.json b/examples/multi-model/tensorflow/sample-image.json deleted file mode 100644 index 95200916c7..0000000000 --- a/examples/multi-model/tensorflow/sample-image.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "url": "https://i.imgur.com/zovGIKD.png" -} From 82a8272365c3ffc85e1dc5c4cc8cb1d4a32382f8 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Fri, 4 Dec 2020 16:17:49 -0800 Subject: [PATCH 04/36] Update tutorials --- README.md | 7 +- docs/summary.md | 6 +- docs/tutorials/batch.md | 0 docs/tutorials/compute/README.md | 90 -------- docs/tutorials/compute/cortex.yaml | 18 -- docs/tutorials/compute/cortex_gpu.yaml | 19 -- .../cortex_gpu_server_side_batching.yaml | 22 -- docs/tutorials/compute/cortex_inf.yaml | 21 -- .../cortex_inf_server_side_batching.yaml | 24 --- .../compute/generate_gpu_resnet50_model.ipynb | 131 ------------ .../compute/generate_resnet50_models.ipynb | 178 ---------------- docs/tutorials/compute/predictor.py | 63 ------ docs/tutorials/compute/requirements.txt | 1 - docs/tutorials/compute/sample.bin | Bin 8680 -> 0 bytes docs/tutorials/compute/sample.json | 3 - docs/tutorials/multi-model.md | 77 +++++++ docs/tutorials/multi-model/README.md | 69 ------- docs/tutorials/multi-model/cortex.yaml | 20 -- docs/tutorials/multi-model/predictor.py | 98 --------- docs/tutorials/multi-model/requirements.txt | 2 - docs/tutorials/multi-model/sample.json | 3 - docs/tutorials/realtime.md | 100 +++++++++ docs/tutorials/realtime/README.md | 192 ------------------ docs/tutorials/realtime/deploy.ipynb | 80 -------- docs/tutorials/realtime/predictor.py | 17 -- docs/tutorials/realtime/requirements.txt | 2 - docs/tutorials/traffic-splitter.md | 96 +++++++++ docs/tutorials/traffic-splitting/README.md | 111 ---------- docs/tutorials/traffic-splitting/cortex.yaml | 28 --- docs/tutorials/traffic-splitting/model.py | 59 ------ .../traffic-splitting/onnx_predictor.py | 20 -- .../traffic-splitting/pytorch_predictor.py | 50 ----- 
docs/tutorials/traffic-splitting/sample.json | 6 - docs/tutorials/utils/README.md | 36 ---- docs/tutorials/utils/throughput_test.py | 179 ---------------- 35 files changed, 276 insertions(+), 1552 deletions(-) create mode 100644 docs/tutorials/batch.md delete mode 100644 docs/tutorials/compute/README.md delete mode 100644 docs/tutorials/compute/cortex.yaml delete mode 100644 docs/tutorials/compute/cortex_gpu.yaml delete mode 100644 docs/tutorials/compute/cortex_gpu_server_side_batching.yaml delete mode 100644 docs/tutorials/compute/cortex_inf.yaml delete mode 100644 docs/tutorials/compute/cortex_inf_server_side_batching.yaml delete mode 100644 docs/tutorials/compute/generate_gpu_resnet50_model.ipynb delete mode 100644 docs/tutorials/compute/generate_resnet50_models.ipynb delete mode 100644 docs/tutorials/compute/predictor.py delete mode 100644 docs/tutorials/compute/requirements.txt delete mode 100644 docs/tutorials/compute/sample.bin delete mode 100644 docs/tutorials/compute/sample.json create mode 100644 docs/tutorials/multi-model.md delete mode 100644 docs/tutorials/multi-model/README.md delete mode 100644 docs/tutorials/multi-model/cortex.yaml delete mode 100644 docs/tutorials/multi-model/predictor.py delete mode 100644 docs/tutorials/multi-model/requirements.txt delete mode 100644 docs/tutorials/multi-model/sample.json create mode 100644 docs/tutorials/realtime.md delete mode 100644 docs/tutorials/realtime/README.md delete mode 100644 docs/tutorials/realtime/deploy.ipynb delete mode 100644 docs/tutorials/realtime/predictor.py delete mode 100644 docs/tutorials/realtime/requirements.txt create mode 100644 docs/tutorials/traffic-splitter.md delete mode 100644 docs/tutorials/traffic-splitting/README.md delete mode 100644 docs/tutorials/traffic-splitting/cortex.yaml delete mode 100644 docs/tutorials/traffic-splitting/model.py delete mode 100644 docs/tutorials/traffic-splitting/onnx_predictor.py delete mode 100644 
docs/tutorials/traffic-splitting/pytorch_predictor.py delete mode 100644 docs/tutorials/traffic-splitting/sample.json delete mode 100644 docs/tutorials/utils/README.md delete mode 100644 docs/tutorials/utils/throughput_test.py diff --git a/README.md b/README.md index 53657f23f2..bbbde36959 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,7 @@
- -[install](https://docs.cortex.dev/install) • [documentation](https://docs.cortex.dev) • [examples](https://github.com/cortexlabs/cortex/tree/0.23/examples) • [community](https://gitter.im/cortexlabs/cortex) +[install](https://docs.cortex.dev/install) • [documentation](https://docs.cortex.dev) • [community](https://gitter.im/cortexlabs/cortex) # Deploy machine learning models to production @@ -74,10 +73,6 @@ class PythonPredictor: api_spec = { "name": "text-generator", "kind": "RealtimeAPI", - "predictor": { - "type": "python", - "path": "predictor.py" - }, "compute": { "gpu": 1, "mem": "8Gi", diff --git a/docs/summary.md b/docs/summary.md index b21c1caed4..04f21db7a3 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -1,14 +1,12 @@ # Table of contents * [Deploy machine learning models to production](../README.md) -* [Install](aws/install.md) -* [Tutorial](https://docs.cortex.dev/v/master/deployments/realtime-api/text-generator) -* [GitHub](https://github.com/cortexlabs/cortex) -* [Examples](https://github.com/cortexlabs/cortex/tree/master/examples) +* [Get started](tutorials/realtime.md) * [Contact us](contact.md) ## Running Cortex on AWS +* [Install](aws/install.md) * [Credentials](aws/credentials.md) * [Security](aws/security.md) * [Spot instances](aws/spot.md) diff --git a/docs/tutorials/batch.md b/docs/tutorials/batch.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/tutorials/compute/README.md b/docs/tutorials/compute/README.md deleted file mode 100644 index 7a52dadbb8..0000000000 --- a/docs/tutorials/compute/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# Image Classifier with ResNet50 - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example implements an image recognition system using ResNet50, which allows for the recognition of up to 1000 classes. 
- -## Deploying - -There are 4 Cortex APIs available in this example: - -1. [cortex.yaml](cortex.yaml) - can be used with any instances. -1. [cortex_inf.yaml](cortex_inf.yaml) - to be used with `inf1` instances. -1. [cortex_gpu.yaml](cortex_gpu.yaml) - to be used with GPU instances. -1. [cortex_gpu_server_side_batching.yaml](cortex_gpu_server_side_batching.yaml) - to be used with GPU instances. Deployed with `max_batch_size` > 1. The exported model and the TensorFlow Predictor do not need to be modified to support server-side batching. - -To deploy an API, run: - -```bash -cortex deploy -``` - -E.g. - -```bash -cortex deploy cortex_inf.yaml -``` - -## Verifying your API - -Check that your API is live by running `cortex get image-classifier-resnet50`, and copy the example `curl` command that's shown. After the API is live, run the `curl` command, e.g. - -```bash -$ curl -X POST -H "Content-Type: application/json" -d @sample.json - -["tabby", "Egyptian_cat", "tiger_cat", "tiger", "plastic_bag"] -``` - -The following image is embedded in [sample.json](sample.json): - -![image](https://i.imgur.com/213xcvs.jpg) - -## Throughput test - -Before [throughput_test.py](../../utils/throughput_test.py) is run, 2 environment variables have to be exported: - -```bash -export ENDPOINT= # you can find this with `cortex get image-classifier-resnet50` -export PAYLOAD=https://i.imgur.com/213xcvs.jpg # this is the cat image shown in the previous step -``` - -Then, deploy each API one at a time and check the results: - -1. Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 2` with the [cortex.yaml](cortex.yaml) API running on an `c5.xlarge` instance will get **~16.2 inferences/sec** with an average latency of **200 ms**. -1. Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 48` with the [cortex_inf.yaml](cortex_inf.yaml) API running on an `inf1.2xlarge` instance will get **~510 inferences/sec** with an average latency of **80 ms**. -1. 
Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 24` with the [cortex_gpu.yaml](cortex_gpu.yaml) API running on an `g4dn.xlarge` instance will get **~125 inferences/sec** with an average latency of **85 ms**. Optimizing the model with TensorRT to use FP16 on TF-serving only seems to achieve a 10% performance improvement - one thing to consider is that the TensorRT engines hadn't been built beforehand, so this might have affected the results negatively. -1. Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 60` with the [cortex_gpu_server_side_batching.yaml](cortex_gpu_batch_sized.yaml) API running on an `g4dn.xlarge` instance will get **~186 inferences/sec** with an average latency of **500 ms**. This achieves a 49% higher throughput than the [cortex_gpu.yaml](cortex_gpu.yaml) API, at the expense of increased latency. - -Alternatively to [throughput_test.py](../../utils/throughput_test.py), the `ab` GNU utility can also be used to benchmark the API. This has the advantage that it's not as taxing on your local machine, but the disadvantage that it doesn't implement a cooldown period. You can run `ab` like this: - -```bash -# for making octet-stream requests, which is the default for throughput_test script -ab -n -c -p sample.bin -T 'application/octet-stream' -rks 120 $ENDPOINT - -# for making json requests, will will have lower performance because the API has to download the image every time -ab -n -c -p sample.json -T 'application/json' -rks 120 $ENDPOINT -``` - -*Note: `inf1.xlarge` isn't used because the major bottleneck with `inf` instances for this example is with the CPU, and `inf1.2xlarge` has twice the amount of CPU cores for same number of Inferentia ASICs (which is 1), which translates to almost double the throughput.* - -## Exporting SavedModels - -This example deploys models that we have built and uploaded to a public S3 bucket. If you want to build the models yourself, follow these instructions. 
- -Run the following command to install the dependencies required for the [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook: - -```bash -pip install --extra-index-url=https://pip.repos.neuron.amazonaws.com \ - neuron-cc==1.0.9410.0+6008239556 \ - tensorflow-neuron==1.15.0.1.0.1333.0 -``` - -The [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook will generate 2 SavedModels. One will be saved in the `resnet50` directory which can be run on GPU or on CPU and another in the `resnet50_neuron` directory which can only be run on `inf1` instances. For server-side batching on `inf1` instances, a different compilation of the model is required. To compile ResNet50 model for a batch size of 5, run `run_all` from [this directory](https://github.com/aws/aws-neuron-sdk/tree/master/src/examples/tensorflow/keras_resnet50). - -If you'd also like to build the TensorRT version of the GPU model, run the following command in a new Python environment to install the pip dependencies required for the [generate_gpu_resnet50_model.ipynb](generate_gpu_resnet50_model.ipynb) notebook: - -```bash -pip install tensorflow==2.0.0 -``` - -TensorRT also has to be installed to export the SavedModel. Follow the instructions on [Nvidia TensorRT Documentation](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#installing-debian) to download and install TensorRT on your local machine (this will require ~5GB of space, and you will have to create an Nvidia account). This notebook also requires that the SavedModel generated with the [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook exists in the `resnet50` directory. The TensorRT SavedModel will be exported to the `resnet50_gpu` directory. You can then replace the existing SavedModel with the TensorRT-optimized version in [cortex_gpu.yaml](cortex_gpu.yaml) - it's a drop-in replacement that doesn't require any other dependencies on the Cortex side. 
By default, the API config in [cortex_gpu.yaml](cortex_gpu.yaml) uses the non-TensorRT-optimized version due to simplicity. diff --git a/docs/tutorials/compute/cortex.yaml b/docs/tutorials/compute/cortex.yaml deleted file mode 100644 index afbe5a8394..0000000000 --- a/docs/tutorials/compute/cortex.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-resnet50 - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/resnet50/ - processes_per_replica: 4 - threads_per_process: 16 - config: - classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - input_shape: [224, 224] - input_key: input - output_key: output - compute: - cpu: 3 - mem: 4G diff --git a/docs/tutorials/compute/cortex_gpu.yaml b/docs/tutorials/compute/cortex_gpu.yaml deleted file mode 100644 index f86b85e414..0000000000 --- a/docs/tutorials/compute/cortex_gpu.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-resnet50 - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/resnet50/ - processes_per_replica: 4 - threads_per_process: 24 - config: - classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - input_shape: [224, 224] - input_key: input - output_key: output - compute: - gpu: 1 - cpu: 3 - mem: 4G diff --git a/docs/tutorials/compute/cortex_gpu_server_side_batching.yaml b/docs/tutorials/compute/cortex_gpu_server_side_batching.yaml deleted file mode 100644 index 61604346d0..0000000000 --- a/docs/tutorials/compute/cortex_gpu_server_side_batching.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-resnet50 - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/resnet50/ - server_side_batching: - max_batch_size: 32 - batch_interval: 0.1s - processes_per_replica: 4 - threads_per_process: 192 - config: - classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - input_shape: [224, 224] - input_key: input - output_key: output - compute: - gpu: 1 - cpu: 3 - mem: 4G diff --git a/docs/tutorials/compute/cortex_inf.yaml b/docs/tutorials/compute/cortex_inf.yaml deleted file mode 100644 index 13f999e1b5..0000000000 --- a/docs/tutorials/compute/cortex_inf.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-resnet50 - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/resnet50_neuron/ - processes_per_replica: 4 - threads_per_process: 256 - config: - classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - input_shape: [224, 224] - input_key: input - output_key: output - compute: - inf: 1 - cpu: 3 - mem: 4G - autoscaling: - max_replica_concurrency: 16384 diff --git a/docs/tutorials/compute/cortex_inf_server_side_batching.yaml b/docs/tutorials/compute/cortex_inf_server_side_batching.yaml deleted file mode 100644 index 2b33961e95..0000000000 --- a/docs/tutorials/compute/cortex_inf_server_side_batching.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier-resnet50 - kind: RealtimeAPI - predictor: - type: tensorflow - path: predictor.py - model_path: s3://cortex-examples/tensorflow/resnet50_neuron_batch_size_5/ - server_side_batching: - max_batch_size: 5 - batch_interval: 0.1s - processes_per_replica: 4 - threads_per_process: 260 - config: - classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json - input_shape: [224, 224] - input_key: input_1:0 - output_key: probs/Softmax:0 - compute: - inf: 1 - cpu: 3 - mem: 4G - autoscaling: - max_replica_concurrency: 16384 diff --git a/docs/tutorials/compute/generate_gpu_resnet50_model.ipynb b/docs/tutorials/compute/generate_gpu_resnet50_model.ipynb deleted file mode 100644 index ca78235b4d..0000000000 --- a/docs/tutorials/compute/generate_gpu_resnet50_model.ipynb +++ /dev/null @@ -1,131 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "# Generate GPU Resnet50 Model\n", - "\n", - "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "from tensorflow.python.compiler.tensorrt import trt_convert as trt" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "input_model_dir = \"resnet50\"\n", - "output_model_dir = \"resnet50_gpu\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS\n", - "conversion_params = conversion_params._replace(\n", - " max_workspace_size_bytes=(1<<30))\n", - "conversion_params = conversion_params._replace(precision_mode=\"FP16\")\n", - "conversion_params = conversion_params._replace(\n", - " maximum_cached_engines=100)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Linked TensorRT version: (0, 0, 0)\n", - "INFO:tensorflow:Loaded TensorRT version: (0, 0, 0)\n", - "INFO:tensorflow:Running against TensorRT version 0.0.0\n" - ] - } - ], - "source": [ - "converter = trt.TrtGraphConverterV2(\n", - " input_saved_model_dir=input_model_dir,\n", - " conversion_params=conversion_params)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From /home/robert/.miniconda3/envs/py36-tf/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1781: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) 
with constraint is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "If using Keras pass *_constraint arguments to layers.\n", - "WARNING:tensorflow:Issue encountered when serializing variables.\n", - "Type is unsupported, or the types of the items don't match field type in CollectionDef. Note this is a warning and probably safe to ignore.\n", - "to_proto not supported in EAGER mode.\n", - "WARNING:tensorflow:Issue encountered when serializing trainable_variables.\n", - "Type is unsupported, or the types of the items don't match field type in CollectionDef. Note this is a warning and probably safe to ignore.\n", - "to_proto not supported in EAGER mode.\n" - ] - } - ], - "source": [ - "converter.convert()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: resnet50_gpu/assets\n" - ] - } - ], - "source": [ - "converter.save(output_model_dir)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/tutorials/compute/generate_resnet50_models.ipynb b/docs/tutorials/compute/generate_resnet50_models.ipynb deleted file mode 100644 index 11eaf5a316..0000000000 --- a/docs/tutorials/compute/generate_resnet50_models.ipynb +++ /dev/null @@ -1,178 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Generate Resnet50 Models\n", - "\n", - "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import time\n", - "import shutil\n", - "import tensorflow as tf\n", - "import tensorflow.neuron as tfn\n", - "import tensorflow.compat.v1.keras as keras\n", - "from tensorflow.keras.applications.resnet50 import ResNet50" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Prepare export directories for compile/non-compiled versions of the model." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "model_dir = \"resnet50\"\n", - "compiled_model_dir = model_dir + \"_neuron\"\n", - "shutil.rmtree(model_dir, ignore_errors=True)\n", - "shutil.rmtree(compiled_model_dir, ignore_errors=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Instantiate a Keras ResNet50 model." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From /home/robert/.miniconda3/envs/py36-neuron/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "If using Keras pass *_constraint arguments to layers.\n" - ] - } - ], - "source": [ - "keras.backend.set_learning_phase(0)\n", - "keras.backend.set_image_data_format('channels_last')\n", - "model = ResNet50(weights='imagenet')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Export the model as SavedModel." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From :5: simple_save (from tensorflow.python.saved_model.simple_save) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.simple_save.\n", - "WARNING:tensorflow:From /home/robert/.miniconda3/envs/py36-neuron/lib/python3.6/site-packages/tensorflow_core/python/saved_model/signature_def_utils_impl.py:201: build_tensor_info (from tensorflow.python.saved_model.utils_impl) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.\n", - "INFO:tensorflow:Assets added to graph.\n", - "INFO:tensorflow:No assets to write.\n", - "INFO:tensorflow:SavedModel written to: resnet50/saved_model.pb\n" - ] - } - ], - "source": [ - "tf.saved_model.simple_save(\n", - " session = keras.backend.get_session(),\n", - " export_dir = model_dir,\n", - " inputs = {'input': model.inputs[0]},\n", - " outputs = {'output': model.outputs[0]})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And then compile it for Inferentia to be used on only one Neuron core. `--static-weights` option is used to cache all weights onto the neuron core's memory." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Restoring parameters from resnet50/variables/variables\n", - "INFO:tensorflow:Froze 320 variables.\n", - "INFO:tensorflow:Converted 320 variables to const ops.\n", - "INFO:tensorflow:fusing subgraph neuron_op_d6f098c01c780733 with neuron-cc\n", - "INFO:tensorflow:Number of operations in TensorFlow session: 4638\n", - "INFO:tensorflow:Number of operations after tf.neuron optimizations: 556\n", - "INFO:tensorflow:Number of operations placed on Neuron runtime: 554\n", - "INFO:tensorflow:No assets to save.\n", - "INFO:tensorflow:No assets to write.\n", - "INFO:tensorflow:SavedModel written to: resnet50_neuron/saved_model.pb\n", - "INFO:tensorflow:Successfully converted resnet50 to resnet50_neuron\n" - ] - }, - { - "data": { - "text/plain": [ - "{'OnNeuronRatio': 0.9964028776978417}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "compiler_args = ['--static-weights', '--num-neuroncores', '1']\n", - "batch_size = 1\n", - "tfn.saved_model.compile(model_dir, compiled_model_dir, batch_size)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/tutorials/compute/predictor.py b/docs/tutorials/compute/predictor.py deleted file mode 100644 index 98828723cc..0000000000 --- a/docs/tutorials/compute/predictor.py +++ /dev/null @@ -1,63 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` 
(e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import os -import cv2 -import numpy as np -import requests -import imageio -import json -import base64 - - -def read_image(payload): - """ - Read JPG image from {"url": "https://..."} or from a bytes object. - """ - if isinstance(payload, bytes): - jpg_as_np = np.frombuffer(payload, dtype=np.uint8) - img = cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR) - elif isinstance(payload, dict) and "url" in payload.keys(): - img = imageio.imread(payload["url"]) - else: - return None - return img - - -def prepare_image(image, input_shape, input_key): - """ - Prepares an image for the TFS client. - """ - img = cv2.resize(image, input_shape, interpolation=cv2.INTER_NEAREST) - img = {input_key: img[np.newaxis, ...]} - return img - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - self.client = tensorflow_client - - # load classes - classes = requests.get(config["classes"]).json() - self.idx2label = [classes[str(k)][1] for k in range(len(classes))] - - self.input_shape = tuple(config["input_shape"]) - self.input_key = str(config["input_key"]) - self.output_key = str(config["output_key"]) - - def predict(self, payload): - # preprocess image - img = read_image(payload) - if img is None: - return None - img = prepare_image(img, self.input_shape, self.input_key) - - # predict - results = self.client.predict(img)[self.output_key] - results = np.argsort(results) - - # Lookup and print the top 5 labels - top5_idx = results[-5:] - top5_labels = [self.idx2label[idx] for idx in top5_idx] - top5_labels = top5_labels[::-1] - - return top5_labels diff --git a/docs/tutorials/compute/requirements.txt b/docs/tutorials/compute/requirements.txt deleted file mode 100644 index 66340adf33..0000000000 --- a/docs/tutorials/compute/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -imageio==2.9.* diff --git a/docs/tutorials/compute/sample.bin b/docs/tutorials/compute/sample.bin 
deleted file mode 100644 index 921abf24a5c99cd3c1d1cd12d00f134a16391a77..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8680 zcmbVxYg`le*6v`cQB&uZa{9BbO)$v8W{2LJ+NxL z*0Y{xjkDd^g}nahij^x67Z(I^fq#heF%p4{b73CL^El@58jE=z|JrM;*CvdgFyYk~ zd(y-S>`Cki6DCfcILVcHz)w>qyG~*L!@SAgFC90Y#TxI*p1^)}$^Yodc?)r$h;$)c zESEQsaqccGcNga!1cP&qe|0=K?B5TUaje(IPhi6_Cc_04ufy?KEI9OdI5AwE0e?qc zb00r#Zs76>9xF zgAYTNghqTExgzS5m7lIz8~YV+-TDpjUw^Z8Tf+7oDSJ}C-z!WLWgo~nDE{$K?!S)w zeDv7yUrrRBDf#v6xzh6&F8)@2t>Sv+4aLp6`a2Dcs=L28J^bU();4YXqYj}*2L)_9WE}rJ!#(JFU_7?cDK0Bm>;Bj)53)IDzg8-!1n*IBKz;a{%>3z$P|_f z6p!VO;K*>(f-^M{>YxQ@auMcB9_hI|J5TOecjRR24cSbeirq2PN9JhTfQ@s=;N6k? z%)Ihq(O27!#ozlmCURxZ%!Q|>uO8o28aW+_iR5dB!;|=0{OWKha?j4I!FG&XzLKJ6 z?+-&&+(k~L?E9Nnb{itKVQ2|yxb@75#A~lOeiq^#;}w+^sxT@h0f4}L% zQ~c9YUH!S&79eMG7r5Lcp5QOF?^@)1C-T##UwepQFNI%DVHg22>(oDG<5QSgI}!e>I?31Wj}l;uP;4Wm3+hZDIwg(pq_Oyfi9^btyxD(AwoX84PVzeMp1KyAiUqT;k4a<~$m~v+?aFRDYVT*HD-f@yNZR$2$6M(G&4uueKmUL_St2 z&8w4g3pt7|C*qU9C%zT#r57&j`Py+d^{uM3c*Ape}$xV@=HZ zVcs)wN6!DLp+7el@PDDT3Gx4^yyrg+G49hRW}J!+R`-W#12)F7A)DQbJWaB6c4DDc z<(s5GSLQf~U&qU+NGI}!Npwzpx{?x4bnH+Ua6R4=+gJ2)LNr68t-dL|O)2zz(JcH_ zrqXdnEjBJ0IJAMIkS)>%0{YUm*}Wn!?PABlrMxS&W<$54_^?$z!*SY)=%cy_GR_;z z!>{}6QzF`LoqQ{&I3bhmxQOd-V1r)Jgx_DKLx?*PLqS=b#QtRu{#f~2TfRkk%ZNQ< z&yt6bmmCU1J{qOUdl%D*Os_Q;=-korut6CmNKvT?*O;<8d9iqCnP z=lgux9qMrMXjeu`gmy)E;&6m+W|kAtvB}t96Ea&Zva_;;!tPxEm&BAl!hBLVJL*-r z10`-eX`y>oP{PGMynEQX=`oRrr!;c9OWcK)NcOC_+|o$)W<}(-mSx^Qf90F7I{El} zRoc_ET7*&2L?zzimZ+9hX=Nl1oRz~fN37HOw>6D%c7~0X+ww@MBgY^iV>(%?zv8-X2!n%qk=TeuTj=^2T=E9AfCU9fr@m9vI%a=BGX9@bte z#V;f`Mv5YEzAc*Oi&mMHG0eRH%=zu4Z;urntCAG(%#Lj2%yWCeQ7hy(g)| zUw=$4Pkj2`obB_T`_(M(c~pMxkFf3bB|VuR(tl~F&H4apeqzVVR3;R2l+Bn`EpU_x z{K<%Qnp)4E&_;2kJ)T||K$a$Ib`o1nNkB8N?SZsTB5Zgb{wyH(q&zU(C%w6pPd|Z4 zpie8-woA)8Z5)Mti4)n}LkzLy2QodWU0Ph9Y?kXwse}H#l6I8O5TN5;MbycRsX)1c z_cCDqtvNj|cfsj#8El{K>Aof|;-A>4CmYPNZ)+(ayf_M9jHRsqCRkO7l;o zM8nhIX4!GAwS@M3I7U~L+UmnPt!bQ@U8m5yZND{^LGe0svp;K5?+?Pgh&%e4@;e(a 
zY7Per?X?J`c*R$hPGoYDfc`6qJmN(DY=8-5Pm!{vgy!0iykjvW>T%&q2{ifVcyEAy zVGSVWaUn)+P>yD6W6hVk2)H7LaJ4cV*$53UhkP9{bct<7YMm2#q`bo??lmlIMj9I$Ut1ZWsS;vhQd&&gW=F8bscpg zzciMu%M5{+P|24Q;^}`_d%QVEW$C*>cbMhtG{cMI=GvE}dysD$zC(Fh?%N8l7ugU0 z*;ijLx)(JC5HSdycX$ICt~ZesBFGM zH$^c3B)N+)sae_*;`S12rw>v0Cta^i2xqwKKZ z*O{^A&YEyJ84-rluh+kbqUPQlsWiqtw&6)_$NNQoBT>)S54DB|^xw}jtI5d6or5K` z`}ZT%rN5NAS%T%Gtr!`peLPs|xQML|^Pt_g6NO{(;u3(Q!+HOYoH^Qiq{O9@V*r=NL z+HZ6R{+kC?u?HCcPdYZbnQJYbYb3T~OER_0FiF5H^FVodQ&pr=M${$C`$9PigPV1% z#PV<2@1~lH4#yjn;do-eBLVFP9dJ^}QGU~eKU~gD_W%_tMjPtn*nJtg_#Fz z;p=Y+<1R65QD-)DB@Qve%kg`udlf?kWnF|%y1zwMUQD+XEdu6iB=n~&o*53a#E#w? z^P+*G59}S_-6za3(LM4k~X$Aw0kFQCfhD;!6yYBDyx zFWZjUx72<+g5JD_GvaHb)co(BxP!GTEvkku&|3z4kX;htD8YL|{3F!dv-Fde@W|DW zt?|N4u_gNY4ktndxb*sKg2IzF-iVG~WsNdTdNHOggp}XrIFVDUwP7p!w-etScAVb# zu3U0tub}NFHkGt|;Lr(X^-5=Z-I${l=;Zm5Yty2QajUi;dC1KU(7!Y6h^2q$9F&9+ zd)jD4SGfuLz?GXlC_nLRTb1M7J<*JRg-?y6?_q!8Bdu#`6*qgRG>N;vU|vxDsM*1JX>N}L;HEF9Fr@Cq^20)m-|T(fNH4h=a^&o zbomJl=&?E!8toE@$CXZ`7lzC8QyIaa&Ci=;+FK$HDBLN=VgntI4FwW|XDWm6{4pKf zPXBdTz0AP{--TJeJm05$Ehv0nhk zb{o)$gj|K|TYFNUq@yIyiA=;P9w$T|cHbt?k7H{;BzEr09n2Tp8Z+8vQhwgpY5Aw- znFY8mqy<{r;HIxmw25JcXzBTxi%hHbO@62S9xy<{7keYDvP%)9X5ZG@H0^g87YoFC zAO>#d*)(ZKDPc<+*_K5w_D%WWTvdKbgNjdklYc-lr!|V7eDzdpUtaKwC(MW)KKSbJ zc$N3_I1-Z)-)WDM1_Mma36iX_msJi0TpzlOSlMYlypJ$k8^Aki_dsIpjr18gVBV$; zS8f;@bsI1(?#ackUaB9)3pf=|5{T*ct>xkIH|V;{;VJN$Mo&dC*%@((ndE-wPosyN z$o3=_10zQ0ybz41;z*t>9o42v3t)h^1!oIH#O>u3PjRUGr=<1^Qddmv^1C(mT%=e( zBm^O1=7u?ugXhT$z?d-obu!|PVO<;kY*UF&V2Y6m4WObdx8++%gWVTChl&mgH9L{n z)a!71Oee6tbr3JJ=5MK4oG0d6RCUVS!tOozD67X>-&Y#~ z@@u4L#Qg*vQa{EkAPxF$1y=Q_i}c|+EjK6;Xe~M@jQ6%I^G}w2GCf~dVviC3In@EI z%rt?=gWdT6uCQcBSux3}HbNgAFc=Oh+G-0)o*9oIdBwndeMdifHyw4HbRs=VMq39u zN+2l*4Ez=fE#~=K*4WN#gKsmytGij!l`%z#MS#I%Q>zw)4ZVtWA{PYjm|sC+6CL7< z+EeT)5m(eXB1RxI?5+*bSeu5L>ra81g&(9a9l8&O$bi`KtJeFxSQ)Xo@92qs4(3oe z4n38altZ~MgVi~4)~E<_edlzS<2uHt56a8&uQ#DbR3gi!4gT|6N7gMOD-nv zrrfr+CJ+@XAYp+w;jA~bVN+$_%XqKS5CL7{C;_54-zW79LziK5=vK@V+VuhUvHS#) 
zpk|Or{pdt2er>_IV~MsdW*Q#iDg;aDcB!lN!DL8!&V?>wY-XY+L_N=pGYG?=9U%)U z-~^s!O~mhVUmBcnLo``T?c5u!bm(bV9lomX~uVEwD{?hu$P7F&Kw1dxP2P1-Cc>Q*|$3`!Dz zp!X2p?7zQiUxa2D&l>Yh6BGnLNYS5r)#gvg3t)ojF00|DvEhqdD7pMv=n$GddopkMu^1@}OO8uB@kEHJzyrh8@)BHk()u7pCx8$^ zke}Ft_v1MkvPeoZ?fEXdS*K zXV;j~G|N$0%uF~Tx5iGpZa2#zEA$ijzcNFZSDNLtpVSOs{g!B03PQ?+2*svZ7ZYxB z6sOSOoMQS>fNZ2{38Qqc|MeX8@35iT@q3;k@s zBmR^5t=x%112EH}hV((MyU?&PR0S2dRIeK-z$qu5adfS&_^pT~HU%^M3b zm^&Omfk)F|4Z7*o!eLsNwhT5Nwr)7)w9w%KkKZhfveX9C^nsIv)Y+_y?yKkF16`%@hV8GWaN(Zfw2RbGS zcD|6b>>i>V%(vt7SU4R-IgzV~D}H{gf{8KWm5Ib?zoSoxoo$%S#4buoAS`cadPi&~ z7zmXs#14gES+Z2~fxj@V`HFHs zUU}SfL7pCCF36J64+XDV|M_(8!w8oCB}#o^*LDKxW~v`n-RHqi$jj#!oCuD#W8~I> z+@}*jK4Fb)x_d4^s^AUn9v~NLmzi+fUnei8lFh#ejr6k2Mc%kBE?JiOpmX*x_dp_+ zleye9ZJs?4dym*GG(t1-FcuZ>C@ayHrMo$lLE+58%?NvhqZSn;q<}LKaAT}fvSAH03DhU`J`mx3;=_pZQmJski-^+yt*j4GkX284x*u;a|e^0|cE>zQ#E?n70 zFCe_g?`tzit$7k)aW8Zr0Q^VDZ&s919VV~BB!9|*e| zkEY-61sc0iKsy)|%NxM}he?eJMjt0&kO#(Pul%zRZ+#BL94z zwyhdAu;4b1wkAL}KLOTV;ol59x9fK84H(KlazoPBGTj{Lkd$cH>M`FoXayg0Vt;fF zQMN+S1)D1mNiIx?9M65cexrRZ^}6GvmEro(#h!hD3;Ut2e%?5Bul5ZYc57^a;XtJ&49?oHPn<<1gB^h5 z=)gHX1D*4cNfb57sDvUVs7V=ql}d)JL5MjW)@UDJ=%WmXn+I{js)YHVk~kia=~uw7 zM)-Iy;z`v;X?dv%HK}3p!E2{w#7CVTM(1joz4+AKRr#n2(nH+ceS_#I1q{vsncFR1 zN0b-cNL9l=u4wB%F)ZXechnS9sPzgnjJuh6gtK5(RC}G#G+ighm|uCQVLSH&H_;r` zZv=%0fJfyQxd7JN?^6&+pc;1etsHQaW;wVI3XLF6l9&l)X(kOLh@Yu|q%{jITB zcd*r(@nQXj+ETp$BU9>18{$81mH+r%Kw8DHmi-$A zfLFLlRE}##^_k;VQWg) zKS)zIMCZaTAq;TqDra;e6Lr2@4Qq?4fStu`!tM+Lg26OG8UU=nb3-f)jTK;z3w<+Y z6_W;-E@i{vC^TF{+P2Lap<$DT0i(x*U2QrhKOkJ8*MPnb)YsqSmN}6%o;wqzS1}dZ zv543iZ|6YqNI$$F47PV;Q&x-EM?suk#Z0q6i`kh5Hy4ogG~C4FpaFU>*@lCa;C*y)DWKKB3&HKP za3u2~QFqPEbGULj)Ws3X9~)0-N^}EF(4!=n`F6*S5o&&CKYdiH9XbxgA>uqN<86c# zKi^?fTxrGn7ixxn55i3yy~;AYd547h;gZ;*Qn?LtvR()*^Dlynr<%w?P-}|E4dSUbzXj zHn3&i=qTYb+aM7CuzC|*DP*R%;S^ev*1gRL@6!cf_QZ97paU4+BDOV3q9E_&Tic*3 z=!haz{xE5DD)@L1(HeHcO(Qiex{H9V>nmj>ddc-<>;j9a`uiCf!9ZQ$G+^YdW&?HA zzmxtBj4p0IYD)nYCCH)#V6Cst^%1&dJ~)-NfPU;mLYRox5s$CAv=W;cFCu32sNI+H 
zht2--zKbxG|4QB9iKJ^`DT2eTV<`9?y*z;^>nup@R6^j);jMs$$yJZ7@@0mw<9vs> nI+?jSC^F{xUjzvL0|#PdBqF?9&IK8`: - -```bash -$ cortex get text-generator - -status last update avg request 2XX -live 1m - - - -endpoint: http://localhost:8889 -``` - -You can also stream logs from your API: - -```bash -$ cortex logs text-generator - -... -``` - -## Deploy your model to AWS - -Cortex can automatically provision infrastructure on your AWS account and deploy your models as production-ready web services: - -```bash -$ cortex cluster up -``` - -This creates a Cortex cluster in your AWS account, which will take approximately 15 minutes. After your cluster is created, you can deploy to your cluster by using the same code and configuration as before: - -```python -import cortex - -cx_aws = cortex.client("aws") - -api_spec = { - "name": "text-generator", - "kind": "RealtimeAPI", - "predictor": { - "type": "python", - "path": "predictor.py" - } -} - -cx_aws.deploy(api_spec, project_dir=".") -``` - -Monitor the status of your APIs using `cortex get` using your CLI: - -```bash -$ cortex get --watch - -env realtime api status up-to-date requested last update avg request 2XX -aws text-generator live 1 1 1m - - -local text-generator live 1 1 17m 3.1285 s 1 -``` - -The output above indicates that one replica of your API was requested and is available to serve predictions. Cortex will automatically launch more replicas if the load increases and will spin down replicas if there is unused capacity. - -Show additional information for your API (e.g. its endpoint) using `cortex get `: - -```bash -$ cortex get text-generator --env aws - -status up-to-date requested last update avg request 2XX -live 1 1 1m - - - -endpoint: https://***.execute-api.us-west-2.amazonaws.com/text-generator -``` - -## Run on GPUs - -If your cortex cluster is using GPU instances (configured during cluster creation) or if you are running locally with an nvidia GPU, you can run your text generator API on GPUs. 
Add the `compute` field to your API configuration and re-deploy: - -```python -api_spec = { - "name": "text-generator", - "kind": "RealtimeAPI", - "predictor": { - "type": "python", - "path": "predictor.py" - }, - "compute": { - "gpu": 1 - } -} - -cx_aws.deploy(api_spec, project_dir=".") -``` - -As your new API is initializing, the old API will continue to respond to prediction requests. Once the API's status becomes "live" (with one up-to-date replica), traffic will be routed to the updated version. You can track the status of your API using `cortex get`: - -```bash -$ cortex get --env aws --watch - -realtime api status up-to-date stale requested last update avg request 2XX -text-generator updating 0 1 1 29s - - -``` - -## Cleanup - -Deleting APIs will free up cluster resources and allow Cortex to scale down to the minimum number of instances you specified during cluster creation: - -```python -cx_local.delete_api("text-generator") - -cx_aws.delete_api("text-generator") -``` diff --git a/docs/tutorials/realtime/deploy.ipynb b/docs/tutorials/realtime/deploy.ipynb deleted file mode 100644 index 5ffbce9caa..0000000000 --- a/docs/tutorials/realtime/deploy.ipynb +++ /dev/null @@ -1,80 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", - "\n", - "This example needs to run on a machine that supports Docker to deploy Cortex APIs locally (Colab users can still deploy to remote Cortex clusters)", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip3 install cortex\n", - "!pip3 install requests" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import cortex\n", - "\n", - "cx = cortex.client(\"local\")\n", - "\n", - "api_spec = {\n", - " \"name\": \"text-generator\",\n", - " \"kind\": \"RealtimeAPI\",\n", - " \"predictor\": {\n", - " \"type\": \"python\",\n", - " \"path\": \"predictor.py\"\n", - " }\n", - "}\n", - "\n", - "cx.deploy(api_spec, project_dir=\".\", wait=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "endpoint = cx.get_api(\"text-generator\")[\"endpoint\"]\n", - "payload = {\"text\": \"hello world\"}\n", - "print(requests.post(endpoint, payload).text)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/tutorials/realtime/predictor.py b/docs/tutorials/realtime/predictor.py deleted file mode 100644 index b14d8abcc7..0000000000 --- a/docs/tutorials/realtime/predictor.py +++ /dev/null @@ -1,17 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import torch -from transformers import GPT2Tokenizer, GPT2LMHeadModel - - -class PythonPredictor: - def __init__(self, config): - self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2") - self.model = GPT2LMHeadModel.from_pretrained("gpt2").to(self.device) - - def predict(self, payload): - input_length = len(payload["text"].split()) - tokens = self.tokenizer.encode(payload["text"], return_tensors="pt").to(self.device) - prediction = self.model.generate(tokens, max_length=input_length + 20, do_sample=True) - return self.tokenizer.decode(prediction[0]) diff --git a/docs/tutorials/realtime/requirements.txt b/docs/tutorials/realtime/requirements.txt deleted file mode 100644 index 1447500abe..0000000000 --- a/docs/tutorials/realtime/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -transformers==3.0.* diff --git a/docs/tutorials/traffic-splitter.md b/docs/tutorials/traffic-splitter.md new file mode 100644 index 0000000000..5db0afda9a --- /dev/null +++ b/docs/tutorials/traffic-splitter.md @@ -0,0 +1,96 @@ +# Deploy a traffic splitter + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +## Install cortex + +```bash +$ pip install cortex +``` + +## Spin up a cluster on AWS (requires AWS credentials) + +```bash +$ cortex cluster up +``` + +## Define 2 realtime APIs and a traffic splitter + +```python +# traffic_splitter.py + +import cortex + +class PythonPredictor: + def __init__(self, config): + from transformers import pipeline + + self.model = pipeline(task="text-generation") + + def predict(self, payload): + return self.model(payload["text"])[0] + +requirements = ["tensorflow", "transformers"] + +api_spec_cpu = { + "name": "text-generator-cpu", + "kind": "RealtimeAPI", + "compute": { + "cpu": 1, + }, +} + +api_spec_gpu = { + "name": "text-generator-gpu", + "kind": "RealtimeAPI", + "compute": { + "gpu": 1, + }, +} + +traffic_splitter = { + "name": "text-generator", + "kind": "TrafficSplitter", + "apis": [ + {"name": "text-generator-cpu", "weight": 30}, + {"name": "text-generator-gpu", "weight": 70}, + ], +} + +cx = cortex.client("aws") +cx.deploy(api_spec_cpu, predictor=PythonPredictor, requirements=requirements) +cx.deploy(api_spec_gpu, predictor=PythonPredictor, requirements=requirements) +cx.deploy(traffic_splitter) +``` + +## Deploy to AWS + +```bash +$ python traffic_splitter.py +``` + +## Monitor + +```bash +$ cortex get text-generator --env aws --watch +``` + +## Stream logs + +```bash +$ cortex logs text-generator +``` + +## Make a request + +```bash +$ curl https:// \ + -X POST -H "Content-Type: application/json" \ + -d '{"text": "hello world"}' +``` + +## Delete the API + +```bash +$ cortex delete text-generator +``` diff --git a/docs/tutorials/traffic-splitting/README.md b/docs/tutorials/traffic-splitting/README.md deleted file mode 100644 index d68d763dd0..0000000000 --- a/docs/tutorials/traffic-splitting/README.md +++ /dev/null @@ -1,111 +0,0 @@ -# Splitting traffic between APIs - -_WARNING: you are on the master branch; please refer to 
examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example shows how to split traffic between 2 different iris-classifiers deployed as Realtime APIs. - -To deploy this example: - -1. Determine your CLI Version `cortex version` -1. Clone the repo and switch to the current version by replacing `` with your CLI version: `git clone -b v https://github.com/cortexlabs/cortex` (e.g. if the output of `cortex version` is 0.18.1, the clone command would be `git clone -b v0.18.1 https://github.com/cortexlabs/cortex`) -1. Navigate to this example directory - -## `cortex deploy` - -```bash -$ cortex deploy --env aws - -creating iris-classifier-onnx (RealtimeAPI) -creating iris-classifier-tf (RealtimeAPI) -created iris-classifier (TrafficSplitter) -``` - -## `cortex get` - -```bash -$ cortex get - -env realtime api status up-to-date requested last update avg request 2XX -aws iris-classifier-onnx updating 0 1 27s - - -aws iris-classifier-tf updating 0 1 27s - - - -env traffic splitter apis last update -aws iris-classifier iris-classifier-onnx:30 iris-classifier-tf:70 27s -``` - -## `cortex get iris-classifier` - -```bash -$ cortex get iris-classifier --env aws - -apis weights status requested last update avg request 2XX 5XX -iris-classifier-onnx 30 live 1 1m - - - -iris-classifier-tf 70 live 1 1m - - - - -last updated: 1m -endpoint: https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -example curl: curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json -... 
-``` - -## Make multiple requests - -```bash -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json -setosa -``` - -## `cortex get iris-classifier` - -Notice the requests being routed to the different Realtime APIs based on their weights (the output below may not match yours): - -```bash -$ cortex get iris-classifier --env aws - -using aws environment - - -apis weights status requested last update avg request 2XX 5XX -iris-classifier-onnx 30 live 1 4m 6.00791 ms 1 - -iris-classifier-tf 70 live 1 4m 5.81867 ms 5 - - -last updated: 4m -endpoint: https://comtf6hs64.execute-api.us-west-2.amazonaws.com/iris-classifier -example curl: curl https://comtf6hs64.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json -... 
-``` - -## Cleanup - -Use `cortex delete ` to delete the Traffic Splitter and the two Realtime APIs (note that the Traffic Splitter and each Realtime API must be deleted by separate `cortex delete` commands): - -```bash -$ cortex delete iris-classifier --env aws - -deleting iris-classifier - -$ cortex delete iris-classifier-onnx --env aws - -deleting iris-classifier-onnx - -$ cortex delete iris-classifier-tf --env aws - -deleting iris-classifier-tf -``` - -Running `cortex delete ` will free up cluster resources and allow Cortex to scale down to the minimum number of instances you specified during cluster installation. It will not spin down your cluster. diff --git a/docs/tutorials/traffic-splitting/cortex.yaml b/docs/tutorials/traffic-splitting/cortex.yaml deleted file mode 100644 index 16702378cd..0000000000 --- a/docs/tutorials/traffic-splitting/cortex.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: iris-classifier-pytorch - kind: RealtimeAPI - predictor: - type: python - path: pytorch_predictor.py - config: - model: s3://cortex-examples/pytorch/iris-classifier/weights.pth - monitoring: - model_type: classification - -- name: iris-classifier-onnx - kind: RealtimeAPI - predictor: - type: onnx - path: onnx_predictor.py - model_path: s3://cortex-examples/onnx/iris-classifier/ - monitoring: - model_type: classification - -- name: iris-classifier - kind: TrafficSplitter - apis: - - name: iris-classifier-onnx - weight: 30 - - name: iris-classifier-pytorch - weight: 70 diff --git a/docs/tutorials/traffic-splitting/model.py b/docs/tutorials/traffic-splitting/model.py deleted file mode 100644 index fe29ff7b6d..0000000000 --- a/docs/tutorials/traffic-splitting/model.py +++ /dev/null @@ -1,59 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable -from sklearn.datasets import load_iris -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score - - -class IrisNet(nn.Module): - def __init__(self): - super(IrisNet, self).__init__() - self.fc1 = nn.Linear(4, 100) - self.fc2 = nn.Linear(100, 100) - self.fc3 = nn.Linear(100, 3) - self.softmax = nn.Softmax(dim=1) - - def forward(self, X): - X = F.relu(self.fc1(X)) - X = self.fc2(X) - X = self.fc3(X) - X = self.softmax(X) - return X - - -if __name__ == "__main__": - iris = load_iris() - X, y = iris.data, iris.target - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) - - train_X = Variable(torch.Tensor(X_train).float()) - test_X = Variable(torch.Tensor(X_test).float()) - train_y = Variable(torch.Tensor(y_train).long()) - test_y = Variable(torch.Tensor(y_test).long()) - - model = IrisNet() - - criterion = nn.CrossEntropyLoss() - - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - - for epoch in range(1000): - optimizer.zero_grad() - out = model(train_X) - loss = criterion(out, train_y) - loss.backward() - optimizer.step() - - if epoch % 100 == 0: - print("number of epoch {} loss {}".format(epoch, loss)) - - predict_out = model(test_X) - _, predict_y = torch.max(predict_out, 1) - - print("prediction accuracy {}".format(accuracy_score(test_y.data, predict_y.data))) - - torch.save(model.state_dict(), "weights.pth") diff --git a/docs/tutorials/traffic-splitting/onnx_predictor.py b/docs/tutorials/traffic-splitting/onnx_predictor.py deleted file mode 100644 index b135129e14..0000000000 --- a/docs/tutorials/traffic-splitting/onnx_predictor.py +++ /dev/null @@ -1,20 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` 
(e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -labels = ["setosa", "versicolor", "virginica"] - - -class ONNXPredictor: - def __init__(self, onnx_client, config): - self.client = onnx_client - - def predict(self, payload): - model_input = [ - payload["sepal_length"], - payload["sepal_width"], - payload["petal_length"], - payload["petal_width"], - ] - - prediction = self.client.predict(model_input) - predicted_class_id = prediction[0][0] - return labels[predicted_class_id] diff --git a/docs/tutorials/traffic-splitting/pytorch_predictor.py b/docs/tutorials/traffic-splitting/pytorch_predictor.py deleted file mode 100644 index 71994bb9ae..0000000000 --- a/docs/tutorials/traffic-splitting/pytorch_predictor.py +++ /dev/null @@ -1,50 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import re -import torch -import os -import boto3 -from botocore import UNSIGNED -from botocore.client import Config -from model import IrisNet - -labels = ["setosa", "versicolor", "virginica"] - - -class PythonPredictor: - def __init__(self, config): - # download the model - bucket, key = re.match("s3://(.+?)/(.+)", config["model"]).groups() - - if os.environ.get("AWS_ACCESS_KEY_ID"): - s3 = boto3.client("s3") # client will use your credentials if available - else: - s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client - - s3.download_file(bucket, key, "/tmp/model.pth") - - # initialize the model - model = IrisNet() - model.load_state_dict(torch.load("/tmp/model.pth")) - model.eval() - - self.model = model - - def predict(self, payload): - # Convert the request to a tensor and pass it into the model - input_tensor = torch.FloatTensor( - [ - [ - payload["sepal_length"], - payload["sepal_width"], - payload["petal_length"], - 
payload["petal_width"], - ] - ] - ) - - # Run the prediction - output = self.model(input_tensor) - - # Translate the model output to the corresponding label string - return labels[torch.argmax(output[0])] diff --git a/docs/tutorials/traffic-splitting/sample.json b/docs/tutorials/traffic-splitting/sample.json deleted file mode 100644 index e17bbb2896..0000000000 --- a/docs/tutorials/traffic-splitting/sample.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "sepal_length": 5.2, - "sepal_width": 3.6, - "petal_length": 1.4, - "petal_width": 0.3 -} diff --git a/docs/tutorials/utils/README.md b/docs/tutorials/utils/README.md deleted file mode 100644 index 61202eb0c0..0000000000 --- a/docs/tutorials/utils/README.md +++ /dev/null @@ -1,36 +0,0 @@ -## Throughput tester - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -[throughput_test.py](throughput_test.py) is a Python CLI that can be used to test the throughput of your deployed API. The throughput will vary depending on your API's configuration (specified in your `cortex.yaml` file), your local machine's resources (mostly CPU, since it has to spawn many concurrent requests), and the internet connection on your local machine. - -```bash -Usage: throughput_test.py [OPTIONS] ENDPOINT PAYLOAD - - Program for testing the throughput of Cortex-deployed APIs. - -Options: - -w, --processes INTEGER Number of processes for prediction requests. [default: 1] - -t, --threads INTEGER Number of threads per process for prediction requests. [default: 1] - -s, --samples INTEGER Number of samples to run per thread. [default: 10] - -i, --time-based FLOAT How long the thread making predictions will run for in seconds. - If set, -s option will be ignored. - --help Show this message and exit. -``` - -`ENDPOINT` is the API's endpoint, which you can get by running `cortex get `. 
This argument can also be exported as an environment variable instead of being passed to the CLI. - -`PAYLOAD` can either be a local file or an URL resource that points to a file. The allowed extension types for the file are `json` and `jpg`. This argument can also be exported as an environment variable instead of being passed to the CLI. - -* `json` files are generally `sample.json`s as they are found in most Cortex examples. Each of these is attached to the request as payload. The content type of the request is `"application/json"`. -* `jpg` images are read as numpy arrays and then are converted to a bytes object using `cv2.imencode` function. The content type of the request is `"application/octet-stream"`. - -The same payload `PAYLOAD` is attached to all requests the script makes. - -### Dependencies - -The [throughput_test.py](throughput_test.py) CLI has been tested with Python 3.6.9. To install the CLI's dependencies, run the following: - -```bash -pip install requests click opencv-contrib-python numpy validator-collection imageio -``` diff --git a/docs/tutorials/utils/throughput_test.py b/docs/tutorials/utils/throughput_test.py deleted file mode 100644 index c157cf0b29..0000000000 --- a/docs/tutorials/utils/throughput_test.py +++ /dev/null @@ -1,179 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import os -import sys -import click -import concurrent.futures -import requests -import imageio -import json -import time -import itertools -import cv2 -import numpy as np - -from validator_collection import checkers - - -@click.command(help="Program for testing the throughput of Cortex-deployed APIs.") -@click.argument("endpoint", type=str, envvar="ENDPOINT") -@click.argument("payload", type=str, envvar="PAYLOAD") -@click.option( - "--processes", - "-p", - type=int, - default=1, - show_default=True, - help="Number of processes for prediction requests.", -) -@click.option( - "--threads", - "-t", - type=int, - default=1, - show_default=True, - help="Number of threads per process for prediction requests.", -) -@click.option( - "--samples", - "-s", - type=int, - default=10, - show_default=True, - help="Number of samples to run per thread.", -) -@click.option( - "--time-based", - "-i", - type=float, - default=0.0, - help="How long the thread making predictions will run for in seconds. 
If set, -s option will be ignored.", -) -def main(payload, endpoint, processes, threads, samples, time_based): - file_type = None - if checkers.is_url(payload): - if payload.lower().endswith(".json"): - file_type = "json" - payload_data = requests.get(payload).json() - elif payload.lower().endswith(".jpg"): - file_type = "jpg" - payload_data = imageio.imread(payload) - elif checkers.is_file(payload): - if payload.lower().endswith(".json"): - file_type = "json" - with open(payload, "r") as f: - payload_data = json.load(f) - elif payload.lower().endswith(".jpg"): - file_type = "jpg" - payload_data = cv2.imread(payload, cv2.IMREAD_COLOR) - else: - print(f"'{payload}' isn't an URL resource, nor is it a local file") - sys.exit(1) - - if file_type is None: - print(f"'{payload}' doesn't point to a jpg image or to a json file") - sys.exit(1) - if file_type == "jpg": - data = image_to_jpeg_bytes(payload_data) - if file_type == "json": - data = json.dumps(payload_data) - - print("Starting the inference throughput test...") - results = [] - start = time.time() - with concurrent.futures.ProcessPoolExecutor(max_workers=processes) as executor: - results = executor_submitter( - executor, processes, process_worker, threads, data, endpoint, samples, time_based - ) - end = time.time() - elapsed = end - start - - total_requests = sum(results) - - print(f"A total of {total_requests} requests have been served in {elapsed} seconds") - print(f"Avg number of inferences/sec is {total_requests / elapsed}") - print(f"Avg time spent on an inference is {elapsed / total_requests} seconds") - - -def process_worker(threads, data, endpoint, samples, time_based): - results = [] - with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor: - results = executor_submitter(executor, threads, task, data, endpoint, samples, time_based) - - return results - - -def executor_submitter(executor, workers, *args, **kwargs): - futures = [] - for worker in range(workers): - future = 
executor.submit(*args, **kwargs) - futures.append(future) - - results = [future.result() for future in futures] - results = list(itertools.chain.from_iterable(results)) - - return results - - -def task(data, endpoint, samples, time_based): - timeout = 60 - - if isinstance(data, str): - headers = {"content-type": "application/json"} - elif isinstance(data, bytes): - headers = {"content-type": "application/octet-stream"} - else: - return - - if time_based == 0.0: - for i in range(samples): - try: - resp = requests.post( - endpoint, - data=data, - headers=headers, - timeout=timeout, - ) - except Exception as e: - print(e) - break - time.sleep(0.1) - return [samples] - else: - start = time.time() - counter = 0 - while start + time_based >= time.time(): - try: - resp = requests.post( - endpoint, - data=data, - headers=headers, - timeout=timeout, - ) - except Exception as e: - print(e) - break - time.sleep(0.1) - counter += 1 - return [counter] - - -def image_to_jpeg_nparray(image, quality=[int(cv2.IMWRITE_JPEG_QUALITY), 95]): - """ - Convert numpy image to jpeg numpy vector. - """ - is_success, im_buf_arr = cv2.imencode(".jpg", image, quality) - return im_buf_arr - - -def image_to_jpeg_bytes(image, quality=[int(cv2.IMWRITE_JPEG_QUALITY), 95]): - """ - Convert numpy image to bytes-encoded jpeg image. 
- """ - buf = image_to_jpeg_nparray(image, quality) - byte_im = buf.tobytes() - return byte_im - - -if __name__ == "__main__": - main() From 22010bfd7a4e54f8af07caaf156118b88ac88bb1 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Fri, 4 Dec 2020 16:53:48 -0800 Subject: [PATCH 05/36] Update docs --- .gitbook.yaml | 5 +-- CODE_OF_CONDUCT.md | 76 ----------------------------------- CONTRIBUTING.md | 9 ----- README.md | 99 ++-------------------------------------------- docs/contact.md | 19 --------- docs/summary.md | 9 ++--- 6 files changed, 7 insertions(+), 210 deletions(-) delete mode 100644 CODE_OF_CONDUCT.md delete mode 100644 CONTRIBUTING.md delete mode 100644 docs/contact.md diff --git a/.gitbook.yaml b/.gitbook.yaml index 8b207447a3..8a7909c2c0 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -1,13 +1,10 @@ root: ./docs/ structure: - readme: ../README.md summary: summary.md redirects: - tutorial: ./tutorials/hello-world/python/README.md - tutorial/realtime: ./tutorials/hello-world/python/README.md - tutorial/batch: ./tutorials/batch/python/README.md + start: ./tutorials/realtime.md install: ./aws/install.md uninstall: ./aws/uninstall.md update: ./aws/update.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 425f0e1e73..0000000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,76 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, sex characteristics, gender identity and expression, -level of experience, education, socio-economic status, nationality, personal -appearance, race, religion, or sexual identity and orientation. 
- -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or - advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic - address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. Examples of -representing a project or community include using an official project e-mail -address, posting via an official social media account, or acting as an appointed -representative at an online or offline event. Representation of a project may be -further defined and clarified by project maintainers. 
- -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at contact@cortex.dev. All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html - -[homepage]: https://www.contributor-covenant.org - -For answers to common questions about this code of conduct, see -https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index facbf253e0..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,9 +0,0 @@ -# Contributing - -Thank you for your interest in contributing to Cortex! - -- **Report a bug, request a feature, or share feedback:** please let us know via [email](mailto:hello@cortex.dev), [chat](https://gitter.im/cortexlabs/cortex), or [issues](https://github.com/cortexlabs/cortex/issues). - -- **Add an example:** we're always excited to see cool models deployed with Cortex. Please check out our [examples](examples) and feel free to add a one by submitting a pull request. - -- **Implement a feature:** here are [instructions for setting up a development environment](docs/contributing/development.md). If you'd like to contribute significant code to the project, please reach out to us so we can work together on the design and make sure we're on the same page before you get started. 
diff --git a/README.md b/README.md index bbbde36959..9feb5f7aeb 100644 --- a/README.md +++ b/README.md @@ -3,15 +3,10 @@
- -[install](https://docs.cortex.dev/install) • [documentation](https://docs.cortex.dev) • [community](https://gitter.im/cortexlabs/cortex) - # Deploy machine learning models to production Cortex is an open source platform for deploying, managing, and scaling machine learning in production. -
- ## Model serving infrastructure * Supports deploying TensorFlow, PyTorch, sklearn and other models as realtime or batch APIs. @@ -19,32 +14,6 @@ Cortex is an open source platform for deploying, managing, and scaling machine l * Runs inference on spot instances with on-demand backups. * Autoscales to handle production workloads. -#### Configure Cortex - -```yaml -# cluster.yaml - -region: us-east-1 -instance_type: g4dn.xlarge -min_instances: 10 -max_instances: 100 -spot: true -``` - -#### Spin up Cortex on your AWS account - -```text -$ cortex cluster up --config cluster.yaml - -○ configuring autoscaling ✓ -○ configuring networking ✓ -○ configuring logging ✓ - -cortex is ready! -``` - -
- ## Reproducible deployments * Package dependencies, code, and configuration for reproducible deployments. @@ -52,43 +21,6 @@ cortex is ready! * Integrate with your data science platform or CI/CD system. * Test locally before deploying to your cluster. -#### Implement a predictor - -```python -# predictor.py - -from transformers import pipeline - -class PythonPredictor: - def __init__(self, config): - self.model = pipeline(task="text-generation") - - def predict(self, payload): - return self.model(payload["text"])[0] -``` - -#### Configure an API - -```python -api_spec = { - "name": "text-generator", - "kind": "RealtimeAPI", - "compute": { - "gpu": 1, - "mem": "8Gi", - }, - "autoscaling": { - "min_replicas": 1, - "max_replicas": 10 - }, - "networking": { - "api_gateway": "public" - } -} -``` - -
- ## Scalable machine learning APIs * Scale to handle production workloads with request-based autoscaling. @@ -97,33 +29,8 @@ api_spec = { * Configure traffic splitting for A/B testing. * Update APIs without downtime. -#### Deploy to your cluster - -```python -import cortex - -cx = cortex.client("aws") -cx.deploy(api_spec, project_dir=".") - -# creating https://example.com/text-generator -``` - -#### Consume your API - -```python -import requests - -endpoint = "https://example.com/text-generator" -payload = {"text": "hello world"} -prediction = requests.post(endpoint, payload) -``` - -
- ## Get started -```bash -pip install cortex -``` - -See the [installation guide](https://docs.cortex.dev/install) for next steps. +* [Deploy a realtime API](https://docs.cortex.dev/start) +* [Read the docs](https://docs.cortex.dev) +* [Join our community](https://gitter.im/cortexlabs/cortex) diff --git a/docs/contact.md b/docs/contact.md deleted file mode 100644 index 70a9748f34..0000000000 --- a/docs/contact.md +++ /dev/null @@ -1,19 +0,0 @@ -# Contact us - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -## Support - -[GitHub](https://github.com/cortexlabs/cortex/issues) - Submit feature requests, file bugs, and track issues. - -[Gitter](https://gitter.im/cortexlabs/cortex) - Chat with us in our community channel. - -[Email](mailto:hello@cortex.dev) - Email us at `hello@cortex.dev` to contact us privately. - -## Contributing - -Find instructions for how to set up your development environment in the [development guide](contributing/development.md). - -## We're hiring - -Interested in joining us? See our [job postings](https://angel.co/company/cortex-labs-inc/jobs). 
diff --git a/docs/summary.md b/docs/summary.md index 04f21db7a3..cd57cf0bd4 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -1,10 +1,9 @@ # Table of contents -* [Deploy machine learning models to production](../README.md) -* [Get started](tutorials/realtime.md) -* [Contact us](contact.md) +* [Deploy a realtime API](tutorials/realtime.md) +* [Deploy a batch API](tutorials/batch.md) -## Running Cortex on AWS +## Running on AWS * [Install](aws/install.md) * [Credentials](aws/credentials.md) @@ -30,14 +29,12 @@ * [Autoscaling](deployments/realtime-api/autoscaling.md) * [Prediction monitoring](deployments/realtime-api/prediction-monitoring.md) * [Traffic Splitter](deployments/realtime-api/traffic-splitter.md) - * [Realtime API tutorial](tutorials/realtime/README.md) * [Batch API](deployments/batch-api.md) * [Predictor implementation](deployments/batch-api/predictors.md) * [API configuration](deployments/batch-api/api-configuration.md) * [API deployment](deployments/batch-api/deployment.md) * [Endpoints](deployments/batch-api/endpoints.md) * [Job statuses](deployments/batch-api/statuses.md) - * [Batch API tutorial](tutorials/batch/README.md) ## Advanced From a6695daf61794ec5ae9e6fcdb022276365425576 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Fri, 4 Dec 2020 16:57:14 -0800 Subject: [PATCH 06/36] Update README.md --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9feb5f7aeb..4d8f3165d7 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ Cortex is an open source platform for deploying, managing, and scaling machine learning in production. +
+ ## Model serving infrastructure * Supports deploying TensorFlow, PyTorch, sklearn and other models as realtime or batch APIs. @@ -14,6 +16,8 @@ Cortex is an open source platform for deploying, managing, and scaling machine l * Runs inference on spot instances with on-demand backups. * Autoscales to handle production workloads. +
+ ## Reproducible deployments * Package dependencies, code, and configuration for reproducible deployments. @@ -21,6 +25,8 @@ Cortex is an open source platform for deploying, managing, and scaling machine l * Integrate with your data science platform or CI/CD system. * Test locally before deploying to your cluster. +
+ ## Scalable machine learning APIs * Scale to handle production workloads with request-based autoscaling. @@ -29,8 +35,9 @@ Cortex is an open source platform for deploying, managing, and scaling machine l * Configure traffic splitting for A/B testing. * Update APIs without downtime. +
+ ## Get started -* [Deploy a realtime API](https://docs.cortex.dev/start) -* [Read the docs](https://docs.cortex.dev) +* [Deploy models](https://docs.cortex.dev/start) * [Join our community](https://gitter.im/cortexlabs/cortex) From 49586aa4d4e76ade2aa9ec37c0aa0cf6a8792632 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Fri, 4 Dec 2020 16:59:39 -0800 Subject: [PATCH 07/36] Update README.md --- .gitbook.yaml | 1 - README.md | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.gitbook.yaml b/.gitbook.yaml index 8a7909c2c0..52b37106f2 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -4,7 +4,6 @@ structure: summary: summary.md redirects: - start: ./tutorials/realtime.md install: ./aws/install.md uninstall: ./aws/uninstall.md update: ./aws/update.md diff --git a/README.md b/README.md index 4d8f3165d7..e955d8c274 100644 --- a/README.md +++ b/README.md @@ -39,5 +39,4 @@ Cortex is an open source platform for deploying, managing, and scaling machine l ## Get started -* [Deploy models](https://docs.cortex.dev/start) -* [Join our community](https://gitter.im/cortexlabs/cortex) +[Deploy models](https://docs.cortex.dev) and [join our community](https://gitter.im/cortexlabs/cortex). 
From 994a8e1b0f41aa256043a2d2b0870bbcd1087853 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Fri, 4 Dec 2020 17:31:19 -0800 Subject: [PATCH 08/36] Reorganize misc docs --- .../environments.md | 0 .../python-client.md | 0 .../telemetry.md | 4 - .../development.md => guides/contributing.md} | 2 +- docs/miscellaneous/architecture.md | 7 - docs/miscellaneous/cli.md | 323 ------------------ docs/summary.md | 16 +- 7 files changed, 5 insertions(+), 347 deletions(-) rename docs/{miscellaneous => deployments}/environments.md (100%) rename docs/{miscellaneous => deployments}/python-client.md (100%) rename docs/{miscellaneous => deployments}/telemetry.md (71%) rename docs/{contributing/development.md => guides/contributing.md} (99%) delete mode 100644 docs/miscellaneous/architecture.md delete mode 100644 docs/miscellaneous/cli.md diff --git a/docs/miscellaneous/environments.md b/docs/deployments/environments.md similarity index 100% rename from docs/miscellaneous/environments.md rename to docs/deployments/environments.md diff --git a/docs/miscellaneous/python-client.md b/docs/deployments/python-client.md similarity index 100% rename from docs/miscellaneous/python-client.md rename to docs/deployments/python-client.md diff --git a/docs/miscellaneous/telemetry.md b/docs/deployments/telemetry.md similarity index 71% rename from docs/miscellaneous/telemetry.md rename to docs/deployments/telemetry.md index b26f2ece87..e7e767c79c 100644 --- a/docs/miscellaneous/telemetry.md +++ b/docs/deployments/telemetry.md @@ -8,10 +8,6 @@ By default, Cortex sends anonymous usage data to Cortex Labs. If telemetry is enabled, events and errors are collected. Each time you run a command an event will be sent with a randomly generated unique CLI ID and the name of the command. For example, if you run `cortex deploy`, Cortex Labs will receive an event of the structure `{id: 1234, command: "deploy"}`. 
In addition, the operator sends heartbeats that include cluster metrics like the types of instances running in your cluster. -## Why is this data being collected? - -Telemetry helps us make Cortex better. For example, we discovered that people are running `cortex delete` more times than we expected and realized that our documentation doesn't explain clearly that `cortex deploy` is declarative and can be run consecutively without deleting APIs. - ## How do I opt out? If you'd like to disable telemetry, modify your `~/.cortex/cli.yaml` file (or create it if it doesn't exist) and add `telemetry: false`. diff --git a/docs/contributing/development.md b/docs/guides/contributing.md similarity index 99% rename from docs/contributing/development.md rename to docs/guides/contributing.md index c851e0abf9..f87133dc8c 100644 --- a/docs/contributing/development.md +++ b/docs/guides/contributing.md @@ -1,4 +1,4 @@ -# Development +# Contributing ## Remote development diff --git a/docs/miscellaneous/architecture.md b/docs/miscellaneous/architecture.md deleted file mode 100644 index 88940898ec..0000000000 --- a/docs/miscellaneous/architecture.md +++ /dev/null @@ -1,7 +0,0 @@ -# Architecture diagram - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -![architecture diagram](https://user-images.githubusercontent.com/808475/83995909-92c1cf00-a90f-11ea-983f-c96117e42aa3.png) - -_note: this diagram is simplified for illustrative purposes_ diff --git a/docs/miscellaneous/cli.md b/docs/miscellaneous/cli.md deleted file mode 100644 index a1bf1ee72e..0000000000 --- a/docs/miscellaneous/cli.md +++ /dev/null @@ -1,323 +0,0 @@ -# CLI commands - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -## Install the CLI - -```bash -pip install cortex -``` - -## Install the CLI without Python Client - -### Mac/Linux OS - -```bash -# Replace `INSERT_CORTEX_VERSION` 
with the complete CLI version (e.g. 0.18.1): -$ bash -c "$(curl -sS https://raw.githubusercontent.com/cortexlabs/cortex/vINSERT_CORTEX_VERSION/get-cli.sh)" - -# For example to download CLI version 0.18.1 (Note the "v"): -$ bash -c "$(curl -sS https://raw.githubusercontent.com/cortexlabs/cortex/v0.18.1/get-cli.sh)" -``` - -By default, the Cortex CLI is installed at `/usr/local/bin/cortex`. To install the executable elsewhere, export the `CORTEX_INSTALL_PATH` environment variable to your desired location before running the command above. - -By default, the Cortex CLI creates a directory at `~/.cortex/` and uses it to store environment configuration. To use a different directory, export the `CORTEX_CLI_CONFIG_DIR` environment variable before running a `cortex` command. - -### Windows - -To install the Cortex CLI on a Windows machine, follow [this guide](../guides/windows-cli.md). - -## Command overview - -### deploy - -```text -create or update apis - -Usage: - cortex deploy [CONFIG_FILE] [flags] - -Flags: - -e, --env string environment to use (default "local") - -f, --force override the in-progress api update - -y, --yes skip prompts - -o, --output string output format: one of pretty|json (default "pretty") - -h, --help help for deploy -``` - -### get - -```text -get information about apis or jobs - -Usage: - cortex get [API_NAME] [JOB_ID] [flags] - -Flags: - -e, --env string environment to use (default "local") - -w, --watch re-run the command every 2 seconds - -o, --output string output format: one of pretty|json (default "pretty") - -v, --verbose show additional information (only applies to pretty output format) - -h, --help help for get -``` - -### logs - -```text -stream logs from an api - -Usage: - cortex logs API_NAME [JOB_ID] [flags] - -Flags: - -e, --env string environment to use (default "local") - -h, --help help for logs -``` - -### refresh - -```text -restart all replicas for an api (without downtime) - -Usage: - cortex refresh API_NAME [flags] - -Flags: 
- -e, --env string environment to use (default "local") - -f, --force override the in-progress api update - -o, --output string output format: one of pretty|json (default "pretty") - -h, --help help for refresh -``` - -### predict - -```text -make a prediction request using a json file - -Usage: - cortex predict API_NAME JSON_FILE [flags] - -Flags: - -e, --env string environment to use (default "local") - -h, --help help for predict -``` - -### delete - -```text -delete any kind of api or stop a batch job - -Usage: - cortex delete API_NAME [JOB_ID] [flags] - -Flags: - -e, --env string environment to use (default "local") - -f, --force delete the api without confirmation - -c, --keep-cache keep cached data for the api - -o, --output string output format: one of pretty|json (default "pretty") - -h, --help help for delete -``` - -### cluster up - -```text -spin up a cluster - -Usage: - cortex cluster up [flags] - -Flags: - -c, --config string path to a cluster configuration file - --aws-key string aws access key id - --aws-secret string aws secret access key - --cluster-aws-key string aws access key id to be used by the cluster - --cluster-aws-secret string aws secret access key to be used by the cluster - -e, --configure-env string name of environment to configure (default "aws") - -y, --yes skip prompts - -h, --help help for up -``` - -### cluster info - -```text -get information about a cluster - -Usage: - cortex cluster info [flags] - -Flags: - -c, --config string path to a cluster configuration file - -n, --name string aws name of the cluster - -r, --region string aws region of the cluster - --aws-key string aws access key id - --aws-secret string aws secret access key - -e, --configure-env string name of environment to configure - -d, --debug save the current cluster state to a file - -y, --yes skip prompts - -h, --help help for info -``` - -### cluster configure - -```text -update a cluster's configuration - -Usage: - cortex cluster configure [flags] - -Flags: 
- -c, --config string path to a cluster configuration file - --aws-key string aws access key id - --aws-secret string aws secret access key - --cluster-aws-key string aws access key id to be used by the cluster - --cluster-aws-secret string aws secret access key to be used by the cluster - -e, --configure-env string name of environment to configure - -y, --yes skip prompts - -h, --help help for configure -``` - -### cluster down - -```text -spin down a cluster - -Usage: - cortex cluster down [flags] - -Flags: - -c, --config string path to a cluster configuration file - -n, --name string aws name of the cluster - -r, --region string aws region of the cluster - --aws-key string aws access key id - --aws-secret string aws secret access key - -y, --yes skip prompts - -h, --help help for down -``` - -### cluster export - -```text -download the code and configuration for APIs - -Usage: - cortex cluster export [API_NAME] [API_ID] [flags] - -Flags: - -c, --config string path to a cluster configuration file - -n, --name string aws name of the cluster - -r, --region string aws region of the cluster - --aws-key string aws access key id - --aws-secret string aws secret access key - -h, --help help for export -``` - -### env configure - -```text -configure an environment - -Usage: - cortex env configure [ENVIRONMENT_NAME] [flags] - -Flags: - -p, --provider string set the provider without prompting - -o, --operator-endpoint string set the operator endpoint without prompting - -k, --aws-access-key-id string set the aws access key id without prompting - -s, --aws-secret-access-key string set the aws secret access key without prompting - -r, --aws-region string set the aws region without prompting - -h, --help help for configure -``` - -### env list - -```text -list all configured environments - -Usage: - cortex env list [flags] - -Flags: - -o, --output string output format: one of pretty|json (default "pretty") - -h, --help help for list -``` - -### env default - -```text -set the 
default environment - -Usage: - cortex env default [ENVIRONMENT_NAME] [flags] - -Flags: - -h, --help help for default -``` - -### env delete - -```text -delete an environment configuration - -Usage: - cortex env delete [ENVIRONMENT_NAME] [flags] - -Flags: - -h, --help help for delete -``` - -### version - -```text -print the cli and cluster versions - -Usage: - cortex version [flags] - -Flags: - -e, --env string environment to use (default "local") - -h, --help help for version -``` - -### completion - -```text -generate shell completion scripts - -to enable cortex shell completion: - bash: - add this to ~/.bash_profile (mac) or ~/.bashrc (linux): - source <(cortex completion bash) - - note: bash-completion must be installed on your system; example installation instructions: - mac: - 1) install bash completion: - brew install bash-completion - 2) add this to your ~/.bash_profile: - source $(brew --prefix)/etc/bash_completion - 3) log out and back in, or close your terminal window and reopen it - ubuntu: - 1) install bash completion: - apt update && apt install -y bash-completion # you may need sudo - 2) open ~/.bashrc and uncomment the bash completion section, or add this: - if [ -f /etc/bash_completion ] && ! shopt -oq posix; then . 
/etc/bash_completion; fi - 3) log out and back in, or close your terminal window and reopen it - - zsh: - option 1: - add this to ~/.zshrc: - source <(cortex completion zsh) - if that failed, you can try adding this line (above the source command you just added): - autoload -Uz compinit && compinit - option 2: - create a _cortex file in your fpath, for example: - cortex completion zsh > /usr/local/share/zsh/site-functions/_cortex - -Note: this will also add the "cx" alias for cortex for convenience - -Usage: - cortex completion SHELL [flags] - -Flags: - -h, --help help for completion -``` diff --git a/docs/summary.md b/docs/summary.md index cd57cf0bd4..3b984a57ec 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -35,6 +35,9 @@ * [API deployment](deployments/batch-api/deployment.md) * [Endpoints](deployments/batch-api/endpoints.md) * [Job statuses](deployments/batch-api/statuses.md) +* [Python client](deployments/python-client.md) +* [Environments](deployments/environments.md) +* [Telemetry](deployments/telemetry.md) ## Advanced @@ -44,14 +47,6 @@ * [Python packages](deployments/python-packages.md) * [System packages](deployments/system-packages.md) -## Miscellaneous - -* [CLI commands](miscellaneous/cli.md) -* [Python client](miscellaneous/python-client.md) -* [Environments](miscellaneous/environments.md) -* [Architecture diagram](miscellaneous/architecture.md) -* [Telemetry](miscellaneous/telemetry.md) - ## Troubleshooting * [API is stuck updating](troubleshooting/stuck-updating.md) @@ -73,7 +68,4 @@ * [Docker Hub rate limiting](guides/docker-hub-rate-limiting.md) * [Private docker registry](guides/private-docker.md) * [Install CLI on Windows](guides/windows-cli.md) - -## Contributing - -* [Development](contributing/development.md) +* [Contributing](guides/contributing.md) From 91c4ce41fa698ef5abec7b08b304e5a234ac1be6 Mon Sep 17 00:00:00 2001 From: vishal Date: Mon, 7 Dec 2020 18:31:25 -0500 Subject: [PATCH 09/36] Add a simple version of the batch tutorial --- 
docs/tutorials/batch.md | 148 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/docs/tutorials/batch.md b/docs/tutorials/batch.md index e69de29bb2..6a188d487c 100644 --- a/docs/tutorials/batch.md +++ b/docs/tutorials/batch.md @@ -0,0 +1,148 @@ +# Deploy a batch API + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +**Note: Batch APIs are only supported on a Cortex cluster (in AWS).** + +## Install cortex + +```bash +$ pip install cortex +``` + +## Spin up a cluster on AWS (requires AWS credentials) + +```bash +$ cortex cluster up +``` + +## Define a batch API + +```python +# batch.py + +import cortex + +class PythonPredictor: + def __init__(self, config, job_spec): + from torchvision import transforms + import torchvision + import requests + import boto3 + import os, re + + self.model = torchvision.models.alexnet(pretrained=True).eval() + self.labels = requests.get(config["labels"]).text.split("\n")[1:] + + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.preprocess = transforms.Compose( + [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] + ) + + self.s3 = boto3.client("s3") # initialize S3 client to save results + self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() + self.key = os.path.join(self.key, job_spec["job_id"]) + + def predict(self, payload, batch_id): + import json + import torch + from PIL import Image + from io import BytesIO + import requests + + tensor_list = [] + for image_url in payload: # download and preprocess each image + img_pil = Image.open(BytesIO(requests.get(image_url).content)) + tensor_list.append(self.preprocess(img_pil)) + + img_tensor = torch.stack(tensor_list) + with torch.no_grad(): # classify the batch of images +
prediction = self.model(img_tensor) + _, indices = prediction.max(1) + + results = [{"url": payload[i], "class": self.labels[class_idx]} for i, class_idx in enumerate(indices)] + self.s3.put_object(Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", Body=json.dumps(results)) + +requirements = ["torch", "boto3", "pillow", "torchvision", "requests"] + +api_spec = { + "name": "image-classifier", + "kind": "BatchAPI", + "predictor": { + "config": { + "labels": "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" + } + } +} + +cx = cortex.client("aws") +cx.deploy(api_spec, predictor=PythonPredictor, requirements=requirements) +``` + +## Deploy to your Cortex cluster on AWS + +```bash +$ python batch.py +``` + +## Describe the Batch API + +```bash +$ cortex get image-classifier -e aws +``` + +## Submit a job + +```python +import cortex +import requests + +cx = cortex.client("aws") +batch_endpoint = cx.get_api("image-classifier")["endpoint"] + +dest_s3_dir = # specify S3 directory for the results (make sure your cluster has access to this bucket) + +job_spec = { + "workers": 1, + "item_list": { + "items": [ + "https://i.imgur.com/PzXprwl.jpg", + "https://i.imgur.com/E4cOSLw.jpg", + "https://user-images.githubusercontent.com/4365343/96516272-d40aa980-1234-11eb-949d-8e7e739b8345.jpg", + "https://i.imgur.com/jDimNTZ.jpg", + "https://i.imgur.com/WqeovVj.jpg" + ], + "batch_size": 2 + }, + "config": { + "dest_s3_dir": dest_s3_dir + } +} + +response = requests.post(batch_endpoint, json=job_spec) + +print(response.text) +# > {"job_id":"69b183ed6bdf3e9b","api_name":"image-classifier", "config": {"dest_s3_dir": ...}} +``` + +## Monitor the job + +```bash +$ cortex get image-classifier 69b183ed6bdf3e9b -e aws +``` + +## Stream job logs + +```bash +$ cortex logs image-classifier 69b183ed6bdf3e9b -e aws +``` + +## View the results + +Once the job is complete, you should be able to find the results of the batch job in the S3 directory you've specified.
+ +## Delete the Batch API + +```bash +$ cortex delete --env aws image-classifier +``` From 39efeede7a66fe55a34dc1e9ebda2b714e56403b Mon Sep 17 00:00:00 2001 From: vishal Date: Mon, 7 Dec 2020 18:31:50 -0500 Subject: [PATCH 10/36] Remove batch example from the batch tutorial --- docs/tutorials/batch/README.md | 572 -------------------------- docs/tutorials/batch/cortex.yaml | 9 - docs/tutorials/batch/predictor.py | 81 ---- docs/tutorials/batch/requirements.txt | 4 - docs/tutorials/batch/sample.json | 3 - 5 files changed, 669 deletions(-) delete mode 100644 docs/tutorials/batch/README.md delete mode 100644 docs/tutorials/batch/cortex.yaml delete mode 100644 docs/tutorials/batch/predictor.py delete mode 100644 docs/tutorials/batch/requirements.txt delete mode 100644 docs/tutorials/batch/sample.json diff --git a/docs/tutorials/batch/README.md b/docs/tutorials/batch/README.md deleted file mode 100644 index a37cb8f966..0000000000 --- a/docs/tutorials/batch/README.md +++ /dev/null @@ -1,572 +0,0 @@ -# Deploy models as Batch APIs - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -This example shows how to deploy a batch image classification api that accepts a list of image urls as input, downloads the images, classifies them, and writes the results to S3. - -**Batch APIs are only supported on a Cortex cluster (in AWS).** You can find cluster installation documentation [here](../../../docs/aws/install.md). - -## Pre-requisites - -* [Install](../../../docs/aws/install.md) Cortex and create a cluster -* Create an S3 bucket/directory to store the results of the batch job -* AWS CLI (optional) - -
- -## Implement your predictor - -1. Create a Python file named `predictor.py`. -1. Define a Predictor class with a constructor that loads and initializes an image-classifier from `torchvision`. -1. Add a `predict()` function that will accept a list of images urls (http:// or s3://), downloads them, performs inference, and writes the predictions to S3. -1. Specify an `on_job_complete()` function that aggregates the results and writes them to a single file named `aggregated_results.json` in S3. - -```python -# predictor.py - -import os -import requests -import torch -import torchvision -from torchvision import transforms -from PIL import Image -from io import BytesIO -import boto3 -import json -import re - - -class PythonPredictor: - def __init__(self, config, job_spec): - self.model = torchvision.models.alexnet(pretrained=True).eval() - - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - self.preprocess = transforms.Compose( - [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] - ) - - self.labels = requests.get( - "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" - ).text.split("\n")[1:] - - if len(config.get("dest_s3_dir", "")) == 0: - raise Exception("'dest_s3_dir' field was not provided in job submission") - - self.s3 = boto3.client("s3") - - self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() - self.key = os.path.join(self.key, job_spec["job_id"]) - - def predict(self, payload, batch_id): - tensor_list = [] - - # download and preprocess each image - for image_url in payload: - if image_url.startswith("s3://"): - bucket, image_key = re.match("s3://(.+?)/(.+)", image_url).groups() - image_bytes = self.s3.get_object(Bucket=bucket, Key=image_key)["Body"].read() - else: - image_bytes = requests.get(image_url).content - - img_pil = Image.open(BytesIO(image_bytes)) - tensor_list.append(self.preprocess(img_pil)) - - # classify the 
batch of images - img_tensor = torch.stack(tensor_list) - with torch.no_grad(): - prediction = self.model(img_tensor) - _, indices = prediction.max(1) - - # extract predicted classes - results = [ - {"url": payload[i], "class": self.labels[class_idx]} - for i, class_idx in enumerate(indices) - ] - json_output = json.dumps(results) - - # save results - self.s3.put_object(Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", Body=json_output) - - def on_job_complete(self): - all_results = [] - - # aggregate all classifications - paginator = self.s3.get_paginator("list_objects_v2") - for page in paginator.paginate(Bucket=self.bucket, Prefix=self.key): - for obj in page["Contents"]: - body = self.s3.get_object(Bucket=self.bucket, Key=obj["Key"])["Body"] - all_results += json.loads(body.read().decode("utf8")) - - # save single file containing aggregated classifications - self.s3.put_object( - Bucket=self.bucket, - Key=os.path.join(self.key, "aggregated_results.json"), - Body=json.dumps(all_results), - ) -``` - -Here are the complete [Predictor docs](../../../docs/deployments/batch-api/predictors.md). - -
- -## Specify your Python dependencies - -Create a `requirements.txt` file to specify the dependencies needed by `predictor.py`. Cortex will automatically install them into your runtime once you deploy: - -```python -# requirements.txt - -boto3 -torch -torchvision -pillow -``` - -
- -## Configure your API - -Create a `cortex.yaml` file and add the configuration below. An `api` with `kind: BatchAPI` will expose your model as an endpoint that will orchestrate offline batch inference across multiple workers upon receiving job requests. The configuration below defines how much `compute` each worker requires and your `predictor.py` determines how each batch should be processed. - -```yaml -# cortex.yaml - -- name: image-classifier - kind: BatchAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 -``` - -Here are the complete [API configuration docs](../../../docs/deployments/batch-api/api-configuration.md). - -
- -## Deploy your Batch API - -`cortex deploy` takes your model, your `predictor.py` implementation, and your configuration from `cortex.yaml` and creates an endpoint that can receive job submissions and manage running jobs. - -```bash -$ cortex deploy --env aws - -created image-classifier (BatchAPI) -``` - -Get the endpoint for your Batch API with `cortex get image-classifier`: - -```bash -$ cortex get image-classifier --env aws - -no submitted jobs - -endpoint: https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier -``` - -
- -## Setup destination S3 directory - -Our `predictor.py` implementation writes results to an S3 directory. Before submitting a job, we need to create an S3 directory to store the output of the batch job. The S3 directory should be accessible by the credentials used to create your Cortex cluster. - -Export the S3 directory to an environment variable: - -```bash -$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir -``` - -
- -## Submit a job - -Now that you've deployed a Batch API, you are ready to submit jobs. You can provide image urls directly in the request by specifying the urls in `item_list`. The curl command below showcases how to submit image urls in the request. - -```bash -$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier -$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir -$ curl $BATCH_API_ENDPOINT \ - -X POST -H "Content-Type: application/json" \ - -d @- <` then type `EOF`. - -After submitting the job, you should get a response like this: - -```json -{"job_id":"69d6faf82e4660d3","api_name":"image-classifier", "config":{"dest_s3_dir": "YOUR_S3_BUCKET_HERE"}} -``` - -Take note of the job id in the response. - -### List the jobs for your Batch API - -```bash -$ cortex get image-classifier --env aws - -job id status progress start time duration -69d6faf82e4660d3 running 0/3 20 Jul 2020 01:07:44 UTC 3m26s - -endpoint: https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier -``` - -### Get the job status with an HTTP request - -You can make a GET request to your `/JOB_ID` to get the status of your job. - -```bash -$ curl https://abcdefg.execute-api.us-west-2.amazonaws.com?jobID=69d6faf82e4660d3 - -{ - "job_status":{ - "job_id":"69d6faf82e4660d3", - "api_name":"image-classifier", - ... - }, - "endpoint":"https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier" -} -``` - -### Get job status using Cortex CLI - -You can also use the Cortex CLI to get the status of your job using `cortex get `. 
- -```bash -$ cortex get image-classifier 69d6faf82e4660d3 --env aws - -job id: 69d6faf82e4660d3 -status: running - -start time: 27 Jul 2020 15:02:25 UTC -end time: - -duration: 42s - -batch stats -total succeeded failed avg time per batch -3 0 0 - - -worker stats -requested initializing running failed succeeded -1 1 0 0 0 - -job endpoint: https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier/69d6faf82e4660d3 -``` - -### Stream logs - -You can stream logs realtime for debugging and monitoring purposes with `cortex logs <BATCH_API_NAME> <JOB_ID>` - -```bash -$ cortex logs image-classifier 69d6fdeb2d8e6647 --env aws - -started enqueuing batches to queue -partitioning 5 items found in job submission into 3 batches of size 2 -completed enqueuing a total of 3 batches -spinning up workers... -... -2020-08-07 14:44:05.557598:cortex:pid-25:INFO:processing batch c9136381-6dcc-45bd-bd97-cc9c66ccc6d6 -2020-08-07 14:44:26.037276:cortex:pid-25:INFO:executing on_job_complete -2020-08-07 14:44:26.208972:cortex:pid-25:INFO:no batches left in queue, job has been completed -``` - -### Find your results - -Wait for the job to complete by streaming the logs with `cortex logs <BATCH_API_NAME> <JOB_ID>` or watching for the job status to change with `cortex get <BATCH_API_NAME> <JOB_ID> --watch`. - -The status of your job, which you can get from `cortex get <BATCH_API_NAME> <JOB_ID>`, should change from `running` to `succeeded` once the job has completed. If it changes to a different status, you may be able to find the stacktrace using `cortex logs <BATCH_API_NAME> <JOB_ID>`. If your job has completed successfully, you can view the results of the image classification in the S3 directory you specified in the job submission.
- -Using the AWS CLI: - -```bash -$ aws s3 ls $CORTEX_DEST_S3_DIR// - 161f9fda-fd08-44f3-b983-4529f950e40b.json - 40100ffb-6824-4560-8ca4-7c0d14273e05.json - c9136381-6dcc-45bd-bd97-cc9c66ccc6d6.json - aggregated_results.json -``` - -You can download the aggregated results file with `aws s3 cp $CORTEX_DEST_S3_DIR//aggregated_results.json .` and confirm that there are 16 classifications. - -
- -## Alternative job submission: image URLs in files - -In addition to providing the image URLs directly in the job submission request, it is possible to use image urls stored in newline delimited json files in S3. A newline delimited JSON file has one complete JSON object per line. - -Two newline delimited json files containing image urls for this tutorial have already been created for you and can be found at `s3://cortex-examples/image-classifier/`. If you have AWS CLI, you can list the directory and you should be able to find the files (`urls_0.json` and `urls_1.json`). - -```text -$ aws s3 ls s3://cortex-examples/image-classifier/ - PRE inception/ -... -2020-07-27 14:19:30 506 urls_0.json -2020-07-27 14:19:30 473 urls_1.json -``` - -To use JSON files as input data for the job, we will specify `delimited_files` in the job request. The Batch API will break up the JSON files into batches of desired size and push them onto a queue that is consumed by the pool of workers. - -### Dry run - -Before we submit the job, let's perform a dry run to ensure that only the desired files will be read. You can perform a dry run by appending `dryRun=true` query parameter to your job request. - -Get the endpoint from `cortex get image-classifier` if you haven't done so already. - -```bash -$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier -$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir -$ curl $BATCH_API_ENDPOINT?dryRun=true \ --X POST -H "Content-Type: application/json" \ --d @- <` then type `EOF`. - -You should expect a response like this: - -```text -s3://cortex-examples/image-classifier/urls_0.json -s3://cortex-examples/image-classifier/urls_1.json -validations passed -``` - -This shows that the correct files will be used as input for the job. 
- -### Classify image urls stored in S3 files - -When you submit a job specifying `delimited_files`, your Batch API will get all of the input S3 files based on `s3_paths` and will apply the filters specified in `includes` and `excludes`. Then your Batch API will read each file, split on the newline characters, and parse each item as a JSON object. Each item in the file is treated as a single sample and will be grouped together into batches and then placed onto a queue that is consumed by the pool of workers. - -In this example `urls_0.json` and `urls_1.json` each contain 8 urls. Let's classify the images from the URLs listed in those 2 files. - -```bash -$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier -$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir -$ curl $BATCH_API_ENDPOINT \ --X POST -H "Content-Type: application/json" \ --d @- <` then type `EOF`. - -After submitting this job, you should get a response like this: - -```json -{"job_id":"69d6faf82e4660d3","api_name":"image-classifier", "config":{"dest_s3_dir": "YOUR_S3_BUCKET_HERE"}} -``` - -### Find results - -Wait for the job to complete by streaming the logs with `cortex logs ` or watching for the job status to change with `cortex get --watch`. - -```bash -$ cortex logs image-classifier 69d6faf82e4660d3 --env aws - -started enqueuing batches to queue -enqueuing contents from file s3://cortex-examples/image-classifier/urls_0.json -enqueuing contents from file s3://cortex-examples/image-classifier/urls_1.json -completed enqueuing a total of 8 batches -spinning up workers... -2020-08-07 15:11:21.364179:cortex:pid-25:INFO:processing batch 1de0bc65-04ea-4b9e-9e96-5a0bb52fcc37 -... 
-2020-08-07 15:11:45.461032:cortex:pid-25:INFO:no batches left in queue, job has been completed -``` - -The status of your job, which you can get from `cortex get `, should change from `running` to `succeeded` once the job has completed. If it changes to a different status, you may be able to find the stacktrace using `cortex logs `. If your job has completed successfully, you can view the results of the image classification in the S3 directory you specified in the job submission. - -Using the AWS CLI: - -```bash -$ aws s3 ls $CORTEX_DEST_S3_DIR// - 161f9fda-fd08-44f3-b983-4529f950e40b.json - 40100ffb-6824-4560-8ca4-7c0d14273e05.json - 6d1c933c-0ddf-4316-9956-046cd731c5ab.json - ... - aggregated_results.json -``` - -You can download the aggregated results file with `aws s3 cp $CORTEX_DEST_S3_DIR//aggregated_results.json .` and confirm that there are 16 classifications. - -
- -## Alternative job submission: images in S3 - -Let's assume that rather downloading urls on the internet, you have an S3 directory containing the images. We can specify `file_path_lister` in the job request to get the list of S3 urls for the images, partition the list of S3 urls into batches, and place them on a queue that will be consumed by the workers. - -We'll classify the 16 images that can be found here `s3://cortex-examples/image-classifier/samples`. You can use AWS CLI to verify that there are 16 images `aws s3 ls s3://cortex-examples/image-classifier/samples/`. - -### Dry run - -Let's do a dry run to make sure the correct list of images will be submitted to the job. - -```bash -$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier -$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir -$ curl $BATCH_API_ENDPOINT?dryRun=true \ --X POST -H "Content-Type: application/json" \ --d @- <` then type `EOF`. - -You should expect a response like this: - -```text -s3://cortex-examples/image-classifier/samples/img_0.jpg -s3://cortex-examples/image-classifier/samples/img_1.jpg -... -s3://cortex-examples/image-classifier/samples/img_8.jpg -s3://cortex-examples/image-classifier/samples/img_9.jpg -validations passed -``` - -### Classify images in S3 - -Let's actually submit the job now. Your Batch API will get all of the input S3 files based on `s3_paths` and will apply the filters specified in `includes` and `excludes`. - -```bash -$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier -$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir -$ curl $BATCH_API_ENDPOINT \ --X POST -H "Content-Type: application/json" \ --d @- <` then type `EOF`. 
- -You should get a response like this: - -```json -{"job_id":"69d6f8a472f0e1e5","api_name":"image-classifier", "config":{"dest_s3_dir": "YOUR_S3_BUCKET_HERE"}} -``` - -### Verify results - -Wait for the job to complete by streaming the logs with `cortex logs ` or watching for the job status to change with `cortex get --watch`. - -```bash -$ cortex logs image-classifier 69d6f8a472f0e1e5 --env aws - -started enqueuing batches to queue -completed enqueuing a total of 8 batches -spinning up workers... -2020-07-18 21:35:34.186348:cortex:pid-1:INFO:downloading the project code -... -2020-08-07 15:49:10.889839:cortex:pid-25:INFO:processing batch d0e695bc-a975-4115-a60f-0a55c743fc57 -2020-08-07 15:49:31.188943:cortex:pid-25:INFO:executing on_job_complete -2020-08-07 15:49:31.362053:cortex:pid-25:INFO:no batches left in queue, job has been completed -``` - -The status of your job, which you can get from `cortex get `, should change from `running` to `succeeded` once the job has completed. If it changes to a different status, you may be able to find the stacktrace using `cortex logs `. If your job has completed successfully, you can view the results of the image classification in the S3 directory you specified in the job submission. - -Using the AWS CLI: - -```bash -$ aws s3 ls $CORTEX_DEST_S3_DIR// - 6bee7412-4c16-4d9f-ab3e-e88669cf7a89.json - 3c45b4b3-953e-4226-865b-75f3961dcf95.json - d0e695bc-a975-4115-a60f-0a55c743fc57.json - ... - aggregated_results.json -``` - -You can download the aggregated results file with `aws s3 cp $CORTEX_DEST_S3_DIR//aggregated_results.json .` and confirm that there are 16 classifications. - -
- -## Stopping a Job - -You can stop a running job by sending a DELETE request to `/`. - -```bash -$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier -$ curl -X DELETE $BATCH_API_ENDPOINT?jobID=69d96a01ea55da8c - -stopped job 69d96a01ea55da8c -``` - -You can also use the Cortex CLI `cortex delete `. - -```bash -$ cortex delete image-classifier 69d96a01ea55da8c --env aws - -stopped job 69d96a01ea55da8c -``` - -
- -## Cleanup - -Run `cortex delete` to delete the API: - -```bash -$ cortex delete image-classifier --env aws - -deleting image-classifier -``` - -Running `cortex delete` will stop all in progress jobs for the API and will delete job history for that API. It will not spin down your cluster. diff --git a/docs/tutorials/batch/cortex.yaml b/docs/tutorials/batch/cortex.yaml deleted file mode 100644 index 35ea4e991a..0000000000 --- a/docs/tutorials/batch/cortex.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -- name: image-classifier - kind: BatchAPI - predictor: - type: python - path: predictor.py - compute: - cpu: 1 diff --git a/docs/tutorials/batch/predictor.py b/docs/tutorials/batch/predictor.py deleted file mode 100644 index 293c466fd3..0000000000 --- a/docs/tutorials/batch/predictor.py +++ /dev/null @@ -1,81 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) - -import os -import requests -import torch -import torchvision -from torchvision import transforms -from PIL import Image -from io import BytesIO -import boto3 -import json -import re - - -class PythonPredictor: - def __init__(self, config, job_spec): - self.model = torchvision.models.alexnet(pretrained=True).eval() - - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - self.preprocess = transforms.Compose( - [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] - ) - - self.labels = requests.get( - "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" - ).text.split("\n")[1:] - - if len(config.get("dest_s3_dir", "")) == 0: - raise Exception("'dest_s3_dir' field was not provided in job submission") - - self.s3 = boto3.client("s3") - - self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() - self.key = os.path.join(self.key, job_spec["job_id"]) - - def predict(self, payload, batch_id): - tensor_list = [] - - # download and preprocess each image - for image_url in payload: - if image_url.startswith("s3://"): - bucket, image_key = re.match("s3://(.+?)/(.+)", image_url).groups() - image_bytes = self.s3.get_object(Bucket=bucket, Key=image_key)["Body"].read() - else: - image_bytes = requests.get(image_url).content - - img_pil = Image.open(BytesIO(image_bytes)) - tensor_list.append(self.preprocess(img_pil)) - - # classify the batch of images - img_tensor = torch.stack(tensor_list) - with torch.no_grad(): - prediction = self.model(img_tensor) - _, indices = prediction.max(1) - - # extract predicted classes - results = [ - {"url": payload[i], "class": self.labels[class_idx]} - for i, class_idx in enumerate(indices) - ] - json_output = json.dumps(results) - - # save results - self.s3.put_object(Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", 
Body=json_output) - - def on_job_complete(self): - all_results = [] - - # aggregate all classifications - paginator = self.s3.get_paginator("list_objects_v2") - for page in paginator.paginate(Bucket=self.bucket, Prefix=self.key): - for obj in page["Contents"]: - body = self.s3.get_object(Bucket=self.bucket, Key=obj["Key"])["Body"] - all_results += json.loads(body.read().decode("utf8")) - - # save single file containing aggregated classifications - self.s3.put_object( - Bucket=self.bucket, - Key=os.path.join(self.key, "aggregated_results.json"), - Body=json.dumps(all_results), - ) diff --git a/docs/tutorials/batch/requirements.txt b/docs/tutorials/batch/requirements.txt deleted file mode 100644 index 2c0ef31b51..0000000000 --- a/docs/tutorials/batch/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch -torchvision -boto3 -pillow diff --git a/docs/tutorials/batch/sample.json b/docs/tutorials/batch/sample.json deleted file mode 100644 index eb45c463fd..0000000000 --- a/docs/tutorials/batch/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -[ - "https://i.imgur.com/PzXprwl.jpg" -] From f2513735e5a18f083d32f851c7e19a3e4ecf61f7 Mon Sep 17 00:00:00 2001 From: vishal Date: Mon, 7 Dec 2020 18:32:15 -0500 Subject: [PATCH 11/36] Move examples directory to test folder --- test/README.md | 67 ++ test/batch/image-classifier/README.md | 580 ++++++++++ test/batch/image-classifier/cortex.yaml | 9 + test/batch/image-classifier/predictor.py | 58 + test/batch/image-classifier/requirements.txt | 4 + test/batch/image-classifier/sample.json | 3 + test/batch/onnx/README.md | 6 + test/batch/onnx/cortex.yaml | 10 + test/batch/onnx/predictor.py | 64 ++ test/batch/onnx/requirements.txt | 3 + test/batch/tensorflow/README.md | 6 + test/batch/tensorflow/cortex.yaml | 10 + test/batch/tensorflow/predictor.py | 60 + test/batch/tensorflow/requirements.txt | 1 + test/keras/document-denoiser/README.md | 46 + test/keras/document-denoiser/cortex.yaml | 12 + test/keras/document-denoiser/predictor.py | 86 ++ 
test/keras/document-denoiser/requirements.txt | 5 + test/keras/document-denoiser/sample.json | 3 + test/keras/document-denoiser/trainer.ipynb | 620 ++++++++++ test/live-reloading/onnx/README.md | 7 + .../python/mpg-estimator/cortex.yaml | 8 + .../python/mpg-estimator/predictor.py | 27 + .../python/mpg-estimator/requirements.txt | 4 + .../python/mpg-estimator/sample.json | 7 + test/live-reloading/tensorflow/README.md | 11 + .../onnx/multi-model-classifier/README.md | 77 ++ .../onnx/multi-model-classifier/cortex.yaml | 22 + .../onnx/multi-model-classifier/predictor.py | 99 ++ .../multi-model-classifier/requirements.txt | 2 + .../onnx/multi-model-classifier/sample.json | 3 + .../python/mpg-estimator/README.md | 75 ++ .../python/mpg-estimator/cortex.yaml | 13 + .../python/mpg-estimator/predictor.py | 28 + .../python/mpg-estimator/requirements.txt | 4 + .../python/mpg-estimator/sample.json | 7 + .../multi-model-classifier/README.md | 77 ++ .../multi-model-classifier/cortex.yaml | 32 + .../multi-model-classifier/predictor.py | 63 ++ .../multi-model-classifier/requirements.txt | 1 + .../multi-model-classifier/sample-image.json | 3 + .../multi-model-classifier/sample-iris.json | 8 + test/onnx/iris-classifier/README.md | 3 + test/onnx/iris-classifier/cortex.yaml | 10 + test/onnx/iris-classifier/predictor.py | 20 + test/onnx/iris-classifier/sample.json | 6 + test/onnx/iris-classifier/xgboost.ipynb | 244 ++++ test/onnx/multi-model-classifier/README.md | 69 ++ test/onnx/multi-model-classifier/cortex.yaml | 20 + test/onnx/multi-model-classifier/predictor.py | 98 ++ .../multi-model-classifier/requirements.txt | 2 + test/onnx/multi-model-classifier/sample.json | 3 + test/onnx/yolov5-youtube/README.md | 61 + test/onnx/yolov5-youtube/conda-packages.txt | 3 + test/onnx/yolov5-youtube/cortex.yaml | 13 + test/onnx/yolov5-youtube/labels.json | 82 ++ test/onnx/yolov5-youtube/predictor.py | 65 ++ test/onnx/yolov5-youtube/requirements.txt | 3 + test/onnx/yolov5-youtube/sample.json | 3 + 
test/onnx/yolov5-youtube/utils.py | 130 +++ test/pytorch/answer-generator/README.md | 3 + test/pytorch/answer-generator/cortex.yaml | 11 + test/pytorch/answer-generator/generator.py | 44 + test/pytorch/answer-generator/predictor.py | 36 + .../pytorch/answer-generator/requirements.txt | 3 + test/pytorch/answer-generator/sample.json | 3 + .../image-classifier-alexnet/README.md | 3 + .../image-classifier-alexnet/cortex.yaml | 11 + .../image-classifier-alexnet/predictor.py | 39 + .../image-classifier-alexnet/requirements.txt | 2 + .../image-classifier-alexnet/sample.json | 3 + .../image-classifier-resnet50/README.md | 59 + .../image-classifier-resnet50/cortex.yaml | 15 + .../image-classifier-resnet50/cortex_gpu.yaml | 16 + .../image-classifier-resnet50/cortex_inf.yaml | 16 + .../generate_resnet50_models.ipynb | 121 ++ .../image-classifier-resnet50/predictor.py | 93 ++ .../image-classifier-resnet50/sample.json | 3 + test/pytorch/iris-classifier/README.md | 3 + test/pytorch/iris-classifier/cortex.yaml | 11 + test/pytorch/iris-classifier/model.py | 59 + test/pytorch/iris-classifier/predictor.py | 50 + test/pytorch/iris-classifier/requirements.txt | 2 + test/pytorch/iris-classifier/sample.json | 6 + test/pytorch/language-identifier/README.md | 3 + test/pytorch/language-identifier/cortex.yaml | 9 + test/pytorch/language-identifier/predictor.py | 18 + .../language-identifier/requirements.txt | 2 + test/pytorch/language-identifier/sample.json | 3 + .../multi-model-text-analyzer/README.md | 51 + .../multi-model-text-analyzer/cortex.yaml | 11 + .../multi-model-text-analyzer/predictor.py | 25 + .../requirements.txt | 2 + .../sample-sentiment.json | 3 + .../sample-summarizer.json | 3 + test/pytorch/object-detector/README.md | 3 + test/pytorch/object-detector/coco_labels.txt | 91 ++ test/pytorch/object-detector/cortex.yaml | 11 + test/pytorch/object-detector/predictor.py | 49 + test/pytorch/object-detector/requirements.txt | 2 + test/pytorch/object-detector/sample.json | 4 + 
test/pytorch/question-generator/cortex.yaml | 10 + .../question-generator/dependencies.sh | 4 + test/pytorch/question-generator/predictor.py | 36 + .../question-generator/requirements.txt | 4 + test/pytorch/question-generator/sample.json | 4 + test/pytorch/reading-comprehender/README.md | 3 + test/pytorch/reading-comprehender/cortex.yaml | 11 + .../pytorch/reading-comprehender/predictor.py | 25 + .../reading-comprehender/requirements.txt | 1 + test/pytorch/reading-comprehender/sample.json | 4 + test/pytorch/search-completer/README.md | 3 + test/pytorch/search-completer/cortex.yaml | 11 + test/pytorch/search-completer/predictor.py | 20 + .../pytorch/search-completer/requirements.txt | 5 + test/pytorch/search-completer/sample.json | 3 + test/pytorch/sentiment-analyzer/README.md | 3 + test/pytorch/sentiment-analyzer/cortex.yaml | 10 + test/pytorch/sentiment-analyzer/predictor.py | 15 + .../sentiment-analyzer/requirements.txt | 2 + test/pytorch/sentiment-analyzer/sample.json | 3 + test/pytorch/text-generator/README.md | 192 ++++ test/pytorch/text-generator/deploy.ipynb | 80 ++ test/pytorch/text-generator/predictor.py | 17 + test/pytorch/text-generator/requirements.txt | 2 + test/pytorch/text-summarizer/README.md | 5 + test/pytorch/text-summarizer/cortex.yaml | 11 + test/pytorch/text-summarizer/predictor.py | 18 + test/pytorch/text-summarizer/requirements.txt | 2 + test/pytorch/text-summarizer/sample.json | 3 + test/sklearn/iris-classifier/README.md | 3 + test/sklearn/iris-classifier/cortex.yaml | 15 + test/sklearn/iris-classifier/predictor.py | 31 + test/sklearn/iris-classifier/requirements.txt | 2 + test/sklearn/iris-classifier/sample.json | 6 + test/sklearn/iris-classifier/trainer.py | 25 + test/sklearn/mpg-estimator/README.md | 3 + test/sklearn/mpg-estimator/cortex.yaml | 11 + test/sklearn/mpg-estimator/predictor.py | 41 + test/sklearn/mpg-estimator/requirements.txt | 4 + test/sklearn/mpg-estimator/sample.json | 7 + test/sklearn/mpg-estimator/trainer.py | 25 + 
test/spacy/entity-recognizer/README.md | 3 + test/spacy/entity-recognizer/cortex.yaml | 10 + test/spacy/entity-recognizer/predictor.py | 22 + test/spacy/entity-recognizer/requirements.txt | 1 + test/spacy/entity-recognizer/sample.json | 3 + .../image-classifier-inception/README.md | 3 + .../image-classifier-inception/cortex.yaml | 13 + .../cortex_server_side_batching.yaml | 17 + .../inception.ipynb | 211 ++++ .../image-classifier-inception/predictor.py | 21 + .../requirements.txt | 1 + .../image-classifier-inception/sample.json | 3 + .../image-classifier-resnet50/README.md | 90 ++ .../image-classifier-resnet50/cortex.yaml | 18 + .../image-classifier-resnet50/cortex_gpu.yaml | 19 + .../cortex_gpu_server_side_batching.yaml | 22 + .../image-classifier-resnet50/cortex_inf.yaml | 21 + .../cortex_inf_server_side_batching.yaml | 24 + .../generate_gpu_resnet50_model.ipynb | 131 +++ .../generate_resnet50_models.ipynb | 178 +++ .../image-classifier-resnet50/predictor.py | 63 ++ .../requirements.txt | 1 + .../image-classifier-resnet50/sample.bin | Bin 0 -> 8680 bytes .../image-classifier-resnet50/sample.json | 3 + test/tensorflow/iris-classifier/README.md | 3 + test/tensorflow/iris-classifier/cortex.yaml | 10 + test/tensorflow/iris-classifier/predictor.py | 13 + test/tensorflow/iris-classifier/sample.json | 6 + .../iris-classifier/tensorflow.ipynb | 296 +++++ .../tensorflow/license-plate-reader/README.md | 175 +++ .../license-plate-reader/config.json | 8 + .../license-plate-reader/cortex_full.yaml | 35 + .../license-plate-reader/cortex_lite.yaml | 14 + .../license-plate-reader/predictor_crnn.py | 44 + .../license-plate-reader/predictor_lite.py | 120 ++ .../license-plate-reader/predictor_yolo.py | 46 + .../license-plate-reader/requirements.txt | 5 + .../license-plate-reader/sample_inference.py | 100 ++ .../license-plate-reader/utils/__init__.py | 1 + .../license-plate-reader/utils/bbox.py | 111 ++ .../license-plate-reader/utils/colors.py | 100 ++ 
.../license-plate-reader/utils/preprocess.py | 59 + .../license-plate-reader/utils/utils.py | 160 +++ .../multi-model-classifier/README.md | 69 ++ .../multi-model-classifier/cortex.yaml | 30 + .../multi-model-classifier/predictor.py | 62 + .../multi-model-classifier/requirements.txt | 1 + .../multi-model-classifier/sample-image.json | 3 + .../multi-model-classifier/sample-iris.json | 8 + test/tensorflow/sentiment-analyzer/README.md | 3 + test/tensorflow/sentiment-analyzer/bert.ipynb | 1007 +++++++++++++++++ .../tensorflow/sentiment-analyzer/cortex.yaml | 13 + .../sentiment-analyzer/predictor.py | 29 + .../sentiment-analyzer/requirements.txt | 5 + .../tensorflow/sentiment-analyzer/sample.json | 3 + test/tensorflow/text-generator/README.md | 3 + test/tensorflow/text-generator/cortex.yaml | 11 + test/tensorflow/text-generator/encoder.py | 118 ++ test/tensorflow/text-generator/gpt-2.ipynb | 383 +++++++ test/tensorflow/text-generator/predictor.py | 24 + .../text-generator/requirements.txt | 2 + test/tensorflow/text-generator/sample.json | 3 + test/traffic-splitter/README.md | 111 ++ test/traffic-splitter/cortex.yaml | 28 + test/traffic-splitter/model.py | 59 + test/traffic-splitter/onnx_predictor.py | 20 + test/traffic-splitter/pytorch_predictor.py | 50 + test/traffic-splitter/sample.json | 6 + test/utils/README.md | 36 + test/utils/throughput_test.py | 179 +++ 212 files changed, 9098 insertions(+) create mode 100644 test/README.md create mode 100644 test/batch/image-classifier/README.md create mode 100644 test/batch/image-classifier/cortex.yaml create mode 100644 test/batch/image-classifier/predictor.py create mode 100644 test/batch/image-classifier/requirements.txt create mode 100644 test/batch/image-classifier/sample.json create mode 100644 test/batch/onnx/README.md create mode 100644 test/batch/onnx/cortex.yaml create mode 100644 test/batch/onnx/predictor.py create mode 100644 test/batch/onnx/requirements.txt create mode 100644 test/batch/tensorflow/README.md create 
mode 100644 test/batch/tensorflow/cortex.yaml create mode 100644 test/batch/tensorflow/predictor.py create mode 100644 test/batch/tensorflow/requirements.txt create mode 100644 test/keras/document-denoiser/README.md create mode 100644 test/keras/document-denoiser/cortex.yaml create mode 100644 test/keras/document-denoiser/predictor.py create mode 100644 test/keras/document-denoiser/requirements.txt create mode 100644 test/keras/document-denoiser/sample.json create mode 100644 test/keras/document-denoiser/trainer.ipynb create mode 100644 test/live-reloading/onnx/README.md create mode 100644 test/live-reloading/python/mpg-estimator/cortex.yaml create mode 100644 test/live-reloading/python/mpg-estimator/predictor.py create mode 100644 test/live-reloading/python/mpg-estimator/requirements.txt create mode 100644 test/live-reloading/python/mpg-estimator/sample.json create mode 100644 test/live-reloading/tensorflow/README.md create mode 100644 test/model-caching/onnx/multi-model-classifier/README.md create mode 100644 test/model-caching/onnx/multi-model-classifier/cortex.yaml create mode 100644 test/model-caching/onnx/multi-model-classifier/predictor.py create mode 100644 test/model-caching/onnx/multi-model-classifier/requirements.txt create mode 100644 test/model-caching/onnx/multi-model-classifier/sample.json create mode 100644 test/model-caching/python/mpg-estimator/README.md create mode 100644 test/model-caching/python/mpg-estimator/cortex.yaml create mode 100644 test/model-caching/python/mpg-estimator/predictor.py create mode 100644 test/model-caching/python/mpg-estimator/requirements.txt create mode 100644 test/model-caching/python/mpg-estimator/sample.json create mode 100644 test/model-caching/tensorflow/multi-model-classifier/README.md create mode 100644 test/model-caching/tensorflow/multi-model-classifier/cortex.yaml create mode 100644 test/model-caching/tensorflow/multi-model-classifier/predictor.py create mode 100644 
test/model-caching/tensorflow/multi-model-classifier/requirements.txt create mode 100644 test/model-caching/tensorflow/multi-model-classifier/sample-image.json create mode 100644 test/model-caching/tensorflow/multi-model-classifier/sample-iris.json create mode 100644 test/onnx/iris-classifier/README.md create mode 100644 test/onnx/iris-classifier/cortex.yaml create mode 100644 test/onnx/iris-classifier/predictor.py create mode 100644 test/onnx/iris-classifier/sample.json create mode 100644 test/onnx/iris-classifier/xgboost.ipynb create mode 100644 test/onnx/multi-model-classifier/README.md create mode 100644 test/onnx/multi-model-classifier/cortex.yaml create mode 100644 test/onnx/multi-model-classifier/predictor.py create mode 100644 test/onnx/multi-model-classifier/requirements.txt create mode 100644 test/onnx/multi-model-classifier/sample.json create mode 100644 test/onnx/yolov5-youtube/README.md create mode 100644 test/onnx/yolov5-youtube/conda-packages.txt create mode 100644 test/onnx/yolov5-youtube/cortex.yaml create mode 100644 test/onnx/yolov5-youtube/labels.json create mode 100644 test/onnx/yolov5-youtube/predictor.py create mode 100644 test/onnx/yolov5-youtube/requirements.txt create mode 100644 test/onnx/yolov5-youtube/sample.json create mode 100644 test/onnx/yolov5-youtube/utils.py create mode 100644 test/pytorch/answer-generator/README.md create mode 100644 test/pytorch/answer-generator/cortex.yaml create mode 100644 test/pytorch/answer-generator/generator.py create mode 100644 test/pytorch/answer-generator/predictor.py create mode 100644 test/pytorch/answer-generator/requirements.txt create mode 100644 test/pytorch/answer-generator/sample.json create mode 100644 test/pytorch/image-classifier-alexnet/README.md create mode 100644 test/pytorch/image-classifier-alexnet/cortex.yaml create mode 100644 test/pytorch/image-classifier-alexnet/predictor.py create mode 100644 test/pytorch/image-classifier-alexnet/requirements.txt create mode 100644 
test/pytorch/image-classifier-alexnet/sample.json create mode 100644 test/pytorch/image-classifier-resnet50/README.md create mode 100644 test/pytorch/image-classifier-resnet50/cortex.yaml create mode 100644 test/pytorch/image-classifier-resnet50/cortex_gpu.yaml create mode 100644 test/pytorch/image-classifier-resnet50/cortex_inf.yaml create mode 100644 test/pytorch/image-classifier-resnet50/generate_resnet50_models.ipynb create mode 100644 test/pytorch/image-classifier-resnet50/predictor.py create mode 100644 test/pytorch/image-classifier-resnet50/sample.json create mode 100644 test/pytorch/iris-classifier/README.md create mode 100644 test/pytorch/iris-classifier/cortex.yaml create mode 100644 test/pytorch/iris-classifier/model.py create mode 100644 test/pytorch/iris-classifier/predictor.py create mode 100644 test/pytorch/iris-classifier/requirements.txt create mode 100644 test/pytorch/iris-classifier/sample.json create mode 100644 test/pytorch/language-identifier/README.md create mode 100644 test/pytorch/language-identifier/cortex.yaml create mode 100644 test/pytorch/language-identifier/predictor.py create mode 100644 test/pytorch/language-identifier/requirements.txt create mode 100644 test/pytorch/language-identifier/sample.json create mode 100644 test/pytorch/multi-model-text-analyzer/README.md create mode 100644 test/pytorch/multi-model-text-analyzer/cortex.yaml create mode 100644 test/pytorch/multi-model-text-analyzer/predictor.py create mode 100644 test/pytorch/multi-model-text-analyzer/requirements.txt create mode 100644 test/pytorch/multi-model-text-analyzer/sample-sentiment.json create mode 100644 test/pytorch/multi-model-text-analyzer/sample-summarizer.json create mode 100644 test/pytorch/object-detector/README.md create mode 100644 test/pytorch/object-detector/coco_labels.txt create mode 100644 test/pytorch/object-detector/cortex.yaml create mode 100644 test/pytorch/object-detector/predictor.py create mode 100644 
test/pytorch/object-detector/requirements.txt create mode 100644 test/pytorch/object-detector/sample.json create mode 100644 test/pytorch/question-generator/cortex.yaml create mode 100644 test/pytorch/question-generator/dependencies.sh create mode 100644 test/pytorch/question-generator/predictor.py create mode 100644 test/pytorch/question-generator/requirements.txt create mode 100644 test/pytorch/question-generator/sample.json create mode 100644 test/pytorch/reading-comprehender/README.md create mode 100644 test/pytorch/reading-comprehender/cortex.yaml create mode 100644 test/pytorch/reading-comprehender/predictor.py create mode 100644 test/pytorch/reading-comprehender/requirements.txt create mode 100644 test/pytorch/reading-comprehender/sample.json create mode 100644 test/pytorch/search-completer/README.md create mode 100644 test/pytorch/search-completer/cortex.yaml create mode 100644 test/pytorch/search-completer/predictor.py create mode 100644 test/pytorch/search-completer/requirements.txt create mode 100644 test/pytorch/search-completer/sample.json create mode 100644 test/pytorch/sentiment-analyzer/README.md create mode 100644 test/pytorch/sentiment-analyzer/cortex.yaml create mode 100644 test/pytorch/sentiment-analyzer/predictor.py create mode 100644 test/pytorch/sentiment-analyzer/requirements.txt create mode 100644 test/pytorch/sentiment-analyzer/sample.json create mode 100644 test/pytorch/text-generator/README.md create mode 100644 test/pytorch/text-generator/deploy.ipynb create mode 100644 test/pytorch/text-generator/predictor.py create mode 100644 test/pytorch/text-generator/requirements.txt create mode 100644 test/pytorch/text-summarizer/README.md create mode 100644 test/pytorch/text-summarizer/cortex.yaml create mode 100644 test/pytorch/text-summarizer/predictor.py create mode 100644 test/pytorch/text-summarizer/requirements.txt create mode 100644 test/pytorch/text-summarizer/sample.json create mode 100644 test/sklearn/iris-classifier/README.md create 
mode 100644 test/sklearn/iris-classifier/cortex.yaml create mode 100644 test/sklearn/iris-classifier/predictor.py create mode 100644 test/sklearn/iris-classifier/requirements.txt create mode 100644 test/sklearn/iris-classifier/sample.json create mode 100644 test/sklearn/iris-classifier/trainer.py create mode 100644 test/sklearn/mpg-estimator/README.md create mode 100644 test/sklearn/mpg-estimator/cortex.yaml create mode 100644 test/sklearn/mpg-estimator/predictor.py create mode 100644 test/sklearn/mpg-estimator/requirements.txt create mode 100644 test/sklearn/mpg-estimator/sample.json create mode 100644 test/sklearn/mpg-estimator/trainer.py create mode 100644 test/spacy/entity-recognizer/README.md create mode 100644 test/spacy/entity-recognizer/cortex.yaml create mode 100644 test/spacy/entity-recognizer/predictor.py create mode 100644 test/spacy/entity-recognizer/requirements.txt create mode 100644 test/spacy/entity-recognizer/sample.json create mode 100644 test/tensorflow/image-classifier-inception/README.md create mode 100644 test/tensorflow/image-classifier-inception/cortex.yaml create mode 100644 test/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml create mode 100644 test/tensorflow/image-classifier-inception/inception.ipynb create mode 100644 test/tensorflow/image-classifier-inception/predictor.py create mode 100644 test/tensorflow/image-classifier-inception/requirements.txt create mode 100644 test/tensorflow/image-classifier-inception/sample.json create mode 100644 test/tensorflow/image-classifier-resnet50/README.md create mode 100644 test/tensorflow/image-classifier-resnet50/cortex.yaml create mode 100644 test/tensorflow/image-classifier-resnet50/cortex_gpu.yaml create mode 100644 test/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml create mode 100644 test/tensorflow/image-classifier-resnet50/cortex_inf.yaml create mode 100644 test/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml 
create mode 100644 test/tensorflow/image-classifier-resnet50/generate_gpu_resnet50_model.ipynb create mode 100644 test/tensorflow/image-classifier-resnet50/generate_resnet50_models.ipynb create mode 100644 test/tensorflow/image-classifier-resnet50/predictor.py create mode 100644 test/tensorflow/image-classifier-resnet50/requirements.txt create mode 100644 test/tensorflow/image-classifier-resnet50/sample.bin create mode 100644 test/tensorflow/image-classifier-resnet50/sample.json create mode 100644 test/tensorflow/iris-classifier/README.md create mode 100644 test/tensorflow/iris-classifier/cortex.yaml create mode 100644 test/tensorflow/iris-classifier/predictor.py create mode 100644 test/tensorflow/iris-classifier/sample.json create mode 100644 test/tensorflow/iris-classifier/tensorflow.ipynb create mode 100644 test/tensorflow/license-plate-reader/README.md create mode 100644 test/tensorflow/license-plate-reader/config.json create mode 100644 test/tensorflow/license-plate-reader/cortex_full.yaml create mode 100644 test/tensorflow/license-plate-reader/cortex_lite.yaml create mode 100644 test/tensorflow/license-plate-reader/predictor_crnn.py create mode 100644 test/tensorflow/license-plate-reader/predictor_lite.py create mode 100644 test/tensorflow/license-plate-reader/predictor_yolo.py create mode 100644 test/tensorflow/license-plate-reader/requirements.txt create mode 100644 test/tensorflow/license-plate-reader/sample_inference.py create mode 100644 test/tensorflow/license-plate-reader/utils/__init__.py create mode 100644 test/tensorflow/license-plate-reader/utils/bbox.py create mode 100644 test/tensorflow/license-plate-reader/utils/colors.py create mode 100644 test/tensorflow/license-plate-reader/utils/preprocess.py create mode 100644 test/tensorflow/license-plate-reader/utils/utils.py create mode 100644 test/tensorflow/multi-model-classifier/README.md create mode 100644 test/tensorflow/multi-model-classifier/cortex.yaml create mode 100644 
test/tensorflow/multi-model-classifier/predictor.py create mode 100644 test/tensorflow/multi-model-classifier/requirements.txt create mode 100644 test/tensorflow/multi-model-classifier/sample-image.json create mode 100644 test/tensorflow/multi-model-classifier/sample-iris.json create mode 100644 test/tensorflow/sentiment-analyzer/README.md create mode 100644 test/tensorflow/sentiment-analyzer/bert.ipynb create mode 100644 test/tensorflow/sentiment-analyzer/cortex.yaml create mode 100644 test/tensorflow/sentiment-analyzer/predictor.py create mode 100644 test/tensorflow/sentiment-analyzer/requirements.txt create mode 100644 test/tensorflow/sentiment-analyzer/sample.json create mode 100644 test/tensorflow/text-generator/README.md create mode 100644 test/tensorflow/text-generator/cortex.yaml create mode 100644 test/tensorflow/text-generator/encoder.py create mode 100644 test/tensorflow/text-generator/gpt-2.ipynb create mode 100644 test/tensorflow/text-generator/predictor.py create mode 100644 test/tensorflow/text-generator/requirements.txt create mode 100644 test/tensorflow/text-generator/sample.json create mode 100644 test/traffic-splitter/README.md create mode 100644 test/traffic-splitter/cortex.yaml create mode 100644 test/traffic-splitter/model.py create mode 100644 test/traffic-splitter/onnx_predictor.py create mode 100644 test/traffic-splitter/pytorch_predictor.py create mode 100644 test/traffic-splitter/sample.json create mode 100644 test/utils/README.md create mode 100644 test/utils/throughput_test.py diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000000..1eb711f57d --- /dev/null +++ b/test/README.md @@ -0,0 +1,67 @@ +# Examples + +## TensorFlow + +- [Iris classification](tensorflow/iris-classifier): deploy a model to classify iris flowers. + +- [Text generation](tensorflow/text-generator): deploy OpenAI's GPT-2 to generate text. + +- [Sentiment analysis](tensorflow/sentiment-analyzer): deploy a BERT model for sentiment analysis. 
+ +- [Image classification](tensorflow/image-classifier-inception): deploy an Inception model to classify images. + +- [Image classification](tensorflow/image-classifier-resnet50): deploy a ResNet50 model to classify images. + +- [License plate reader](tensorflow/license-plate-reader): deploy a YOLOv3 model (and others) to identify license plates in real time. + +- [Multi-model classification](tensorflow/multi-model-classifier): deploy 3 models (ResNet50, Iris, Inception) in a single API. + +## Keras + +- [Denoisify text documents](keras/document-denoiser): deploy an Autoencoder model to clean text document images of noise. + +## PyTorch + +- [Iris classification](pytorch/iris-classifier): deploy a model to classify iris flowers. + +- [Text generation](pytorch/text-generator): deploy Hugging Face's GPT-2 model to generate text. + +- [Sentiment analysis](pytorch/sentiment-analyzer): deploy a Hugging Face transformers model for sentiment analysis. + +- [Search completion](pytorch/search-completer): deploy a Facebook's RoBERTa model to complete search terms. + +- [Answer generation](pytorch/answer-generator): deploy Microsoft's DialoGPT model to answer questions. + +- [Text summarization](pytorch/text-summarizer): deploy a BART model (from Hugging Face's transformers library) to summarize text. + +- [Reading comprehension](pytorch/reading-comprehender): deploy an AllenNLP model for reading comprehension. + +- [Language identification](pytorch/language-identifier): deploy a fastText model to identify languages. + +- [Multi-model text analysis](pytorch/multi-model-text-analyzer): deploy 2 models (Sentiment and Summarization analyzers) in a single API. + +- [Image classification](pytorch/image-classifier-alexnet): deploy an AlexNet model from TorchVision to classify images. + +- [Image classification](pytorch/image-classifier-resnet50): deploy a ResNet50 model from TorchVision to classify images. 
+ +- [Object detection](pytorch/object-detector): deploy a Faster R-CNN model from TorchVision to detect objects in images. + +- [Question generator](pytorch/question-generator): deploy a transformers model to generate questions given text and the correct answer. + +## ONNX + +- [Iris classification](onnx/iris-classifier): deploy an XGBoost model (exported in ONNX) to classify iris flowers. + +- [YOLOv5 YouTube detection](onnx/yolov5-youtube): deploy a YOLOv5 model trained on COCO val2017 dataset. + +- [Multi-model classification](onnx/multi-model-classifier): deploy 3 models (ResNet50, MobileNet, ShuffleNet) in a single API. + +## scikit-learn + +- [Iris classification](sklearn/iris-classifier): deploy a model to classify iris flowers. + +- [MPG estimation](sklearn/mpg-estimator): deploy a linear regression model to estimate MPG. + +## spacy + +- [Entity recognizer](spacy/entity-recognizer): deploy a spacy model for named entity recognition. diff --git a/test/batch/image-classifier/README.md b/test/batch/image-classifier/README.md new file mode 100644 index 0000000000..03cc827d35 --- /dev/null +++ b/test/batch/image-classifier/README.md @@ -0,0 +1,580 @@ +# Deploy models as Batch APIs + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example shows how to deploy a batch image classification api that accepts a list of image urls as input, downloads the images, classifies them, and writes the results to S3. + +**Batch APIs are only supported on a Cortex cluster (in AWS).** You can find cluster installation documentation [here](../../../docs/aws/install.md). + +## Pre-requisites + +* [Install](../../../docs/aws/install.md) Cortex and create a cluster +* Create an S3 bucket/directory to store the results of the batch job +* AWS CLI (optional) + +
+ +## Implement your predictor + +1. Create a Python file named `predictor.py`. +1. Define a Predictor class with a constructor that loads and initializes an image-classifier from `torchvision`. +1. Add a `predict()` function that will accept a list of images urls (http:// or s3://), downloads them, performs inference, and writes the predictions to S3. +1. Specify an `on_job_complete()` function that aggregates the results and writes them to a single file named `aggregated_results.json` in S3. + +```python +# predictor.py + +import os +import requests +import torch +import torchvision +from torchvision import transforms +from PIL import Image +from io import BytesIO +import boto3 +import json +import re + + +class PythonPredictor: + def __init__(self, config, job_spec): + self.model = torchvision.models.alexnet(pretrained=True).eval() + + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.preprocess = transforms.Compose( + [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] + ) + + self.labels = requests.get( + "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" + ).text.split("\n")[1:] + + if len(config.get("dest_s3_dir", "")) == 0: + raise Exception("'dest_s3_dir' field was not provided in job submission") + + self.s3 = boto3.client("s3") + + self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() + self.key = os.path.join(self.key, job_spec["job_id"]) + + def predict(self, payload, batch_id): + tensor_list = [] + + # download and preprocess each image + for image_url in payload: + if image_url.startswith("s3://"): + bucket, image_key = re.match("s3://(.+?)/(.+)", image_url).groups() + image_bytes = self.s3.get_object(Bucket=bucket, Key=image_key)["Body"].read() + else: + image_bytes = requests.get(image_url).content + + img_pil = Image.open(BytesIO(image_bytes)) + tensor_list.append(self.preprocess(img_pil)) + + # classify the 
batch of images + img_tensor = torch.stack(tensor_list) + with torch.no_grad(): + prediction = self.model(img_tensor) + _, indices = prediction.max(1) + + # extract predicted classes + results = [ + {"url": payload[i], "class": self.labels[class_idx]} + for i, class_idx in enumerate(indices) + ] + json_output = json.dumps(results) + + # save results + self.s3.put_object(Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", Body=json_output) + + def on_job_complete(self): + all_results = [] + + # aggregate all classifications + paginator = self.s3.get_paginator("list_objects_v2") + for page in paginator.paginate(Bucket=self.bucket, Prefix=self.key): + for obj in page["Contents"]: + body = self.s3.get_object(Bucket=self.bucket, Key=obj["Key"])["Body"] + all_results += json.loads(body.read().decode("utf8")) + + # save single file containing aggregated classifications + self.s3.put_object( + Bucket=self.bucket, + Key=os.path.join(self.key, "aggregated_results.json"), + Body=json.dumps(all_results), + ) +``` + +Here are the complete [Predictor docs](../../../docs/deployments/batch-api/predictors.md). + +
+ +## Specify your Python dependencies + +Create a `requirements.txt` file to specify the dependencies needed by `predictor.py`. Cortex will automatically install them into your runtime once you deploy: + +```text +# requirements.txt + +boto3 +torch +torchvision +pillow +``` + +
+ +## Configure your API + +Create a `cortex.yaml` file and add the configuration below. An `api` with `kind: BatchAPI` will expose your model as an endpoint that will orchestrate offline batch inference across multiple workers upon receiving job requests. The configuration below defines how much `compute` each worker requires and your `predictor.py` determines how each batch should be processed. + +```yaml +# cortex.yaml + +- name: image-classifier + kind: BatchAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 +``` + +Here are the complete [API configuration docs](../../../docs/deployments/batch-api/api-configuration.md). + +
+ +## Deploy your Batch API + +`cortex deploy` takes your model, your `predictor.py` implementation, and your configuration from `cortex.yaml` and creates an endpoint that can receive job submissions and manage running jobs. + +```bash +$ cortex deploy --env aws + +created image-classifier (BatchAPI) +``` + +Get the endpoint for your Batch API with `cortex get image-classifier`: + +```bash +$ cortex get image-classifier --env aws + +no submitted jobs + +endpoint: https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier +``` + +
+ +## Setup destination S3 directory + +Our `predictor.py` implementation writes results to an S3 directory. Before submitting a job, we need to create an S3 directory to store the output of the batch job. The S3 directory should be accessible by the credentials used to create your Cortex cluster. + +Export the S3 directory to an environment variable: + +```bash +$ export CORTEX_DEST_S3_DIR=<YOUR_S3_DIRECTORY> # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir +``` + +
+ +## Submit a job + +Now that you've deployed a Batch API, you are ready to submit jobs. You can provide image urls directly in the request by specifying the urls in `item_list`. The curl command below showcases how to submit image urls in the request. + +```bash +$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier +$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir +$ curl $BATCH_API_ENDPOINT \ + -X POST -H "Content-Type: application/json" \ + -d @- <` then type `EOF`. + +After submitting the job, you should get a response like this: + +```json +{"job_id":"69d6faf82e4660d3","api_name":"image-classifier", "config":{"dest_s3_dir": "YOUR_S3_BUCKET_HERE"}} +``` + +Take note of the job id in the response. + +### List the jobs for your Batch API + +```bash +$ cortex get image-classifier --env aws + +job id status progress start time duration +69d6faf82e4660d3 running 0/3 20 Jul 2020 01:07:44 UTC 3m26s + +endpoint: https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier +``` + +### Get the job status with an HTTP request + +You can make a GET request to your `/JOB_ID` to get the status of your job. + +```bash +$ curl https://abcdefg.execute-api.us-west-2.amazonaws.com?jobID=69d6faf82e4660d3 + +{ + "job_status":{ + "job_id":"69d6faf82e4660d3", + "api_name":"image-classifier", + ... + }, + "endpoint":"https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier" +} +``` + +### Get job status using Cortex CLI + +You can also use the Cortex CLI to get the status of your job using `cortex get `. 
+ +```bash +$ cortex get image-classifier 69d6faf82e4660d3 --env aws + +job id: 69d6faf82e4660d3 +status: running + +start time: 27 Jul 2020 15:02:25 UTC +end time: - +duration: 42s + +batch stats +total succeeded failed avg time per batch +3 0 0 - + +worker stats +requested initializing running failed succeeded +1 1 0 0 0 + +job endpoint: https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier/69d6faf82e4660d3 +``` + +### Stream logs + +You can stream logs realtime for debugging and monitoring purposes with `cortex logs ` + +```bash +$ cortex logs image-classifier 69d6fdeb2d8e6647 --env aws + +started enqueuing batches to queue +partitioning 5 items found in job submission into 3 batches of size 2 +completed enqueuing a total of 3 batches +spinning up workers... +... +2020-08-07 14:44:05.557598:cortex:pid-25:INFO:processing batch c9136381-6dcc-45bd-bd97-cc9c66ccc6d6 +2020-08-07 14:44:26.037276:cortex:pid-25:INFO:executing on_job_complete +2020-08-07 14:44:26.208972:cortex:pid-25:INFO:no batches left in queue, job has been completed +``` + +### Find your results + +Wait for the job to complete by streaming the logs with `cortex logs ` or watching for the job status to change with `cortex get --watch`. + +The status of your job, which you can get from `cortex get `, should change from `running` to `succeeded` once the job has completed. If it changes to a different status, you may be able to find the stacktrace using `cortex logs `. If your job has completed successfully, you can view the results of the image classification in the S3 directory you specified in the job submission. 
+ +Using the AWS CLI: + +```bash +$ aws s3 ls $CORTEX_DEST_S3_DIR/<JOB_ID>/ + 161f9fda-fd08-44f3-b983-4529f950e40b.json + 40100ffb-6824-4560-8ca4-7c0d14273e05.json + c9136381-6dcc-45bd-bd97-cc9c66ccc6d6.json + aggregated_results.json +``` + +You can download the aggregated results file with `aws s3 cp $CORTEX_DEST_S3_DIR/<JOB_ID>/aggregated_results.json .` and confirm that there are 16 classifications. + +
+ +## Alternative job submission: image URLs in files + +In addition to providing the image URLs directly in the job submission request, it is possible to use image urls stored in newline delimited json files in S3. A newline delimited JSON file has one complete JSON object per line. + +Two newline delimited json files containing image urls for this tutorial have already been created for you and can be found at `s3://cortex-examples/image-classifier/`. If you have AWS CLI, you can list the directory and you should be able to find the files (`urls_0.json` and `urls_1.json`). + +```text +$ aws s3 ls s3://cortex-examples/image-classifier/ + PRE inception/ +... +2020-07-27 14:19:30 506 urls_0.json +2020-07-27 14:19:30 473 urls_1.json +``` + +To use JSON files as input data for the job, we will specify `delimited_files` in the job request. The Batch API will break up the JSON files into batches of desired size and push them onto a queue that is consumed by the pool of workers. + +### Dry run + +Before we submit the job, let's perform a dry run to ensure that only the desired files will be read. You can perform a dry run by appending `dryRun=true` query parameter to your job request. + +Get the endpoint from `cortex get image-classifier` if you haven't done so already. + +```bash +$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier +$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir +$ curl $BATCH_API_ENDPOINT?dryRun=true \ +-X POST -H "Content-Type: application/json" \ +-d @- <` then type `EOF`. + +You should expect a response like this: + +```text +s3://cortex-examples/image-classifier/urls_0.json +s3://cortex-examples/image-classifier/urls_1.json +validations passed +``` + +This shows that the correct files will be used as input for the job. 
+ +### Classify image urls stored in S3 files + +When you submit a job specifying `delimited_files`, your Batch API will get all of the input S3 files based on `s3_paths` and will apply the filters specified in `includes` and `excludes`. Then your Batch API will read each file, split on the newline characters, and parse each item as a JSON object. Each item in the file is treated as a single sample and will be grouped together into batches and then placed onto a queue that is consumed by the pool of workers. + +In this example `urls_0.json` and `urls_1.json` each contain 8 urls. Let's classify the images from the URLs listed in those 2 files. + +```bash +$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier +$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir +$ curl $BATCH_API_ENDPOINT \ +-X POST -H "Content-Type: application/json" \ +-d @- <` then type `EOF`. + +After submitting this job, you should get a response like this: + +```json +{"job_id":"69d6faf82e4660d3","api_name":"image-classifier", "config":{"dest_s3_dir": "YOUR_S3_BUCKET_HERE"}} +``` + +### Find results + +Wait for the job to complete by streaming the logs with `cortex logs ` or watching for the job status to change with `cortex get --watch`. + +```bash +$ cortex logs image-classifier 69d6faf82e4660d3 --env aws + +started enqueuing batches to queue +enqueuing contents from file s3://cortex-examples/image-classifier/urls_0.json +enqueuing contents from file s3://cortex-examples/image-classifier/urls_1.json +completed enqueuing a total of 8 batches +spinning up workers... +2020-08-07 15:11:21.364179:cortex:pid-25:INFO:processing batch 1de0bc65-04ea-4b9e-9e96-5a0bb52fcc37 +... 
+2020-08-07 15:11:45.461032:cortex:pid-25:INFO:no batches left in queue, job has been completed +``` + +The status of your job, which you can get from `cortex get `, should change from `running` to `succeeded` once the job has completed. If it changes to a different status, you may be able to find the stacktrace using `cortex logs `. If your job has completed successfully, you can view the results of the image classification in the S3 directory you specified in the job submission. + +Using the AWS CLI: + +```bash +$ aws s3 ls $CORTEX_DEST_S3_DIR// + 161f9fda-fd08-44f3-b983-4529f950e40b.json + 40100ffb-6824-4560-8ca4-7c0d14273e05.json + 6d1c933c-0ddf-4316-9956-046cd731c5ab.json + ... + aggregated_results.json +``` + +You can download the aggregated results file with `aws s3 cp $CORTEX_DEST_S3_DIR//aggregated_results.json .` and confirm that there are 16 classifications. + +
+ +## Alternative job submission: images in S3 + +Let's assume that rather downloading urls on the internet, you have an S3 directory containing the images. We can specify `file_path_lister` in the job request to get the list of S3 urls for the images, partition the list of S3 urls into batches, and place them on a queue that will be consumed by the workers. + +We'll classify the 16 images that can be found here `s3://cortex-examples/image-classifier/samples`. You can use AWS CLI to verify that there are 16 images `aws s3 ls s3://cortex-examples/image-classifier/samples/`. + +### Dry run + +Let's do a dry run to make sure the correct list of images will be submitted to the job. + +```bash +$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier +$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir +$ curl $BATCH_API_ENDPOINT?dryRun=true \ +-X POST -H "Content-Type: application/json" \ +-d @- <` then type `EOF`. + +You should expect a response like this: + +```text +s3://cortex-examples/image-classifier/samples/img_0.jpg +s3://cortex-examples/image-classifier/samples/img_1.jpg +... +s3://cortex-examples/image-classifier/samples/img_8.jpg +s3://cortex-examples/image-classifier/samples/img_9.jpg +validations passed +``` + +### Classify images in S3 + +Let's actually submit the job now. Your Batch API will get all of the input S3 files based on `s3_paths` and will apply the filters specified in `includes` and `excludes`. + +```bash +$ export BATCH_API_ENDPOINT= # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier +$ export CORTEX_DEST_S3_DIR= # e.g. export CORTEX_DEST_S3_DIR=s3://my-bucket/dir +$ curl $BATCH_API_ENDPOINT \ +-X POST -H "Content-Type: application/json" \ +-d @- <` then type `EOF`. 
+ +You should get a response like this: + +```json +{"job_id":"69d6f8a472f0e1e5","api_name":"image-classifier", "config":{"dest_s3_dir": "YOUR_S3_BUCKET_HERE"}} +``` + +### Verify results + +Wait for the job to complete by streaming the logs with `cortex logs ` or watching for the job status to change with `cortex get --watch`. + +```bash +$ cortex logs image-classifier 69d6f8a472f0e1e5 --env aws + +started enqueuing batches to queue +completed enqueuing a total of 8 batches +spinning up workers... +2020-07-18 21:35:34.186348:cortex:pid-1:INFO:downloading the project code +... +2020-08-07 15:49:10.889839:cortex:pid-25:INFO:processing batch d0e695bc-a975-4115-a60f-0a55c743fc57 +2020-08-07 15:49:31.188943:cortex:pid-25:INFO:executing on_job_complete +2020-08-07 15:49:31.362053:cortex:pid-25:INFO:no batches left in queue, job has been completed +``` + +The status of your job, which you can get from `cortex get `, should change from `running` to `succeeded` once the job has completed. If it changes to a different status, you may be able to find the stacktrace using `cortex logs `. If your job has completed successfully, you can view the results of the image classification in the S3 directory you specified in the job submission. + +Using the AWS CLI: + +```bash +$ aws s3 ls $CORTEX_DEST_S3_DIR// + 6bee7412-4c16-4d9f-ab3e-e88669cf7a89.json + 3c45b4b3-953e-4226-865b-75f3961dcf95.json + d0e695bc-a975-4115-a60f-0a55c743fc57.json + ... + aggregated_results.json +``` + +You can download the aggregated results file with `aws s3 cp $CORTEX_DEST_S3_DIR//aggregated_results.json .` and confirm that there are 16 classifications. + +
+ +## Stopping a Job + +You can stop a running job by sending a DELETE request to `<BATCH_API_ENDPOINT>?jobID=<JOB_ID>`. + +```bash +$ export BATCH_API_ENDPOINT=<BATCH_API_ENDPOINT> # e.g. export BATCH_API_ENDPOINT=https://abcdefg.execute-api.us-west-2.amazonaws.com/image-classifier +$ curl -X DELETE $BATCH_API_ENDPOINT?jobID=69d96a01ea55da8c + +stopped job 69d96a01ea55da8c +``` + +You can also use the Cortex CLI `cortex delete <API_NAME> <JOB_ID>`. + +```bash +$ cortex delete image-classifier 69d96a01ea55da8c --env aws + +stopped job 69d96a01ea55da8c +``` + +
+ +## Cleanup + +Run `cortex delete` to delete the API: + +```bash +$ cortex delete image-classifier --env aws + +deleting image-classifier +``` + +Running `cortex delete` will stop all in progress jobs for the API and will delete job history for that API. It will not spin down your cluster. + +## Next steps + + +* Deploy another one of our [batch examples](https://github.com/cortexlabs/cortex/tree/master/examples/batch). +* See our [exporting guide](../../../docs/guides/exporting.md) for how to export your model to use in an API. +* Try the [realtime API tutorial](../../pytorch/text-generator/README.md) to learn how to deploy realtime APIs in Cortex. +* See [uninstall](../../../docs/aws/uninstall.md) if you'd like to spin down your cluster. diff --git a/test/batch/image-classifier/cortex.yaml b/test/batch/image-classifier/cortex.yaml new file mode 100644 index 0000000000..35ea4e991a --- /dev/null +++ b/test/batch/image-classifier/cortex.yaml @@ -0,0 +1,9 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier + kind: BatchAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 diff --git a/test/batch/image-classifier/predictor.py b/test/batch/image-classifier/predictor.py new file mode 100644 index 0000000000..92a8cc26b9 --- /dev/null +++ b/test/batch/image-classifier/predictor.py @@ -0,0 +1,58 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import os +import requests +from PIL import Image +from io import BytesIO +import json +import re + +# labels "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" +# bucket, key + + +class PythonPredictor: + def __init__(self, config, job_spec): + import re + import boto3 + from torchvision import transforms + import torchvision + + self.model = torchvision.models.alexnet(pretrained=True).eval() + + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.preprocess = transforms.Compose( + [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] + ) + + self.labels = requests.get(config["labels"]).text.split("\n")[1:] + + self.s3 = boto3.client("s3") # initialize S3 client to save results + + self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() + self.key = os.path.join(self.key, job_spec["job_id"]) + + def predict(self, payload, batch_id): + import json + from PIL import Image + import torch + + tensor_list = [] + for image_url in payload: # download and preprocess each image + img_pil = Image.open(BytesIO(requests.get(image_url).content)) + tensor_list.append(self.preprocess(img_pil)) + + img_tensor = torch.stack(tensor_list) + with torch.no_grad(): # classify the batch of images + prediction = self.model(img_tensor) + _, indices = prediction.max(1) + + results = [ # extract predicted classes + {"url": payload[i], "class": self.labels[class_idx]} + for i, class_idx in enumerate(indices) + ] + + # save results + self.s3.put_object( + Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", Body=json.dumps(results) + ) diff --git a/test/batch/image-classifier/requirements.txt b/test/batch/image-classifier/requirements.txt new file mode 100644 index 0000000000..2c0ef31b51 --- /dev/null +++ b/test/batch/image-classifier/requirements.txt @@ -0,0 +1,4 @@ 
+torch +torchvision +boto3 +pillow diff --git a/test/batch/image-classifier/sample.json b/test/batch/image-classifier/sample.json new file mode 100644 index 0000000000..eb45c463fd --- /dev/null +++ b/test/batch/image-classifier/sample.json @@ -0,0 +1,3 @@ +[ + "https://i.imgur.com/PzXprwl.jpg" +] diff --git a/test/batch/onnx/README.md b/test/batch/onnx/README.md new file mode 100644 index 0000000000..b3091bb133 --- /dev/null +++ b/test/batch/onnx/README.md @@ -0,0 +1,6 @@ +# Batch Image Classifier in ONNX + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + + +Please refer to the [tutorial](https://docs.cortex.dev/v/master/batch-api/image-classifier#deploy-your-batch-api) to see how to deploy a Batch API with Cortex. diff --git a/test/batch/onnx/cortex.yaml b/test/batch/onnx/cortex.yaml new file mode 100644 index 0000000000..4bdf7080e1 --- /dev/null +++ b/test/batch/onnx/cortex.yaml @@ -0,0 +1,10 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier + kind: BatchAPI + predictor: + type: onnx + path: predictor.py + model_path: s3://cortex-examples/image-classifier/alexnet_batch/ + compute: + cpu: 1 diff --git a/test/batch/onnx/predictor.py b/test/batch/onnx/predictor.py new file mode 100644 index 0000000000..7f005a0b72 --- /dev/null +++ b/test/batch/onnx/predictor.py @@ -0,0 +1,64 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import requests +import numpy as np +import base64 +from PIL import Image +from io import BytesIO +from torchvision import transforms +import boto3 +import json +import re +import os + + +class ONNXPredictor: + def __init__(self, onnx_client, config, job_spec): + self.client = onnx_client + + self.labels = requests.get( + "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" + ).text.split("\n")[1:] + + # https://github.com/pytorch/examples/blob/447974f6337543d4de6b888e244a964d3c9b71f6/imagenet/main.py#L198-L199 + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.preprocess = transforms.Compose( + [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] + ) + + if len(config.get("dest_s3_dir", "")) == 0: + raise Exception("'dest_s3_dir' field was not provided in job submission") + + self.s3 = boto3.client("s3") + + self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() + self.key = os.path.join(self.key, job_spec["job_id"]) + + def predict(self, payload, batch_id): + arr_list = [] + + # download and preprocess each image + for image_url in payload: + if image_url.startswith("s3://"): + bucket, image_key = re.match("s3://(.+?)/(.+)", image_url).groups() + image_bytes = self.s3.get_object(Bucket=bucket, Key=image_key)["Body"].read() + else: + image_bytes = requests.get(image_url).content + + img_pil = Image.open(BytesIO(image_bytes)) + arr_list.append(self.preprocess(img_pil).numpy()) + + # classify the batch of images + imgs_arr = np.stack(arr_list, axis=0) + result = self.client.predict(imgs_arr) + + # extract predicted classes + predicted_classes = np.argmax(result[0], axis=1) + results = [ + {"url": payload[i], "class": self.labels[class_idx]} + for i, class_idx in enumerate(predicted_classes) + ] + + # save results + json_output = 
json.dumps(results) + self.s3.put_object(Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", Body=json_output) diff --git a/test/batch/onnx/requirements.txt b/test/batch/onnx/requirements.txt new file mode 100644 index 0000000000..5a2cde2a12 --- /dev/null +++ b/test/batch/onnx/requirements.txt @@ -0,0 +1,3 @@ +torchvision +boto3 +pillow diff --git a/test/batch/tensorflow/README.md b/test/batch/tensorflow/README.md new file mode 100644 index 0000000000..163fe34968 --- /dev/null +++ b/test/batch/tensorflow/README.md @@ -0,0 +1,6 @@ +# Batch Image Classifier in TensorFlow + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + + +Please refer to the [tutorial](https://docs.cortex.dev/v/master/batch-api/image-classifier#deploy-your-batch-api) to see how to deploy a Batch API with Cortex. diff --git a/test/batch/tensorflow/cortex.yaml b/test/batch/tensorflow/cortex.yaml new file mode 100644 index 0000000000..189e1a9b0e --- /dev/null +++ b/test/batch/tensorflow/cortex.yaml @@ -0,0 +1,10 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier + kind: BatchAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ + compute: + cpu: 1 diff --git a/test/batch/tensorflow/predictor.py b/test/batch/tensorflow/predictor.py new file mode 100644 index 0000000000..da4bb39ec3 --- /dev/null +++ b/test/batch/tensorflow/predictor.py @@ -0,0 +1,60 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import requests +import numpy as np +from PIL import Image +from io import BytesIO +import json +import os +import re +import boto3 +import tensorflow as tf + + +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config, job_spec): + self.client = tensorflow_client + self.labels = requests.get( + "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" + ).text.split("\n")[1:] + + if len(config.get("dest_s3_dir", "")) == 0: + raise Exception("'dest_s3_dir' field was not provided in job submission") + + self.s3 = boto3.client("s3") + + self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() + self.key = os.path.join(self.key, job_spec["job_id"]) + + def predict(self, payload, batch_id): + arr_list = [] + + # download and preprocess each image + for image_url in payload: + if image_url.startswith("s3://"): + bucket, image_key = re.match("s3://(.+?)/(.+)", image_url).groups() + image_bytes = self.s3.get_object(Bucket=bucket, Key=image_key)["Body"].read() + else: + image_bytes = requests.get(image_url).content + + decoded_image = np.asarray(Image.open(BytesIO(image_bytes)), dtype=np.float32) / 255 + resized_image = tf.image.resize( + decoded_image, [224, 224], method=tf.image.ResizeMethod.BILINEAR + ) + arr_list.append(resized_image) + + # classify the batch of images + model_input = {"images": np.stack(arr_list, axis=0)} + predictions = self.client.predict(model_input) + + # extract predicted classes + reshaped_predictions = np.reshape(np.array(predictions["classes"]), [-1, len(self.labels)]) + predicted_classes = np.argmax(reshaped_predictions, axis=1) + results = [ + {"url": payload[i], "class": self.labels[class_idx]} + for i, class_idx in enumerate(predicted_classes) + ] + + # save results + json_output = json.dumps(results) + self.s3.put_object(Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", 
Body=json_output) diff --git a/test/batch/tensorflow/requirements.txt b/test/batch/tensorflow/requirements.txt new file mode 100644 index 0000000000..7e2fba5e6c --- /dev/null +++ b/test/batch/tensorflow/requirements.txt @@ -0,0 +1 @@ +Pillow diff --git a/test/keras/document-denoiser/README.md b/test/keras/document-denoiser/README.md new file mode 100644 index 0000000000..05f90b9bef --- /dev/null +++ b/test/keras/document-denoiser/README.md @@ -0,0 +1,46 @@ +# Clean Dirty Documents w/ Autoencoders + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example model cleans text documents of anything that isn't text (aka noise): coffee stains, old wear artifacts, etc. You can inspect the notebook that has been used to train the model [here](trainer.ipynb). + +Here's a collage of input texts and predictions. + +![Imgur](https://i.imgur.com/M4Mjz2l.jpg) + +*Figure 1 - The dirty documents are on the left side and the cleaned ones are on the right* + +## Sample Prediction + +Once this model is deployed, get the API endpoint by running `cortex get document-denoiser`. + +Now let's take a sample image like this one. + +![Imgur](https://i.imgur.com/JJLfFxB.png) + +Export the endpoint & the image's URL by running +```bash +export ENDPOINT= +export IMAGE_URL=https://i.imgur.com/JJLfFxB.png +``` + +Then run the following piped commands +```bash +curl "${ENDPOINT}" -X POST -H "Content-Type: application/json" -d '{"url":"'${IMAGE_URL}'"}' | +sed 's/"//g' | +base64 -d > prediction.png +``` + +Once this has run, we'll see a `prediction.png` file saved to the disk. This is the result. + +![Imgur](https://i.imgur.com/PRB2oS8.png) + +As it can be seen, the text document has been cleaned of any noise. Success! 
+ +--- + +Here's a short list of URLs of other text documents in image format that can be cleaned using this model. Export these links to `IMAGE_URL` variable: + +* https://i.imgur.com/6COQ46f.png +* https://i.imgur.com/alLI83b.png +* https://i.imgur.com/QVoSTuu.png diff --git a/test/keras/document-denoiser/cortex.yaml b/test/keras/document-denoiser/cortex.yaml new file mode 100644 index 0000000000..b616a0ff0f --- /dev/null +++ b/test/keras/document-denoiser/cortex.yaml @@ -0,0 +1,12 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: document-denoiser + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + config: + model: s3://cortex-examples/keras/document-denoiser/model.h5 + resize_shape: [540, 260] + compute: + cpu: 1 diff --git a/test/keras/document-denoiser/predictor.py b/test/keras/document-denoiser/predictor.py new file mode 100644 index 0000000000..2554560388 --- /dev/null +++ b/test/keras/document-denoiser/predictor.py @@ -0,0 +1,86 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import boto3, base64, cv2, re, os, requests +from botocore import UNSIGNED +from botocore.client import Config +import numpy as np +from tensorflow.keras.models import load_model + + +def get_url_image(url_image): + """ + Get numpy image from URL image. + """ + resp = requests.get(url_image, stream=True).raw + image = np.asarray(bytearray(resp.read()), dtype="uint8") + image = cv2.imdecode(image, cv2.IMREAD_GRAYSCALE) + return image + + +def image_to_png_nparray(image): + """ + Convert numpy image to jpeg numpy vector. 
+ """ + is_success, im_buf_arr = cv2.imencode(".png", image) + return im_buf_arr + + +def image_to_png_bytes(image): + """ + Convert numpy image to bytes-encoded png image. + """ + buf = image_to_png_nparray(image) + byte_im = buf.tobytes() + return byte_im + + +class PythonPredictor: + def __init__(self, config): + # download the model + bucket, key = re.match("s3://(.+?)/(.+)", config["model"]).groups() + + if os.environ.get("AWS_ACCESS_KEY_ID"): + s3 = boto3.client("s3") # client will use your credentials if available + else: + s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client + + model_path = os.path.join("/tmp/model.h5") + s3.download_file(bucket, key, model_path) + + # load the model + self.model = load_model(model_path) + + # resize shape (width, height) + self.resize_shape = tuple(config["resize_shape"]) + + def predict(self, payload): + # download image + img_url = payload["url"] + image = get_url_image(img_url) + resized = cv2.resize(image, self.resize_shape) + + # prediction + pred = self.make_prediction(resized) + + # image represented in bytes + byte_im = image_to_png_bytes(pred) + + # encode image + image_enc = base64.b64encode(byte_im).decode("utf-8") + + return image_enc + + def make_prediction(self, img): + """ + Make prediction on image. 
+ """ + processed = img / 255.0 + processed = np.expand_dims(processed, 0) + processed = np.expand_dims(processed, 3) + pred = self.model.predict(processed) + pred = np.squeeze(pred, 3) + pred = np.squeeze(pred, 0) + out_img = pred * 255 + out_img[out_img > 255.0] = 255.0 + out_img = out_img.astype(np.uint8) + return out_img diff --git a/test/keras/document-denoiser/requirements.txt b/test/keras/document-denoiser/requirements.txt new file mode 100644 index 0000000000..77eb59dc52 --- /dev/null +++ b/test/keras/document-denoiser/requirements.txt @@ -0,0 +1,5 @@ +numpy==1.18.0 +requests==2.22.0 +opencv-python==4.1.2.30 +keras==2.3.1 +h5py==2.10.0 diff --git a/test/keras/document-denoiser/sample.json b/test/keras/document-denoiser/sample.json new file mode 100644 index 0000000000..651595f4fb --- /dev/null +++ b/test/keras/document-denoiser/sample.json @@ -0,0 +1,3 @@ +{ + "url": "https://i.imgur.com/JJLfFxB.png" +} diff --git a/test/keras/document-denoiser/trainer.ipynb b/test/keras/document-denoiser/trainer.ipynb new file mode 100644 index 0000000000..c8b0799b1b --- /dev/null +++ b/test/keras/document-denoiser/trainer.ipynb @@ -0,0 +1,620 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training a Document Denoiser Model with AutoEncoders" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "# _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", + "\n", + "\n", + "import keras\n", + "import cv2\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import os\n", + "import ntpath\n", + "from glob import glob\n", + "from matplotlib.pyplot import imshow\n", + "from sklearn.model_selection import train_test_split\n", + "from keras.preprocessing.image import ImageDataGenerator\n", + "from keras.models import Sequential, Model, load_model\n", + "from keras.layers import Activation, Flatten, Dropout, SpatialDropout2D, Conv2D, UpSampling2D, MaxPooling2D, add, concatenate, Input, BatchNormalization\n", + "from keras.backend import set_image_data_format\n", + "from keras.utils import plot_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download Dataset\n", + "\n", + "Download the dataset from [kaggle (denoising dirty documents)](https://www.kaggle.com/c/denoising-dirty-documents/data). You will need to be logged in to be able to download the data.\n", + "\n", + "Once downloaded run the following commands" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!unzip denoising-dirty-documents.zip && rm denoising-dirty-documents.zip\n", + "!mv denoising-dirty-documents/*.zip . && rm -rf denoising-dirty-documents\n", + "!unzip '*.zip' > /dev/null && rm *.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define the Data Generator\n", + "\n", + "Include data augmentation because the dataset is rather small." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "x_dirty = sorted(glob(\"train/*.png\"))\n", + "x_cleaned = sorted(glob(\"train_cleaned/*.png\"))\n", + "x_test = sorted(glob(\"test/*.png\"))\n", + "input_shape = (260, 540)\n", + "height = input_shape[0]\n", + "width = input_shape[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_valid, y_train, y_valid = train_test_split(x_dirty, x_cleaned, test_size=0.20)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "set_image_data_format(\"channels_last\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def model_train_generator(x_train, y_train, epochs, batch_size, resize_shape):\n", + " white_fill = 1.0\n", + " datagen = ImageDataGenerator(\n", + " rotation_range=180,\n", + " width_shift_range=0.2,\n", + " height_shift_range=0.2,\n", + " zoom_range=0.3,\n", + " fill_mode=\"constant\",\n", + " cval=white_fill,\n", + " horizontal_flip=True,\n", + " vertical_flip=True,\n", + " )\n", + " \n", + " for _ in range(epochs):\n", + " for x_file, y_file in zip(x_train, y_train):\n", + " x_img = cv2.imread(x_file, cv2.IMREAD_GRAYSCALE) / 255.0\n", + " y_img = cv2.imread(y_file, cv2.IMREAD_GRAYSCALE) / 255.0\n", + " \n", + " xs = []\n", + " ys = []\n", + " for i in range(batch_size):\n", + " if i == 0:\n", + " x = x_img\n", + " y = y_img\n", + " else:\n", + " params = datagen.get_random_transform(img_shape=x_img.shape)\n", + " x = datagen.apply_transform(np.expand_dims(x_img, 2), params)\n", + " y = datagen.apply_transform(np.expand_dims(y_img, 2), params)\n", + " x = cv2.resize(x, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", + " y = cv2.resize(y, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", + " x = np.expand_dims(x, 2)\n", + " y = np.expand_dims(y, 2)\n", 
+ " xs.append(x)\n", + " ys.append(y)\n", + " xs_imgs = np.array(xs)\n", + " ys_imgs = np.array(ys)\n", + " yield (xs_imgs, ys_imgs)\n", + "\n", + "def model_valid_generator(x_valid, y_valid, epochs, resize_shape):\n", + " xs = []\n", + " ys = []\n", + " for x_file, y_file in zip(x_valid, y_valid):\n", + " x_img = cv2.imread(x_file, cv2.IMREAD_GRAYSCALE) / 255.0\n", + " y_img = cv2.imread(y_file, cv2.IMREAD_GRAYSCALE) / 255.0\n", + " x = cv2.resize(x_img, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", + " y = cv2.resize(y_img, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", + " x = np.expand_dims(x, 2)\n", + " x = np.expand_dims(x, 0)\n", + " y = np.expand_dims(y, 2)\n", + " y = np.expand_dims(y, 0)\n", + " xs.append(x)\n", + " ys.append(y)\n", + " \n", + " for _ in range(epochs):\n", + " for xs_img, ys_img in zip(xs, ys):\n", + " yield (xs_img, ys_img)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def create_encoder(input_shape):\n", + " inp = Input(shape=input_shape)\n", + " x = Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), \n", + " input_shape=input_shape, activation=\"relu\", padding=\"same\")(inp)\n", + " x = BatchNormalization()(x)\n", + " x = MaxPooling2D(pool_size=(2,2))(x)\n", + " \n", + " x = Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), \n", + " activation=\"relu\", padding=\"same\")(x)\n", + " x = BatchNormalization()(x)\n", + "\n", + " return inp, x" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def create_decoder(inp):\n", + " x = Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), activation=\"relu\",\n", + " padding=\"same\")(inp)\n", + " x = BatchNormalization()(x)\n", + " x = UpSampling2D(size=(2,2))(x)\n", + " \n", + " x = Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), \n", + " 
activation=\"relu\", padding=\"same\")(x)\n", + " x = BatchNormalization()(x)\n", + " \n", + " x = Conv2D(filters=1, kernel_size=(1,1), strides=(1,1), \n", + " activation=\"sigmoid\", padding=\"same\")(x)\n", + " x = BatchNormalization()(x)\n", + " \n", + " return inp, x" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def create_autoencoder(input_shape):\n", + " enc_inp, encoder = create_encoder(input_shape)\n", + " dec_inp, autoencoder = create_decoder(encoder)\n", + " model = Model(inputs=[enc_inp], outputs=[autoencoder], name='AutoEncoder')\n", + " \n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From C:\\Users\\OboTh\\Anaconda3\\envs\\lightweight-gpu-python\\lib\\site-packages\\tensorflow_core\\python\\ops\\resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "If using Keras pass *_constraint arguments to layers.\n", + "WARNING:tensorflow:From C:\\Users\\OboTh\\Anaconda3\\envs\\lightweight-gpu-python\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:4070: The name tf.nn.max_pool is deprecated. 
Please use tf.nn.max_pool2d instead.\n", + "\n", + "Model: \"AutoEncoder\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "input_1 (InputLayer) (None, 260, 540, 1) 0 \n", + "_________________________________________________________________\n", + "conv2d_1 (Conv2D) (None, 260, 540, 64) 640 \n", + "_________________________________________________________________\n", + "batch_normalization_1 (Batch (None, 260, 540, 64) 256 \n", + "_________________________________________________________________\n", + "max_pooling2d_1 (MaxPooling2 (None, 130, 270, 64) 0 \n", + "_________________________________________________________________\n", + "conv2d_2 (Conv2D) (None, 130, 270, 32) 18464 \n", + "_________________________________________________________________\n", + "batch_normalization_2 (Batch (None, 130, 270, 32) 128 \n", + "_________________________________________________________________\n", + "conv2d_3 (Conv2D) (None, 130, 270, 32) 9248 \n", + "_________________________________________________________________\n", + "batch_normalization_3 (Batch (None, 130, 270, 32) 128 \n", + "_________________________________________________________________\n", + "up_sampling2d_1 (UpSampling2 (None, 260, 540, 32) 0 \n", + "_________________________________________________________________\n", + "conv2d_4 (Conv2D) (None, 260, 540, 64) 18496 \n", + "_________________________________________________________________\n", + "batch_normalization_4 (Batch (None, 260, 540, 64) 256 \n", + "_________________________________________________________________\n", + "conv2d_5 (Conv2D) (None, 260, 540, 1) 65 \n", + "_________________________________________________________________\n", + "batch_normalization_5 (Batch (None, 260, 540, 1) 4 \n", + "=================================================================\n", + "Total params: 47,685\n", + "Trainable 
params: 47,299\n", + "Non-trainable params: 386\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model = create_autoencoder((height, width, 1))\n", + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(optimizer='adam', loss='mse')\n", + "epochs = 20\n", + "batch_size = 8\n", + "samples = len(x_train)\n", + "validation_samples = len(x_valid)\n", + "train_generator = model_train_generator(x_train, y_train, epochs=epochs, batch_size=batch_size, resize_shape=(height, width))\n", + "valid_generator = model_valid_generator(x_valid, y_valid, epochs=epochs, resize_shape=(height, width))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train the AutoEncoder Model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From C:\\Users\\OboTh\\Anaconda3\\envs\\lightweight-gpu-python\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:422: The name tf.global_variables is deprecated. 
Please use tf.compat.v1.global_variables instead.\n", + "\n", + "Epoch 1/20\n", + "115/115 [==============================] - 49s 429ms/step - loss: 1.2062 - val_loss: 0.1817\n", + "Epoch 2/20\n", + "115/115 [==============================] - 43s 373ms/step - loss: 0.5792 - val_loss: 0.1720\n", + "Epoch 3/20\n", + "115/115 [==============================] - 43s 373ms/step - loss: 0.4297 - val_loss: 0.1399\n", + "Epoch 4/20\n", + "115/115 [==============================] - 43s 375ms/step - loss: 0.3160 - val_loss: 0.1023\n", + "Epoch 5/20\n", + "115/115 [==============================] - 44s 385ms/step - loss: 0.2276 - val_loss: 0.0609\n", + "Epoch 6/20\n", + "115/115 [==============================] - 44s 379ms/step - loss: 0.1599 - val_loss: 0.0292\n", + "Epoch 7/20\n", + "115/115 [==============================] - 43s 376ms/step - loss: 0.1091 - val_loss: 0.0112\n", + "Epoch 8/20\n", + "115/115 [==============================] - 43s 376ms/step - loss: 0.0730 - val_loss: 0.0074\n", + "Epoch 9/20\n", + "115/115 [==============================] - 44s 381ms/step - loss: 0.0473 - val_loss: 0.0055\n", + "Epoch 10/20\n", + "115/115 [==============================] - 45s 393ms/step - loss: 0.0301 - val_loss: 0.0047\n", + "Epoch 11/20\n", + "115/115 [==============================] - 45s 387ms/step - loss: 0.0189 - val_loss: 0.0041\n", + "Epoch 12/20\n", + "115/115 [==============================] - 43s 376ms/step - loss: 0.0118 - val_loss: 0.0042\n", + "Epoch 13/20\n", + "115/115 [==============================] - 44s 380ms/step - loss: 0.0075 - val_loss: 0.0061\n", + "Epoch 14/20\n", + "115/115 [==============================] - 43s 377ms/step - loss: 0.0051 - val_loss: 0.0048\n", + "Epoch 15/20\n", + "115/115 [==============================] - 43s 378ms/step - loss: 0.0037 - val_loss: 0.0045\n", + "Epoch 16/20\n", + "115/115 [==============================] - 43s 373ms/step - loss: 0.0029 - val_loss: 0.0045\n", + "Epoch 17/20\n", + "115/115 
[==============================] - 44s 378ms/step - loss: 0.0025 - val_loss: 0.0048\n", + "Epoch 18/20\n", + "115/115 [==============================] - 43s 375ms/step - loss: 0.0023 - val_loss: 0.0047\n", + "Epoch 19/20\n", + "115/115 [==============================] - 43s 376ms/step - loss: 0.0022 - val_loss: 0.0043\n", + "Epoch 20/20\n", + "115/115 [==============================] - 44s 380ms/step - loss: 0.0021 - val_loss: 0.0042\n" + ] + } + ], + "source": [ + "hist_obj = model.fit_generator(train_generator, validation_data=valid_generator, validation_steps=validation_samples, steps_per_epoch=samples, epochs=epochs, shuffle=True) " + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "hist_pd = pd.DataFrame(hist_obj.history, index=np.arange(1, len(hist_obj.history['loss'])+1))\n", + "hist_pd.index.name = 'epoch'\n", + "sns.lineplot(data=hist_pd)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "model_name = \"model.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "model.save(model_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# model = load_model(model_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing Accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "def test_generator(x_test, resize_shape):\n", + " for sample in x_test:\n", + " img = cv2.imread(sample, cv2.IMREAD_GRAYSCALE) / 255.0\n", + " res_img = cv2.resize(img, resize_shape[::-1], interpolation=cv2.INTER_AREA)\n", + " res_img = np.expand_dims(res_img, 0)\n", + " res_img = np.expand_dims(res_img, 3)\n", + " np_img = np.array(res_img)\n", + " yield (np_img, np_img)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE Loss: 0.07084273546934128\n" + ] + } + ], + "source": [ + "steps = len(x_test)\n", + "test_gen = test_generator(x_test, input_shape)\n", + "loss = model.evaluate_generator(test_gen, steps=steps)\n", + "print(\"MSE Loss:\", loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample Prediction" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "img = cv2.imread(x_test[0], cv2.IMREAD_GRAYSCALE)\n", + "img = cv2.resize(img, input_shape[::-1], 
interpolation=cv2.INTER_AREA)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "imshow(img, cmap='gray')" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "def make_prediction(img):\n", + " processed = img / 255.0\n", + " processed = np.expand_dims(processed, 0)\n", + " processed = np.expand_dims(processed, 3)\n", + " pred = model.predict(processed)\n", + " pred = np.squeeze(pred, 3)\n", + " pred = np.squeeze(pred, 0)\n", + " out_img = pred * 255\n", + " out_img[out_img > 255.0] = 255.0\n", + " out_img = out_img.astype(np.uint8)\n", + " return out_img\n", + "\n", + "def path_leaf(path):\n", + " head, tail = ntpath.split(path)\n", + " return tail or ntpath.basename(head)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "pred = make_prediction(img)" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "imshow(pred, cmap='gray')" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "output_dir = 'test_preds'\n", + "if not os.path.exists(output_dir):\n", + " os.makedirs(output_dir)\n", + "for x_test_file in x_test:\n", + " img = cv2.imread(x_test_file, cv2.IMREAD_GRAYSCALE)\n", + " img = cv2.resize(img, input_shape[::-1], interpolation=cv2.INTER_AREA)\n", + " pred = make_prediction(img)\n", + " filename = path_leaf(x_test_file)\n", + " filepath = os.path.join(output_dir, filename)\n", + " cv2.imwrite(filepath, pred)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "lightweight-gpu-kernel", + "language": "python", + "name": "lightweight-gpu-kernel" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/test/live-reloading/onnx/README.md b/test/live-reloading/onnx/README.md new file mode 100644 index 0000000000..77456896ee --- /dev/null +++ b/test/live-reloading/onnx/README.md @@ -0,0 +1,7 @@ +## Live-reloading model APIs + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +The model live-reloading feature is automatically enabled for the ONNX predictors. This means that any ONNX examples found in the [examples](../..) directory will already have this running. + +The live-reloading is a feature that reloads models at run-time from (a) specified S3 bucket(s) in the `cortex.yaml` config of each API. 
Models are added/removed from the API when the said models are added/removed from the S3 bucket(s) or reloaded when the models are edited. More on this in the [docs](insert-link). diff --git a/test/live-reloading/python/mpg-estimator/cortex.yaml b/test/live-reloading/python/mpg-estimator/cortex.yaml new file mode 100644 index 0000000000..4c243b5032 --- /dev/null +++ b/test/live-reloading/python/mpg-estimator/cortex.yaml @@ -0,0 +1,8 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: mpg-estimator + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + model_path: s3://cortex-examples/sklearn/mpg-estimator/linreg/ diff --git a/test/live-reloading/python/mpg-estimator/predictor.py b/test/live-reloading/python/mpg-estimator/predictor.py new file mode 100644 index 0000000000..104b9a5c0a --- /dev/null +++ b/test/live-reloading/python/mpg-estimator/predictor.py @@ -0,0 +1,27 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import mlflow.sklearn +import numpy as np + + +class PythonPredictor: + def __init__(self, config, python_client): + self.client = python_client + + def load_model(self, model_path): + return mlflow.sklearn.load_model(model_path) + + def predict(self, payload, query_params): + model_version = query_params.get("version") + + model = self.client.get_model(model_version=model_version) + model_input = [ + payload["cylinders"], + payload["displacement"], + payload["horsepower"], + payload["weight"], + payload["acceleration"], + ] + result = model.predict([model_input]).item() + + return {"prediction": result, "model": {"version": model_version}} diff --git a/test/live-reloading/python/mpg-estimator/requirements.txt b/test/live-reloading/python/mpg-estimator/requirements.txt new file mode 100644 index 0000000000..cbcad6b321 --- /dev/null +++ b/test/live-reloading/python/mpg-estimator/requirements.txt @@ -0,0 +1,4 @@ +mlflow +pandas +numpy +scikit-learn==0.21.3 diff --git a/test/live-reloading/python/mpg-estimator/sample.json b/test/live-reloading/python/mpg-estimator/sample.json new file mode 100644 index 0000000000..2dbbca46dd --- /dev/null +++ b/test/live-reloading/python/mpg-estimator/sample.json @@ -0,0 +1,7 @@ +{ + "cylinders": 4, + "displacement": 135, + "horsepower": 84, + "weight": 2490, + "acceleration": 15.7 +} diff --git a/test/live-reloading/tensorflow/README.md b/test/live-reloading/tensorflow/README.md new file mode 100644 index 0000000000..2444484b77 --- /dev/null +++ b/test/live-reloading/tensorflow/README.md @@ -0,0 +1,11 @@ +## Live-reloading model APIs + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +The model live-reloading feature is automatically enabled 1 for the TensorFlow predictors. 
This means that any TensorFlow examples found in the [examples](../..) directory will already have this running. + +The live-reloading is a feature that reloads models at run-time from (a) specified S3 bucket(s) in the `cortex.yaml` config of each API. Models are added/removed from the API when the said models are added/removed from the S3 bucket(s) or reloaded when the models are edited. More on this in the [docs](insert-link). + +--- + +*1: The live-reloading feature for the TensorFlow predictor is disabled when Inferentia resources (`compute.inf`) are added to the API and `processes_per_replica` > 1.* diff --git a/test/model-caching/onnx/multi-model-classifier/README.md b/test/model-caching/onnx/multi-model-classifier/README.md new file mode 100644 index 0000000000..bf5fc906cb --- /dev/null +++ b/test/model-caching/onnx/multi-model-classifier/README.md @@ -0,0 +1,77 @@ +# Multi-Model Classifier API + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example deploys ResNet50, MobileNet and ShuffleNet models in one API. Query parameters are used for selecting the model and the version. + +Since model caching is enabled, there can only be 2 models loaded into memory - loading a 3rd one will lead to the removal of the least recently used one. To witness the adding/removal process of models, check the logs of the API by running `cortex logs multi-model-classifier` once the API is up. + +The example can be run on both CPU and on GPU hardware. + +## Sample Prediction + +Deploy the model by running: + +```bash +cortex deploy +``` + +And wait for it to become live by tracking its status with `cortex get --watch`. + +Once the API has been successfully deployed, export the API's endpoint for convenience. You can get the API's endpoint by running `cortex get multi-model-classifier`.
+ +```bash +export ENDPOINT=your-api-endpoint +``` + +When making a prediction with [sample.json](sample.json), the following image will be used: + +![cat](https://i.imgur.com/213xcvs.jpg) + +### ResNet50 Classifier + +Make a request to the ResNet50 model: + +```bash +curl "${ENDPOINT}?model=resnet50" -X POST -H "Content-Type: application/json" -d @sample.json +``` + +The expected response is: + +```json +{"label": "tabby", "model": {"name": "resnet50", "version": "latest"}} +``` + +### MobileNet Classifier + +Make a request to the MobileNet model: + +```bash +curl "${ENDPOINT}?model=mobilenet" -X POST -H "Content-Type: application/json" -d @sample.json +``` + +The expected response is: + +```json +{"label": "tabby", "model": {"name": "mobilenet", "version": "latest"}} +``` + +### ShuffleNet Classifier + +At this point, there are 2 models loaded into memory (as specified by `cache_size`). Loading `ShuffleNet` as well will lead to the removal of the least recently used model - in this case, it will be the ResNet50 model that will get evicted. Since the `disk_cache_size` is set to 3, no model will be removed from disk. + +Make a request to the ShuffleNet model: + +```bash +curl "${ENDPOINT}?model=shufflenet" -X POST -H "Content-Type: application/json" -d @sample.json +``` + +The expected response is: + +```json +{"label": "Egyptian_cat", "model": {"name": "shufflenet", "version": "latest"}} +``` + +--- + +Now, inspect `cortex get multi-model-classifier` to see when and which models were removed in this process of making requests to different versions of the same model. diff --git a/test/model-caching/onnx/multi-model-classifier/cortex.yaml b/test/model-caching/onnx/multi-model-classifier/cortex.yaml new file mode 100644 index 0000000000..f074721fd3 --- /dev/null +++ b/test/model-caching/onnx/multi-model-classifier/cortex.yaml @@ -0,0 +1,22 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: multi-model-classifier + kind: RealtimeAPI + predictor: + type: onnx + path: predictor.py + models: + paths: + - name: resnet50 + model_path: s3://cortex-examples/onnx/resnet50/ + - name: mobilenet + model_path: s3://cortex-examples/onnx/mobilenet/ + - name: shufflenet + model_path: s3://cortex-examples/onnx/shufflenet/ + cache_size: 2 + disk_cache_size: 3 + config: + image-classifier-classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + image-resize: 224 + compute: + mem: 2G diff --git a/test/model-caching/onnx/multi-model-classifier/predictor.py b/test/model-caching/onnx/multi-model-classifier/predictor.py new file mode 100644 index 0000000000..6ab949a24c --- /dev/null +++ b/test/model-caching/onnx/multi-model-classifier/predictor.py @@ -0,0 +1,99 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import numpy as np +import cv2, requests +from scipy.special import softmax + + +def get_url_image(url_image): + """ + Get numpy image from URL image. + """ + resp = requests.get(url_image, stream=True).raw + image = np.asarray(bytearray(resp.read()), dtype="uint8") + image = cv2.imdecode(image, cv2.IMREAD_COLOR) + return image + + +def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA): + """ + Resize a numpy image. 
+ """ + dim = None + (h, w) = image.shape[:2] + + if width is None and height is None: + return image + + if width is None: + # calculate the ratio of the height and construct the dimensions + r = height / float(h) + dim = (int(w * r), height) + else: + # calculate the ratio of the width and construct the dimensions + r = width / float(w) + dim = (width, int(h * r)) + + resized = cv2.resize(image, dim, interpolation=inter) + + return resized + + +def preprocess(img_data): + """ + Normalize input for inference. + """ + # move pixel color dimension to position 0 + img = np.moveaxis(img_data, 2, 0) + + mean_vec = np.array([0.485, 0.456, 0.406]) + stddev_vec = np.array([0.229, 0.224, 0.225]) + norm_img_data = np.zeros(img.shape).astype("float32") + for i in range(img.shape[0]): + # for each pixel in each channel, divide the value by 255 to get value between [0, 1] and then normalize + norm_img_data[i, :, :] = (img[i, :, :] / 255 - mean_vec[i]) / stddev_vec[i] + + # extend to batch size of 1 + norm_img_data = norm_img_data[np.newaxis, ...] + return norm_img_data + + +def postprocess(results): + """ + Eliminates all dimensions of size 1, softmaxes the input and then returns the index of the element with the highest value. 
+ """ + squeezed = np.squeeze(results) + maxed = softmax(squeezed) + result = np.argmax(maxed) + return result + + +class ONNXPredictor: + def __init__(self, onnx_client, config): + # onnx client + self.client = onnx_client + + # for image classifiers + classes = requests.get(config["image-classifier-classes"]).json() + self.image_classes = [classes[str(k)][1] for k in range(len(classes))] + self.resize_value = config["image-resize"] + + def predict(self, payload, query_params): + # get request params + model_name = query_params["model"] + model_version = query_params.get("version", "latest") + img_url = payload["url"] + + # process the input + img = get_url_image(img_url) + img = image_resize(img, height=self.resize_value) + img = preprocess(img) + + # predict + results = self.client.predict(img, model_name, model_version)[0] + + # interpret result + result = postprocess(results) + predicted_label = self.image_classes[result] + + return {"label": predicted_label, "model": {"name": model_name, "version": model_version}} diff --git a/test/model-caching/onnx/multi-model-classifier/requirements.txt b/test/model-caching/onnx/multi-model-classifier/requirements.txt new file mode 100644 index 0000000000..212d089934 --- /dev/null +++ b/test/model-caching/onnx/multi-model-classifier/requirements.txt @@ -0,0 +1,2 @@ +opencv-python==4.2.0.34 +scipy==1.4.1 diff --git a/test/model-caching/onnx/multi-model-classifier/sample.json b/test/model-caching/onnx/multi-model-classifier/sample.json new file mode 100644 index 0000000000..4ee3aa45df --- /dev/null +++ b/test/model-caching/onnx/multi-model-classifier/sample.json @@ -0,0 +1,3 @@ +{ + "url": "https://i.imgur.com/213xcvs.jpg" +} diff --git a/test/model-caching/python/mpg-estimator/README.md b/test/model-caching/python/mpg-estimator/README.md new file mode 100644 index 0000000000..e120ac8204 --- /dev/null +++ b/test/model-caching/python/mpg-estimator/README.md @@ -0,0 +1,75 @@ +# MPG Estimator API + +_WARNING: you are on the 
master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example deploys an MPG estimator model of multiple versions in one API. Query parameters are used for selecting the model and the version. + +Since model caching is enabled, there can only be 1 model loaded into memory (counting the versioned models as well) - loading a 2nd one will lead to the removal of the least recently used one. To witness the adding/removal process of models, check the logs of the API by running `cortex logs mpg-estimator` once the API is up. + +The example can be run on both CPU and on GPU hardware. + +## Sample Prediction + +Deploy the model by running: + +```bash +cortex deploy +``` + +And wait for it to become live by tracking its status with `cortex get --watch`. + +Once the API has been successfully deployed, export the API's endpoint for convenience. You can get the API's endpoint by running `cortex get mpg-estimator`. + +```bash +export ENDPOINT=your-api-endpoint +``` + +### Version 1 + +Make a request to version `1` of the `mpg-estimator` model: + +```bash +curl "${ENDPOINT}?model=mpg-estimator&version=1" -X POST -H "Content-Type: application/json" -d @sample.json +``` + +The expected response is: + +```json +{"prediction": 26.929889872154185, "model": {"name": "mpg-estimator", "version": "1"}} +``` + +### Version 2 + +At this point, there is one model loaded into memory (as specified by `cache_size`). Loading another versioned model as well will lead to the removal of the least recently used model - in this case, it will be version 1 that will get evicted. Since the `disk_cache_size` is set to 2, no model will be removed from disk.
+ +Make a request to version `2` of the `mpg-estimator` model: + +```bash +curl "${ENDPOINT}?model=mpg-estimator&version=2" -X POST -H "Content-Type: application/json" -d @sample.json +``` + +The expected response is: + +```json +{"prediction": 26.929889872154185, "model": {"name": "mpg-estimator", "version": "2"}} +``` + +### Version 3 + +With the following request, version 2 of the model will have to be evicted from the memory. Since `disk_cache_size` is set to 2, this time, version 1 of the model will get removed from the disk. + +Make a request to version `3` of the `mpg-estimator` model: + +```bash +curl "${ENDPOINT}?model=mpg-estimator&version=3" -X POST -H "Content-Type: application/json" -d @sample.json +``` + +The expected response is: + +```json +{"prediction": 26.929889872154185, "model": {"name": "mpg-estimator", "version": "3"}} +``` + +--- + +Now, inspect `cortex get mpg-estimator` to see when and which models were removed in this process of making requests to different versions of the same model. The same algorithm is applied to different models as well, not just for the versions of a specific model. diff --git a/test/model-caching/python/mpg-estimator/cortex.yaml b/test/model-caching/python/mpg-estimator/cortex.yaml new file mode 100644 index 0000000000..1d26879aaa --- /dev/null +++ b/test/model-caching/python/mpg-estimator/cortex.yaml @@ -0,0 +1,13 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: mpg-estimator + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + models: + paths: + - name: mpg-estimator + model_path: s3://cortex-examples/sklearn/mpg-estimator/linreg/ + cache_size: 1 + disk_cache_size: 2 diff --git a/test/model-caching/python/mpg-estimator/predictor.py b/test/model-caching/python/mpg-estimator/predictor.py new file mode 100644 index 0000000000..84aa206f41 --- /dev/null +++ b/test/model-caching/python/mpg-estimator/predictor.py @@ -0,0 +1,28 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import mlflow.sklearn +import numpy as np + + +class PythonPredictor: + def __init__(self, config, python_client): + self.client = python_client + + def load_model(self, model_path): + return mlflow.sklearn.load_model(model_path) + + def predict(self, payload, query_params): + model_name = query_params["model"] + model_version = query_params.get("version", "latest") + + model = self.client.get_model(model_name, model_version) + model_input = [ + payload["cylinders"], + payload["displacement"], + payload["horsepower"], + payload["weight"], + payload["acceleration"], + ] + result = model.predict([model_input]).item() + + return {"prediction": result, "model": {"name": model_name, "version": model_version}} diff --git a/test/model-caching/python/mpg-estimator/requirements.txt b/test/model-caching/python/mpg-estimator/requirements.txt new file mode 100644 index 0000000000..cbcad6b321 --- /dev/null +++ b/test/model-caching/python/mpg-estimator/requirements.txt @@ -0,0 +1,4 @@ +mlflow +pandas +numpy +scikit-learn==0.21.3 diff --git a/test/model-caching/python/mpg-estimator/sample.json b/test/model-caching/python/mpg-estimator/sample.json new file mode 100644 index 
0000000000..2dbbca46dd --- /dev/null +++ b/test/model-caching/python/mpg-estimator/sample.json @@ -0,0 +1,7 @@ +{ + "cylinders": 4, + "displacement": 135, + "horsepower": 84, + "weight": 2490, + "acceleration": 15.7 +} diff --git a/test/model-caching/tensorflow/multi-model-classifier/README.md b/test/model-caching/tensorflow/multi-model-classifier/README.md new file mode 100644 index 0000000000..9fd921884b --- /dev/null +++ b/test/model-caching/tensorflow/multi-model-classifier/README.md @@ -0,0 +1,77 @@ +# Multi-Model Classifier API + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example deploys Iris, ResNet50 and Inception models in one API. Query parameters are used for selecting the model. + +Since model caching is enabled, there can only be 2 models loaded into memory - loading a 3rd one will lead to the removal of the least recently used one. To witness the adding/removal process of models, check the logs of the API by running `cortex logs multi-model-classifier` once the API is up. + +The example can be run on both CPU and on GPU hardware. + +## Sample Prediction + +Deploy the model by running: + +```bash +cortex deploy +``` + +And wait for it to become live by tracking its status with `cortex get --watch`. + +Once the API has been successfully deployed, export the API's endpoint. You can get the API's endpoint by running `cortex get multi-model-classifier`.
+ +```bash +export ENDPOINT=your-api-endpoint +``` + +When making a prediction with [sample-image.json](sample-image.json), the following image will be used: + +![sports car](https://i.imgur.com/zovGIKD.png) + +### ResNet50 Classifier + +Make a request to the ResNet50 model: + +```bash +curl "${ENDPOINT}?model=resnet50" -X POST -H "Content-Type: application/json" -d @sample-image.json +``` + +The expected response is: + +```json +{"label": "sports_car"} +``` + +### Inception Classifier + +Make a request to the Inception model: + +```bash +curl "${ENDPOINT}?model=inception" -X POST -H "Content-Type: application/json" -d @sample-image.json +``` + +The expected response is: + +```json +{"label": "sports_car"} +``` + +### Iris Classifier + +At this point, there are 2 models loaded into memory (as specified by `cache_size`). Loading the `iris` classifier will lead to the removal of the least recently used model - in this case, it will be the ResNet50 model that will get evicted. Since the `disk_cache_size` is set to 3, no model will be removed from disk. + +Make a request to the Iris model: + +```bash +curl "${ENDPOINT}?model=iris" -X POST -H "Content-Type: application/json" -d @sample-iris.json +``` + +The expected response is: + +```json +{"label": "setosa"} +``` + +--- + +Now, inspect `cortex get multi-model-classifier` to see when and which models were removed in this process of making requests to different versions of the same model. diff --git a/test/model-caching/tensorflow/multi-model-classifier/cortex.yaml b/test/model-caching/tensorflow/multi-model-classifier/cortex.yaml new file mode 100644 index 0000000000..4a165d177d --- /dev/null +++ b/test/model-caching/tensorflow/multi-model-classifier/cortex.yaml @@ -0,0 +1,32 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: multi-model-classifier + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + models: + paths: + - name: inception + model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ + - name: iris + model_path: s3://cortex-examples/tensorflow/iris-classifier/nn/ + - name: resnet50 + model_path: s3://cortex-examples/tensorflow/resnet50/ + cache_size: 2 + disk_cache_size: 3 + config: + models: + iris: + labels: ["setosa", "versicolor", "virginica"] + resnet50: + input_shape: [224, 224] + input_key: input + output_key: output + inception: + input_shape: [224, 224] + input_key: images + output_key: classes + image-classifier-classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + compute: + mem: 2G diff --git a/test/model-caching/tensorflow/multi-model-classifier/predictor.py b/test/model-caching/tensorflow/multi-model-classifier/predictor.py new file mode 100644 index 0000000000..d0914b8411 --- /dev/null +++ b/test/model-caching/tensorflow/multi-model-classifier/predictor.py @@ -0,0 +1,63 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import requests +import numpy as np +import cv2 + + +def get_url_image(url_image): + """ + Get numpy image from URL image. 
+ """ + resp = requests.get(url_image, stream=True).raw + image = np.asarray(bytearray(resp.read()), dtype="uint8") + image = cv2.imdecode(image, cv2.IMREAD_COLOR) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + return image + + +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config): + self.client = tensorflow_client + + # for image classifiers + classes = requests.get(config["image-classifier-classes"]).json() + self.image_classes = [classes[str(k)][1] for k in range(len(classes))] + + # assign "models"' key value to self.config for ease of use + self.config = config["models"] + + # for iris classifier + self.iris_labels = self.config["iris"]["labels"] + + def predict(self, payload, query_params): + model_name = query_params["model"] + model_version = query_params.get("version", "latest") + predicted_label = None + + if model_name == "iris": + prediction = self.client.predict(payload["input"], model_name, model_version) + predicted_class_id = int(prediction["class_ids"][0]) + predicted_label = self.iris_labels[predicted_class_id] + + elif model_name in ["resnet50", "inception"]: + predicted_label = self.predict_image_classifier(model_name, payload["url"]) + + return {"label": predicted_label, "model": {"model": model_name, "version": model_version}} + + def predict_image_classifier(self, model, img_url): + img = get_url_image(img_url) + img = cv2.resize( + img, tuple(self.config[model]["input_shape"]), interpolation=cv2.INTER_NEAREST + ) + if model == "inception": + img = img.astype("float32") / 255 + img = {self.config[model]["input_key"]: img[np.newaxis, ...]} + + results = self.client.predict(img, model)[self.config[model]["output_key"]] + result = np.argmax(results) + if model == "inception": + result -= 1 + predicted_label = self.image_classes[result] + + return predicted_label diff --git a/test/model-caching/tensorflow/multi-model-classifier/requirements.txt b/test/model-caching/tensorflow/multi-model-classifier/requirements.txt new 
file mode 100644 index 0000000000..7e2fba5e6c --- /dev/null +++ b/test/model-caching/tensorflow/multi-model-classifier/requirements.txt @@ -0,0 +1 @@ +Pillow diff --git a/test/model-caching/tensorflow/multi-model-classifier/sample-image.json b/test/model-caching/tensorflow/multi-model-classifier/sample-image.json new file mode 100644 index 0000000000..95200916c7 --- /dev/null +++ b/test/model-caching/tensorflow/multi-model-classifier/sample-image.json @@ -0,0 +1,3 @@ +{ + "url": "https://i.imgur.com/zovGIKD.png" +} diff --git a/test/model-caching/tensorflow/multi-model-classifier/sample-iris.json b/test/model-caching/tensorflow/multi-model-classifier/sample-iris.json new file mode 100644 index 0000000000..67c03827f2 --- /dev/null +++ b/test/model-caching/tensorflow/multi-model-classifier/sample-iris.json @@ -0,0 +1,8 @@ +{ + "input": { + "sepal_length": 5.2, + "sepal_width": 3.6, + "petal_length": 1.4, + "petal_width": 0.3 + } +} diff --git a/test/onnx/iris-classifier/README.md b/test/onnx/iris-classifier/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/onnx/iris-classifier/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/onnx/iris-classifier/cortex.yaml b/test/onnx/iris-classifier/cortex.yaml new file mode 100644 index 0000000000..00b8a61112 --- /dev/null +++ b/test/onnx/iris-classifier/cortex.yaml @@ -0,0 +1,10 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: iris-classifier + kind: RealtimeAPI + predictor: + type: onnx + path: predictor.py + model_path: s3://cortex-examples/onnx/iris-classifier/ + monitoring: + model_type: classification diff --git a/test/onnx/iris-classifier/predictor.py b/test/onnx/iris-classifier/predictor.py new file mode 100644 index 0000000000..b135129e14 --- /dev/null +++ b/test/onnx/iris-classifier/predictor.py @@ -0,0 +1,20 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +labels = ["setosa", "versicolor", "virginica"] + + +class ONNXPredictor: + def __init__(self, onnx_client, config): + self.client = onnx_client + + def predict(self, payload): + model_input = [ + payload["sepal_length"], + payload["sepal_width"], + payload["petal_length"], + payload["petal_width"], + ] + + prediction = self.client.predict(model_input) + predicted_class_id = prediction[0][0] + return labels[predicted_class_id] diff --git a/test/onnx/iris-classifier/sample.json b/test/onnx/iris-classifier/sample.json new file mode 100644 index 0000000000..252c666b3a --- /dev/null +++ b/test/onnx/iris-classifier/sample.json @@ -0,0 +1,6 @@ +{ + "sepal_length": 5.2, + "sepal_width": 3.6, + "petal_length": 1.4, + "petal_width": 0.3 +} diff --git a/test/onnx/iris-classifier/xgboost.ipynb b/test/onnx/iris-classifier/xgboost.ipynb new file mode 100644 index 0000000000..d4e1497360 --- /dev/null +++ b/test/onnx/iris-classifier/xgboost.ipynb @@ -0,0 +1,244 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "iris_xgboost.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "IiTxCwB7t6Ef", + "colab_type": "text" + }, + "source": [ + "# Training an Iris classifier using XGBoost\n", + "\n", + "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", + "\n", + "In this notebook, we'll show how to train a classifier trained on the [iris data set](https://archive.ics.uci.edu/ml/datasets/iris) using XGBoost." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j6QdLAUpuW7r", + "colab_type": "text" + }, + "source": [ + "## Install Dependencies\n", + "First, we'll install our dependencies:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BQE5z_kHj9jV", + "colab_type": "code", + "colab": {} + }, + "source": [ + "pip install xgboost==0.90 scikit-learn==0.21.* onnxmltools==1.5.* boto3==1.*" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yEVK-sLnumqn", + "colab_type": "text" + }, + "source": [ + "## Load the data\n", + "We can use scikit-learn to load the Iris dataset:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tx9Xw0x0lfbl", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from sklearn.datasets import load_iris\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "iris = load_iris()\n", + "X, y = iris.data, iris.target\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "obGdgMm3urb2", + "colab_type": "text" + }, + "source": [ + "## Train the model\n", + "We'll use XGBoost's 
[`XGBClassifier`](https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.XGBClassifier) to train the model:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jjYp8TaflhW0", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import xgboost as xgb\n", + "\n", + "xgb_model = xgb.XGBClassifier()\n", + "xgb_model = xgb_model.fit(X_train, y_train)\n", + "\n", + "print(\"Test data accuracy of the xgb classifier is {:.2f}\".format(xgb_model.score(X_test, y_test))) # Accuracy should be > 90%" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hdwu-wzJvJLb", + "colab_type": "text" + }, + "source": [ + "## Export the model\n", + "Now we can export the model in the ONNX format:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AVgs2mkdllRn", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from onnxmltools.convert import convert_xgboost\n", + "from onnxconverter_common.data_types import FloatTensorType\n", + "\n", + "onnx_model = convert_xgboost(xgb_model, initial_types=[(\"input\", FloatTensorType([1, 4]))])\n", + "\n", + "with open(\"gbtree.onnx\", \"wb\") as f:\n", + " f.write(onnx_model.SerializeToString())" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ipVlP4yPxFxw", + "colab_type": "text" + }, + "source": [ + "## Upload the model to AWS\n", + "\n", + "Cortex loads models from AWS, so we need to upload the exported model." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3IqsfyylxLhy", + "colab_type": "text" + }, + "source": [ + "Set these variables to configure your AWS credentials and model upload path:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lc9LBH1uHT_h", + "colab_type": "code", + "cellView": "form", + "colab": {} + }, + "source": [ + "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", + "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", + "S3_UPLOAD_PATH = \"s3://my-bucket/iris-classifier/gbtree.onnx\" #@param {type:\"string\"}\n", + "\n", + "import sys\n", + "import re\n", + "\n", + "if AWS_ACCESS_KEY_ID == \"\":\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", + "\n", + "elif AWS_SECRET_ACCESS_KEY == \"\":\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", + "\n", + "else:\n", + " try:\n", + " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", + " except:\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXeuZsaQxUc8", + "colab_type": "text" + }, + "source": [ + "Upload the model to S3:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YLmnWTEVsu55", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import boto3\n", + "\n", + "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", + "print(\"Uploading {} ...\".format(S3_UPLOAD_PATH), end = '')\n", + "s3.upload_file(\"gbtree.onnx\", bucket, key)\n", + "print(\" ✓\")" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aR-mmcUzyCV3", + "colab_type": "text" + }, + "source": [ + "\n", + "That's it! 
See the [example](https://github.com/cortexlabs/cortex/tree/master/examples/onnx/iris-classifier) for how to deploy the model as an API." + ] + } + ] +} diff --git a/test/onnx/multi-model-classifier/README.md b/test/onnx/multi-model-classifier/README.md new file mode 100644 index 0000000000..45a001378a --- /dev/null +++ b/test/onnx/multi-model-classifier/README.md @@ -0,0 +1,69 @@ +# Multi-Model Classifier API + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example deploys ResNet50, MobileNet and ShuffleNet models in one API. Query parameters are used for selecting the model. + +The example can be run on both CPU and on GPU hardware. + +## Sample Prediction + +Deploy the model by running: + +```bash +cortex deploy +``` + +And wait for it to become live by tracking its status with `cortex get --watch`. + +Once the API has been successfully deployed, export the API's endpoint for convenience. You can get the API's endpoint by running `cortex get multi-model-classifier`. 
+ +```bash +export ENDPOINT=your-api-endpoint +``` + +When making a prediction with [sample.json](sample.json), the following image will be used: + +![cat](https://i.imgur.com/213xcvs.jpg) + +### ResNet50 Classifier + +Make a request to the ResNet50 model: + +```bash +curl "${ENDPOINT}?model=resnet50" -X POST -H "Content-Type: application/json" -d @sample.json +``` + +The expected response is: + +```json +{"label": "tabby"} +``` + +### MobileNet Classifier + +Make a request to the MobileNet model: + +```bash +curl "${ENDPOINT}?model=mobilenet" -X POST -H "Content-Type: application/json" -d @sample.json +``` + +The expected response is: + +```json +{"label": "tabby"} +``` + +### ShuffleNet Classifier + +Make a request to the ShuffleNet model: + +```bash +curl "${ENDPOINT}?model=shufflenet" -X POST -H "Content-Type: application/json" -d @sample.json +``` + +The expected response is: + +```json +{"label": "Egyptian_cat"} +``` diff --git a/test/onnx/multi-model-classifier/cortex.yaml b/test/onnx/multi-model-classifier/cortex.yaml new file mode 100644 index 0000000000..63efb00ebb --- /dev/null +++ b/test/onnx/multi-model-classifier/cortex.yaml @@ -0,0 +1,20 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
def get_url_image(url_image):
    """
    Get numpy image from URL image.

    Args:
        url_image: URL of the image to download.

    Returns:
        The image decoded by cv2.imdecode with cv2.IMREAD_COLOR.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the downloaded payload cannot be decoded as an image.
    """
    # Use the buffered body instead of the raw stream: the connection is released
    # and any transfer encoding is undone before the bytes reach OpenCV.
    resp = requests.get(url_image)
    resp.raise_for_status()
    image = np.frombuffer(resp.content, dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imdecode signals failure by returning None rather than raising
        raise ValueError(f"could not decode an image from {url_image}")
    return image


def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """
    Resize a numpy image while keeping its aspect ratio.

    Args:
        image: array whose first two dimensions are (height, width).
        width: target width; takes precedence over height when both are given.
        height: target height, used only when width is None.
        inter: OpenCV interpolation flag passed to cv2.resize.

    Returns:
        The resized image, or the input unchanged when neither width nor
        height is given.
    """
    (h, w) = image.shape[:2]

    if width is None and height is None:
        return image

    if width is None:
        # calculate the ratio of the height and construct the dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        # calculate the ratio of the width and construct the dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    return cv2.resize(image, dim, interpolation=inter)


def preprocess(img_data):
    """
    Normalize input for inference.

    Args:
        img_data: image array with the colour channel on axis 2.

    Returns:
        float32 array with a leading batch axis of size 1 and the colour
        channel moved to axis 1.
    """
    # move pixel color dimension to position 0
    img = np.moveaxis(img_data, 2, 0)

    # per-channel statistics, reshaped so they broadcast over the two spatial axes
    mean_vec = np.array([0.485, 0.456, 0.406])[:, np.newaxis, np.newaxis]
    stddev_vec = np.array([0.229, 0.224, 0.225])[:, np.newaxis, np.newaxis]

    # divide by 255 to get values in [0, 1], then normalize each channel
    norm_img_data = ((img / 255 - mean_vec) / stddev_vec).astype("float32")

    # extend to batch size of 1
    return norm_img_data[np.newaxis, ...]


def postprocess(results):
    """
    Eliminates all dimensions of size 1, softmaxes the input and then returns
    the index of the element with the highest value.
    """
    squeezed = np.squeeze(results)
    maxed = softmax(squeezed)
    return np.argmax(maxed)


class ONNXPredictor:
    """Serves several ONNX image classifiers; the model is chosen per request."""

    def __init__(self, onnx_client, config):
        """
        Args:
            onnx_client: client object whose predict(input, model_name) is called per request.
            config: dict providing "image-classifier-classes" (URL of a JSON class
                index) and "image-resize" (target image height).
        """
        # onnx client
        self.client = onnx_client

        # for image classifiers: the JSON maps "0", "1", ... to entries whose
        # element at index 1 is used as the label
        classes = requests.get(config["image-classifier-classes"]).json()
        self.image_classes = [classes[str(k)][1] for k in range(len(classes))]
        self.resize_value = config["image-resize"]

    def predict(self, payload, query_params):
        """
        Classify the image at payload["url"] with the model named in
        query_params["model"].

        Returns:
            dict of the form {"label": <predicted class name>}.
        """
        # get request params
        model_name = query_params["model"]
        img_url = payload["url"]

        # process the input
        img = get_url_image(img_url)
        img = image_resize(img, height=self.resize_value)
        img = preprocess(img)

        # predict
        results = self.client.predict(img, model_name)[0]

        # interpret result
        result = postprocess(results)
        predicted_label = self.image_classes[result]

        return {"label": predicted_label}
b/test/onnx/multi-model-classifier/sample.json new file mode 100644 index 0000000000..4ee3aa45df --- /dev/null +++ b/test/onnx/multi-model-classifier/sample.json @@ -0,0 +1,3 @@ +{ + "url": "https://i.imgur.com/213xcvs.jpg" +} diff --git a/test/onnx/yolov5-youtube/README.md b/test/onnx/yolov5-youtube/README.md new file mode 100644 index 0000000000..f7822449bb --- /dev/null +++ b/test/onnx/yolov5-youtube/README.md @@ -0,0 +1,61 @@ +# YOLOv5 Detection model + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example deploys a detection model trained using [ultralytics' yolo repo](https://github.com/ultralytics/yolov5) using ONNX. +We'll use the `yolov5s` model as an example here. +It can be used to run inference on youtube videos and returns the annotated video with bounding boxes. + +The example can be run on both CPU and on GPU hardware. + +## Sample Prediction + +Deploy the model by running: + +```bash +cortex deploy +``` + +And wait for it to become live by tracking its status with `cortex get --watch`. + +Once the API has been successfully deployed, export the API's endpoint for convenience. You can get the API's endpoint by running `cortex get yolov5-youtube`. + +```bash +export ENDPOINT=your-api-endpoint +``` + +When making a prediction with [sample.json](sample.json), [this](https://www.youtube.com/watch?v=aUdKzb4LGJI) youtube video will be used. + +To make a request to the model: + +```bash +curl "${ENDPOINT}" -X POST -H "Content-Type: application/json" -d @sample.json --output video.mp4 +``` + +After a few seconds, `curl` will save the resulting video `video.mp4` in the current working directory. 
The following is a sample of what should be exported: + +![yolov5](https://user-images.githubusercontent.com/26958764/86545098-e0dce900-bf34-11ea-83a7-8fd544afa11c.gif) + + +## Exporting ONNX + +To export a custom model from the repo, use the [`models/export.py`](https://github.com/ultralytics/yolov5/blob/master/models/export.py) script. +The only change we need to make is to change the line + +```bash +model.model[-1].export = True # set Detect() layer export=True +``` + +to + +```bash +model.model[-1].export = False +``` + +Originally, the ultralytics repo does not export postprocessing steps of the model, e.g. the conversion from the raw CNN outputs to bounding boxes. +With newer ONNX versions, these can be exported as part of the model making the deployment much easier. + +With this modified script, the ONNX graph used for this example has been exported using +```bash +python models/export.py --weights weights/yolov5s.pt --img 416 --batch 1 +``` diff --git a/test/onnx/yolov5-youtube/conda-packages.txt b/test/onnx/yolov5-youtube/conda-packages.txt new file mode 100644 index 0000000000..131fce12b5 --- /dev/null +++ b/test/onnx/yolov5-youtube/conda-packages.txt @@ -0,0 +1,3 @@ +conda-forge::ffmpeg=4.2.3 +conda-forge::youtube-dl +conda-forge::matplotlib diff --git a/test/onnx/yolov5-youtube/cortex.yaml b/test/onnx/yolov5-youtube/cortex.yaml new file mode 100644 index 0000000000..80d0393308 --- /dev/null +++ b/test/onnx/yolov5-youtube/cortex.yaml @@ -0,0 +1,13 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: yolov5-youtube + kind: RealtimeAPI + predictor: + type: onnx + path: predictor.py + model_path: s3://cortex-examples/onnx/yolov5-youtube/ + config: + iou_threshold: 0.5 + confidence_threshold: 0.6 + compute: + gpu: 1 # this is optional, since the api can also run on cpu diff --git a/test/onnx/yolov5-youtube/labels.json b/test/onnx/yolov5-youtube/labels.json new file mode 100644 index 0000000000..c86f2f812a --- /dev/null +++ b/test/onnx/yolov5-youtube/labels.json @@ -0,0 +1,82 @@ +[ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" +] diff --git a/test/onnx/yolov5-youtube/predictor.py b/test/onnx/yolov5-youtube/predictor.py new file mode 100644 index 0000000000..b99d29d911 --- /dev/null +++ b/test/onnx/yolov5-youtube/predictor.py @@ -0,0 +1,65 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
class ONNXPredictor:
    """Runs a YOLOv5 ONNX model over a YouTube video and returns the annotated video."""

    def __init__(self, onnx_client, config):
        """
        Args:
            onnx_client: client exposing get_model() and predict().
            config: dict providing "confidence_threshold" and "iou_threshold".
        """
        self.client = onnx_client
        # Get the input shape from the ONNX runtime
        (signature,) = onnx_client.get_model()["input_signatures"].values()
        _, _, height, width = signature["shape"]
        self.input_size = (width, height)
        self.config = config
        with open("labels.json") as buf:
            self.labels = json.load(buf)
        # Index the colormap with integers so the classes cycle through its
        # entries. Float inputs are interpreted on [0, 1] and anything above 1
        # is clipped, which made nearly every class share a single colour.
        palette = plt.cm.tab20
        color_map = palette(np.arange(len(self.labels)) % palette.N)
        self.color_map = [tuple(map(int, colors)) for colors in 255 * color_map]

    def postprocess(self, output):
        """
        Turn the raw detection output into thresholded, non-overlapping boxes.

        Returns:
            (boxes, class_id, confidence) for the detections that pass the
            confidence threshold and survive non-maximum suppression.
        """
        # columns 0-3 are the box, column 4 the objectness, the rest per-class scores
        boxes, obj_score, class_scores = np.split(output[0], [4, 5], axis=1)
        boxes = utils.boxes_yolo_to_xyxy(boxes)

        # get the class-prediction & class confidences
        class_id = class_scores.argmax(axis=1)
        cls_score = class_scores[np.arange(len(class_scores)), class_id]

        confidence = obj_score.squeeze(axis=1) * cls_score
        sel = confidence > self.config["confidence_threshold"]
        boxes, class_id, confidence = boxes[sel], class_id[sel], confidence[sel]
        sel = utils.nms(boxes, confidence, self.config["iou_threshold"])
        boxes, class_id, confidence = boxes[sel], class_id[sel], confidence[sel]
        return boxes, class_id, confidence

    def predict(self, payload):
        """
        Download the video at payload["url"], annotate every frame and stream
        the result back as video/mp4.
        """
        # download YT video
        in_path = utils.download_from_youtube(payload["url"], self.input_size[1])
        out_path = f"{uuid.uuid1()}.mp4"

        try:
            # run predictions
            with utils.FrameWriter(out_path, size=self.input_size) as writer:
                for frame in utils.frame_reader(in_path, size=self.input_size):
                    x = (frame.astype(np.float32) / 255).transpose(2, 0, 1)
                    # 4 output tensors, the last three are intermediate values and
                    # not necessary for detection
                    output, *_ = self.client.predict(x[None])
                    boxes, class_ids, confidence = self.postprocess(output)
                    utils.overlay_boxes(frame, boxes, class_ids, self.labels, self.color_map)
                    writer.write(frame)

            with open(out_path, "rb") as f:
                output_buf = io.BytesIO(f.read())
        finally:
            # remove the temporary videos even when inference fails part-way
            os.remove(in_path)
            if os.path.exists(out_path):
                os.remove(out_path)

        return StreamingResponse(output_buf, media_type="video/mp4")
def download_from_youtube(url: str, min_height: int) -> Path:
    """
    Download a YouTube video into the current working directory.

    Args:
        url: address of the video.
        min_height: minimum frame height used in the youtube-dl format filter.

    Returns:
        Absolute path of the downloaded file.
    """
    target = f"{uuid.uuid1()}.mp4"
    ydl_opts = {
        "outtmpl": target,
        "format": f"worstvideo[vcodec=vp9][height>={min_height}]",
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    # we need to glob in case youtube-dl adds suffix
    (path,) = Path().absolute().glob(f"{target}*")
    return path


def frame_reader(path: Path, size: Tuple[int, int]) -> Iterable[np.ndarray]:
    """
    Yield the frames of a video as (height, width, 3) uint8 RGB arrays.

    Args:
        path: video file to decode with ffmpeg.
        size: (width, height) every frame is letterboxed to.
    """
    width, height = size
    # letterbox frames to fixed size
    process = (
        ffmpeg.input(path)
        .filter("scale", size=f"{width}:{height}", force_original_aspect_ratio="decrease")
        # Negative values for x and y center the padded video
        .filter("pad", height=height, width=width, x=-1, y=-1)
        .output("pipe:", format="rawvideo", pix_fmt="rgb24")
        .run_async(pipe_stdout=True)
    )

    frame_bytes = height * width * 3
    while True:
        in_bytes = process.stdout.read(frame_bytes)
        if not in_bytes:
            process.wait()
            break
        # np.frombuffer returns a read-only view of the bytes object; copy it so
        # that consumers may draw on the frame in place.
        frame = np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3]).copy()
        yield frame


class FrameWriter:
    """Encodes raw RGB frames into a video file through an ffmpeg subprocess."""

    def __init__(self, path: Path, size: Tuple[int, int]):
        """
        Args:
            path: output video file (overwritten if it exists).
            size: (width, height) of the frames that will be written.
        """
        width, height = size
        self.process = (
            ffmpeg.input("pipe:", format="rawvideo", pix_fmt="rgb24", s=f"{width}x{height}")
            .output(path, pix_fmt="yuv420p")
            .overwrite_output()
            .run_async(pipe_stdin=True)
        )

    def write(self, frame: np.ndarray):
        """Send one frame to the encoder."""
        self.process.stdin.write(frame.astype(np.uint8).tobytes())

    def close(self):
        """Flush the encoder and wait for ffmpeg to exit; safe to call repeatedly."""
        # getattr guards against __init__ having failed before process was set
        process = getattr(self, "process", None)
        if process is None:
            return
        self.process = None
        process.stdin.close()
        process.wait()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __del__(self):
        self.close()


def nms(dets: np.ndarray, scores: np.ndarray, thresh: float) -> np.ndarray:
    """
    Greedy non-maximum suppression.

    Args:
        dets: (N, 4) boxes as x1, y1, x2, y2.
        scores: (N,) score of every box.
        thresh: boxes whose IoU with an already kept box exceeds this are dropped.

    Returns:
        Integer indices of the kept boxes, ordered by decreasing score.
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first

    keep = []
    while order.size > 0:
        i = order[0]  # best remaining box
        keep.append(i)
        rest = order[1:]
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)  # intersection width
        h = np.maximum(0.0, yy2 - yy1 + 1)  # intersection height
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        inds = np.where(ovr <= thresh)[0]
        # inds indexes `rest`, which is shifted by one relative to `order`
        order = order[inds + 1]

    # np.int was only an alias of the builtin int and no longer exists in NumPy >= 1.24
    return np.array(keep).astype(int)


def boxes_yolo_to_xyxy(boxes: np.ndarray):
    """
    Convert boxes from (center x, center y, width, height) to (x1, y1, x2, y2).

    The conversion is done in place and the same array is returned.
    """
    boxes[:, 0] -= boxes[:, 2] / 2
    boxes[:, 1] -= boxes[:, 3] / 2
    boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
    boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
    return boxes


def overlay_boxes(frame, boxes, class_ids, label_map, color_map, line_thickness=None):
    """
    Draw labelled bounding boxes onto frame in place.

    Args:
        frame: image array that is modified.
        boxes: (N, 4) boxes as x1, y1, x2, y2.
        class_ids: class index of every box, used to look up label and colour.
        label_map: sequence of class names.
        color_map: sequence of colours, one per class.
        line_thickness: rectangle thickness; derived from the frame size when omitted.
    """
    tl = (
        line_thickness or round(0.0005 * (frame.shape[0] + frame.shape[1]) / 2) + 1
    )  # line/font thickness

    for class_id, (x1, y1, x2, y2) in zip(class_ids, boxes.astype(int)):
        color = color_map[class_id]
        label = label_map[class_id]
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, tl, cv2.LINE_AA)
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        x3, y3 = x1 + t_size[0], y1 - t_size[1] - 3
        cv2.rectangle(frame, (x1, y1), (x3, y3), color, -1, cv2.LINE_AA)  # filled
        cv2.putText(
            frame,
            label,
            (x1, y1 - 2),
            0,
            tl / 3,
            [225, 255, 255],
            thickness=tf,
            lineType=cv2.LINE_AA,
        )
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/pytorch/answer-generator/cortex.yaml b/test/pytorch/answer-generator/cortex.yaml new file mode 100644 index 0000000000..b336f257dd --- /dev/null +++ b/test/pytorch/answer-generator/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: answer-generator + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + gpu: 1 + mem: 5G diff --git a/test/pytorch/answer-generator/generator.py b/test/pytorch/answer-generator/generator.py new file mode 100644 index 0000000000..4a9aba613e --- /dev/null +++ b/test/pytorch/answer-generator/generator.py @@ -0,0 +1,44 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
# This file includes code which was modified from https://colab.research.google.com/drive/1KTLqiAOdKM_3RnBWfqgrvOQLqumUyOdA

# token id that terminates generation
END_OF_TEXT = 50256


def generate(model, conditioned_tokens, device, max_new_tokens=None):
    """
    Sample tokens from model until END_OF_TEXT is produced.

    Args:
        model: callable returning a tuple whose first element holds the logits,
            indexed as [batch, position, vocabulary].
        conditioned_tokens: list of prompt token ids.
        device: torch device the input tensor is moved to.
        max_new_tokens: optional cap on the number of sampled tokens. With the
            default (None) sampling continues until END_OF_TEXT is drawn, which
            is unbounded if the model never produces it.

    Returns:
        The generated token ids without the terminating END_OF_TEXT.
    """
    generated_tokens = []
    while max_new_tokens is None or len(generated_tokens) < max_new_tokens:
        result = recalc(model, conditioned_tokens, generated_tokens, device)
        if result == END_OF_TEXT:
            # recalc appended the terminator; strip it from the answer
            return generated_tokens[:-1]
    return generated_tokens


def recalc(model, conditioned_tokens, generated_tokens, device):
    """
    Sample the next token and append it to generated_tokens (mutated in place).

    Returns:
        The sampled token id as a Python int.
    """
    indexed_tokens = conditioned_tokens + generated_tokens
    tokens_tensor = torch.tensor([indexed_tokens])
    tokens_tensor = tokens_tensor.to(device)
    with torch.no_grad():
        outputs = model(tokens_tensor)
        predictions = outputs[0]
    # logits of the last position of the only batch element
    logits = predictions[0, -1, :]
    filtered_logits = top_p_filtering(logits)
    probabilities = F.softmax(filtered_logits, dim=-1)
    next_token = torch.multinomial(probabilities, 1)
    generated_tokens.append(next_token.item())
    return next_token.item()


def top_p_filtering(logits, top_p=0.9, filter_value=-float("Inf")):
    """
    Keep the most likely tokens whose cumulative probability reaches top_p and
    set every other logit to filter_value.

    Args:
        logits: 1-D tensor of logits; it is modified in place.
        top_p: cumulative probability threshold.
        filter_value: value written into the removed positions.

    Returns:
        The same logits tensor.
    """
    assert logits.dim() == 1
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
    sorted_indices_to_remove = cumulative_probs > top_p
    # shift right so the first token that crosses the threshold is kept as well
    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
    sorted_indices_to_remove[..., 0] = 0
    indices_to_remove = sorted_indices[sorted_indices_to_remove]
    logits[indices_to_remove] = filter_value
    return logits
class PythonPredictor:
    """Answers a text prompt with a fine-tuned GPT-2 medium sized model."""

    def __init__(self, config):
        """Download the fine-tuned weights and prepare model and tokenizer."""
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"using device: {device}")

        medium_config = GPT2Config(n_embd=1024, n_layer=24, n_head=16)
        model = GPT2LMHeadModel(medium_config)
        wget.download(
            "https://convaisharables.blob.core.windows.net/lsp/multiref/medium_ft.pkl",
            "/tmp/medium_ft.pkl",
        )

        # NOTE(review): torch.load unpickles a file fetched over the network;
        # only keep this if the source is trusted.
        # map_location lets the checkpoint load on hosts without a GPU.
        weights = torch.load("/tmp/medium_ft.pkl", map_location=device)
        # the checkpoint stores the output projection under a different key
        # than the one the model's state dict is given here
        weights["lm_head.weight"] = weights["lm_head.decoder.weight"]
        weights.pop("lm_head.decoder.weight", None)

        model.load_state_dict(weights)

        model.to(device)
        model.eval()

        self.device = device
        self.model = model
        self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    def predict(self, payload):
        """
        Generate a reply for payload["text"].

        Returns:
            The decoded generated text.
        """
        # the prompt is terminated with END_OF_TEXT before sampling starts
        conditioned_tokens = self.tokenizer.encode(payload["text"]) + [generator.END_OF_TEXT]
        prediction = generator.generate(self.model, conditioned_tokens, self.device)
        return self.tokenizer.decode(prediction)
+} diff --git a/test/pytorch/image-classifier-alexnet/README.md b/test/pytorch/image-classifier-alexnet/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/pytorch/image-classifier-alexnet/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/pytorch/image-classifier-alexnet/cortex.yaml b/test/pytorch/image-classifier-alexnet/cortex.yaml new file mode 100644 index 0000000000..74c463c0b0 --- /dev/null +++ b/test/pytorch/image-classifier-alexnet/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-alexnet + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + gpu: 1 + mem: 4G diff --git a/test/pytorch/image-classifier-alexnet/predictor.py b/test/pytorch/image-classifier-alexnet/predictor.py new file mode 100644 index 0000000000..a739ddbb8a --- /dev/null +++ b/test/pytorch/image-classifier-alexnet/predictor.py @@ -0,0 +1,39 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
class PythonPredictor:
    """Classifies the image behind a URL with a pretrained AlexNet."""

    def __init__(self, config):
        """Load the pretrained model, the preprocessing pipeline and the labels."""
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"using device: {device}")

        model = torchvision.models.alexnet(pretrained=True).to(device)
        model.eval()
        # https://github.com/pytorch/examples/blob/447974f6337543d4de6b888e244a964d3c9b71f6/imagenet/main.py#L198-L199
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

        self.preprocess = transforms.Compose(
            [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize]
        )
        # the first line of the label file is skipped
        self.labels = requests.get(
            "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt"
        ).text.split("\n")[1:]
        self.model = model
        self.device = device

    def predict(self, payload):
        """
        Classify the image at payload["url"].

        Returns:
            The label with the highest model output.
        """
        image = requests.get(payload["url"]).content
        # Normalize is configured for three channels; force RGB so grayscale,
        # palette and RGBA images are accepted as well.
        img_pil = Image.open(BytesIO(image)).convert("RGB")
        img_tensor = self.preprocess(img_pil)
        img_tensor.unsqueeze_(0)  # add the batch dimension
        img_tensor = img_tensor.to(self.device)
        with torch.no_grad():
            prediction = self.model(img_tensor)
        _, index = prediction[0].max(0)
        return self.labels[index]
b/test/pytorch/image-classifier-resnet50/README.md new file mode 100644 index 0000000000..f13020d874 --- /dev/null +++ b/test/pytorch/image-classifier-resnet50/README.md @@ -0,0 +1,59 @@ +# Image Classifier with ResNet50 + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example implements an image recognition system using ResNet50, which allows for the recognition of up to 1000 classes. + +## Deploying + +There are 3 Cortex APIs available in this example: + +1. [cortex.yaml](cortex.yaml) - can be used with any instances. +1. [cortex_inf.yaml](cortex_inf.yaml) - to be used with `inf1` instances. +1. [cortex_gpu.yaml](cortex_gpu.yaml) - to be used with GPU instances. + +To deploy an API, run: + +```bash +cortex deploy +``` + +E.g. + +```bash +cortex deploy cortex_gpu.yaml +``` + +## Verifying your API + +Check that your API is live by running `cortex get image-classifier-resnet50`, and copy the example `curl` command that's shown. After the API is live, run the `curl` command, e.g. + +```bash +$ curl -X POST -H "Content-Type: application/json" -d @sample.json + +["tabby", "Egyptian_cat", "tiger_cat", "tiger", "plastic_bag"] +``` + +The following image is embedded in [sample.json](sample.json): + +![image](https://i.imgur.com/213xcvs.jpg) + +## Exporting SavedModels + +This example deploys models that we have built and uploaded to a public S3 bucket. If you want to build the models yourself, follow these instructions. 
+ +Run the following command to install the dependencies required for the [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook: + +```bash +pip install --extra-index-url=https://pip.repos.neuron.amazonaws.com \ + neuron-cc==1.0.9410.0+6008239556 \ + torch-neuron==1.0.825.0 +``` + +Also, `torchvision` has to be installed, but without any dependencies: + +```bash +pip install torchvision==0.4.2 --no-deps +``` + +The [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook will generate 2 torch models. One is saved as `resnet50.pt` which can be run on GPU or CPU, and another is saved as `resnet50_neuron.pt`, which can only be run on `inf1` instances. diff --git a/test/pytorch/image-classifier-resnet50/cortex.yaml b/test/pytorch/image-classifier-resnet50/cortex.yaml new file mode 100644 index 0000000000..d6c1cb64c9 --- /dev/null +++ b/test/pytorch/image-classifier-resnet50/cortex.yaml @@ -0,0 +1,15 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-resnet50 + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + config: + model_path: s3://cortex-examples/pytorch/image-classifier-resnet50 + model_name: resnet50.pt + device: cpu + classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + input_shape: [224, 224] + compute: + cpu: 1 diff --git a/test/pytorch/image-classifier-resnet50/cortex_gpu.yaml b/test/pytorch/image-classifier-resnet50/cortex_gpu.yaml new file mode 100644 index 0000000000..7f06603504 --- /dev/null +++ b/test/pytorch/image-classifier-resnet50/cortex_gpu.yaml @@ -0,0 +1,16 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-resnet50 + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + config: + model_path: s3://cortex-examples/pytorch/image-classifier-resnet50 + model_name: resnet50.pt + device: gpu + classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + input_shape: [224, 224] + compute: + gpu: 1 + cpu: 1 diff --git a/test/pytorch/image-classifier-resnet50/cortex_inf.yaml b/test/pytorch/image-classifier-resnet50/cortex_inf.yaml new file mode 100644 index 0000000000..55ce4ff793 --- /dev/null +++ b/test/pytorch/image-classifier-resnet50/cortex_inf.yaml @@ -0,0 +1,16 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-resnet50 + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + config: + model_path: s3://cortex-examples/pytorch/image-classifier-resnet50 + model_name: resnet50_neuron.pt + device: inf + classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + input_shape: [224, 224] + compute: + inf: 1 + cpu: 1 diff --git a/test/pytorch/image-classifier-resnet50/generate_resnet50_models.ipynb b/test/pytorch/image-classifier-resnet50/generate_resnet50_models.ipynb new file mode 100644 index 0000000000..e4e1343d85 --- /dev/null +++ b/test/pytorch/image-classifier-resnet50/generate_resnet50_models.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generate Resnet50 Models\n", + "\n", + "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import numpy as np\n", + "import os\n", + "import torch_neuron\n", + "from torchvision import models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load Resnet50 model" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "model = models.resnet50(pretrained=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compile model for Inferentia. Should have worked with 1 NeuronCores, but it appears that setting it to a minimum of 2 is required." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:Neuron:compiling module ResNet with neuron-cc\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Compiler args type is value is ['--num-neuroncores', '2']\n" + ] + } + ], + "source": [ + "model.eval()\n", + "batch_size = 1\n", + "image = torch.zeros([batch_size, 3, 224, 224], dtype=torch.float32)\n", + "model_neuron = torch.neuron.trace(model, example_inputs=[image], compiler_args=[\"--num-neuroncores\", \"2\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save both models to disk" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "model_neuron.save(\"resnet50_neuron.pt\")\n", + "torch.save(model.state_dict(), \"resnet50.pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/test/pytorch/image-classifier-resnet50/predictor.py b/test/pytorch/image-classifier-resnet50/predictor.py new file mode 100644 index 0000000000..8059c4078c --- /dev/null +++ b/test/pytorch/image-classifier-resnet50/predictor.py @@ -0,0 +1,93 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import os +import torch +import cv2 +import numpy as np +import requests +import re +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +from torchvision import models, transforms, datasets + + +def get_url_image(url_image): + """ + Get numpy image from URL image. 
+ """ + resp = requests.get(url_image, stream=True).raw + image = np.asarray(bytearray(resp.read()), dtype="uint8") + image = cv2.imdecode(image, cv2.IMREAD_COLOR) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + return image + + +class PythonPredictor: + def __init__(self, config): + # load classes + classes = requests.get(config["classes"]).json() + self.idx2label = [classes[str(k)][1] for k in range(len(classes))] + + # create s3 client + if os.environ.get("AWS_ACCESS_KEY_ID"): + s3 = boto3.client("s3") # client will use your credentials if available + else: + s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client + + # download the model + model_path = config["model_path"] + model_name = config["model_name"] + bucket, key = re.match("s3://(.+?)/(.+)", model_path).groups() + s3.download_file(bucket, os.path.join(key, model_name), model_name) + + # load the model + self.device = None + if config["device"] == "gpu": + self.device = torch.device("cuda") + self.model = models.resnet50() + self.model.load_state_dict(torch.load(model_name, map_location="cuda:0")) + self.model.eval() + self.model = self.model.to(self.device) + elif config["device"] == "cpu": + self.model = models.resnet50() + self.model.load_state_dict(torch.load(model_name)) + self.model.eval() + elif config["device"] == "inf": + import torch_neuron + + self.model = torch.jit.load(model_name) + else: + raise RuntimeError("invalid predictor: config: must be cpu, gpu, or inf") + + # save normalization transform for later use + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.transform = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.Resize(config["input_shape"]), + transforms.ToTensor(), + normalize, + ] + ) + + def predict(self, payload): + # preprocess image + image = get_url_image(payload["url"]) + image = self.transform(image) + image = torch.tensor(image.numpy()[np.newaxis, ...]) + + # predict + if 
self.device: + results = self.model(image.to(self.device)) + else: + results = self.model(image) + + # Get the top 5 results + top5_idx = results[0].sort()[1][-5:] + + # Lookup and print the top 5 labels + top5_labels = [self.idx2label[idx] for idx in top5_idx] + top5_labels = top5_labels[::-1] + + return top5_labels diff --git a/test/pytorch/image-classifier-resnet50/sample.json b/test/pytorch/image-classifier-resnet50/sample.json new file mode 100644 index 0000000000..4ee3aa45df --- /dev/null +++ b/test/pytorch/image-classifier-resnet50/sample.json @@ -0,0 +1,3 @@ +{ + "url": "https://i.imgur.com/213xcvs.jpg" +} diff --git a/test/pytorch/iris-classifier/README.md b/test/pytorch/iris-classifier/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/pytorch/iris-classifier/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/pytorch/iris-classifier/cortex.yaml b/test/pytorch/iris-classifier/cortex.yaml new file mode 100644 index 0000000000..a8b590882d --- /dev/null +++ b/test/pytorch/iris-classifier/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: iris-classifier + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + config: + model: s3://cortex-examples/pytorch/iris-classifier/weights.pth + monitoring: + model_type: classification diff --git a/test/pytorch/iris-classifier/model.py b/test/pytorch/iris-classifier/model.py new file mode 100644 index 0000000000..fe29ff7b6d --- /dev/null +++ b/test/pytorch/iris-classifier/model.py @@ -0,0 +1,59 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score + + +class IrisNet(nn.Module): + def __init__(self): + super(IrisNet, self).__init__() + self.fc1 = nn.Linear(4, 100) + self.fc2 = nn.Linear(100, 100) + self.fc3 = nn.Linear(100, 3) + self.softmax = nn.Softmax(dim=1) + + def forward(self, X): + X = F.relu(self.fc1(X)) + X = self.fc2(X) + X = self.fc3(X) + X = self.softmax(X) + return X + + +if __name__ == "__main__": + iris = load_iris() + X, y = iris.data, iris.target + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) + + train_X = Variable(torch.Tensor(X_train).float()) + test_X = Variable(torch.Tensor(X_test).float()) + train_y = Variable(torch.Tensor(y_train).long()) + test_y = Variable(torch.Tensor(y_test).long()) + + model = IrisNet() + + criterion = nn.CrossEntropyLoss() + + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + + for epoch in range(1000): + optimizer.zero_grad() + out = model(train_X) + loss = criterion(out, train_y) + loss.backward() + optimizer.step() + + if epoch % 100 == 0: + 
print("number of epoch {} loss {}".format(epoch, loss)) + + predict_out = model(test_X) + _, predict_y = torch.max(predict_out, 1) + + print("prediction accuracy {}".format(accuracy_score(test_y.data, predict_y.data))) + + torch.save(model.state_dict(), "weights.pth") diff --git a/test/pytorch/iris-classifier/predictor.py b/test/pytorch/iris-classifier/predictor.py new file mode 100644 index 0000000000..71994bb9ae --- /dev/null +++ b/test/pytorch/iris-classifier/predictor.py @@ -0,0 +1,50 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import re +import torch +import os +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +from model import IrisNet + +labels = ["setosa", "versicolor", "virginica"] + + +class PythonPredictor: + def __init__(self, config): + # download the model + bucket, key = re.match("s3://(.+?)/(.+)", config["model"]).groups() + + if os.environ.get("AWS_ACCESS_KEY_ID"): + s3 = boto3.client("s3") # client will use your credentials if available + else: + s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client + + s3.download_file(bucket, key, "/tmp/model.pth") + + # initialize the model + model = IrisNet() + model.load_state_dict(torch.load("/tmp/model.pth")) + model.eval() + + self.model = model + + def predict(self, payload): + # Convert the request to a tensor and pass it into the model + input_tensor = torch.FloatTensor( + [ + [ + payload["sepal_length"], + payload["sepal_width"], + payload["petal_length"], + payload["petal_width"], + ] + ] + ) + + # Run the prediction + output = self.model(input_tensor) + + # Translate the model output to the corresponding label string + return labels[torch.argmax(output[0])] diff --git a/test/pytorch/iris-classifier/requirements.txt b/test/pytorch/iris-classifier/requirements.txt 
new file mode 100644 index 0000000000..f2f30b7ef9 --- /dev/null +++ b/test/pytorch/iris-classifier/requirements.txt @@ -0,0 +1,2 @@ +torch +scikit-learn diff --git a/test/pytorch/iris-classifier/sample.json b/test/pytorch/iris-classifier/sample.json new file mode 100644 index 0000000000..0bc6836266 --- /dev/null +++ b/test/pytorch/iris-classifier/sample.json @@ -0,0 +1,6 @@ +{ + "sepal_length": 2.2, + "sepal_width": 3.6, + "petal_length": 1.4, + "petal_width": 3.3 +} diff --git a/test/pytorch/language-identifier/README.md b/test/pytorch/language-identifier/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/pytorch/language-identifier/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/pytorch/language-identifier/cortex.yaml b/test/pytorch/language-identifier/cortex.yaml new file mode 100644 index 0000000000..e8243a58fa --- /dev/null +++ b/test/pytorch/language-identifier/cortex.yaml @@ -0,0 +1,9 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: language-identifier + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + monitoring: + model_type: classification diff --git a/test/pytorch/language-identifier/predictor.py b/test/pytorch/language-identifier/predictor.py new file mode 100644 index 0000000000..e59ebe5012 --- /dev/null +++ b/test/pytorch/language-identifier/predictor.py @@ -0,0 +1,18 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import wget +import fasttext + + +class PythonPredictor: + def __init__(self, config): + wget.download( + "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin", "/tmp/model" + ) + + self.model = fasttext.load_model("/tmp/model") + + def predict(self, payload): + prediction = self.model.predict(payload["text"]) + language = prediction[0][0][-2:] + return language diff --git a/test/pytorch/language-identifier/requirements.txt b/test/pytorch/language-identifier/requirements.txt new file mode 100644 index 0000000000..a342ff2914 --- /dev/null +++ b/test/pytorch/language-identifier/requirements.txt @@ -0,0 +1,2 @@ +wget==3.* +fasttext==0.9.* diff --git a/test/pytorch/language-identifier/sample.json b/test/pytorch/language-identifier/sample.json new file mode 100644 index 0000000000..225c357392 --- /dev/null +++ b/test/pytorch/language-identifier/sample.json @@ -0,0 +1,3 @@ +{ + "text": "build machine learning apis" +} diff --git a/test/pytorch/multi-model-text-analyzer/README.md b/test/pytorch/multi-model-text-analyzer/README.md new file mode 100644 index 0000000000..0fbca390cd --- /dev/null +++ b/test/pytorch/multi-model-text-analyzer/README.md @@ -0,0 +1,51 @@ +# Multi-Model Analyzer API + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example deploys a sentiment analyzer and a text summarizer in one API. Query parameters are used for selecting the model. + +The example can be run on both CPU and on GPU hardware. + +## Sample Prediction + +Deploy the model by running: + +```bash +cortex deploy +``` + +And wait for it to become live by tracking its status with `cortex get --watch`. + +Once the API has been successfully deployed, export the API's endpoint.
You can get the API's endpoint by running `cortex get multi-model-text-analyzer`. + +```bash +export ENDPOINT=your-api-endpoint +``` + +### Sentiment Analyzer Classifier + +Make a request to the sentiment analyzer model: + +```bash +curl "${ENDPOINT}?model=sentiment" -X POST -H "Content-Type: application/json" -d @sample-sentiment.json +``` + +The expected response is: + +```json +{"label": "POSITIVE", "score": 0.9998506903648376} +``` + +### Text Summarizer + +Make a request to the text summarizer model: + +```bash +curl "${ENDPOINT}?model=summarizer" -X POST -H "Content-Type: application/json" -d @sample-summarizer.json +``` + +The expected response is: + +```text +Machine learning is the study of algorithms and statistical models that computer systems use to perform a specific task. It is seen as a subset of artificial intelligence. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision. In its application across business problems, machine learning is also referred to as predictive analytics. +``` diff --git a/test/pytorch/multi-model-text-analyzer/cortex.yaml b/test/pytorch/multi-model-text-analyzer/cortex.yaml new file mode 100644 index 0000000000..b2ece6bab9 --- /dev/null +++ b/test/pytorch/multi-model-text-analyzer/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: multi-model-text-analyzer + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + gpu: 1 + mem: 6G diff --git a/test/pytorch/multi-model-text-analyzer/predictor.py b/test/pytorch/multi-model-text-analyzer/predictor.py new file mode 100644 index 0000000000..03a8b03fbb --- /dev/null +++ b/test/pytorch/multi-model-text-analyzer/predictor.py @@ -0,0 +1,25 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import torch +from transformers import pipeline +from starlette.responses import JSONResponse + + +class PythonPredictor: + def __init__(self, config): + device = 0 if torch.cuda.is_available() else -1 + print(f"using device: {'cuda' if device == 0 else 'cpu'}") + + self.analyzer = pipeline(task="sentiment-analysis", device=device) + self.summarizer = pipeline(task="summarization", device=device) + + def predict(self, query_params, payload): + model_name = query_params.get("model") + + if model_name == "sentiment": + return self.analyzer(payload["text"])[0] + elif model_name == "summarizer": + summary = self.summarizer(payload["text"]) + return summary[0]["summary_text"] + else: + return JSONResponse({"error": f"unknown model: {model_name}"}, status_code=400) diff --git a/test/pytorch/multi-model-text-analyzer/requirements.txt b/test/pytorch/multi-model-text-analyzer/requirements.txt new file mode 100644 index 0000000000..3f565d80e4 --- /dev/null +++ b/test/pytorch/multi-model-text-analyzer/requirements.txt @@ -0,0 +1,2 @@ +torch +transformers==2.9.* diff --git a/test/pytorch/multi-model-text-analyzer/sample-sentiment.json b/test/pytorch/multi-model-text-analyzer/sample-sentiment.json new file mode 100644 index 0000000000..de3a18a92a --- /dev/null +++ 
b/test/pytorch/multi-model-text-analyzer/sample-sentiment.json @@ -0,0 +1,3 @@ +{ + "text": "best day ever" +} diff --git a/test/pytorch/multi-model-text-analyzer/sample-summarizer.json b/test/pytorch/multi-model-text-analyzer/sample-summarizer.json new file mode 100644 index 0000000000..b19a1406d4 --- /dev/null +++ b/test/pytorch/multi-model-text-analyzer/sample-summarizer.json @@ -0,0 +1,3 @@ +{ + "text": "Machine learning (ML) is the scientific study of algorithms and statistical models that computer systems use to perform a specific task without using explicit instructions, relying on patterns and inference instead. It is seen as a subset of artificial intelligence. Machine learning algorithms build a mathematical model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to perform the task. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision, where it is difficult or infeasible to develop a conventional algorithm for effectively performing the task. Machine learning is closely related to computational statistics, which focuses on making predictions using computers. The study of mathematical optimization delivers methods, theory and application domains to the field of machine learning. Data mining is a field of study within machine learning, and focuses on exploratory data analysis through unsupervised learning. In its application across business problems, machine learning is also referred to as predictive analytics." +} diff --git a/test/pytorch/object-detector/README.md b/test/pytorch/object-detector/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/pytorch/object-detector/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/pytorch/object-detector/coco_labels.txt b/test/pytorch/object-detector/coco_labels.txt new file mode 100644 index 0000000000..8d950d95da --- /dev/null +++ b/test/pytorch/object-detector/coco_labels.txt @@ -0,0 +1,91 @@ +__background__ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +N/A +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +N/A +backpack +umbrella +N/A +N/A +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +N/A +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +N/A +dining table +N/A +N/A +toilet +N/A +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +N/A +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/test/pytorch/object-detector/cortex.yaml b/test/pytorch/object-detector/cortex.yaml new file mode 100644 index 0000000000..9b06d29e9e --- /dev/null +++ b/test/pytorch/object-detector/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: object-detector + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + gpu: 1 + mem: 4G diff --git a/test/pytorch/object-detector/predictor.py b/test/pytorch/object-detector/predictor.py new file mode 100644 index 0000000000..52aa593774 --- /dev/null +++ b/test/pytorch/object-detector/predictor.py @@ -0,0 +1,49 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +from io import BytesIO + +import requests +import torch +from PIL import Image +from torchvision import models +from torchvision import transforms + + +class PythonPredictor: + def __init__(self, config): + self.device = "cuda" if torch.cuda.is_available() else "cpu" + print(f"using device: {self.device}") + + model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(self.device) + model.eval() + + self.preprocess = transforms.Compose([transforms.ToTensor()]) + + with open("/mnt/project/coco_labels.txt") as f: + self.coco_labels = f.read().splitlines() + + self.model = model + + def predict(self, payload): + threshold = float(payload["threshold"]) + image = requests.get(payload["url"]).content + img_pil = Image.open(BytesIO(image)) + img_tensor = self.preprocess(img_pil).to(self.device) + img_tensor.unsqueeze_(0) + + with torch.no_grad(): + pred = self.model(img_tensor) + + predicted_class = [self.coco_labels[i] for i in pred[0]["labels"].cpu().tolist()] + predicted_boxes = [ + [(i[0], i[1]), (i[2], i[3])] for i in pred[0]["boxes"].detach().cpu().tolist() + ] + predicted_score = pred[0]["scores"].detach().cpu().tolist() + predicted_t = [predicted_score.index(x) for x in predicted_score if x > threshold] + if len(predicted_t) == 0: + return [], [] + + predicted_t = predicted_t[-1] + predicted_boxes 
= predicted_boxes[: predicted_t + 1] + predicted_class = predicted_class[: predicted_t + 1] + return predicted_boxes, predicted_class diff --git a/test/pytorch/object-detector/requirements.txt b/test/pytorch/object-detector/requirements.txt new file mode 100644 index 0000000000..ac988bdf84 --- /dev/null +++ b/test/pytorch/object-detector/requirements.txt @@ -0,0 +1,2 @@ +torch +torchvision diff --git a/test/pytorch/object-detector/sample.json b/test/pytorch/object-detector/sample.json new file mode 100644 index 0000000000..5005f13bad --- /dev/null +++ b/test/pytorch/object-detector/sample.json @@ -0,0 +1,4 @@ +{ + "url": "https://i.imgur.com/PzXprwl.jpg", + "threshold": "0.8" +} diff --git a/test/pytorch/question-generator/cortex.yaml b/test/pytorch/question-generator/cortex.yaml new file mode 100644 index 0000000000..a944303edb --- /dev/null +++ b/test/pytorch/question-generator/cortex.yaml @@ -0,0 +1,10 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: question-generator + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + mem: 6G diff --git a/test/pytorch/question-generator/dependencies.sh b/test/pytorch/question-generator/dependencies.sh new file mode 100644 index 0000000000..5040da2342 --- /dev/null +++ b/test/pytorch/question-generator/dependencies.sh @@ -0,0 +1,4 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +# torchvision isn’t required for this example, and pip was throwing warnings with it installed +pip uninstall torchvision -y diff --git a/test/pytorch/question-generator/predictor.py b/test/pytorch/question-generator/predictor.py new file mode 100644 index 0000000000..0b7692890c --- /dev/null +++ b/test/pytorch/question-generator/predictor.py @@ -0,0 +1,36 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +from transformers import AutoModelWithLMHead, AutoTokenizer +import spacy +import subprocess +import json + + +class PythonPredictor: + def __init__(self, config): + subprocess.call("python -m spacy download en_core_web_sm".split(" ")) + import en_core_web_sm + + self.tokenizer = AutoTokenizer.from_pretrained( + "mrm8488/t5-base-finetuned-question-generation-ap" + ) + self.model = AutoModelWithLMHead.from_pretrained( + "mrm8488/t5-base-finetuned-question-generation-ap" + ) + self.nlp = en_core_web_sm.load() + + def predict(self, payload): + context = payload["context"] + answer = payload["answer"] + max_length = int(payload.get("max_length", 64)) + + input_text = "answer: {} context: {} ".format(answer, context) + features = self.tokenizer([input_text], return_tensors="pt") + + output = self.model.generate( + input_ids=features["input_ids"], + attention_mask=features["attention_mask"], + max_length=max_length, + ) + + return {"result": self.tokenizer.decode(output[0])} diff --git a/test/pytorch/question-generator/requirements.txt b/test/pytorch/question-generator/requirements.txt new file mode 100644 index 0000000000..d7b5db27a0 --- /dev/null +++ b/test/pytorch/question-generator/requirements.txt @@ -0,0 +1,4 @@ +spacy==2.1.8 +-e git+https://github.com/huggingface/transformers.git#egg=transformers 
+--find-links https://download.pytorch.org/whl/torch_stable.html +torch==1.6.0+cpu diff --git a/test/pytorch/question-generator/sample.json b/test/pytorch/question-generator/sample.json new file mode 100644 index 0000000000..88c9fb0c92 --- /dev/null +++ b/test/pytorch/question-generator/sample.json @@ -0,0 +1,4 @@ +{ + "context": "Sarah works as a software engineer in London", + "answer": "London" +} diff --git a/test/pytorch/reading-comprehender/README.md b/test/pytorch/reading-comprehender/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/pytorch/reading-comprehender/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/pytorch/reading-comprehender/cortex.yaml b/test/pytorch/reading-comprehender/cortex.yaml new file mode 100644 index 0000000000..ba89862c78 --- /dev/null +++ b/test/pytorch/reading-comprehender/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: reading-comprehender + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + gpu: 1 + mem: 4G diff --git a/test/pytorch/reading-comprehender/predictor.py b/test/pytorch/reading-comprehender/predictor.py new file mode 100644 index 0000000000..7b86ac4770 --- /dev/null +++ b/test/pytorch/reading-comprehender/predictor.py @@ -0,0 +1,25 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import torch +from allennlp.predictors.predictor import Predictor as AllenNLPPredictor + + +class PythonPredictor: + def __init__(self, config): + self.device = "cuda" if torch.cuda.is_available() else "cpu" + print(f"using device: {self.device}") + + cuda_device = -1 + if self.device == "cuda": + cuda_device = 0 + + self.predictor = AllenNLPPredictor.from_path( + "https://storage.googleapis.com/allennlp-public-models/bidaf-elmo-model-2018.11.30-charpad.tar.gz", + cuda_device=cuda_device, + ) + + def predict(self, payload): + prediction = self.predictor.predict( + passage=payload["passage"], question=payload["question"] + ) + return prediction["best_span_str"] diff --git a/test/pytorch/reading-comprehender/requirements.txt b/test/pytorch/reading-comprehender/requirements.txt new file mode 100644 index 0000000000..13dd5fbdba --- /dev/null +++ b/test/pytorch/reading-comprehender/requirements.txt @@ -0,0 +1 @@ +allennlp==0.9.* diff --git a/test/pytorch/reading-comprehender/sample.json b/test/pytorch/reading-comprehender/sample.json new file mode 100644 index 0000000000..14f60455bc --- /dev/null +++ b/test/pytorch/reading-comprehender/sample.json @@ -0,0 +1,4 @@ +{ + "passage": "Cortex Labs is building machine learning infrastructure for deploying models in production", + "question": "What does Cortex Labs do?" +} diff --git a/test/pytorch/search-completer/README.md b/test/pytorch/search-completer/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/pytorch/search-completer/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. 
diff --git a/test/pytorch/search-completer/cortex.yaml b/test/pytorch/search-completer/cortex.yaml new file mode 100644 index 0000000000..cd73458149 --- /dev/null +++ b/test/pytorch/search-completer/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: search-completer + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + gpu: 1 + mem: 4G diff --git a/test/pytorch/search-completer/predictor.py b/test/pytorch/search-completer/predictor.py new file mode 100644 index 0000000000..58d03ccc2c --- /dev/null +++ b/test/pytorch/search-completer/predictor.py @@ -0,0 +1,20 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import torch +import regex +import tqdm + + +class PythonPredictor: + def __init__(self, config): + roberta = torch.hub.load("pytorch/fairseq", "roberta.large", force_reload=True) + roberta.eval() + device = "cuda" if torch.cuda.is_available() else "cpu" + print(f"using device: {device}") + roberta.to(device) + + self.model = roberta + + def predict(self, payload): + predictions = self.model.fill_mask(payload["text"] + " ", topk=5) + return [prediction[0] for prediction in predictions] diff --git a/test/pytorch/search-completer/requirements.txt b/test/pytorch/search-completer/requirements.txt new file mode 100644 index 0000000000..16b9215d31 --- /dev/null +++ b/test/pytorch/search-completer/requirements.txt @@ -0,0 +1,5 @@ +torch +regex +tqdm +dataclasses +hydra-core diff --git a/test/pytorch/search-completer/sample.json b/test/pytorch/search-completer/sample.json new file mode 100644 index 0000000000..dfd2a2f433 --- /dev/null +++ 
b/test/pytorch/search-completer/sample.json @@ -0,0 +1,3 @@ +{ + "text": "machine learning is" +} diff --git a/test/pytorch/sentiment-analyzer/README.md b/test/pytorch/sentiment-analyzer/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/pytorch/sentiment-analyzer/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/pytorch/sentiment-analyzer/cortex.yaml b/test/pytorch/sentiment-analyzer/cortex.yaml new file mode 100644 index 0000000000..1ed6c45bbf --- /dev/null +++ b/test/pytorch/sentiment-analyzer/cortex.yaml @@ -0,0 +1,10 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: sentiment-analyzer + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + # gpu: 1 # this is optional, since the api can also run on cpu diff --git a/test/pytorch/sentiment-analyzer/predictor.py b/test/pytorch/sentiment-analyzer/predictor.py new file mode 100644 index 0000000000..03b796d199 --- /dev/null +++ b/test/pytorch/sentiment-analyzer/predictor.py @@ -0,0 +1,15 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import torch +from transformers import pipeline + + +class PythonPredictor: + def __init__(self, config): + device = 0 if torch.cuda.is_available() else -1 + print(f"using device: {'cuda' if device == 0 else 'cpu'}") + + self.analyzer = pipeline(task="sentiment-analysis", device=device) + + def predict(self, payload): + return self.analyzer(payload["text"])[0] diff --git a/test/pytorch/sentiment-analyzer/requirements.txt b/test/pytorch/sentiment-analyzer/requirements.txt new file mode 100644 index 0000000000..3f565d80e4 --- /dev/null +++ b/test/pytorch/sentiment-analyzer/requirements.txt @@ -0,0 +1,2 @@ +torch +transformers==2.9.* diff --git a/test/pytorch/sentiment-analyzer/sample.json b/test/pytorch/sentiment-analyzer/sample.json new file mode 100644 index 0000000000..7622d16ae0 --- /dev/null +++ b/test/pytorch/sentiment-analyzer/sample.json @@ -0,0 +1,3 @@ +{ + "text": "best day ever" +} diff --git a/test/pytorch/text-generator/README.md b/test/pytorch/text-generator/README.md new file mode 100644 index 0000000000..f99417e3b4 --- /dev/null +++ b/test/pytorch/text-generator/README.md @@ -0,0 +1,192 @@ +# Deploy machine learning models to production + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example shows how to deploy a realtime text generation API using a GPT-2 model from Hugging Face's transformers library. + +## Implement your Predictor + +1. Create a Python file named `predictor.py`. +2. Define a Predictor class with a constructor that loads and initializes the model. +3. Add a predict function that will accept a payload and return the generated text. 
+ +```python +# predictor.py + +import torch +from transformers import GPT2Tokenizer, GPT2LMHeadModel + + +class PythonPredictor: + def __init__(self, config): + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2") + self.model = GPT2LMHeadModel.from_pretrained("gpt2").to(self.device) + + def predict(self, payload): + input_length = len(payload["text"].split()) + tokens = self.tokenizer.encode(payload["text"], return_tensors="pt").to(self.device) + prediction = self.model.generate(tokens, max_length=input_length + 20, do_sample=True) + return self.tokenizer.decode(prediction[0]) +``` + +## Specify Python dependencies + +Create a `requirements.txt` file to specify the dependencies needed by `predictor.py`. Cortex will automatically install them into your runtime once you deploy: + +```python +# requirements.txt + +torch +transformers==3.0.* +``` + +## Deploy your model locally + +You can create APIs from any Python runtime that has access to Docker (e.g. the Python shell or a Jupyter notebook): + +```python +import cortex + +cx_local = cortex.client("local") + +api_spec = { + "name": "text-generator", + "kind": "RealtimeAPI", + "predictor": { + "type": "python", + "path": "predictor.py" + } +} + +cx_local.deploy(api_spec, project_dir=".", wait=True) +``` + +## Consume your API + +```python +import requests + +endpoint = cx_local.get_api("text-generator")["endpoint"] +payload = {"text": "hello world"} +print(requests.post(endpoint, payload).text) +``` + +## Manage your APIs using the CLI + +Monitor the status of your API using `cortex get`: + +```bash +$ cortex get --watch + +env realtime api status last update avg request 2XX +local text-generator updating 8s - - +``` + +Show additional information for your API (e.g. 
its endpoint) using `cortex get <api_name>`: + +```bash +$ cortex get text-generator + +status last update avg request 2XX +live 1m - - + +endpoint: http://localhost:8889 +``` + +You can also stream logs from your API: + +```bash +$ cortex logs text-generator + +... +``` + +## Deploy your model to AWS + +Cortex can automatically provision infrastructure on your AWS account and deploy your models as production-ready web services: + +```bash +$ cortex cluster up +``` + +This creates a Cortex cluster in your AWS account, which will take approximately 15 minutes. After your cluster is created, you can deploy to your cluster by using the same code and configuration as before: + +```python +import cortex + +cx_aws = cortex.client("aws") + +api_spec = { + "name": "text-generator", + "kind": "RealtimeAPI", + "predictor": { + "type": "python", + "path": "predictor.py" + } +} + +cx_aws.deploy(api_spec, project_dir=".") +``` + +Monitor the status of your APIs using `cortex get` from your CLI: + +```bash +$ cortex get --watch + +env realtime api status up-to-date requested last update avg request 2XX +aws text-generator live 1 1 1m - - +local text-generator live 1 1 17m 3.1285 s 1 +``` + +The output above indicates that one replica of your API was requested and is available to serve predictions. Cortex will automatically launch more replicas if the load increases and will spin down replicas if there is unused capacity. + +Show additional information for your API (e.g. its endpoint) using `cortex get <api_name>`: + +```bash +$ cortex get text-generator --env aws + +status up-to-date requested last update avg request 2XX +live 1 1 1m - - + +endpoint: https://***.execute-api.us-west-2.amazonaws.com/text-generator +``` + +## Run on GPUs + +If your Cortex cluster is using GPU instances (configured during cluster creation) or if you are running locally with an NVIDIA GPU, you can run your text generator API on GPUs.
Add the `compute` field to your API configuration and re-deploy: + +```python +api_spec = { + "name": "text-generator", + "kind": "RealtimeAPI", + "predictor": { + "type": "python", + "path": "predictor.py" + }, + "compute": { + "gpu": 1 + } +} + +cx_aws.deploy(api_spec, project_dir=".") +``` + +As your new API is initializing, the old API will continue to respond to prediction requests. Once the API's status becomes "live" (with one up-to-date replica), traffic will be routed to the updated version. You can track the status of your API using `cortex get`: + +```bash +$ cortex get --env aws --watch + +realtime api status up-to-date stale requested last update avg request 2XX +text-generator updating 0 1 1 29s - - +``` + +## Cleanup + +Deleting APIs will free up cluster resources and allow Cortex to scale down to the minimum number of instances you specified during cluster creation: + +```python +cx_local.delete_api("text-generator") + +cx_aws.delete_api("text-generator") +``` diff --git a/test/pytorch/text-generator/deploy.ipynb b/test/pytorch/text-generator/deploy.ipynb new file mode 100644 index 0000000000..5ffbce9caa --- /dev/null +++ b/test/pytorch/text-generator/deploy.ipynb @@ -0,0 +1,80 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", + "\n", + "This example needs to run on a machine that supports Docker to deploy Cortex APIs locally (Colab users can still deploy to remote Cortex clusters)", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install cortex\n", + "!pip3 install requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cortex\n", + "\n", + "cx = cortex.client(\"local\")\n", + "\n", + "api_spec = {\n", + " \"name\": \"text-generator\",\n", + " \"kind\": \"RealtimeAPI\",\n", + " \"predictor\": {\n", + " \"type\": \"python\",\n", + " \"path\": \"predictor.py\"\n", + " }\n", + "}\n", + "\n", + "cx.deploy(api_spec, project_dir=\".\", wait=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "endpoint = cx.get_api(\"text-generator\")[\"endpoint\"]\n", + "payload = {\"text\": \"hello world\"}\n", + "print(requests.post(endpoint, payload).text)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/test/pytorch/text-generator/predictor.py b/test/pytorch/text-generator/predictor.py new file mode 100644 index 0000000000..b14d8abcc7 --- /dev/null +++ b/test/pytorch/text-generator/predictor.py @@ -0,0 +1,17 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import torch +from transformers import GPT2Tokenizer, GPT2LMHeadModel + + +class PythonPredictor: + def __init__(self, config): + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2") + self.model = GPT2LMHeadModel.from_pretrained("gpt2").to(self.device) + + def predict(self, payload): + input_length = len(payload["text"].split()) + tokens = self.tokenizer.encode(payload["text"], return_tensors="pt").to(self.device) + prediction = self.model.generate(tokens, max_length=input_length + 20, do_sample=True) + return self.tokenizer.decode(prediction[0]) diff --git a/test/pytorch/text-generator/requirements.txt b/test/pytorch/text-generator/requirements.txt new file mode 100644 index 0000000000..1447500abe --- /dev/null +++ b/test/pytorch/text-generator/requirements.txt @@ -0,0 +1,2 @@ +torch +transformers==3.0.* diff --git a/test/pytorch/text-summarizer/README.md b/test/pytorch/text-summarizer/README.md new file mode 100644 index 0000000000..4323c6e133 --- /dev/null +++ b/test/pytorch/text-summarizer/README.md @@ -0,0 +1,5 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. + +Please refer [here](https://sshleifer.github.io/blog_v2/jupyter/2020/03/12/bart.html) to learn more about BART. diff --git a/test/pytorch/text-summarizer/cortex.yaml b/test/pytorch/text-summarizer/cortex.yaml new file mode 100644 index 0000000000..9f7b620ca9 --- /dev/null +++ b/test/pytorch/text-summarizer/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: text-summarizer + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + gpu: 1 # this is optional, since the api can also run on cpu + mem: 6G diff --git a/test/pytorch/text-summarizer/predictor.py b/test/pytorch/text-summarizer/predictor.py new file mode 100644 index 0000000000..05652afd17 --- /dev/null +++ b/test/pytorch/text-summarizer/predictor.py @@ -0,0 +1,18 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import torch +from transformers import pipeline + + +class PythonPredictor: + def __init__(self, config): + device = 0 if torch.cuda.is_available() else -1 + print(f"using device: {'cuda' if device == 0 else 'cpu'}") + + self.summarizer = pipeline(task="summarization", device=device) + + def predict(self, payload): + summary = self.summarizer( + payload["text"], num_beams=4, length_penalty=2.0, max_length=142, no_repeat_ngram_size=3 + ) + return summary[0]["summary_text"] diff --git a/test/pytorch/text-summarizer/requirements.txt b/test/pytorch/text-summarizer/requirements.txt new file mode 100644 index 0000000000..5afceb377e --- /dev/null +++ b/test/pytorch/text-summarizer/requirements.txt @@ -0,0 +1,2 @@ +transformers==2.9.* +torch diff --git a/test/pytorch/text-summarizer/sample.json b/test/pytorch/text-summarizer/sample.json new file mode 100644 index 0000000000..e54b77f18c --- /dev/null +++ b/test/pytorch/text-summarizer/sample.json @@ -0,0 +1,3 @@ +{ + "text": "Machine learning (ML) is the scientific study of algorithms and statistical models that computer systems use to perform a specific task without using explicit instructions, relying on patterns and inference instead. It is seen as a subset of artificial intelligence. 
Machine learning algorithms build a mathematical model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to perform the task. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision, where it is difficult or infeasible to develop a conventional algorithm for effectively performing the task. Machine learning is closely related to computational statistics, which focuses on making predictions using computers. The study of mathematical optimization delivers methods, theory and application domains to the field of machine learning. Data mining is a field of study within machine learning, and focuses on exploratory data analysis through unsupervised learning. In its application across business problems, machine learning is also referred to as predictive analytics." +} diff --git a/test/sklearn/iris-classifier/README.md b/test/sklearn/iris-classifier/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/sklearn/iris-classifier/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/sklearn/iris-classifier/cortex.yaml b/test/sklearn/iris-classifier/cortex.yaml new file mode 100644 index 0000000000..1f05c85eca --- /dev/null +++ b/test/sklearn/iris-classifier/cortex.yaml @@ -0,0 +1,15 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: iris-classifier + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + config: + bucket: cortex-examples + key: sklearn/iris-classifier/model.pkl + monitoring: + model_type: classification + compute: + cpu: 0.2 + mem: 200M diff --git a/test/sklearn/iris-classifier/predictor.py b/test/sklearn/iris-classifier/predictor.py new file mode 100644 index 0000000000..46edab0ad2 --- /dev/null +++ b/test/sklearn/iris-classifier/predictor.py @@ -0,0 +1,31 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import os +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +import pickle + +labels = ["setosa", "versicolor", "virginica"] + + +class PythonPredictor: + def __init__(self, config): + if os.environ.get("AWS_ACCESS_KEY_ID"): + s3 = boto3.client("s3") # client will use your credentials if available + else: + s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client + + s3.download_file(config["bucket"], config["key"], "/tmp/model.pkl") + self.model = pickle.load(open("/tmp/model.pkl", "rb")) + + def predict(self, payload): + measurements = [ + payload["sepal_length"], + payload["sepal_width"], + payload["petal_length"], + payload["petal_width"], + ] + + label_id = self.model.predict([measurements])[0] + return labels[label_id] diff --git a/test/sklearn/iris-classifier/requirements.txt b/test/sklearn/iris-classifier/requirements.txt new file mode 100644 index 0000000000..bbc213cf3e --- /dev/null +++ b/test/sklearn/iris-classifier/requirements.txt @@ -0,0 +1,2 @@ +boto3 +scikit-learn==0.21.3 diff --git a/test/sklearn/iris-classifier/sample.json b/test/sklearn/iris-classifier/sample.json new file mode 100644 index 0000000000..9e792863cd --- 
/dev/null +++ b/test/sklearn/iris-classifier/sample.json @@ -0,0 +1,6 @@ +{ + "sepal_length": 5.2, + "sepal_width": 3.6, + "petal_length": 1.5, + "petal_width": 0.3 +} diff --git a/test/sklearn/iris-classifier/trainer.py b/test/sklearn/iris-classifier/trainer.py new file mode 100644 index 0000000000..db1b047938 --- /dev/null +++ b/test/sklearn/iris-classifier/trainer.py @@ -0,0 +1,25 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import boto3 +import pickle + +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LogisticRegression + +# Train the model + +iris = load_iris() +data, labels = iris.data, iris.target +training_data, test_data, training_labels, test_labels = train_test_split(data, labels) + +model = LogisticRegression(solver="lbfgs", multi_class="multinomial") +model.fit(training_data, training_labels) +accuracy = model.score(test_data, test_labels) +print("accuracy: {:.2f}".format(accuracy)) + +# Upload the model + +pickle.dump(model, open("model.pkl", "wb")) +s3 = boto3.client("s3") +s3.upload_file("model.pkl", "cortex-examples", "sklearn/iris-classifier/model.pkl") diff --git a/test/sklearn/mpg-estimator/README.md b/test/sklearn/mpg-estimator/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/sklearn/mpg-estimator/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. 
diff --git a/test/sklearn/mpg-estimator/cortex.yaml b/test/sklearn/mpg-estimator/cortex.yaml new file mode 100644 index 0000000000..e6ffc969ee --- /dev/null +++ b/test/sklearn/mpg-estimator/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: mpg-estimator + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + config: + model: s3://cortex-examples/sklearn/mpg-estimator/linreg/ + monitoring: + model_type: regression diff --git a/test/sklearn/mpg-estimator/predictor.py b/test/sklearn/mpg-estimator/predictor.py new file mode 100644 index 0000000000..bb1c2ed19a --- /dev/null +++ b/test/sklearn/mpg-estimator/predictor.py @@ -0,0 +1,41 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +import mlflow.sklearn +import numpy as np +import re +import os + + +class PythonPredictor: + def __init__(self, config): + model_path = "/tmp/model" + os.makedirs(model_path, exist_ok=True) + + if os.environ.get("AWS_ACCESS_KEY_ID"): + s3 = boto3.client("s3") # client will use your credentials if available + else: + s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client + + # download mlflow model folder from S3 + bucket, prefix = re.match("s3://(.+?)/(.+)", config["model"]).groups() + response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix) + for s3_obj in response["Contents"]: + obj_key = s3_obj["Key"] + s3.download_file(bucket, obj_key, os.path.join(model_path, os.path.basename(obj_key))) + + self.model = mlflow.sklearn.load_model(model_path) + + def predict(self, payload): + model_input = [
+ payload["cylinders"], + payload["displacement"], + payload["horsepower"], + payload["weight"], + payload["acceleration"], + ] + + result = self.model.predict([model_input]) + return result.item() diff --git a/test/sklearn/mpg-estimator/requirements.txt b/test/sklearn/mpg-estimator/requirements.txt new file mode 100644 index 0000000000..cbcad6b321 --- /dev/null +++ b/test/sklearn/mpg-estimator/requirements.txt @@ -0,0 +1,4 @@ +mlflow +pandas +numpy +scikit-learn==0.21.3 diff --git a/test/sklearn/mpg-estimator/sample.json b/test/sklearn/mpg-estimator/sample.json new file mode 100644 index 0000000000..2dbbca46dd --- /dev/null +++ b/test/sklearn/mpg-estimator/sample.json @@ -0,0 +1,7 @@ +{ + "cylinders": 4, + "displacement": 135, + "horsepower": 84, + "weight": 2490, + "acceleration": 15.7 +} diff --git a/test/sklearn/mpg-estimator/trainer.py b/test/sklearn/mpg-estimator/trainer.py new file mode 100644 index 0000000000..f17b7d9c05 --- /dev/null +++ b/test/sklearn/mpg-estimator/trainer.py @@ -0,0 +1,25 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import mlflow.sklearn +import pandas as pd +import numpy as np +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split + + +df = pd.read_csv( + "https://www.uio.no/studier/emner/sv/oekonomi/ECON4150/v16/statacourse/datafiles/auto.csv" +) +df = df.replace("?", np.nan) +df = df.dropna() +df = df.drop(["name", "origin", "year"], axis=1) # drop categorical variables for simplicity +data = df.drop("mpg", axis=1) +labels = df[["mpg"]] + +training_data, test_data, training_labels, test_labels = train_test_split(data, labels) +model = LinearRegression() +model.fit(training_data, training_labels) +accuracy = model.score(test_data, test_labels) +print("accuracy: {:.2f}".format(accuracy)) + +mlflow.sklearn.save_model(model, "linreg") diff --git a/test/spacy/entity-recognizer/README.md b/test/spacy/entity-recognizer/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/spacy/entity-recognizer/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/spacy/entity-recognizer/cortex.yaml b/test/spacy/entity-recognizer/cortex.yaml new file mode 100644 index 0000000000..cc4dbbba38 --- /dev/null +++ b/test/spacy/entity-recognizer/cortex.yaml @@ -0,0 +1,10 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: entity-recognizer + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + mem: 1G diff --git a/test/spacy/entity-recognizer/predictor.py b/test/spacy/entity-recognizer/predictor.py new file mode 100644 index 0000000000..9d42a9de4c --- /dev/null +++ b/test/spacy/entity-recognizer/predictor.py @@ -0,0 +1,22 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import spacy +import subprocess + + +class PythonPredictor: + """ + Class to perform NER (named entity recognition) + """ + + def __init__(self, config): + subprocess.call("python -m spacy download en_core_web_md".split(" ")) + import en_core_web_md + + self.nlp = en_core_web_md.load() + + def predict(self, payload): + doc = self.nlp(payload["text"]) + proc = lambda ent: {"label": ent.label_, "start": ent.start, "end": ent.end} + out = {ent.text: proc(ent) for ent in doc.ents} + return out diff --git a/test/spacy/entity-recognizer/requirements.txt b/test/spacy/entity-recognizer/requirements.txt new file mode 100644 index 0000000000..568e4fc634 --- /dev/null +++ b/test/spacy/entity-recognizer/requirements.txt @@ -0,0 +1 @@ +spacy diff --git a/test/spacy/entity-recognizer/sample.json b/test/spacy/entity-recognizer/sample.json new file mode 100644 index 0000000000..ae0f0f4120 --- /dev/null +++ b/test/spacy/entity-recognizer/sample.json @@ -0,0 +1,3 @@ +{ + "text": "Lilium, a Munich-based startup that is designing and building vertical take-off and landing (VTOL) aircraft with speeds of up to 100 km/h that it plans eventually to run in its own taxi fleet, has closed a funding round of over $240 million — money that it plans to use to keep developing its aircraft, and to start building manufacturing facilities to 
produce more of them, for an expected launch date of 2025." +} diff --git a/test/tensorflow/image-classifier-inception/README.md b/test/tensorflow/image-classifier-inception/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/tensorflow/image-classifier-inception/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/tensorflow/image-classifier-inception/cortex.yaml b/test/tensorflow/image-classifier-inception/cortex.yaml new file mode 100644 index 0000000000..e5177788ba --- /dev/null +++ b/test/tensorflow/image-classifier-inception/cortex.yaml @@ -0,0 +1,13 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-inception + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ + monitoring: + model_type: classification + compute: + cpu: 1 + gpu: 1 diff --git a/test/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml b/test/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml new file mode 100644 index 0000000000..919870651c --- /dev/null +++ b/test/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml @@ -0,0 +1,17 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-inception + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ + server_side_batching: + max_batch_size: 2 + batch_interval: 0.2s + threads_per_process: 2 + monitoring: + model_type: classification + compute: + cpu: 1 + gpu: 1 diff --git a/test/tensorflow/image-classifier-inception/inception.ipynb b/test/tensorflow/image-classifier-inception/inception.ipynb new file mode 100644 index 0000000000..46956e0e48 --- /dev/null +++ b/test/tensorflow/image-classifier-inception/inception.ipynb @@ -0,0 +1,211 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "inception.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "n8CwINQcEBKz", + "colab_type": "text" + }, + "source": [ + "# Exporting ImageNet Inception\n", + "\n", + "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", + "\n", + "In this notebook, we'll show how to export the [pre-trained Imagenet Inception model](https://tfhub.dev/google/imagenet/inception_v3/classification/3) for serving." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3221z3P69fgf", + "colab_type": "text" + }, + "source": [ + "First, we'll install the required packages:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_SdQpq7g9LiI", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!pip install tensorflow==1.14.* tensorflow-hub==0.6.* boto3==1.*" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I-k0gUpxDGkU", + "colab_type": "text" + }, + "source": [ + "Next, we'll download the model from TensorFlow Hub and export it for serving:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "z6QLCzB4BKMe", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import time\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def\n", + "\n", + "export_dir = \"export/\" + str(time.time()).split('.')[0]\n", + "builder = tf.saved_model.builder.SavedModelBuilder(export_dir)\n", + "\n", + "with tf.Session(graph=tf.Graph()) as sess:\n", + " module = hub.Module(\"https://tfhub.dev/google/imagenet/inception_v3/classification/3\")\n", + "\n", + " input_params = module.get_input_info_dict()\n", + " image_input = tf.placeholder(\n", + " name=\"images\", dtype=input_params[\"images\"].dtype, shape=input_params[\"images\"].get_shape()\n", + " )\n", + " \n", + " sess.run([tf.global_variables_initializer(), tf.tables_initializer()])\n", + "\n", + " classes = module(image_input)\n", + " signature = predict_signature_def(inputs={\"images\": image_input}, outputs={\"classes\": classes})\n", + "\n", + " builder.add_meta_graph_and_variables(\n", + " sess, [\"serve\"], signature_def_map={\"predict\": signature}, strip_default_attrs=True\n", + " )\n", + "\n", + "builder.save()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aGtJiyEnBgwl", + 
"colab_type": "text" + }, + "source": [ + "## Upload the model to AWS\n", + "\n", + "Cortex loads models from AWS, so we need to upload the exported model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fTkjvSKBBmUB", + "colab_type": "text" + }, + "source": [ + "Set these variables to configure your AWS credentials and model upload path:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4xcDWxqCBPre", + "colab_type": "code", + "cellView": "form", + "colab": {} + }, + "source": [ + "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", + "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", + "S3_UPLOAD_PATH = \"s3://my-bucket/image-classifier/inception\" #@param {type:\"string\"}\n", + "\n", + "import sys\n", + "import re\n", + "\n", + "if AWS_ACCESS_KEY_ID == \"\":\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", + "\n", + "elif AWS_SECRET_ACCESS_KEY == \"\":\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", + "\n", + "else:\n", + " try:\n", + " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", + " except:\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "czZkjb1IBr-f", + "colab_type": "text" + }, + "source": [ + "Upload the model to S3:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "M0b0IbyaBsim", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "import boto3\n", + "\n", + "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", + "\n", + "for dirpath, _, filenames in os.walk(\"export\"):\n", + " for filename in filenames:\n", + " filepath = os.path.join(dirpath, filename)\n", + " filekey 
= os.path.join(key, filepath[len(\"export/\"):])\n", + " print(\"Uploading s3://{}/{}...\".format(bucket, filekey), end = '')\n", + " s3.upload_file(filepath, bucket, filekey)\n", + " print(\" ✓\")\n", + "\n", + "print(\"\\nUploaded model export directory to \" + S3_UPLOAD_PATH)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pZQWoeZbE7Wc", + "colab_type": "text" + }, + "source": [ + "\n", + "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/image-classifier-inception) for how to deploy the model as an API." + ] + } + ] +} diff --git a/test/tensorflow/image-classifier-inception/predictor.py b/test/tensorflow/image-classifier-inception/predictor.py new file mode 100644 index 0000000000..c2afb63c0c --- /dev/null +++ b/test/tensorflow/image-classifier-inception/predictor.py @@ -0,0 +1,21 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import requests +import numpy as np +from PIL import Image +from io import BytesIO + + +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config): + self.client = tensorflow_client + self.labels = requests.get( + "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" + ).text.split("\n") + + def predict(self, payload): + image = requests.get(payload["url"]).content + decoded_image = np.asarray(Image.open(BytesIO(image)), dtype=np.float32) / 255 + model_input = {"images": np.expand_dims(decoded_image, axis=0)} + prediction = self.client.predict(model_input) + return self.labels[np.argmax(prediction["classes"])] diff --git a/test/tensorflow/image-classifier-inception/requirements.txt b/test/tensorflow/image-classifier-inception/requirements.txt new file mode 100644 index 0000000000..7e2fba5e6c --- /dev/null +++ b/test/tensorflow/image-classifier-inception/requirements.txt @@ -0,0 +1 @@ +Pillow diff --git a/test/tensorflow/image-classifier-inception/sample.json b/test/tensorflow/image-classifier-inception/sample.json new file mode 100644 index 0000000000..667652007a --- /dev/null +++ b/test/tensorflow/image-classifier-inception/sample.json @@ -0,0 +1,3 @@ +{ + "url": "https://i.imgur.com/PzXprwl.jpg" +} diff --git a/test/tensorflow/image-classifier-resnet50/README.md b/test/tensorflow/image-classifier-resnet50/README.md new file mode 100644 index 0000000000..7a52dadbb8 --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/README.md @@ -0,0 +1,90 @@ +# Image Classifier with ResNet50 + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example implements an image recognition system using ResNet50, which allows for the recognition of up to 1000 classes. 
+ +## Deploying + +There are 4 Cortex APIs available in this example: + +1. [cortex.yaml](cortex.yaml) - can be used with any instance. +1. [cortex_inf.yaml](cortex_inf.yaml) - to be used with `inf1` instances. +1. [cortex_gpu.yaml](cortex_gpu.yaml) - to be used with GPU instances. +1. [cortex_gpu_server_side_batching.yaml](cortex_gpu_server_side_batching.yaml) - to be used with GPU instances. Deployed with `max_batch_size` > 1. The exported model and the TensorFlow Predictor do not need to be modified to support server-side batching. + +To deploy an API, run: + +```bash +cortex deploy <cortex-deployment-yaml> +``` + +E.g. + +```bash +cortex deploy cortex_inf.yaml +``` + +## Verifying your API + +Check that your API is live by running `cortex get image-classifier-resnet50`, and copy the example `curl` command that's shown. After the API is live, run the `curl` command, e.g. + +```bash +$ curl <API endpoint> -X POST -H "Content-Type: application/json" -d @sample.json + +["tabby", "Egyptian_cat", "tiger_cat", "tiger", "plastic_bag"] +``` + +The following image is embedded in [sample.json](sample.json): + +![image](https://i.imgur.com/213xcvs.jpg) + +## Throughput test + +Before [throughput_test.py](../../utils/throughput_test.py) is run, 2 environment variables have to be exported: + +```bash +export ENDPOINT=<API endpoint> # you can find this with `cortex get image-classifier-resnet50` +export PAYLOAD=https://i.imgur.com/213xcvs.jpg # this is the cat image shown in the previous step +``` + +Then, deploy each API one at a time and check the results: + +1. Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 2` with the [cortex.yaml](cortex.yaml) API running on a `c5.xlarge` instance will get **~16.2 inferences/sec** with an average latency of **200 ms**. +1. Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 48` with the [cortex_inf.yaml](cortex_inf.yaml) API running on an `inf1.2xlarge` instance will get **~510 inferences/sec** with an average latency of **80 ms**. +1.
Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 24` with the [cortex_gpu.yaml](cortex_gpu.yaml) API running on a `g4dn.xlarge` instance will get **~125 inferences/sec** with an average latency of **85 ms**. Optimizing the model with TensorRT to use FP16 on TF-serving only seems to achieve a 10% performance improvement - one thing to consider is that the TensorRT engines hadn't been built beforehand, so this might have affected the results negatively. +1. Running `python ../../utils/throughput_test.py -i 30 -p 4 -t 60` with the [cortex_gpu_server_side_batching.yaml](cortex_gpu_server_side_batching.yaml) API running on a `g4dn.xlarge` instance will get **~186 inferences/sec** with an average latency of **500 ms**. This achieves a 49% higher throughput than the [cortex_gpu.yaml](cortex_gpu.yaml) API, at the expense of increased latency. + +As an alternative to [throughput_test.py](../../utils/throughput_test.py), the `ab` (ApacheBench) utility can also be used to benchmark the API. This has the advantage that it's not as taxing on your local machine, but the disadvantage that it doesn't implement a cooldown period. You can run `ab` like this: + +```bash +# for making octet-stream requests, which is the default for the throughput_test script +ab -n <number of requests> -c <concurrency> -p sample.bin -T 'application/octet-stream' -rks 120 $ENDPOINT + +# for making json requests, which will have lower performance because the API has to download the image every time +ab -n <number of requests> -c <concurrency> -p sample.json -T 'application/json' -rks 120 $ENDPOINT +``` + +*Note: `inf1.xlarge` isn't used because the major bottleneck with `inf` instances for this example is with the CPU, and `inf1.2xlarge` has twice the number of CPU cores for the same number of Inferentia ASICs (which is 1), which translates to almost double the throughput.* + +## Exporting SavedModels + +This example deploys models that we have built and uploaded to a public S3 bucket. If you want to build the models yourself, follow these instructions.
+ +Run the following command to install the dependencies required for the [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook: + +```bash +pip install --extra-index-url=https://pip.repos.neuron.amazonaws.com \ + neuron-cc==1.0.9410.0+6008239556 \ + tensorflow-neuron==1.15.0.1.0.1333.0 +``` + +The [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook will generate 2 SavedModels. One will be saved in the `resnet50` directory which can be run on GPU or on CPU and another in the `resnet50_neuron` directory which can only be run on `inf1` instances. For server-side batching on `inf1` instances, a different compilation of the model is required. To compile ResNet50 model for a batch size of 5, run `run_all` from [this directory](https://github.com/aws/aws-neuron-sdk/tree/master/src/examples/tensorflow/keras_resnet50). + +If you'd also like to build the TensorRT version of the GPU model, run the following command in a new Python environment to install the pip dependencies required for the [generate_gpu_resnet50_model.ipynb](generate_gpu_resnet50_model.ipynb) notebook: + +```bash +pip install tensorflow==2.0.0 +``` + +TensorRT also has to be installed to export the SavedModel. Follow the instructions on [Nvidia TensorRT Documentation](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#installing-debian) to download and install TensorRT on your local machine (this will require ~5GB of space, and you will have to create an Nvidia account). This notebook also requires that the SavedModel generated with the [generate_resnet50_models.ipynb](generate_resnet50_models.ipynb) notebook exists in the `resnet50` directory. The TensorRT SavedModel will be exported to the `resnet50_gpu` directory. You can then replace the existing SavedModel with the TensorRT-optimized version in [cortex_gpu.yaml](cortex_gpu.yaml) - it's a drop-in replacement that doesn't require any other dependencies on the Cortex side. 
By default, the API config in [cortex_gpu.yaml](cortex_gpu.yaml) uses the non-TensorRT-optimized version due to simplicity. diff --git a/test/tensorflow/image-classifier-resnet50/cortex.yaml b/test/tensorflow/image-classifier-resnet50/cortex.yaml new file mode 100644 index 0000000000..afbe5a8394 --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/cortex.yaml @@ -0,0 +1,18 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-resnet50 + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/resnet50/ + processes_per_replica: 4 + threads_per_process: 16 + config: + classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + input_shape: [224, 224] + input_key: input + output_key: output + compute: + cpu: 3 + mem: 4G diff --git a/test/tensorflow/image-classifier-resnet50/cortex_gpu.yaml b/test/tensorflow/image-classifier-resnet50/cortex_gpu.yaml new file mode 100644 index 0000000000..f86b85e414 --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/cortex_gpu.yaml @@ -0,0 +1,19 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-resnet50 + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/resnet50/ + processes_per_replica: 4 + threads_per_process: 24 + config: + classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + input_shape: [224, 224] + input_key: input + output_key: output + compute: + gpu: 1 + cpu: 3 + mem: 4G diff --git a/test/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml b/test/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml new file mode 100644 index 0000000000..61604346d0 --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml @@ -0,0 +1,22 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-resnet50 + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/resnet50/ + server_side_batching: + max_batch_size: 32 + batch_interval: 0.1s + processes_per_replica: 4 + threads_per_process: 192 + config: + classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + input_shape: [224, 224] + input_key: input + output_key: output + compute: + gpu: 1 + cpu: 3 + mem: 4G diff --git a/test/tensorflow/image-classifier-resnet50/cortex_inf.yaml b/test/tensorflow/image-classifier-resnet50/cortex_inf.yaml new file mode 100644 index 0000000000..13f999e1b5 --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/cortex_inf.yaml @@ -0,0 +1,21 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-resnet50 + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/resnet50_neuron/ + processes_per_replica: 4 + threads_per_process: 256 + config: + classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + input_shape: [224, 224] + input_key: input + output_key: output + compute: + inf: 1 + cpu: 3 + mem: 4G + autoscaling: + max_replica_concurrency: 16384 diff --git a/test/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml b/test/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml new file mode 100644 index 0000000000..2b33961e95 --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml @@ -0,0 +1,24 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: image-classifier-resnet50 + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/resnet50_neuron_batch_size_5/ + server_side_batching: + max_batch_size: 5 + batch_interval: 0.1s + processes_per_replica: 4 + threads_per_process: 260 + config: + classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + input_shape: [224, 224] + input_key: input_1:0 + output_key: probs/Softmax:0 + compute: + inf: 1 + cpu: 3 + mem: 4G + autoscaling: + max_replica_concurrency: 16384 diff --git a/test/tensorflow/image-classifier-resnet50/generate_gpu_resnet50_model.ipynb b/test/tensorflow/image-classifier-resnet50/generate_gpu_resnet50_model.ipynb new file mode 100644 index 0000000000..ca78235b4d --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/generate_gpu_resnet50_model.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generate GPU Resnet50 Model\n", + "\n", + "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.python.compiler.tensorrt import trt_convert as trt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "input_model_dir = \"resnet50\"\n", + "output_model_dir = \"resnet50_gpu\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS\n", + "conversion_params = conversion_params._replace(\n", + " max_workspace_size_bytes=(1<<30))\n", + "conversion_params = conversion_params._replace(precision_mode=\"FP16\")\n", + "conversion_params = conversion_params._replace(\n", + " maximum_cached_engines=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Linked TensorRT version: (0, 0, 0)\n", + "INFO:tensorflow:Loaded TensorRT version: (0, 0, 0)\n", + "INFO:tensorflow:Running against TensorRT version 0.0.0\n" + ] + } + ], + "source": [ + "converter = trt.TrtGraphConverterV2(\n", + " input_saved_model_dir=input_model_dir,\n", + " conversion_params=conversion_params)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /home/robert/.miniconda3/envs/py36-tf/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1781: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "If using Keras pass *_constraint arguments to layers.\n", + 
"WARNING:tensorflow:Issue encountered when serializing variables.\n", + "Type is unsupported, or the types of the items don't match field type in CollectionDef. Note this is a warning and probably safe to ignore.\n", + "to_proto not supported in EAGER mode.\n", + "WARNING:tensorflow:Issue encountered when serializing trainable_variables.\n", + "Type is unsupported, or the types of the items don't match field type in CollectionDef. Note this is a warning and probably safe to ignore.\n", + "to_proto not supported in EAGER mode.\n" + ] + } + ], + "source": [ + "converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: resnet50_gpu/assets\n" + ] + } + ], + "source": [ + "converter.save(output_model_dir)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/test/tensorflow/image-classifier-resnet50/generate_resnet50_models.ipynb b/test/tensorflow/image-classifier-resnet50/generate_resnet50_models.ipynb new file mode 100644 index 0000000000..11eaf5a316 --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/generate_resnet50_models.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generate Resnet50 Models\n", + "\n", + "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import shutil\n", + "import tensorflow as tf\n", + "import tensorflow.neuron as tfn\n", + "import tensorflow.compat.v1.keras as keras\n", + "from tensorflow.keras.applications.resnet50 import ResNet50" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prepare export directories for compile/non-compiled versions of the model." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "model_dir = \"resnet50\"\n", + "compiled_model_dir = model_dir + \"_neuron\"\n", + "shutil.rmtree(model_dir, ignore_errors=True)\n", + "shutil.rmtree(compiled_model_dir, ignore_errors=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Instantiate a Keras ResNet50 model." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /home/robert/.miniconda3/envs/py36-neuron/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "If using Keras pass *_constraint arguments to layers.\n" + ] + } + ], + "source": [ + "keras.backend.set_learning_phase(0)\n", + "keras.backend.set_image_data_format('channels_last')\n", + "model = ResNet50(weights='imagenet')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Export the model as SavedModel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :5: simple_save (from tensorflow.python.saved_model.simple_save) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.simple_save.\n", + "WARNING:tensorflow:From /home/robert/.miniconda3/envs/py36-neuron/lib/python3.6/site-packages/tensorflow_core/python/saved_model/signature_def_utils_impl.py:201: build_tensor_info (from tensorflow.python.saved_model.utils_impl) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.\n", + "INFO:tensorflow:Assets added to graph.\n", + "INFO:tensorflow:No assets to write.\n", + "INFO:tensorflow:SavedModel written to: resnet50/saved_model.pb\n" + ] + } + ], + "source": [ + "tf.saved_model.simple_save(\n", + " session = keras.backend.get_session(),\n", + " export_dir = model_dir,\n", + " inputs = {'input': model.inputs[0]},\n", + " outputs = {'output': model.outputs[0]})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And then compile it for Inferentia to be used on only one Neuron core. `--static-weights` option is used to cache all weights onto the neuron core's memory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Restoring parameters from resnet50/variables/variables\n", + "INFO:tensorflow:Froze 320 variables.\n", + "INFO:tensorflow:Converted 320 variables to const ops.\n", + "INFO:tensorflow:fusing subgraph neuron_op_d6f098c01c780733 with neuron-cc\n", + "INFO:tensorflow:Number of operations in TensorFlow session: 4638\n", + "INFO:tensorflow:Number of operations after tf.neuron optimizations: 556\n", + "INFO:tensorflow:Number of operations placed on Neuron runtime: 554\n", + "INFO:tensorflow:No assets to save.\n", + "INFO:tensorflow:No assets to write.\n", + "INFO:tensorflow:SavedModel written to: resnet50_neuron/saved_model.pb\n", + "INFO:tensorflow:Successfully converted resnet50 to resnet50_neuron\n" + ] + }, + { + "data": { + "text/plain": [ + "{'OnNeuronRatio': 0.9964028776978417}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compiler_args = ['--static-weights', '--num-neuroncores', '1']\n", + "batch_size = 1\n", + "tfn.saved_model.compile(model_dir, compiled_model_dir, batch_size, compiler_args=compiler_args)  # forward the neuron-cc flags defined above" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/test/tensorflow/image-classifier-resnet50/predictor.py b/test/tensorflow/image-classifier-resnet50/predictor.py new file mode 100644 index 0000000000..98828723cc --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/predictor.py @@ -0,0 +1,63 @@ +# WARNING: you are on the master branch; please refer to examples on
the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import os +import cv2 +import numpy as np +import requests +import imageio +import json +import base64 + + +def read_image(payload): + """ + Read JPG image from {"url": "https://..."} or from a bytes object. + """ + if isinstance(payload, bytes): + jpg_as_np = np.frombuffer(payload, dtype=np.uint8) + img = cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR) + elif isinstance(payload, dict) and "url" in payload.keys(): + img = imageio.imread(payload["url"]) + else: + return None + return img + + +def prepare_image(image, input_shape, input_key): + """ + Prepares an image for the TFS client. + """ + img = cv2.resize(image, input_shape, interpolation=cv2.INTER_NEAREST) + img = {input_key: img[np.newaxis, ...]} + return img + + +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config): + self.client = tensorflow_client + + # load classes + classes = requests.get(config["classes"]).json() + self.idx2label = [classes[str(k)][1] for k in range(len(classes))] + + self.input_shape = tuple(config["input_shape"]) + self.input_key = str(config["input_key"]) + self.output_key = str(config["output_key"]) + + def predict(self, payload): + # preprocess image + img = read_image(payload) + if img is None: + return None + img = prepare_image(img, self.input_shape, self.input_key) + + # predict + results = self.client.predict(img)[self.output_key] + results = np.argsort(results) + + # Lookup and print the top 5 labels + top5_idx = results[-5:] + top5_labels = [self.idx2label[idx] for idx in top5_idx] + top5_labels = top5_labels[::-1] + + return top5_labels diff --git a/test/tensorflow/image-classifier-resnet50/requirements.txt b/test/tensorflow/image-classifier-resnet50/requirements.txt new file mode 100644 index 0000000000..66340adf33 --- /dev/null +++ b/test/tensorflow/image-classifier-resnet50/requirements.txt @@ -0,0 +1 
@@ +imageio==2.9.* diff --git a/test/tensorflow/image-classifier-resnet50/sample.bin b/test/tensorflow/image-classifier-resnet50/sample.bin new file mode 100644 index 0000000000000000000000000000000000000000..921abf24a5c99cd3c1d1cd12d00f134a16391a77 GIT binary patch literal 8680 zcmbVxYg`le*6v`cQB&uZa{9BbO)$v8W{2LJ+NxL z*0Y{xjkDd^g}nahij^x67Z(I^fq#heF%p4{b73CL^El@58jE=z|JrM;*CvdgFyYk~ zd(y-S>`Cki6DCfcILVcHz)w>qyG~*L!@SAgFC90Y#TxI*p1^)}$^Yodc?)r$h;$)c zESEQsaqccGcNga!1cP&qe|0=K?B5TUaje(IPhi6_Cc_04ufy?KEI9OdI5AwE0e?qc zb00r#Zs76>9xF zgAYTNghqTExgzS5m7lIz8~YV+-TDpjUw^Z8Tf+7oDSJ}C-z!WLWgo~nDE{$K?!S)w zeDv7yUrrRBDf#v6xzh6&F8)@2t>Sv+4aLp6`a2Dcs=L28J^bU();4YXqYj}*2L)_9WE}rJ!#(JFU_7?cDK0Bm>;Bj)53)IDzg8-!1n*IBKz;a{%>3z$P|_f z6p!VO;K*>(f-^M{>YxQ@auMcB9_hI|J5TOecjRR24cSbeirq2PN9JhTfQ@s=;N6k? z%)Ihq(O27!#ozlmCURxZ%!Q|>uO8o28aW+_iR5dB!;|=0{OWKha?j4I!FG&XzLKJ6 z?+-&&+(k~L?E9Nnb{itKVQ2|yxb@75#A~lOeiq^#;}w+^sxT@h0f4}L% zQ~c9YUH!S&79eMG7r5Lcp5QOF?^@)1C-T##UwepQFNI%DVHg22>(oDG<5QSgI}!e>I?31Wj}l;uP;4Wm3+hZDIwg(pq_Oyfi9^btyxD(AwoX84PVzeMp1KyAiUqT;k4a<~$m~v+?aFRDYVT*HD-f@yNZR$2$6M(G&4uueKmUL_St2 z&8w4g3pt7|C*qU9C%zT#r57&j`Py+d^{uM3c*Ape}$xV@=HZ zVcs)wN6!DLp+7el@PDDT3Gx4^yyrg+G49hRW}J!+R`-W#12)F7A)DQbJWaB6c4DDc z<(s5GSLQf~U&qU+NGI}!Npwzpx{?x4bnH+Ua6R4=+gJ2)LNr68t-dL|O)2zz(JcH_ zrqXdnEjBJ0IJAMIkS)>%0{YUm*}Wn!?PABlrMxS&W<$54_^?$z!*SY)=%cy_GR_;z z!>{}6QzF`LoqQ{&I3bhmxQOd-V1r)Jgx_DKLx?*PLqS=b#QtRu{#f~2TfRkk%ZNQ< z&yt6bmmCU1J{qOUdl%D*Os_Q;=-korut6CmNKvT?*O;<8d9iqCnP z=lgux9qMrMXjeu`gmy)E;&6m+W|kAtvB}t96Ea&Zva_;;!tPxEm&BAl!hBLVJL*-r z10`-eX`y>oP{PGMynEQX=`oRrr!;c9OWcK)NcOC_+|o$)W<}(-mSx^Qf90F7I{El} zRoc_ET7*&2L?zzimZ+9hX=Nl1oRz~fN37HOw>6D%c7~0X+ww@MBgY^iV>(%?zv8-X2!n%qk=TeuTj=^2T=E9AfCU9fr@m9vI%a=BGX9@bte z#V;f`Mv5YEzAc*Oi&mMHG0eRH%=zu4Z;urntCAG(%#Lj2%yWCeQ7hy(g)| zUw=$4Pkj2`obB_T`_(M(c~pMxkFf3bB|VuR(tl~F&H4apeqzVVR3;R2l+Bn`EpU_x z{K<%Qnp)4E&_;2kJ)T||K$a$Ib`o1nNkB8N?SZsTB5Zgb{wyH(q&zU(C%w6pPd|Z4 zpie8-woA)8Z5)Mti4)n}LkzLy2QodWU0Ph9Y?kXwse}H#l6I8O5TN5;MbycRsX)1c 
z_cCDqtvNj|cfsj#8El{K>Aof|;-A>4CmYPNZ)+(ayf_M9jHRsqCRkO7l;o zM8nhIX4!GAwS@M3I7U~L+UmnPt!bQ@U8m5yZND{^LGe0svp;K5?+?Pgh&%e4@;e(a zY7Per?X?J`c*R$hPGoYDfc`6qJmN(DY=8-5Pm!{vgy!0iykjvW>T%&q2{ifVcyEAy zVGSVWaUn)+P>yD6W6hVk2)H7LaJ4cV*$53UhkP9{bct<7YMm2#q`bo??lmlIMj9I$Ut1ZWsS;vhQd&&gW=F8bscpg zzciMu%M5{+P|24Q;^}`_d%QVEW$C*>cbMhtG{cMI=GvE}dysD$zC(Fh?%N8l7ugU0 z*;ijLx)(JC5HSdycX$ICt~ZesBFGM zH$^c3B)N+)sae_*;`S12rw>v0Cta^i2xqwKKZ z*O{^A&YEyJ84-rluh+kbqUPQlsWiqtw&6)_$NNQoBT>)S54DB|^xw}jtI5d6or5K` z`}ZT%rN5NAS%T%Gtr!`peLPs|xQML|^Pt_g6NO{(;u3(Q!+HOYoH^Qiq{O9@V*r=NL z+HZ6R{+kC?u?HCcPdYZbnQJYbYb3T~OER_0FiF5H^FVodQ&pr=M${$C`$9PigPV1% z#PV<2@1~lH4#yjn;do-eBLVFP9dJ^}QGU~eKU~gD_W%_tMjPtn*nJtg_#Fz z;p=Y+<1R65QD-)DB@Qve%kg`udlf?kWnF|%y1zwMUQD+XEdu6iB=n~&o*53a#E#w? z^P+*G59}S_-6za3(LM4k~X$Aw0kFQCfhD;!6yYBDyx zFWZjUx72<+g5JD_GvaHb)co(BxP!GTEvkku&|3z4kX;htD8YL|{3F!dv-Fde@W|DW zt?|N4u_gNY4ktndxb*sKg2IzF-iVG~WsNdTdNHOggp}XrIFVDUwP7p!w-etScAVb# zu3U0tub}NFHkGt|;Lr(X^-5=Z-I${l=;Zm5Yty2QajUi;dC1KU(7!Y6h^2q$9F&9+ zd)jD4SGfuLz?GXlC_nLRTb1M7J<*JRg-?y6?_q!8Bdu#`6*qgRG>N;vU|vxDsM*1JX>N}L;HEF9Fr@Cq^20)m-|T(fNH4h=a^&o zbomJl=&?E!8toE@$CXZ`7lzC8QyIaa&Ci=;+FK$HDBLN=VgntI4FwW|XDWm6{4pKf zPXBdTz0AP{--TJeJm05$Ehv0nhk zb{o)$gj|K|TYFNUq@yIyiA=;P9w$T|cHbt?k7H{;BzEr09n2Tp8Z+8vQhwgpY5Aw- znFY8mqy<{r;HIxmw25JcXzBTxi%hHbO@62S9xy<{7keYDvP%)9X5ZG@H0^g87YoFC zAO>#d*)(ZKDPc<+*_K5w_D%WWTvdKbgNjdklYc-lr!|V7eDzdpUtaKwC(MW)KKSbJ zc$N3_I1-Z)-)WDM1_Mma36iX_msJi0TpzlOSlMYlypJ$k8^Aki_dsIpjr18gVBV$; zS8f;@bsI1(?#ackUaB9)3pf=|5{T*ct>xkIH|V;{;VJN$Mo&dC*%@((ndE-wPosyN z$o3=_10zQ0ybz41;z*t>9o42v3t)h^1!oIH#O>u3PjRUGr=<1^Qddmv^1C(mT%=e( zBm^O1=7u?ugXhT$z?d-obu!|PVO<;kY*UF&V2Y6m4WObdx8++%gWVTChl&mgH9L{n z)a!71Oee6tbr3JJ=5MK4oG0d6RCUVS!tOozD67X>-&Y#~ z@@u4L#Qg*vQa{EkAPxF$1y=Q_i}c|+EjK6;Xe~M@jQ6%I^G}w2GCf~dVviC3In@EI z%rt?=gWdT6uCQcBSux3}HbNgAFc=Oh+G-0)o*9oIdBwndeMdifHyw4HbRs=VMq39u zN+2l*4Ez=fE#~=K*4WN#gKsmytGij!l`%z#MS#I%Q>zw)4ZVtWA{PYjm|sC+6CL7< z+EeT)5m(eXB1RxI?5+*bSeu5L>ra81g&(9a9l8&O$bi`KtJeFxSQ)Xo@92qs4(3oe 
z4n38altZ~MgVi~4)~E<_edlzS<2uHt56a8&uQ#DbR3gi!4gT|6N7gMOD-nv zrrfr+CJ+@XAYp+w;jA~bVN+$_%XqKS5CL7{C;_54-zW79LziK5=vK@V+VuhUvHS#) zpk|Or{pdt2er>_IV~MsdW*Q#iDg;aDcB!lN!DL8!&V?>wY-XY+L_N=pGYG?=9U%)U z-~^s!O~mhVUmBcnLo``T?c5u!bm(bV9lomX~uVEwD{?hu$P7F&Kw1dxP2P1-Cc>Q*|$3`!Dz zp!X2p?7zQiUxa2D&l>Yh6BGnLNYS5r)#gvg3t)ojF00|DvEhqdD7pMv=n$GddopkMu^1@}OO8uB@kEHJzyrh8@)BHk()u7pCx8$^ zke}Ft_v1MkvPeoZ?fEXdS*K zXV;j~G|N$0%uF~Tx5iGpZa2#zEA$ijzcNFZSDNLtpVSOs{g!B03PQ?+2*svZ7ZYxB z6sOSOoMQS>fNZ2{38Qqc|MeX8@35iT@q3;k@s zBmR^5t=x%112EH}hV((MyU?&PR0S2dRIeK-z$qu5adfS&_^pT~HU%^M3b zm^&Omfk)F|4Z7*o!eLsNwhT5Nwr)7)w9w%KkKZhfveX9C^nsIv)Y+_y?yKkF16`%@hV8GWaN(Zfw2RbGS zcD|6b>>i>V%(vt7SU4R-IgzV~D}H{gf{8KWm5Ib?zoSoxoo$%S#4buoAS`cadPi&~ z7zmXs#14gES+Z2~fxj@V`HFHs zUU}SfL7pCCF36J64+XDV|M_(8!w8oCB}#o^*LDKxW~v`n-RHqi$jj#!oCuD#W8~I> z+@}*jK4Fb)x_d4^s^AUn9v~NLmzi+fUnei8lFh#ejr6k2Mc%kBE?JiOpmX*x_dp_+ zleye9ZJs?4dym*GG(t1-FcuZ>C@ayHrMo$lLE+58%?NvhqZSn;q<}LKaAT}fvSAH03DhU`J`mx3;=_pZQmJski-^+yt*j4GkX284x*u;a|e^0|cE>zQ#E?n70 zFCe_g?`tzit$7k)aW8Zr0Q^VDZ&s919VV~BB!9|*e| zkEY-61sc0iKsy)|%NxM}he?eJMjt0&kO#(Pul%zRZ+#BL94z zwyhdAu;4b1wkAL}KLOTV;ol59x9fK84H(KlazoPBGTj{Lkd$cH>M`FoXayg0Vt;fF zQMN+S1)D1mNiIx?9M65cexrRZ^}6GvmEro(#h!hD3;Ut2e%?5Bul5ZYc57^a;XtJ&49?oHPn<<1gB^h5 z=)gHX1D*4cNfb57sDvUVs7V=ql}d)JL5MjW)@UDJ=%WmXn+I{js)YHVk~kia=~uw7 zM)-Iy;z`v;X?dv%HK}3p!E2{w#7CVTM(1joz4+AKRr#n2(nH+ceS_#I1q{vsncFR1 zN0b-cNL9l=u4wB%F)ZXechnS9sPzgnjJuh6gtK5(RC}G#G+ighm|uCQVLSH&H_;r` zZv=%0fJfyQxd7JN?^6&+pc;1etsHQaW;wVI3XLF6l9&l)X(kOLh@Yu|q%{jITB zcd*r(@nQXj+ETp$BU9>18{$81mH+r%Kw8DHmi-$A zfLFLlRE}##^_k;VQWg) zKS)zIMCZaTAq;TqDra;e6Lr2@4Qq?4fStu`!tM+Lg26OG8UU=nb3-f)jTK;z3w<+Y z6_W;-E@i{vC^TF{+P2Lap<$DT0i(x*U2QrhKOkJ8*MPnb)YsqSmN}6%o;wqzS1}dZ zv543iZ|6YqNI$$F47PV;Q&x-EM?suk#Z0q6i`kh5Hy4ogG~C4FpaFU>*@lCa;C*y)DWKKB3&HKP za3u2~QFqPEbGULj)Ws3X9~)0-N^}EF(4!=n`F6*S5o&&CKYdiH9XbxgA>uqN<86c# zKi^?fTxrGn7ixxn55i3yy~;AYd547h;gZ;*Qn?LtvR()*^Dlynr<%w?P-}|E4dSUbzXj zHn3&i=qTYb+aM7CuzC|*DP*R%;S^ev*1gRL@6!cf_QZ97paU4+BDOV3q9E_&Tic*3 
z=!haz{xE5DD)@L1(HeHcO(Qiex{H9V>nmj>ddc-<>;j9a`uiCf!9ZQ$G+^YdW&?HA zzmxtBj4p0IYD)nYCCH)#V6Cst^%1&dJ~)-NfPU;mLYRox5s$CAv=W;cFCu32sNI+H zht2--zKbxG|4QB9iKJ^`DT2eTV<`9?y*z;^>nup@R6^j);jMs$$yJZ7@@0mw<9vs> nI+?jSC^F{xUjzvL0|#PdBqF?9&IK8 90%" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hdwu-wzJvJLb", + "colab_type": "text" + }, + "source": [ + "## Export the model\n", + "Now we can export the model using [`Estimator.export_saved_model`](https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/estimator/Estimator#export_saved_model):" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AVgs2mkdllRn", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def json_serving_input_fn():\n", + " placeholders = {}\n", + " features = {}\n", + " for feature_name in feature_names:\n", + " placeholders[feature_name] = tf.placeholder(shape=[None], dtype=tf.float64, name=feature_name)\n", + " features[feature_name] = tf.expand_dims(placeholders[feature_name], -1)\n", + " \n", + " return tf.estimator.export.ServingInputReceiver(features, receiver_tensors=placeholders)\n", + "\n", + "\n", + "classifier.export_saved_model(\"export\", json_serving_input_fn)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ipVlP4yPxFxw", + "colab_type": "text" + }, + "source": [ + "## Upload the model to AWS\n", + "\n", + "Cortex loads models from AWS, so we need to upload the exported model." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3IqsfyylxLhy", + "colab_type": "text" + }, + "source": [ + "Set these variables to configure your AWS credentials and model upload path:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lc9LBH1uHT_h", + "colab_type": "code", + "cellView": "form", + "colab": {} + }, + "source": [ + "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", + "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", + "S3_UPLOAD_PATH = \"s3://my-bucket/iris-classifier/tensorflow\" #@param {type:\"string\"}\n", + "\n", + "import sys\n", + "import re\n", + "\n", + "if AWS_ACCESS_KEY_ID == \"\":\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", + "\n", + "elif AWS_SECRET_ACCESS_KEY == \"\":\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", + "\n", + "else:\n", + " try:\n", + " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", + " except:\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NXeuZsaQxUc8", + "colab_type": "text" + }, + "source": [ + "Upload the model to S3:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YLmnWTEVsu55", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "import boto3\n", + "\n", + "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", + "\n", + "for dirpath, _, filenames in os.walk(\"export\"):\n", + " for filename in filenames:\n", + " filepath = os.path.join(dirpath, filename)\n", + " filekey = os.path.join(key, filepath[len(\"export/\"):])\n", + " print(\"Uploading s3://{}/{}...\".format(bucket, filekey), end = '')\n", + " 
s3.upload_file(filepath, bucket, filekey)\n", + " print(\" ✓\")", + "\n", + "print(\"\\nUploaded model export directory to \" + S3_UPLOAD_PATH)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aR-mmcUzyCV3", + "colab_type": "text" + }, + "source": [ + "\n", + "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/iris-classifier) for how to deploy the model as an API." + ] + } + ] +} diff --git a/test/tensorflow/license-plate-reader/README.md b/test/tensorflow/license-plate-reader/README.md new file mode 100644 index 0000000000..009286a4e1 --- /dev/null +++ b/test/tensorflow/license-plate-reader/README.md @@ -0,0 +1,175 @@ +# Real-Time License Plate Identification System + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This project implements a license plate identification system. On resource-constrained systems, running inferences may prove to be too computationally expensive. One solution is to run the ML in the cloud and have the local (embedded) system act as a client of these services. + +![Demo GIF](https://i.imgur.com/jgkJB59.gif) + +*Figure 1 - GIF taken from this real-time recording [video](https://www.youtube.com/watch?v=gsYEZtecXlA) of predictions* + +![Raspberry Pi client with 4G access and onboard GPS that connects to cortex's APIs for inference](https://i.imgur.com/MvDAXWU.jpg) + +*Figure 2 - Raspberry Pi-powered client with 4G access and onboard GPS that connects to cortex's APIs for inference. More on that [here](https://github.com/RobertLucian/cortex-license-plate-reader-client).* + +In our example, we assume we have a dashcam mounted on a car and we want to detect and recognize all license plates in the video stream in real-time. 
We can use an embedded computer system to record the video, then stream and infer frame-by-frame using a web service, reassemble the stream with the licence plate annotations, and finally display the annotated stream on a screen. The web service in our case is a set of 2 web APIs deployed using cortex. + +## Used Models + +The identification of license plates is done in three steps: + +1. Detecting the bounding boxes of each license plate using *YOLOv3* model. +1. Detecting the very specific region of each word inside each bounding box with high accuracy using a pretrained *CRAFT* text detector. +1. Recognizing the text inside the previously detected boxes using a pretrained *CRNN* model. + +Out of these three models (*YOLOv3*, *CRAFT* and *CRNN*) only *YOLOv3* has been fine-tuned with a rather small dataset to better work with license plates. This dataset can be found [here](https://github.com/RobertLucian/license-plate-dataset). This *YOLOv3* model has in turn been trained using [this](https://github.com/experiencor/keras-yolo3) GitHub project. To get more details about our fine-tuned model, check the project's description page. + +The other two models, *CRAFT* and *CRNN*, can be found in [keras-ocr](https://github.com/faustomorales/keras-ocr). + +## Deployment - Lite Version + +A lite version of the deployment is available with `cortex_lite.yaml`. The lite version accepts an image as input and returns an image with the recognized license plates overlayed on top. A single GPU is required for this deployment (i.e. `g4dn.xlarge`). + +Once the cortex cluster is created, run + +```bash +cortex deploy cortex_lite.yaml +``` + +And monitor the API with + +```bash +cortex get --watch +``` + +To run an inference on the lite version, the only 3 tools you need are `curl`, `sed` and `base64`. This API expects an URL pointing to an image onto which the inferencing is done. 
This includes the detection of license plates with *YOLOv3* and the recognition part with *CRAFT* + *CRNN* models. + +Export the endpoint & the image's URL by running + +```bash +export ENDPOINT=your-api-endpoint +export IMAGE_URL=https://i.imgur.com/r8xdI7P.png +``` + +Then run the following piped commands + +```bash +curl "${ENDPOINT}" -X POST -H "Content-Type: application/json" -d '{"url":"'${IMAGE_URL}'"}' | +sed 's/"//g' | +base64 -d > prediction.jpg +``` + +The resulting image is the same as the one in [Verifying the Deployed APIs](#verifying-the-deployed-apis). + +For another prediction, let's use a generic image from the web. Export [this image's URL link](https://i.imgur.com/mYuvMOs.jpg) and re-run the prediction. This is what we get. + +![annotated sample image](https://i.imgur.com/tg1PE1E.jpg) + +*The above prediction has the bounding boxes colored differently to distinguish them from the cars' red bodies* + +## Deployment - Full Version + +The recommended number of instances to run this smoothly on a video stream is about 12 GPU instances (2 GPU instances for *YOLOv3* and 10 for *CRNN* + *CRAFT*). `cortex_full.yaml` is already set up to use these 12 instances. Note: this is the optimal number of instances when using the `g4dn.xlarge` instance type. For the client to work smoothly, the number of processes per replica can be adjusted, especially for `p3` or `g4` instances, where the GPU has a lot of compute capacity. + +If you don't have access to this many GPU-equipped instances, you could just lower the number and expect dropped frames. It will still prove the point, albeit at a much lower framerate and with higher latency. More on that [here](https://github.com/RobertLucian/cortex-license-plate-reader-client). 
+ +Then after the cortex cluster is created, run + +```bash +cortex deploy cortex_full.yaml +``` + +And monitor the APIs with + +```bash +cortex get --watch +``` + +We can run the inference on a sample image to verify that both APIs are working as expected before we move on to running the client. Here is an example image: + +![sample image](https://i.imgur.com/r8xdI7P.png) + +On your local machine run: + +``` +pip install requests click opencv-contrib-python numpy +``` + +and run the following script with Python >= `3.6.x`. The application expects the argument to be a link to an image. The following link is for the above sample image. + + +```bash +export YOLOV3_ENDPOINT=api_endpoint_for_yolov3 +export CRNN_ENDPOINT=api_endpoint_for_crnn +python sample_inference.py "https://i.imgur.com/r8xdI7P.png" +``` + +If all goes well, then a prediction will be saved as a JPEG image to disk. By default, it's saved to `prediction.jpg`. Here is the output for the image above: + +![annotated sample image](https://i.imgur.com/JaD4A05.jpg) + +You can use `python sample_inference.py --help` to find out more. Keep in mind that any detected license plates with a confidence score lower than 80% are discarded. + +If this verification works, then we can move on and run the main client. + +### Running the Client + +Once the APIs are up and running, launch the streaming client by following the instructions at [robertlucian/cortex-license-plate-reader-client](https://github.com/RobertLucian/cortex-license-plate-reader-client). + +*Note: The client is kept in a separate repository to maintain the cortex project clean and focused. Keeping some of the projects that are more complex out of this repository can reduce the confusion.* + +## Customization/Optimization + +### Uploading the Model to S3 + +The only model to upload to an S3 bucket (for Cortex to deploy) is the *YOLOv3* model. The other two models are downloaded automatically upon deploying the service. 
+ +If you would like to host the model from your own bucket, or if you want to fine tune the model for your needs, here's what you can do. + +#### Lite Version + +Download the *Keras* model: + +```bash +wget -O license_plate.h5 "https://www.dropbox.com/s/vsvgoyricooksyv/license_plate.h5?dl=0" +``` + +And then upload it to your bucket (also make sure [cortex_lite.yaml](cortex_lite.yaml) points to this bucket): + +```bash +BUCKET=my-bucket +YOLO3_PATH=examples/tensorflow/license-plate-reader/yolov3_keras +aws s3 cp license_plate.h5 "s3://$BUCKET/$YOLO3_PATH/model.h5" +``` + +#### Full Version + +Download the *SavedModel*: + +```bash +wget -O yolov3.zip "https://www.dropbox.com/sh/4ltffycnzfeul01/AAB7Xdmmi59w0EPOwhQ1nkvua/yolov3?dl=0" +``` + +Unzip it: + +```bash +unzip yolov3.zip -d yolov3 +``` + +And then upload it to your bucket (also make sure [cortex_full.yaml](cortex_full.yaml) points to this bucket): + +```bash +BUCKET=my-bucket +YOLO3_PATH=examples/tensorflow/license-plate-reader/yolov3_tf +aws s3 cp yolov3/ "s3://$BUCKET/$YOLO3_PATH" --recursive +``` + +### Configuring YOLOv3 Predictor + +The `yolov3` API predictor requires a [config.json](config.json) file to configure the input size of the image (dependent on the model's architecture), the anchor boxes, the object threshold, and the IoU threshold. All of these are already set appropriately so no other change is required. + +The configuration file's content is based on [this](https://github.com/experiencor/keras-yolo3/blob/bf37c87561caeccc4f1b879e313d4a3fec1b987e/zoo/config_license_plates.json#L2-L7). + +### Opportunities for performance improvements + +One way to reduce the inference time is to convert the models to use FP16/BFP16 (in mixed mode or not) and then choose the accelerator that gives the best performance in half precision mode - i.e. T4/V100. A speedup of an order of magnitude can be expected. 
diff --git a/test/tensorflow/license-plate-reader/config.json b/test/tensorflow/license-plate-reader/config.json new file mode 100644 index 0000000000..0ff64d0a98 --- /dev/null +++ b/test/tensorflow/license-plate-reader/config.json @@ -0,0 +1,8 @@ +{ + "labels": ["license-plate"], + "net_h" : 416, + "net_w" : 416, + "anchors" : [15,6, 18,8, 22,9, 27,11, 32,13, 41,17, 54,21, 66,27, 82,33], + "obj_thresh" : 0.8, + "nms_thresh" : 0.01 +} diff --git a/test/tensorflow/license-plate-reader/cortex_full.yaml b/test/tensorflow/license-plate-reader/cortex_full.yaml new file mode 100644 index 0000000000..f16f6ab934 --- /dev/null +++ b/test/tensorflow/license-plate-reader/cortex_full.yaml @@ -0,0 +1,35 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: yolov3 + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor_yolo.py + model_path: s3://cortex-examples/tensorflow/license-plate-reader/yolov3_tf/ + processes_per_replica: 4 + threads_per_process: 3 + signature_key: serving_default + config: + model_config: config.json + compute: + cpu: 1 + gpu: 1 + mem: 8G + autoscaling: + min_replicas: 2 + max_replicas: 2 + +- name: crnn + kind: RealtimeAPI + predictor: + type: python + path: predictor_crnn.py + processes_per_replica: 1 + threads_per_process: 1 + compute: + cpu: 1 + gpu: 1 + mem: 8G + autoscaling: + min_replicas: 10 + max_replicas: 10 diff --git a/test/tensorflow/license-plate-reader/cortex_lite.yaml b/test/tensorflow/license-plate-reader/cortex_lite.yaml new file mode 100644 index 0000000000..8e07cd8280 --- /dev/null +++ b/test/tensorflow/license-plate-reader/cortex_lite.yaml @@ -0,0 +1,14 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: license-plate-reader + kind: RealtimeAPI + predictor: + type: python + path: predictor_lite.py + config: + yolov3: s3://cortex-examples/tensorflow/license-plate-reader/yolov3_keras/model.h5 + yolov3_model_config: config.json + compute: + cpu: 1 + gpu: 1 + mem: 4G diff --git a/test/tensorflow/license-plate-reader/predictor_crnn.py b/test/tensorflow/license-plate-reader/predictor_crnn.py new file mode 100644 index 0000000000..aa543f45cf --- /dev/null +++ b/test/tensorflow/license-plate-reader/predictor_crnn.py @@ -0,0 +1,44 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import cv2 +import numpy as np +import keras_ocr +import base64 +import pickle +import tensorflow as tf + + +class PythonPredictor: + def __init__(self, config): + # limit memory usage on each process + for gpu in tf.config.list_physical_devices("GPU"): + tf.config.experimental.set_memory_growth(gpu, True) + + # keras-ocr will automatically download pretrained + # weights for the detector and recognizer. 
+ self.pipeline = keras_ocr.pipeline.Pipeline() + + def predict(self, payload): + # preprocess the images w/ license plates (LPs) + imgs = payload["imgs"] + imgs = base64.b64decode(imgs.encode("utf-8")) + jpgs_as_np = pickle.loads(imgs) + images = [cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR) for jpg_as_np in jpgs_as_np] + + # run batch inference + try: + prediction_groups = self.pipeline.recognize(images) + except ValueError: + # exception can occur when the images are too small + prediction_groups = [] + + image_list = [] + for img_predictions in prediction_groups: + boxes_per_image = [] + for predictions in img_predictions: + boxes_per_image.append([predictions[0], predictions[1].tolist()]) + image_list.append(boxes_per_image) + + lps = {"license-plates": image_list} + + return lps diff --git a/test/tensorflow/license-plate-reader/predictor_lite.py b/test/tensorflow/license-plate-reader/predictor_lite.py new file mode 100644 index 0000000000..0a71b775fa --- /dev/null +++ b/test/tensorflow/license-plate-reader/predictor_lite.py @@ -0,0 +1,120 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import boto3, base64, cv2, re, os, requests, json +import keras_ocr + +from botocore import UNSIGNED +from botocore.client import Config +from tensorflow.keras.models import load_model +import utils.utils as utils +import utils.bbox as bbox_utils +import utils.preprocess as preprocess_utils + + +class PythonPredictor: + def __init__(self, config): + # download yolov3 model + bucket, key = re.match("s3://(.+?)/(.+)", config["yolov3"]).groups() + + if os.environ.get("AWS_ACCESS_KEY_ID"): + s3 = boto3.client("s3") # client will use your credentials if available + else: + s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client + + model_path = "/tmp/model.h5" + s3.download_file(bucket, key, model_path) + + # load yolov3 model + self.yolov3_model = load_model(model_path) + + # get configuration for yolov3 model + with open(config["yolov3_model_config"]) as json_file: + data = json.load(json_file) + for key in data: + setattr(self, key, data[key]) + self.box_confidence_score = 0.8 + + # keras-ocr automatically downloads the pretrained + # weights for the detector and recognizer + self.recognition_model_pipeline = keras_ocr.pipeline.Pipeline() + + def predict(self, payload): + # download image + img_url = payload["url"] + image = preprocess_utils.get_url_image(img_url) + + # detect the bounding boxes + boxes = utils.get_yolo_boxes( + self.yolov3_model, + image, + self.net_h, + self.net_w, + self.anchors, + self.obj_thresh, + self.nms_thresh, + len(self.labels), + tensorflow_model=False, + ) + + # purge bounding boxes with a low confidence score + aux = [] + for b in boxes: + label = -1 + for i in range(len(b.classes)): + if b.classes[i] > self.box_confidence_score: + label = i + if label >= 0: + aux.append(b) + boxes = aux + del aux + + # if bounding boxes have been detected + dec_words = [] + if len(boxes) > 0: + # create set of images of the detected 
license plates + lps = [] + for b in boxes: + lp = image[b.ymin : b.ymax, b.xmin : b.xmax] + lps.append(lp) + + # run batch inference + try: + prediction_groups = self.recognition_model_pipeline.recognize(lps) + except ValueError: + # exception can occur when the images are too small + prediction_groups = [] + + # process pipeline output + image_list = [] + for img_predictions in prediction_groups: + boxes_per_image = [] + for predictions in img_predictions: + boxes_per_image.append([predictions[0], predictions[1].tolist()]) + image_list.append(boxes_per_image) + + # reorder text within detected LPs based on horizontal position + dec_lps = preprocess_utils.reorder_recognized_words(image_list) + for dec_lp in dec_lps: + dec_words.append([word[0] for word in dec_lp]) + + # if there are no recognized LPs, then don't draw them + if len(dec_words) == 0: + dec_words = [[] for i in range(len(boxes))] + + # draw predictions as overlays on the source image + draw_image = bbox_utils.draw_boxes( + image, + boxes, + overlay_text=dec_words, + labels=["LP"], + obj_thresh=self.box_confidence_score, + ) + + # image represented in bytes + byte_im = preprocess_utils.image_to_jpeg_bytes(draw_image) + + # encode image + image_enc = base64.b64encode(byte_im).decode("utf-8") + + # image with draw boxes overlayed + return image_enc diff --git a/test/tensorflow/license-plate-reader/predictor_yolo.py b/test/tensorflow/license-plate-reader/predictor_yolo.py new file mode 100644 index 0000000000..7648b66960 --- /dev/null +++ b/test/tensorflow/license-plate-reader/predictor_yolo.py @@ -0,0 +1,46 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import json +import base64 +import numpy as np +import cv2 +import pickle +import utils.utils as utils + + +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config): + self.client = tensorflow_client + + with open(config["model_config"]) as json_file: + data = json.load(json_file) + for key in data: + setattr(self, key, data[key]) + + def predict(self, payload): + # decode the payload + img = payload["img"] + img = base64.b64decode(img) + jpg_as_np = np.frombuffer(img, dtype=np.uint8) + image = cv2.imdecode(jpg_as_np, flags=cv2.IMREAD_COLOR) + + # detect the bounding boxes + boxes = utils.get_yolo_boxes( + self.client, + image, + self.net_h, + self.net_w, + self.anchors, + self.obj_thresh, + self.nms_thresh, + len(self.labels), + ) + + # package the response + response = {"boxes": []} + for box in boxes: + response["boxes"].append( + [box.xmin, box.ymin, box.xmax, box.ymax, float(box.c), box.classes.tolist()] + ) + + return response diff --git a/test/tensorflow/license-plate-reader/requirements.txt b/test/tensorflow/license-plate-reader/requirements.txt new file mode 100644 index 0000000000..0fb87fcf23 --- /dev/null +++ b/test/tensorflow/license-plate-reader/requirements.txt @@ -0,0 +1,5 @@ +keras-ocr==0.8.5 +keras==2.3.1 +tensorflow==2.3.0 +scipy==1.4.1 +numpy==1.18.* diff --git a/test/tensorflow/license-plate-reader/sample_inference.py b/test/tensorflow/license-plate-reader/sample_inference.py new file mode 100644 index 0000000000..11e217ec78 --- /dev/null +++ b/test/tensorflow/license-plate-reader/sample_inference.py @@ -0,0 +1,100 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import click, cv2, requests, pickle, base64, json +import numpy as np +import utils.bbox as bbox_utils +import utils.preprocess as preprocess_utils + + +@click.command( + help=( + "Identify license plates in a given image" + " while outsourcing the predictions using the REST API endpoints." + " Both API endpoints have to be exported as environment variables." + ) +) +@click.argument("img_url_src", type=str) +@click.argument("yolov3_endpoint", envvar="YOLOV3_ENDPOINT") +@click.argument("crnn_endpoint", envvar="CRNN_ENDPOINT") +@click.option( + "--output", + "-o", + type=str, + default="prediction.jpg", + show_default=True, + help="File to save the prediction to.", +) +def main(img_url_src, yolov3_endpoint, crnn_endpoint, output): + + # get the image in bytes representation + image = preprocess_utils.get_url_image(img_url_src) + image_bytes = preprocess_utils.image_to_jpeg_bytes(image) + + # encode image + image_enc = base64.b64encode(image_bytes).decode("utf-8") + image_dump = json.dumps({"img": image_enc}) + + # make yolov3 api request + resp = requests.post( + yolov3_endpoint, data=image_dump, headers={"content-type": "application/json"} + ) + + # parse response + boxes_raw = resp.json()["boxes"] + boxes = [] + for b in boxes_raw: + box = bbox_utils.BoundBox(*b) + boxes.append(box) + + # purge bounding boxes with a low confidence score + confidence_score = 0.8 + aux = [] + for b in boxes: + label = -1 + for i in range(len(b.classes)): + if b.classes[i] > confidence_score: + label = i + if label >= 0: + aux.append(b) + boxes = aux + del aux + + dec_words = [] + if len(boxes) > 0: + # create set of images of the detected license plates + lps = [] + for b in boxes: + lp = image[b.ymin : b.ymax, b.xmin : b.xmax] + jpeg = preprocess_utils.image_to_jpeg_nparray(lp) + lps.append(jpeg) + + # encode the cropped license plates + lps = pickle.dumps(lps, protocol=0) + lps_enc = 
base64.b64encode(lps).decode("utf-8") + lps_dump = json.dumps({"imgs": lps_enc}) + + # make crnn api request + resp = requests.post( + crnn_endpoint, data=lps_dump, headers={"content-type": "application/json"} + ) + + # parse the response + dec_lps = resp.json()["license-plates"] + dec_lps = preprocess_utils.reorder_recognized_words(dec_lps) + for dec_lp in dec_lps: + dec_words.append([word[0] for word in dec_lp]) + + if len(dec_words) == 0: + dec_words = [[] for i in range(len(boxes))] + + # draw predictions as overlays on the source image + draw_image = bbox_utils.draw_boxes( + image, boxes, overlay_text=dec_words, labels=["LP"], obj_thresh=confidence_score + ) + + # and save it to disk + cv2.imwrite(output, draw_image) + + +if __name__ == "__main__": + main() diff --git a/test/tensorflow/license-plate-reader/utils/__init__.py b/test/tensorflow/license-plate-reader/utils/__init__.py new file mode 100644 index 0000000000..5f47d63e43 --- /dev/null +++ b/test/tensorflow/license-plate-reader/utils/__init__.py @@ -0,0 +1 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) diff --git a/test/tensorflow/license-plate-reader/utils/bbox.py b/test/tensorflow/license-plate-reader/utils/bbox.py new file mode 100644 index 0000000000..de9c7ef8c0 --- /dev/null +++ b/test/tensorflow/license-plate-reader/utils/bbox.py @@ -0,0 +1,111 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import numpy as np +import cv2 +from .colors import get_color + + +class BoundBox: + def __init__(self, xmin, ymin, xmax, ymax, c=None, classes=None): + self.xmin = xmin + self.ymin = ymin + self.xmax = xmax + self.ymax = ymax + + self.c = c + self.classes = classes + + self.label = -1 + self.score = -1 + + def get_label(self): + if self.label == -1: + self.label = np.argmax(self.classes) + + return self.label + + def get_score(self): + if self.score == -1: + self.score = self.classes[self.get_label()] + + return self.score + + +def _interval_overlap(interval_a, interval_b): + x1, x2 = interval_a + x3, x4 = interval_b + + if x3 < x1: + if x4 < x1: + return 0 + else: + return min(x2, x4) - x1 + else: + if x2 < x3: + return 0 + else: + return min(x2, x4) - x3 + + +def bbox_iou(box1, box2): + intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) + intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) + + intersect = intersect_w * intersect_h + + w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin + w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin + + union = w1 * h1 + w2 * h2 - intersect + + return float(intersect) / union + + +def draw_boxes(image, boxes, overlay_text, labels, obj_thresh, quiet=True): + for box, overlay in zip(boxes, overlay_text): + label_str = "" + label = -1 + + for i in range(len(labels)): + if box.classes[i] > obj_thresh: + if label_str != "": + label_str += ", " + label_str += labels[i] + " " + str(round(box.get_score() * 100, 2)) + "%" + label = i + if not quiet: + print(label_str) + + if label >= 0: + if len(overlay) > 0: + text = label_str + ": [" + " ".join(overlay) + "]" + else: + text = label_str + text = text.upper() + text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-3 * image.shape[0], 5) + width, height = text_size[0][0], text_size[0][1] + region = np.array( + [ + 
[box.xmin - 3, box.ymin], + [box.xmin - 3, box.ymin - height - 26], + [box.xmin + width + 13, box.ymin - height - 26], + [box.xmin + width + 13, box.ymin], + ], + dtype="int32", + ) + + # cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=5) + rec = (box.xmin, box.ymin, box.xmax - box.xmin, box.ymax - box.ymin) + rec = tuple(int(i) for i in rec) + cv2.rectangle(img=image, rec=rec, color=get_color(label), thickness=3) + cv2.fillPoly(img=image, pts=[region], color=get_color(label)) + cv2.putText( + img=image, + text=text, + org=(box.xmin + 13, box.ymin - 13), + fontFace=cv2.FONT_HERSHEY_SIMPLEX, + fontScale=1e-3 * image.shape[0], + color=(0, 0, 0), + thickness=1, + ) + + return image diff --git a/test/tensorflow/license-plate-reader/utils/colors.py b/test/tensorflow/license-plate-reader/utils/colors.py new file mode 100644 index 0000000000..2902c4e5aa --- /dev/null +++ b/test/tensorflow/license-plate-reader/utils/colors.py @@ -0,0 +1,100 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + + +def get_color(label): + """Return a color from a set of predefined colors. Contains 80 colors in total. + code originally from https://github.com/fizyr/keras-retinanet/ + Args + label: The label to get the color for. + Returns + A list of three values representing a RGB color. 
+ """ + if label < len(colors): + return colors[label] + else: + print("Label {} has no color, returning default.".format(label)) + return (0, 255, 0) + + +colors = [ + [31, 0, 255], + [0, 159, 255], + [255, 95, 0], + [255, 19, 0], + [255, 0, 0], + [255, 38, 0], + [0, 255, 25], + [255, 0, 133], + [255, 172, 0], + [108, 0, 255], + [0, 82, 255], + [0, 255, 6], + [255, 0, 152], + [223, 0, 255], + [12, 0, 255], + [0, 255, 178], + [108, 255, 0], + [184, 0, 255], + [255, 0, 76], + [146, 255, 0], + [51, 0, 255], + [0, 197, 255], + [255, 248, 0], + [255, 0, 19], + [255, 0, 38], + [89, 255, 0], + [127, 255, 0], + [255, 153, 0], + [0, 255, 255], + [0, 255, 216], + [0, 255, 121], + [255, 0, 248], + [70, 0, 255], + [0, 255, 159], + [0, 216, 255], + [0, 6, 255], + [0, 63, 255], + [31, 255, 0], + [255, 57, 0], + [255, 0, 210], + [0, 255, 102], + [242, 255, 0], + [255, 191, 0], + [0, 255, 63], + [255, 0, 95], + [146, 0, 255], + [184, 255, 0], + [255, 114, 0], + [0, 255, 235], + [255, 229, 0], + [0, 178, 255], + [255, 0, 114], + [255, 0, 57], + [0, 140, 255], + [0, 121, 255], + [12, 255, 0], + [255, 210, 0], + [0, 255, 44], + [165, 255, 0], + [0, 25, 255], + [0, 255, 140], + [0, 101, 255], + [0, 255, 82], + [223, 255, 0], + [242, 0, 255], + [89, 0, 255], + [165, 0, 255], + [70, 255, 0], + [255, 0, 172], + [255, 76, 0], + [203, 255, 0], + [204, 0, 255], + [255, 0, 229], + [255, 133, 0], + [127, 0, 255], + [0, 235, 255], + [0, 255, 197], + [255, 0, 191], + [0, 44, 255], + [50, 255, 0], +] diff --git a/test/tensorflow/license-plate-reader/utils/preprocess.py b/test/tensorflow/license-plate-reader/utils/preprocess.py new file mode 100644 index 0000000000..5e40a35719 --- /dev/null +++ b/test/tensorflow/license-plate-reader/utils/preprocess.py @@ -0,0 +1,59 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import numpy as np +import cv2, requests +from statistics import mean + + +def get_url_image(url_image): + """ + Get numpy image from URL image. + """ + resp = requests.get(url_image, stream=True).raw + image = np.asarray(bytearray(resp.read()), dtype="uint8") + image = cv2.imdecode(image, cv2.IMREAD_COLOR) + return image + + +def image_to_jpeg_nparray(image, quality=[int(cv2.IMWRITE_JPEG_QUALITY), 95]): + """ + Convert numpy image to jpeg numpy vector. + """ + is_success, im_buf_arr = cv2.imencode(".jpg", image, quality) + return im_buf_arr + + +def image_to_jpeg_bytes(image, quality=[int(cv2.IMWRITE_JPEG_QUALITY), 95]): + """ + Convert numpy image to bytes-encoded jpeg image. + """ + buf = image_to_jpeg_nparray(image, quality) + byte_im = buf.tobytes() + return byte_im + + +def reorder_recognized_words(detected_images): + """ + Reorder the detected words in each image based on the average horizontal position of each word. + Sorting them in ascending order. 
+ """ + + reordered_images = [] + for detected_image in detected_images: + + # computing the mean average position for each word + mean_horizontal_positions = [] + for words in detected_image: + box = words[1] + y_positions = [point[0] for point in box] + mean_y_position = mean(y_positions) + mean_horizontal_positions.append(mean_y_position) + indexes = np.argsort(mean_horizontal_positions) + + # and reordering them + reordered = [] + for index, words in zip(indexes, detected_image): + reordered.append(detected_image[index]) + reordered_images.append(reordered) + + return reordered_images diff --git a/test/tensorflow/license-plate-reader/utils/utils.py b/test/tensorflow/license-plate-reader/utils/utils.py new file mode 100644 index 0000000000..9d07b289e0 --- /dev/null +++ b/test/tensorflow/license-plate-reader/utils/utils.py @@ -0,0 +1,160 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import cv2 +import numpy as np +import math +from .bbox import BoundBox, bbox_iou +from scipy.special import expit + + +def _sigmoid(x): + return expit(x) + + +def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w): + if (float(net_w) / image_w) < (float(net_h) / image_h): + new_w = net_w + new_h = (image_h * net_w) / image_w + else: + new_h = net_w + new_w = (image_w * net_h) / image_h + + for i in range(len(boxes)): + x_offset, x_scale = (net_w - new_w) / 2.0 / net_w, float(new_w) / net_w + y_offset, y_scale = (net_h - new_h) / 2.0 / net_h, float(new_h) / net_h + + boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w) + boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w) + boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h) + boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h) + + +def do_nms(boxes, nms_thresh): + if len(boxes) > 0: + nb_class 
= len(boxes[0].classes) + else: + return + + for c in range(nb_class): + sorted_indices = np.argsort([-box.classes[c] for box in boxes]) + + for i in range(len(sorted_indices)): + index_i = sorted_indices[i] + + if boxes[index_i].classes[c] == 0: + continue + + for j in range(i + 1, len(sorted_indices)): + index_j = sorted_indices[j] + + if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: + boxes[index_j].classes[c] = 0 + + +def decode_netout(netout, anchors, obj_thresh, net_h, net_w): + grid_h, grid_w = netout.shape[:2] + nb_box = 3 + netout = netout.reshape((grid_h, grid_w, nb_box, -1)) + nb_class = netout.shape[-1] - 5 + + boxes = [] + + netout[..., :2] = _sigmoid(netout[..., :2]) + netout[..., 4] = _sigmoid(netout[..., 4]) + netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) + netout[..., 5:] *= netout[..., 5:] > obj_thresh + + for i in range(grid_h * grid_w): + row = i // grid_w + col = i % grid_w + + for b in range(nb_box): + # 4th element is objectness score + objectness = netout[row, col, b, 4] + + if objectness <= obj_thresh: + continue + + # first 4 elements are x, y, w, and h + x, y, w, h = netout[row, col, b, :4] + + x = (col + x) / grid_w # center position, unit: image width + y = (row + y) / grid_h # center position, unit: image height + w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width + h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height + + # last elements are class probabilities + classes = netout[row, col, b, 5:] + + box = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, objectness, classes) + + boxes.append(box) + + return boxes + + +def preprocess_input(image, net_h, net_w): + new_h, new_w, _ = image.shape + + # determine the new size of the image + if (float(net_w) / new_w) < (float(net_h) / new_h): + new_h = (new_h * net_w) // new_w + new_w = net_w + else: + new_w = (new_w * net_h) // new_h + new_h = net_h + + # resize the image to the new size + resized = cv2.resize(image[:, :, 
::-1] / 255.0, (new_w, new_h)) + + # embed the image into the standard letter box + new_image = np.ones((net_h, net_w, 3)) * 0.5 + new_image[ + (net_h - new_h) // 2 : (net_h + new_h) // 2, (net_w - new_w) // 2 : (net_w + new_w) // 2, : + ] = resized + new_image = np.expand_dims(new_image, 0) + + return new_image + + +def get_yolo_boxes( + model, image, net_h, net_w, anchors, obj_thresh, nms_thresh, classes, tensorflow_model=True +): + # preprocess the input + image_h, image_w, _ = image.shape + batch_input = np.zeros((1, net_h, net_w, 3)) + batch_input[0] = preprocess_input(image, net_h, net_w) + + # run the prediction + if tensorflow_model: + output = model.predict({"input_1": batch_input}) + yolos = [output["conv_81"], output["conv_93"], output["conv_105"]] + filters = 3 * (5 + classes) + for i in range(len(yolos)): + length = len(yolos[i]) + box_size = int(math.sqrt(length / filters)) + yolos[i] = np.array(yolos[i]).reshape((box_size, box_size, filters)) + else: + output = model.predict_on_batch(batch_input) + yolos = [output[0][0], output[1][0], output[2][0]] + + boxes = [] + # decode the output of the network + for j in range(len(yolos)): + yolo_anchors = anchors[(2 - j) * 6 : (3 - j) * 6] # config['model']['anchors'] + boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w) + + # correct the sizes of the bounding boxes + correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w) + + # suppress non-maximal boxes + do_nms(boxes, nms_thresh) + + return boxes + + +def _softmax(x, axis=-1): + x = x - np.amax(x, axis, keepdims=True) + e_x = np.exp(x) + + return e_x / e_x.sum(axis, keepdims=True) diff --git a/test/tensorflow/multi-model-classifier/README.md b/test/tensorflow/multi-model-classifier/README.md new file mode 100644 index 0000000000..631f800179 --- /dev/null +++ b/test/tensorflow/multi-model-classifier/README.md @@ -0,0 +1,69 @@ +# Multi-Model Classifier API + +_WARNING: you are on the master branch; please refer to examples on the branch 
corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example deploys Iris, ResNet50 and Inception models in one API. Query parameters are used for selecting the model. + +The example can be run on both CPU and GPU hardware. + +## Sample Prediction + +Deploy the model by running: + +```bash +cortex deploy +``` + +And wait for it to become live by tracking its status with `cortex get --watch`. + +Once the API has been successfully deployed, export the API's endpoint. You can get the API's endpoint by running `cortex get multi-model-classifier`. + +```bash +export ENDPOINT=your-api-endpoint +``` + +When making a prediction with [sample-image.json](sample-image.json), the following image will be used: + +![sports car](https://i.imgur.com/zovGIKD.png) + +### ResNet50 Classifier + +Make a request to the ResNet50 model: + +```bash +curl "${ENDPOINT}?model=resnet50" -X POST -H "Content-Type: application/json" -d @sample-image.json +``` + +The expected response is: + +```json +{"label": "sports_car"} +``` + +### Inception Classifier + +Make a request to the Inception model: + +```bash +curl "${ENDPOINT}?model=inception" -X POST -H "Content-Type: application/json" -d @sample-image.json +``` + +The expected response is: + +```json +{"label": "sports_car"} +``` + +### Iris Classifier + +Make a request to the Iris model: + +```bash +curl "${ENDPOINT}?model=iris" -X POST -H "Content-Type: application/json" -d @sample-iris.json +``` + +The expected response is: + +```json +{"label": "setosa"} +``` diff --git a/test/tensorflow/multi-model-classifier/cortex.yaml b/test/tensorflow/multi-model-classifier/cortex.yaml new file mode 100644 index 0000000000..ef99bc941e --- /dev/null +++ b/test/tensorflow/multi-model-classifier/cortex.yaml @@ -0,0 +1,30 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: multi-model-classifier + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + models: + paths: + - name: inception + model_path: s3://cortex-examples/tensorflow/image-classifier/inception/ + - name: iris + model_path: s3://cortex-examples/tensorflow/iris-classifier/nn/ + - name: resnet50 + model_path: s3://cortex-examples/tensorflow/resnet50/ + config: + models: + iris: + labels: ["setosa", "versicolor", "virginica"] + resnet50: + input_shape: [224, 224] + input_key: input + output_key: output + inception: + input_shape: [224, 224] + input_key: images + output_key: classes + image-classifier-classes: https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json + compute: + mem: 2G diff --git a/test/tensorflow/multi-model-classifier/predictor.py b/test/tensorflow/multi-model-classifier/predictor.py new file mode 100644 index 0000000000..6577777037 --- /dev/null +++ b/test/tensorflow/multi-model-classifier/predictor.py @@ -0,0 +1,62 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import requests +import numpy as np +import cv2 + + +def get_url_image(url_image): + """ + Get numpy image from URL image. 
+ """ + resp = requests.get(url_image, stream=True).raw + image = np.asarray(bytearray(resp.read()), dtype="uint8") + image = cv2.imdecode(image, cv2.IMREAD_COLOR) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + return image + + +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config): + self.client = tensorflow_client + + # for image classifiers + classes = requests.get(config["image-classifier-classes"]).json() + self.image_classes = [classes[str(k)][1] for k in range(len(classes))] + + # assign "models"' key value to self.config for ease of use + self.config = config["models"] + + # for iris classifier + self.iris_labels = self.config["iris"]["labels"] + + def predict(self, payload, query_params): + model_name = query_params["model"] + predicted_label = None + + if model_name == "iris": + prediction = self.client.predict(payload["input"], model_name) + predicted_class_id = int(prediction["class_ids"][0]) + predicted_label = self.iris_labels[predicted_class_id] + + elif model_name in ["resnet50", "inception"]: + predicted_label = self.predict_image_classifier(model_name, payload["url"]) + + return {"label": predicted_label} + + def predict_image_classifier(self, model, img_url): + img = get_url_image(img_url) + img = cv2.resize( + img, tuple(self.config[model]["input_shape"]), interpolation=cv2.INTER_NEAREST + ) + if model == "inception": + img = img.astype("float32") / 255 + img = {self.config[model]["input_key"]: img[np.newaxis, ...]} + + results = self.client.predict(img, model)[self.config[model]["output_key"]] + result = np.argmax(results) + if model == "inception": + result -= 1 + predicted_label = self.image_classes[result] + + return predicted_label diff --git a/test/tensorflow/multi-model-classifier/requirements.txt b/test/tensorflow/multi-model-classifier/requirements.txt new file mode 100644 index 0000000000..7e2fba5e6c --- /dev/null +++ b/test/tensorflow/multi-model-classifier/requirements.txt @@ -0,0 +1 @@ +Pillow diff --git 
a/test/tensorflow/multi-model-classifier/sample-image.json b/test/tensorflow/multi-model-classifier/sample-image.json new file mode 100644 index 0000000000..95200916c7 --- /dev/null +++ b/test/tensorflow/multi-model-classifier/sample-image.json @@ -0,0 +1,3 @@ +{ + "url": "https://i.imgur.com/zovGIKD.png" +} diff --git a/test/tensorflow/multi-model-classifier/sample-iris.json b/test/tensorflow/multi-model-classifier/sample-iris.json new file mode 100644 index 0000000000..67c03827f2 --- /dev/null +++ b/test/tensorflow/multi-model-classifier/sample-iris.json @@ -0,0 +1,8 @@ +{ + "input": { + "sepal_length": 5.2, + "sepal_width": 3.6, + "petal_length": 1.4, + "petal_width": 0.3 + } +} diff --git a/test/tensorflow/sentiment-analyzer/README.md b/test/tensorflow/sentiment-analyzer/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/tensorflow/sentiment-analyzer/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. 
diff --git a/test/tensorflow/sentiment-analyzer/bert.ipynb b/test/tensorflow/sentiment-analyzer/bert.ipynb new file mode 100644 index 0000000000..27ca8c67b1 --- /dev/null +++ b/test/tensorflow/sentiment-analyzer/bert.ipynb @@ -0,0 +1,1007 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "bert.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "j0a4mTk9o1Qg", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Modified source from https://colab.research.google.com/github/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb\n", + "\n", + "# Copyright 2019 Google Inc.\n", + "\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dCpvgG0vwXAZ", + "colab_type": "text" + }, + "source": [ + "#Predicting Movie Review Sentiment with BERT on TF Hub", + "\n", + "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xiYrZKaHwV81", + "colab_type": "text" + }, + "source": [ + "If you’ve been following Natural Language Processing over the past year, you’ve probably heard of BERT: Bidirectional Encoder Representations from Transformers. It’s a neural network architecture designed by Google researchers that’s totally transformed what’s state-of-the-art for NLP tasks, like text classification, translation, summarization, and question answering.\n", + "\n", + "Now that BERT's been added to [TF Hub](https://www.tensorflow.org/hub) as a loadable module, it's easy(ish) to add into existing TensorFlow text pipelines. In an existing pipeline, BERT can replace text embedding layers like ELMO and GloVE. Alternatively, [finetuning](http://wiki.fast.ai/index.php/Fine_tuning) BERT can provide both an accuracy boost and faster training time in many cases.\n", + "\n", + "Here, we'll train a model to predict whether an IMDB movie review is positive or negative using BERT in TensorFlow with tf hub. Some code was adapted from [this colab notebook](https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb). Let's get started!" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "chM4UttbMIqq", + "colab_type": "text" + }, + "source": [ + "First, we'll install the required packages:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jviywGyWyKsA", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!pip install bert-tensorflow==1.0.* tensorflow-gpu==1.13.* scikit-learn==0.21.* pandas==0.24.* tensorflow-hub==0.6.* boto3==1.*" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "hsZvic2YxnTz", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from datetime import datetime\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "import pandas as pd\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import bert\n", + "from bert import run_classifier\n", + "from bert import optimization\n", + "from bert import tokenization" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KVB3eOcjxxm1", + "colab_type": "text" + }, + "source": [ + "Below, we'll set an output location to store our model output, checkpoints, and export in a local directory. Note: if you're running on Google Colab, local directories don't persist after the session ends." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "US_EAnICvP7f", + "colab_type": "code", + "colab": {} + }, + "source": [ + "OUTPUT_DIR = \"bert\"\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pmFYvkylMwXn", + "colab_type": "text" + }, + "source": [ + "#Data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MC_w8SRqN0fr", + "colab_type": "text" + }, + "source": [ + "First, let's download the dataset, hosted by Stanford. 
The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this TensorFlow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub)." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "fom_ff20gyy6", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from tensorflow import keras\n", + "import os\n", + "import re\n", + "\n", + "# Load all files from a directory in a DataFrame.\n", + "def load_directory_data(directory):\n", + " data = {}\n", + " data[\"sentence\"] = []\n", + " data[\"sentiment\"] = []\n", + " for file_path in os.listdir(directory):\n", + " with tf.gfile.GFile(os.path.join(directory, file_path), \"r\") as f:\n", + " data[\"sentence\"].append(f.read())\n", + " data[\"sentiment\"].append(re.match(\"\\d+_(\\d+)\\.txt\", file_path).group(1))\n", + " return pd.DataFrame.from_dict(data)\n", + "\n", + "# Merge positive and negative examples, add a polarity column and shuffle.\n", + "def load_dataset(directory):\n", + " pos_df = load_directory_data(os.path.join(directory, \"pos\"))\n", + " neg_df = load_directory_data(os.path.join(directory, \"neg\"))\n", + " pos_df[\"polarity\"] = 1\n", + " neg_df[\"polarity\"] = 0\n", + " return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)\n", + "\n", + "# Download and process the dataset files.\n", + "def download_and_load_datasets(force_download=False):\n", + " dataset = tf.keras.utils.get_file(\n", + " fname=\"aclImdb.tar.gz\", \n", + " origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\", \n", + " extract=True)\n", + " \n", + " train_df = load_dataset(os.path.join(os.path.dirname(dataset), \n", + " \"aclImdb\", \"train\"))\n", + " test_df = load_dataset(os.path.join(os.path.dirname(dataset), \n", + " \"aclImdb\", \"test\"))\n", + " \n", + " return train_df, test_df\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "2abfwdn-g135", 
+ "colab_type": "code", + "colab": {} + }, + "source": [ + "train, test = download_and_load_datasets()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XA8WHJgzhIZf", + "colab_type": "text" + }, + "source": [ + "To keep training fast, we'll take a sample of 5000 train and test examples, respectively." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lw_F488eixTV", + "colab_type": "code", + "colab": {} + }, + "source": [ + "train = train.sample(5000)\n", + "test = test.sample(5000)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "prRQM8pDi8xI", + "colab_type": "code", + "colab": {} + }, + "source": [ + "train.columns" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sfRnHSz3iSXz", + "colab_type": "text" + }, + "source": [ + "For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respectively)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IuMOGwFui4it", + "colab_type": "code", + "colab": {} + }, + "source": [ + "DATA_COLUMN = 'sentence'\n", + "LABEL_COLUMN = 'polarity'\n", + "# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'\n", + "label_list = [0, 1]" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V399W0rqNJ-Z", + "colab_type": "text" + }, + "source": [ + "#Data Preprocessing\n", + "We'll need to transform our data into a format BERT understands. This involves two steps. First, we create `InputExample`'s using the constructor provided in the BERT library.\n", + "\n", + "- `text_a` is the text we want to classify, which in this case, is the `sentence` column of our DataFrame. \n", + "- `text_b` is used if we're training a model to understand the relationship between sentences (i.e. is `text_b` a translation of `text_a`?
Is `text_b` an answer to the question asked by `text_a`?). This doesn't apply to our task, so we can leave `text_b` blank.\n", + "- `label` is the label for our example, i.e. True, False" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "p9gEt5SmM6i6", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Use the InputExample class from BERT's run_classifier code to create examples from the data\n", + "train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example\n", + " text_a = x[DATA_COLUMN], \n", + " text_b = None, \n", + " label = x[LABEL_COLUMN]), axis = 1)\n", + "\n", + "test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, \n", + " text_a = x[DATA_COLUMN], \n", + " text_b = None, \n", + " label = x[LABEL_COLUMN]), axis = 1)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SCZWZtKxObjh", + "colab_type": "text" + }, + "source": [ + "Next, we need to preprocess our data so that it matches the data BERT was trained on. For this, we'll need to do a couple of things (but don't worry--this is also included in the Python library):\n", + "\n", + "\n", + "1. Lowercase our text (if we're using a BERT lowercase model)\n", + "2. Tokenize it (i.e. \"sally says hi\" -> [\"sally\", \"says\", \"hi\"])\n", + "3. Break words into WordPieces (i.e. \"calling\" -> [\"call\", \"##ing\"])\n", + "4. Map our words to indexes using a vocab file that BERT provides\n", + "5. Add special \"CLS\" and \"SEP\" tokens (see the [readme](https://github.com/google-research/bert))\n", + "6. 
Append \"index\" and \"segment\" tokens to each input (see the [BERT paper](https://arxiv.org/pdf/1810.04805.pdf))\n", + "\n", + "Happily, we don't have to worry about most of these details.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qMWiDtpyQSoU", + "colab_type": "text" + }, + "source": [ + "To start, we'll need to load a vocabulary file and lowercasing information directly from the BERT tf hub module:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IhJSe0QHNG7U", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# This is a path to an uncased (all lowercase) version of BERT\n", + "BERT_MODEL_HUB = \"https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1\"\n", + "\n", + "def create_tokenizer_from_hub_module():\n", + " \"\"\"Get the vocab file and casing info from the Hub module.\"\"\"\n", + " with tf.Graph().as_default():\n", + " bert_module = hub.Module(BERT_MODEL_HUB)\n", + " tokenization_info = bert_module(signature=\"tokenization_info\", as_dict=True)\n", + " with tf.Session() as sess:\n", + " vocab_file, do_lower_case = sess.run([tokenization_info[\"vocab_file\"],\n", + " tokenization_info[\"do_lower_case\"]])\n", + " \n", + " return bert.tokenization.FullTokenizer(\n", + " vocab_file=vocab_file, do_lower_case=do_lower_case)\n", + "\n", + "tokenizer = create_tokenizer_from_hub_module()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z4oFkhpZBDKm", + "colab_type": "text" + }, + "source": [ + "Great--we just learned that the BERT model we're using expects lowercase data (that's what stored in tokenization_info[\"do_lower_case\"]) and we also loaded BERT's vocab file. 
We also created a tokenizer, which breaks words into word pieces:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dsBo6RCtQmwx", + "colab_type": "code", + "colab": {} + }, + "source": [ + "tokenizer.tokenize(\"This here's an example of using the BERT tokenizer\")" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0OEzfFIt6GIc", + "colab_type": "text" + }, + "source": [ + "Using our tokenizer, we'll call `run_classifier.convert_examples_to_features` on our InputExamples to convert them into features BERT understands." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "LL5W8gEGRTAf", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# We'll set sequences to be at most 128 tokens long.\n", + "MAX_SEQ_LENGTH = 128\n", + "# Convert our train and test features to InputFeatures that BERT understands.\n", + "train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)\n", + "test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ccp5trMwRtmr", + "colab_type": "text" + }, + "source": [ + "#Creating a model\n", + "\n", + "Now that we've prepared our data, let's focus on building a model. `create_model` does just this below. First, it loads the BERT tf hub module again (this time to extract the computation graph). Next, it creates a single new layer that will be trained to adapt BERT to our sentiment task (i.e. classifying whether a movie review is positive or negative). This strategy of using a mostly trained model is called [fine-tuning](http://wiki.fast.ai/index.php/Fine_tuning)." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6o2a5ZIvRcJq", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,\n", + " num_labels):\n", + " \"\"\"Creates a classification model.\"\"\"\n", + "\n", + " bert_module = hub.Module(\n", + " BERT_MODEL_HUB,\n", + " trainable=True)\n", + " bert_inputs = dict(\n", + " input_ids=input_ids,\n", + " input_mask=input_mask,\n", + " segment_ids=segment_ids)\n", + " bert_outputs = bert_module(\n", + " inputs=bert_inputs,\n", + " signature=\"tokens\",\n", + " as_dict=True)\n", + "\n", + " # Use \"pooled_output\" for classification tasks on an entire sentence.\n", + " # Use \"sequence_outputs\" for token-level output.\n", + " output_layer = bert_outputs[\"pooled_output\"]\n", + "\n", + " hidden_size = output_layer.shape[-1].value\n", + "\n", + " # Create our own layer to tune for sentiment data.\n", + " output_weights = tf.get_variable(\n", + " \"output_weights\", [num_labels, hidden_size],\n", + " initializer=tf.truncated_normal_initializer(stddev=0.02))\n", + "\n", + " output_bias = tf.get_variable(\n", + " \"output_bias\", [num_labels], initializer=tf.zeros_initializer())\n", + "\n", + " with tf.variable_scope(\"loss\"):\n", + "\n", + " # Dropout helps prevent overfitting\n", + " output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)\n", + "\n", + " logits = tf.matmul(output_layer, output_weights, transpose_b=True)\n", + " logits = tf.nn.bias_add(logits, output_bias)\n", + " log_probs = tf.nn.log_softmax(logits, axis=-1)\n", + "\n", + " # Convert labels into one-hot encoding\n", + " one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)\n", + "\n", + " predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))\n", + " # If we're predicting, we want predicted labels and the log probabilities.\n", + " if is_predicting:\n", + " return (predicted_labels, log_probs)\n", + "\n", + " # If we're
train/eval, compute loss between predicted and actual label\n", + " per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)\n", + " loss = tf.reduce_mean(per_example_loss)\n", + " return (loss, predicted_labels, log_probs)\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qpE0ZIDOCQzE", + "colab_type": "text" + }, + "source": [ + "Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FnH-AnOQ9KKW", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# model_fn_builder actually creates our model function\n", + "# using the passed parameters for num_labels, learning_rate, etc.\n", + "def model_fn_builder(num_labels, learning_rate, num_train_steps,\n", + " num_warmup_steps):\n", + " \"\"\"Returns `model_fn` closure for TPUEstimator.\"\"\"\n", + " def model_fn(features, labels, mode, params): # pylint: disable=unused-argument\n", + " \"\"\"The `model_fn` for TPUEstimator.\"\"\"\n", + "\n", + " input_ids = features[\"input_ids\"]\n", + " input_mask = features[\"input_mask\"]\n", + " segment_ids = features[\"segment_ids\"]\n", + " label_ids = features[\"label_ids\"]\n", + "\n", + " is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)\n", + " \n", + " # TRAIN and EVAL\n", + " if not is_predicting:\n", + "\n", + " (loss, predicted_labels, log_probs) = create_model(\n", + " is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)\n", + "\n", + " train_op = bert.optimization.create_optimizer(\n", + " loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)\n", + "\n", + " # Calculate evaluation metrics. 
\n", + " def metric_fn(label_ids, predicted_labels):\n", + " accuracy = tf.metrics.accuracy(label_ids, predicted_labels)\n", + " f1_score = tf.contrib.metrics.f1_score(\n", + " label_ids,\n", + " predicted_labels)\n", + " auc = tf.metrics.auc(\n", + " label_ids,\n", + " predicted_labels)\n", + " recall = tf.metrics.recall(\n", + " label_ids,\n", + " predicted_labels)\n", + " precision = tf.metrics.precision(\n", + " label_ids,\n", + " predicted_labels) \n", + " true_pos = tf.metrics.true_positives(\n", + " label_ids,\n", + " predicted_labels)\n", + " true_neg = tf.metrics.true_negatives(\n", + " label_ids,\n", + " predicted_labels) \n", + " false_pos = tf.metrics.false_positives(\n", + " label_ids,\n", + " predicted_labels) \n", + " false_neg = tf.metrics.false_negatives(\n", + " label_ids,\n", + " predicted_labels)\n", + " return {\n", + " \"eval_accuracy\": accuracy,\n", + " \"f1_score\": f1_score,\n", + " \"auc\": auc,\n", + " \"precision\": precision,\n", + " \"recall\": recall,\n", + " \"true_positives\": true_pos,\n", + " \"true_negatives\": true_neg,\n", + " \"false_positives\": false_pos,\n", + " \"false_negatives\": false_neg\n", + " }\n", + "\n", + " eval_metrics = metric_fn(label_ids, predicted_labels)\n", + "\n", + " if mode == tf.estimator.ModeKeys.TRAIN:\n", + " return tf.estimator.EstimatorSpec(mode=mode,\n", + " loss=loss,\n", + " train_op=train_op)\n", + " else:\n", + " return tf.estimator.EstimatorSpec(mode=mode,\n", + " loss=loss,\n", + " eval_metric_ops=eval_metrics)\n", + " else:\n", + " (predicted_labels, log_probs) = create_model(\n", + " is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)\n", + "\n", + " predictions = {\n", + " 'probabilities': log_probs,\n", + " 'labels': predicted_labels\n", + " }\n", + " return tf.estimator.EstimatorSpec(mode, predictions=predictions)\n", + "\n", + " # Return the actual model function in the closure\n", + " return model_fn\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + 
"cell_type": "code", + "metadata": { + "id": "OjwJ4bTeWXD8", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Compute train and warmup steps from batch size\n", + "# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)\n", + "BATCH_SIZE = 32\n", + "LEARNING_RATE = 2e-5\n", + "NUM_TRAIN_EPOCHS = 3.0\n", + "# Warmup is a period of time where hte learning rate \n", + "# is small and gradually increases--usually helps training.\n", + "WARMUP_PROPORTION = 0.1\n", + "# Model configs\n", + "SAVE_CHECKPOINTS_STEPS = 500\n", + "SAVE_SUMMARY_STEPS = 100" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "emHf9GhfWBZ_", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Compute # train and warmup steps from batch size\n", + "num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)\n", + "num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "oEJldMr3WYZa", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Specify outpit directory and number of checkpoint steps to save\n", + "run_config = tf.estimator.RunConfig(\n", + " model_dir=OUTPUT_DIR,\n", + " save_summary_steps=SAVE_SUMMARY_STEPS,\n", + " save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "q_WebpS1X97v", + "colab_type": "code", + "colab": {} + }, + "source": [ + "model_fn = model_fn_builder(\n", + " num_labels=len(label_list),\n", + " learning_rate=LEARNING_RATE,\n", + " num_train_steps=num_train_steps,\n", + " num_warmup_steps=num_warmup_steps)\n", + "\n", + "estimator = tf.estimator.Estimator(\n", + " model_fn=model_fn,\n", + " config=run_config,\n", + " params={\"batch_size\": 
BATCH_SIZE})\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NOO3RfG1DYLo", + "colab_type": "text" + }, + "source": [ + "Next we create an input builder function that takes our training feature set (`train_features`) and produces a generator. This is a pretty standard design pattern for working with TensorFlow [Estimators](https://www.tensorflow.org/guide/estimators)." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1Pv2bAlOX_-K", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Create an input function for training. drop_remainder = True for using TPUs.\n", + "train_input_fn = bert.run_classifier.input_fn_builder(\n", + " features=train_features,\n", + " seq_length=MAX_SEQ_LENGTH,\n", + " is_training=True,\n", + " drop_remainder=False)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t6Nukby2EB6-", + "colab_type": "text" + }, + "source": [ + "Now we train our model! For me, using a Colab notebook running on Google's GPUs, training time is typically 8-14 minutes." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nucD4gluYJmK", + "colab_type": "code", + "colab": {} + }, + "source": [ + "print(f'Beginning Training!')\n", + "current_time = datetime.now()\n", + "estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)\n", + "print(\"Training took time \", datetime.now() - current_time)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CmbLTVniARy3", + "colab_type": "text" + }, + "source": [ + "Now let's use our test data to see how well our model did:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JIhejfpyJ8Bx", + "colab_type": "code", + "colab": {} + }, + "source": [ + "test_input_fn = run_classifier.input_fn_builder(\n", + " features=test_features,\n", + " seq_length=MAX_SEQ_LENGTH,\n", + " is_training=False,\n", + " drop_remainder=False)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "PPVEXhNjYXC-", + "colab_type": "code", + "colab": {} + }, + "source": [ + "estimator.evaluate(input_fn=test_input_fn, steps=None)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ueKsULteiz1B", + "colab_type": "text" + }, + "source": [ + "Now let's write code to make predictions on new sentences:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OsrbTD2EJTVl", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def getPrediction(in_sentences):\n", + " labels = [\"Negative\", \"Positive\"]\n", + " input_examples = [run_classifier.InputExample(guid=\"\", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, \"\" is just a dummy label\n", + " input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)\n", + " predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)\n", 
+ " predictions = estimator.predict(predict_input_fn)\n", + " return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "-thbodgih_VJ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "pred_sentences = [\n", + " \"That movie was absolutely awful\",\n", + " \"The acting was a bit lacking\",\n", + " \"The film was creative and surprising\",\n", + " \"Absolutely fantastic!\"\n", + "]" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "QrZmvZySKQTm", + "colab_type": "code", + "colab": {} + }, + "source": [ + "predictions = getPrediction(pred_sentences)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MXkRiEBUqN3n", + "colab_type": "text" + }, + "source": [ + "Voila! We have a sentiment classifier!" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ERkTE8-7oQLZ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "predictions" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P3Tg7c47vfLE", + "colab_type": "text" + }, + "source": [ + "# Export the model\n", + "\n", + "We are now ready to export the model. The following code defines the serving input function and exports the model to `OUTPUT_DIR`." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "NfXsdV4qtlpW", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def serving_input_fn():\n", + " reciever_tensors = {\n", + " \"input_ids\": tf.placeholder(dtype=tf.int32,\n", + " shape=[1, MAX_SEQ_LENGTH])\n", + " }\n", + " features = {\n", + " \"input_ids\": reciever_tensors['input_ids'],\n", + " \"input_mask\": 1 - tf.cast(tf.equal(reciever_tensors['input_ids'], 0), dtype=tf.int32),\n", + " \"segment_ids\": tf.zeros(dtype=tf.int32, shape=[1, MAX_SEQ_LENGTH]),\n", + " \"label_ids\": tf.placeholder(tf.int32, [None], name='label_ids')\n", + " }\n", + " return tf.estimator.export.ServingInputReceiver(features, reciever_tensors)\n", + " \n", + "estimator._export_to_tpu = False\n", + "estimator.export_saved_model(OUTPUT_DIR+\"/export\", serving_input_fn)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tIFTmUbcwI0w", + "colab_type": "text" + }, + "source": [ + "# Upload the model to AWS\n", + "\n", + "Cortex loads models from AWS, so we need to upload the exported model." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gByRzrnR_OBX", + "colab_type": "text" + }, + "source": [ + "Set these variables to configure your AWS credentials and model upload path:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1bdCOb3z0_Gh", + "colab_type": "code", + "cellView": "form", + "colab": {} + }, + "source": [ + "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", + "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", + "S3_UPLOAD_PATH = \"s3://my-bucket/sentiment-analyzer/bert\" #@param {type:\"string\"}\n", + "\n", + "import sys\n", + "import re\n", + "\n", + "if AWS_ACCESS_KEY_ID == \"\":\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", + "\n", + "elif AWS_SECRET_ACCESS_KEY == \"\":\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", + "\n", + "else:\n", + " try:\n", + " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", + " except:\n", + " print(\"\\033[91m{}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WLT09hZr_bhm", + "colab_type": "text" + }, + "source": [ + "Upload to S3:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jCN3BINl2sKN", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "import boto3\n", + "\n", + "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", + "\n", + "for dirpath, _, filenames in os.walk(OUTPUT_DIR+\"/export\"):\n", + " for filename in filenames:\n", + " filepath = os.path.join(dirpath, filename)\n", + " filekey = os.path.join(key, filepath[len(OUTPUT_DIR+\"/export/\"):])\n", + " print(\"Uploading s3://{}/{} ...\".format(bucket, filekey), end = '')\n", + " 
s3.upload_file(filepath, bucket, filekey)\n", + " print(\" ✓\")\n", + "\n", + "print(\"\\nUploaded model export directory to \" + S3_UPLOAD_PATH)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7XPKSHzf_d7M", + "colab_type": "text" + }, + "source": [ + "\n", + "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/sentiment-analyzer) for how to deploy the model as an API." + ] + } + ] +} diff --git a/test/tensorflow/sentiment-analyzer/cortex.yaml b/test/tensorflow/sentiment-analyzer/cortex.yaml new file mode 100644 index 0000000000..3e6447053e --- /dev/null +++ b/test/tensorflow/sentiment-analyzer/cortex.yaml @@ -0,0 +1,13 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: sentiment-analyzer + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/sentiment-analyzer/bert/ + monitoring: + model_type: classification + compute: + cpu: 1 + gpu: 1 diff --git a/test/tensorflow/sentiment-analyzer/predictor.py b/test/tensorflow/sentiment-analyzer/predictor.py new file mode 100644 index 0000000000..901f2bf349 --- /dev/null +++ b/test/tensorflow/sentiment-analyzer/predictor.py @@ -0,0 +1,29 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import tensorflow as tf +import tensorflow_hub as hub +from bert import tokenization, run_classifier + +labels = ["negative", "positive"] + + +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config): + with tf.Graph().as_default(): + bert_module = hub.Module("https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1") + info = bert_module(signature="tokenization_info", as_dict=True) + with tf.Session() as sess: + vocab_file, do_lower_case = sess.run([info["vocab_file"], info["do_lower_case"]]) + self._tokenizer = tokenization.FullTokenizer( + vocab_file=vocab_file, do_lower_case=do_lower_case + ) + self.client = tensorflow_client + + def predict(self, payload): + input_example = run_classifier.InputExample(guid="", text_a=payload["review"], label=0) + input_feature = run_classifier.convert_single_example( + 0, input_example, [0, 1], 128, self._tokenizer + ) + model_input = {"input_ids": [input_feature.input_ids]} + prediction = self.client.predict(model_input) + return labels[prediction["labels"][0]] diff --git a/test/tensorflow/sentiment-analyzer/requirements.txt b/test/tensorflow/sentiment-analyzer/requirements.txt new file mode 100644 index 0000000000..273614922e --- /dev/null +++ b/test/tensorflow/sentiment-analyzer/requirements.txt @@ -0,0 +1,5 @@ +bert-tensorflow==1.0.1 +tensorflow-hub==0.7.0 +tensorflow==1.15.* +tensorflow-serving-api==1.15.* +numpy==1.16.* diff --git a/test/tensorflow/sentiment-analyzer/sample.json b/test/tensorflow/sentiment-analyzer/sample.json new file mode 100644 index 0000000000..c433e33216 --- /dev/null +++ b/test/tensorflow/sentiment-analyzer/sample.json @@ -0,0 +1,3 @@ +{ + "review": "the movie was amazing!" 
+} diff --git a/test/tensorflow/text-generator/README.md b/test/tensorflow/text-generator/README.md new file mode 100644 index 0000000000..41a04891b3 --- /dev/null +++ b/test/tensorflow/text-generator/README.md @@ -0,0 +1,3 @@ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +Please refer to the [tutorial](https://docs.cortex.dev/text-generator) to see how to deploy an example with Cortex. diff --git a/test/tensorflow/text-generator/cortex.yaml b/test/tensorflow/text-generator/cortex.yaml new file mode 100644 index 0000000000..d0e54b527d --- /dev/null +++ b/test/tensorflow/text-generator/cortex.yaml @@ -0,0 +1,11 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: text-generator + kind: RealtimeAPI + predictor: + type: tensorflow + path: predictor.py + model_path: s3://cortex-examples/tensorflow/text-generator/gpt-2/124M/ + compute: + cpu: 1 + gpu: 1 diff --git a/test/tensorflow/text-generator/encoder.py b/test/tensorflow/text-generator/encoder.py new file mode 100644 index 0000000000..2f73dd509b --- /dev/null +++ b/test/tensorflow/text-generator/encoder.py @@ -0,0 +1,118 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +# This file includes code which was modified from https://github.com/openai/gpt-2 + +import json +import regex +from functools import lru_cache + + +@lru_cache() +def bytes_to_unicode(): + bs = ( + list(range(ord("!"), ord("~") + 1)) + + list(range(ord("¡"), ord("¬") + 1)) + + list(range(ord("®"), ord("ÿ") + 1)) + ) + cs = bs[:] + n = 0 + for b in range(2 ** 8): + if b not in bs: + bs.append(b) + cs.append(2 ** 8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +class Encoder: + def __init__(self, encoder, bpe_merges, errors="replace"): + self.encoder = encoder + self.decoder = {v: k for k, v in self.encoder.items()} + self.errors = errors + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges)))) + self.cache = {} + self.pat = regex.compile( + r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""" + ) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token) + pairs = get_pairs(word) + + if not pairs: + return token + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf"))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = " 
".join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + for token in regex.findall(self.pat, text): + token = "".join(self.byte_encoder[b] for b in token.encode("utf-8")) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(" ")) + return bpe_tokens + + def decode(self, tokens): + text = "".join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors) + return text + + +def get_encoder(s3_client): + encoder = json.load( + s3_client.get_object( + Bucket="cortex-examples", Key="tensorflow/text-generator/gpt-2/encoder.json" + )["Body"] + ) + bpe_data = ( + s3_client.get_object( + Bucket="cortex-examples", Key="tensorflow/text-generator/gpt-2/vocab.bpe" + )["Body"] + .read() + .decode("utf-8") + ) + bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split("\n")[1:-1]] + return Encoder(encoder=encoder, bpe_merges=bpe_merges) diff --git a/test/tensorflow/text-generator/gpt-2.ipynb b/test/tensorflow/text-generator/gpt-2.ipynb new file mode 100644 index 0000000000..1597816fcd --- /dev/null +++ b/test/tensorflow/text-generator/gpt-2.ipynb @@ -0,0 +1,383 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "gpt-2.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "kc5cIgeEmv8o", + "colab_type": "text" + }, + "source": [ + "# Exporting GPT-2\n", + "\n", + "_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_\n", + "\n", + "In this notebook, we'll show how to export [OpenAI's GPT-2 text generation model](https://github.com/openai/gpt-2) for serving." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RAWs29lAktOK", + "colab_type": "text" + }, + "source": [ + "First, we'll download the GPT-2 code repository:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gHs3aaFaLUXq", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!git clone --no-checkout https://github.com/openai/gpt-2.git\n", + "!cd gpt-2 && git reset --hard ac5d52295f8a1c3856ea24fb239087cc1a3d1131" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A4al4P14nmni", + "colab_type": "text" + }, + "source": [ + "Next we'll specify the model size (choose one of 124M, 355M, or 774M):" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "3Y4bt6hkfuxY", + "colab_type": "code", + "colab": {}, + "cellView": "form" + }, + "source": [ + "import sys\n", + "\n", + "MODEL_SIZE = \"124M\" #@param {type:\"string\"}\n", + "\n", + "if MODEL_SIZE not in {\"124M\", \"355M\", \"774M\"}:\n", + " print(\"\\033[91m{}\\033[00m\".format('ERROR: MODEL_SIZE must be \"124M\", \"355M\", or \"774M\"'), file=sys.stderr)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C6xRx0Monh_j", + "colab_type": "text" + }, + "source": [ + "We can use `download_model.py` to download the model:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Kb50Z6NjbJBN", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!python3 ./gpt-2/download_model.py $MODEL_SIZE" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zz2ioOcpoPjV", + "colab_type": "text" + }, + "source": [ + "Next, we'll install the required packages:" + ] + }, + { + "cell_type": "code", 
+ "metadata": { + "id": "Vk4Q2RR-UZQm", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!pip install tensorflow==1.14.* numpy==1.* boto3==1.*" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "KkVf5FmuUMrl", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import sys\n", + "import os\n", + "import time\n", + "import json\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6Ay7qiQFoWRn", + "colab_type": "text" + }, + "source": [ + "Now we can export the model for serving:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "GdnYXr1IKaF0", + "colab_type": "code", + "colab": {} + }, + "source": [ + "sys.path.append(os.path.join(os.getcwd(), 'gpt-2/src'))\n", + "import model, sample\n", + "\n", + "def export_for_serving(\n", + " model_name='124M',\n", + " seed=None,\n", + " batch_size=1,\n", + " length=None,\n", + " temperature=1,\n", + " top_k=0,\n", + " models_dir='models'\n", + "):\n", + " \"\"\"\n", + " Export the model for TF Serving\n", + " :model_name=124M : String, which model to use\n", + " :seed=None : Integer seed for random number generators, fix seed to reproduce\n", + " results\n", + " :length=None : Number of tokens in generated text, if None (default), is\n", + " determined by model hyperparameters\n", + " :temperature=1 : Float value controlling randomness in boltzmann\n", + " distribution. Lower temperature results in less random completions. As the\n", + " temperature approaches zero, the model will become deterministic and\n", + " repetitive. Higher temperature results in more random completions.\n", + " :top_k=0 : Integer value controlling diversity. 
1 means only 1 word is\n", + " considered for each step (token), resulting in deterministic completions,\n", + " while 40 means 40 words are considered at each step. 0 (default) is a\n", + " special setting meaning no restrictions. 40 generally is a good value.\n", + " :models_dir : path to parent folder containing model subfolders\n", + " (i.e. contains the <model_name> folder)\n", + " \"\"\"\n", + " models_dir = os.path.expanduser(os.path.expandvars(models_dir))\n", + "\n", + " hparams = model.default_hparams()\n", + " with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:\n", + " hparams.override_from_dict(json.load(f))\n", + "\n", + " if length is None:\n", + " length = hparams.n_ctx\n", + " elif length > hparams.n_ctx:\n", + " raise ValueError(\"Can't get samples longer than window size: %s\" % hparams.n_ctx)\n", + "\n", + " with tf.Session(graph=tf.Graph()) as sess:\n", + " context = tf.placeholder(tf.int32, [batch_size, None])\n", + " np.random.seed(seed)\n", + " tf.set_random_seed(seed)\n", + "\n", + " output = sample.sample_sequence(\n", + " hparams=hparams, length=length,\n", + " context=context,\n", + " batch_size=batch_size,\n", + " temperature=temperature, top_k=top_k\n", + " )\n", + "\n", + " saver = tf.train.Saver()\n", + " ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))\n", + " saver.restore(sess, ckpt)\n", + "\n", + " export_dir=os.path.join(models_dir, model_name, \"export\", str(time.time()).split('.')[0])\n", + " if not os.path.isdir(export_dir):\n", + " os.makedirs(export_dir)\n", + "\n", + " builder = tf.saved_model.builder.SavedModelBuilder(export_dir)\n", + " signature = predict_signature_def(inputs={'context': context},\n", + " outputs={'sample': output})\n", + "\n", + " builder.add_meta_graph_and_variables(sess,\n", + " [tf.saved_model.SERVING],\n", + " signature_def_map={\"predict\": signature},\n", + " strip_default_attrs=True)\n", + " builder.save()\n", + "\n", + "\n", + "export_for_serving(top_k=40, 
length=256, model_name=MODEL_SIZE)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hGfSohMrowmg", + "colab_type": "text" + }, + "source": [ + "## Upload the model to AWS\n", + "\n", + "Cortex loads models from AWS, so we need to upload the exported model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BfB5QZ82ozj9", + "colab_type": "text" + }, + "source": [ + "Set these variables to configure your AWS credentials and model upload path:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "B2RNuNk7o1c5", + "colab_type": "code", + "colab": {}, + "cellView": "form" + }, + "source": [ + "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", + "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", + "S3_UPLOAD_PATH = \"s3://my-bucket/text-generator/gpt-2\" #@param {type:\"string\"}\n", + "\n", + "import sys\n", + "import re\n", + "\n", + "if AWS_ACCESS_KEY_ID == \"\":\n", + " print(\"\\033[91m {}\\033[00m\".format(\"ERROR: Please set AWS_ACCESS_KEY_ID\"), file=sys.stderr)\n", + "\n", + "elif AWS_SECRET_ACCESS_KEY == \"\":\n", + " print(\"\\033[91m {}\\033[00m\".format(\"ERROR: Please set AWS_SECRET_ACCESS_KEY\"), file=sys.stderr)\n", + "\n", + "else:\n", + " try:\n", + " bucket, key = re.match(\"s3://(.+?)/(.+)\", S3_UPLOAD_PATH).groups()\n", + " except:\n", + " print(\"\\033[91m {}\\033[00m\".format(\"ERROR: Invalid s3 path (should be of the form s3://my-bucket/path/to/file)\"), file=sys.stderr)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ics0omsrpS8V", + "colab_type": "text" + }, + "source": [ + "Upload the model to S3:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BnKncToppUhN", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "import boto3\n", + "\n", + "s3 = boto3.client(\"s3\", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)\n", + "\n", 
+ "for dirpath, _, filenames in os.walk(\"models/{}/export\".format(MODEL_SIZE)):\n", + " for filename in filenames:\n", + " filepath = os.path.join(dirpath, filename)\n", + " filekey = os.path.join(key, MODEL_SIZE, filepath[len(\"models/{}/export/\".format(MODEL_SIZE)):])\n", + " print(\"Uploading s3://{}/{} ...\".format(bucket, filekey), end = '')\n", + " s3.upload_file(filepath, bucket, filekey)\n", + " print(\" ✓\")\n", + "\n", + "print(\"\\nUploaded model export directory to {}/{}\".format(S3_UPLOAD_PATH, MODEL_SIZE))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IIMVPhe2qkU4", + "colab_type": "text" + }, + "source": [ + "\n", + "We also need to upload `vocab.bpe` and `encoder.json`, so that the [encoder](https://github.com/cortexlabs/cortex/blob/master/examples/tensorflow/text-generator/encoder.py) in the [Predictor](https://github.com/cortexlabs/cortex/blob/master/examples/tensorflow/text-generator/predictor.py) can encode the input text before making a request to the model." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YdN8MtZxsO9V", + "colab_type": "code", + "colab": {} + }, + "source": [ + "print(\"Uploading s3://{}/{}/vocab.bpe ...\".format(bucket, key), end = '')\n", + "s3.upload_file(os.path.join(\"models\", MODEL_SIZE, \"vocab.bpe\"), bucket, os.path.join(key, \"vocab.bpe\"))\n", + "print(\" ✓\")\n", + "\n", + "print(\"Uploading s3://{}/{}/encoder.json ...\".format(bucket, key), end = '')\n", + "s3.upload_file(os.path.join(\"models\", MODEL_SIZE, \"encoder.json\"), bucket, os.path.join(key, \"encoder.json\"))\n", + "print(\" ✓\")" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MsoxwahIpnTO", + "colab_type": "text" + }, + "source": [ + "\n", + "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/tensorflow/text-generator) for how to deploy the model as an API." 
+ ] + } + ] +} diff --git a/test/tensorflow/text-generator/predictor.py b/test/tensorflow/text-generator/predictor.py new file mode 100644 index 0000000000..3cbc45e1d7 --- /dev/null +++ b/test/tensorflow/text-generator/predictor.py @@ -0,0 +1,24 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import os +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +from encoder import get_encoder + + +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config): + self.client = tensorflow_client + + if os.environ.get("AWS_ACCESS_KEY_ID"): + s3 = boto3.client("s3") # client will use your credentials if available + else: + s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client + + self.encoder = get_encoder(s3) + + def predict(self, payload): + model_input = {"context": [self.encoder.encode(payload["text"])]} + prediction = self.client.predict(model_input) + return self.encoder.decode(prediction["sample"]) diff --git a/test/tensorflow/text-generator/requirements.txt b/test/tensorflow/text-generator/requirements.txt new file mode 100644 index 0000000000..f064e1eb7e --- /dev/null +++ b/test/tensorflow/text-generator/requirements.txt @@ -0,0 +1,2 @@ +requests +regex diff --git a/test/tensorflow/text-generator/sample.json b/test/tensorflow/text-generator/sample.json new file mode 100644 index 0000000000..dfd2a2f433 --- /dev/null +++ b/test/tensorflow/text-generator/sample.json @@ -0,0 +1,3 @@ +{ + "text": "machine learning is" +} diff --git a/test/traffic-splitter/README.md b/test/traffic-splitter/README.md new file mode 100644 index 0000000000..d68d763dd0 --- /dev/null +++ b/test/traffic-splitter/README.md @@ -0,0 +1,111 @@ +# Splitting traffic between APIs + +_WARNING: you are on the master branch; please refer to examples on the branch 
corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +This example shows how to split traffic between 2 different iris-classifiers deployed as Realtime APIs. + +To deploy this example: + +1. Determine your CLI version by running `cortex version` +1. Clone the repo and switch to the current version by replacing `<version>` with your CLI version: `git clone -b v<version> https://github.com/cortexlabs/cortex` (e.g. if the output of `cortex version` is 0.18.1, the clone command would be `git clone -b v0.18.1 https://github.com/cortexlabs/cortex`) +1. Navigate to this example directory + +## `cortex deploy` + +```bash +$ cortex deploy --env aws + +creating iris-classifier-onnx (RealtimeAPI) +creating iris-classifier-tf (RealtimeAPI) +created iris-classifier (TrafficSplitter) +``` + +## `cortex get` + +```bash +$ cortex get + +env realtime api status up-to-date requested last update avg request 2XX +aws iris-classifier-onnx updating 0 1 27s - - +aws iris-classifier-tf updating 0 1 27s - - + +env traffic splitter apis last update +aws iris-classifier iris-classifier-onnx:30 iris-classifier-tf:70 27s +``` + +## `cortex get iris-classifier` + +```bash +$ cortex get iris-classifier --env aws + +apis weights status requested last update avg request 2XX 5XX +iris-classifier-onnx 30 live 1 1m - - - +iris-classifier-tf 70 live 1 1m - - - + +last updated: 1m +endpoint: https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier +example curl: curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json +... 
+``` + +## Make multiple requests + +```bash +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json +setosa +``` + +## `cortex get iris-classifier` + +Notice the requests being routed to the different Realtime APIs based on their weights (the output below may not match yours): + +```bash +$ cortex get iris-classifier --env aws + +using aws environment + + +apis weights status requested last update avg request 2XX 5XX +iris-classifier-onnx 30 live 1 4m 6.00791 ms 1 - +iris-classifier-tf 70 live 1 4m 5.81867 ms 5 - + +last updated: 4m +endpoint: https://comtf6hs64.execute-api.us-west-2.amazonaws.com/iris-classifier +example curl: curl https://comtf6hs64.execute-api.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json +... 
+``` + +## Cleanup + +Use `cortex delete <api_name>` to delete the Traffic Splitter and the two Realtime APIs (note that the Traffic Splitter and each Realtime API must be deleted by separate `cortex delete` commands): + +```bash +$ cortex delete iris-classifier --env aws + +deleting iris-classifier + +$ cortex delete iris-classifier-onnx --env aws + +deleting iris-classifier-onnx + +$ cortex delete iris-classifier-tf --env aws + +deleting iris-classifier-tf +``` + +Running `cortex delete <api_name>` will free up cluster resources and allow Cortex to scale down to the minimum number of instances you specified during cluster installation. It will not spin down your cluster. diff --git a/test/traffic-splitter/cortex.yaml b/test/traffic-splitter/cortex.yaml new file mode 100644 index 0000000000..16702378cd --- /dev/null +++ b/test/traffic-splitter/cortex.yaml @@ -0,0 +1,28 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +- name: iris-classifier-pytorch + kind: RealtimeAPI + predictor: + type: python + path: pytorch_predictor.py + config: + model: s3://cortex-examples/pytorch/iris-classifier/weights.pth + monitoring: + model_type: classification + +- name: iris-classifier-onnx + kind: RealtimeAPI + predictor: + type: onnx + path: onnx_predictor.py + model_path: s3://cortex-examples/onnx/iris-classifier/ + monitoring: + model_type: classification + +- name: iris-classifier + kind: TrafficSplitter + apis: + - name: iris-classifier-onnx + weight: 30 + - name: iris-classifier-pytorch + weight: 70 diff --git a/test/traffic-splitter/model.py b/test/traffic-splitter/model.py new file mode 100644 index 0000000000..fe29ff7b6d --- /dev/null +++ b/test/traffic-splitter/model.py @@ -0,0 +1,59 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score + + +class IrisNet(nn.Module): + def __init__(self): + super(IrisNet, self).__init__() + self.fc1 = nn.Linear(4, 100) + self.fc2 = nn.Linear(100, 100) + self.fc3 = nn.Linear(100, 3) + self.softmax = nn.Softmax(dim=1) + + def forward(self, X): + X = F.relu(self.fc1(X)) + X = self.fc2(X) + X = self.fc3(X) + X = self.softmax(X) + return X + + +if __name__ == "__main__": + iris = load_iris() + X, y = iris.data, iris.target + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) + + train_X = Variable(torch.Tensor(X_train).float()) + test_X = Variable(torch.Tensor(X_test).float()) + train_y = Variable(torch.Tensor(y_train).long()) + test_y = Variable(torch.Tensor(y_test).long()) + + model = IrisNet() + + criterion = nn.CrossEntropyLoss() + + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + + for epoch in range(1000): + optimizer.zero_grad() + out = model(train_X) + loss = criterion(out, train_y) + loss.backward() + optimizer.step() + + if epoch % 100 == 0: + print("number of epoch {} loss {}".format(epoch, loss)) + + predict_out = model(test_X) + _, predict_y = torch.max(predict_out, 1) + + print("prediction accuracy {}".format(accuracy_score(test_y.data, predict_y.data))) + + torch.save(model.state_dict(), "weights.pth") diff --git a/test/traffic-splitter/onnx_predictor.py b/test/traffic-splitter/onnx_predictor.py new file mode 100644 index 0000000000..b135129e14 --- /dev/null +++ b/test/traffic-splitter/onnx_predictor.py @@ -0,0 +1,20 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +labels = ["setosa", "versicolor", "virginica"] + + +class ONNXPredictor: + def __init__(self, onnx_client, config): + self.client = onnx_client + + def predict(self, payload): + model_input = [ + payload["sepal_length"], + payload["sepal_width"], + payload["petal_length"], + payload["petal_width"], + ] + + prediction = self.client.predict(model_input) + predicted_class_id = prediction[0][0] + return labels[predicted_class_id] diff --git a/test/traffic-splitter/pytorch_predictor.py b/test/traffic-splitter/pytorch_predictor.py new file mode 100644 index 0000000000..71994bb9ae --- /dev/null +++ b/test/traffic-splitter/pytorch_predictor.py @@ -0,0 +1,50 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import re +import torch +import os +import boto3 +from botocore import UNSIGNED +from botocore.client import Config +from model import IrisNet + +labels = ["setosa", "versicolor", "virginica"] + + +class PythonPredictor: + def __init__(self, config): + # download the model + bucket, key = re.match("s3://(.+?)/(.+)", config["model"]).groups() + + if os.environ.get("AWS_ACCESS_KEY_ID"): + s3 = boto3.client("s3") # client will use your credentials if available + else: + s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) # anonymous client + + s3.download_file(bucket, key, "/tmp/model.pth") + + # initialize the model + model = IrisNet() + model.load_state_dict(torch.load("/tmp/model.pth")) + model.eval() + + self.model = model + + def predict(self, payload): + # Convert the request to a tensor and pass it into the model + input_tensor = torch.FloatTensor( + [ + [ + payload["sepal_length"], + payload["sepal_width"], + payload["petal_length"], + payload["petal_width"], + ] + ] + ) + + # Run the 
prediction + output = self.model(input_tensor) + + # Translate the model output to the corresponding label string + return labels[torch.argmax(output[0])] diff --git a/test/traffic-splitter/sample.json b/test/traffic-splitter/sample.json new file mode 100644 index 0000000000..e17bbb2896 --- /dev/null +++ b/test/traffic-splitter/sample.json @@ -0,0 +1,6 @@ +{ + "sepal_length": 5.2, + "sepal_width": 3.6, + "petal_length": 1.4, + "petal_width": 0.3 +} diff --git a/test/utils/README.md b/test/utils/README.md new file mode 100644 index 0000000000..61202eb0c0 --- /dev/null +++ b/test/utils/README.md @@ -0,0 +1,36 @@ +## Throughput tester + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +[throughput_test.py](throughput_test.py) is a Python CLI that can be used to test the throughput of your deployed API. The throughput will vary depending on your API's configuration (specified in your `cortex.yaml` file), your local machine's resources (mostly CPU, since it has to spawn many concurrent requests), and the internet connection on your local machine. + +```bash +Usage: throughput_test.py [OPTIONS] ENDPOINT PAYLOAD + + Program for testing the throughput of Cortex-deployed APIs. + +Options: + -p, --processes INTEGER Number of processes for prediction requests. [default: 1] + -t, --threads INTEGER Number of threads per process for prediction requests. [default: 1] + -s, --samples INTEGER Number of samples to run per thread. [default: 10] + -i, --time-based FLOAT How long the thread making predictions will run for in seconds. + If set, -s option will be ignored. + --help Show this message and exit. +``` + +`ENDPOINT` is the API's endpoint, which you can get by running `cortex get <api_name>`. This argument can also be exported as an environment variable instead of being passed to the CLI.
+ +`PAYLOAD` can either be a local file or a URL resource that points to a file. The allowed extension types for the file are `json` and `jpg`. This argument can also be exported as an environment variable instead of being passed to the CLI. + +* `json` files are generally `sample.json`s as they are found in most Cortex examples. Each of these is attached to the request as payload. The content type of the request is `"application/json"`. +* `jpg` images are read as numpy arrays and then are converted to a bytes object using the `cv2.imencode` function. The content type of the request is `"application/octet-stream"`. + +The same payload `PAYLOAD` is attached to all requests the script makes. + +### Dependencies + +The [throughput_test.py](throughput_test.py) CLI has been tested with Python 3.6.9. To install the CLI's dependencies, run the following: + +```bash +pip install requests click opencv-contrib-python numpy validator-collection imageio +``` diff --git a/test/utils/throughput_test.py b/test/utils/throughput_test.py new file mode 100644 index 0000000000..c157cf0b29 --- /dev/null +++ b/test/utils/throughput_test.py @@ -0,0 +1,179 @@ +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub) + +import os +import sys +import click +import concurrent.futures +import requests +import imageio +import json +import time +import itertools +import cv2 +import numpy as np + +from validator_collection import checkers + + +@click.command(help="Program for testing the throughput of Cortex-deployed APIs.") +@click.argument("endpoint", type=str, envvar="ENDPOINT") +@click.argument("payload", type=str, envvar="PAYLOAD") +@click.option( + "--processes", + "-p", + type=int, + default=1, + show_default=True, + help="Number of processes for prediction requests.", +) +@click.option( + "--threads", + "-t", + type=int, + default=1, + show_default=True, + help="Number of threads per process for prediction requests.", +) +@click.option( + "--samples", + "-s", + type=int, + default=10, + show_default=True, + help="Number of samples to run per thread.", +) +@click.option( + "--time-based", + "-i", + type=float, + default=0.0, + help="How long the thread making predictions will run for in seconds. 
If set, -s option will be ignored.", +) +def main(payload, endpoint, processes, threads, samples, time_based): + file_type = None + if checkers.is_url(payload): + if payload.lower().endswith(".json"): + file_type = "json" + payload_data = requests.get(payload).json() + elif payload.lower().endswith(".jpg"): + file_type = "jpg" + payload_data = imageio.imread(payload) + elif checkers.is_file(payload): + if payload.lower().endswith(".json"): + file_type = "json" + with open(payload, "r") as f: + payload_data = json.load(f) + elif payload.lower().endswith(".jpg"): + file_type = "jpg" + payload_data = cv2.imread(payload, cv2.IMREAD_COLOR) + else: + print(f"'{payload}' isn't an URL resource, nor is it a local file") + sys.exit(1) + + if file_type is None: + print(f"'{payload}' doesn't point to a jpg image or to a json file") + sys.exit(1) + if file_type == "jpg": + data = image_to_jpeg_bytes(payload_data) + if file_type == "json": + data = json.dumps(payload_data) + + print("Starting the inference throughput test...") + results = [] + start = time.time() + with concurrent.futures.ProcessPoolExecutor(max_workers=processes) as executor: + results = executor_submitter( + executor, processes, process_worker, threads, data, endpoint, samples, time_based + ) + end = time.time() + elapsed = end - start + + total_requests = sum(results) + + print(f"A total of {total_requests} requests have been served in {elapsed} seconds") + print(f"Avg number of inferences/sec is {total_requests / elapsed}") + print(f"Avg time spent on an inference is {elapsed / total_requests} seconds") + + +def process_worker(threads, data, endpoint, samples, time_based): + results = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor: + results = executor_submitter(executor, threads, task, data, endpoint, samples, time_based) + + return results + + +def executor_submitter(executor, workers, *args, **kwargs): + futures = [] + for worker in range(workers): + future = 
executor.submit(*args, **kwargs) + futures.append(future) + + results = [future.result() for future in futures] + results = list(itertools.chain.from_iterable(results)) + + return results + + +def task(data, endpoint, samples, time_based): + timeout = 60 + + if isinstance(data, str): + headers = {"content-type": "application/json"} + elif isinstance(data, bytes): + headers = {"content-type": "application/octet-stream"} + else: + return + + if time_based == 0.0: + for i in range(samples): + try: + resp = requests.post( + endpoint, + data=data, + headers=headers, + timeout=timeout, + ) + except Exception as e: + print(e) + break + time.sleep(0.1) + return [samples] + else: + start = time.time() + counter = 0 + while start + time_based >= time.time(): + try: + resp = requests.post( + endpoint, + data=data, + headers=headers, + timeout=timeout, + ) + except Exception as e: + print(e) + break + time.sleep(0.1) + counter += 1 + return [counter] + + +def image_to_jpeg_nparray(image, quality=[int(cv2.IMWRITE_JPEG_QUALITY), 95]): + """ + Convert numpy image to jpeg numpy vector. + """ + is_success, im_buf_arr = cv2.imencode(".jpg", image, quality) + return im_buf_arr + + +def image_to_jpeg_bytes(image, quality=[int(cv2.IMWRITE_JPEG_QUALITY), 95]): + """ + Convert numpy image to bytes-encoded jpeg image. 
+ """ + buf = image_to_jpeg_nparray(image, quality) + byte_im = buf.tobytes() + return byte_im + + +if __name__ == "__main__": + main() From f28337205af146c198ae7c71c998e28758abaaa9 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Mon, 7 Dec 2020 16:27:39 -0800 Subject: [PATCH 12/36] Update tutorials --- docs/deployments/telemetry.md | 2 +- docs/tutorials/batch.md | 4 +- docs/tutorials/multi-model.md | 77 ------------------- docs/tutorials/project.md | 117 +++++++++++++++++++++++++++++ docs/tutorials/realtime.md | 12 +-- docs/tutorials/traffic-splitter.md | 2 - 6 files changed, 122 insertions(+), 92 deletions(-) delete mode 100644 docs/tutorials/multi-model.md create mode 100644 docs/tutorials/project.md diff --git a/docs/deployments/telemetry.md b/docs/deployments/telemetry.md index e7e767c79c..0c9c3f4821 100644 --- a/docs/deployments/telemetry.md +++ b/docs/deployments/telemetry.md @@ -6,7 +6,7 @@ By default, Cortex sends anonymous usage data to Cortex Labs. ## What data is collected? -If telemetry is enabled, events and errors are collected. Each time you run a command an event will be sent with a randomly generated unique CLI ID and the name of the command. For example, if you run `cortex deploy`, Cortex Labs will receive an event of the structure `{id: 1234, command: "deploy"}`. In addition, the operator sends heartbeats that include cluster metrics like the types of instances running in your cluster. +If telemetry is enabled, events and errors are collected. Each time you run a command an event will be sent with a randomly generated unique CLI ID and the name of the command. For example, if you run `cortex get`, Cortex Labs will receive an event of the structure `{id: 1234, command: "get"}`. In addition, the operator sends heartbeats that include cluster metrics like the types of instances running in your cluster. ## How do I opt out? 
diff --git a/docs/tutorials/batch.md b/docs/tutorials/batch.md index 6a188d487c..7d26f97755 100644 --- a/docs/tutorials/batch.md +++ b/docs/tutorials/batch.md @@ -1,8 +1,6 @@ # Deploy a batch API -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -**Note: Batch APIs are only supported on a Cortex cluster (in AWS).** +**Note: at this time, batch APIs are only supported on AWS.** ## Install cortex diff --git a/docs/tutorials/multi-model.md b/docs/tutorials/multi-model.md deleted file mode 100644 index 1f1bee4a82..0000000000 --- a/docs/tutorials/multi-model.md +++ /dev/null @@ -1,77 +0,0 @@ -# Deploy a multi-model API - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - -## Install cortex - -```bash -$ pip install cortex -``` - -## Spin up a cluster on AWS (requires AWS credentials) - -```bash -$ cortex cluster up -``` - -## Define a multi-model API - -```python -# multi_model.py - -import cortex - -class PythonPredictor: - def __init__(self, config): - from transformers import pipeline - - self.analyzer = pipeline(task="sentiment-analysis", device=device) - self.summarizer = pipeline(task="summarization", device=device) - - def predict(self, query_params, payload): - model = query_params.get("model") - - if model == "sentiment": - return self.analyzer(payload["text"])[0] - elif model == "summarizer": - return self.summarizer(payload["text"])[0]["summary_text"] - -requirements = ["tensorflow", "transformers"] - -api_spec = {"name": "multi-model", "kind": "RealtimeAPI"} - -cx = cortex.client("aws") -cx.deploy(api_spec, predictor=PythonPredictor, requirements=requirements) -``` - -## Deploy to AWS - -```bash -$ python multi_model.py -``` - -## 
Monitor - -```bash -$ cortex get multi-model --env aws --watch -``` - -## Stream logs - -```bash -$ cortex logs multi-model -``` - -## Make a request - -```bash -$ curl https:// \ - -X POST -H "Content-Type: application/json" \ - -d '{"text": "hello world"}' -``` - -## Delete the API - -```bash -$ cortex delete multi-model -``` diff --git a/docs/tutorials/project.md b/docs/tutorials/project.md new file mode 100644 index 0000000000..84bc55bec9 --- /dev/null +++ b/docs/tutorials/project.md @@ -0,0 +1,117 @@ +# Deploy a project + +## Install cortex + +```bash +$ pip install cortex +``` + +## Create a directory + +```bash +$ mkdir text-generator && cd text-generator + +$ touch predictor.py requirements.txt realtime.py +``` + +## Define a Predictor + +```python +# predictor.py + +class PythonPredictor: + def __init__(self, config): + from transformers import pipeline + + self.model = pipeline(task="text-generation") + + def predict(self, payload): + return self.model(payload["text"])[0] +``` + +## Specify Python dependencies + +```text +tensorflow +transformers +``` + +## Configure an API + +```python +# realtime.py + +import cortex + +api_spec = { + "name": "text-generator", + "kind": "RealtimeAPI", + "predictor": {"type": "python", "path": "predictor.py"}, +} + +cx = cortex.client("local") +cx.deploy(api_spec, project_dir=".") +``` + +## Test locally (requires Docker) + +```bash +$ python realtime.py +``` + +## Monitor + +```bash +$ cortex get text-generator --watch +``` + +## Make a request + +```bash +$ curl http://localhost:8889 -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' +``` + +## Stream logs + +```bash +$ cortex logs text-generator +``` + +## Spin up a cluster on AWS (requires AWS credentials) + +```bash +$ cortex cluster up +``` + +## Edit `realtime.py` + +```python +# cx = cortex.client("local") +cx = cortex.client("aws") +``` + +## Deploy to AWS + +```bash +$ python realtime.py +``` + +## Monitor + +```bash +$ cortex get 
text-generator --env aws --watch +``` + +## Make a request + +```bash +$ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' +``` + +## Delete the APIs + +```bash +$ cortex delete --env local text-generator + +$ cortex delete --env aws text-generator +``` diff --git a/docs/tutorials/realtime.md b/docs/tutorials/realtime.md index 3d1d1f02fc..6871021b2e 100644 --- a/docs/tutorials/realtime.md +++ b/docs/tutorials/realtime.md @@ -1,7 +1,5 @@ # Deploy a realtime API -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - ## Install cortex ```bash @@ -47,9 +45,7 @@ $ cortex get text-generator --watch ## Make a request ```bash -$ curl http://localhost:8889 \ - -X POST -H "Content-Type: application/json" \ - -d '{"text": "hello world"}' +$ curl http://localhost:8889 -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' ``` ## Stream logs @@ -86,12 +82,10 @@ $ cortex get text-generator --env aws --watch ## Make a request ```bash -$ curl https:// \ - -X POST -H "Content-Type: application/json" \ - -d '{"text": "hello world"}' +$ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' ``` -## Delete the API +## Delete the APIs ```bash $ cortex delete --env local text-generator diff --git a/docs/tutorials/traffic-splitter.md b/docs/tutorials/traffic-splitter.md index 5db0afda9a..fb8537b000 100644 --- a/docs/tutorials/traffic-splitter.md +++ b/docs/tutorials/traffic-splitter.md @@ -1,7 +1,5 @@ # Deploy a traffic splitter -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - ## Install cortex ```bash From 3c6e53cab4ab6ba17e7f697ef2477b00ef07294a Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Mon, 7 Dec 2020 19:01:43 -0800 Subject: [PATCH 13/36] Update docs --- .dockerignore | 1 + build/lint.sh | 1 + build/test-examples.sh | 2 +- docs/{deployments/gpus.md => aws/gpu.md} | 6 +- docs/{deployments => aws}/inferentia.md | 0 docs/deployments/batch-api.md | 36 ------------ docs/deployments/compute.md | 36 ------------ docs/deployments/realtime-api.md | 39 ------------ docs/summary.md | 21 +++---- docs/tutorials/multi-model.md | 75 ++++++++++++++++++++++++ docs/tutorials/traffic-splitter.md | 6 +- 11 files changed, 94 insertions(+), 129 deletions(-) rename docs/{deployments/gpus.md => aws/gpu.md} (91%) rename docs/{deployments => aws}/inferentia.md (100%) delete mode 100644 docs/deployments/batch-api.md delete mode 100644 docs/deployments/compute.md delete mode 100644 docs/deployments/realtime-api.md create mode 100644 docs/tutorials/multi-model.md diff --git a/.dockerignore b/.dockerignore index ee2e048e67..3d39c7390f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,6 +2,7 @@ /bin/ /dev/ /docs/ +/test/ **/.* **/*.md diff --git a/build/lint.sh b/build/lint.sh index 9243106188..841c25111a 100755 --- a/build/lint.sh +++ b/build/lint.sh @@ -72,6 +72,7 @@ output=$(cd "$ROOT" && find . -type f \ ! -path "./vendor/*" \ ! -path "**/.vscode/*" \ ! -path "**/__pycache__/*" \ +! -path "./test/*" \ ! -path "./dev/config/*" \ ! -path "./bin/*" \ ! -path "./.circleci/*" \ diff --git a/build/test-examples.sh b/build/test-examples.sh index a886a63587..3b334f4d00 100755 --- a/build/test-examples.sh +++ b/build/test-examples.sh @@ -19,7 +19,7 @@ set -eou pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. 
>/dev/null && pwd)" CORTEX="$ROOT/bin/cortex" -for example in $ROOT/docs/tutorials/*/cortex.yaml; do +for example in $ROOT/test/*/cortex.yaml; do timer=1200 example_base_dir=$(dirname "${example}") retry="false" diff --git a/docs/deployments/gpus.md b/docs/aws/gpu.md similarity index 91% rename from docs/deployments/gpus.md rename to docs/aws/gpu.md index cc7af572b7..98d950cea3 100644 --- a/docs/deployments/gpus.md +++ b/docs/aws/gpu.md @@ -5,9 +5,9 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t To use GPUs: 1. Make sure your AWS account is subscribed to the [EKS-optimized AMI with GPU Support](https://aws.amazon.com/marketplace/pp/B07GRHFXGM). -2. You may need to [request a limit increase](https://console.aws.amazon.com/servicequotas/home?#!/services/ec2/quotas) for your desired instance type. -3. Set instance type to an AWS GPU instance (e.g. `g4dn.xlarge`) when installing Cortex. -4. Set the `gpu` field in the `compute` configuration for your API. One unit of GPU corresponds to one virtual GPU. Fractional requests are not allowed. +1. You may need to [request a limit increase](https://console.aws.amazon.com/servicequotas/home?#!/services/ec2/quotas) for your desired instance type. +1. Set instance type to an AWS GPU instance (e.g. `g4dn.xlarge`) when installing Cortex. +1. Set the `gpu` field in the `compute` configuration for your API. One unit of GPU corresponds to one virtual GPU. Fractional requests are not allowed. 
## Tips diff --git a/docs/deployments/inferentia.md b/docs/aws/inferentia.md similarity index 100% rename from docs/deployments/inferentia.md rename to docs/aws/inferentia.md diff --git a/docs/deployments/batch-api.md b/docs/deployments/batch-api.md deleted file mode 100644 index 57f994d70c..0000000000 --- a/docs/deployments/batch-api.md +++ /dev/null @@ -1,36 +0,0 @@ -# Batch API Overview - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -You can deploy your model as a Batch API to create a web service that can receive job requests and orchestrate offline batch inference on large datasets across multiple workers. - -## When should I use a Batch API - -You may want to deploy your model as a Batch API if any of the following scenarios apply to your use case: - -* inference will run on a large dataset and can be distributed across multiple workers -* job progress and status needs to be monitored -* inference is a part of internal data pipelines that may be chained together -* a small number of requests are received, but each request takes minutes or hours to complete - -You may want to consider deploying your model as a [Realtime API](realtime-api.md) if these scenarios don't apply to you. - -A Batch API deployed in Cortex will create/support the following: - -* a REST web service to receive job requests, manage running jobs, and retrieve job statuses -* an autoscaling worker pool that can scale to 0 -* log aggregation and streaming -* `on_job_complete` hook to for aggregation or triggering webhooks - -## How does it work - -You specify the following: - -* a Cortex Predictor class in Python that defines how to initialize your model run batch inference -* an API configuration YAML file that defines how your API will behave in production (parallelism, networking, compute, etc.) 
- -Once you've implemented your predictor and defined your API configuration, you can use the Cortex CLI to deploy a Batch API. The Cortex CLI will package your predictor implementation and the rest of the code and dependencies and upload it to the Cortex Cluster. The Cortex Cluster will setup an endpoint to a web service that can receive job submission requests and manage jobs. - -A job submission typically consists of an input dataset or the location of your input dataset, the number of workers for your job, and the batch size. When a job is submitted to your Batch API endpoint, you will immediately receive a Job ID that you can use to get the job's status and logs, and stop the job if necessary. Behind the scenes, your Batch API will break down the dataset into batches and push them onto a queue. Once all of the batches have been enqueued, the Cortex Cluster will spin up the requested number of workers and initialize them with your predictor implementation. Each worker will take one batch at a time from the queue and run your Predictor implementation. After all batches have been processed, the `on_job_complete` hook in your predictor implementation (if provided) will be executed by one of the workers. - -At any point, you can use the Job ID that was provided upon job submission to make requests to the Batch API endpoint to get job status, progress metrics, and worker statuses. Logs for each job are aggregated and are accessible via the Cortex CLI or in your AWS console. 
diff --git a/docs/deployments/compute.md b/docs/deployments/compute.md deleted file mode 100644 index 7937ca00ab..0000000000 --- a/docs/deployments/compute.md +++ /dev/null @@ -1,36 +0,0 @@ -# Compute - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -Compute resource requests in Cortex follow the syntax and meaning of [compute resources in Kubernetes](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container). - -For example: - -```yaml -- name: my-api - ... - compute: - cpu: 1 - gpu: 1 - mem: 1G -``` - -CPU, GPU, Inf, and memory requests in Cortex correspond to compute resource requests in Kubernetes. In the example above, the API will only be scheduled once 1 CPU, 1 GPU, and 1G of memory are available on any instance, and it will be guaranteed to have access to those resources throughout its execution. In some cases, resource requests can be (or may default to) `Null`. - -## CPU - -One unit of CPU corresponds to one virtual CPU on AWS. Fractional requests are allowed, and can be specified as a floating point number or via the "m" suffix (`0.2` and `200m` are equivalent). - -## GPU - -One unit of GPU corresponds to one virtual GPU. Fractional requests are not allowed. - -See [GPU documentation](gpus.md) for more information. - -## Memory - -One unit of memory is one byte. Memory can be expressed as an integer or by using one of these suffixes: `K`, `M`, `G`, `T` (or their power-of two counterparts: `Ki`, `Mi`, `Gi`, `Ti`). For example, the following values represent roughly the same memory: `128974848`, `129e6`, `129M`, `123Mi`. - -## Inf - -One unit of Inf corresponds to one Inferentia ASIC with 4 NeuronCores *(not the same thing as `cpu`)* and 8GB of cache memory *(not the same thing as `mem`)*. Fractional requests are not allowed. 
diff --git a/docs/deployments/realtime-api.md b/docs/deployments/realtime-api.md deleted file mode 100644 index 687fb270b0..0000000000 --- a/docs/deployments/realtime-api.md +++ /dev/null @@ -1,39 +0,0 @@ -# Realtime API Overview - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -You can deploy a Realtime API on Cortex to serve your model via an HTTP endpoint for on-demand inferences. - -## When should I use a Realtime API - -You may want to deploy your model as a Realtime API if any of the following scenarios apply to your use case: - -* predictions are served on demand -* predictions need to be made in the time of a single web request -* predictions need to be made on an individual basis -* predictions are served directly to consumers - -You may want to consider deploying your model as a [Batch API](batch-api.md) if these scenarios don't apply to you. - -A Realtime API deployed in Cortex has the following features: - -* request-based autoscaling -* rolling updates to enable you to update the model/serving code without downtime -* realtime metrics collection -* log streaming -* multi-model serving -* server-side batching -* traffic splitting (e.g. for A/B testing) - -## How does it work - -You specify the following: - -* a Cortex Predictor class in Python that defines how to initialize and serve your model -* an API configuration YAML file that defines how your API will behave in production (autoscaling, monitoring, networking, compute, etc.) - -Once you've implemented your predictor and defined your API configuration, you can use the Cortex CLI to deploy a Realtime API. The Cortex CLI will package your predictor implementation and the rest of the code and dependencies and upload it to the Cortex Cluster. The Cortex Cluster will set up an HTTP endpoint that routes traffic to multiple replicas/copies of web servers initialized with your code. 
- -When a request is made to the HTTP endpoint, it gets routed to one your API's replicas (at random). The replica receives the request, parses the payload and executes the inference code you've defined in your predictor implementation and sends a response. - -The Cortex Cluster will automatically scale based on the incoming traffic and the autoscaling configuration you've defined. You can safely update your model or your code and use the Cortex CLI to deploy without experiencing downtime because updates to your API will be rolled out automatically. Request metrics and logs will automatically be aggregated and be accessible via the Cortex CLI or on your AWS console. diff --git a/docs/summary.md b/docs/summary.md index 3b984a57ec..15642c9a60 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -1,7 +1,12 @@ # Table of contents -* [Deploy a realtime API](tutorials/realtime.md) -* [Deploy a batch API](tutorials/batch.md) +## Tutorials + +* [Realtime API](tutorials/realtime.md) +* [Batch API](tutorials/batch.md) +* [Multi-model API](tutorials/multi-model.md) +* [Traffic splitter](tutorials/traffic-splitter.md) +* [Project directory](tutorials/project.md) ## Running on AWS @@ -9,6 +14,8 @@ * [Credentials](aws/credentials.md) * [Security](aws/security.md) * [Spot instances](aws/spot.md) +* [GPUs](aws/gpu.md) +* [Inferentia](aws/inferentia.md) * [Networking](aws/networking.md) * [VPC peering](aws/vpc-peering.md) * [Custom domain](aws/custom-domain.md) @@ -36,16 +43,10 @@ * [Endpoints](deployments/batch-api/endpoints.md) * [Job statuses](deployments/batch-api/statuses.md) * [Python client](deployments/python-client.md) -* [Environments](deployments/environments.md) -* [Telemetry](deployments/telemetry.md) - -## Advanced - -* [Compute](deployments/compute.md) -* [Using GPUs](deployments/gpus.md) -* [Using Inferentia](deployments/inferentia.md) * [Python packages](deployments/python-packages.md) * [System packages](deployments/system-packages.md) +*
[Environments](deployments/environments.md) +* [Telemetry](deployments/telemetry.md) ## Troubleshooting diff --git a/docs/tutorials/multi-model.md b/docs/tutorials/multi-model.md new file mode 100644 index 0000000000..aca9ab7825 --- /dev/null +++ b/docs/tutorials/multi-model.md @@ -0,0 +1,75 @@ +# Deploy a multi-model API + +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ + +## Install cortex + +```bash +$ pip install cortex +``` + +## Spin up a cluster on AWS (requires AWS credentials) + +```bash +$ cortex cluster up +``` + +## Define a multi-model API + +```python +# multi_model.py + +import cortex + +class PythonPredictor: + def __init__(self, config): + from transformers import pipeline + + self.analyzer = pipeline(task="sentiment-analysis") + self.summarizer = pipeline(task="summarization") + + def predict(self, query_params, payload): + model = query_params.get("model") + + if model == "sentiment": + return self.analyzer(payload["text"])[0] + elif model == "summarizer": + return self.summarizer(payload["text"])[0]["summary_text"] + +requirements = ["tensorflow", "transformers"] + +api_spec = {"name": "multi-model", "kind": "RealtimeAPI"} + +cx = cortex.client("aws") +cx.deploy(api_spec, predictor=PythonPredictor, requirements=requirements) +``` + +## Deploy to AWS + +```bash +$ python multi_model.py +``` + +## Monitor + +```bash +$ cortex get multi-model --env aws --watch +``` + +## Stream logs + +```bash +$ cortex logs multi-model +``` + +## Make a request + +```bash +$ curl https://***.execute-api.us-west-2.amazonaws.com/multi-model?model=sentiment -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' +``` + +## Delete the API + +```bash +$ cortex delete multi-model +``` diff --git a/docs/tutorials/traffic-splitter.md
b/docs/tutorials/traffic-splitter.md index fb8537b000..3e1d2a7182 100644 --- a/docs/tutorials/traffic-splitter.md +++ b/docs/tutorials/traffic-splitter.md @@ -70,7 +70,7 @@ $ python traffic_splitter.py ## Monitor ```bash -$ cortex get text-generator --env aws --watch +$ cortex get text-generator --watch ``` ## Stream logs @@ -82,9 +82,7 @@ $ cortex logs text-generator ## Make a request ```bash -$ curl https:// \ - -X POST -H "Content-Type: application/json" \ - -d '{"text": "hello world"}' +$ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' ``` ## Delete the API From 230540a0ea36bb489ab4b3877a463970e721506e Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Mon, 7 Dec 2020 19:42:49 -0800 Subject: [PATCH 14/36] Update docs --- docs/deployments/batch-api/deployment.md | 120 ------------------ docs/deployments/realtime-api/deployment.md | 61 --------- docs/summary.md | 44 +++---- docs/tutorials/batch.md | 2 +- .../batch/configuration.md} | 12 +- .../batch}/endpoints.md | 0 .../batch}/predictors.md | 0 .../batch-api => workloads/batch}/statuses.md | 0 .../environments.md | 0 .../python-client.md | 0 .../python-packages.md | 0 .../realtime}/autoscaling.md | 0 .../realtime/configuration.md} | 10 -- .../realtime}/models.md | 0 .../realtime}/parallelism.md | 0 .../realtime}/prediction-monitoring.md | 0 .../realtime}/predictors.md | 0 .../realtime}/statuses.md | 0 .../realtime}/traffic-splitter.md | 0 .../system-packages.md | 0 docs/{deployments => workloads}/telemetry.md | 0 21 files changed, 23 insertions(+), 226 deletions(-) delete mode 100644 docs/deployments/batch-api/deployment.md delete mode 100644 docs/deployments/realtime-api/deployment.md rename docs/{deployments/batch-api/api-configuration.md => workloads/batch/configuration.md} (88%) rename docs/{deployments/batch-api => workloads/batch}/endpoints.md (100%) rename docs/{deployments/batch-api => 
workloads/batch}/predictors.md (100%) rename docs/{deployments/batch-api => workloads/batch}/statuses.md (100%) rename docs/{deployments => workloads}/environments.md (100%) rename docs/{deployments => workloads}/python-client.md (100%) rename docs/{deployments => workloads}/python-packages.md (100%) rename docs/{deployments/realtime-api => workloads/realtime}/autoscaling.md (100%) rename docs/{deployments/realtime-api/api-configuration.md => workloads/realtime/configuration.md} (93%) rename docs/{deployments/realtime-api => workloads/realtime}/models.md (100%) rename docs/{deployments/realtime-api => workloads/realtime}/parallelism.md (100%) rename docs/{deployments/realtime-api => workloads/realtime}/prediction-monitoring.md (100%) rename docs/{deployments/realtime-api => workloads/realtime}/predictors.md (100%) rename docs/{deployments/realtime-api => workloads/realtime}/statuses.md (100%) rename docs/{deployments/realtime-api => workloads/realtime}/traffic-splitter.md (100%) rename docs/{deployments => workloads}/system-packages.md (100%) rename docs/{deployments => workloads}/telemetry.md (100%) diff --git a/docs/deployments/batch-api/deployment.md b/docs/deployments/batch-api/deployment.md deleted file mode 100644 index 27b94f82bf..0000000000 --- a/docs/deployments/batch-api/deployment.md +++ /dev/null @@ -1,120 +0,0 @@ -# Batch API deployment - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -Once your model is [exported](../../guides/exporting.md), you've implemented a [Predictor](predictors.md), and you've [configured your API](api-configuration.md), you're ready to deploy a Batch API. 
- -## `cortex deploy` - -The `cortex deploy` command collects your configuration and source code and deploys your API on your cluster: - -```bash -$ cortex deploy - -created image-classifier (BatchAPI) -``` - -APIs are declarative, so to update your API, you can modify your source code and/or configuration and run `cortex deploy` again. - -After deploying a Batch API you can use `cortex get ` to display the Batch API endpoint, which you can use to make the following requests: - -1. Submit a batch job -1. Get the status of a job -1. Stop a job - -You can find documentation for the Batch API endpoint [here](endpoints.md). - -## `cortex get` - -The `cortex get` command displays the status of all of your API: - -```bash -$ cortex get - -env batch api running jobs latest job id last update -aws image-classifier 1 69d9c0013c2d0d97 (submitted 30s ago) 46s -``` - -## `cortex get ` - -`cortex get ` shows additional information about a specific Batch API and lists a summary of all currently running / recently submitted jobs. - -```bash -$ cortex get image-classifier - -job id status progress failed start time duration -69d9c0013c2d0d97 running 1/24 0 29 Jul 2020 14:38:01 UTC 30s -69da5b1f8cd3b2d3 completed with failures 15/16 1 29 Jul 2020 13:38:01 UTC 5m20s -69da5bc32feb6aa0 succeeded 40/40 0 29 Jul 2020 12:38:01 UTC 10m21s -69da5bd5b2f87258 succeeded 34/34 0 29 Jul 2020 11:38:01 UTC 8m54s - -endpoint: http://***.amazonaws.com/image-classifier -... -``` - -Appending the `--watch` flag will re-run the `cortex get` command every 2 seconds. - -## Job commands - -Once a job has been submitted to your Batch API (see [here](endpoints.md#submit-a-job)), you can use the Job ID from job submission response to get the status, stream logs, and stop a running job using the CLI. 
- -### `cortex get ` - -After a submitting a job, you can use the `cortex get ` command to show information about the job: - -```bash -$ cortex get image-classifier 69d9c0013c2d0d97 - -job id: 69d9c0013c2d0d97 -status: running - -start time: 29 Jul 2020 14:38:01 UTC -end time: - -duration: 32s - -batch stats -total succeeded failed avg time per batch -24 1 0 20s - -worker stats -requested running failed succeeded -2 2 0 0 - -job endpoint: https://***..amazonaws.com/image-classifier/69d9c0013c2d0d97 -``` - -### `cortex logs ` - -You can use `cortex logs ` to stream logs from a job: - -```bash -$ cortex logs image-classifier 69d9c0013c2d0d97 - -started enqueuing batches -partitioning 240 items found in job submission into 24 batches of size 10 -completed enqueuing a total of 24 batches -spinning up workers... -2020-07-30 16:50:30.147522:cortex:pid-1:INFO:downloading the project code -2020-07-30 16:50:30.268987:cortex:pid-1:INFO:downloading the python serving image -.... -``` - -### `cortex delete ` - -You can use `cortex delete ` to stop a running job: - -```bash -$ cortex delete image-classifier 69d9c0013c2d0d97 - -stopped job 69d96a01ea55da8c -``` - -## `cortex delete` - -Use the `cortex delete` command to delete your API: - -```bash -$ cortex delete my-api - -deleting my-api -``` diff --git a/docs/deployments/realtime-api/deployment.md b/docs/deployments/realtime-api/deployment.md deleted file mode 100644 index f068ff463d..0000000000 --- a/docs/deployments/realtime-api/deployment.md +++ /dev/null @@ -1,61 +0,0 @@ -# API deployment - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -Once your model is [exported](../../guides/exporting.md), you've implemented a [Predictor](predictors.md), and you've [configured your API](api-configuration.md), you're ready to deploy! 
- -## `cortex deploy` - -The `cortex deploy` command collects your configuration and source code and deploys your API on your cluster: - -```bash -$ cortex deploy - -creating my-api (RealtimeAPI) -``` - -APIs are declarative, so to update your API, you can modify your source code and/or configuration and run `cortex deploy` again. - -## `cortex get` - -The `cortex get` command displays the status of your APIs, and `cortex get ` shows additional information about a specific API. - -```bash -$ cortex get my-api - -status up-to-date requested last update avg request 2XX -live 1 1 1m - - - -endpoint: http://***.amazonaws.com/text-generator -... -``` - -Appending the `--watch` flag will re-run the `cortex get` command every 2 seconds. - -## `cortex logs` - -You can stream logs from your API using the `cortex logs` command: - -```bash -$ cortex logs my-api -``` - -## Making a prediction - -You can use `curl` to test your prediction service, for example: - -```bash -$ curl http://***.amazonaws.com/my-api \ - -X POST -H "Content-Type: application/json" \ - -d '{"key": "value"}' -``` - -## `cortex delete` - -Use the `cortex delete` command to delete your API: - -```bash -$ cortex delete my-api - -deleting my-api -``` diff --git a/docs/summary.md b/docs/summary.md index 15642c9a60..d92b75620b 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -24,29 +24,27 @@ * [Update](aws/update.md) * [Uninstall](aws/uninstall.md) -## Deployments +## Workloads -* [Realtime API](deployments/realtime-api.md) - * [Predictor implementation](deployments/realtime-api/predictors.md) - * [API configuration](deployments/realtime-api/api-configuration.md) - * [API deployment](deployments/realtime-api/deployment.md) - * [API statuses](deployments/realtime-api/statuses.md) - * [Models](deployments/realtime-api/models.md) - * [Parallelism](deployments/realtime-api/parallelism.md) - * [Autoscaling](deployments/realtime-api/autoscaling.md) - * [Prediction 
monitoring](deployments/realtime-api/prediction-monitoring.md) - * [Traffic Splitter](deployments/realtime-api/traffic-splitter.md) -* [Batch API](deployments/batch-api.md) - * [Predictor implementation](deployments/batch-api/predictors.md) - * [API configuration](deployments/batch-api/api-configuration.md) - * [API deployment](deployments/batch-api/deployment.md) - * [Endpoints](deployments/batch-api/endpoints.md) - * [Job statuses](deployments/batch-api/statuses.md) -* [Python client](deployments/python-client.md) -* [Python packages](deployments/python-packages.md) -* [System packages](deployments/system-packages.md) -* [Environments](deployments/environments.md) -* [Telemetry](deployments/telemetry.md) +* [Realtime API](workloads/realtime.md) + * [Predictor implementation](workloads/realtime/predictors.md) + * [API configuration](workloads/realtime/configuration.md) + * [API statuses](workloads/realtime/statuses.md) + * [Models](workloads/realtime/models.md) + * [Parallelism](workloads/realtime/parallelism.md) + * [Autoscaling](workloads/realtime/autoscaling.md) + * [Prediction monitoring](workloads/realtime/prediction-monitoring.md) + * [Traffic Splitter](workloads/realtime/traffic-splitter.md) +* [Batch API](workloads/batch.md) + * [Predictor implementation](workloads/batch/predictors.md) + * [API configuration](workloads/batch/configuration.md) + * [Endpoints](workloads/batch/endpoints.md) + * [Job statuses](workloads/batch/statuses.md) +* [Python client](workloads/python-client.md) +* [Python packages](workloads/python-packages.md) +* [System packages](workloads/system-packages.md) +* [Environments](workloads/environments.md) +* [Telemetry](workloads/telemetry.md) ## Troubleshooting @@ -54,7 +52,7 @@ * [404/503 API responses](troubleshooting/api-request-errors.md) * [NVIDIA runtime not found](troubleshooting/nvidia-container-runtime-not-found.md) * [TF session in predict()](troubleshooting/tf-session-in-predict.md) -* [Serving-side batching 
errors](troubleshooting/server-side-batching-errors.md) +* [Server-side batching errors](troubleshooting/server-side-batching-errors.md) ## Guides diff --git a/docs/tutorials/batch.md b/docs/tutorials/batch.md index 7d26f97755..7fee9c5150 100644 --- a/docs/tutorials/batch.md +++ b/docs/tutorials/batch.md @@ -86,7 +86,7 @@ $ python batch.py ## Describe the Batch API ```bash -$ cortex get image-classifier -e aws +$ cortex get image-classifier --env aws ``` ## Submit a job diff --git a/docs/deployments/batch-api/api-configuration.md b/docs/workloads/batch/configuration.md similarity index 88% rename from docs/deployments/batch-api/api-configuration.md rename to docs/workloads/batch/configuration.md index eda8e9d067..ad5d216710 100644 --- a/docs/deployments/batch-api/api-configuration.md +++ b/docs/workloads/batch/configuration.md @@ -1,11 +1,7 @@ -# API configuration +# Batch API configuration _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -Once your model is [exported](../../guides/exporting.md) and you've implemented a [Predictor](predictors.md), you can configure your API via a YAML file (typically named `cortex.yaml`). - -Reference the section below which corresponds to your Predictor type: [Python](#python-predictor), [TensorFlow](#tensorflow-predictor), or [ONNX](#onnx-predictor). - ## Python Predictor @@ -29,8 +25,6 @@ Reference the section below which corresponds to your Predictor type: [Python](# mem: # memory request per worker, e.g. 200Mi or 1Gi (default: Null) ``` -See additional documentation for [compute](../compute.md), [networking](../../aws/networking.md), and [overriding API images](../system-packages.md). - ## TensorFlow Predictor @@ -65,8 +59,6 @@ See additional documentation for [compute](../compute.md), [networking](../../aw mem: # memory request per worker, e.g. 
200Mi or 1Gi (default: Null) ``` -See additional documentation for [compute](../compute.md), [networking](../../aws/networking.md), and [overriding API images](../system-packages.md). - ## ONNX Predictor @@ -94,5 +86,3 @@ See additional documentation for [compute](../compute.md), [networking](../../aw gpu: # GPU request per worker (default: 0) mem: # memory request per worker, e.g. 200Mi or 1Gi (default: Null) ``` - -See additional documentation for [compute](../compute.md), [networking](../../aws/networking.md), and [overriding API images](../system-packages.md). diff --git a/docs/deployments/batch-api/endpoints.md b/docs/workloads/batch/endpoints.md similarity index 100% rename from docs/deployments/batch-api/endpoints.md rename to docs/workloads/batch/endpoints.md diff --git a/docs/deployments/batch-api/predictors.md b/docs/workloads/batch/predictors.md similarity index 100% rename from docs/deployments/batch-api/predictors.md rename to docs/workloads/batch/predictors.md diff --git a/docs/deployments/batch-api/statuses.md b/docs/workloads/batch/statuses.md similarity index 100% rename from docs/deployments/batch-api/statuses.md rename to docs/workloads/batch/statuses.md diff --git a/docs/deployments/environments.md b/docs/workloads/environments.md similarity index 100% rename from docs/deployments/environments.md rename to docs/workloads/environments.md diff --git a/docs/deployments/python-client.md b/docs/workloads/python-client.md similarity index 100% rename from docs/deployments/python-client.md rename to docs/workloads/python-client.md diff --git a/docs/deployments/python-packages.md b/docs/workloads/python-packages.md similarity index 100% rename from docs/deployments/python-packages.md rename to docs/workloads/python-packages.md diff --git a/docs/deployments/realtime-api/autoscaling.md b/docs/workloads/realtime/autoscaling.md similarity index 100% rename from docs/deployments/realtime-api/autoscaling.md rename to docs/workloads/realtime/autoscaling.md 
diff --git a/docs/deployments/realtime-api/api-configuration.md b/docs/workloads/realtime/configuration.md similarity index 93% rename from docs/deployments/realtime-api/api-configuration.md rename to docs/workloads/realtime/configuration.md index 21f1312477..9d9e47fa13 100644 --- a/docs/deployments/realtime-api/api-configuration.md +++ b/docs/workloads/realtime/configuration.md @@ -2,10 +2,6 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -Once your model is [exported](../../guides/exporting.md) and you've implemented a [Predictor](predictors.md), you can configure your API via a YAML file (typically named `cortex.yaml`). - -Reference the section below which corresponds to your Predictor type: [Python](#python-predictor), [TensorFlow](#tensorflow-predictor), or [ONNX](#onnx-predictor). - ## Python Predictor @@ -60,8 +56,6 @@ Reference the section below which corresponds to your Predictor type: [Python](# max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) ``` -See additional documentation for [models](models.md), [parallelism](parallelism.md), [autoscaling](autoscaling.md), [compute](../compute.md), [networking](../../aws/networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](../system-packages.md). - ## TensorFlow Predictor @@ -123,8 +117,6 @@ See additional documentation for [models](models.md), [parallelism](parallelism. max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 
10% (default: 25%) ``` -See additional documentation for [models](models.md), [parallelism](parallelism.md), [autoscaling](autoscaling.md), [compute](../compute.md), [networking](../../aws/networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](../system-packages.md). - ## ONNX Predictor @@ -178,5 +170,3 @@ See additional documentation for [models](models.md), [parallelism](parallelism. max_surge: # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates) max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) ``` - -See additional documentation for [models](models.md), [parallelism](parallelism.md), [autoscaling](autoscaling.md), [compute](../compute.md), [networking](../../aws/networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](../system-packages.md). 
diff --git a/docs/deployments/realtime-api/models.md b/docs/workloads/realtime/models.md similarity index 100% rename from docs/deployments/realtime-api/models.md rename to docs/workloads/realtime/models.md diff --git a/docs/deployments/realtime-api/parallelism.md b/docs/workloads/realtime/parallelism.md similarity index 100% rename from docs/deployments/realtime-api/parallelism.md rename to docs/workloads/realtime/parallelism.md diff --git a/docs/deployments/realtime-api/prediction-monitoring.md b/docs/workloads/realtime/prediction-monitoring.md similarity index 100% rename from docs/deployments/realtime-api/prediction-monitoring.md rename to docs/workloads/realtime/prediction-monitoring.md diff --git a/docs/deployments/realtime-api/predictors.md b/docs/workloads/realtime/predictors.md similarity index 100% rename from docs/deployments/realtime-api/predictors.md rename to docs/workloads/realtime/predictors.md diff --git a/docs/deployments/realtime-api/statuses.md b/docs/workloads/realtime/statuses.md similarity index 100% rename from docs/deployments/realtime-api/statuses.md rename to docs/workloads/realtime/statuses.md diff --git a/docs/deployments/realtime-api/traffic-splitter.md b/docs/workloads/realtime/traffic-splitter.md similarity index 100% rename from docs/deployments/realtime-api/traffic-splitter.md rename to docs/workloads/realtime/traffic-splitter.md diff --git a/docs/deployments/system-packages.md b/docs/workloads/system-packages.md similarity index 100% rename from docs/deployments/system-packages.md rename to docs/workloads/system-packages.md diff --git a/docs/deployments/telemetry.md b/docs/workloads/telemetry.md similarity index 100% rename from docs/deployments/telemetry.md rename to docs/workloads/telemetry.md From e11c015ad517e133b1ab5a132fa58464d847a576 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Mon, 7 Dec 2020 20:08:06 -0800 Subject: [PATCH 15/36] Update docs --- docs/summary.md | 2 ++ docs/tutorials/multi-model.md | 2 -- 2 files 
changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/summary.md b/docs/summary.md index d92b75620b..4e95d78306 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -1,5 +1,7 @@ # Table of contents +* [Get started](tutorials/realtime.md) + ## Tutorials * [Realtime API](tutorials/realtime.md) diff --git a/docs/tutorials/multi-model.md b/docs/tutorials/multi-model.md index aca9ab7825..4fc5b7f8e3 100644 --- a/docs/tutorials/multi-model.md +++ b/docs/tutorials/multi-model.md @@ -1,7 +1,5 @@ # Deploy a multi-model API -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.23.*, run `git checkout -b 0.23` or switch to the `0.23` branch on GitHub)_ - ## Install cortex ```bash From f6c11a1874ff77ee3755526cd75217020db4900c Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Mon, 7 Dec 2020 20:11:09 -0800 Subject: [PATCH 16/36] Update .gitbook.yaml --- .gitbook.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitbook.yaml b/.gitbook.yaml index 52b37106f2..2039ff62cf 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -1,6 +1,7 @@ root: ./docs/ structure: + readme: ./tutorials/realtime.md summary: summary.md redirects: From 3dce6f0f9162b3f2e379d3e51d6045e7c1f5a4ce Mon Sep 17 00:00:00 2001 From: Vishal Bollu Date: Tue, 8 Dec 2020 11:18:50 -0500 Subject: [PATCH 17/36] Revert batch predictor.py --- test/batch/image-classifier/predictor.py | 61 ++++++++++++++++-------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/test/batch/image-classifier/predictor.py b/test/batch/image-classifier/predictor.py index 92a8cc26b9..293c466fd3 100644 --- a/test/batch/image-classifier/predictor.py +++ b/test/batch/image-classifier/predictor.py @@ -2,22 +2,18 @@ import os import requests +import torch +import torchvision +from torchvision import transforms from PIL import Image from io import BytesIO +import boto3 import json import re -# labels 
"https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" -# bucket, key - class PythonPredictor: def __init__(self, config, job_spec): - import re - import boto3 - from torchvision import transforms - import torchvision - self.model = torchvision.models.alexnet(pretrained=True).eval() normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) @@ -25,34 +21,61 @@ def __init__(self, config, job_spec): [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] ) - self.labels = requests.get(config["labels"]).text.split("\n")[1:] + self.labels = requests.get( + "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" + ).text.split("\n")[1:] + + if len(config.get("dest_s3_dir", "")) == 0: + raise Exception("'dest_s3_dir' field was not provided in job submission") - self.s3 = boto3.client("s3") # initialize S3 client to save results + self.s3 = boto3.client("s3") self.bucket, self.key = re.match("s3://(.+?)/(.+)", config["dest_s3_dir"]).groups() self.key = os.path.join(self.key, job_spec["job_id"]) def predict(self, payload, batch_id): - import json - from PIL import Image - import torch - tensor_list = [] - for image_url in payload: # download and preprocess each image - img_pil = Image.open(BytesIO(requests.get(image_url).content)) + + # download and preprocess each image + for image_url in payload: + if image_url.startswith("s3://"): + bucket, image_key = re.match("s3://(.+?)/(.+)", image_url).groups() + image_bytes = self.s3.get_object(Bucket=bucket, Key=image_key)["Body"].read() + else: + image_bytes = requests.get(image_url).content + + img_pil = Image.open(BytesIO(image_bytes)) tensor_list.append(self.preprocess(img_pil)) + # classify the batch of images img_tensor = torch.stack(tensor_list) - with torch.no_grad(): # classify the batch of images + with torch.no_grad(): prediction = self.model(img_tensor) _, indices = prediction.max(1) - results = [ # extract 
predicted classes + # extract predicted classes + results = [ {"url": payload[i], "class": self.labels[class_idx]} for i, class_idx in enumerate(indices) ] + json_output = json.dumps(results) # save results + self.s3.put_object(Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", Body=json_output) + + def on_job_complete(self): + all_results = [] + + # aggregate all classifications + paginator = self.s3.get_paginator("list_objects_v2") + for page in paginator.paginate(Bucket=self.bucket, Prefix=self.key): + for obj in page["Contents"]: + body = self.s3.get_object(Bucket=self.bucket, Key=obj["Key"])["Body"] + all_results += json.loads(body.read().decode("utf8")) + + # save single file containing aggregated classifications self.s3.put_object( - Bucket=self.bucket, Key=f"{self.key}/{batch_id}.json", Body=json.dumps(results) + Bucket=self.bucket, + Key=os.path.join(self.key, "aggregated_results.json"), + Body=json.dumps(all_results), ) From e598e242c05614e6a4ba1f69766ea9f2838990e1 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 8 Dec 2020 10:55:22 -0800 Subject: [PATCH 18/36] Update docs --- docs/summary.md | 1 + docs/tutorials/batch.md | 6 ++---- docs/tutorials/realtime.md | 14 +++++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/summary.md b/docs/summary.md index 4e95d78306..88688246b5 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -1,6 +1,7 @@ # Table of contents * [Get started](tutorials/realtime.md) +* [Chat with us](https://gitter.im/cortexlabs/cortex) ## Tutorials diff --git a/docs/tutorials/batch.md b/docs/tutorials/batch.md index 7fee9c5150..75140dfa12 100644 --- a/docs/tutorials/batch.md +++ b/docs/tutorials/batch.md @@ -1,14 +1,12 @@ # Deploy a batch API -**Note: at this time, batch APIs are only supported on AWS.** - ## Install cortex ```bash $ pip install cortex ``` -## Spin up a cluster on AWS (requires AWS credentials) +## Spin up a cluster on AWS ```bash $ cortex cluster up @@ -142,5 +140,5 @@ Once the 
job is complete, you should be able to find the results of the batch jo ## Delete the Batch API ```bash -$ cortex delete --env local image-classifier +$ cortex delete image-classifier --env local ``` diff --git a/docs/tutorials/realtime.md b/docs/tutorials/realtime.md index 6871021b2e..4e22e6b43a 100644 --- a/docs/tutorials/realtime.md +++ b/docs/tutorials/realtime.md @@ -9,7 +9,7 @@ $ pip install cortex ## Define a realtime API ```python -# realtime.py +# text_generator.py import cortex @@ -33,7 +33,7 @@ cx.deploy(api_spec, predictor=PythonPredictor, requirements=requirements) ## Test locally (requires Docker) ```bash -$ python realtime.py +$ python text_generator.py ``` ## Monitor @@ -54,13 +54,13 @@ $ curl http://localhost:8889 -X POST -H "Content-Type: application/json" -d '{"t $ cortex logs text-generator ``` -## Spin up a cluster on AWS (requires AWS credentials) +## Spin up a cluster on AWS ```bash $ cortex cluster up ``` -## Edit `realtime.py` +## Edit `text_generator.py` ```python # cx = cortex.client("local") @@ -70,7 +70,7 @@ cx = cortex.client("aws") ## Deploy to AWS ```bash -$ python realtime.py +$ python text_generator.py ``` ## Monitor @@ -88,7 +88,7 @@ $ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator -X POST -H ## Delete the APIs ```bash -$ cortex delete --env local text-generator +$ cortex delete text-generator --env local -$ cortex delete --env aws text-generator +$ cortex delete text-generator --env aws ``` From 68080c346c256f2c5fba65a468d72115d3077d95 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 8 Dec 2020 13:40:26 -0800 Subject: [PATCH 19/36] Update README.md --- README.md | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/README.md b/README.md index e955d8c274..9e450eb2d0 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,30 @@ Cortex is an open source platform for deploying, managing, and scaling machine l * Runs inference on spot instances with 
on-demand backups. * Autoscales to handle production workloads. +#### Configure Cortex + +```yaml +# cluster.yaml + +region: us-east-1 +instance_type: g4dn.xlarge +min_instances: 10 +max_instances: 100 +spot: true +``` + +#### Spin up Cortex on your AWS account + +```text +$ cortex cluster up --config cluster.yaml + +○ configuring autoscaling ✓ +○ configuring networking ✓ +○ configuring logging ✓ + +cortex is ready! +``` +
## Reproducible deployments @@ -25,6 +49,36 @@ Cortex is an open source platform for deploying, managing, and scaling machine l * Integrate with your data science platform or CI/CD system. * Test locally before deploying to your cluster. +#### Implement a predictor + +```python +from transformers import pipeline + +class PythonPredictor: + def __init__(self, config): + self.model = pipeline(task="text-generation") + + def predict(self, payload): + return self.model(payload["text"])[0] +``` + +#### Configure an API + +```python +api_spec = { + "name": "text-generator", + "kind": "RealtimeAPI", + "compute": { + "gpu": 1, + "mem": "8Gi" + }, + "autoscaling": { + "min_replicas": 1, + "max_replicas": 10 + } +} +``` +
## Scalable machine learning APIs @@ -35,8 +89,29 @@ Cortex is an open source platform for deploying, managing, and scaling machine l * Configure traffic splitting for A/B testing. * Update APIs without downtime. +#### Deploy to your cluster + +```python +import cortex + +cx = cortex.client("aws") +cx.create_api(api_spec, predictor=PythonPredictor) + +# creating https://example.com/text-generator +``` + +#### Consume your API + +```bash +$ curl https://example.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' +``` +
## Get started +```bash +pip install cortex +``` + [Deploy models](https://docs.cortex.dev) and [join our community](https://gitter.im/cortexlabs/cortex). From 1f19d616ce56f184d66f507a486c56ef1e6c5064 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 8 Dec 2020 14:44:21 -0800 Subject: [PATCH 20/36] Update docs --- docs/tutorials/advanced.md | 119 +++++++++++++++++++++++++++++++++++++ docs/tutorials/batch.md | 34 +++++++---- docs/tutorials/realtime.md | 38 ++++++++---- 3 files changed, 167 insertions(+), 24 deletions(-) create mode 100644 docs/tutorials/advanced.md diff --git a/docs/tutorials/advanced.md b/docs/tutorials/advanced.md new file mode 100644 index 0000000000..347d45f9aa --- /dev/null +++ b/docs/tutorials/advanced.md @@ -0,0 +1,119 @@ +# Advanced deployments + +## Install cortex + +```bash +$ pip install cortex +``` + +## Create a directory + +```bash +$ mkdir text-generator && cd text-generator + +$ touch predictor.py requirements.txt text-generator.yaml +``` + +## Define a Predictor in `predictor.py` + +```python +class PythonPredictor: + def __init__(self, config): + from transformers import pipeline + + self.model = pipeline(task="text-generation") + + def predict(self, payload): + return self.model(payload["text"])[0] +``` + +## Specify Python dependencies in `requirements.txt` + +```text +tensorflow +transformers +``` + +## Configure 2 realtime APIs and a traffic splitter in `text-generator.yaml` + +```yaml +- name: text-generator-cpu + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + cpu: 1 + +- name: text-generator-gpu + kind: RealtimeAPI + predictor: + type: python + path: predictor.py + compute: + gpu: 1 + +- name: text-generator + kind: TrafficSplitter + apis: + - name: text-generator-cpu + weight: 80 + - name: text-generator-gpu + weight: 20 +``` + +## Test locally (requires Docker) + +```bash +$ cortex deploy text-generator.yaml +``` + +## Monitor + +```bash +$ cortex get text-generator --watch +``` + 
+## Make a request + +```bash +$ curl http://localhost:8889 -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' +``` + +## Stream logs + +```bash +$ cortex logs text-generator +``` + +## Spin up a cluster on AWS + +```bash +$ cortex cluster up +``` + +## Deploy to AWS + +```bash +$ cortex deploy text-generator.yaml --env aws +``` + +## Monitor + +```bash +$ cortex get text-generator --env aws --watch +``` + +## Make a request + +```bash +$ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' +``` + +## Delete the APIs + +```bash +$ cortex delete text-generator --env local + +$ cortex delete text-generator --env aws +``` diff --git a/docs/tutorials/batch.md b/docs/tutorials/batch.md index 75140dfa12..253ff00e5c 100644 --- a/docs/tutorials/batch.md +++ b/docs/tutorials/batch.md @@ -1,18 +1,30 @@ # Deploy a batch API -## Install cortex +Deploy models as batch APIs that can orchestrate distributed batch inference jobs on large datasets. 
+ +## Key features + +* Distributed inference +* Fault tolerance with queues +* Metrics and log aggregation +* `on_job_complete` webhook +* Scale to 0 + +## How it works + +### Install cortex ```bash $ pip install cortex ``` -## Spin up a cluster on AWS +### Spin up a cluster on AWS ```bash $ cortex cluster up ``` -## Define a batch API +### Define a batch API ```python # batch.py @@ -75,19 +87,19 @@ cx = cortex.client("aws") cx.deploy(api_spec, predictor=PythonPredictor, requirements=requirements) ``` -## Deploy to your Cortex cluster on AWS +### Deploy to your Cortex cluster on AWS ```bash $ python batch.py ``` -## Describe the Batch API +### Describe the Batch API ```bash $ cortex get image-classifier --env aws ``` -## Submit a job +### Submit a job ```python import cortex @@ -121,24 +133,24 @@ print(response) # > {"job_id":"69b183ed6bdf3e9b","api_name":"image-classifier", "config": {"dest_s3_dir": ...}} ``` -## Monitor the job +### Monitor the job ```bash $ cortex get image-classifier 69b183ed6bdf3e9b ``` -## Stream job logs +### Stream job logs ```bash $ cortex logs image-classifier 69b183ed6bdf3e9b ``` -## View the results +### View the results Once the job is complete, you should be able to find the results of the batch job in the S3 directory you've specified. -## Delete the Batch API +### Delete the Batch API ```bash -$ cortex delete image-classifier --env local +$ cortex delete image-classifier --env local ``` diff --git a/docs/tutorials/realtime.md b/docs/tutorials/realtime.md index 4e22e6b43a..f21ad88903 100644 --- a/docs/tutorials/realtime.md +++ b/docs/tutorials/realtime.md @@ -1,12 +1,24 @@ # Deploy a realtime API -## Install cortex +Deploy models as realtime APIs that can respond to prediction requests on demand. For example, an object detection web service that receives an image and returns a list of objects in the image. 
+ +## Key features + +* Request-based autoscaling +* Multi-model endpoints +* Server-side batching +* Metrics and log aggregation +* Rolling updates + +## How it works + +### Install cortex ```bash $ pip install cortex ``` -## Define a realtime API +### Define a realtime API ```python # text_generator.py @@ -27,65 +39,65 @@ requirements = ["tensorflow", "transformers"] api_spec = {"name": "text-generator", "kind": "RealtimeAPI"} cx = cortex.client("local") -cx.deploy(api_spec, predictor=PythonPredictor, requirements=requirements) +cx.create_api(api_spec, predictor=PythonPredictor, requirements=requirements) ``` -## Test locally (requires Docker) +### Test locally (requires Docker) ```bash $ python text_generator.py ``` -## Monitor +### Monitor ```bash $ cortex get text-generator --watch ``` -## Make a request +### Make a request ```bash $ curl http://localhost:8889 -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' ``` -## Stream logs +### Stream logs ```bash $ cortex logs text-generator ``` -## Spin up a cluster on AWS +### Spin up a cluster on AWS ```bash $ cortex cluster up ``` -## Edit `text_generator.py` +### Edit `text_generator.py` ```python # cx = cortex.client("local") cx = cortex.client("aws") ``` -## Deploy to AWS +### Deploy to AWS ```bash $ python text_generator.py ``` -## Monitor +### Monitor ```bash $ cortex get text-generator --env aws --watch ``` -## Make a request +### Make a request ```bash $ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' ``` -## Delete the APIs +### Delete the APIs ```bash $ cortex delete text-generator --env local From 6b03e4eefe3eef4a3676b9288fafcb88b5266668 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 17:55:36 -0500 Subject: [PATCH 21/36] Tutorials --- docs/tutorials/batch.md | 4 +- docs/tutorials/multi-model.md | 46 +----------- docs/tutorials/project.md | 112 ++++++++--------------------- 
docs/tutorials/traffic-splitter.md | 75 +++++++------------ 4 files changed, 57 insertions(+), 180 deletions(-) diff --git a/docs/tutorials/batch.md b/docs/tutorials/batch.md index 75140dfa12..868f90fdb1 100644 --- a/docs/tutorials/batch.md +++ b/docs/tutorials/batch.md @@ -72,7 +72,7 @@ api_spec = { } cx = cortex.client("aws") -cx.deploy(api_spec, predictor=PythonPredictor, requirements=requirements) +cx.create_api(api_spec, predictor=PythonPredictor, requirements=requirements) ``` ## Deploy to your Cortex cluster on AWS @@ -140,5 +140,5 @@ Once the job is complete, you should be able to find the results of the batch jo ## Delete the Batch API ```bash -$ cortex delete image-classifier --env local +$ cortex delete image-classifier --env local ``` diff --git a/docs/tutorials/multi-model.md b/docs/tutorials/multi-model.md index 4fc5b7f8e3..8fd78fe2da 100644 --- a/docs/tutorials/multi-model.md +++ b/docs/tutorials/multi-model.md @@ -1,19 +1,5 @@ # Deploy a multi-model API -## Install cortex - -```bash -$ pip install cortex -``` - -## Spin up a cluster on AWS (requires AWS credentials) - -```bash -$ cortex cluster up -``` - -## Define a multi-model API - ```python # multi_model.py @@ -39,35 +25,5 @@ requirements = ["tensorflow", "transformers"] api_spec = {"name": "multi-model", "kind": "RealtimeAPI"} cx = cortex.client("aws") -cx.deploy(api_spec, predictor=PythonPredictor, requirements=requirements) -``` - -## Deploy to AWS - -```bash -$ python multi_model.py -``` - -## Monitor - -```bash -$ cortex get multi-model --env aws --watch -``` - -## Stream logs - -```bash -$ cortex logs multi-model -``` - -## Make a request - -```bash -$ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator?model=sentiment -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' -``` - -## Delete the API - -```bash -$ cortex delete multi-model +cx.create_api(api_spec, predictor=PythonPredictor, requirements=requirements) ``` diff --git 
a/docs/tutorials/project.md b/docs/tutorials/project.md index 84bc55bec9..be51aea5fb 100644 --- a/docs/tutorials/project.md +++ b/docs/tutorials/project.md @@ -1,117 +1,63 @@ # Deploy a project -## Install cortex +You can deploy an API by providing a project directory. Cortex will save the project directory and make it available during API initialization. ```bash -$ pip install cortex +project/ + ├── model.py + ├── util.py + ├── predictor.py + ├── requirements.txt + └── ... ``` -## Create a directory - -```bash -$ mkdir text-generator && cd text-generator - -$ touch predictor.py requirements.txt realtime.py -``` - -## Define a Predictor +You can define your Predictor class in a separate python file and import code from your project. ```python # predictor.py +from model import MyModel + class PythonPredictor: def __init__(self, config): - from transformers import pipeline + self.model = MyModel() - self.model = pipeline(task="text-generation") - - def predict(self, payload): - return self.model(payload["text"])[0] + def predict(self, payload): + return self.model(payload) ``` -## Specify Python dependencies - -```text -tensorflow -transformers -``` - -## Configure an API +## Deploy using the Python Client ```python -# realtime.py - import cortex api_spec = { "name": "text-generator", "kind": "RealtimeAPI", - "predictor": {"type": "python", "path": "predictor.py"}, + "predictor": { + "type": "python", + "path": "predictor.py" + } } -cx = cortex.client("local") -cx.deploy(api_spec, project_dir=".") -``` - -## Test locally (requires Docker) - -```bash -$ python realtime.py -``` - -## Monitor - -```bash -$ cortex get text-generator --watch -``` - -## Make a request - -```bash -$ curl http://localhost:8889 -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' -``` - -## Stream logs - -```bash -$ cortex logs text-generator -``` - -## Spin up a cluster on AWS (requires AWS credentials) - -```bash -$ cortex cluster up -``` - -## Edit `realtime.py` - -```python -# cx = 
cortex.client("local") cx = cortex.client("aws") +cx.create_api(api_spec, project_dir=".") ``` -## Deploy to AWS +## Deploy using the CLI -```bash -$ python realtime.py -``` +Navigate to your project directory and define a yaml with the api specification: -## Monitor +```yaml +# api.yaml -```bash -$ cortex get text-generator --env aws --watch +- name: text-generator + kind: RealtimeAPI + predictor: + type: python + path: predictor.py ``` -## Make a request - ```bash -$ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' -``` - -## Delete the APIs - -```bash -$ cortex delete --env local text-generator - -$ cortex delete --env aws text-generator +$ cortex deploy api.yaml -e aws ``` diff --git a/docs/tutorials/traffic-splitter.md b/docs/tutorials/traffic-splitter.md index 3e1d2a7182..be4502929b 100644 --- a/docs/tutorials/traffic-splitter.md +++ b/docs/tutorials/traffic-splitter.md @@ -1,29 +1,17 @@ -# Deploy a traffic splitter +# Traffic splitter -## Install cortex +A Traffic Splitter can be used to expose multiple APIs as a single endpoint. The percentage of traffic routed to each API can be controlled. This can be useful when performing A/B tests, setting up multi-armed bandits or performing canary deployments. 
-```bash -$ pip install cortex -``` - -## Spin up a cluster on AWS (requires AWS credentials) - -```bash -$ cortex cluster up -``` +**Note: Traffic Splitter is only supported on a Cortex cluster** -## Define 2 realtime APIs and a traffic splitter +## Deploy APIs ```python -# traffic_splitter.py - -import cortex - class PythonPredictor: def __init__(self, config): from transformers import pipeline - self.model = pipeline(task="text-generation") + self.model = pipeline(task="text-generation", model=config["model"]) def predict(self, payload): return self.model(payload["text"])[0] @@ -46,47 +34,34 @@ api_spec_gpu = { }, } -traffic_splitter = { - "name": "text-generator", - "kind": "TrafficSplitter", - "apis": [ - {"name": "text-generator-cpu", "weight": 30}, - {"name": "text-generator-gpu", "weight": 70}, - ], -} - cx = cortex.client("aws") -cx.deploy(api_spec_cpu, predictor=PythonPredictor, requirements=requirements) -cx.deploy(api_spec_gpu, predictor=PythonPredictor, requirements=requirements) -cx.deploy(traffic_splitter) +cx.create_api(api_spec_cpu, predictor=PythonPredictor, requirements=requirements) +cx.create_api(api_spec_gpu, predictor=PythonPredictor, requirements=requirements) ``` -## Deploy to AWS +## Deploy a traffic splitter -```bash -$ python traffic_splitter.py -``` - -## Monitor - -```bash -$ cortex get text-generator --watch -``` - -## Stream logs +```python +traffic_splitter_spec = { + "name": "classifier", + "kind": "TrafficSplitter", + "apis": [ + {"name": "text-generator-cpu", "weight": 50}, + {"name": "text-generator-gpu", "weight": 50}, + ], +} -```bash -$ cortex logs text-generator +cx.create_api(traffic_splitter_spec) ``` -## Make a request +## Update the weights of the traffic splitter -```bash -$ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' -``` +```python +traffic_splitter_spec = cx.get_api("classifier")["spec"]["submitted_api_spec"] -## Delete 
the API +# send 99% of the traffic to text-generator-gpu +traffic_splitter_spec["apis"][0]["weight"] = 1 +traffic_splitter_spec["apis"][1]["weight"] = 99 -```bash -$ cortex delete text-generator +cx.patch(traffic_splitter_spec) ``` From 6d9fe04d587aa43b36e501aa08cfb36bbd23abe9 Mon Sep 17 00:00:00 2001 From: Vishal Bollu Date: Tue, 8 Dec 2020 23:37:08 +0000 Subject: [PATCH 22/36] Remove cloud folder --- docs/cloud/install.md | 15 --------------- docs/cloud/uninstall.md | 15 --------------- docs/cloud/update.md | 11 ----------- 3 files changed, 41 deletions(-) delete mode 100644 docs/cloud/install.md delete mode 100644 docs/cloud/uninstall.md delete mode 100644 docs/cloud/update.md diff --git a/docs/cloud/install.md b/docs/cloud/install.md deleted file mode 100644 index c210d7c2e4..0000000000 --- a/docs/cloud/install.md +++ /dev/null @@ -1,15 +0,0 @@ -# Install - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -## AWS - -To spin up Cortex using AWS as the cloud provider, follow [these instructions](../aws/install.md). - -## GCP - -To spin up Cortex using GCP as the cloud provider, follow [these instructions](../gcp/install.md). - -## Local - -If you'll only be using Cortex locally, install it with `pip install cortex`. diff --git a/docs/cloud/uninstall.md b/docs/cloud/uninstall.md deleted file mode 100644 index a162f34dbd..0000000000 --- a/docs/cloud/uninstall.md +++ /dev/null @@ -1,15 +0,0 @@ -# Uninstall - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -## AWS - -To spin down a Cortex cluster on AWS, follow [these instructions](../aws/uninstall.md). - -## GCP - -To spin down a Cortex cluster on GCP, follow [these instructions](../gcp/uninstall.md). - -## Local - -To uninstall the Cortex CLI, run `pip uninstall cortex`. 
diff --git a/docs/cloud/update.md b/docs/cloud/update.md deleted file mode 100644 index 1cf87cc8da..0000000000 --- a/docs/cloud/update.md +++ /dev/null @@ -1,11 +0,0 @@ -# Update - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -## AWS - -To update the configuration of a running Cortex cluster on AWS, follow [these instructions](../aws/update.md). - -## GCP - -It is currently not possible to update a Cortex cluster running on GCP. From 215ab408b292f56b2abc3bbc137e3024965c5058 Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 8 Dec 2020 15:45:02 -0800 Subject: [PATCH 23/36] Update docs --- docs/tutorials/multi-model.md | 10 ++++++++++ docs/tutorials/realtime.md | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/multi-model.md b/docs/tutorials/multi-model.md index 8fd78fe2da..4993dfebf5 100644 --- a/docs/tutorials/multi-model.md +++ b/docs/tutorials/multi-model.md @@ -1,5 +1,9 @@ # Deploy a multi-model API +Deploy several models in a single API to improve resource utilization efficiency. + +### Define a multi-model API + ```python # multi_model.py @@ -27,3 +31,9 @@ api_spec = {"name": "multi-model", "kind": "RealtimeAPI"} cx = cortex.client("aws") cx.create_api(api_spec, predictor=PythonPredictor, requirements=requirements) ``` + +### Deploy + +```bash +$ python multi_model.py +``` diff --git a/docs/tutorials/realtime.md b/docs/tutorials/realtime.md index f21ad88903..5befb96bc8 100644 --- a/docs/tutorials/realtime.md +++ b/docs/tutorials/realtime.md @@ -1,6 +1,6 @@ # Deploy a realtime API -Deploy models as realtime APIs that can respond to prediction requests on demand. For example, an object detection web service that receives an image and returns a list of objects in the image. +Deploy models as realtime APIs that can respond to prediction requests on demand. 
## Key features From 1c4cea1f109a61e39e1629f3ff2c6668aa93f36f Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 19:17:21 -0500 Subject: [PATCH 24/36] Remove references to specific documentation in the code --- cli/cluster/errors.go | 2 +- cli/cmd/errors.go | 4 ++-- cli/cmd/lib_aws_creds.go | 2 +- cli/cmd/lib_cluster_config_aws.go | 22 +++++++++---------- cli/cmd/lib_cluster_config_gcp.go | 10 ++++----- cli/local/deploy.go | 2 +- docs/aws/install.md | 2 +- docs/workloads/python-client.md | 7 +----- manager/debug.sh | 2 +- manager/info.sh | 2 +- manager/install.sh | 4 ++-- manager/refresh.sh | 2 +- pkg/lib/docker/errors.go | 2 +- pkg/lib/k8s/errors.go | 2 +- pkg/operator/endpoints/errors.go | 2 +- pkg/operator/endpoints/submit_job.go | 2 +- .../resources/batchapi/validations.go | 2 +- pkg/operator/resources/resources.go | 4 ++-- pkg/types/spec/errors.go | 4 ++-- pkg/types/spec/validations.go | 18 ++------------- pkg/workloads/cortex/client/cortex/client.py | 9 ++------ pkg/workloads/cortex/serve/init/bootloader.sh | 4 ++-- 22 files changed, 43 insertions(+), 67 deletions(-) diff --git a/cli/cluster/errors.go b/cli/cluster/errors.go index a0a953428b..0256283da5 100644 --- a/cli/cluster/errors.go +++ b/cli/cluster/errors.go @@ -62,7 +62,7 @@ func ErrorFailedToConnectOperator(originalError error, envName string, operatorU msg += fmt.Sprintf(" → otherwise you can ignore this message, and prevent it in the future with `cortex env delete %s`\n", envName) msg += "\nif you have a cluster running:\n" msg += fmt.Sprintf(" → run `cortex cluster info --configure-env %s` to update your environment (include `--config ` if you have a cluster configuration file)\n", envName) - msg += fmt.Sprintf(" → if you set `operator_load_balancer_scheme: internal` in your cluster configuration file, your CLI must run from within a VPC that has access to your cluster's VPC (see https://docs.cortex.dev/v/%s/aws/vpc-peering)\n", consts.CortexVersionMinor) + msg += fmt.Sprintf(" → if you 
set `operator_load_balancer_scheme: internal` in your cluster configuration file, your CLI must run from within a VPC that has access to your cluster's VPC (see https://docs.cortex.dev/v/%s/)\n", consts.CortexVersionMinor) } return errors.WithStack(&errors.Error{ diff --git a/cli/cmd/errors.go b/cli/cmd/errors.go index 7757ed835b..bd94fa8b11 100644 --- a/cli/cmd/errors.go +++ b/cli/cmd/errors.go @@ -249,7 +249,7 @@ func ErrorMissingAWSCredentials() error { func ErrorCredentialsInClusterConfig(cmd string, path string) error { return errors.WithStack(&errors.Error{ Kind: ErrCredentialsInClusterConfig, - Message: fmt.Sprintf("specifying credentials in the cluster configuration is no longer supported, please specify aws credentials using flags (e.g. cortex cluster %s --config %s --aws-key --aws-secret ) or set environment variables; see https://docs.cortex.dev/v/%s/aws/security#iam-permissions for more information", cmd, path, consts.CortexVersionMinor), + Message: fmt.Sprintf("specifying credentials in the cluster configuration is no longer supported, please specify aws credentials using flags (e.g. cortex cluster %s --config %s --aws-key --aws-secret ) or set environment variables; see https://docs.cortex.dev/v/%s/ for more information", cmd, path, consts.CortexVersionMinor), }) } @@ -343,6 +343,6 @@ func ErrorDeployFromTopLevelDir(genericDirName string, providerType types.Provid } return errors.WithStack(&errors.Error{ Kind: ErrDeployFromTopLevelDir, - Message: fmt.Sprintf("cannot deploy from your %s directory - when deploying your API, cortex sends all files in your project directory (i.e. 
the directory which contains cortex.yaml) to your %s (see https://docs.cortex.dev/v/%s/deployments/realtime-api/predictors#project-files for Realtime API and https://docs.cortex.dev/v/%s/deployments/batch-api/predictors#project-files for Batch API); therefore it is recommended to create a subdirectory for your project files", genericDirName, targetStr, consts.CortexVersionMinor, consts.CortexVersionMinor), + Message: fmt.Sprintf("cannot deploy from your %s directory - when deploying your API, cortex sends all files in your project directory (i.e. the directory which contains cortex.yaml) to your %s (see https://docs.cortex.dev/v/%s/); therefore it is recommended to create a subdirectory for your project files", genericDirName, targetStr, consts.CortexVersionMinor), }) } diff --git a/cli/cmd/lib_aws_creds.go b/cli/cmd/lib_aws_creds.go index d2a8866393..99a49167db 100644 --- a/cli/cmd/lib_aws_creds.go +++ b/cli/cmd/lib_aws_creds.go @@ -69,7 +69,7 @@ func promptIfNotAdmin(awsClient *aws.Client, disallowPrompt bool) { } if !awsClient.IsAdmin() { - warningStr := fmt.Sprintf("warning: your IAM user%s does not have administrator access. This will likely prevent Cortex from installing correctly, so it is recommended to attach the AdministratorAccess policy to your IAM user (or to a group that your IAM user belongs to) via the AWS IAM console. If you'd like, you may provide separate credentials for your cluster to use after it's running (see https://docs.cortex.dev/v/%s/aws/security for instructions).\n\n", accessKeyMsg, consts.CortexVersionMinor) + warningStr := fmt.Sprintf("warning: your IAM user%s does not have administrator access. This will likely prevent Cortex from installing correctly, so it is recommended to attach the AdministratorAccess policy to your IAM user (or to a group that your IAM user belongs to) via the AWS IAM console. 
If you'd like, you may provide separate credentials for your cluster to use after it's running (see https://docs.cortex.dev/v/%s/).\n\n", accessKeyMsg, consts.CortexVersionMinor) if disallowPrompt { fmt.Print(warningStr) } else { diff --git a/cli/cmd/lib_cluster_config_aws.go b/cli/cmd/lib_cluster_config_aws.go index 2971e47c19..020a9648ee 100644 --- a/cli/cmd/lib_cluster_config_aws.go +++ b/cli/cmd/lib_cluster_config_aws.go @@ -70,7 +70,7 @@ func readCachedClusterConfigFile(clusterConfig *clusterconfig.Config, filePath s func readUserClusterConfigFile(clusterConfig *clusterconfig.Config) error { errs := cr.ParseYAMLFile(clusterConfig, clusterconfig.UserValidation, _flagClusterConfig) if errors.HasError(errs) { - return errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) + return errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) } return nil @@ -85,7 +85,7 @@ func getNewClusterAccessConfig(disallowPrompt bool) (*clusterconfig.AccessConfig if _flagClusterConfig != "" { errs := cr.ParseYAMLFile(accessConfig, clusterconfig.AccessValidation, _flagClusterConfig) if errors.HasError(errs) { - return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) + return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) } } @@ -121,7 +121,7 @@ func getClusterAccessConfigWithCache(disallowPrompt bool) (*clusterconfig.Access if _flagClusterConfig != "" { errs := cr.ParseYAMLFile(accessConfig, clusterconfig.AccessValidation, _flagClusterConfig) if errors.HasError(errs) { - return nil, 
errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) + return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) } } @@ -192,7 +192,7 @@ func getInstallClusterConfig(awsClient *aws.Client, awsCreds AWSCredentials, acc err = clusterConfig.Validate(awsClient) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) if _flagClusterConfig != "" { err = errors.Wrap(err, _flagClusterConfig) } @@ -258,7 +258,7 @@ func getConfigureClusterConfig(cachedClusterConfig clusterconfig.Config, awsCred err = userClusterConfig.Validate(awsClient) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) if _flagClusterConfig != "" { err = errors.Wrap(err, _flagClusterConfig) } @@ -542,23 +542,23 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsCreds A fmt.Printf("cortex will also create an s3 bucket (%s) and a cloudwatch log group (%s)%s\n\n", clusterConfig.Bucket, clusterConfig.ClusterName, privateSubnetMsg) if clusterConfig.APIGatewaySetting == clusterconfig.NoneAPIGatewaySetting { - fmt.Print(fmt.Sprintf("warning: you've disabled API Gateway cluster-wide, so APIs will not be able to create API Gateway endpoints (they will still be reachable via the API load balancer; see 
https://docs.cortex.dev/v/%s/aws/networking for more information)\n\n", consts.CortexVersionMinor)) + fmt.Print(fmt.Sprintf("warning: you've disabled API Gateway cluster-wide, so APIs will not be able to create API Gateway endpoints (they will still be reachable via the API load balancer; see https://docs.cortex.dev/v/%s/ for more information)\n\n", consts.CortexVersionMinor)) } if clusterConfig.OperatorLoadBalancerScheme == clusterconfig.InternalLoadBalancerScheme { - fmt.Print(fmt.Sprintf("warning: you've configured the operator load balancer to be internal; you must configure VPC Peering to connect your CLI to your cluster operator (see https://docs.cortex.dev/v/%s/aws/vpc-peering)\n\n", consts.CortexVersionMinor)) + fmt.Print(fmt.Sprintf("warning: you've configured the operator load balancer to be internal; you must configure VPC Peering to connect your CLI to your cluster operator (see https://docs.cortex.dev/v/%s/)\n\n", consts.CortexVersionMinor)) } if isSpot && clusterConfig.SpotConfig.OnDemandBackup != nil && !*clusterConfig.SpotConfig.OnDemandBackup { if *clusterConfig.SpotConfig.OnDemandBaseCapacity == 0 && *clusterConfig.SpotConfig.OnDemandPercentageAboveBaseCapacity == 0 { - fmt.Printf("warning: you've disabled on-demand instances (%s=0 and %s=0); spot instances are not guaranteed to be available so please take that into account for production clusters; see https://docs.cortex.dev/v/%s/aws/spot for more information\n\n", clusterconfig.OnDemandBaseCapacityKey, clusterconfig.OnDemandPercentageAboveBaseCapacityKey, consts.CortexVersionMinor) + fmt.Printf("warning: you've disabled on-demand instances (%s=0 and %s=0); spot instances are not guaranteed to be available so please take that into account for production clusters; see https://docs.cortex.dev/v/%s/ for more information\n\n", clusterconfig.OnDemandBaseCapacityKey, clusterconfig.OnDemandPercentageAboveBaseCapacityKey, consts.CortexVersionMinor) } else { - fmt.Printf("warning: you've enabled spot 
instances; spot instances are not guaranteed to be available so please take that into account for production clusters; see https://docs.cortex.dev/v/%s/aws/spot for more information\n\n", consts.CortexVersionMinor) + fmt.Printf("warning: you've enabled spot instances; spot instances are not guaranteed to be available so please take that into account for production clusters; see https://docs.cortex.dev/v/%s/ for more information\n\n", consts.CortexVersionMinor) } } if !disallowPrompt { - exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/aws/install for more information", consts.CortexVersionMinor) + exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/ for more information", consts.CortexVersionMinor) prompt.YesOrExit("would you like to continue?", "", exitMessage) } } @@ -567,7 +567,7 @@ func confirmConfigureClusterConfig(clusterConfig clusterconfig.Config, awsCreds fmt.Println(clusterConfigConfirmationStr(clusterConfig, awsCreds, awsClient)) if !disallowPrompt { - exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/aws/install for more information", consts.CortexVersionMinor) + exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/ for more information", consts.CortexVersionMinor) prompt.YesOrExit(fmt.Sprintf("your cluster named \"%s\" in %s will be updated according to the configuration above, are you sure you want to continue?", clusterConfig.ClusterName, *clusterConfig.Region), "", exitMessage) } } diff --git a/cli/cmd/lib_cluster_config_gcp.go b/cli/cmd/lib_cluster_config_gcp.go index 4530bab618..62724d0616 100644 --- a/cli/cmd/lib_cluster_config_gcp.go +++ b/cli/cmd/lib_cluster_config_gcp.go @@ -66,7 +66,7 @@ func readCachedGCPClusterConfigFile(clusterConfig 
*clusterconfig.GCPConfig, file func readUserGCPClusterConfigFile(clusterConfig *clusterconfig.GCPConfig) error { errs := cr.ParseYAMLFile(clusterConfig, clusterconfig.UserGCPValidation, _flagClusterGCPConfig) if errors.HasError(errs) { - return errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/gcp/install", consts.CortexVersionMinor)) + return errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) } return nil @@ -81,7 +81,7 @@ func getNewGCPClusterAccessConfig(disallowPrompt bool) (*clusterconfig.GCPAccess if _flagClusterGCPConfig != "" { errs := cr.ParseYAMLFile(accessConfig, clusterconfig.GCPAccessValidation, _flagClusterGCPConfig) if errors.HasError(errs) { - return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/gcp/install", consts.CortexVersionMinor)) + return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) } } @@ -120,7 +120,7 @@ func getGCPClusterAccessConfigWithCache(disallowPrompt bool) (*clusterconfig.GCP if _flagClusterGCPConfig != "" { errs := cr.ParseYAMLFile(accessConfig, clusterconfig.GCPAccessValidation, _flagClusterGCPConfig) if errors.HasError(errs) { - return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/gcp/install", consts.CortexVersionMinor)) + return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) } } @@ -196,7 +196,7 @@ func getGCPInstallClusterConfig(gcpClient *gcp.Client, accessConfig clusterconfi err = 
clusterConfig.Validate(gcpClient) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/gcp/install", consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) if _flagClusterGCPConfig != "" { err = errors.Wrap(err, _flagClusterGCPConfig) } @@ -212,7 +212,7 @@ func confirmGCPInstallClusterConfig(clusterConfig *clusterconfig.GCPConfig, disa fmt.Printf("a cluster named \"%s\" will be created in %s (zone: %s)\n\n", clusterConfig.ClusterName, *clusterConfig.Project, *clusterConfig.Zone) if !disallowPrompt { - exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/gcp/install for more information", consts.CortexVersionMinor) + exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/ for more information", consts.CortexVersionMinor) prompt.YesOrExit("would you like to continue?", "", exitMessage) } } diff --git a/cli/local/deploy.go b/cli/local/deploy.go index 3f3407ab42..3a7741c9ba 100644 --- a/cli/local/deploy.go +++ b/cli/local/deploy.go @@ -101,7 +101,7 @@ func deploy(env cliconfig.Environment, apiConfigs []userconfig.API, projectFiles models := []spec.CuratedModelResource{} err = ValidateLocalAPIs(apiConfigs, &models, projectFiles, awsClient, gcpClient) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration", consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) return nil, err } diff --git a/docs/aws/install.md b/docs/aws/install.md index 
55dc61d1a8..e44282dd51 100644 --- a/docs/aws/install.md +++ b/docs/aws/install.md @@ -62,7 +62,7 @@ nat_gateway: none api_load_balancer_scheme: internet-facing # operator load balancer scheme [internet-facing | internal] -# note: if using "internal", you must configure VPC Peering to connect your CLI to your cluster operator (https://docs.cortex.dev/v/master/aws/vpc-peering) +# note: if using "internal", you must configure VPC Peering to connect your CLI to your cluster operator (https://docs.cortex.dev/v/master/) operator_load_balancer_scheme: internet-facing # API Gateway [public (API Gateway will be used by default, can be disabled per API) | none (API Gateway will be disabled for all APIs)] diff --git a/docs/workloads/python-client.md b/docs/workloads/python-client.md index 6c98188c56..3af866f68c 100644 --- a/docs/workloads/python-client.md +++ b/docs/workloads/python-client.md @@ -116,13 +116,8 @@ Deploy an API. **Arguments**: -- `api_spec` - A dictionary defining a single Cortex API. Schema can be found here: - → Realtime API: https://docs.cortex.dev/v/master/deployments/realtime-api/api-configuration - → Batch API: https://docs.cortex.dev/v/master/deployments/batch-api/api-configuration - → Traffic Splitter: https://docs.cortex.dev/v/master/deployments/realtime-api/traffic-splitter +- `api_spec` - A dictionary defining a single Cortex API. See https://docs.cortex.dev/v/master/ for schema. - `predictor` - A Cortex Predictor class implementation. Not required when deploying a traffic splitter. - → Realtime API: https://docs.cortex.dev/v/master/deployments/realtime-api/predictors - → Batch API: https://docs.cortex.dev/v/master/deployments/batch-api/predictors - `requirements` - A list of PyPI dependencies that will be installed before the predictor class implementation is invoked. - `conda_packages` - A list of Conda dependencies that will be installed before the predictor class implementation is invoked. - `project_dir` - Path to a python project. 
diff --git a/manager/debug.sh b/manager/debug.sh index 46c3a6e03c..0b292b5fed 100755 --- a/manager/debug.sh +++ b/manager/debug.sh @@ -27,7 +27,7 @@ if ! eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTE fi eksctl utils write-kubeconfig --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION | grep -v "saved kubeconfig as" | grep -v "using region" | grep -v "eksctl version" || true -out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi +out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/"; exit 1; fi echo -n "gathering cluster data" diff --git a/manager/info.sh b/manager/info.sh index c754737605..a682d17f8b 100755 --- a/manager/info.sh +++ b/manager/info.sh @@ -36,7 +36,7 @@ if ! 
eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTE fi eksctl utils write-kubeconfig --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION | grep -v "saved kubeconfig as" | grep -v "using region" | grep -v "eksctl version" || true -out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi +out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/"; exit 1; fi operator_endpoint=$(get_operator_endpoint) api_load_balancer_endpoint=$(get_api_load_balancer_endpoint) diff --git a/manager/install.sh b/manager/install.sh index cb0b9616ea..aeddadbc3a 100755 --- a/manager/install.sh +++ b/manager/install.sh @@ -97,7 +97,7 @@ function cluster_up_aws() { echo -e "\ncortex is ready!" 
if [ "$CORTEX_OPERATOR_LOAD_BALANCER_SCHEME" == "internal" ]; then - echo -e "note: you will need to configure VPC Peering to connect to your cluster: https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/vpc-peering" + echo -e "note: you will need to configure VPC Peering to connect to your cluster: https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/" fi print_endpoints_aws @@ -242,7 +242,7 @@ function check_eks() { function write_kubeconfig() { eksctl utils write-kubeconfig --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION | grep -v "saved kubeconfig as" | grep -v "using region" | grep -v "eksctl version" || true - out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi + out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/"; exit 1; fi } function setup_configmap() { diff --git a/manager/refresh.sh b/manager/refresh.sh index ce1389cdd5..42595008c4 100755 --- a/manager/refresh.sh +++ b/manager/refresh.sh @@ -27,7 +27,7 @@ if ! 
eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTE fi eksctl utils write-kubeconfig --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION | grep -v "saved kubeconfig as" | grep -v "using region" | grep -v "eksctl version" || true -out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi +out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/"; exit 1; fi kubectl get -n=default configmap cluster-config -o yaml >> cluster_configmap.yaml python refresh_cluster_config.py cluster_configmap.yaml tmp_cluster_config.yaml diff --git a/pkg/lib/docker/errors.go b/pkg/lib/docker/errors.go index fe7483e41b..d12033731a 100644 --- a/pkg/lib/docker/errors.go +++ b/pkg/lib/docker/errors.go @@ -81,7 +81,7 @@ func ErrorImageInaccessible(image string, providerType types.ProviderType, cause } case types.AWSProviderType: if strings.Contains(cause.Error(), "authorized") || strings.Contains(cause.Error(), "authentication") { - message += fmt.Sprintf("\n\nif you would like to use a private docker registry, see https://docs.cortex.dev/v/%s/guides/private-docker", consts.CortexVersionMinor) + message += fmt.Sprintf("\n\nif you would like to use a private docker registry, see https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor) } } diff --git a/pkg/lib/k8s/errors.go b/pkg/lib/k8s/errors.go index d47a644f4f..0fd3ff0f69 100644 --- a/pkg/lib/k8s/errors.go +++ b/pkg/lib/k8s/errors.go @@ -63,6 +63,6 @@ func ErrorParseAnnotation(annotationName string, annotationVal string, desiredTy func ErrorParseQuantity(qtyStr string) 
error { return errors.WithStack(&errors.Error{ Kind: ErrParseQuantity, - Message: fmt.Sprintf("%s: invalid kubernetes quantity, some valid examples are 1, 200m, 500Mi, 2G (see here for more information: https://docs.cortex.dev/v/%s/advanced/compute)", qtyStr, consts.CortexVersionMinor), + Message: fmt.Sprintf("%s: invalid kubernetes quantity, some valid examples are 1, 200m, 500Mi, 2G (see here for more information: https://docs.cortex.dev/v/%s/)", qtyStr, consts.CortexVersionMinor), }) } diff --git a/pkg/operator/endpoints/errors.go b/pkg/operator/endpoints/errors.go index 061df2e0bf..b4ba8ccfaa 100644 --- a/pkg/operator/endpoints/errors.go +++ b/pkg/operator/endpoints/errors.go @@ -42,7 +42,7 @@ const ( func ErrorAPIVersionMismatch(operatorVersion string, clientVersion string) error { return errors.WithStack(&errors.Error{ Kind: ErrAPIVersionMismatch, - Message: fmt.Sprintf("your CLI version (%s) doesn't match your Cortex operator version (%s); please update your cluster by following the instructions at https://docs.cortex.dev/update, or update your CLI (pip install cortex==%s)", clientVersion, operatorVersion, operatorVersion), + Message: fmt.Sprintf("your CLI version (%s) doesn't match your Cortex operator version (%s); please update your cluster by following the instructions at https://docs.cortex.dev, or update your CLI (pip install cortex==%s)", clientVersion, operatorVersion, operatorVersion), }) } diff --git a/pkg/operator/endpoints/submit_job.go b/pkg/operator/endpoints/submit_job.go index 44bc1f606c..e3eff9bd63 100644 --- a/pkg/operator/endpoints/submit_job.go +++ b/pkg/operator/endpoints/submit_job.go @@ -60,7 +60,7 @@ func SubmitJob(w http.ResponseWriter, r *http.Request) { err = json.Unmarshal(bodyBytes, &submission) if err != nil { - respondError(w, r, errors.Append(err, fmt.Sprintf("\n\njob submission schema can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/endpoints", consts.CortexVersionMinor))) + respondError(w, r, 
errors.Append(err, fmt.Sprintf("\n\njob submission schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor))) return } diff --git a/pkg/operator/resources/batchapi/validations.go b/pkg/operator/resources/batchapi/validations.go index 13323161b6..e1e7ebf84c 100644 --- a/pkg/operator/resources/batchapi/validations.go +++ b/pkg/operator/resources/batchapi/validations.go @@ -86,7 +86,7 @@ func validateJobSubmissionSchema(submission *schema.JobSubmission) error { func validateJobSubmission(submission *schema.JobSubmission) error { err := validateJobSubmissionSchema(submission) if err != nil { - return errors.Append(err, fmt.Sprintf("\n\njob submission schema can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/endpoints", consts.CortexVersionMinor)) + return errors.Append(err, fmt.Sprintf("\n\njob submission schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) } if submission.FilePathLister != nil { diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go index fb98a2244e..13c0bfe750 100644 --- a/pkg/operator/resources/resources.go +++ b/pkg/operator/resources/resources.go @@ -101,7 +101,7 @@ func Deploy(projectBytes []byte, configFileName string, configBytes []byte, forc err = ValidateClusterAPIs(apiConfigs, projectFiles) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema can be found here:\n → Realtime API: https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration\n → Batch API: https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration\n → Traffic Splitter: https://docs.cortex.dev/v/%s/deployments/realtime-api/traffic-splitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) return nil, err } @@ -252,7 +252,7 @@ func patchAPI(apiConfig 
*userconfig.API, configFileName string, force bool) (*sp err = ValidateClusterAPIs([]userconfig.API{*apiConfig}, projectFiles) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema can be found here:\n → Realtime API: https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration\n → Batch API: https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration\n → Traffic Splitter: https://docs.cortex.dev/v/%s/deployments/realtime-api/traffic-splitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema can be found here:\n → Realtime API: https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) return nil, "", err } diff --git a/pkg/types/spec/errors.go b/pkg/types/spec/errors.go index 937c60f6a7..db4cd568dc 100644 --- a/pkg/types/spec/errors.go +++ b/pkg/types/spec/errors.go @@ -100,14 +100,14 @@ var _modelCurrentStructure = ` func ErrorMalformedConfig() error { return errors.WithStack(&errors.Error{ Kind: ErrMalformedConfig, - Message: fmt.Sprintf("cortex YAML configuration files must contain a list of maps (see https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration for Realtime API documentation and see https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration for Batch API documentation)", consts.CortexVersionMinor, consts.CortexVersionMinor), + Message: fmt.Sprintf("cortex YAML configuration files must contain a list of maps (see https://docs.cortex.dev/v/%s/ for api configuration schema)", consts.CortexVersionMinor), }) } func ErrorNoAPIs() error { return errors.WithStack(&errors.Error{ Kind: ErrNoAPIs, - Message: fmt.Sprintf("at least one API must be configured (see https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration for Realtime API documentation and see https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration for Batch API documentation)", 
consts.CortexVersionMinor, consts.CortexVersionMinor), + Message: fmt.Sprintf("at least one API must be configured (see https://docs.cortex.dev/v/%s/ for api configuration schema)", consts.CortexVersionMinor), }) } diff --git a/pkg/types/spec/validations.go b/pkg/types/spec/validations.go index 46dcb5ae23..1522416d98 100644 --- a/pkg/types/spec/validations.go +++ b/pkg/types/spec/validations.go @@ -641,14 +641,7 @@ func ExtractAPIConfigs( kindString, _ := data[userconfig.KindKey].(string) kind := userconfig.KindFromString(kindString) err = errors.Wrap(errors.FirstError(errs...), userconfig.IdentifyAPI(configFileName, name, kind, i)) - switch provider { - case types.LocalProviderType: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime APIs can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration", consts.CortexVersionMinor)) - case types.AWSProviderType: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema can be found here:\n → Realtime API: https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration\n → Batch API: https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration\n → Traffic Splitter: https://docs.cortex.dev/v/%s/deployments/realtime-api/traffic-splitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) - case types.GCPProviderType: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime APIs can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration", consts.CortexVersionMinor)) - } + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) } if resourceStruct.Kind == userconfig.BatchAPIKind || resourceStruct.Kind == userconfig.TrafficSplitterKind { @@ -663,14 +656,7 @@ func ExtractAPIConfigs( kindString, _ := data[userconfig.KindKey].(string) kind := 
userconfig.KindFromString(kindString) err = errors.Wrap(errors.FirstError(errs...), userconfig.IdentifyAPI(configFileName, name, kind, i)) - switch kind { - case userconfig.RealtimeAPIKind: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration", consts.CortexVersionMinor)) - case userconfig.BatchAPIKind: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Batch API can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration", consts.CortexVersionMinor)) - case userconfig.TrafficSplitterKind: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Traffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/traffic-splitter", consts.CortexVersionMinor)) - } + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema can be found at https://docs.cortex.dev/v/%s/", consts.CortexVersionMinor)) } api.Index = i api.FileName = configFileName diff --git a/pkg/workloads/cortex/client/cortex/client.py b/pkg/workloads/cortex/client/cortex/client.py index 27722bcaa2..e1af803e5a 100644 --- a/pkg/workloads/cortex/client/cortex/client.py +++ b/pkg/workloads/cortex/client/cortex/client.py @@ -44,7 +44,7 @@ def __init__(self, env: dict): self.env = env self.env_name = env["name"] - # CORTEX_VERSION_MINOR x5 + # CORTEX_VERSION_MINOR def create_api( self, api_spec: dict, @@ -59,13 +59,8 @@ def create_api( Deploy an API. Args: - api_spec: A dictionary defining a single Cortex API. Schema can be found here: - → Realtime API: https://docs.cortex.dev/v/master/deployments/realtime-api/api-configuration - → Batch API: https://docs.cortex.dev/v/master/deployments/batch-api/api-configuration - → Traffic Splitter: https://docs.cortex.dev/v/master/deployments/realtime-api/traffic-splitter + api_spec: A dictionary defining a single Cortex API. 
See https://docs.cortex.dev/v/master/ for schema. predictor: A Cortex Predictor class implementation. Not required when deploying a traffic splitter. - → Realtime API: https://docs.cortex.dev/v/master/deployments/realtime-api/predictors - → Batch API: https://docs.cortex.dev/v/master/deployments/batch-api/predictors requirements: A list of PyPI dependencies that will be installed before the predictor class implementation is invoked. conda_packages: A list of Conda dependencies that will be installed before the predictor class implementation is invoked. project_dir: Path to a python project. diff --git a/pkg/workloads/cortex/serve/init/bootloader.sh b/pkg/workloads/cortex/serve/init/bootloader.sh index d119fdf0ff..7ea38ff92f 100755 --- a/pkg/workloads/cortex/serve/init/bootloader.sh +++ b/pkg/workloads/cortex/serve/init/bootloader.sh @@ -21,9 +21,9 @@ export EXPECTED_CORTEX_VERSION=master if [ "$CORTEX_VERSION" != "$EXPECTED_CORTEX_VERSION" ]; then if [ "$CORTEX_PROVIDER" == "local" ]; then - echo "error: your Cortex CLI version ($CORTEX_VERSION) doesn't match your predictor image version ($EXPECTED_CORTEX_VERSION); please update your predictor image by modifying the \`image\` field in your API configuration file (e.g. cortex.yaml) and re-running \`cortex deploy\`, or update your CLI by following the instructions at https://docs.cortex.dev/update" + echo "error: your Cortex CLI version ($CORTEX_VERSION) doesn't match your predictor image version ($EXPECTED_CORTEX_VERSION); please update your predictor image by modifying the \`image\` field in your API configuration file (e.g. cortex.yaml) and re-running \`cortex deploy\`, or update your CLI by following the instructions at https://docs.cortex.dev/" else - echo "error: your Cortex operator version ($CORTEX_VERSION) doesn't match your predictor image version ($EXPECTED_CORTEX_VERSION); please update your predictor image by modifying the \`image\` field in your API configuration file (e.g. 
cortex.yaml) and re-running \`cortex deploy\`, or update your cluster by following the instructions at https://docs.cortex.dev/update" + echo "error: your Cortex operator version ($CORTEX_VERSION) doesn't match your predictor image version ($EXPECTED_CORTEX_VERSION); please update your predictor image by modifying the \`image\` field in your API configuration file (e.g. cortex.yaml) and re-running \`cortex deploy\`, or update your cluster by following the instructions at https://docs.cortex.dev/" fi exit 1 fi From 6429cb4b470ced559bb941b67e171c3118a8a0c2 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 19:17:41 -0500 Subject: [PATCH 25/36] Update generate_python_client_md.sh --- dev/generate_python_client_md.sh | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/dev/generate_python_client_md.sh b/dev/generate_python_client_md.sh index fd68e9130f..2aa4317250 100755 --- a/dev/generate_python_client_md.sh +++ b/dev/generate_python_client_md.sh @@ -30,38 +30,38 @@ cd $ROOT/pkg/workloads/cortex/client pip3 install -e . 
-pydoc-markdown -m cortex -m cortex.client --render-toc > $ROOT/docs/miscellaneous/python-client.md +pydoc-markdown -m cortex -m cortex.client --render-toc > $ROOT/docs/workloads/python-client.md # title -sed -i "s/# Table of Contents/# Python client\n\n_WARNING: you are on the master branch, please refer to the docs on the branch that matches your \`cortex version\`_/g" $ROOT/docs/miscellaneous/python-client.md +sed -i "s/# Table of Contents/# Python client\n\n_WARNING: you are on the master branch, please refer to the docs on the branch that matches your \`cortex version\`_/g" $ROOT/docs/workloads/python-client.md # delete links -sed -i "//g" $ROOT/docs/miscellaneous/python-client.md +sed -i "s/^## create\\\_api/## create\\\_api\n\n/g" $ROOT/docs/workloads/python-client.md pip3 uninstall -y cortex rm -rf $ROOT/pkg/workloads/cortex/client/cortex.egg-info From f4ed4295f4853dbd5e4477b79ce59fa409b928cc Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 19:22:42 -0500 Subject: [PATCH 26/36] Update traffic-splitter.md --- docs/tutorials/traffic-splitter.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/tutorials/traffic-splitter.md b/docs/tutorials/traffic-splitter.md index be4502929b..ea403d9586 100644 --- a/docs/tutorials/traffic-splitter.md +++ b/docs/tutorials/traffic-splitter.md @@ -7,6 +7,8 @@ A Traffic Splitter can be used expose multiple APIs as a single endpoint. 
The pe ## Deploy APIs ```python +import cortex + class PythonPredictor: def __init__(self, config): from transformers import pipeline @@ -43,7 +45,7 @@ cx.create_api(api_spec_gpu, predictor=PythonPredictor, requirements=requirements ```python traffic_splitter_spec = { - "name": "classifier", + "name": "text-generator", "kind": "TrafficSplitter", "apis": [ {"name": "text-generator-cpu", "weight": 50}, @@ -57,11 +59,11 @@ cx.create_api(traffic_splitter_spec) ## Update the weights of the traffic splitter ```python -traffic_splitter_spec = cx.get_api("classifier")["spec"]["submitted_api_spec"] +traffic_splitter_spec = cx.get_api("text-generator")["spec"]["submitted_api_spec"] # send 99% of the traffic to text-generator-gpu -traffic_splitter_spec["api"][0]["weight"] = 1 -traffic_splitter_spec["api"][1]["weight"] = 99 +traffic_splitter_spec["apis"][0]["weight"] = 1 +traffic_splitter_spec["apis"][1]["weight"] = 99 cx.patch(traffic_splitter_spec) ``` From daed845eb05e97af22271b167bcc3e1ea41f9056 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 19:32:31 -0500 Subject: [PATCH 27/36] Update batch.md --- docs/tutorials/batch.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/batch.md b/docs/tutorials/batch.md index 5c9941b64c..1b69532e5a 100644 --- a/docs/tutorials/batch.md +++ b/docs/tutorials/batch.md @@ -129,7 +129,7 @@ job_spec = { response = requests.post(batch_endpoint, json=job_spec) -print(response) +print(response.text) # > {"job_id":"69b183ed6bdf3e9b","api_name":"image-classifier", "config": {"dest_s3_dir": ...}} ``` From 48d08598f8f99274979918c727e4020141d7c88b Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 19:40:35 -0500 Subject: [PATCH 28/36] Update traffic-splitter.md --- docs/tutorials/traffic-splitter.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/tutorials/traffic-splitter.md b/docs/tutorials/traffic-splitter.md index ea403d9586..1191ab6c91 100644 --- 
a/docs/tutorials/traffic-splitter.md +++ b/docs/tutorials/traffic-splitter.md @@ -12,8 +12,7 @@ import cortex class PythonPredictor: def __init__(self, config): from transformers import pipeline - - self.model = pipeline(task="text-generation", model=config["model"]) + self.model = pipeline(task="text-generation") def predict(self, payload): return self.model(payload["text"])[0] From 2f4d0f2dfc3cc3011dd8df73e3d3355735680ac0 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 19:56:14 -0500 Subject: [PATCH 29/36] Update multi-model.md --- docs/tutorials/multi-model.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/tutorials/multi-model.md b/docs/tutorials/multi-model.md index 4993dfebf5..043ea6c6ad 100644 --- a/docs/tutorials/multi-model.md +++ b/docs/tutorials/multi-model.md @@ -12,19 +12,23 @@ import cortex class PythonPredictor: def __init__(self, config): from transformers import pipeline + self.analyzer = pipeline(task="sentiment-analysis") - self.analyzer = pipeline(task="sentiment-analysis", device=device) - self.summarizer = pipeline(task="summarization", device=device) + import wget + import fasttext + wget.download( + "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin", "/tmp/model" + ) + self.language_identifier = fasttext.load_model("/tmp/model") def predict(self, query_params, payload): model = query_params.get("model") - if model == "sentiment": return self.analyzer(payload["text"])[0] - elif model == "summarizer": - return self.summarizer(payload["text"])[0]["summary_text"] + elif model == "language": + return self.language_identifier.predict(payload["text"])[0][0][-2:] -requirements = ["tensorflow", "transformers"] +requirements = ["tensorflow", "transformers", "wget", "fasttext"] api_spec = {"name": "multi-model", "kind": "RealtimeAPI"} From fe8797a85fd0f2c37df544edecda31c6aa28857f Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 20:06:29 -0500 Subject: [PATCH 30/36] 
Remove examples from linting and skip version check in tutorials --- build/lint.sh | 30 +----------------------------- docs/guides/contributing.md | 2 ++ 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/build/lint.sh b/build/lint.sh index a5711abfdd..94dcd54465 100755 --- a/build/lint.sh +++ b/build/lint.sh @@ -137,25 +137,12 @@ if [ "$is_release_branch" = "true" ]; then exit 1 fi - # Check for version warning comments in examples - output=$(cd "$ROOT/examples" && find . -type f \ - ! -name "README.md" \ - ! -name "*.json" \ - ! -name "*.txt" \ - ! -name ".*" \ - ! -name "*.bin" \ - -exec grep -L -e "this is an example for cortex release ${git_branch} and may not deploy correctly on other releases of cortex" {} \;) - if [[ $output ]]; then - echo "examples file(s) are missing appropriate version comment:" - echo "$output" - exit 1 - fi - else # Check for version warning comments in docs output=$(cd "$ROOT/docs" && find . -type f \ ! -path "./README.md" \ ! -name "summary.md" \ + ! -path "./tutorials/*" \ ! -name "development.md" \ ! -name "*.json" \ ! -name "*.txt" \ @@ -167,21 +154,6 @@ else echo "$output" exit 1 fi - - # Check for version warning comments in examples - output=$(cd "$ROOT/examples" && find . -type f \ - ! -path "./README.md" \ - ! -path "**/__pycache__/*" \ - ! -name "*.json" \ - ! -name "*.txt" \ - ! -name ".*" \ - ! -name "*.bin" \ - -exec grep -L "WARNING: you are on the master branch; please refer to examples on the branch corresponding to your \`cortex version\` (e\.g\. 
for version [0-9]*\.[0-9]*\.\*, run \`git checkout -b [0-9]*\.[0-9]*\` or switch to the \`[0-9]*\.[0-9]*\` branch on GitHub)" {} \;) - if [[ $output ]]; then - echo "example file(s) are missing version appropriate comment:" - echo "$output" - exit 1 - fi fi # Check for trailing whitespace diff --git a/docs/guides/contributing.md b/docs/guides/contributing.md index ebdb48d443..6eb6d76d88 100644 --- a/docs/guides/contributing.md +++ b/docs/guides/contributing.md @@ -1,5 +1,7 @@ # Contributing +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + ## Remote development We recommend that you run your development environment on a cloud instance due to frequent docker registry pushing, e.g. an AWS EC2 instance or GCP VM. We've had a good experience using [Mutagen](https://mutagen.io/documentation/introduction) to synchronize local / remote file systems. Feel free to reach out to us on [gitter](https://gitter.im/cortexlabs/cortex) if you have any questions about this. 
From c17a49cd806c614049b053ca7375b7c681e63660 Mon Sep 17 00:00:00 2001 From: Vishal Bollu Date: Tue, 8 Dec 2020 20:16:29 -0500 Subject: [PATCH 31/36] Update install.md --- docs/aws/install.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/aws/install.md b/docs/aws/install.md index e44282dd51..bae9c28e4e 100644 --- a/docs/aws/install.md +++ b/docs/aws/install.md @@ -62,7 +62,7 @@ nat_gateway: none api_load_balancer_scheme: internet-facing # operator load balancer scheme [internet-facing | internal] -# note: if using "internal", you must configure VPC Peering to connect your CLI to your cluster operator (https://docs.cortex.dev/v/master/) +# note: if using "internal", you must configure VPC Peering to connect your CLI to your cluster operator operator_load_balancer_scheme: internet-facing # API Gateway [public (API Gateway will be used by default, can be disabled per API) | none (API Gateway will be disabled for all APIs)] @@ -100,8 +100,6 @@ image_istio_proxy: quay.io/cortexlabs/istio-proxy:master image_istio_pilot: quay.io/cortexlabs/istio-pilot:master ``` -The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and in your [Batch API configuration](../deployments/batch-api/api-configuration.md). 
- ## Advanced * [Security](security.md) From d38a99843768ac51e4dfb8dfd01a9a980ced2ac1 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 20:18:35 -0500 Subject: [PATCH 32/36] Update gcp install docs --- docs/gcp/install.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/gcp/install.md b/docs/gcp/install.md index e65520c4b4..263267fb78 100644 --- a/docs/gcp/install.md +++ b/docs/gcp/install.md @@ -19,12 +19,8 @@ cortex cluster-gcp up # or: cortex cluster-gcp up --config cluster.yaml (see co cortex env default gcp ``` - -Try the [tutorial](../../examples/pytorch/text-generator/README.md). - ## Configure Cortex - ```yaml # cluster.yaml @@ -62,5 +58,3 @@ image_istio_proxy: quay.io/cortexlabs/istio-proxy:master image_istio_pilot: quay.io/cortexlabs/istio-pilot:master image_pause: quay.io/cortexlabs/pause:master ``` - -The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [Realtime API configuration](../deployments/realtime-api/api-configuration.md). 
From e51a969c9d7c6f93ed96294e38124e310b403723 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 20:25:23 -0500 Subject: [PATCH 33/36] Fix docs links deployments -> workloads --- docs/aws/networking.md | 2 +- docs/aws/rest-api-gateway.md | 4 ++-- docs/guides/docker-hub-rate-limiting.md | 2 +- docs/guides/production.md | 18 +++++++++--------- docs/guides/self-hosted-images.md | 2 +- .../server-side-batching-errors.md | 2 +- docs/troubleshooting/tf-session-in-predict.md | 2 +- test/batch/image-classifier/README.md | 4 ++-- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/aws/networking.md b/docs/aws/networking.md index eddddb4a31..824abe0c1f 100644 --- a/docs/aws/networking.md +++ b/docs/aws/networking.md @@ -4,7 +4,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t ![api architecture diagram](https://user-images.githubusercontent.com/808475/84695323-8507dd00-aeff-11ea-8b32-5a55cef76c79.png) -APIs are deployed with a public API Gateway by default (the API Gateway forwards requests to the API load balancer). Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and [Batch API configuration](../deployments/batch-api/api-configuration.md). If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29 second request timeout, or if you are keeping your APIs private to your VPC. See below for common configurations. 
To disable API Gateway cluster-wide (thereby enforcing that all APIs cannot create API Gateway endpoints), set `api_gateway: none` in your [cluster configuration](install.md) file (before creating your cluster). +APIs are deployed with a public API Gateway by default (the API Gateway forwards requests to the API load balancer). Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [Realtime API configuration](../workloads/realtime/configuration.md) and [Batch API configuration](../workloads/batch/configuration.md). If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29 second request timeout, or if you are keeping your APIs private to your VPC. See below for common configurations. To disable API Gateway cluster-wide (thereby enforcing that all APIs cannot create API Gateway endpoints), set `api_gateway: none` in your [cluster configuration](install.md) file (before creating your cluster). By default, the API load balancer is public. You can configure your API load balancer to be private by setting `api_load_balancer_scheme: internal` in your [cluster configuration](install.md) file (before creating your cluster). This will force external traffic to go through your API Gateway endpoint, or if you disabled API Gateway for your API, it will make your API only accessible through VPC Peering. Note that if API Gateway is used, endpoints will be public regardless of `api_load_balancer_scheme`. See below for common configurations. 
diff --git a/docs/aws/rest-api-gateway.md b/docs/aws/rest-api-gateway.md index cc5a5ced37..0144df386d 100644 --- a/docs/aws/rest-api-gateway.md +++ b/docs/aws/rest-api-gateway.md @@ -17,7 +17,7 @@ If your API load balancer is internal (i.e. you set `api_load_balancer_scheme: i Disable the default API Gateway: * If you haven't created your cluster yet, you can set `api_gateway: none` in your [cluster configuration file](install.md) before creating your cluster. -* If you have already created your cluster, you can set `api_gateway: none` in the `networking` field of your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and/or [Batch API configuration](../deployments/batch-api/api-configuration.md), and then re-deploy your API. +* If you have already created your cluster, you can set `api_gateway: none` in the `networking` field of your [Realtime API configuration](../workloads/realtime/configuration.md) and/or [Batch API configuration](../workloads/batch/configuration.md), and then re-deploy your API. ### Step 2 @@ -96,7 +96,7 @@ Delete the API Gateway before spinning down your Cortex cluster: Disable the default API Gateway: * If you haven't created your cluster yet, you can set `api_gateway: none` in your [cluster configuration file](install.md) before creating your cluster. -* If you have already created your cluster, you can set `api_gateway: none` in the `networking` field of your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and/or [Batch API configuration](../deployments/batch-api/api-configuration.md), and then re-deploy your API. +* If you have already created your cluster, you can set `api_gateway: none` in the `networking` field of your [Realtime API configuration](../workloads/realtime/configuration.md) and/or [Batch API configuration](../workloads/batch/configuration.md), and then re-deploy your API. 
### Step 2 diff --git a/docs/guides/docker-hub-rate-limiting.md b/docs/guides/docker-hub-rate-limiting.md index 98d9ff3570..378919686d 100644 --- a/docs/guides/docker-hub-rate-limiting.md +++ b/docs/guides/docker-hub-rate-limiting.md @@ -64,7 +64,7 @@ Once you've updated your cluster configuration file, you can spin up your cluste ### Update your API configuration file(s) -To configure your APIs to use the Quay images, you cna update your [API configuration files](../deployments/realtime-api/api-configuration.md). The image paths are specified in `predictor.image` (and `predictor.tensorflow_serving_image` for APIs with `kind: tensorflow`). Be advised that by default, the Docker Hub images are used for your predictors, so you will need to specify the Quay image paths for all of your APIs. +To configure your APIs to use the Quay images, you can update your [API configuration files](../workloads/realtime/configuration.md). The image paths are specified in `predictor.image` (and `predictor.tensorflow_serving_image` for APIs with `kind: tensorflow`). Be advised that by default, the Docker Hub images are used for your predictors, so you will need to specify the Quay image paths for all of your APIs. Here is a list of available images (make sure to set `` to your cluster's version): diff --git a/docs/guides/production.md b/docs/guides/production.md index bd2f259826..2f3eb6c0f0 100644 --- a/docs/guides/production.md +++ b/docs/guides/production.md @@ -10,24 +10,24 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t **Additional tips for realtime APIs** -* Consider tuning `processes_per_replica` and `threads_per_process` in your [Realtime API configuration](../deployments/realtime-api/api-configuration.md). Each model behaves differently, so the best way to find a good value is to run a load test on a single replica (you can set `min_replicas` to 1 to avoid autocaling). 
Here is [additional information](../deployments/realtime-api/parallelism.md#concurrency) about these fields. +* Consider tuning `processes_per_replica` and `threads_per_process` in your [Realtime API configuration](../workloads/realtime/configuration.md). Each model behaves differently, so the best way to find a good value is to run a load test on a single replica (you can set `min_replicas` to 1 to avoid autoscaling). Here is [additional information](../workloads/realtime/parallelism.md#concurrency) about these fields. -* You may wish to customize the autoscaler for your APIs. The [autoscaling documentation](../deployments/realtime-api/autoscaling.md) describes each of the parameters that can be configured. +* You may wish to customize the autoscaler for your APIs. The [autoscaling documentation](../workloads/realtime/autoscaling.md) describes each of the parameters that can be configured. * When creating an API that you will send large amounts of traffic to all at once, set `min_replicas` at (or slightly above) the number of replicas you expect will be necessary to handle the load at steady state. After traffic has been fully shifted to your API, `min_replicas` can be reduced to allow automatic downscaling. -* [Traffic splitters](./deployments/realtime-api/traffic-splitter.md) can be used to route a subset of traffic to an updated API. For example, you can create a traffic splitter named `my-api`, and route requests to `my-api` to any number of Realtime APIs (e.g. `my-api_v1`, `my-api_v2`, etc). The percentage of traffic that the traffic splitter routes to each API can be updated on the fly. +* [Traffic splitters](../workloads/realtime/traffic-splitter.md) can be used to route a subset of traffic to an updated API. For example, you can create a traffic splitter named `my-api`, and route requests to `my-api` to any number of Realtime APIs (e.g. `my-api_v1`, `my-api_v2`, etc).
The percentage of traffic that the traffic splitter routes to each API can be updated on the fly. -* If initialization of your API replicas takes a while (e.g. due to downloading large models from slow hosts or installing dependencies), and responsive autoscaling is important to you, consider pre-building your API's Docker image. See [here](../deployments/system-packages.md#custom-docker-image) for instructions. +* If initialization of your API replicas takes a while (e.g. due to downloading large models from slow hosts or installing dependencies), and responsive autoscaling is important to you, consider pre-building your API's Docker image. See [here](../workloads/system-packages.md#custom-docker-image) for instructions. -* If your API is receiving many queries per second and you are using the TensorFlow Predictor, consider enabling [server-side batching](../deployments/realtime-api/parallelism.md#server-side-batching). +* If your API is receiving many queries per second and you are using the TensorFlow Predictor, consider enabling [server-side batching](../workloads/realtime/parallelism.md#server-side-batching). -* [Overprovisioning](../deployments/realtime-api/autoscaling.md#overprovisioning) can be used to reduce the chance of large queues building up. This can be especially important when inferences take a long time. +* [Overprovisioning](../workloads/realtime/autoscaling.md#overprovisioning) can be used to reduce the chance of large queues building up. This can be especially important when inferences take a long time. **Additional tips for inferences that take a long time:** -* Consider using [GPUs](../deployments/gpus.md) or [Inferentia](../deployments/inferentia.md) to speed up inference. +* Consider using [GPUs](../aws/gpu.md) or [Inferentia](../aws/inferentia.md) to speed up inference. 
-* Consider setting a low value for `max_replica_concurrency`, since if there are many requests in the queue, it will take a long time until newly received requests are processed. See [autoscaling docs](../deployments/realtime-api/autoscaling.md) for more details. +* Consider setting a low value for `max_replica_concurrency`, since if there are many requests in the queue, it will take a long time until newly received requests are processed. See [autoscaling docs](../workloads/realtime/autoscaling.md) for more details. -* Keep in mind that API Gateway has a 29 second timeout; if your requests take longer (due to a long inference time and/or long request queues), you will need to disable API Gateway for your API by setting `api_gateway: none` in the `networking` config in your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and/or [Batch API configuration](../deployments/batch-api/api-configuration.md). Alternatively, you can disable API gateway for all APIs in your cluster by setting `api_gateway: none` in your [cluster configuration file](../aws/install.md) before creating your cluster. +* Keep in mind that API Gateway has a 29 second timeout; if your requests take longer (due to a long inference time and/or long request queues), you will need to disable API Gateway for your API by setting `api_gateway: none` in the `networking` config in your [Realtime API configuration](../workloads/realtime/configuration.md) and/or [Batch API configuration](../workloads/batch/configuration.md). Alternatively, you can disable API Gateway for all APIs in your cluster by setting `api_gateway: none` in your [cluster configuration file](../aws/install.md) before creating your cluster.
diff --git a/docs/guides/self-hosted-images.md b/docs/guides/self-hosted-images.md index 61f298eaf0..916dd4a2ca 100644 --- a/docs/guides/self-hosted-images.md +++ b/docs/guides/self-hosted-images.md @@ -131,7 +131,7 @@ echo "-----------------------------------------------" The first list of images that were printed (the cluster images) can be directly copy-pasted in your [cluster configuration file](../aws/install.md) before spinning up your cluster. -The second list of images that were printed (the API images) can be used in your [API configuration files](../deployments/realtime-api/api-configuration.md). The image paths are specified in `predictor.image` (and `predictor.tensorflow_serving_image` for APIs with `kind: tensorflow`). Be advised that by default, the public images offered by Cortex are used for your predictors, so you will need to specify your ECR image paths for all of your APIs. +The second list of images that were printed (the API images) can be used in your [API configuration files](../workloads/realtime/configuration.md). The image paths are specified in `predictor.image` (and `predictor.tensorflow_serving_image` for APIs with `kind: tensorflow`). Be advised that by default, the public images offered by Cortex are used for your predictors, so you will need to specify your ECR image paths for all of your APIs.
## Step 5 diff --git a/docs/troubleshooting/server-side-batching-errors.md b/docs/troubleshooting/server-side-batching-errors.md index 4740d903fa..df03f75b06 100644 --- a/docs/troubleshooting/server-side-batching-errors.md +++ b/docs/troubleshooting/server-side-batching-errors.md @@ -2,7 +2,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -When `max_batch_size` and `batch_interval` fields are set for the [Realtime API TensorFlow Predictor](../deployments/realtime-api/predictors.md#tensorflow-predictor), errors can be encountered if the associated model hasn't been built for batching. +When `max_batch_size` and `batch_interval` fields are set for the [Realtime API TensorFlow Predictor](../workloads/realtime/predictors.md#tensorflow-predictor), errors can be encountered if the associated model hasn't been built for batching. The following error is an example of what happens when the input shape doesn't accommodate batching - e.g. when its shape is `[height, width, 3]` instead of `[batch_size, height, width, 3]`: diff --git a/docs/troubleshooting/tf-session-in-predict.md b/docs/troubleshooting/tf-session-in-predict.md index c8e1d56218..fa0f2d6b49 100644 --- a/docs/troubleshooting/tf-session-in-predict.md +++ b/docs/troubleshooting/tf-session-in-predict.md @@ -2,7 +2,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -When doing inferences with TensorFlow using the [Realtime API Python Predictor](../deployments/realtime-api/predictors.md#python-predictor) or [Batch API Python Predictor](../deployments/batch-api/predictors.md#python-predictor), it should be noted that your Python Predictor's `__init__()` constructor is only called on one thread, whereas its `predict()` method can run on any of the available threads (which is configured via the `threads_per_process` field in the API's `predictor` configuration). 
If `threads_per_process` is set to `1` (the default value), then there is no concern, since `__init__()` and `predict()` will run on the same thread. However, if `threads_per_process` is greater than `1`, then only one of the inference threads will have executed the `__init__()` function. This can cause issues with TensorFlow because the default graph is a property of the current thread, so if `__init__()` initializes the TensorFlow graph, only the thread that executed `__init__()` will have the default graph set. +When doing inferences with TensorFlow using the [Realtime API Python Predictor](../workloads/realtime/predictors.md#python-predictor) or [Batch API Python Predictor](../workloads/batch/predictors.md#python-predictor), it should be noted that your Python Predictor's `__init__()` constructor is only called on one thread, whereas its `predict()` method can run on any of the available threads (which is configured via the `threads_per_process` field in the API's `predictor` configuration). If `threads_per_process` is set to `1` (the default value), then there is no concern, since `__init__()` and `predict()` will run on the same thread. However, if `threads_per_process` is greater than `1`, then only one of the inference threads will have executed the `__init__()` function. This can cause issues with TensorFlow because the default graph is a property of the current thread, so if `__init__()` initializes the TensorFlow graph, only the thread that executed `__init__()` will have the default graph set. 
The error you may see if the default graph is not set (as a consequence of `__init__()` and `predict()` running in separate threads) is: diff --git a/test/batch/image-classifier/README.md b/test/batch/image-classifier/README.md index 03cc827d35..3d62908e52 100644 --- a/test/batch/image-classifier/README.md +++ b/test/batch/image-classifier/README.md @@ -105,7 +105,7 @@ class PythonPredictor: ) ``` -Here are the complete [Predictor docs](../../../docs/deployments/batch-api/predictors.md). +Here are the complete [Predictor docs](../../../docs/workloads/batch/predictors.md).
@@ -140,7 +140,7 @@ Create a `cortex.yaml` file and add the configuration below. An `api` with `kind cpu: 1 ``` -Here are the complete [API configuration docs](../../../docs/deployments/batch-api/api-configuration.md). +Here are the complete [API configuration docs](../../../docs/workloads/batch/configuration.md).
From 685194f17513933a7e3fa2637b419900b7abef3a Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 20:31:13 -0500 Subject: [PATCH 34/36] Update single-node-deployment.md --- docs/guides/single-node-deployment.md | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/docs/guides/single-node-deployment.md b/docs/guides/single-node-deployment.md index c9973ebe68..6a949bb48c 100644 --- a/docs/guides/single-node-deployment.md +++ b/docs/guides/single-node-deployment.md @@ -114,26 +114,4 @@ $ bash -c "$(curl -sS https://raw.githubusercontent.com/cortexlabs/cortex/master ### Step 13 -You can now use Cortex to deploy your model: - - -```bash -$ git clone -b master https://github.com/cortexlabs/cortex.git - -$ cd cortex/docs/tutorials/realtime - -$ cortex deploy - -# take note of the curl command -$ cortex get text-generator -``` - -### Step 14 - -Make requests by replacing "localhost" in the curl command with your instance's public DNS: - -```bash -$ curl : \ - -X POST -H "Content-Type: application/json" \ - -d '{"text": "machine learning is"}' -``` +You can now use Cortex to deploy your model. 
From 80d77cb7704d72e8bdab80146c125a8639ce7364 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 20:33:23 -0500 Subject: [PATCH 35/36] Update summary.md --- docs/summary.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/summary.md b/docs/summary.md index 64cf145e9d..1aac625cd9 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -17,7 +17,7 @@ * [Credentials](aws/credentials.md) * [Security](aws/security.md) * [Spot instances](aws/spot.md) -* [GPUs](aws/gpus.md) +* [GPUs](aws/gpu.md) * [Inferentia](aws/inferentia.md) * [Networking](aws/networking.md) * [VPC peering](aws/vpc-peering.md) From 0cbbd7cee5d59354c09ac5dac0639bea7c578fd3 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 8 Dec 2020 20:48:21 -0500 Subject: [PATCH 36/36] PR review fixes --- docs/tutorials/advanced.md | 119 ------------------------------------- docs/tutorials/batch.md | 2 +- docs/tutorials/project.md | 2 - 3 files changed, 1 insertion(+), 122 deletions(-) delete mode 100644 docs/tutorials/advanced.md diff --git a/docs/tutorials/advanced.md b/docs/tutorials/advanced.md deleted file mode 100644 index 347d45f9aa..0000000000 --- a/docs/tutorials/advanced.md +++ /dev/null @@ -1,119 +0,0 @@ -# Advanced deployments - -## Install cortex - -```bash -$ pip install cortex -``` - -## Create a directory - -```bash -$ mkdir text-generator && cd text-generator - -$ touch predictor.py requirements.txt text-generator.yaml -``` - -## Define a Predictor in `predictor.py` - -```python -class PythonPredictor: - def __init__(self, config): - from transformers import pipeline - - self.model = pipeline(task="text-generation") - - def predict(self, payload): - return self.model(payload["text"])[0] -``` - -## Specify Python dependencies in `requirements.txt` - -```text -tensorflow -transformers -``` - -## Configure 2 realtime APIs and a traffic splitter in `text-generator.yaml` - -```yaml -- name: text-generator-cpu - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - 
compute: - cpu: 1 - -- name: text-generator-gpu - kind: RealtimeAPI - predictor: - type: python - path: predictor.py - compute: - gpu: 1 - -- name: text-generator - kind: TrafficSplitter - apis: - - name: text-generator-cpu - weight: 80 - - name: text-generator-gpu - weight: 20 -``` - -## Test locally (requires Docker) - -```bash -$ cortex deploy text-generator.yaml -``` - -## Monitor - -```bash -$ cortex get text-generator --watch -``` - -## Make a request - -```bash -$ curl http://localhost:8889 -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' -``` - -## Stream logs - -```bash -$ cortex logs text-generator -``` - -## Spin up a cluster on AWS - -```bash -$ cortex cluster up -``` - -## Deploy to AWS - -```bash -$ cortex deploy text-generator.yaml --env aws -``` - -## Monitor - -```bash -$ cortex get text-generator --env aws --watch -``` - -## Make a request - -```bash -$ curl https://***.execute-api.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}' -``` - -## Delete the APIs - -```bash -$ cortex delete text-generator --env local - -$ cortex delete text-generator --env aws -``` diff --git a/docs/tutorials/batch.md b/docs/tutorials/batch.md index 1b69532e5a..b10691f806 100644 --- a/docs/tutorials/batch.md +++ b/docs/tutorials/batch.md @@ -152,5 +152,5 @@ Once the job is complete, you should be able to find the results of the batch jo ### Delete the Batch API ```bash -$ cortex delete image-classifier --env local +$ cortex delete image-classifier --env aws ``` diff --git a/docs/tutorials/project.md b/docs/tutorials/project.md index be51aea5fb..dc512bfb49 100644 --- a/docs/tutorials/project.md +++ b/docs/tutorials/project.md @@ -46,8 +46,6 @@ cx.create_api(api_spec, project_dir=".") ## Deploy using the CLI -Navigate to your project directory and define a yaml with the api specification: - ```yaml # api.yaml