Merged
22 commits
363840b
work in progress #1
vishalbollu Dec 11, 2019
d31c054
Merge branch 'master' into tf-onnx-api-changes
vishalbollu Dec 13, 2019
f78907b
Add classes to TensorFlow and ONNX and rename predictor to Python
vishalbollu Dec 16, 2019
21a2838
Fix typos
vishalbollu Dec 16, 2019
815d6df
Remove python client from docs temporarily
vishalbollu Dec 16, 2019
54bffac
Put labels loading outside class def in image-classifier def
vishalbollu Dec 16, 2019
3ecb2c6
Merge branch 'master' into tf-onnx-api-changes
vishalbollu Dec 16, 2019
a0deb45
Update predictor names
vishalbollu Dec 16, 2019
dde1757
Fix linting
vishalbollu Dec 16, 2019
65ce5d2
Rename tf_client to TensorFlowClient and go over docs
vishalbollu Dec 16, 2019
9879965
Update links to master
vishalbollu Dec 16, 2019
95e92a6
Docs and makefile update
vishalbollu Dec 16, 2019
e216ac7
Remove dead code
vishalbollu Dec 16, 2019
403dd6b
Fix compile issue
vishalbollu Dec 16, 2019
30753f6
Update docs
deliahu Dec 16, 2019
e398858
Remove unnecessary imports
vishalbollu Dec 17, 2019
e110532
Respond to PR comments
vishalbollu Dec 17, 2019
3ddb69a
Merge branch 'tf-onnx-api-changes' of github.com:cortexlabs/cortex in…
vishalbollu Dec 17, 2019
7d9faa9
Renaming and example pass
vishalbollu Dec 17, 2019
6439d0d
Check error when validating config in deploy CLI
vishalbollu Dec 17, 2019
7b01d80
Add more details on what the tf and onnx clients are managing
vishalbollu Dec 17, 2019
126c10b
Move hardcoded labels to global scope and rest to into init
vishalbollu Dec 17, 2019
8 changes: 4 additions & 4 deletions Makefile
@@ -119,8 +119,8 @@ test-examples:
###############

ci-build-images:
@./build/build-image.sh images/predictor-serve predictor-serve
@./build/build-image.sh images/predictor-serve-gpu predictor-serve-gpu
@./build/build-image.sh images/python-serve python-serve
@./build/build-image.sh images/python-serve-gpu python-serve-gpu
@./build/build-image.sh images/tf-serve tf-serve
@./build/build-image.sh images/tf-serve-gpu tf-serve-gpu
@./build/build-image.sh images/tf-api tf-api
@@ -140,8 +140,8 @@ ci-build-images:
@./build/build-image.sh images/istio-galley istio-galley

ci-push-images:
@./build/push-image.sh predictor-serve
@./build/push-image.sh predictor-serve-gpu
@./build/push-image.sh python-serve
@./build/push-image.sh python-serve-gpu
@./build/push-image.sh tf-serve
@./build/push-image.sh tf-serve-gpu
@./build/push-image.sh tf-api
52 changes: 25 additions & 27 deletions cli/cmd/deploy.go
@@ -57,7 +57,7 @@ var deployCmd = &cobra.Command{

func deploy(force bool, ignoreCache bool) {
root := mustAppRoot()
config, err := readConfig() // Check proper cortex.yaml
_, err := readConfig() // Check proper cortex.yaml
if err != nil {
exit.Error(err)
}
@@ -76,37 +76,35 @@ func deploy(force bool, ignoreCache bool) {
"cortex.yaml": configBytes,
}

if config.AreProjectFilesRequired() {
projectPaths, err := files.ListDirRecursive(root, false,
files.IgnoreCortexYAML,
files.IgnoreHiddenFiles,
files.IgnoreHiddenFolders,
files.IgnorePythonGeneratedFiles,
)
if err != nil {
exit.Error(err)
}

projectZipBytes, err := zip.ToMem(&zip.Input{
FileLists: []zip.FileListInput{
{
Sources: projectPaths,
RemovePrefix: root,
},
},
})
projectPaths, err := files.ListDirRecursive(root, false,
files.IgnoreCortexYAML,
files.IgnoreHiddenFiles,
files.IgnoreHiddenFolders,
files.IgnorePythonGeneratedFiles,
)
if err != nil {
exit.Error(err)
}

if err != nil {
exit.Error(errors.Wrap(err, "failed to zip project folder"))
}
projectZipBytes, err := zip.ToMem(&zip.Input{
FileLists: []zip.FileListInput{
{
Sources: projectPaths,
RemovePrefix: root,
},
},
})

if len(projectZipBytes) > MaxProjectSize {
exit.Error(errors.New("zipped project folder exceeds " + s.Int(MaxProjectSize) + " bytes"))
}
if err != nil {
exit.Error(errors.Wrap(err, "failed to zip project folder"))
}

uploadBytes["project.zip"] = projectZipBytes
if len(projectZipBytes) > MaxProjectSize {
exit.Error(errors.New("zipped project folder exceeds " + s.Int(MaxProjectSize) + " bytes"))
}

uploadBytes["project.zip"] = projectZipBytes

uploadInput := &HTTPUploadInput{
Bytes: uploadBytes,
}
8 changes: 4 additions & 4 deletions cli/cmd/lib_cluster_config.go
@@ -271,11 +271,11 @@ func clusterConfigConfirmaionStr(clusterConfig *clusterconfig.ClusterConfig, aws
items.Add(clusterconfig.TelemetryUserFacingKey, clusterConfig.Telemetry)
}

if clusterConfig.ImagePredictorServe != defaultConfig.ImagePredictorServe {
items.Add(clusterconfig.ImagePredictorServeUserFacingKey, clusterConfig.ImagePredictorServe)
if clusterConfig.ImagePythonServe != defaultConfig.ImagePythonServe {
items.Add(clusterconfig.ImagePythonServeUserFacingKey, clusterConfig.ImagePythonServe)
}
if clusterConfig.ImagePredictorServeGPU != defaultConfig.ImagePredictorServeGPU {
items.Add(clusterconfig.ImagePredictorServeGPUUserFacingKey, clusterConfig.ImagePredictorServeGPU)
if clusterConfig.ImagePythonServeGPU != defaultConfig.ImagePythonServeGPU {
items.Add(clusterconfig.ImagePythonServeGPUUserFacingKey, clusterConfig.ImagePythonServeGPU)
}
if clusterConfig.ImageTFServe != defaultConfig.ImageTFServe {
items.Add(clusterconfig.ImageTFServeUserFacingKey, clusterConfig.ImageTFServe)
8 changes: 4 additions & 4 deletions dev/registry.sh
@@ -35,8 +35,8 @@ function ecr_login() {
}

function create_registry() {
aws ecr create-repository --repository-name=cortexlabs/predictor-serve --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/predictor-serve-gpu --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/python-serve --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/python-serve-gpu --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/tf-serve --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/tf-serve-gpu --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/tf-api --region=$REGISTRY_REGION || true
@@ -140,8 +140,8 @@ elif [ "$cmd" = "update" ]; then
fi

build_and_push $ROOT/images/manager manager latest
build_and_push $ROOT/images/predictor-serve predictor-serve latest
build_and_push $ROOT/images/predictor-serve-gpu predictor-serve-gpu latest
build_and_push $ROOT/images/python-serve python-serve latest
build_and_push $ROOT/images/python-serve-gpu python-serve-gpu latest
build_and_push $ROOT/images/tf-api tf-api latest
build_and_push $ROOT/images/onnx-serve onnx-serve latest
build_and_push $ROOT/images/onnx-serve-gpu onnx-serve-gpu latest
11 changes: 6 additions & 5 deletions dev/versions.md
@@ -70,13 +70,14 @@ note: once the default AWS CNI version is >= 1.5.5 this may no longer be necessa

## TensorFlow / TensorFlow Serving / Python / Python base operating system

The Python version in the base images for `tf-api` and `onnx-serve-gpu`/`predictor-serve-gpu` determines the Python version used throughout Cortex.
The Python version in the base images for `tf-api` and `onnx-serve-gpu`/`python-serve-gpu` determines the Python version used throughout Cortex.

1. Update the `tensorflow/tensorflow` base image in `images/tf-api/Dockerfile` to the desired version ([Dockerhub](https://hub.docker.com/r/tensorflow/tensorflow))
1. Update the `nvidia/cuda` base image in `images/onnx-serve-gpu/Dockerfile` to the desired version ([Dockerhub](https://hub.docker.com/r/nvidia/cuda))
1. Run `docker run --rm -it tensorflow/tensorflow:***`, and in the container run `python3 --version` and `cat /etc/lsb-release`
1. Run `docker run --rm -it nvidia/cuda:***`, and in the container run `python3 --version` and `cat /etc/lsb-release`
1. The Ubuntu and Python versions must match; if they do not, downgrade whichever one is too advanced
1. Update TensorFlow version in `tensorflow.md`
1. Search the codebase for the current minor TensorFlow version (e.g. `1.14`) and update versions as appropriate
1. Search the codebase for the minor Python version (e.g. `3.6`) and update versions as appropriate
1. Search the codebase for `ubuntu` and update versions as appropriate
@@ -86,7 +87,7 @@ Note: it's ok if example training notebooks aren't upgraded, as long as the expo
## ONNX runtime

1. Update `ONNXRUNTIME_VERSION` in `images/onnx-serve/Dockerfile` and `images/onnx-serve-gpu/Dockerfile` ([releases](https://github.com/microsoft/onnxruntime/releases))
1. Update the version listed for `onnxruntime` in "Pre-installed Packages" in `request-handlers.py`
1. Update the version listed for `onnxruntime` in "Pre-installed Packages" in `onnx.md`

## Nvidia device plugin

@@ -101,12 +102,12 @@ Note: it's ok if example training notebooks aren't upgraded, as long as the expo

1. Update the link at the top of the file to the URL you copied from
1. Check that your diff is reasonable
1. Confirm GPUs work for TensorFlow and ONNX models
1. Confirm GPUs work for PyTorch, TensorFlow, and ONNX models

## Python packages

1. Update versions in `pkg/workloads/cortex/lib/requirements.txt`, `pkg/workloads/cortex/tf_api/requirements.txt`, `pkg/workloads/cortex/onnx_serve/requirements.txt`, and `pkg/workloads/cortex/predictor_serve/requirements.txt`
1. Update the versions listed in "Pre-installed packages" in `request-handlers.md` and `predictor.md`
1. Update versions in `pkg/workloads/cortex/lib/requirements.txt`, `pkg/workloads/cortex/tf_api/requirements.txt`, `pkg/workloads/cortex/onnx_serve/requirements.txt`, and `pkg/workloads/cortex/python_serve/requirements.txt`
1. Update the versions listed in "Pre-installed packages" in `python.md`, `onnx.md`, and `tensorflow.md`
1. Rerun all examples and check their logs

## Istio
4 changes: 2 additions & 2 deletions docs/cluster-management/config.md
@@ -62,8 +62,8 @@ spot_config:
instance_pools: 2

# docker image paths
image_predictor_serve: cortexlabs/predictor-serve:master
image_predictor_serve_gpu: cortexlabs/predictor-serve-gpu:master
image_python_serve: cortexlabs/python-serve:master
image_python_serve_gpu: cortexlabs/python-serve-gpu:master
image_tf_serve: cortexlabs/tf-serve:master
image_tf_serve_gpu: cortexlabs/tf-serve-gpu:master
image_tf_api: cortexlabs/tf-api:master
4 changes: 2 additions & 2 deletions docs/contributing/development.md
@@ -59,8 +59,8 @@ region: us-west-2
log_group: cortex
cluster_name: cortex

image_predictor_serve: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/predictor-serve:latest
image_predictor_serve_gpu: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/predictor-serve-gpu:latest
image_python_serve: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve:latest
image_python_serve_gpu: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve-gpu:latest
image_tf_serve: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve:latest
image_tf_serve_gpu: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve-gpu:latest
image_tf_api: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-api:latest
23 changes: 11 additions & 12 deletions docs/dependency-management/python-packages.md
@@ -9,12 +9,12 @@ You can install your required PyPI packages and import them in your Python files
```text
./iris-classifier/
├── cortex.yaml
├── handler.py
├── predictor.py
├── ...
└── requirements.txt
```

Note that some packages are pre-installed by default (see [predictor](../deployments/predictor.md) or [request handlers](../deployments/request-handlers.md) depending on which runtime you're using).
Note that some packages are pre-installed by default (see [python predictor](../deployments/python.md), [tensorflow predictor](../deployments/tensorflow.md), [onnx predictor](../deployments/onnx.md) depending on which runtime you're using).

## Private packages on GitHub

@@ -30,28 +30,27 @@ You can generate a personal access token by following [these steps](https://help

## Project files

Cortex makes all files in the project directory (i.e. the directory which contains `cortex.yaml`) available to request handlers. Python bytecode files (`*.pyc`, `*.pyo`, `*.pyd`), files or folders that start with `.`, and `cortex.yaml` are excluded.
Cortex makes all files in the project directory (i.e. the directory which contains `cortex.yaml`) available for use in your Predictor implementations. Python bytecode files (`*.pyc`, `*.pyo`, `*.pyd`), files or folders that start with `.`, and `cortex.yaml` are excluded.

The contents of the project directory is available in `/mnt/project/` in the API containers. For example, if this is your project directory:

```text
./iris-classifier/
├── cortex.yaml
├── config.json
├── handler.py
├── values.json
├── predictor.py
├── ...
└── requirements.txt
```

You can access `config.json` in `handler.py` like this:
You can access `values.json` in `predictor.py` like this:

```python
import json

with open('/mnt/project/config.json', 'r') as config_file:
config = json.load(config_file)

def pre_inference(payload, signature, metadata):
print(config)
...
class PythonPredictor:
def __init__(self, config):
with open('/mnt/project/values.json', 'r') as values_file:
values = json.load(values_file)
self.values = values
```
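
As a follow-up to the doc example above, the loaded values could then be referenced in `predict()` — a minimal sketch, where the structure of `values.json` (an index-to-label mapping) and the payload field are illustrative assumptions:

```python
import json


class PythonPredictor:
    def __init__(self, config):
        # project files are mounted at /mnt/project/ in the API container
        # (assumes values.json exists in the project directory)
        with open("/mnt/project/values.json", "r") as values_file:
            self.values = json.load(values_file)

    def predict(self, payload):
        # hypothetical usage: map an index from the request to a label loaded from values.json
        predicted_index = str(payload["class_index"])
        return self.values.get(predicted_index, "unknown")
```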
6 changes: 3 additions & 3 deletions docs/dependency-management/system-packages.md
@@ -19,7 +19,7 @@ Specify the base image you want to override followed by your customizations. The
```dockerfile
# Dockerfile

FROM cortexlabs/predictor-serve
FROM cortexlabs/python-serve

RUN apt-get update \
&& apt-get install -y tree \
@@ -58,7 +58,7 @@ Update your cluster configuration file to point to your image:
# cluster.yaml

# ...
image_predictor_serve: <repository_url>:latest
image_python_serve: <repository_url>:latest
# ...
```

@@ -77,7 +77,7 @@ Cortex will use your image to launch Python serving workloads and you will have

import subprocess

class Predictor:
class PythonPredictor:
def __init__(self, config):
subprocess.run(["tree"])
...
83 changes: 76 additions & 7 deletions docs/deployments/onnx.md
@@ -2,7 +2,7 @@

_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_

Deploy ONNX models as web services.
You can deploy ONNX models as web services by defining a class that implements Cortex's ONNX Predictor interface.

## Config

@@ -12,9 +12,9 @@ Deploy ONNX models as web services.
endpoint: <string> # the endpoint for the API (default: /<deployment_name>/<api_name>)
onnx:
model: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model.onnx) (required)
request_handler: <string> # path to the request handler implementation file, relative to the Cortex root (optional)
predictor: <string> # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
config: <string: value> # dictionary passed to the constructor of a Predictor (optional)
python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
metadata: <string: value> # dictionary that can be used to configure custom values (optional)
tracker:
key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
model_type: <string> # model type, must be "classification" or "regression" (required)
@@ -37,7 +37,7 @@ See [packaging ONNX models](../packaging-models/onnx.md) for information about e
name: my-api
onnx:
model: s3://my-bucket/my-model.onnx
request_handler: handler.py
predictor: predictor.py
compute:
gpu: 1
```
@@ -47,6 +47,75 @@ See [packaging ONNX models](../packaging-models/onnx.md) for information about e
You can log information about each request by adding a `?debug=true` parameter to your requests. This will print:

1. The payload
2. The value after running the `pre_inference` function (if provided)
3. The value after running inference
4. The value after running the `post_inference` function (if provided)
2. The value after running the `predict` function
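
For example, a debug request could be sent as sketched below, using the pre-installed `requests` package; the endpoint URL and payload fields are assumptions — substitute the URL and JSON schema of your deployed API:

```python
import requests

# hypothetical endpoint; substitute the URL reported for your deployed API
endpoint = "https://example.amazonaws.com/my-deployment/my-api"

payload = {
    "sepal_length": 5.2,
    "sepal_width": 3.6,
    "petal_length": 1.4,
    "petal_width": 0.3,
}

# appending ?debug=true makes the API log the payload and the predict() output
response = requests.post(endpoint, params={"debug": "true"}, json=payload)
print(response.text)
```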

# ONNX Predictor

An ONNX Predictor is a Python class that describes how to serve your ONNX model to make predictions.

<!-- CORTEX_VERSION_MINOR -->
Cortex provides an `onnx_client` and a config object to initialize your implementation of the ONNX Predictor class. The `onnx_client` is an instance of [ONNXClient](https://github.com/cortexlabs/cortex/tree/master/pkg/workloads/cortex/onnx_serve/client.py) that manages an ONNX Runtime session and helps make predictions using your model. Once your implementation of the ONNX Predictor class has been initialized, the replica is available to serve requests. Upon receiving a request, your implementation's `predict()` function is called with the JSON payload and is responsible for returning a prediction or batch of predictions. Your `predict()` function should call `onnx_client.predict()` to make an inference against your exported ONNX model. Preprocessing of the JSON payload and postprocessing of predictions can be implemented in your `predict()` function as well.

## Implementation

```python
class ONNXPredictor:
def __init__(self, onnx_client, config):
"""Called once before the API becomes available. Setup for model serving such as downloading/initializing vocabularies can be done here. Required.

Args:
onnx_client: ONNX client which can be used to make predictions.
config: Dictionary passed from API configuration in cortex.yaml (if specified).
"""
pass

def predict(self, payload):
"""Called once per request. Runs preprocessing of the request payload, inference, and postprocessing of the inference output. Required.

Args:
payload: The parsed JSON request payload.

Returns:
Prediction or a batch of predictions.
"""
```

## Example

```python
import numpy as np

labels = ["iris-setosa", "iris-versicolor", "iris-virginica"]


class ONNXPredictor:
def __init__(self, onnx_client, config):
self.client = onnx_client

def predict(self, payload):
model_input = [
payload["sepal_length"],
payload["sepal_width"],
payload["petal_length"],
payload["petal_width"],
]

prediction = self.client.predict(model_input)
predicted_class_id = prediction[0][0]
return labels[predicted_class_id]
```

## Pre-installed packages

The following packages have been pre-installed and can be used in your implementations:

```text
boto3==1.10.13
dill==0.3.1.1
msgpack==0.6.2
numpy==1.17.3
requests==2.22.0
onnxruntime==0.5.0
```

Learn how to install additional packages [here](../dependency-management/python-packages.md).
4 changes: 2 additions & 2 deletions docs/deployments/prediction-monitoring.md
@@ -21,8 +21,8 @@ For classification models, the tracker should be configured with `model_type: cl
```yaml
- kind: api
name: iris
predictor:
path: predictor.py
python:
predictor: predictor.py
tracker:
model_type: classification
```