Merged

28 commits
3cfa91b
Modify predictor validation struct accordingly
RobertLucian Dec 15, 2020
f833586
Merge branch 'master' into improvement/python-api-spec
RobertLucian Dec 15, 2020
d13b743
More validations for models
RobertLucian Dec 15, 2020
dc0fda9
Model resource extraction in Python stack
RobertLucian Dec 15, 2020
aa6e338
Fixes for DML on Python
RobertLucian Dec 15, 2020
7fe6d0f
Fix models not showing up in cortex get
RobertLucian Dec 15, 2020
da93dbd
Telemetry & misc
RobertLucian Dec 15, 2020
6bc7c64
DML fixes
RobertLucian Dec 15, 2020
bd09727
Merge branch 'master' into improvement/python-api-spec
RobertLucian Dec 16, 2020
15bdde5
Add dynamic_model_loading to the docs
RobertLucian Dec 16, 2020
12095cc
Fix test examples
RobertLucian Dec 16, 2020
268c297
Disallow DML for BatchAPI kind
RobertLucian Dec 16, 2020
c8c9aa6
Merge branch 'master' into improvement/python-api-spec
vishalbollu Dec 17, 2020
26e5e58
Move model_path field inside the models section
RobertLucian Dec 17, 2020
0f09a21
Fixes on the go-side + some docs
RobertLucian Dec 17, 2020
6fa13b4
Fixes for the Python side
RobertLucian Dec 18, 2020
927e2f5
Merge branch 'master' into improvement/python-api-spec
RobertLucian Dec 18, 2020
e8bfc53
Update docs
RobertLucian Dec 18, 2020
7c79acd
Merge branch 'master' into improvement/python-api-spec
RobertLucian Dec 18, 2020
a906fff
Rename models:model_path to models:path
RobertLucian Dec 18, 2020
220f7fa
Misc changes
RobertLucian Dec 18, 2020
2df1cd5
Merge branch 'master' into improvement/python-api-spec
RobertLucian Dec 21, 2020
6b06a91
Merge branch 'master' into improvement/python-api-spec
RobertLucian Dec 21, 2020
4f63ace
Address docs review comments
RobertLucian Dec 21, 2020
4596f5a
Address code review comments
RobertLucian Dec 21, 2020
0d4c7d5
Fix merge conflicts from 'master' into improvement/python-api-spec
RobertLucian Dec 21, 2020
2aefb83
Fix MMC/LR models not becoming discoverable
RobertLucian Dec 22, 2020
47e0b23
Merge branch 'master' into improvement/python-api-spec
RobertLucian Dec 22, 2020
2 changes: 1 addition & 1 deletion cli/cmd/lib_realtime_apis.go
@@ -68,7 +68,7 @@ func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment)

out += "\n" + console.Bold("endpoint: ") + realtimeAPI.Endpoint + "\n"

if !(realtimeAPI.Spec.Predictor.Type == userconfig.PythonPredictorType && realtimeAPI.Spec.Predictor.ModelPath == nil && realtimeAPI.Spec.Predictor.Models == nil) {
if !(realtimeAPI.Spec.Predictor.Type == userconfig.PythonPredictorType && realtimeAPI.Spec.Predictor.MultiModelReloading == nil) {
out += "\n" + describeModelInput(realtimeAPI.Status, realtimeAPI.Spec.Predictor, realtimeAPI.Endpoint)
}

2 changes: 1 addition & 1 deletion cli/local/docker_spec.go
@@ -96,7 +96,7 @@ func getAPIEnv(api *spec.API, awsClient *aws.Client, gcpClient *gcp.Client) []st
"CORTEX_MAX_REPLICA_CONCURRENCY="+s.Int32(api.Predictor.ProcessesPerReplica*api.Predictor.ThreadsPerProcess+1024), // allow a queue of 1024
)

if api.Predictor.ModelPath != nil || api.Predictor.Models != nil {
if api.Predictor.Type != userconfig.PythonPredictorType || api.Predictor.MultiModelReloading != nil {
envs = append(envs, "CORTEX_MODEL_DIR="+_modelDir)
}

24 changes: 17 additions & 7 deletions cli/local/model_cache.go
@@ -36,6 +36,16 @@ func CacheLocalModels(apiSpec *spec.API, models []spec.CuratedModelResource) err
var localModelCache *spec.LocalModelCache
localModelCaches := make([]*spec.LocalModelCache, 0)

var predictorModels *userconfig.MultiModels
var predictorModelsKey string
if apiSpec.Predictor.Models != nil {
predictorModels = apiSpec.Predictor.Models
predictorModelsKey = userconfig.ModelsKey
} else if apiSpec.Predictor.MultiModelReloading != nil {
predictorModels = apiSpec.Predictor.MultiModelReloading
predictorModelsKey = userconfig.MultiModelReloadingKey
}

modelsThatWereCachedAlready := 0
for _, model := range models {
if !model.LocalPath {
@@ -44,12 +54,12 @@ func CacheLocalModels(apiSpec *spec.API, models []spec.CuratedModelResource) err

localModelCache, wasAlreadyCached, err = cacheLocalModel(model)
if err != nil {
if apiSpec.Predictor.ModelPath != nil {
return errors.Wrap(err, apiSpec.Identify(), userconfig.PredictorKey, userconfig.ModelPathKey)
} else if apiSpec.Predictor.Models != nil && apiSpec.Predictor.Models.Dir != nil {
return errors.Wrap(err, apiSpec.Identify(), userconfig.PredictorKey, userconfig.ModelsKey, userconfig.ModelsDirKey, model.Name, *apiSpec.Predictor.Models.Dir)
if predictorModels.Path != nil {
return errors.Wrap(err, apiSpec.Identify(), userconfig.PredictorKey, predictorModelsKey, userconfig.ModelsPathKey)
} else if predictorModels.Dir != nil {
return errors.Wrap(err, apiSpec.Identify(), userconfig.PredictorKey, predictorModelsKey, userconfig.ModelsDirKey, model.Name, *apiSpec.Predictor.Models.Dir)
}
return errors.Wrap(err, apiSpec.Identify(), userconfig.PredictorKey, userconfig.ModelsKey, userconfig.ModelsPathsKey, model.Name, userconfig.ModelPathKey)
return errors.Wrap(err, apiSpec.Identify(), userconfig.PredictorKey, predictorModelsKey, userconfig.ModelsPathsKey, model.Name, userconfig.ModelsPathKey)
}
if wasAlreadyCached {
modelsThatWereCachedAlready++
@@ -79,7 +89,7 @@ func cacheLocalModel(model spec.CuratedModelResource) (*spec.LocalModelCache, bo
return nil, false, nil
}

hash, err := localModelHash(model.ModelPath)
hash, err := localModelHash(model.Path)
if err != nil {
return nil, false, err
}
@@ -130,7 +140,7 @@ func cacheLocalModel(model spec.CuratedModelResource) (*spec.LocalModelCache, bo
if len(model.Versions) == 0 {
destModelDir = filepath.Join(destModelDir, "1")
}
if err := files.CopyDirOverwrite(strings.TrimSuffix(model.ModelPath, "/"), s.EnsureSuffix(destModelDir, "/")); err != nil {
if err := files.CopyDirOverwrite(strings.TrimSuffix(model.Path, "/"), s.EnsureSuffix(destModelDir, "/")); err != nil {
return nil, false, err
}

26 changes: 14 additions & 12 deletions docs/workloads/batch/configuration.md
@@ -32,13 +32,14 @@
predictor:
type: tensorflow
path: <string> # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required)
model_path: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model) (either this or 'models' must be provided)
signature_key: <string> # name of the signature def to use for prediction (required if your model has more than one signature def)
models: # use this when multiple models per API are desired (either this or 'model_path' must be provided)
- name: <string> # unique name for the model (e.g. text-generator) (required)
model_path: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model) (required)
signature_key: <string> # name of the signature def to use for prediction (required if your model has more than one signature def)
...
models: # use this to serve a single model or multiple ones
      path: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model) (either this or 'paths' must be provided)
paths: # (either this or 'path' must be provided)
- name: <string> # unique name for the model (e.g. text-generator) (required)
path: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model) (required)
signature_key: <string> # name of the signature def to use for prediction (required if your model has more than one signature def)
...
signature_key: <string> # name of the signature def to use for prediction (required if your model has more than one signature def)
server_side_batching: # (optional)
max_batch_size: <int> # the maximum number of requests to aggregate before running inference
batch_interval: <duration> # the maximum amount of time to spend waiting for additional requests before running inference on the batch of requests
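
For instance, a minimal sketch of the new `models` section for a batch TensorFlow API (the API name, predictor file, and bucket paths are illustrative):

```yaml
- name: image-classifier
  kind: BatchAPI
  predictor:
    type: tensorflow
    path: predictor.py
    models:
      paths:
        - name: inception
          path: s3://my-bucket/models/inception/
        - name: resnet50
          path: s3://my-bucket/models/resnet50/
```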
@@ -66,11 +67,12 @@
predictor:
type: onnx
path: <string> # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
model_path: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model.onnx) (either this or 'models' must be provided)
models: # use this when multiple models per API are desired (either this or 'model_path' must be provided)
- name: <string> # unique name for the model (e.g. text-generator) (required)
model_path: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model.onnx) (required)
...
models: # use this to serve a single model or multiple ones
      path: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model.onnx) (either this or 'paths' must be provided)
paths: # (either this or 'path' must be provided)
- name: <string> # unique name for the model (e.g. text-generator) (required)
path: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model.onnx) (required)
...
config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (can be overridden by config passed in job submission) (optional)
python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
image: <string> # docker image to use for the Predictor (default: quay.io/cortexlabs/onnx-predictor-gpu:master or quay.io/cortexlabs/onnx-predictor-cpu:master based on compute)
15 changes: 8 additions & 7 deletions docs/workloads/multi-model/configuration.md
@@ -14,7 +14,8 @@ The directory `s3://cortex-examples/sklearn/mpg-estimator/linreg/` contains 4 di
predictor:
type: python
path: predictor.py
model_path: s3://cortex-examples/sklearn/mpg-estimator/linreg/
models:
path: s3://cortex-examples/sklearn/mpg-estimator/linreg/
```

#### `predictor.py`
@@ -94,11 +95,11 @@ class PythonPredictor:
models:
paths:
- name: inception
model_path: s3://cortex-examples/tensorflow/image-classifier/inception/
path: s3://cortex-examples/tensorflow/image-classifier/inception/
- name: iris
model_path: s3://cortex-examples/tensorflow/iris-classifier/nn/
path: s3://cortex-examples/tensorflow/iris-classifier/nn/
- name: resnet50
model_path: s3://cortex-examples/tensorflow/resnet50/
path: s3://cortex-examples/tensorflow/resnet50/
...
```

@@ -130,11 +131,11 @@ class TensorFlowPredictor:
models:
paths:
- name: resnet50
model_path: s3://cortex-examples/onnx/resnet50/
path: s3://cortex-examples/onnx/resnet50/
- name: mobilenet
model_path: s3://cortex-examples/onnx/mobilenet/
path: s3://cortex-examples/onnx/mobilenet/
- name: shufflenet
model_path: s3://cortex-examples/onnx/shufflenet/
path: s3://cortex-examples/onnx/shufflenet/
...
```

33 changes: 16 additions & 17 deletions docs/workloads/realtime/configuration.md
@@ -9,13 +9,13 @@
predictor:
type: python
path: <string> # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
model_path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (optional, cannot be provided along with 'models')
models: # use this to serve multiple models in a single API (optional, cannot be provided along with 'model_path')
dir: <string> # S3 path to a directory containing multiple models (e.g. s3://my-bucket/models/) (either this or 'paths' must be provided)
paths: # list of S3 paths to exported model directories (either this or 'dir' must be provided)
multi_model_reloading: # use this to serve a single model or multiple ones with live reloading (optional)
path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (either this, 'dir', or 'paths' must be provided)
paths: # list of S3 paths to exported model directories (either this, 'dir', or 'path' must be provided)
- name: <string> # unique name for the model (e.g. text-generator) (required)
model_path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (required)
path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (required)
...
dir: <string> # S3 path to a directory containing multiple models (e.g. s3://my-bucket/models/) (either this, 'path', or 'paths' must be provided)
    cache_size: <int> # the number of models to keep in memory (optional; all models are kept in memory by default)
disk_cache_size: <int> # the number of models to keep on disk (optional; all models are kept on disk by default)
server_side_batching: # (optional)
@@ -62,16 +62,15 @@
predictor:
type: tensorflow
path: <string> # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required)
model_path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (either this or 'models' must be provided)
signature_key: <string> # name of the signature def to use for prediction (required if your model has more than one signature def)
models: # use this to serve multiple models in a single API (either this or 'model_path' must be provided)
dir: <string> # S3 path to a directory containing multiple models (e.g. s3://my-bucket/models/) (either this or 'paths' must be provided)
paths: # list of S3 paths to exported model directories (either this or 'dir' must be provided)
models: # use this to serve a single model or multiple ones (required)
path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (either this, 'dir', or 'paths' must be provided)
paths: # list of S3 paths to exported model directories (either this, 'dir', or 'path' must be provided)
- name: <string> # unique name for the model (e.g. text-generator) (required)
model_path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (required)
path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (required)
signature_key: <string> # name of the signature def to use for prediction (required if your model has more than one signature def)
...
signature_key: # name of the signature def to use for prediction for 'dir'-specified models or for models specified using 'paths' that haven't had a signature key set
dir: <string> # S3 path to a directory containing multiple models (e.g. s3://my-bucket/models/) (either this, 'path', or 'paths' must be provided)
signature_key: # name of the signature def to use for prediction (required if your model has more than one signature def)
    cache_size: <int> # the number of models to keep in memory (optional; all models are kept in memory by default)
disk_cache_size: <int> # the number of models to keep on disk (optional; all models are kept on disk by default)
server_side_batching: # (optional)
@@ -119,13 +118,13 @@
predictor:
type: onnx
path: <string> # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
model_path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (either this or 'models' must be provided)
models: # use this to serve multiple models in a single API (either this or 'model_path' must be provided)
dir: <string> # S3 path to a directory containing multiple models (e.g. s3://my-bucket/models/) (either this or 'paths' must be provided)
paths: # list of S3 paths to exported model directories (either this or 'dir' must be provided)
models: # use this to serve a single model or multiple ones (required)
path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (either this, 'dir', or 'paths' must be provided)
paths: # list of S3 paths to exported model directories (either this, 'dir', or 'path' must be provided)
- name: <string> # unique name for the model (e.g. text-generator) (required)
model_path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (required)
path: <string> # S3 path to an exported model directory (e.g. s3://my-bucket/exported_model/) (required)
...
dir: <string> # S3 path to a directory containing multiple models (e.g. s3://my-bucket/models/) (either this, 'path', or 'paths' must be provided)
    cache_size: <int> # the number of models to keep in memory (optional; all models are kept in memory by default)
disk_cache_size: <int> # the number of models to keep on disk (optional; all models are kept on disk by default)
processes_per_replica: <int> # the number of parallel serving processes to run on each replica (default: 1)
14 changes: 8 additions & 6 deletions docs/workloads/realtime/models.md
@@ -102,7 +102,7 @@ or for a versioned model:

## Single model

The most common pattern is to serve a single model per API. The path to the model is specified in the `model_path` field in the `predictor` configuration. For example:
The most common pattern is to serve a single model per API. The path to the model is specified in the `path` field of the `predictor.models` configuration. For example:

```yaml
# cortex.yaml
@@ -111,10 +111,11 @@ The most common pattern is to serve a single model per API. The path to the mode
kind: RealtimeAPI
predictor:
# ...
model_path: s3://my-bucket/models/text-generator/
models:
path: s3://my-bucket/models/text-generator/
```

Note: for the Python predictor type, it is not necessary to specify the path to your model in `model_path`, since you can download and load it in your predictor's `__init__()` function. That said, it is necessary to use the `model_path` field to take advantage of [live model reloading](#live-model-reloading).
For the Python predictor type, the `models` field is named `multi_model_reloading`. It is not necessary to specify the `multi_model_reloading` section at all, since you can download and load the model in your predictor's `__init__()` function. That said, it is necessary to use the `path` field to take advantage of [live model reloading](#live-model-reloading).
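
For instance, a minimal sketch of live reloading for a single model with the Python predictor type (the API name and bucket path are illustrative):

```yaml
- name: text-generator
  kind: RealtimeAPI
  predictor:
    type: python
    path: predictor.py
    multi_model_reloading:
      path: s3://my-bucket/models/text-generator/
```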

## Multiple models

@@ -147,7 +148,8 @@ or:
dir: s3://my-bucket/models/
```

Note: for the Python predictor type, it is not necessary to specify the paths to your models in `models`, since you can download and load them in your predictor's `__init__()` function. That said, it is necessary to use the `models` field to take advantage of live reloading or multi model caching (see below).

For the Python predictor type, the `models` field is named `multi_model_reloading`. It is not necessary to specify the `multi_model_reloading` section at all, since you can download and load the models in your predictor's `__init__()` function. That said, it is necessary to use the `multi_model_reloading` field to take advantage of [live model reloading](#live-model-reloading) or [multi model caching](#multi-model-caching).
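
For instance, a minimal sketch of `multi_model_reloading` with multiple models (the API name, model names, and bucket paths are illustrative):

```yaml
- name: multi-model-api
  kind: RealtimeAPI
  predictor:
    type: python
    path: predictor.py
    multi_model_reloading:
      paths:
        - name: text-generator
          path: s3://my-bucket/models/text-generator/
        - name: sentiment-analyzer
          path: s3://my-bucket/models/sentiment-analyzer/
```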

When using the `models.paths` field, each path must be a valid model directory (see above for valid model directory structures).

@@ -168,7 +170,7 @@ In this case, there are two models in the directory, one of which is named "text

## Live model reloading

Live model reloading is a mechanism that periodically checks for updated models in the model path(s) provided in `predictor.model_path` or `predictor.models`. It is automatically enabled for all predictor types, including the Python predictor type (as long as model paths are specified via `model_path` or `models` in the `predictor` configuration).
Live model reloading is a mechanism that periodically checks for updated models in the model path(s) provided in `predictor.models`. It is automatically enabled for all predictor types, including the Python predictor type (as long as model paths are specified via `multi_model_reloading` in the `predictor` configuration).

The following is a list of events that will trigger the API to update its model(s):

Expand All @@ -181,7 +183,7 @@ Usage varies based on the predictor type:

### Python

To use live model reloading with the Python predictor, the model path(s) must be specified in the API's `predictor` configuration (via the `model_path` or `models` field). When models are specified in this manner, your `PythonPredictor` class must implement the `load_model()` function, and models can be retrieved by using the `get_model()` method of the `python_client` that's passed into your predictor's constructor.
To use live model reloading with the Python predictor, the model path(s) must be specified in the API's `predictor` configuration, via the `models` field. When models are specified in this manner, your `PythonPredictor` class must implement the `load_model()` function, and models can be retrieved by using the `get_model()` method of the `python_client` that's passed into your predictor's constructor.
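
As a rough sketch of how these pieces fit together (the pickle-based model format, file name, and payload shape are assumptions for illustration, not part of the Cortex API):

```python
import os
import pickle


class PythonPredictor:
    def __init__(self, config, python_client):
        # save the client; predict() uses it to fetch the (possibly reloaded) model
        self.client = python_client

    def load_model(self, model_path):
        # called by Cortex whenever a new or updated model version is detected;
        # may return anything, as long as it can be unloaded from memory via `del`
        with open(os.path.join(model_path, "model.pkl"), "rb") as f:
            return pickle.load(f)

    def predict(self, payload):
        model = self.client.get_model()  # returns whatever load_model() returned
        return model.predict(payload["input"])
```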

The `load_model()` function that you implement in your `PythonPredictor` can return anything that you need to make a prediction. There is one caveat: whatever the return value is, it must be unloadable from memory via the `del` keyword. The following frameworks have been tested to work:

5 changes: 2 additions & 3 deletions docs/workloads/realtime/predictors.md
@@ -59,8 +59,7 @@ class PythonPredictor:
the model and/or metadata.
python_client (optional): Python client which is used to retrieve
models for prediction. This should be saved for use in predict().
Required when `predictor.model_path` or `predictor.models` is
specified in the api configuration.
Required when `predictor.multi_model_reloading` is specified in the api configuration.
"""
self.client = python_client # optional

@@ -103,7 +102,7 @@ class PythonPredictor:
def load_model(self, model_path):
"""(Optional) Called by Cortex to load a model when necessary.

This method is required when `predictor.model_path` or `predictor.models`
        This method is required when the `predictor.multi_model_reloading`
field is specified in the api configuration.

Warning: this method must not make any modification to the model's