From 59e0cf687a5f0a0d4e23ea86b029643ac29f1676 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 18 Aug 2020 20:34:16 -0400 Subject: [PATCH 01/12] Rename sync api to realtime api and api splitter to traffic splitter --- cli/cluster/delete.go | 8 +- cli/cmd/deploy.go | 2 +- cli/cmd/errors.go | 2 +- cli/cmd/get.go | 62 +++++----- ...{lib_sync_apis.go => lib_realtime_apis.go} | 71 ++++++----- ...i_splitter.go => lib_traffic_splitters.go} | 48 ++++---- cli/cmd/predict.go | 10 +- cli/local/deploy.go | 2 +- cli/local/get.go | 8 +- dev/versions.md | 2 +- docs/cluster-management/config.md | 2 +- docs/cluster-management/install.md | 2 +- docs/deployments/batchapi.md | 2 +- docs/deployments/inferentia.md | 4 +- docs/deployments/networking.md | 2 +- docs/deployments/prediction-monitoring.md | 2 +- docs/deployments/python-packages.md | 2 +- .../{syncapi.md => realtimeapi.md} | 20 ++-- .../api-configuration.md | 6 +- .../{syncapi => realtimeapi}/autoscaling.md | 0 .../{syncapi => realtimeapi}/deployment.md | 2 +- .../{syncapi => realtimeapi}/parallelism.md | 0 .../prediction-monitoring.md | 2 +- .../{syncapi => realtimeapi}/predictors.md | 0 .../{syncapi => realtimeapi}/statuses.md | 0 .../realtimeapi/traffic-splitter.md | 82 +++++++++++++ docs/deployments/syncapi/apisplitter.md | 82 ------------- docs/guides/multi-model.md | 6 +- docs/summary.md | 18 +-- .../server-side-batching-errors.md | 2 +- docs/troubleshooting/tf-session-in-predict.md | 2 +- examples/keras/document-denoiser/cortex.yaml | 2 +- examples/onnx/iris-classifier/cortex.yaml | 2 +- .../onnx/multi-model-classifier/cortex.yaml | 2 +- examples/onnx/yolov5-youtube/cortex.yaml | 2 +- examples/pytorch/answer-generator/cortex.yaml | 2 +- .../image-classifier-alexnet/cortex.yaml | 2 +- .../image-classifier-resnet50/cortex.yaml | 2 +- .../image-classifier-resnet50/cortex_gpu.yaml | 2 +- .../image-classifier-resnet50/cortex_inf.yaml | 2 +- examples/pytorch/iris-classifier/cortex.yaml | 2 +- 
.../pytorch/language-identifier/cortex.yaml | 2 +- .../multi-model-text-analyzer/cortex.yaml | 2 +- examples/pytorch/object-detector/cortex.yaml | 2 +- .../pytorch/reading-comprehender/cortex.yaml | 2 +- examples/pytorch/search-completer/cortex.yaml | 2 +- .../pytorch/sentiment-analyzer/cortex.yaml | 2 +- examples/pytorch/text-generator/cortex.yaml | 2 +- examples/pytorch/text-summarizer/cortex.yaml | 2 +- examples/sklearn/iris-classifier/README.md | 42 +++---- examples/sklearn/iris-classifier/cortex.yaml | 6 +- examples/sklearn/mpg-estimator/cortex.yaml | 2 +- examples/spacy/entity-recognizer/cortex.yaml | 2 +- .../image-classifier-inception/cortex.yaml | 2 +- .../cortex_server_side_batching.yaml | 2 +- .../image-classifier-resnet50/cortex.yaml | 2 +- .../image-classifier-resnet50/cortex_gpu.yaml | 2 +- .../cortex_gpu_server_side_batching.yaml | 2 +- .../image-classifier-resnet50/cortex_inf.yaml | 2 +- .../cortex_inf_server_side_batching.yaml | 2 +- .../tensorflow/iris-classifier/cortex.yaml | 2 +- .../license-plate-reader/cortex_full.yaml | 4 +- .../license-plate-reader/cortex_lite.yaml | 2 +- .../multi-model-classifier/cortex.yaml | 2 +- .../tensorflow/sentiment-analyzer/cortex.yaml | 2 +- .../tensorflow/text-generator/cortex.yaml | 2 +- examples/traffic-splitter/README.md | 111 ++++++++++++++++++ examples/traffic-splitter/cortex.yaml | 27 +++++ examples/traffic-splitter/onnx_predictor.py | 20 ++++ .../sample.json | 0 .../traffic-splitter/tensorflow_predictor.py | 13 ++ pkg/operator/endpoints/logs.go | 8 +- pkg/operator/main.go | 6 +- pkg/operator/resources/errors.go | 20 ++-- .../resources/{syncapi => realtimeapi}/api.go | 14 +-- .../{syncapi => realtimeapi}/autoscaler.go | 2 +- .../{syncapi => realtimeapi}/dashboard.go | 2 +- .../{syncapi => realtimeapi}/errors.go | 4 +- .../{syncapi => realtimeapi}/k8s_specs.go | 2 +- .../{syncapi => realtimeapi}/logs.go | 2 +- .../{syncapi => realtimeapi}/metrics.go | 2 +- .../{syncapi => realtimeapi}/status.go | 2 +- 
pkg/operator/resources/resources.go | 92 +++++++-------- .../{apisplitter => trafficsplitter}/api.go | 30 ++--- .../k8s_specs.go | 20 ++-- pkg/operator/resources/validations.go | 22 ++-- pkg/operator/schema/schema.go | 16 +-- pkg/types/spec/errors.go | 22 ++-- pkg/types/spec/validations.go | 34 +++--- pkg/types/userconfig/api.go | 2 +- pkg/types/userconfig/config_key.go | 2 +- pkg/types/userconfig/kind.go | 8 +- pkg/workloads/cortex/serve/run.sh | 2 +- pkg/workloads/cortex/serve/start.py | 2 +- 94 files changed, 615 insertions(+), 441 deletions(-) rename cli/cmd/{lib_sync_apis.go => lib_realtime_apis.go} (79%) rename cli/cmd/{lib_api_splitter.go => lib_traffic_splitters.go} (62%) rename docs/deployments/{syncapi.md => realtimeapi.md} (64%) rename docs/deployments/{syncapi => realtimeapi}/api-configuration.md (99%) rename docs/deployments/{syncapi => realtimeapi}/autoscaling.md (100%) rename docs/deployments/{syncapi => realtimeapi}/deployment.md (98%) rename docs/deployments/{syncapi => realtimeapi}/parallelism.md (100%) rename docs/deployments/{syncapi => realtimeapi}/prediction-monitoring.md (98%) rename docs/deployments/{syncapi => realtimeapi}/predictors.md (100%) rename docs/deployments/{syncapi => realtimeapi}/statuses.md (100%) create mode 100644 docs/deployments/realtimeapi/traffic-splitter.md delete mode 100644 docs/deployments/syncapi/apisplitter.md create mode 100644 examples/traffic-splitter/README.md create mode 100644 examples/traffic-splitter/cortex.yaml create mode 100644 examples/traffic-splitter/onnx_predictor.py rename examples/{apisplitter => traffic-splitter}/sample.json (100%) create mode 100644 examples/traffic-splitter/tensorflow_predictor.py rename pkg/operator/resources/{syncapi => realtimeapi}/api.go (97%) rename pkg/operator/resources/{syncapi => realtimeapi}/autoscaler.go (99%) rename pkg/operator/resources/{syncapi => realtimeapi}/dashboard.go (99%) rename pkg/operator/resources/{syncapi => realtimeapi}/errors.go (92%) rename 
pkg/operator/resources/{syncapi => realtimeapi}/k8s_specs.go (99%) rename pkg/operator/resources/{syncapi => realtimeapi}/logs.go (99%) rename pkg/operator/resources/{syncapi => realtimeapi}/metrics.go (99%) rename pkg/operator/resources/{syncapi => realtimeapi}/status.go (99%) rename pkg/operator/resources/{apisplitter => trafficsplitter}/api.go (86%) rename pkg/operator/resources/{apisplitter => trafficsplitter}/k8s_specs.go (67%) diff --git a/cli/cluster/delete.go b/cli/cluster/delete.go index b8a60d5e40..23aade3fc1 100644 --- a/cli/cluster/delete.go +++ b/cli/cluster/delete.go @@ -29,7 +29,7 @@ import ( func Delete(operatorConfig OperatorConfig, apiName string, keepCache bool, force bool) (schema.DeleteResponse, error) { if !force { - readyReplicas := getReadySyncAPIReplicasOrNil(operatorConfig, apiName) + readyReplicas := getReadyRealtimeAPIReplicasOrNil(operatorConfig, apiName) if readyReplicas != nil && *readyReplicas > 2 { prompt.YesOrExit(fmt.Sprintf("are you sure you want to delete %s (which has %d live replicas)?", apiName, *readyReplicas), "", "") } @@ -54,7 +54,7 @@ func Delete(operatorConfig OperatorConfig, apiName string, keepCache bool, force return deleteRes, nil } -func getReadySyncAPIReplicasOrNil(operatorConfig OperatorConfig, apiName string) *int32 { +func getReadyRealtimeAPIReplicasOrNil(operatorConfig OperatorConfig, apiName string) *int32 { httpRes, err := HTTPGet(operatorConfig, "/get/"+apiName) if err != nil { return nil @@ -65,11 +65,11 @@ func getReadySyncAPIReplicasOrNil(operatorConfig OperatorConfig, apiName string) return nil } - if apiRes.SyncAPI == nil { + if apiRes.RealtimeAPI == nil { return nil } - totalReady := apiRes.SyncAPI.Status.Updated.Ready + apiRes.SyncAPI.Status.Stale.Ready + totalReady := apiRes.RealtimeAPI.Status.Updated.Ready + apiRes.RealtimeAPI.Status.Stale.Ready return &totalReady } diff --git a/cli/cmd/deploy.go b/cli/cmd/deploy.go index a868ef532a..d9b58202cd 100644 --- a/cli/cmd/deploy.go +++ b/cli/cmd/deploy.go 
@@ -283,7 +283,7 @@ func getAPICommandsMessage(results []schema.DeployResult, envName string) string items.Add(fmt.Sprintf("cortex get %s%s", apiName, envArg), "(show api info)") for _, result := range results { - if result.API.Kind == userconfig.SyncAPIKind { + if result.API.Kind == userconfig.RealtimeAPIKind { items.Add(fmt.Sprintf("cortex logs %s%s", apiName, envArg), "(stream api logs)") break } diff --git a/cli/cmd/errors.go b/cli/cmd/errors.go index 43b3b2aaf9..832dd02356 100644 --- a/cli/cmd/errors.go +++ b/cli/cmd/errors.go @@ -347,6 +347,6 @@ func ErrorDeployFromTopLevelDir(genericDirName string, providerType types.Provid } return errors.WithStack(&errors.Error{ Kind: ErrDeployFromTopLevelDir, - Message: fmt.Sprintf("cannot deploy from your %s directory - when deploying your API, cortex sends all files in your project directory (i.e. the directory which contains cortex.yaml) to your %s (see https://docs.cortex.dev/v/%s/deployments/syncapi/predictors#project-files for Sync API and https://docs.cortex.dev/v/%s/deployments/batchapi/predictors#project-files for Batch API); therefore it is recommended to create a subdirectory for your project files", genericDirName, targetStr, consts.CortexVersionMinor, consts.CortexVersionMinor), + Message: fmt.Sprintf("cannot deploy from your %s directory - when deploying your API, cortex sends all files in your project directory (i.e. 
the directory which contains cortex.yaml) to your %s (see https://docs.cortex.dev/v/%s/deployments/realtimeapi/predictors#project-files for Realtime API and https://docs.cortex.dev/v/%s/deployments/batchapi/predictors#project-files for Batch API); therefore it is recommended to create a subdirectory for your project files", genericDirName, targetStr, consts.CortexVersionMinor, consts.CortexVersionMinor), }) } diff --git a/cli/cmd/get.go b/cli/cmd/get.go index ed0d428e98..34461c7213 100644 --- a/cli/cmd/get.go +++ b/cli/cmd/get.go @@ -37,7 +37,7 @@ import ( const ( _titleEnvironment = "env" - _titleSyncAPI = "sync api" + _titleRealtimeAPI = "realtime api" _titleStatus = "status" _titleUpToDate = "up-to-date" _titleStale = "stale" @@ -150,12 +150,12 @@ func getAPIsInAllEnvironments() (string, error) { return "", err } - var allSyncAPIs []schema.SyncAPI - var allSyncAPIEnvs []string + var allRealtimeAPIs []schema.RealtimeAPI + var allRealtimeAPIEnvs []string var allBatchAPIs []schema.BatchAPI var allBatchAPIEnvs []string - var allAPISplitters []schema.APISplitter - var allAPISplitterEnvs []string + var allTrafficSplitters []schema.TrafficSplitter + var allTrafficSplitterEnvs []string errorsMap := map[string]error{} // get apis from both environments @@ -172,15 +172,15 @@ func getAPIsInAllEnvironments() (string, error) { for range apisRes.BatchAPIs { allBatchAPIEnvs = append(allBatchAPIEnvs, env.Name) } - for range apisRes.SyncAPIs { - allSyncAPIEnvs = append(allSyncAPIEnvs, env.Name) + for range apisRes.RealtimeAPIs { + allRealtimeAPIEnvs = append(allRealtimeAPIEnvs, env.Name) } - for range apisRes.APISplitters { - allAPISplitterEnvs = append(allAPISplitterEnvs, env.Name) + for range apisRes.TrafficSplitters { + allTrafficSplitterEnvs = append(allTrafficSplitterEnvs, env.Name) } - allSyncAPIs = append(allSyncAPIs, apisRes.SyncAPIs...) + allRealtimeAPIs = append(allRealtimeAPIs, apisRes.RealtimeAPIs...) allBatchAPIs = append(allBatchAPIs, apisRes.BatchAPIs...) 
- allAPISplitters = append(allAPISplitters, apisRes.APISplitters...) + allTrafficSplitters = append(allTrafficSplitters, apisRes.TrafficSplitters...) } else { errorsMap[env.Name] = err } @@ -188,7 +188,7 @@ func getAPIsInAllEnvironments() (string, error) { out := "" - if len(allSyncAPIs) == 0 && len(allBatchAPIs) == 0 && len(allAPISplitters) == 0 { + if len(allRealtimeAPIs) == 0 && len(allBatchAPIs) == 0 && len(allTrafficSplitters) == 0 { if len(errorsMap) == 1 { // Print the error if there is just one exit.Error(errors.FirstErrorInMap(errorsMap)) @@ -203,9 +203,9 @@ func getAPIsInAllEnvironments() (string, error) { out += t.MustFormat() } - if len(allSyncAPIs) > 0 { - t := syncAPIsTable(allSyncAPIs, allSyncAPIEnvs) - if strset.New(allSyncAPIEnvs...).IsEqual(strset.New(types.LocalProviderType.String())) { + if len(allRealtimeAPIs) > 0 { + t := realtimeAPIsTable(allRealtimeAPIs, allRealtimeAPIEnvs) + if strset.New(allRealtimeAPIEnvs...).IsEqual(strset.New(types.LocalProviderType.String())) { hideReplicaCountColumns(&t) } @@ -216,10 +216,10 @@ func getAPIsInAllEnvironments() (string, error) { out += t.MustFormat() } - if len(allAPISplitters) > 0 { - t := apiSplitterListTable(allAPISplitters, allAPISplitterEnvs) + if len(allTrafficSplitters) > 0 { + t := trafficSplitterListTable(allTrafficSplitters, allTrafficSplitterEnvs) - if len(allSyncAPIs) > 0 || len(allBatchAPIs) > 0 { + if len(allRealtimeAPIs) > 0 || len(allBatchAPIs) > 0 { out += "\n" } @@ -267,7 +267,7 @@ func getAPIsByEnv(env cliconfig.Environment, printEnv bool) (string, error) { } } - if len(apisRes.SyncAPIs) == 0 && len(apisRes.BatchAPIs) == 0 && len(apisRes.APISplitters) == 0 { + if len(apisRes.RealtimeAPIs) == 0 && len(apisRes.BatchAPIs) == 0 && len(apisRes.TrafficSplitters) == 0 { return console.Bold("no apis are deployed"), nil } @@ -285,13 +285,13 @@ func getAPIsByEnv(env cliconfig.Environment, printEnv bool) (string, error) { out += t.MustFormat() } - if len(apisRes.SyncAPIs) > 0 { + if 
len(apisRes.RealtimeAPIs) > 0 { envNames := []string{} - for range apisRes.SyncAPIs { + for range apisRes.RealtimeAPIs { envNames = append(envNames, env.Name) } - t := syncAPIsTable(apisRes.SyncAPIs, envNames) + t := realtimeAPIsTable(apisRes.RealtimeAPIs, envNames) t.FindHeaderByTitle(_titleEnvironment).Hidden = true if len(apisRes.BatchAPIs) > 0 { @@ -305,16 +305,16 @@ func getAPIsByEnv(env cliconfig.Environment, printEnv bool) (string, error) { out += t.MustFormat() } - if len(apisRes.APISplitters) > 0 { + if len(apisRes.TrafficSplitters) > 0 { envNames := []string{} - for range apisRes.APISplitters { + for range apisRes.TrafficSplitters { envNames = append(envNames, env.Name) } - t := apiSplitterListTable(apisRes.APISplitters, envNames) + t := trafficSplitterListTable(apisRes.TrafficSplitters, envNames) t.FindHeaderByTitle(_titleEnvironment).Hidden = true - if len(apisRes.BatchAPIs) > 0 || len(apisRes.SyncAPIs) > 0 { + if len(apisRes.BatchAPIs) > 0 || len(apisRes.RealtimeAPIs) > 0 { out += "\n" } @@ -357,11 +357,11 @@ func getAPI(env cliconfig.Environment, apiName string) (string, error) { return "", err } - if apiRes.SyncAPI != nil { - return syncAPITable(apiRes.SyncAPI, env) + if apiRes.RealtimeAPI != nil { + return realtimeAPITable(apiRes.RealtimeAPI, env) } - if apiRes.APISplitter != nil { - return apiSplitterTable(apiRes.APISplitter, env) + if apiRes.TrafficSplitter != nil { + return trafficSplitterTable(apiRes.TrafficSplitter, env) } return batchAPITable(*apiRes.BatchAPI), nil } @@ -375,7 +375,7 @@ func getAPI(env cliconfig.Environment, apiName string) (string, error) { return "", err } - return syncAPITable(apiRes.SyncAPI, env) + return realtimeAPITable(apiRes.RealtimeAPI, env) } func titleStr(title string) string { diff --git a/cli/cmd/lib_sync_apis.go b/cli/cmd/lib_realtime_apis.go similarity index 79% rename from cli/cmd/lib_sync_apis.go rename to cli/cmd/lib_realtime_apis.go index 556b73c16f..1c835858fe 100644 --- a/cli/cmd/lib_sync_apis.go +++ 
b/cli/cmd/lib_realtime_apis.go @@ -41,82 +41,79 @@ import ( "github.com/cortexlabs/cortex/pkg/types/userconfig" ) -func syncAPITable(syncAPI *schema.SyncAPI, env cliconfig.Environment) (string, error) { +func realtimeAPITable(realtimeAPI *schema.RealtimeAPI, env cliconfig.Environment) (string, error) { var out string - t := syncAPIsTable([]schema.SyncAPI{*syncAPI}, []string{env.Name}) + t := realtimeAPIsTable([]schema.RealtimeAPI{*realtimeAPI}, []string{env.Name}) t.FindHeaderByTitle(_titleEnvironment).Hidden = true - t.FindHeaderByTitle(_titleSyncAPI).Hidden = true - if env.Provider == types.LocalProviderType { - hideReplicaCountColumns(&t) - } + t.FindHeaderByTitle(_titleRealtimeAPI).Hidden = true out += t.MustFormat() - if env.Provider != types.LocalProviderType && syncAPI.Spec.Monitoring != nil { - switch syncAPI.Spec.Monitoring.ModelType { + if env.Provider != types.LocalProviderType && realtimeAPI.Spec.Monitoring != nil { + switch realtimeAPI.Spec.Monitoring.ModelType { case userconfig.ClassificationModelType: - out += "\n" + classificationMetricsStr(&syncAPI.Metrics) + out += "\n" + classificationMetricsStr(&realtimeAPI.Metrics) case userconfig.RegressionModelType: - out += "\n" + regressionMetricsStr(&syncAPI.Metrics) + out += "\n" + regressionMetricsStr(&realtimeAPI.Metrics) } } - if syncAPI.DashboardURL != "" { - out += "\n" + console.Bold("metrics dashboard: ") + syncAPI.DashboardURL + "\n" + if realtimeAPI.DashboardURL != "" { + out += "\n" + console.Bold("metrics dashboard: ") + realtimeAPI.DashboardURL + "\n" } - out += "\n" + console.Bold("endpoint: ") + syncAPI.Endpoint + out += "\n" + console.Bold("endpoint: ") + realtimeAPI.Endpoint - out += fmt.Sprintf("\n%s curl %s -X POST -H \"Content-Type: application/json\" -d @sample.json\n", console.Bold("curl:"), syncAPI.Endpoint) + out += fmt.Sprintf("\n%s curl %s -X POST -H \"Content-Type: application/json\" -d @sample.json\n", console.Bold("curl:"), realtimeAPI.Endpoint) - if syncAPI.Spec.Predictor.Type 
== userconfig.TensorFlowPredictorType || syncAPI.Spec.Predictor.Type == userconfig.ONNXPredictorType { - out += "\n" + describeModelInput(&syncAPI.Status, syncAPI.Endpoint) + if realtimeAPI.Spec.Predictor.Type == userconfig.TensorFlowPredictorType || realtimeAPI.Spec.Predictor.Type == userconfig.ONNXPredictorType { + out += "\n" + describeModelInput(&realtimeAPI.Status, realtimeAPI.Endpoint) } - out += titleStr("configuration") + strings.TrimSpace(syncAPI.Spec.UserStr(env.Provider)) + out += titleStr("configuration") + strings.TrimSpace(realtimeAPI.Spec.UserStr(env.Provider)) return out, nil } -func syncAPIsTable(syncAPIs []schema.SyncAPI, envNames []string) table.Table { - rows := make([][]interface{}, 0, len(syncAPIs)) +func realtimeAPIsTable(realtimeAPIs []schema.RealtimeAPI, envNames []string) table.Table { + rows := make([][]interface{}, 0, len(realtimeAPIs)) var totalFailed int32 var totalStale int32 var total4XX int var total5XX int - for i, syncAPI := range syncAPIs { - lastUpdated := time.Unix(syncAPI.Spec.LastUpdated, 0) + for i, realtimeAPI := range realtimeAPIs { + lastUpdated := time.Unix(realtimeAPI.Spec.LastUpdated, 0) rows = append(rows, []interface{}{ envNames[i], - syncAPI.Spec.Name, - syncAPI.Status.Message(), - syncAPI.Status.Updated.Ready, - syncAPI.Status.Stale.Ready, - syncAPI.Status.Requested, - syncAPI.Status.Updated.TotalFailed(), + realtimeAPI.Spec.Name, + realtimeAPI.Status.Message(), + realtimeAPI.Status.Updated.Ready, + realtimeAPI.Status.Stale.Ready, + realtimeAPI.Status.Requested, + realtimeAPI.Status.Updated.TotalFailed(), libtime.SinceStr(&lastUpdated), - latencyStr(&syncAPI.Metrics), - code2XXStr(&syncAPI.Metrics), - code4XXStr(&syncAPI.Metrics), - code5XXStr(&syncAPI.Metrics), + latencyStr(&realtimeAPI.Metrics), + code2XXStr(&realtimeAPI.Metrics), + code4XXStr(&realtimeAPI.Metrics), + code5XXStr(&realtimeAPI.Metrics), }) - totalFailed += syncAPI.Status.Updated.TotalFailed() - totalStale += syncAPI.Status.Stale.Ready + totalFailed 
+= realtimeAPI.Status.Updated.TotalFailed() + totalStale += realtimeAPI.Status.Stale.Ready - if syncAPI.Metrics.NetworkStats != nil { - total4XX += syncAPI.Metrics.NetworkStats.Code4XX - total5XX += syncAPI.Metrics.NetworkStats.Code5XX + if realtimeAPI.Metrics.NetworkStats != nil { + total4XX += realtimeAPI.Metrics.NetworkStats.Code4XX + total5XX += realtimeAPI.Metrics.NetworkStats.Code5XX } } return table.Table{ Headers: []table.Header{ {Title: _titleEnvironment}, - {Title: _titleSyncAPI}, + {Title: _titleRealtimeAPI}, {Title: _titleStatus}, {Title: _titleUpToDate}, {Title: _titleStale, Hidden: totalStale == 0}, diff --git a/cli/cmd/lib_api_splitter.go b/cli/cmd/lib_traffic_splitters.go similarity index 62% rename from cli/cmd/lib_api_splitter.go rename to cli/cmd/lib_traffic_splitters.go index da12c601a0..fb62ea216a 100644 --- a/cli/cmd/lib_api_splitter.go +++ b/cli/cmd/lib_traffic_splitters.go @@ -31,17 +31,17 @@ import ( ) const ( - _titleAPISplitter = "api splitter" - _apiSplitterWeights = "weights" - _titleAPIs = "apis" + _titleTrafficSplitter = "traffic splitter" + _trafficSplitterWeights = "weights" + _titleAPIs = "apis" ) -func apiSplitterTable(apiSplitter *schema.APISplitter, env cliconfig.Environment) (string, error) { +func trafficSplitterTable(trafficSplitter *schema.TrafficSplitter, env cliconfig.Environment) (string, error) { var out string - lastUpdated := time.Unix(apiSplitter.Spec.LastUpdated, 0) + lastUpdated := time.Unix(trafficSplitter.Spec.LastUpdated, 0) - t, err := trafficSplitTable(*apiSplitter, env) + t, err := trafficSplitTable(*trafficSplitter, env) if err != nil { return "", err } @@ -50,33 +50,33 @@ func apiSplitterTable(apiSplitter *schema.APISplitter, env cliconfig.Environment out += t.MustFormat() out += "\n" + console.Bold("last updated: ") + libtime.SinceStr(&lastUpdated) - out += "\n" + console.Bold("endpoint: ") + apiSplitter.Endpoint - out += fmt.Sprintf("\n%s curl %s -X POST -H \"Content-Type: application/json\" -d 
@sample.json\n", console.Bold("curl:"), apiSplitter.Endpoint) + out += "\n" + console.Bold("endpoint: ") + trafficSplitter.Endpoint + out += fmt.Sprintf("\n%s curl %s -X POST -H \"Content-Type: application/json\" -d @sample.json\n", console.Bold("curl:"), trafficSplitter.Endpoint) - out += titleStr("configuration") + strings.TrimSpace(apiSplitter.Spec.UserStr(env.Provider)) + out += titleStr("configuration") + strings.TrimSpace(trafficSplitter.Spec.UserStr(env.Provider)) return out, nil } -func trafficSplitTable(apiSplitter schema.APISplitter, env cliconfig.Environment) (table.Table, error) { - rows := make([][]interface{}, 0, len(apiSplitter.Spec.APIs)) +func trafficSplitTable(trafficSplitter schema.TrafficSplitter, env cliconfig.Environment) (table.Table, error) { + rows := make([][]interface{}, 0, len(trafficSplitter.Spec.APIs)) - for _, api := range apiSplitter.Spec.APIs { + for _, api := range trafficSplitter.Spec.APIs { apiRes, err := cluster.GetAPI(MustGetOperatorConfig(env.Name), api.Name) if err != nil { return table.Table{}, err } - lastUpdated := time.Unix(apiRes.SyncAPI.Spec.LastUpdated, 0) + lastUpdated := time.Unix(apiRes.RealtimeAPI.Spec.LastUpdated, 0) rows = append(rows, []interface{}{ env.Name, - apiRes.SyncAPI.Spec.Name, + apiRes.RealtimeAPI.Spec.Name, api.Weight, - apiRes.SyncAPI.Status.Message(), - apiRes.SyncAPI.Status.Requested, + apiRes.RealtimeAPI.Status.Message(), + apiRes.RealtimeAPI.Status.Requested, libtime.SinceStr(&lastUpdated), - latencyStr(&apiRes.SyncAPI.Metrics), - code2XXStr(&apiRes.SyncAPI.Metrics), - code5XXStr(&apiRes.SyncAPI.Metrics), + latencyStr(&apiRes.RealtimeAPI.Metrics), + code2XXStr(&apiRes.RealtimeAPI.Metrics), + code5XXStr(&apiRes.RealtimeAPI.Metrics), }) } @@ -84,7 +84,7 @@ func trafficSplitTable(apiSplitter schema.APISplitter, env cliconfig.Environment Headers: []table.Header{ {Title: _titleEnvironment}, {Title: _titleAPIs}, - {Title: _apiSplitterWeights}, + {Title: _trafficSplitterWeights}, {Title: _titleStatus}, 
{Title: _titleRequested}, {Title: _titleLastupdated}, @@ -96,9 +96,9 @@ func trafficSplitTable(apiSplitter schema.APISplitter, env cliconfig.Environment }, nil } -func apiSplitterListTable(apiSplitter []schema.APISplitter, envNames []string) table.Table { - rows := make([][]interface{}, 0, len(apiSplitter)) - for i, splitAPI := range apiSplitter { +func trafficSplitterListTable(trafficSplitter []schema.TrafficSplitter, envNames []string) table.Table { + rows := make([][]interface{}, 0, len(trafficSplitter)) + for i, splitAPI := range trafficSplitter { lastUpdated := time.Unix(splitAPI.Spec.LastUpdated, 0) var apis []string for _, api := range splitAPI.Spec.APIs { @@ -116,7 +116,7 @@ func apiSplitterListTable(apiSplitter []schema.APISplitter, envNames []string) t return table.Table{ Headers: []table.Header{ {Title: _titleEnvironment}, - {Title: _titleAPISplitter}, + {Title: _titleTrafficSplitter}, {Title: _titleAPIs}, {Title: _titleLastupdated}, }, diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go index c1f463aee8..52c679e68a 100644 --- a/cli/cmd/predict.go +++ b/cli/cmd/predict.go @@ -75,18 +75,18 @@ var _predictCmd = &cobra.Command{ } } - if apiRes.SyncAPI == nil { + if apiRes.RealtimeAPI == nil { exit.Error(errors.ErrorUnexpected("unable to get api", apiName)) // unexpected } - syncAPI := apiRes.SyncAPI + realtimeAPI := apiRes.RealtimeAPI - totalReady := syncAPI.Status.Updated.Ready + syncAPI.Status.Stale.Ready + totalReady := realtimeAPI.Status.Updated.Ready + realtimeAPI.Status.Stale.Ready if totalReady == 0 { - exit.Error(ErrorAPINotReady(apiName, syncAPI.Status.Message())) + exit.Error(ErrorAPINotReady(apiName, realtimeAPI.Status.Message())) } - predictResponse, err := makePredictRequest(syncAPI.Endpoint, jsonPath) + predictResponse, err := makePredictRequest(realtimeAPI.Endpoint, jsonPath) if err != nil { exit.Error(err) } diff --git a/cli/local/deploy.go b/cli/local/deploy.go index 51f5d3e548..b12f6bcb0f 100644 --- a/cli/local/deploy.go +++ 
b/cli/local/deploy.go @@ -69,7 +69,7 @@ func Deploy(env cliconfig.Environment, configPath string, projectFileList []stri err = ValidateLocalAPIs(apiConfigs, projectFiles, awsClient) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Sync API can be found at https://docs.cortex.dev/v/%s/deployments/syncapi/api-configuration", consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration", consts.CortexVersionMinor)) return schema.DeployResponse{}, err } diff --git a/cli/local/get.go b/cli/local/get.go index 7734cef86f..2c8bd6778d 100644 --- a/cli/local/get.go +++ b/cli/local/get.go @@ -41,7 +41,7 @@ func GetAPIs() (schema.GetAPIsResponse, error) { return schema.GetAPIsResponse{}, err } - syncAPIs := make([]schema.SyncAPI, len(apiSpecList)) + realtimeAPIs := make([]schema.RealtimeAPI, len(apiSpecList)) for i, apiSpec := range apiSpecList { apiStatus, err := GetAPIStatus(&apiSpec) if err != nil { @@ -53,7 +53,7 @@ func GetAPIs() (schema.GetAPIsResponse, error) { return schema.GetAPIsResponse{}, err } - syncAPIs[i] = schema.SyncAPI{ + realtimeAPIs[i] = schema.RealtimeAPI{ Spec: apiSpec, Status: apiStatus, Metrics: metrics, @@ -61,7 +61,7 @@ func GetAPIs() (schema.GetAPIsResponse, error) { } return schema.GetAPIsResponse{ - SyncAPIs: syncAPIs, + RealtimeAPIs: realtimeAPIs, }, nil } @@ -169,7 +169,7 @@ func GetAPI(apiName string) (schema.GetAPIResponse, error) { } return schema.GetAPIResponse{ - SyncAPI: &schema.SyncAPI{ + RealtimeAPI: &schema.RealtimeAPI{ Spec: *apiSpec, Status: apiStatus, Metrics: apiMetrics, diff --git a/dev/versions.md b/dev/versions.md index 8f7284f30f..38ff02d6f0 100644 --- a/dev/versions.md +++ b/dev/versions.md @@ -144,7 +144,7 @@ Note: it's ok if example training notebooks aren't upgraded, as long as the expo 1. 
Update versions in `images/python-predictor-*/Dockerfile`, `images/tensorflow-predictor/Dockerfile`, and `images/onnx-predictor-*/Dockerfile` 1. To determine the versions used in `images/python-predictor-inf/Dockerfile`, run `pip install --extra-index-url https://pip.repos.neuron.amazonaws.com neuron-cc tensorflow-neuron torch-neuron` from a clean environment and check what versions of all the dependencies are installed. 1. Update versions in `pkg/workloads/cortex/serve/requirements.txt` and `pkg/workloads/cortex/downloader/requirements.txt` -1. Update the versions listed in "Pre-installed packages" in `syncapi/predictors.md` and `batchapi/predictors.md` (look at the diff carefully since some packages are not shown, and e.g. `tensorflow-cpu` -> `tensorflow`) +1. Update the versions listed in "Pre-installed packages" in `realtimeapi/predictors.md` and `batchapi/predictors.md` (look at the diff carefully since some packages are not shown, and e.g. `tensorflow-cpu` -> `tensorflow`) 1. Rerun all examples and check their logs ## Istio diff --git a/docs/cluster-management/config.md b/docs/cluster-management/config.md index df055590d9..5f4d2cdbbd 100644 --- a/docs/cluster-management/config.md +++ b/docs/cluster-management/config.md @@ -85,7 +85,7 @@ spot: false ssl_certificate_arn: ``` -The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [Sync API configuration](../deployments/syncapi/api-configuration.md) and in your [Batch API configuration](../deployments/batchapi/api-configuration.md). +The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [Realtime API configuration](../deployments/realtimeapi/api-configuration.md) and in your [Batch API configuration](../deployments/batchapi/api-configuration.md). 
The docker images used by the Cortex cluster can also be overridden, although this is not common. They can be configured by adding any of these keys to your cluster configuration file (default values are shown): diff --git a/docs/cluster-management/install.md b/docs/cluster-management/install.md index e9daf44217..5dff8989fa 100644 --- a/docs/cluster-management/install.md +++ b/docs/cluster-management/install.md @@ -21,7 +21,7 @@ git clone -b master https://github.com/cortexlabs/cortex.git # navigate to the TensorFlow iris classification example cd cortex/examples/tensorflow/iris-classifier -# deploy the model as a sync api +# deploy the model as a realtime api cortex deploy # view the status of the api diff --git a/docs/deployments/batchapi.md b/docs/deployments/batchapi.md index 5113d7e3ac..1e8786fd34 100644 --- a/docs/deployments/batchapi.md +++ b/docs/deployments/batchapi.md @@ -13,7 +13,7 @@ You may want to deploy your model as a Batch API if any of the following scenari * inference is a part of internal data pipelines that may be chained together * a small number of requests are received, but each request takes minutes or hours to complete -You may want to consider deploying your model as a [Sync API](syncapi.md) if these scenarios don't apply to you. +You may want to consider deploying your model as a [Realtime API](realtimeapi.md) if these scenarios don't apply to you. A Batch API deployed in Cortex will create/support the following: diff --git a/docs/deployments/inferentia.md b/docs/deployments/inferentia.md index a7e1f1205a..0981491e5b 100644 --- a/docs/deployments/inferentia.md +++ b/docs/deployments/inferentia.md @@ -22,7 +22,7 @@ Each Inferentia ASIC comes with 4 NeuronCores and 8GB of cache memory. To better A [NeuronCore Group](https://github.com/aws/aws-neuron-sdk/blob/master/docs/tensorflow-neuron/tutorial-NeuronCore-Group.md) (NCG) is a set of NeuronCores that is used to load and run a compiled model. 
NCGs exist to aggregate NeuronCores to improve hardware performance. Models can be shared within an NCG, but this would require the device driver to dynamically context switch between each model, which degrades performance. Therefore we've decided to only allow one model per NCG (unless you are using a [multi-model endpoint](../guides/multi-model.md), in which case there will be multiple models on a single NCG, and there will be context switching). -Each Cortex API process will have its own copy of the model and will run on its own NCG (the number of API processes is configured by the [`processes_per_replica`](syncapi/autoscaling.md#replica-parallelism) for Sync APIs field in the API configuration). Each NCG will have an equal share of NeuronCores. Therefore, the size of each NCG will be `4 * inf / processes_per_replica` (`inf` refers to your API's `compute` request, and it's multiplied by 4 because there are 4 NeuronCores per Inferentia chip). +Each Cortex API process will have its own copy of the model and will run on its own NCG (the number of API processes is configured by the [`processes_per_replica`](realtimeapi/autoscaling.md#replica-parallelism) for Realtime APIs field in the API configuration). Each NCG will have an equal share of NeuronCores. Therefore, the size of each NCG will be `4 * inf / processes_per_replica` (`inf` refers to your API's `compute` request, and it's multiplied by 4 because there are 4 NeuronCores per Inferentia chip). For example, if your API requests 2 `inf` chips, there will be 8 NeuronCores available. If you set `processes_per_replica` to 1, there will be one copy of your model running on a single NCG of size 8 NeuronCores. If `processes_per_replica` is 2, there will be two copies of your model, each running on a separate NCG of size 4 NeuronCores. If `processes_per_replica` is 4, there will be 4 NCGs of size 2 NeuronCores, and if If `processes_per_replica` is 8, there will be 8 NCGs of size 1 NeuronCores. 
In this scenario, these are the only valid values for `processes_per_replica`. In other words the total number of requested NeuronCores (which equals 4 * the number of requested Inferentia chips) must be divisible by `processes_per_replica`. @@ -64,7 +64,7 @@ model_neuron = torch.neuron.trace( model_neuron.save(compiled_model) ``` -The versions of `tensorflow-neuron` and `torch-neuron` that are used by Cortex are found in the [Sync API pre-installed packages list](syncapi/predictors.md#inferentia-equipped-apis) and [Batch API pre-installed packages list](batchapi/predictors.md#inferentia-equipped-apis). When installing these packages with `pip` to compile models of your own, use the extra index URL `--extra-index-url=https://pip.repos.neuron.amazonaws.com`. +The versions of `tensorflow-neuron` and `torch-neuron` that are used by Cortex are found in the [Realtime API pre-installed packages list](realtimeapi/predictors.md#inferentia-equipped-apis) and [Batch API pre-installed packages list](batchapi/predictors.md#inferentia-equipped-apis). When installing these packages with `pip` to compile models of your own, use the extra index URL `--extra-index-url=https://pip.repos.neuron.amazonaws.com`. See AWS's [TensorFlow](https://github.com/aws/aws-neuron-sdk/blob/master/docs/tensorflow-neuron/tutorial-compile-infer.md#step-3-compile-on-compilation-instance) and [PyTorch](https://github.com/aws/aws-neuron-sdk/blob/master/docs/pytorch-neuron/tutorial-compile-infer.md#step-3-compile-on-compilation-instance) guides on how to compile models for Inferentia. 
Here are 2 examples implemented with Cortex: diff --git a/docs/deployments/networking.md b/docs/deployments/networking.md index 1b68c5f6c6..1d296752f9 100644 --- a/docs/deployments/networking.md +++ b/docs/deployments/networking.md @@ -4,7 +4,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t ![api architecture diagram](https://user-images.githubusercontent.com/808475/84695323-8507dd00-aeff-11ea-8b32-5a55cef76c79.png) -APIs are deployed with a public API Gateway by default (the API Gateway forwards requests to the API load balancer). Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [Sync API configuration](syncapi/api-configuration.md) and [Batch API configuration](batchapi/api-configuration.md). If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29 second request timeout, or if you are keeping your APIs private to your VPC. See below for common configurations. To disable API Gateway cluster-wide (thereby enforcing that all APIs cannot create API Gateway endpoints), set `api_gateway: disabled` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). +APIs are deployed with a public API Gateway by default (the API Gateway forwards requests to the API load balancer). Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [Realtime API configuration](realtimeapi/api-configuration.md) and [Batch API configuration](batchapi/api-configuration.md). 
If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29 second request timeout, or if you are keeping your APIs private to your VPC. See below for common configurations. To disable API Gateway cluster-wide (thereby enforcing that all APIs cannot create API Gateway endpoints), set `api_gateway: disabled` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). By default, the API load balancer is public. You can configure your API load balancer to be private by setting `api_load_balancer_scheme: internal` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). This will force external traffic to go through your API Gateway endpoint, or if you disabled API Gateway for your API, it will make your API only accessible through VPC Peering. Note that if API Gateway is used, endpoints will be public regardless of `api_load_balancer_scheme`. See below for common configurations. 
diff --git a/docs/deployments/prediction-monitoring.md b/docs/deployments/prediction-monitoring.md index f0aa2be64b..c1182883f3 100644 --- a/docs/deployments/prediction-monitoring.md +++ b/docs/deployments/prediction-monitoring.md @@ -19,7 +19,7 @@ For classification models, `monitoring` should be configured with `model_type: c ```yaml - name: iris - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/docs/deployments/python-packages.md b/docs/deployments/python-packages.md index d8cff80164..8c740396e7 100644 --- a/docs/deployments/python-packages.md +++ b/docs/deployments/python-packages.md @@ -16,7 +16,7 @@ You can install your required PyPI packages and import them in your Python files If you want to use `conda` to install your python packages, see the [Conda section](#conda-packages) below. -Note that some packages are pre-installed by default (see "pre-installed packages" for your Predictor type in the [Sync API Predictor documentation](syncapi/predictors.md) and [Batch API Predictor documentation](batchapi/predictors.md)). +Note that some packages are pre-installed by default (see "pre-installed packages" for your Predictor type in the [Realtime API Predictor documentation](realtimeapi/predictors.md) and [Batch API Predictor documentation](batchapi/predictors.md)). ## Private PyPI packages diff --git a/docs/deployments/syncapi.md b/docs/deployments/realtimeapi.md similarity index 64% rename from docs/deployments/syncapi.md rename to docs/deployments/realtimeapi.md index 7a1bd304de..e6f504a76b 100644 --- a/docs/deployments/syncapi.md +++ b/docs/deployments/realtimeapi.md @@ -1,12 +1,12 @@ -# Sync API Overview +# Realtime API Overview _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -You can deploy a Sync API on Cortex to serve your model via an HTTP endpoint for on-demand inferences. 
+You can deploy a Realtime API on Cortex to serve your model via an HTTP endpoint for on-demand inferences. -## When should I use a Sync API +## When should I use a Realtime API -You may want to deploy your model as a Sync API if any of the following scenarios apply to your use case: +You may want to deploy your model as a Realtime API if any of the following scenarios apply to your use case: * predictions are served on demand * predictions need to be made in the time of a single web request @@ -15,7 +15,7 @@ You may want to deploy your model as a Sync API if any of the following scenario You may want to consider deploying your model as a [Batch API](batchapi.md) if these scenarios don't apply to you. -A Sync API deployed in Cortex has the following features: +A Realtime API deployed in Cortex has the following features: * request-based autoscaling * rolling updates to enable you to update the model/serving code without downtime @@ -32,7 +32,7 @@ You specify the following: * a Cortex Predictor class in Python that defines how to initialize and serve your model * an API configuration yaml file that defines how your API will behave in production (autoscaling, monitoring, networking, compute, etc.) -Once you've implemented your predictor and defined your API configuration, you can use the Cortex CLI to deploy a Sync API. The Cortex CLI will package your predictor implementation and the rest of the code and dependencies and upload it to the Cortex Cluster. The Cortex Cluster will set up an HTTP endpoint that routes traffic to multiple replicas/copies of web servers initialized with your code. +Once you've implemented your predictor and defined your API configuration, you can use the Cortex CLI to deploy a Realtime API. The Cortex CLI will package your predictor implementation and the rest of the code and dependencies and upload it to the Cortex Cluster. 
The Cortex Cluster will set up an HTTP endpoint that routes traffic to multiple replicas/copies of web servers initialized with your code. When a request is made to the HTTP endpoint, it gets routed to one your API's replicas (at random). The replica receives the request, parses the payload and executes the inference code you've defined in your predictor implementation and sends a response. @@ -40,7 +40,7 @@ The Cortex Cluster will automatically scale based on the incoming traffic and th ## Next steps -* Try the [tutorial](../../examples/sklearn/iris-classifier/README.md) to deploy a Sync API locally or on AWS. -* See our [exporting guide](../guides/exporting.md) for how to export your model to use in a Sync API. -* See the [Predictor docs](syncapi/predictors.md) for how to implement a Predictor class. -* See the [API configuration docs](syncapi/api-configuration.md) for a full list of features that can be used to deploy your Sync API. +* Try the [tutorial](../../examples/sklearn/iris-classifier/README.md) to deploy a Realtime API locally or on AWS. +* See our [exporting guide](../guides/exporting.md) for how to export your model to use in a Realtime API. +* See the [Predictor docs](realtimeapi/predictors.md) for how to implement a Predictor class. +* See the [API configuration docs](realtimeapi/api-configuration.md) for a full list of features that can be used to deploy your Realtime API. 
diff --git a/docs/deployments/syncapi/api-configuration.md b/docs/deployments/realtimeapi/api-configuration.md similarity index 99% rename from docs/deployments/syncapi/api-configuration.md rename to docs/deployments/realtimeapi/api-configuration.md index 5dc032fa7d..243dc3258e 100644 --- a/docs/deployments/syncapi/api-configuration.md +++ b/docs/deployments/realtimeapi/api-configuration.md @@ -10,7 +10,7 @@ Reference the section below which corresponds to your Predictor type: [Python](# ```yaml - name: # API name (required) - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required) @@ -56,7 +56,7 @@ See additional documentation for [parallelism](parallelism.md), [autoscaling](au ```yaml - name: # API name (required) - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required) @@ -113,7 +113,7 @@ See additional documentation for [parallelism](parallelism.md), [autoscaling](au ```yaml - name: # API name (required) - kind: SyncAPI + kind: RealtimeAPI predictor: type: onnx path: # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required) diff --git a/docs/deployments/syncapi/autoscaling.md b/docs/deployments/realtimeapi/autoscaling.md similarity index 100% rename from docs/deployments/syncapi/autoscaling.md rename to docs/deployments/realtimeapi/autoscaling.md diff --git a/docs/deployments/syncapi/deployment.md b/docs/deployments/realtimeapi/deployment.md similarity index 98% rename from docs/deployments/syncapi/deployment.md rename to docs/deployments/realtimeapi/deployment.md index 61e4039e69..20468c8242 100644 --- a/docs/deployments/syncapi/deployment.md +++ b/docs/deployments/realtimeapi/deployment.md @@ -11,7 +11,7 @@ The `cortex deploy` command collects your configuration and source code and depl 
```bash $ cortex deploy -creating my-api (SyncAPI) +creating my-api (RealtimeAPI) ``` APIs are declarative, so to update your API, you can modify your source code and/or configuration and run `cortex deploy` again. diff --git a/docs/deployments/syncapi/parallelism.md b/docs/deployments/realtimeapi/parallelism.md similarity index 100% rename from docs/deployments/syncapi/parallelism.md rename to docs/deployments/realtimeapi/parallelism.md diff --git a/docs/deployments/syncapi/prediction-monitoring.md b/docs/deployments/realtimeapi/prediction-monitoring.md similarity index 98% rename from docs/deployments/syncapi/prediction-monitoring.md rename to docs/deployments/realtimeapi/prediction-monitoring.md index f0aa2be64b..c1182883f3 100644 --- a/docs/deployments/syncapi/prediction-monitoring.md +++ b/docs/deployments/realtimeapi/prediction-monitoring.md @@ -19,7 +19,7 @@ For classification models, `monitoring` should be configured with `model_type: c ```yaml - name: iris - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/docs/deployments/syncapi/predictors.md b/docs/deployments/realtimeapi/predictors.md similarity index 100% rename from docs/deployments/syncapi/predictors.md rename to docs/deployments/realtimeapi/predictors.md diff --git a/docs/deployments/syncapi/statuses.md b/docs/deployments/realtimeapi/statuses.md similarity index 100% rename from docs/deployments/syncapi/statuses.md rename to docs/deployments/realtimeapi/statuses.md diff --git a/docs/deployments/realtimeapi/traffic-splitter.md b/docs/deployments/realtimeapi/traffic-splitter.md new file mode 100644 index 0000000000..f498694234 --- /dev/null +++ b/docs/deployments/realtimeapi/traffic-splitter.md @@ -0,0 +1,82 @@ +# Traffic Splitter + +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + +The Traffic Splitter feature allows you to split traffic between multiple Realtime APIs on your Cortex Cluster. 
This can be useful for A/B testing models in production. + +After [deploying Realtime APIs](deployment.md), you can deploy a Traffic Splitter to provide a single endpoint that can route a request randomly to one of the target Realtime APIs. Weights can be assigned to Realtime APIs to control the percentage of requests routed to each API. + +**Traffic Splitters are only supported on a Cortex cluster (in AWS).** + +## Traffic Splitter Configuration + +Traffic Splitter expects the target Realtime APIs to already be running or be included in the same configuration file as the Traffic Splitter. The traffic is routed according to the specified weights. The weights assigned to all Realtime APIs must sum to 100. + +```yaml +- name: # Traffic Splitter name (required) + kind: TrafficSplitter # must be "TrafficSplitter", create a Traffic Splitter which routes traffic to multiple Realtime APIs + networking: + endpoint: # the endpoint for the Traffic Splitter (default: ) + api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public) + apis: # list of Realtime APIs to target + - name: # name of a Realtime API that is already running or is included in the same configuration file (required) + weight: # percentage of traffic to route to the Realtime API (all weights must sum to 100) (required) +``` + +## `cortex deploy` + +The `cortex deploy` command is used to deploy a Traffic Splitter. + +```bash +$ cortex deploy + +created traffic-splitter (TrafficSplitter) +``` + +Traffic Splitters are declarative, so to update your Traffic Splitter, you can modify the configuration and re-run `cortex deploy`. + +## `cortex get` + +The `cortex get` command displays the status of your Realtime APIs and Traffic Splitters, and `cortex get ` shows additional information about a specific Traffic Splitter. 
+ +```bash +$ cortex get traffic-splitter + +apis weights status requested last update avg request 2XX 5XX +another-my-api 80 live 1 5m - - - +my-api 20 live 1 6m - - - + +last updated: 4m +endpoint: https://******.execute-api.eu-central-1.amazonaws.com/traffic-splitter +curl: curl https://******.execute-api.eu-central-1.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json +... +``` + +## Making a prediction + +You can use `curl` to test your Traffic Splitter. This will distribute the requests across the Realtime APIs targeted by the Traffic Splitter: + +```bash +$ curl http://***.amazonaws.com/traffic-splitter \ + -X POST -H "Content-Type: application/json" \ + -d '{"key": "value"}' +``` + +## `cortex delete` + +Use `cortex delete ` to delete your Traffic Splitter: + +```bash +$ cortex delete traffic-splitter + +deleted traffic-splitter +``` + +Note that this will not delete the Realtime APIs targeted by the Traffic Splitter. + +## Additional resources + + +* [Traffic Splitter Tutorial](../../../examples/traffic-splitter/README.md) provides a step-by-step walkthrough for deploying a Traffic Splitter +* [Realtime API Tutorial](../../../examples/sklearn/iris-classifier/README.md) provides a step-by-step walkthrough of deploying an iris classifier Realtime API +* [CLI documentation](../../miscellaneous/cli.md) lists all CLI commands diff --git a/docs/deployments/syncapi/apisplitter.md b/docs/deployments/syncapi/apisplitter.md deleted file mode 100644 index 09ae143dbf..0000000000 --- a/docs/deployments/syncapi/apisplitter.md +++ /dev/null @@ -1,82 +0,0 @@ -# API Splitter - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -The API Splitter feature allows you to split traffic between multiple Sync APIs on your Cortex Cluster. This can be useful for A/B testing models in production. 
- -After [deploying Sync APIs](deployment.md), you can deploy an API Splitter to provide a single endpoint that can route a request randomly to one of the target Sync APIs. Weights can be assigned to Sync APIs to control the percentage of requests routed to each API. - -**API Splitters are only supported on a Cortex cluster (in AWS).** - -## API Splitter Configuration - -API Splitter expects the target Sync APIs to already be running or be included in the same configuration file as the API Splitter. The traffic is routed according to the specified weights. The weights assigned to all Sync APIs must to sum to 100. - -```yaml -- name: # API Splitter name (required) - kind: APISplitter # must be "APISplitter", create an API Splitter which routes traffic to multiple Sync APIs - networking: - endpoint: # the endpoint for the API Splitter (default: ) - api_gateway: public | none # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public) - apis: # list of Sync APIs to target - - name: # name of a Sync API that is already running or is included in the same configuration file (required) - weight: # percentage of traffic to route to the Sync API (all weights must sum to 100) (required) -``` - -## `cortex deploy` - -The `cortex deploy` command is used to deploy an API Splitter. - -```bash -$ cortex deploy - -created my-apisplitter (APISplitter) -``` - -API Splitters are declarative, so to update your API Splitter, you can modify the configuration and re-run `cortex deploy`. - -## `cortex get` - -The `cortex get` command displays the status of your Sync APIs and API Splitters, and `cortex get ` shows additional information about a specific API Splitter. 
- -```bash -$ cortex get my-apisplitter - -apis weights status requested last update avg request 2XX 5XX -another-my-api 80 live 1 5m - - - -my-api 20 live 1 6m - - - - -last updated: 4m -endpoint: https://******.execute-api.eu-central-1.amazonaws.com/my-apisplitter -curl: curl https://******.execute-api.eu-central-1.amazonaws.com/my-apisplitter -X POST -H "Content-Type: application/json" -d @sample.json -... -``` - -## Making a prediction - -You can use `curl` to test your API Splitter. This will distribute the requests across the Sync APIs targeted by the API Splitter: - -```bash -$ curl http://***.amazonaws.com/my-apisplitter \ - -X POST -H "Content-Type: application/json" \ - -d '{"key": "value"}' -``` - -## `cortex delete` - -Use `cortex delete ` to delete your API Splitter: - -```bash -$ cortex delete my-apisplitter - -deleted my-apisplitter -``` - -Note that this will not delete the Sync APIs targeted by the API Splitter. - -## Additional resources - - -* [API Splitter Tutorial](../../../examples/apisplitter/README.md) provides a step-by-step walkthrough for deploying an API Splitter -* [Sync API Tutorial](../../../examples/sklearn/iris-classifier/README.md) provides a step-by-step walkthrough of deploying an iris classifier Sync API -* [CLI documentation](../../miscellaneous/cli.md) lists all CLI commands diff --git a/docs/guides/multi-model.md b/docs/guides/multi-model.md index a978d32df7..31a6db19eb 100644 --- a/docs/guides/multi-model.md +++ b/docs/guides/multi-model.md @@ -16,7 +16,7 @@ The following template is based on the [pytorch/multi-model-text-analyzer](https ```yaml - name: multi-model-text-analyzer - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -94,7 +94,7 @@ The following template is based on the [tensorflow/multi-model-classifier](https ```yaml - name: multi-model-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py @@ -177,7 +177,7 @@ The following template is based 
on the [onnx/multi-model-classifier](https://git ```yaml - name: multi-model-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: onnx path: predictor.py diff --git a/docs/summary.md b/docs/summary.md index ac5a7c1b61..bf76b52dca 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -11,16 +11,16 @@ ## Deployments -* [Sync API](deployments/syncapi.md) - * [Predictor implementation](deployments/syncapi/predictors.md) - * [API configuration](deployments/syncapi/api-configuration.md) - * [API deployment](deployments/syncapi/deployment.md) - * [API statuses](deployments/syncapi/statuses.md) - * [Parallelism](deployments/syncapi/parallelism.md) - * [Autoscaling](deployments/syncapi/autoscaling.md) - * [Prediction monitoring](deployments/syncapi/prediction-monitoring.md) +* [Realtime API](deployments/realtimeapi.md) + * [Predictor implementation](deployments/realtimeapi/predictors.md) + * [API configuration](deployments/realtimeapi/api-configuration.md) + * [API deployment](deployments/realtimeapi/deployment.md) + * [API statuses](deployments/realtimeapi/statuses.md) + * [Parallelism](deployments/realtimeapi/parallelism.md) + * [Autoscaling](deployments/realtimeapi/autoscaling.md) + * [Prediction monitoring](deployments/realtimeapi/prediction-monitoring.md) * [Tutorial](../examples/sklearn/iris-classifier/README.md) - * [API Splitter](deployments/syncapi/apisplitter.md) + * [Traffic Splitter](deployments/realtimeapi/traffic-splitter.md) * [Batch API](deployments/batchapi.md) * [Predictor implementation](deployments/batchapi/predictors.md) * [API configuration](deployments/batchapi/api-configuration.md) diff --git a/docs/troubleshooting/server-side-batching-errors.md b/docs/troubleshooting/server-side-batching-errors.md index 50700f9169..a674a09e93 100644 --- a/docs/troubleshooting/server-side-batching-errors.md +++ b/docs/troubleshooting/server-side-batching-errors.md @@ -2,7 +2,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex 
version`_ -When `max_batch_size` and `batch_interval` fields are set for the [Sync API TensorFlow Predictor](../deployments/syncapi/predictors.md#tensorflow-predictor), errors can be encountered if the associated model hasn't been built for batching. +When `max_batch_size` and `batch_interval` fields are set for the [Realtime API TensorFlow Predictor](../deployments/realtimeapi/predictors.md#tensorflow-predictor), errors can be encountered if the associated model hasn't been built for batching. The following error is an example of what happens when the input shape doesn't accommodate batching - e.g. when its shape is `[height, width, 3]` instead of `[batch_size, height, width, 3]`: diff --git a/docs/troubleshooting/tf-session-in-predict.md b/docs/troubleshooting/tf-session-in-predict.md index 9aa50c0b01..bba6e090e0 100644 --- a/docs/troubleshooting/tf-session-in-predict.md +++ b/docs/troubleshooting/tf-session-in-predict.md @@ -2,7 +2,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -When doing inferences with TensorFlow using the [Sync API Python Predictor](../deployments/syncapi/predictors.md#python-predictor) or [Batch API Python Predictor](../deployments/batchapi/predictors.md#python-predictor), it should be noted that your Python Predictor's `__init__()` constructor is only called on one thread, whereas its `predict()` method can run on any of the available threads (which is configured via the `threads_per_process` field in the API's `predictor` configuration). If `threads_per_process` is set to `1` (the default value), then there is no concern, since `__init__()` and `predict()` will run on the same thread. However, if `threads_per_process` is greater than `1`, then only one of the inference threads will have executed the `__init__()` function. 
This can cause issues with TensorFlow because the default graph is a property of the current thread, so if `__init__()` initializes the TensorFlow graph, only the thread that executed `__init__()` will have the default graph set. +When doing inferences with TensorFlow using the [Realtime API Python Predictor](../deployments/realtimeapi/predictors.md#python-predictor) or [Batch API Python Predictor](../deployments/batchapi/predictors.md#python-predictor), it should be noted that your Python Predictor's `__init__()` constructor is only called on one thread, whereas its `predict()` method can run on any of the available threads (which is configured via the `threads_per_process` field in the API's `predictor` configuration). If `threads_per_process` is set to `1` (the default value), then there is no concern, since `__init__()` and `predict()` will run on the same thread. However, if `threads_per_process` is greater than `1`, then only one of the inference threads will have executed the `__init__()` function. This can cause issues with TensorFlow because the default graph is a property of the current thread, so if `__init__()` initializes the TensorFlow graph, only the thread that executed `__init__()` will have the default graph set. The error you may see if the default graph is not set (as a consequence of `__init__()` and `predict()` running in separate threads) is: diff --git a/examples/keras/document-denoiser/cortex.yaml b/examples/keras/document-denoiser/cortex.yaml index 8cea537133..cc6462761b 100644 --- a/examples/keras/document-denoiser/cortex.yaml +++ b/examples/keras/document-denoiser/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: document-denoiser - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/onnx/iris-classifier/cortex.yaml b/examples/onnx/iris-classifier/cortex.yaml index e8e3328d86..5d5949d701 100644 --- a/examples/onnx/iris-classifier/cortex.yaml +++ b/examples/onnx/iris-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: onnx path: predictor.py diff --git a/examples/onnx/multi-model-classifier/cortex.yaml b/examples/onnx/multi-model-classifier/cortex.yaml index ebd77bd555..6a8557a702 100644 --- a/examples/onnx/multi-model-classifier/cortex.yaml +++ b/examples/onnx/multi-model-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: multi-model-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: onnx path: predictor.py diff --git a/examples/onnx/yolov5-youtube/cortex.yaml b/examples/onnx/yolov5-youtube/cortex.yaml index 0bc142e1f4..4c63c7e453 100644 --- a/examples/onnx/yolov5-youtube/cortex.yaml +++ b/examples/onnx/yolov5-youtube/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: yolov5-youtube - kind: SyncAPI + kind: RealtimeAPI predictor: type: onnx path: predictor.py diff --git a/examples/pytorch/answer-generator/cortex.yaml b/examples/pytorch/answer-generator/cortex.yaml index 82b6ee41c8..94bbef88c5 100644 --- a/examples/pytorch/answer-generator/cortex.yaml +++ b/examples/pytorch/answer-generator/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: answer-generator - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/image-classifier-alexnet/cortex.yaml b/examples/pytorch/image-classifier-alexnet/cortex.yaml index ed18771607..da6a7d1d20 100644 --- a/examples/pytorch/image-classifier-alexnet/cortex.yaml +++ b/examples/pytorch/image-classifier-alexnet/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-alexnet - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/image-classifier-resnet50/cortex.yaml b/examples/pytorch/image-classifier-resnet50/cortex.yaml index 1288383be7..fbb0f8e20a 100644 --- a/examples/pytorch/image-classifier-resnet50/cortex.yaml +++ b/examples/pytorch/image-classifier-resnet50/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/image-classifier-resnet50/cortex_gpu.yaml b/examples/pytorch/image-classifier-resnet50/cortex_gpu.yaml index ce50d096e6..aba7745dec 100644 --- a/examples/pytorch/image-classifier-resnet50/cortex_gpu.yaml +++ b/examples/pytorch/image-classifier-resnet50/cortex_gpu.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/image-classifier-resnet50/cortex_inf.yaml b/examples/pytorch/image-classifier-resnet50/cortex_inf.yaml index d0b35addcd..6fea053913 100644 --- a/examples/pytorch/image-classifier-resnet50/cortex_inf.yaml +++ b/examples/pytorch/image-classifier-resnet50/cortex_inf.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/iris-classifier/cortex.yaml b/examples/pytorch/iris-classifier/cortex.yaml index bb4b3ee9c1..5bed271c6e 100644 --- a/examples/pytorch/iris-classifier/cortex.yaml +++ b/examples/pytorch/iris-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/language-identifier/cortex.yaml b/examples/pytorch/language-identifier/cortex.yaml index f79488b9af..182520d447 100644 --- a/examples/pytorch/language-identifier/cortex.yaml +++ b/examples/pytorch/language-identifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: language-identifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/multi-model-text-analyzer/cortex.yaml b/examples/pytorch/multi-model-text-analyzer/cortex.yaml index db6bab6ba6..2d3ebef396 100644 --- a/examples/pytorch/multi-model-text-analyzer/cortex.yaml +++ b/examples/pytorch/multi-model-text-analyzer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: multi-model-text-analyzer - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/object-detector/cortex.yaml b/examples/pytorch/object-detector/cortex.yaml index 406025950c..86e720fa37 100644 --- a/examples/pytorch/object-detector/cortex.yaml +++ b/examples/pytorch/object-detector/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: object-detector - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/reading-comprehender/cortex.yaml b/examples/pytorch/reading-comprehender/cortex.yaml index e243586f63..7340d12fe3 100644 --- a/examples/pytorch/reading-comprehender/cortex.yaml +++ b/examples/pytorch/reading-comprehender/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: reading-comprehender - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/search-completer/cortex.yaml b/examples/pytorch/search-completer/cortex.yaml index ef20e9776c..5c78f3a9bb 100644 --- a/examples/pytorch/search-completer/cortex.yaml +++ b/examples/pytorch/search-completer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: search-completer - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/sentiment-analyzer/cortex.yaml b/examples/pytorch/sentiment-analyzer/cortex.yaml index eed2e61713..960d405739 100644 --- a/examples/pytorch/sentiment-analyzer/cortex.yaml +++ b/examples/pytorch/sentiment-analyzer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: sentiment-analyzer - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/text-generator/cortex.yaml b/examples/pytorch/text-generator/cortex.yaml index 5983052371..f85cc85517 100644 --- a/examples/pytorch/text-generator/cortex.yaml +++ b/examples/pytorch/text-generator/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: text-generator - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/text-summarizer/cortex.yaml b/examples/pytorch/text-summarizer/cortex.yaml index 9d1cff81da..2c4b1c2e85 100644 --- a/examples/pytorch/text-summarizer/cortex.yaml +++ b/examples/pytorch/text-summarizer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: text-summarizer - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/sklearn/iris-classifier/README.md b/examples/sklearn/iris-classifier/README.md index 8154b60977..0a0452301a 100644 --- a/examples/sklearn/iris-classifier/README.md +++ b/examples/sklearn/iris-classifier/README.md @@ -1,4 +1,4 @@ -# Deploy models as Sync APIs +# Deploy models as Realtime APIs _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub)_ @@ -90,7 +90,7 @@ class PythonPredictor: return labels[label_id] ``` -Here are the complete [Predictor docs](../../../docs/deployments/syncapi/predictors.md).
+Here are the complete [Predictor docs](../../../docs/deployments/realtimeapi/predictors.md).
@@ -104,7 +104,7 @@ Create a `requirements.txt` file to specify the dependencies needed by `predicto boto3 ``` -You can skip dependencies that are [pre-installed](../../../docs/deployments/syncapi/predictors.md) to speed up the deployment process. Note that `pickle` is part of the Python standard library so it doesn't need to be included. +You can skip dependencies that are [pre-installed](../../../docs/deployments/realtimeapi/predictors.md) to speed up the deployment process. Note that `pickle` is part of the Python standard library so it doesn't need to be included.
@@ -116,7 +116,7 @@ Create a `cortex.yaml` file and add the configuration below and replace `cortex- # cortex.yaml - name: iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -125,7 +125,7 @@ Create a `cortex.yaml` file and add the configuration below and replace `cortex- key: sklearn/iris-classifier/model.pkl ``` -Here are the complete [API configuration docs](../../../docs/deployments/syncapi/api-configuration.md). +Here are the complete [API configuration docs](../../../docs/deployments/realtimeapi/api-configuration.md).
@@ -136,7 +136,7 @@ Here are the complete [API configuration docs](../../../docs/deployments/syncapi ```bash $ cortex deploy -creating iris-classifier (SyncAPI) +creating iris-classifier (RealtimeAPI) ``` Monitor the status of your API using `cortex get`: @@ -185,7 +185,7 @@ After your cluster is created, you can deploy your model to your cluster by usin ```bash $ cortex deploy --env aws -creating iris-classifier (SyncAPI) +creating iris-classifier (RealtimeAPI) ``` You can then get your API's endpoint (along with other useful information about your API) using the `cortex get` command: @@ -218,7 +218,7 @@ Add `monitoring` to your `cortex.yaml` and specify that this is a classification # cortex.yaml - name: iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -234,7 +234,7 @@ Run `cortex deploy` again to perform a rolling update to your API with the new c ```bash $ cortex deploy --env aws -updating iris-classifier (SyncAPI) +updating iris-classifier (RealtimeAPI) ``` After making more predictions, your `cortex get` command will show information about your API's past predictions: @@ -261,7 +261,7 @@ This model is fairly small but larger models may require more compute resources. # cortex.yaml - name: iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -280,7 +280,7 @@ You could also configure GPU compute here if your cluster supports it. 
Adding co ```bash $ cortex deploy --env aws -updating iris-classifier (SyncAPI) +updating iris-classifier (RealtimeAPI) ``` Run `cortex get` again: @@ -307,7 +307,7 @@ If you trained another model and want to A/B test it with your previous model, s # cortex.yaml - name: iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -321,7 +321,7 @@ If you trained another model and want to A/B test it with your previous model, s mem: 100M - name: another-iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -340,8 +340,8 @@ Run `cortex deploy` to create the new API: ```bash $ cortex deploy --env aws -iris-classifier (SyncAPI) is up to date -creating another-iris-classifier (SyncAPI) +iris-classifier (RealtimeAPI) is up to date +creating another-iris-classifier (RealtimeAPI) ``` `cortex deploy` is declarative so the `iris-classifier` API is unchanged while `another-iris-classifier` is created: @@ -403,7 +403,7 @@ Next, add the `api` to `cortex.yaml`: # cortex.yaml - name: iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -417,7 +417,7 @@ Next, add the `api` to `cortex.yaml`: mem: 100M - name: another-iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -431,7 +431,7 @@ Next, add the `api` to `cortex.yaml`: mem: 100M - name: batch-iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: batch-predictor.py @@ -448,9 +448,9 @@ Run `cortex deploy` to create your batch API: ```bash $ cortex deploy --env aws -updating iris-classifier (SyncAPI) -updating another-iris-classifier (SyncAPI) -creating batch-iris-classifier (SyncAPI) +updating iris-classifier (RealtimeAPI) +updating another-iris-classifier (RealtimeAPI) +creating batch-iris-classifier (RealtimeAPI) ``` Since a new file was added to the directory, and all files in the directory containing `cortex.yaml` are made 
available in your APIs, the previous two APIs were updated in addition to the the batch classifier being created. diff --git a/examples/sklearn/iris-classifier/cortex.yaml b/examples/sklearn/iris-classifier/cortex.yaml index bd194f3654..111bd69966 100644 --- a/examples/sklearn/iris-classifier/cortex.yaml +++ b/examples/sklearn/iris-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -15,7 +15,7 @@ mem: 100M - name: another-iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -29,7 +29,7 @@ mem: 100M - name: batch-iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: batch-predictor.py diff --git a/examples/sklearn/mpg-estimator/cortex.yaml b/examples/sklearn/mpg-estimator/cortex.yaml index acf60fce04..9185cd602e 100644 --- a/examples/sklearn/mpg-estimator/cortex.yaml +++ b/examples/sklearn/mpg-estimator/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: mpg-estimator - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/spacy/entity-recognizer/cortex.yaml b/examples/spacy/entity-recognizer/cortex.yaml index 4b5301f1cb..c982243cca 100644 --- a/examples/spacy/entity-recognizer/cortex.yaml +++ b/examples/spacy/entity-recognizer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: entity-recognizer - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/tensorflow/image-classifier-inception/cortex.yaml b/examples/tensorflow/image-classifier-inception/cortex.yaml index 8e63517445..8848b3d5c2 100644 --- a/examples/tensorflow/image-classifier-inception/cortex.yaml +++ b/examples/tensorflow/image-classifier-inception/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-inception - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml b/examples/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml index 109ea4a91e..69b3740406 100644 --- a/examples/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml +++ b/examples/tensorflow/image-classifier-inception/cortex_server_side_batching.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-inception - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/image-classifier-resnet50/cortex.yaml b/examples/tensorflow/image-classifier-resnet50/cortex.yaml index 977caac07a..51c8697f26 100644 --- a/examples/tensorflow/image-classifier-resnet50/cortex.yaml +++ b/examples/tensorflow/image-classifier-resnet50/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/image-classifier-resnet50/cortex_gpu.yaml b/examples/tensorflow/image-classifier-resnet50/cortex_gpu.yaml index e4c3329d37..29c14b3757 100644 --- a/examples/tensorflow/image-classifier-resnet50/cortex_gpu.yaml +++ b/examples/tensorflow/image-classifier-resnet50/cortex_gpu.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml b/examples/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml index 412b12729c..4c9b11de1c 100644 --- a/examples/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml +++ b/examples/tensorflow/image-classifier-resnet50/cortex_gpu_server_side_batching.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/image-classifier-resnet50/cortex_inf.yaml b/examples/tensorflow/image-classifier-resnet50/cortex_inf.yaml index 748dbd4f5d..5b8c42aa62 100644 --- a/examples/tensorflow/image-classifier-resnet50/cortex_inf.yaml +++ b/examples/tensorflow/image-classifier-resnet50/cortex_inf.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml b/examples/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml index 6cce40dd8d..11d91c21ff 100644 --- a/examples/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml +++ b/examples/tensorflow/image-classifier-resnet50/cortex_inf_server_side_batching.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/iris-classifier/cortex.yaml b/examples/tensorflow/iris-classifier/cortex.yaml index 17caa5eefe..5b47c1093f 100644 --- a/examples/tensorflow/iris-classifier/cortex.yaml +++ b/examples/tensorflow/iris-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/license-plate-reader/cortex_full.yaml b/examples/tensorflow/license-plate-reader/cortex_full.yaml index 486dc000df..89be93177f 100644 --- a/examples/tensorflow/license-plate-reader/cortex_full.yaml +++ b/examples/tensorflow/license-plate-reader/cortex_full.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: yolov3 - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor_yolo.py @@ -20,7 +20,7 @@ max_replicas: 2 - name: crnn - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor_crnn.py diff --git a/examples/tensorflow/license-plate-reader/cortex_lite.yaml b/examples/tensorflow/license-plate-reader/cortex_lite.yaml index f818e88e24..a184e59c34 100644 --- a/examples/tensorflow/license-plate-reader/cortex_lite.yaml +++ b/examples/tensorflow/license-plate-reader/cortex_lite.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: license-plate-reader - kind: SyncAPI + kind: RealtimeAPI predictor: type: python path: predictor_lite.py diff --git a/examples/tensorflow/multi-model-classifier/cortex.yaml b/examples/tensorflow/multi-model-classifier/cortex.yaml index 9f7c114fe6..43d29c1c4b 100644 --- a/examples/tensorflow/multi-model-classifier/cortex.yaml +++ b/examples/tensorflow/multi-model-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: multi-model-classifier - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/sentiment-analyzer/cortex.yaml b/examples/tensorflow/sentiment-analyzer/cortex.yaml index a62481fba5..61a2388033 100644 --- a/examples/tensorflow/sentiment-analyzer/cortex.yaml +++ b/examples/tensorflow/sentiment-analyzer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: sentiment-analyzer - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/text-generator/cortex.yaml b/examples/tensorflow/text-generator/cortex.yaml index 039fe6077d..4434740f8a 100644 --- a/examples/tensorflow/text-generator/cortex.yaml +++ b/examples/tensorflow/text-generator/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: text-generator - kind: SyncAPI + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/traffic-splitter/README.md b/examples/traffic-splitter/README.md new file mode 100644 index 0000000000..f5742463b6 --- /dev/null +++ b/examples/traffic-splitter/README.md @@ -0,0 +1,111 @@ +# Splitting traffic between APIs + +_WARNING: you are on the master branch, please refer to the examples on the branch that matches your `cortex version`_ + +This example shows how to split traffic between 2 different iris-classifiers deployed as RealtimeAPIs. + +To deploy this example: + +1. Determine your CLI Version `cortex version` +1. Clone the repo and switch to the current version by replacing `<CORTEX_VERSION>` with your CLI version: `git clone -b v<CORTEX_VERSION> https://github.com/cortexlabs/cortex` (e.g. if the output of `cortex version` is 0.18.1, the clone command would be `git clone -b v0.18.1 https://github.com/cortexlabs/cortex`) +1.
Navigate to this example directory + +## `cortex deploy` + +```bash +$ cortex deploy --env aws + +creating iris-classifier-onnx (RealtimeAPI) +creating iris-classifier-tf (RealtimeAPI) +created traffic-splitter (TrafficSplitter) +``` + +## `cortex get` + +```bash +$ cortex get + +env realtime api status up-to-date requested last update avg request 2XX +aws iris-classifier-onnx updating 0 1 27s - - +aws iris-classifier-tf updating 0 1 27s - - + +env traffic splitter apis last update +aws traffic-splitter iris-classifier-onnx:30 iris-classifier-tf:70 27s +``` + +## `cortex get traffic-splitter` + +```bash +$ cortex get traffic-splitter --env aws + +apis weights status requested last update avg request 2XX 5XX +iris-classifier-onnx 30 live 1 1m - - - +iris-classifier-tf 70 live 1 1m - - - + +last updated: 1m +endpoint: https://abcedefg.execute-api.us-west-2.amazonaws.com/traffic-splitter +curl: curl https://abcedefg.execute-api.us-west-2.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json +... 
+``` + +## Make multiple requests + +```bash +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json +setosa + +$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json +setosa +``` + +## `cortex get traffic-splitter` + +Notice the requests being routed to the different RealtimeAPIs based on their weights (the output below may not match yours): + +```bash +$ cortex get traffic-splitter --env aws + +using aws environment + + +apis weights status requested last update avg request 2XX 5XX +iris-classifier-onnx 30 live 1 4m 6.00791 ms 1 - +iris-classifier-tf 70 live 1 4m 5.81867 ms 5 - + +last updated: 4m +endpoint: https://comtf6hs64.execute-api.us-west-2.amazonaws.com/traffic-splitter +curl: curl https://comtf6hs64.execute-api.us-west-2.amazonaws.com/traffic-splitter -X POST -H "Content-Type: application/json" -d @sample.json +... 
+``` + +## Cleanup + +Use `cortex delete <api name>` to delete the Traffic Splitter and the two RealtimeAPIs (note that the Traffic Splitter and each Realtime API must be deleted by separate `cortex delete` commands): + +```bash +$ cortex delete traffic-splitter --env aws + +deleting traffic-splitter + +$ cortex delete iris-classifier-onnx --env aws + +deleting iris-classifier-onnx + +$ cortex delete iris-classifier-tf --env aws + +deleting iris-classifier-tf +``` + +Running `cortex delete <api name>` will free up cluster resources and allow Cortex to scale down to the minimum number of instances you specified during cluster installation. It will not spin down your cluster. diff --git a/examples/traffic-splitter/cortex.yaml b/examples/traffic-splitter/cortex.yaml new file mode 100644 index 0000000000..ec7a1bf01d --- /dev/null +++ b/examples/traffic-splitter/cortex.yaml @@ -0,0 +1,27 @@ +# WARNING: you are on the master branch, please refer to the examples on the branch that matches your `cortex version` + +- name: iris-classifier-onnx + kind: RealtimeAPI + predictor: + type: onnx + path: onnx_predictor.py + model_path: s3://cortex-examples/onnx/iris-classifier/gbtree.onnx + monitoring: + model_type: classification + +- name: iris-classifier-tf + kind: RealtimeAPI + predictor: + type: tensorflow + path: tensorflow_predictor.py + model_path: s3://cortex-examples/tensorflow/iris-classifier/nn + monitoring: + model_type: classification + +- name: traffic-splitter + kind: TrafficSplitter + apis: + - name: iris-classifier-onnx + weight: 30 + - name: iris-classifier-tf + weight: 70 diff --git a/examples/traffic-splitter/onnx_predictor.py b/examples/traffic-splitter/onnx_predictor.py new file mode 100644 index 0000000000..1526cdcc36 --- /dev/null +++ b/examples/traffic-splitter/onnx_predictor.py @@ -0,0 +1,20 @@ +# WARNING: you are on the master branch, please refer to the examples on the branch that matches your `cortex version` + +labels = ["setosa", "versicolor", "virginica"] + + +class ONNXPredictor: + def
__init__(self, onnx_client, config): + self.client = onnx_client + + def predict(self, payload): + model_input = [ + payload["sepal_length"], + payload["sepal_width"], + payload["petal_length"], + payload["petal_width"], + ] + + prediction = self.client.predict(model_input) + predicted_class_id = prediction[0][0] + return labels[predicted_class_id] diff --git a/examples/apisplitter/sample.json b/examples/traffic-splitter/sample.json similarity index 100% rename from examples/apisplitter/sample.json rename to examples/traffic-splitter/sample.json diff --git a/examples/traffic-splitter/tensorflow_predictor.py b/examples/traffic-splitter/tensorflow_predictor.py new file mode 100644 index 0000000000..c880d2233f --- /dev/null +++ b/examples/traffic-splitter/tensorflow_predictor.py @@ -0,0 +1,13 @@ +# WARNING: you are on the master branch, please refer to the examples on the branch that matches your `cortex version` + +labels = ["setosa", "versicolor", "virginica"] + + +class TensorFlowPredictor: + def __init__(self, tensorflow_client, config): + self.client = tensorflow_client + + def predict(self, payload): + prediction = self.client.predict(payload) + predicted_class_id = int(prediction["class_ids"][0]) + return labels[predicted_class_id] diff --git a/pkg/operator/endpoints/logs.go b/pkg/operator/endpoints/logs.go index 7aea9ee4e2..de455597dc 100644 --- a/pkg/operator/endpoints/logs.go +++ b/pkg/operator/endpoints/logs.go @@ -20,7 +20,7 @@ import ( "net/http" "github.com/cortexlabs/cortex/pkg/operator/resources" - "github.com/cortexlabs/cortex/pkg/operator/resources/syncapi" + "github.com/cortexlabs/cortex/pkg/operator/resources/realtimeapi" "github.com/cortexlabs/cortex/pkg/types/userconfig" "github.com/gorilla/mux" "github.com/gorilla/websocket" @@ -38,8 +38,8 @@ func ReadLogs(w http.ResponseWriter, r *http.Request) { if deployedResource.Kind == userconfig.BatchAPIKind { respondError(w, r, ErrorLogsJobIDRequired(*deployedResource)) return - } else if 
deployedResource.Kind != userconfig.SyncAPIKind { - respondError(w, r, resources.ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.SyncAPIKind)) + } else if deployedResource.Kind != userconfig.RealtimeAPIKind { + respondError(w, r, resources.ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.RealtimeAPIKind)) return } @@ -51,5 +51,5 @@ func ReadLogs(w http.ResponseWriter, r *http.Request) { } defer socket.Close() - syncapi.ReadLogs(apiName, socket) + realtimeapi.ReadLogs(apiName, socket) } diff --git a/pkg/operator/main.go b/pkg/operator/main.go index 714e2ad6fc..2b04b563df 100644 --- a/pkg/operator/main.go +++ b/pkg/operator/main.go @@ -29,7 +29,7 @@ import ( "github.com/cortexlabs/cortex/pkg/operator/endpoints" "github.com/cortexlabs/cortex/pkg/operator/operator" "github.com/cortexlabs/cortex/pkg/operator/resources/batchapi" - "github.com/cortexlabs/cortex/pkg/operator/resources/syncapi" + "github.com/cortexlabs/cortex/pkg/operator/resources/realtimeapi" "github.com/cortexlabs/cortex/pkg/types/userconfig" "github.com/gorilla/mux" ) @@ -54,8 +54,8 @@ func main() { } for _, deployment := range deployments { - if userconfig.KindFromString(deployment.Labels["apiKind"]) == userconfig.SyncAPIKind { - if err := syncapi.UpdateAutoscalerCron(&deployment); err != nil { + if userconfig.KindFromString(deployment.Labels["apiKind"]) == userconfig.RealtimeAPIKind { + if err := realtimeapi.UpdateAutoscalerCron(&deployment); err != nil { exit.Error(errors.Wrap(err, "init")) } } diff --git a/pkg/operator/resources/errors.go b/pkg/operator/resources/errors.go index 0fb26f1988..b7d9a54356 100644 --- a/pkg/operator/resources/errors.go +++ b/pkg/operator/resources/errors.go @@ -32,8 +32,8 @@ const ( ErrCannotChangeTypeOfDeployedAPI = "resources.cannot_change_kind_of_deployed_api" ErrNoAvailableNodeComputeLimit = "resources.no_available_node_compute_limit" ErrJobIDRequired = "resources.job_id_required" - ErrAPIUsedByAPISplitter = 
"resources.syncapi_used_by_apisplitter" - ErrNotDeployedAPIsAPISplitter = "resources.trafficsplit_apis_not_deployed" + ErrAPIUsedByTrafficSplitter = "resources.realtimeapi_used_by_traffic_splitter" + ErrNotDeployedAPIsTrafficSplitter = "resources.trafficsplit_apis_not_deployed" ErrAPIGatewayDisabled = "resources.api_gateway_disabled" ) @@ -76,17 +76,21 @@ func ErrorNoAvailableNodeComputeLimit(resource string, reqStr string, maxStr str }) } -func ErrorAPIUsedByAPISplitter(apiSplitters []string) error { +func ErrorAPIUsedByTrafficSplitter(trafficSplitters []string) error { return errors.WithStack(&errors.Error{ - Kind: ErrAPIUsedByAPISplitter, - Message: fmt.Sprintf("cannot delete api because it is used by the following %s: %s", strings.PluralS("APISplitter", len(apiSplitters)), strings.StrsSentence(apiSplitters, "")), + Kind: ErrAPIUsedByTrafficSplitter, + Message: fmt.Sprintf("cannot delete api because it is used by the following %s: %s", strings.PluralS("TrafficSplitter", len(trafficSplitters)), strings.StrsSentence(trafficSplitters, "")), }) } -func ErrorNotDeployedAPIsAPISplitter(notDeployedAPIs []string) error { +func ErrorNotDeployedAPIsTrafficSplitter(notDeployedAPIs []string) error { + message := fmt.Sprintf("apis %s were either not found or are not RealtimeAPI kind", strings.StrsAnd(notDeployedAPIs)) + if len(notDeployedAPIs) == 1 { + message = fmt.Sprintf("api %s was either not found or is not RealtimeAPI kind", notDeployedAPIs[0]) + } return errors.WithStack(&errors.Error{ - Kind: ErrNotDeployedAPIsAPISplitter, - Message: fmt.Sprintf("unable to find specified %s: %s", strings.PluralS("SyncAPI", len(notDeployedAPIs)), strings.StrsAnd(notDeployedAPIs)), + Kind: ErrNotDeployedAPIsTrafficSplitter, + Message: message, }) } diff --git a/pkg/operator/resources/syncapi/api.go b/pkg/operator/resources/realtimeapi/api.go similarity index 97% rename from pkg/operator/resources/syncapi/api.go rename to pkg/operator/resources/realtimeapi/api.go index 
aaa725ad15..75fae61715 100644 --- a/pkg/operator/resources/syncapi/api.go +++ b/pkg/operator/resources/realtimeapi/api.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package syncapi +package realtimeapi import ( "fmt" @@ -170,7 +170,7 @@ func DeleteAPI(apiName string, keepCache bool) error { }, // delete api from cloudwatch dashboard func() error { - virtualServices, err := config.K8s.ListVirtualServicesByLabel("apiKind", userconfig.SyncAPIKind.String()) + virtualServices, err := config.K8s.ListVirtualServicesByLabel("apiKind", userconfig.RealtimeAPIKind.String()) if err != nil { return errors.Wrap(err, "failed to get virtual services") } @@ -194,7 +194,7 @@ func DeleteAPI(apiName string, keepCache bool) error { return nil } -func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.SyncAPI, error) { +func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.RealtimeAPI, error) { statuses, err := GetAllStatuses(deployments, pods) if err != nil { return nil, err @@ -211,7 +211,7 @@ func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.Sync return nil, err } - syncAPIs := make([]schema.SyncAPI, len(apis)) + realtimeAPIs := make([]schema.RealtimeAPI, len(apis)) for i, api := range apis { endpoint, err := operator.APIEndpoint(&api) @@ -219,7 +219,7 @@ func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.Sync return nil, err } - syncAPIs[i] = schema.SyncAPI{ + realtimeAPIs[i] = schema.RealtimeAPI{ Spec: api, Status: statuses[i], Metrics: allMetrics[i], @@ -227,7 +227,7 @@ func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.Sync } } - return syncAPIs, nil + return realtimeAPIs, nil } func namesAndIDsFromStatuses(statuses []status.Status) ([]string, []string) { @@ -264,7 +264,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) (*schema.GetAPIRe } return &schema.GetAPIResponse{ 
- SyncAPI: &schema.SyncAPI{ + RealtimeAPI: &schema.RealtimeAPI{ Spec: *api, Status: *status, Metrics: *metrics, diff --git a/pkg/operator/resources/syncapi/autoscaler.go b/pkg/operator/resources/realtimeapi/autoscaler.go similarity index 99% rename from pkg/operator/resources/syncapi/autoscaler.go rename to pkg/operator/resources/realtimeapi/autoscaler.go index fb9e847045..c30964ee1e 100644 --- a/pkg/operator/resources/syncapi/autoscaler.go +++ b/pkg/operator/resources/realtimeapi/autoscaler.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package syncapi +package realtimeapi import ( "log" diff --git a/pkg/operator/resources/syncapi/dashboard.go b/pkg/operator/resources/realtimeapi/dashboard.go similarity index 99% rename from pkg/operator/resources/syncapi/dashboard.go rename to pkg/operator/resources/realtimeapi/dashboard.go index 8cbaa19970..10ed673a01 100644 --- a/pkg/operator/resources/syncapi/dashboard.go +++ b/pkg/operator/resources/realtimeapi/dashboard.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package syncapi +package realtimeapi import ( "fmt" diff --git a/pkg/operator/resources/syncapi/errors.go b/pkg/operator/resources/realtimeapi/errors.go similarity index 92% rename from pkg/operator/resources/syncapi/errors.go rename to pkg/operator/resources/realtimeapi/errors.go index 27fa9d264a..4a8a045ece 100644 --- a/pkg/operator/resources/syncapi/errors.go +++ b/pkg/operator/resources/realtimeapi/errors.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package syncapi +package realtimeapi import ( "fmt" @@ -23,7 +23,7 @@ import ( ) const ( - ErrAPIUpdating = "syncapi.api_updating" + ErrAPIUpdating = "realtimeapi.api_updating" ) func ErrorAPIUpdating(apiName string) error { diff --git a/pkg/operator/resources/syncapi/k8s_specs.go b/pkg/operator/resources/realtimeapi/k8s_specs.go similarity index 99% rename from pkg/operator/resources/syncapi/k8s_specs.go rename to pkg/operator/resources/realtimeapi/k8s_specs.go index 17ad5dfcb3..2ece34db89 100644 --- a/pkg/operator/resources/syncapi/k8s_specs.go +++ b/pkg/operator/resources/realtimeapi/k8s_specs.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package syncapi +package realtimeapi import ( "github.com/cortexlabs/cortex/pkg/lib/k8s" diff --git a/pkg/operator/resources/syncapi/logs.go b/pkg/operator/resources/realtimeapi/logs.go similarity index 99% rename from pkg/operator/resources/syncapi/logs.go rename to pkg/operator/resources/realtimeapi/logs.go index ce29820bc2..47b0cf3a9f 100644 --- a/pkg/operator/resources/syncapi/logs.go +++ b/pkg/operator/resources/realtimeapi/logs.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package syncapi +package realtimeapi import ( "encoding/json" diff --git a/pkg/operator/resources/syncapi/metrics.go b/pkg/operator/resources/realtimeapi/metrics.go similarity index 99% rename from pkg/operator/resources/syncapi/metrics.go rename to pkg/operator/resources/realtimeapi/metrics.go index 69f14bc885..d433608cce 100644 --- a/pkg/operator/resources/syncapi/metrics.go +++ b/pkg/operator/resources/realtimeapi/metrics.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package syncapi +package realtimeapi import ( "encoding/base64" diff --git a/pkg/operator/resources/syncapi/status.go b/pkg/operator/resources/realtimeapi/status.go similarity index 99% rename from pkg/operator/resources/syncapi/status.go rename to pkg/operator/resources/realtimeapi/status.go index df383e3135..33e5424afd 100644 --- a/pkg/operator/resources/syncapi/status.go +++ b/pkg/operator/resources/realtimeapi/status.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package syncapi +package realtimeapi import ( "sort" diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go index a56558aad0..fcf2ec7e84 100644 --- a/pkg/operator/resources/resources.go +++ b/pkg/operator/resources/resources.go @@ -27,9 +27,9 @@ import ( "github.com/cortexlabs/cortex/pkg/lib/zip" "github.com/cortexlabs/cortex/pkg/operator/config" "github.com/cortexlabs/cortex/pkg/operator/operator" - "github.com/cortexlabs/cortex/pkg/operator/resources/apisplitter" "github.com/cortexlabs/cortex/pkg/operator/resources/batchapi" - "github.com/cortexlabs/cortex/pkg/operator/resources/syncapi" + "github.com/cortexlabs/cortex/pkg/operator/resources/realtimeapi" + "github.com/cortexlabs/cortex/pkg/operator/resources/trafficsplitter" "github.com/cortexlabs/cortex/pkg/operator/schema" "github.com/cortexlabs/cortex/pkg/types" "github.com/cortexlabs/cortex/pkg/types/spec" @@ -93,7 +93,7 @@ func Deploy(projectBytes []byte, configFileName string, configBytes []byte, forc err = ValidateClusterAPIs(apiConfigs, projectFiles) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nSync API can be found at https://docs.cortex.dev/v/%s/deployments/syncapi/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration\nAPI Splitter can be found at https://docs.cortex.dev/v/%s/deployments/syncapi/apisplitter", 
consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nRealtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration\nTraffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/trafficsplitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) return nil, err } @@ -107,8 +107,8 @@ func Deploy(projectBytes []byte, configFileName string, configBytes []byte, forc } } - // This is done if user specifies SyncAPIs in same file as APISplitter - apiConfigs = append(ExclusiveFilterAPIsByKind(apiConfigs, userconfig.APISplitterKind), InclusiveFilterAPIsByKind(apiConfigs, userconfig.APISplitterKind)...) + // This is done if user specifies RealtimeAPIs in same file as TrafficSplitter + apiConfigs = append(ExclusiveFilterAPIsByKind(apiConfigs, userconfig.TrafficSplitterKind), InclusiveFilterAPIsByKind(apiConfigs, userconfig.TrafficSplitterKind)...) 
results := make([]schema.DeployResult, len(apiConfigs)) for i, apiConfig := range apiConfigs { @@ -137,14 +137,14 @@ func UpdateAPI(apiConfig *userconfig.API, projectID string, force bool) (*spec.A } switch apiConfig.Kind { - case userconfig.SyncAPIKind: - return syncapi.UpdateAPI(apiConfig, projectID, force) + case userconfig.RealtimeAPIKind: + return realtimeapi.UpdateAPI(apiConfig, projectID, force) case userconfig.BatchAPIKind: return batchapi.UpdateAPI(apiConfig, projectID) - case userconfig.APISplitterKind: - return apisplitter.UpdateAPI(apiConfig, projectID, force) + case userconfig.TrafficSplitterKind: + return trafficsplitter.UpdateAPI(apiConfig, projectID, force) default: - return nil, "", ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.SyncAPIKind, userconfig.BatchAPIKind, userconfig.APISplitterKind) // unexpected + return nil, "", ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.RealtimeAPIKind, userconfig.BatchAPIKind, userconfig.TrafficSplitterKind) // unexpected } } @@ -155,10 +155,10 @@ func RefreshAPI(apiName string, force bool) (string, error) { } switch deployedResource.Kind { - case userconfig.SyncAPIKind: - return syncapi.RefreshAPI(apiName, force) + case userconfig.RealtimeAPIKind: + return realtimeapi.RefreshAPI(apiName, force) default: - return "", ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.SyncAPIKind) + return "", ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.RealtimeAPIKind) } } @@ -172,13 +172,13 @@ func DeleteAPI(apiName string, keepCache bool) (*schema.DeleteResponse, error) { go func() { err := parallel.RunFirstErr( func() error { - return syncapi.DeleteAPI(apiName, keepCache) + return realtimeapi.DeleteAPI(apiName, keepCache) }, func() error { return batchapi.DeleteAPI(apiName, keepCache) }, func() error { - return apisplitter.DeleteAPI(apiName, keepCache) + return trafficsplitter.DeleteAPI(apiName, keepCache) }, ) if err != nil { @@ -189,17 +189,17 @@ 
func DeleteAPI(apiName string, keepCache bool) (*schema.DeleteResponse, error) { } switch deployedResource.Kind { - case userconfig.SyncAPIKind: - err := checkIfUsedByAPISplitter(apiName) + case userconfig.RealtimeAPIKind: + err := checkIfUsedByTrafficSplitter(apiName) if err != nil { return nil, err } - err = syncapi.DeleteAPI(apiName, keepCache) + err = realtimeapi.DeleteAPI(apiName, keepCache) if err != nil { return nil, err } - case userconfig.APISplitterKind: - err := apisplitter.DeleteAPI(apiName, keepCache) + case userconfig.TrafficSplitterKind: + err := trafficsplitter.DeleteAPI(apiName, keepCache) if err != nil { return nil, err } @@ -209,7 +209,7 @@ func DeleteAPI(apiName string, keepCache bool) (*schema.DeleteResponse, error) { return nil, err } default: - return nil, ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.SyncAPIKind, userconfig.BatchAPIKind, userconfig.APISplitterKind) // unexpected + return nil, ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.RealtimeAPIKind, userconfig.BatchAPIKind, userconfig.TrafficSplitterKind) // unexpected } return &schema.DeleteResponse{ @@ -249,30 +249,30 @@ func GetAPIs() (*schema.GetAPIsResponse, error) { return nil, err } - syncAPIPods := []kcore.Pod{} + realtimeAPIPods := []kcore.Pod{} batchAPIPods := []kcore.Pod{} for _, pod := range pods { switch pod.Labels["apiKind"] { - case userconfig.SyncAPIKind.String(): - syncAPIPods = append(syncAPIPods, pod) + case userconfig.RealtimeAPIKind.String(): + realtimeAPIPods = append(realtimeAPIPods, pod) case userconfig.BatchAPIKind.String(): batchAPIPods = append(batchAPIPods, pod) } } var batchAPIVirtualServices []istioclientnetworking.VirtualService - var apiSplitterVirtualServices []istioclientnetworking.VirtualService + var trafficSplitterVirtualServices []istioclientnetworking.VirtualService for _, vs := range virtualServices { switch vs.Labels["apiKind"] { case userconfig.BatchAPIKind.String(): batchAPIVirtualServices = 
append(batchAPIVirtualServices, vs) - case userconfig.APISplitterKind.String(): - apiSplitterVirtualServices = append(apiSplitterVirtualServices, vs) + case userconfig.TrafficSplitterKind.String(): + trafficSplitterVirtualServices = append(trafficSplitterVirtualServices, vs) } } - syncAPIList, err := syncapi.GetAllAPIs(syncAPIPods, deployments) + realtimeAPIList, err := realtimeapi.GetAllAPIs(realtimeAPIPods, deployments) if err != nil { return nil, err } @@ -282,14 +282,14 @@ func GetAPIs() (*schema.GetAPIsResponse, error) { return nil, err } - apiSplitterList, err := apisplitter.GetAllAPIs(apiSplitterVirtualServices) + trafficSplitterList, err := trafficsplitter.GetAllAPIs(trafficSplitterVirtualServices) if err != nil { return nil, err } return &schema.GetAPIsResponse{ - BatchAPIs: batchAPIList, - SyncAPIs: syncAPIList, - APISplitters: apiSplitterList, + BatchAPIs: batchAPIList, + RealtimeAPIs: realtimeAPIList, + TrafficSplitters: trafficSplitterList, }, nil } @@ -300,38 +300,38 @@ func GetAPI(apiName string) (*schema.GetAPIResponse, error) { } switch deployedResource.Kind { - case userconfig.SyncAPIKind: - return syncapi.GetAPIByName(deployedResource) + case userconfig.RealtimeAPIKind: + return realtimeapi.GetAPIByName(deployedResource) case userconfig.BatchAPIKind: return batchapi.GetAPIByName(deployedResource) - case userconfig.APISplitterKind: - return apisplitter.GetAPIByName(deployedResource) + case userconfig.TrafficSplitterKind: + return trafficsplitter.GetAPIByName(deployedResource) default: - return nil, ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.SyncAPIKind, userconfig.BatchAPIKind) // unexpected + return nil, ErrorOperationIsOnlySupportedForKind(*deployedResource, userconfig.RealtimeAPIKind, userconfig.BatchAPIKind) // unexpected } } -//checkIfUsedByAPISplitter checks if api is used by a deployed APISplitter -func checkIfUsedByAPISplitter(apiName string) error { - virtualServices, err := 
config.K8s.ListVirtualServicesByLabel("apiKind", userconfig.APISplitterKind.String()) +//checkIfUsedByTrafficSplitter checks if api is used by a deployed TrafficSplitter +func checkIfUsedByTrafficSplitter(apiName string) error { + virtualServices, err := config.K8s.ListVirtualServicesByLabel("apiKind", userconfig.TrafficSplitterKind.String()) if err != nil { return err } - var usedByAPISplitters []string + var usedByTrafficSplitters []string for _, vs := range virtualServices { - apiSplitterSpec, err := operator.DownloadAPISpec(vs.Labels["apiName"], vs.Labels["apiID"]) + trafficSplitterSpec, err := operator.DownloadAPISpec(vs.Labels["apiName"], vs.Labels["apiID"]) if err != nil { return err } - for _, api := range apiSplitterSpec.APIs { + for _, api := range trafficSplitterSpec.APIs { if apiName == api.Name { - usedByAPISplitters = append(usedByAPISplitters, apiSplitterSpec.Name) + usedByTrafficSplitters = append(usedByTrafficSplitters, trafficSplitterSpec.Name) } } } - if len(usedByAPISplitters) > 0 { - return ErrorAPIUsedByAPISplitter(usedByAPISplitters) + if len(usedByTrafficSplitters) > 0 { + return ErrorAPIUsedByTrafficSplitter(usedByTrafficSplitters) } return nil } diff --git a/pkg/operator/resources/apisplitter/api.go b/pkg/operator/resources/trafficsplitter/api.go similarity index 86% rename from pkg/operator/resources/apisplitter/api.go rename to pkg/operator/resources/trafficsplitter/api.go index 45f61413e3..f123c18871 100644 --- a/pkg/operator/resources/apisplitter/api.go +++ b/pkg/operator/resources/trafficsplitter/api.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package apisplitter +package trafficsplitter import ( "fmt" @@ -116,8 +116,8 @@ func getK8sResources(apiConfig *userconfig.API) (*istioclientnetworking.VirtualS return virtualService, err } -func applyK8sVirtualService(apiSplitter *spec.API, prevVirtualService *istioclientnetworking.VirtualService) error { - newVirtualService := virtualServiceSpec(apiSplitter) +func applyK8sVirtualService(trafficSplitter *spec.API, prevVirtualService *istioclientnetworking.VirtualService) error { + newVirtualService := virtualServiceSpec(trafficSplitter) if prevVirtualService == nil { _, err := config.K8s.CreateVirtualService(newVirtualService) @@ -128,9 +128,9 @@ func applyK8sVirtualService(apiSplitter *spec.API, prevVirtualService *istioclie return err } -func getAPISplitterDestinations(apiSplitter *spec.API) []k8s.Destination { - destinations := make([]k8s.Destination, len(apiSplitter.APIs)) - for i, api := range apiSplitter.APIs { +func getTrafficSplitterDestinations(trafficSplitter *spec.API) []k8s.Destination { + destinations := make([]k8s.Destination, len(trafficSplitter.APIs)) + for i, api := range trafficSplitter.APIs { destinations[i] = k8s.Destination{ ServiceName: operator.K8sName(api.Name), Weight: int32(api.Weight), @@ -140,13 +140,13 @@ func getAPISplitterDestinations(apiSplitter *spec.API) []k8s.Destination { return destinations } -func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schema.APISplitter, error) { +func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schema.TrafficSplitter, error) { apiNames := []string{} apiIDs := []string{} - apiSplitters := []schema.APISplitter{} + trafficSplitters := []schema.TrafficSplitter{} for _, virtualService := range virtualServices { - if virtualService.Labels["apiKind"] == userconfig.APISplitterKind.String() { + if virtualService.Labels["apiKind"] == userconfig.TrafficSplitterKind.String() { apiNames = append(apiNames, virtualService.Labels["apiName"]) apiIDs = 
append(apiIDs, virtualService.Labels["apiID"]) } @@ -157,19 +157,19 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schem return nil, err } - for _, apiSplitter := range apis { - endpoint, err := operator.APIEndpoint(&apiSplitter) + for _, trafficSplitter := range apis { + endpoint, err := operator.APIEndpoint(&trafficSplitter) if err != nil { return nil, err } - apiSplitters = append(apiSplitters, schema.APISplitter{ - Spec: apiSplitter, + trafficSplitters = append(trafficSplitters, schema.TrafficSplitter{ + Spec: trafficSplitter, Endpoint: endpoint, }) } - return apiSplitters, nil + return trafficSplitters, nil } func GetAPIByName(deployedResource *operator.DeployedResource) (*schema.GetAPIResponse, error) { @@ -184,7 +184,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) (*schema.GetAPIRe } return &schema.GetAPIResponse{ - APISplitter: &schema.APISplitter{ + TrafficSplitter: &schema.TrafficSplitter{ Spec: *api, Endpoint: endpoint, }, diff --git a/pkg/operator/resources/apisplitter/k8s_specs.go b/pkg/operator/resources/trafficsplitter/k8s_specs.go similarity index 67% rename from pkg/operator/resources/apisplitter/k8s_specs.go rename to pkg/operator/resources/trafficsplitter/k8s_specs.go index 73c1beed3f..b691135a4a 100644 --- a/pkg/operator/resources/apisplitter/k8s_specs.go +++ b/pkg/operator/resources/trafficsplitter/k8s_specs.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package apisplitter +package trafficsplitter import ( "github.com/cortexlabs/cortex/pkg/lib/k8s" @@ -29,20 +29,20 @@ const ( _defaultPortInt32, _defaultPortStr = int32(8888), "8888" ) -func virtualServiceSpec(apiSplitter *spec.API) *istioclientnetworking.VirtualService { +func virtualServiceSpec(trafficSplitter *spec.API) *istioclientnetworking.VirtualService { return k8s.VirtualService(&k8s.VirtualServiceSpec{ - Name: operator.K8sName(apiSplitter.Name), + Name: operator.K8sName(trafficSplitter.Name), Gateways: []string{"apis-gateway"}, - Destinations: getAPISplitterDestinations(apiSplitter), - ExactPath: apiSplitter.Networking.Endpoint, + Destinations: getTrafficSplitterDestinations(trafficSplitter), + ExactPath: trafficSplitter.Networking.Endpoint, Rewrite: pointer.String("predict"), Annotations: map[string]string{ - userconfig.EndpointAnnotationKey: *apiSplitter.Networking.Endpoint, - userconfig.APIGatewayAnnotationKey: apiSplitter.Networking.APIGateway.String()}, + userconfig.EndpointAnnotationKey: *trafficSplitter.Networking.Endpoint, + userconfig.APIGatewayAnnotationKey: trafficSplitter.Networking.APIGateway.String()}, Labels: map[string]string{ - "apiName": apiSplitter.Name, - "apiKind": apiSplitter.Kind.String(), - "apiID": apiSplitter.ID, + "apiName": trafficSplitter.Name, + "apiKind": trafficSplitter.Kind.String(), + "apiID": trafficSplitter.ID, }, }) } diff --git a/pkg/operator/resources/validations.go b/pkg/operator/resources/validations.go index 6031d2a3f5..b9487c33e9 100644 --- a/pkg/operator/resources/validations.go +++ b/pkg/operator/resources/validations.go @@ -87,21 +87,21 @@ func ValidateClusterAPIs(apis []userconfig.API, projectFiles spec.ProjectFiles) return err } - deployedSyncAPIs := strset.New() + deployedRealtimeAPIs := strset.New() for _, virtualService := range virtualServices { - if virtualService.Labels["apiKind"] == userconfig.SyncAPIKind.String() { - deployedSyncAPIs.Add(virtualService.Labels["apiName"]) + if 
virtualService.Labels["apiKind"] == userconfig.RealtimeAPIKind.String() { + deployedRealtimeAPIs.Add(virtualService.Labels["apiName"]) } } didPrintWarning := false - syncAPIs := InclusiveFilterAPIsByKind(apis, userconfig.SyncAPIKind) + realtimeAPIs := InclusiveFilterAPIsByKind(apis, userconfig.RealtimeAPIKind) for i := range apis { api := &apis[i] - if api.Kind == userconfig.SyncAPIKind || api.Kind == userconfig.BatchAPIKind { + if api.Kind == userconfig.RealtimeAPIKind || api.Kind == userconfig.BatchAPIKind { if err := spec.ValidateAPI(api, projectFiles, types.AWSProviderType, config.AWS); err != nil { return errors.Wrap(err, api.Identify()) } @@ -115,11 +115,11 @@ func ValidateClusterAPIs(apis []userconfig.API, projectFiles spec.ProjectFiles) } } - if api.Kind == userconfig.APISplitterKind { - if err := spec.ValidateAPISplitter(api, types.AWSProviderType, config.AWS); err != nil { + if api.Kind == userconfig.TrafficSplitterKind { + if err := spec.ValidateTrafficSplitter(api, types.AWSProviderType, config.AWS); err != nil { return errors.Wrap(err, api.Identify()) } - if err := checkIfAPIExists(api.APIs, syncAPIs, deployedSyncAPIs); err != nil { + if err := checkIfAPIExists(api.APIs, realtimeAPIs, deployedRealtimeAPIs); err != nil { return errors.Wrap(err, api.Identify()) } if err := validateEndpointCollisions(api, virtualServices); err != nil { @@ -301,12 +301,12 @@ func ExclusiveFilterAPIsByKind(apis []userconfig.API, kindsToExclude ...userconf } // checkIfAPIExists checks if referenced apis in trafficsplitter are either defined in yaml or already deployed -func checkIfAPIExists(trafficSplitterAPIs []*userconfig.TrafficSplit, apis []userconfig.API, deployedSyncAPIs strset.Set) error { +func checkIfAPIExists(trafficSplitterAPIs []*userconfig.TrafficSplit, apis []userconfig.API, deployedRealtimeAPIs strset.Set) error { var missingAPIs []string // check if apis named in trafficsplitter are either defined in same yaml or already deployed for _, trafficSplitAPI := 
range trafficSplitterAPIs { //check if already deployed - deployed := deployedSyncAPIs.Has(trafficSplitAPI.Name) + deployed := deployedRealtimeAPIs.Has(trafficSplitAPI.Name) // check defined apis for _, definedAPI := range apis { @@ -319,7 +319,7 @@ func checkIfAPIExists(trafficSplitterAPIs []*userconfig.TrafficSplit, apis []use } } if len(missingAPIs) != 0 { - return ErrorNotDeployedAPIsAPISplitter(missingAPIs) + return ErrorNotDeployedAPIsTrafficSplitter(missingAPIs) } return nil diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go index 3d7ddd7ea3..d8a849d8ec 100644 --- a/pkg/operator/schema/schema.go +++ b/pkg/operator/schema/schema.go @@ -52,12 +52,12 @@ type DeployResult struct { } type GetAPIsResponse struct { - SyncAPIs []SyncAPI `json:"sync_apis"` - BatchAPIs []BatchAPI `json:"batch_apis"` - APISplitters []APISplitter `json:"api_splitters"` + RealtimeAPIs []RealtimeAPI `json:"realtime_apis"` + BatchAPIs []BatchAPI `json:"batch_apis"` + TrafficSplitters []TrafficSplitter `json:"traffic_splitters"` } -type SyncAPI struct { +type RealtimeAPI struct { Spec spec.API `json:"spec"` Status status.Status `json:"status"` Metrics metrics.Metrics `json:"metrics"` @@ -65,15 +65,15 @@ type SyncAPI struct { DashboardURL string `json:"dashboard_url"` } -type APISplitter struct { +type TrafficSplitter struct { Spec spec.API `json:"spec"` Endpoint string `json:"endpoint"` } type GetAPIResponse struct { - SyncAPI *SyncAPI `json:"sync_api"` - BatchAPI *BatchAPI `json:"batch_api"` - APISplitter *APISplitter `json:"api_splitter"` + RealtimeAPI *RealtimeAPI `json:"realtime_api"` + BatchAPI *BatchAPI `json:"batch_api"` + TrafficSplitter *TrafficSplitter `json:"traffic_splitters"` } type BatchAPI struct { diff --git a/pkg/types/spec/errors.go b/pkg/types/spec/errors.go index d1e8c14df9..759323eea7 100644 --- a/pkg/types/spec/errors.go +++ b/pkg/types/spec/errors.go @@ -70,21 +70,21 @@ const ( ErrInvalidNumberOfInfs = "spec.invalid_number_of_infs" 
ErrInsufficientBatchConcurrencyLevel = "spec.insufficient_batch_concurrency_level" ErrInsufficientBatchConcurrencyLevelInf = "spec.insufficient_batch_concurrency_level_inf" - ErrIncorrectAPISplitterWeight = "spec.incorrect_api_splitter_weight" - ErrAPISplitterAPIsNotUnique = "spec.apisplitter_apis_not_unique" + ErrIncorrectTrafficSplitterWeight = "spec.incorrect_traffic_splitters_weight" + ErrTrafficSplitterAPIsNotUnique = "spec.traffic_splitter_apis_not_unique" ) func ErrorMalformedConfig() error { return errors.WithStack(&errors.Error{ Kind: ErrMalformedConfig, - Message: fmt.Sprintf("cortex YAML configuration files must contain a list of maps (see https://docs.cortex.dev/v/%s/deployments/syncapi/api-configuration for Sync API documentation and see https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration for Batch API documentation)", consts.CortexVersionMinor, consts.CortexVersionMinor), + Message: fmt.Sprintf("cortex YAML configuration files must contain a list of maps (see https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration for Realtime API documentation and see https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration for Batch API documentation)", consts.CortexVersionMinor, consts.CortexVersionMinor), }) } func ErrorNoAPIs() error { return errors.WithStack(&errors.Error{ Kind: ErrNoAPIs, - Message: fmt.Sprintf("at least one API must be configured (see https://docs.cortex.dev/v/%s/deployments/syncapi/api-configuration for Sync API documentation and see https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration for Batch API documentation)", consts.CortexVersionMinor, consts.CortexVersionMinor), + Message: fmt.Sprintf("at least one API must be configured (see https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration for Realtime API documentation and see https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration for Batch API documentation)", consts.CortexVersionMinor, 
consts.CortexVersionMinor), }) } @@ -397,16 +397,18 @@ func ErrorInsufficientBatchConcurrencyLevelInf(maxBatchSize int32, threadsPerPro }) } -func ErrorIncorrectAPISplitterWeightTotal(totalWeight int) error { +// TODO test this error +func ErrorIncorrectTrafficSplitterWeightTotal(totalWeight int) error { return errors.WithStack(&errors.Error{ - Kind: ErrIncorrectAPISplitterWeight, - Message: fmt.Sprintf("expected api splitter weights to sum to 100 but found %d", totalWeight), + Kind: ErrIncorrectTrafficSplitterWeight, + Message: fmt.Sprintf("expected weights to sum to 100 but found %d", totalWeight), }) } -func ErrorAPISplitterAPIsNotUnique(names []string) error { +// TODO test this error +func ErrorTrafficSplitterAPIsNotUnique(names []string) error { return errors.WithStack(&errors.Error{ - Kind: ErrAPISplitterAPIsNotUnique, - Message: fmt.Sprintf("api splitter %s not unique: %s", s.PluralS("API", len(names)), s.StrsSentence(names, "")), + Kind: ErrTrafficSplitterAPIsNotUnique, + Message: fmt.Sprintf("%s not unique: %s", s.PluralS("api", len(names)), s.StrsSentence(names, "")), }) } diff --git a/pkg/types/spec/validations.go b/pkg/types/spec/validations.go index 9cf752edd9..8a14dd6967 100644 --- a/pkg/types/spec/validations.go +++ b/pkg/types/spec/validations.go @@ -50,7 +50,7 @@ var AutoscalingTickInterval = 10 * time.Second func apiValidation(provider types.ProviderType, resource userconfig.Resource) *cr.StructValidation { structFieldValidations := []*cr.StructFieldValidation{} switch resource.Kind { - case userconfig.SyncAPIKind: + case userconfig.RealtimeAPIKind: structFieldValidations = append(resourceStructValidations, predictorValidation(), networkingValidation(resource.Kind), @@ -65,7 +65,7 @@ func apiValidation(provider types.ProviderType, resource userconfig.Resource) *c networkingValidation(resource.Kind), computeValidation(provider), ) - case userconfig.APISplitterKind: + case userconfig.TrafficSplitterKind: structFieldValidations = 
append(resourceStructValidations, multiAPIsValidation(), networkingValidation(resource.Kind), @@ -273,7 +273,7 @@ func networkingValidation(kind userconfig.Kind) *cr.StructFieldValidation { }, }, } - if kind == userconfig.SyncAPIKind { + if kind == userconfig.RealtimeAPIKind { structFieldValidation = append(structFieldValidation, &cr.StructFieldValidation{ StructField: "LocalPort", IntPtrValidation: &cr.IntPtrValidation{ @@ -595,13 +595,13 @@ func ExtractAPIConfigs(configBytes []byte, provider types.ProviderType, configFi err = errors.Wrap(errors.FirstError(errs...), userconfig.IdentifyAPI(configFileName, name, kind, i)) switch provider { case types.LocalProviderType: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Sync API can be found at https://docs.cortex.dev/v/%s/deployments/syncapi/api-configuration", consts.CortexVersionMinor)) + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration", consts.CortexVersionMinor)) case types.AWSProviderType: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nSync API can be found at https://docs.cortex.dev/v/%s/deployments/syncapi/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration\nAPI Splitter can be found at https://docs.cortex.dev/v/%s/deployments/syncapi/apisplitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nRealtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration\nTraffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/traffic-splitter", consts.CortexVersionMinor, consts.CortexVersionMinor, 
consts.CortexVersionMinor)) } } - if resourceStruct.Kind == userconfig.BatchAPIKind || resourceStruct.Kind == userconfig.APISplitterKind { + if resourceStruct.Kind == userconfig.BatchAPIKind || resourceStruct.Kind == userconfig.TrafficSplitterKind { if provider == types.LocalProviderType { return nil, errors.Wrap(ErrorKindIsNotSupportedByProvider(resourceStruct.Kind, types.LocalProviderType), userconfig.IdentifyAPI(configFileName, resourceStruct.Name, resourceStruct.Kind, i)) } @@ -614,19 +614,19 @@ func ExtractAPIConfigs(configBytes []byte, provider types.ProviderType, configFi kind := userconfig.KindFromString(kindString) err = errors.Wrap(errors.FirstError(errs...), userconfig.IdentifyAPI(configFileName, name, kind, i)) switch kind { - case userconfig.SyncAPIKind: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Sync API can be found at https://docs.cortex.dev/v/%s/deployments/syncapi/api-configuration", consts.CortexVersionMinor)) + case userconfig.RealtimeAPIKind: + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration", consts.CortexVersionMinor)) case userconfig.BatchAPIKind: return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Batch API can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration", consts.CortexVersionMinor)) - case userconfig.APISplitterKind: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for API Splitter can be found at https://docs.cortex.dev/v/%s/deployments/syncapi/apisplitter", consts.CortexVersionMinor)) + case userconfig.TrafficSplitterKind: + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Traffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/traffic-splitter", consts.CortexVersionMinor)) } } api.Index = i api.FileName = configFileName - if 
resourceStruct.Kind == userconfig.SyncAPIKind || resourceStruct.Kind == userconfig.BatchAPIKind { + if resourceStruct.Kind == userconfig.RealtimeAPIKind || resourceStruct.Kind == userconfig.BatchAPIKind { api.ApplyDefaultDockerPaths() } @@ -669,7 +669,7 @@ func ValidateAPI( return nil } -func ValidateAPISplitter( +func ValidateTrafficSplitter( api *userconfig.API, providerType types.ProviderType, awsClient *aws.Client, @@ -680,7 +680,7 @@ func ValidateAPISplitter( if err := verifyTotalWeight(api.APIs); err != nil { return err } - if err := areAPISplitterAPIsUnique(api.APIs); err != nil { + if err := areTrafficSplitterAPIsUnique(api.APIs); err != nil { return err } @@ -1274,11 +1274,11 @@ func verifyTotalWeight(apis []*userconfig.TrafficSplit) error { if totalWeight == 100 { return nil } - return ErrorIncorrectAPISplitterWeightTotal(totalWeight) + return errors.Wrap(ErrorIncorrectTrafficSplitterWeightTotal(totalWeight), userconfig.APIsKey) } -// areAPISplitterAPIsUnique gives error if the same API is used multiple times in APISplitter -func areAPISplitterAPIsUnique(apis []*userconfig.TrafficSplit) error { +// areTrafficSplitterAPIsUnique gives error if the same API is used multiple times in TrafficSplitter +func areTrafficSplitterAPIsUnique(apis []*userconfig.TrafficSplit) error { names := make(map[string][]userconfig.TrafficSplit) for _, api := range apis { names[api.Name] = append(names[api.Name], *api) @@ -1290,7 +1290,7 @@ func areAPISplitterAPIsUnique(apis []*userconfig.TrafficSplit) error { } } if len(notUniqueAPIs) > 0 { - return ErrorAPISplitterAPIsNotUnique(notUniqueAPIs) + return errors.Wrap(ErrorTrafficSplitterAPIsNotUnique(notUniqueAPIs), userconfig.APIsKey) } return nil } diff --git a/pkg/types/userconfig/api.go b/pkg/types/userconfig/api.go index a258529328..743182f030 100644 --- a/pkg/types/userconfig/api.go +++ b/pkg/types/userconfig/api.go @@ -299,7 +299,7 @@ func (api *API) UserStr(provider types.ProviderType) string { 
sb.WriteString(fmt.Sprintf("%s: %s\n", NameKey, api.Name)) sb.WriteString(fmt.Sprintf("%s: %s\n", KindKey, api.Kind.String())) - if api.Kind == APISplitterKind { + if api.Kind == TrafficSplitterKind { sb.WriteString(fmt.Sprintf("%s:\n", APIsKey)) for _, api := range api.APIs { sb.WriteString(s.Indent(api.UserStr(), " ")) diff --git a/pkg/types/userconfig/config_key.go b/pkg/types/userconfig/config_key.go index 7ead3784bf..ca7201e438 100644 --- a/pkg/types/userconfig/config_key.go +++ b/pkg/types/userconfig/config_key.go @@ -27,7 +27,7 @@ const ( AutoscalingKey = "autoscaling" UpdateStrategyKey = "update_strategy" - // APISplitter + // TrafficSplitter APIsKey = "apis" WeightKey = "weight" diff --git a/pkg/types/userconfig/kind.go b/pkg/types/userconfig/kind.go index dc76c324a1..3495426e59 100644 --- a/pkg/types/userconfig/kind.go +++ b/pkg/types/userconfig/kind.go @@ -20,16 +20,16 @@ type Kind int const ( UnknownKind Kind = iota - SyncAPIKind + RealtimeAPIKind BatchAPIKind - APISplitterKind + TrafficSplitterKind ) var _kinds = []string{ "unknown", - "SyncAPI", + "RealtimeAPI", "BatchAPI", - "APISplitter", + "TrafficSplitter", } func KindFromString(s string) Kind { diff --git a/pkg/workloads/cortex/serve/run.sh b/pkg/workloads/cortex/serve/run.sh index 04e4f325e4..4de3fc693f 100755 --- a/pkg/workloads/cortex/serve/run.sh +++ b/pkg/workloads/cortex/serve/run.sh @@ -52,7 +52,7 @@ export PYTHONPATH=$PYTHONPATH:$CORTEX_PYTHON_PATH export PYTHONUNBUFFERED=TRUE if [ "$CORTEX_PROVIDER" != "local" ]; then - if [ "$CORTEX_KIND" == "SyncAPI" ]; then + if [ "$CORTEX_KIND" == "RealtimeAPI" ]; then sysctl -w net.core.somaxconn=$CORTEX_SO_MAX_CONN >/dev/null sysctl -w net.ipv4.ip_local_port_range="15000 64000" >/dev/null sysctl -w net.ipv4.tcp_fin_timeout=30 >/dev/null diff --git a/pkg/workloads/cortex/serve/start.py b/pkg/workloads/cortex/serve/start.py index a42fbf8183..f86a808abd 100644 --- a/pkg/workloads/cortex/serve/start.py +++ b/pkg/workloads/cortex/serve/start.py @@ -81,7 
+81,7 @@ def main(): if raw_api_spec["predictor"]["type"] == "tensorflow": load_tensorflow_serving_models() - if raw_api_spec["kind"] == "SyncAPI": + if raw_api_spec["kind"] == "RealtimeAPI": # https://github.com/encode/uvicorn/blob/master/uvicorn/config.py uvicorn.run( "cortex.serve.wsgi:app", From e61142874a37e8796f75aa7e2defe68c111f2454 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 18 Aug 2020 20:39:25 -0400 Subject: [PATCH 02/12] Rename apisplitter example to traffic-splitter --- examples/apisplitter/README.md | 111 ------------------ examples/apisplitter/cortex.yaml | 27 ----- examples/apisplitter/onnx_predictor.py | 20 ---- examples/apisplitter/tensorflow_predictor.py | 13 -- examples/traffic-splitter/README.md | 8 +- examples/traffic-splitter/cortex.yaml | 2 +- examples/traffic-splitter/onnx_predictor.py | 2 +- .../traffic-splitter/tensorflow_predictor.py | 2 +- 8 files changed, 7 insertions(+), 178 deletions(-) delete mode 100644 examples/apisplitter/README.md delete mode 100644 examples/apisplitter/cortex.yaml delete mode 100644 examples/apisplitter/onnx_predictor.py delete mode 100644 examples/apisplitter/tensorflow_predictor.py diff --git a/examples/apisplitter/README.md b/examples/apisplitter/README.md deleted file mode 100644 index 5d7015dc10..0000000000 --- a/examples/apisplitter/README.md +++ /dev/null @@ -1,111 +0,0 @@ -# Splitting traffic between APIs - -_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub)_ - -This example shows how to split traffic between 2 different iris-classifiers deployed as SyncAPIs. - -To deploy this example: - -1. Determine your CLI Version `cortex version` -1. Clone the repo and switch to the current version by replacing `` with your CLI version: `git clone -b v https://github.com/cortexlabs/cortex` (e.g. 
if the output of `cortex version` is 0.18.1, the clone command would be `git clone -b v0.18.1 https://github.com/cortexlabs/cortex`) -1. Navigate to this example directory - -## `cortex deploy` - -```bash -$ cortex deploy --env aws - -creating iris-classifier-onnx (SyncAPI) -creating iris-classifier-tf (SyncAPI) -created iris-classifier-apisplitter (APISplitter) -``` - -## `cortex get` - -```bash -$ cortex get - -env sync api status up-to-date requested last update avg request 2XX -aws iris-classifier-onnx updating 0 1 27s - - -aws iris-classifier-tf updating 0 1 27s - - - -env api splitter apis last update -aws iris-classifier-apisplitter iris-classifier-onnx:30 iris-classifier-tf:70 27s -``` - -## `cortex get iris-classifier-apisplitter` - -```bash -$ cortex get iris-classifier-apisplitter --env aws - -apis weights status requested last update avg request 2XX 5XX -iris-classifier-onnx 30 live 1 1m - - - -iris-classifier-tf 70 live 1 1m - - - - -last updated: 1m -endpoint: https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -curl: curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -X POST -H "Content-Type: application/json" -d @sample.json -... 
-``` - -## Make multiple requests - -```bash -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -X POST -H "Content-Type: application/json" -d @sample.json -setosa - -$ curl https://abcedefg.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -X POST -H "Content-Type: application/json" -d @sample.json -setosa -``` - -## `cortex get iris-classifier-apisplitter` - -Notice the requests being routed to the different SyncAPIs based on their weights (the output below may not match yours): - -```bash -$ cortex get iris-classifier-apisplitter --env aws - -using aws environment - - -apis weights status requested last update avg request 2XX 5XX -iris-classifier-onnx 30 live 1 4m 6.00791 ms 1 - -iris-classifier-tf 70 live 1 4m 5.81867 ms 5 - - -last updated: 4m -endpoint: https://comtf6hs64.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -curl: curl https://comtf6hs64.execute-api.us-west-2.amazonaws.com/iris-classifier-apisplitter -X POST -H "Content-Type: application/json" -d @sample.json -... 
-``` - -## Cleanup - -Use `cortex delete ` to delete the API Splitter and the two SyncAPIs (note that the API Splitter and each Sync API must be deleted by separate `cortex delete` commands): - -```bash -$ cortex delete iris-classifier-apisplitter --env aws - -deleting iris-classifier-apisplitter - -$ cortex delete iris-classifier-onnx --env aws - -deleting iris-classifier-onnx - -$ cortex delete iris-classifier-tf --env aws - -deleting iris-classifier-tf -``` - -Running `cortex delete ` will free up cluster resources and allow Cortex to scale down to the minimum number of instances you specified during cluster installation. It will not spin down your cluster. diff --git a/examples/apisplitter/cortex.yaml b/examples/apisplitter/cortex.yaml deleted file mode 100644 index 97da4635dd..0000000000 --- a/examples/apisplitter/cortex.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - -- name: iris-classifier-onnx - kind: SyncAPI - predictor: - type: onnx - path: onnx_predictor.py - model_path: s3://cortex-examples/onnx/iris-classifier/gbtree.onnx - monitoring: - model_type: classification - -- name: iris-classifier-tf - kind: SyncAPI - predictor: - type: tensorflow - path: tensorflow_predictor.py - model_path: s3://cortex-examples/tensorflow/iris-classifier/nn - monitoring: - model_type: classification - -- name: iris-classifier-apisplitter - kind: APISplitter - apis: - - name: iris-classifier-onnx - weight: 30 - - name: iris-classifier-tf - weight: 70 diff --git a/examples/apisplitter/onnx_predictor.py b/examples/apisplitter/onnx_predictor.py deleted file mode 100644 index 7e851e81bf..0000000000 --- a/examples/apisplitter/onnx_predictor.py +++ /dev/null @@ -1,20 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your 
`cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - -labels = ["setosa", "versicolor", "virginica"] - - -class ONNXPredictor: - def __init__(self, onnx_client, config): - self.client = onnx_client - - def predict(self, payload): - model_input = [ - payload["sepal_length"], - payload["sepal_width"], - payload["petal_length"], - payload["petal_width"], - ] - - prediction = self.client.predict(model_input) - predicted_class_id = prediction[0][0] - return labels[predicted_class_id] diff --git a/examples/apisplitter/tensorflow_predictor.py b/examples/apisplitter/tensorflow_predictor.py deleted file mode 100644 index d8de5facb6..0000000000 --- a/examples/apisplitter/tensorflow_predictor.py +++ /dev/null @@ -1,13 +0,0 @@ -# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - -labels = ["setosa", "versicolor", "virginica"] - - -class TensorFlowPredictor: - def __init__(self, tensorflow_client, config): - self.client = tensorflow_client - - def predict(self, payload): - prediction = self.client.predict(payload) - predicted_class_id = int(prediction["class_ids"][0]) - return labels[predicted_class_id] diff --git a/examples/traffic-splitter/README.md b/examples/traffic-splitter/README.md index f5742463b6..22f05dbc43 100644 --- a/examples/traffic-splitter/README.md +++ b/examples/traffic-splitter/README.md @@ -1,8 +1,8 @@ # Splitting traffic between APIs -_WARNING: you are on the master branch, please refer to the examples on the branch that matches your `cortex version`_ +_WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub)_ -This example shows how to split traffic between 2 different iris-classifiers deployed as RealtimeAPIs. +This example shows how to split traffic between 2 different iris-classifiers deployed as Realtime APIs. To deploy this example: @@ -72,7 +72,7 @@ setosa ## `cortex get traffic-splitter` -Notice the requests being routed to the different RealtimeAPIs based on their weights (the output below may not match yours): +Notice the requests being routed to the different Realtime APIs based on their weights (the output below may not match yours): ```bash $ cortex get traffic-splitter --env aws @@ -92,7 +92,7 @@ curl: curl https://comtf6hs64.execute-api.us-west-2.amazonaws.com/traffic-splitt ## Cleanup -Use `cortex delete ` to delete the Traffic Splitter and the two RealtimeAPIs (note that the Traffic Splitter and each Realtime API must be deleted by separate `cortex delete` commands): +Use `cortex delete ` to delete the Traffic Splitter and the two Realtime APIs (note that the Traffic Splitter and each Realtime API must be deleted by separate `cortex delete` commands): ```bash $ cortex delete traffic-splitter --env aws diff --git a/examples/traffic-splitter/cortex.yaml b/examples/traffic-splitter/cortex.yaml index ec7a1bf01d..10ee00c58f 100644 --- a/examples/traffic-splitter/cortex.yaml +++ b/examples/traffic-splitter/cortex.yaml @@ -1,4 +1,4 @@ -# WARNING: you are on the master branch, please refer to the examples on the branch that matches your `cortex version` +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier-onnx kind: diff --git a/examples/traffic-splitter/onnx_predictor.py b/examples/traffic-splitter/onnx_predictor.py index 1526cdcc36..7e851e81bf 100644 --- a/examples/traffic-splitter/onnx_predictor.py +++ b/examples/traffic-splitter/onnx_predictor.py @@ -1,4 +1,4 @@ -# WARNING: you are on the master branch, please refer to the examples on the branch that matches your `cortex version` +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) labels = ["setosa", "versicolor", "virginica"] diff --git a/examples/traffic-splitter/tensorflow_predictor.py b/examples/traffic-splitter/tensorflow_predictor.py index c880d2233f..d8de5facb6 100644 --- a/examples/traffic-splitter/tensorflow_predictor.py +++ b/examples/traffic-splitter/tensorflow_predictor.py @@ -1,4 +1,4 @@ -# WARNING: you are on the master branch, please refer to the examples on the branch that matches your `cortex version` +# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) labels = ["setosa", "versicolor", "virginica"] From 14a83dcc3b151c0d8d3c0eb4d0980d8bcfa0e19e Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 18 Aug 2020 20:48:02 -0400 Subject: [PATCH 03/12] Fix examples --- examples/onnx/iris-classifier/cortex.yaml | 2 +- examples/onnx/multi-model-classifier/cortex.yaml | 2 +- examples/onnx/yolov5-youtube/cortex.yaml | 2 +- examples/pytorch/answer-generator/cortex.yaml | 2 +- examples/pytorch/image-classifier-alexnet/cortex.yaml | 2 +- examples/pytorch/image-classifier-resnet50/cortex.yaml | 2 +- examples/pytorch/iris-classifier/cortex.yaml | 2 +- examples/pytorch/language-identifier/cortex.yaml | 2 +- examples/pytorch/multi-model-text-analyzer/cortex.yaml | 2 +- examples/pytorch/object-detector/cortex.yaml | 2 +- examples/pytorch/reading-comprehender/cortex.yaml | 2 +- examples/pytorch/search-completer/cortex.yaml | 2 +- examples/pytorch/sentiment-analyzer/cortex.yaml | 2 +- examples/pytorch/text-generator/cortex.yaml | 2 +- examples/pytorch/text-summarizer/cortex.yaml | 2 +- examples/sklearn/iris-classifier/cortex.yaml | 6 +++--- examples/sklearn/mpg-estimator/cortex.yaml | 2 +- examples/spacy/entity-recognizer/cortex.yaml | 2 +- examples/tensorflow/image-classifier-inception/cortex.yaml | 2 +- examples/tensorflow/image-classifier-resnet50/cortex.yaml | 2 +- examples/tensorflow/iris-classifier/cortex.yaml | 2 +- examples/tensorflow/multi-model-classifier/cortex.yaml | 2 +- examples/tensorflow/sentiment-analyzer/cortex.yaml | 2 +- examples/tensorflow/text-generator/cortex.yaml | 2 +- examples/traffic-splitter/cortex.yaml | 4 ++-- 25 files changed, 28 insertions(+), 28 deletions(-) diff --git a/examples/onnx/iris-classifier/cortex.yaml b/examples/onnx/iris-classifier/cortex.yaml index 5d5949d701..78138f5d84 100644 --- a/examples/onnx/iris-classifier/cortex.yaml +++ b/examples/onnx/iris-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are 
on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub)
 
 - name: iris-classifier
-  kind:
+  kind: RealtimeAPI
   predictor:
     type: onnx
     path: predictor.py
diff --git a/examples/onnx/multi-model-classifier/cortex.yaml b/examples/onnx/multi-model-classifier/cortex.yaml
index 6a8557a702..6a4174a9f6 100644
--- a/examples/onnx/multi-model-classifier/cortex.yaml
+++ b/examples/onnx/multi-model-classifier/cortex.yaml
@@ -1,7 +1,7 @@
 # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub)
 
 - name: multi-model-classifier
-  kind:
+  kind: RealtimeAPI
   predictor:
     type: onnx
     path: predictor.py
diff --git a/examples/onnx/yolov5-youtube/cortex.yaml b/examples/onnx/yolov5-youtube/cortex.yaml
index 4c63c7e453..b8d1eca046 100644
--- a/examples/onnx/yolov5-youtube/cortex.yaml
+++ b/examples/onnx/yolov5-youtube/cortex.yaml
@@ -1,7 +1,7 @@
 # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub)
 
 - name: yolov5-youtube
-  kind:
+  kind: RealtimeAPI
   predictor:
     type: onnx
     path: predictor.py
diff --git a/examples/pytorch/answer-generator/cortex.yaml b/examples/pytorch/answer-generator/cortex.yaml
index 94bbef88c5..60552956fe 100644
--- a/examples/pytorch/answer-generator/cortex.yaml
+++ b/examples/pytorch/answer-generator/cortex.yaml
@@ -1,7 +1,7 @@
 # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g.
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: answer-generator - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/image-classifier-alexnet/cortex.yaml b/examples/pytorch/image-classifier-alexnet/cortex.yaml index da6a7d1d20..693471cc59 100644 --- a/examples/pytorch/image-classifier-alexnet/cortex.yaml +++ b/examples/pytorch/image-classifier-alexnet/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-alexnet - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/image-classifier-resnet50/cortex.yaml b/examples/pytorch/image-classifier-resnet50/cortex.yaml index fbb0f8e20a..18344b9917 100644 --- a/examples/pytorch/image-classifier-resnet50/cortex.yaml +++ b/examples/pytorch/image-classifier-resnet50/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/iris-classifier/cortex.yaml b/examples/pytorch/iris-classifier/cortex.yaml index 5bed271c6e..3f506030f7 100644 --- a/examples/pytorch/iris-classifier/cortex.yaml +++ b/examples/pytorch/iris-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/language-identifier/cortex.yaml b/examples/pytorch/language-identifier/cortex.yaml index 182520d447..1d54d1590a 100644 --- a/examples/pytorch/language-identifier/cortex.yaml +++ b/examples/pytorch/language-identifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: language-identifier - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/multi-model-text-analyzer/cortex.yaml b/examples/pytorch/multi-model-text-analyzer/cortex.yaml index 2d3ebef396..b699fb918a 100644 --- a/examples/pytorch/multi-model-text-analyzer/cortex.yaml +++ b/examples/pytorch/multi-model-text-analyzer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: multi-model-text-analyzer - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/object-detector/cortex.yaml b/examples/pytorch/object-detector/cortex.yaml index 86e720fa37..e08d2ca666 100644 --- a/examples/pytorch/object-detector/cortex.yaml +++ b/examples/pytorch/object-detector/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: object-detector - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/reading-comprehender/cortex.yaml b/examples/pytorch/reading-comprehender/cortex.yaml index 7340d12fe3..d8d1b2f942 100644 --- a/examples/pytorch/reading-comprehender/cortex.yaml +++ b/examples/pytorch/reading-comprehender/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: reading-comprehender - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/search-completer/cortex.yaml b/examples/pytorch/search-completer/cortex.yaml index 5c78f3a9bb..72e7c58f03 100644 --- a/examples/pytorch/search-completer/cortex.yaml +++ b/examples/pytorch/search-completer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: search-completer - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/sentiment-analyzer/cortex.yaml b/examples/pytorch/sentiment-analyzer/cortex.yaml index 960d405739..d859bdc01d 100644 --- a/examples/pytorch/sentiment-analyzer/cortex.yaml +++ b/examples/pytorch/sentiment-analyzer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: sentiment-analyzer - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/text-generator/cortex.yaml b/examples/pytorch/text-generator/cortex.yaml index f85cc85517..31899a5ca7 100644 --- a/examples/pytorch/text-generator/cortex.yaml +++ b/examples/pytorch/text-generator/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: text-generator - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/pytorch/text-summarizer/cortex.yaml b/examples/pytorch/text-summarizer/cortex.yaml index 2c4b1c2e85..6b6bcb96cc 100644 --- a/examples/pytorch/text-summarizer/cortex.yaml +++ b/examples/pytorch/text-summarizer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: text-summarizer - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/sklearn/iris-classifier/cortex.yaml b/examples/sklearn/iris-classifier/cortex.yaml index 111bd69966..ef7e21b72e 100644 --- a/examples/sklearn/iris-classifier/cortex.yaml +++ b/examples/sklearn/iris-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -15,7 +15,7 @@ mem: 100M - name: another-iris-classifier - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py @@ -29,7 +29,7 @@ mem: 100M - name: batch-iris-classifier - kind: + kind: RealtimeAPI predictor: type: python path: batch-predictor.py diff --git a/examples/sklearn/mpg-estimator/cortex.yaml b/examples/sklearn/mpg-estimator/cortex.yaml index 9185cd602e..a3f2ed2fb6 100644 --- a/examples/sklearn/mpg-estimator/cortex.yaml +++ b/examples/sklearn/mpg-estimator/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: mpg-estimator - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/spacy/entity-recognizer/cortex.yaml b/examples/spacy/entity-recognizer/cortex.yaml index c982243cca..c9fd00d794 100644 --- a/examples/spacy/entity-recognizer/cortex.yaml +++ b/examples/spacy/entity-recognizer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: entity-recognizer - kind: + kind: RealtimeAPI predictor: type: python path: predictor.py diff --git a/examples/tensorflow/image-classifier-inception/cortex.yaml b/examples/tensorflow/image-classifier-inception/cortex.yaml index 8848b3d5c2..9ee27e5a12 100644 --- a/examples/tensorflow/image-classifier-inception/cortex.yaml +++ b/examples/tensorflow/image-classifier-inception/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-inception - kind: + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/image-classifier-resnet50/cortex.yaml b/examples/tensorflow/image-classifier-resnet50/cortex.yaml index 51c8697f26..9095316729 100644 --- a/examples/tensorflow/image-classifier-resnet50/cortex.yaml +++ b/examples/tensorflow/image-classifier-resnet50/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: image-classifier-resnet50 - kind: + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/iris-classifier/cortex.yaml b/examples/tensorflow/iris-classifier/cortex.yaml index 5b47c1093f..ecfe78175b 100644 --- a/examples/tensorflow/iris-classifier/cortex.yaml +++ b/examples/tensorflow/iris-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier - kind: + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/multi-model-classifier/cortex.yaml b/examples/tensorflow/multi-model-classifier/cortex.yaml index 43d29c1c4b..1d5ebbd5d8 100644 --- a/examples/tensorflow/multi-model-classifier/cortex.yaml +++ b/examples/tensorflow/multi-model-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: multi-model-classifier - kind: + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/sentiment-analyzer/cortex.yaml b/examples/tensorflow/sentiment-analyzer/cortex.yaml index 61a2388033..6e9f458c0c 100644 --- a/examples/tensorflow/sentiment-analyzer/cortex.yaml +++ b/examples/tensorflow/sentiment-analyzer/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: sentiment-analyzer - kind: + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/tensorflow/text-generator/cortex.yaml b/examples/tensorflow/text-generator/cortex.yaml index 4434740f8a..2eb81505c5 100644 --- a/examples/tensorflow/text-generator/cortex.yaml +++ b/examples/tensorflow/text-generator/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: text-generator - kind: + kind: RealtimeAPI predictor: type: tensorflow path: predictor.py diff --git a/examples/traffic-splitter/cortex.yaml b/examples/traffic-splitter/cortex.yaml index 10ee00c58f..b489627f5b 100644 --- a/examples/traffic-splitter/cortex.yaml +++ b/examples/traffic-splitter/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier-onnx - kind: + kind: RealtimeAPI predictor: type: onnx path: onnx_predictor.py @@ -10,7 +10,7 @@ model_type: classification - name: iris-classifier-tf - kind: + kind: RealtimeAPI predictor: type: tensorflow path: tensorflow_predictor.py From 28a217126a743b9bbea850d3c67fd981b5625307 Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 19 Aug 2020 08:31:59 -0400 Subject: [PATCH 04/12] Respond to PR comments --- cli/cmd/lib_realtime_apis.go | 3 ++ dev/versions.md | 2 +- docs/cluster-management/config.md | 2 +- .../deployments/{batchapi.md => batch-api.md} | 6 ++-- .../api-configuration.md | 0 .../{batchapi => batch-api}/deployment.md | 0 .../{batchapi => batch-api}/endpoints.md | 0 .../{batchapi => batch-api}/predictors.md | 0 .../{batchapi => batch-api}/statuses.md | 0 docs/deployments/inferentia.md | 4 +-- docs/deployments/networking.md | 2 +- docs/deployments/python-packages.md | 2 +- .../{realtimeapi.md => realtime-api.md} | 6 ++-- .../api-configuration.md | 0 .../autoscaling.md | 0 .../deployment.md | 0 .../parallelism.md | 0 .../prediction-monitoring.md | 0 .../predictors.md | 0 .../{realtimeapi => realtime-api}/statuses.md | 0 .../traffic-splitter.md | 0 docs/summary.md | 30 +++++++++---------- .../server-side-batching-errors.md | 2 +- docs/troubleshooting/tf-session-in-predict.md | 2 +- 
examples/batch/image-classifier/README.md | 4 +-- examples/batch/onnx/README.md | 2 +- examples/batch/tensorflow/README.md | 2 +- examples/sklearn/iris-classifier/README.md | 6 ++-- pkg/types/spec/errors.go | 4 +-- 29 files changed, 40 insertions(+), 39 deletions(-) rename docs/deployments/{batchapi.md => batch-api.md} (91%) rename docs/deployments/{batchapi => batch-api}/api-configuration.md (100%) rename docs/deployments/{batchapi => batch-api}/deployment.md (100%) rename docs/deployments/{batchapi => batch-api}/endpoints.md (100%) rename docs/deployments/{batchapi => batch-api}/predictors.md (100%) rename docs/deployments/{batchapi => batch-api}/statuses.md (100%) rename docs/deployments/{realtimeapi.md => realtime-api.md} (89%) rename docs/deployments/{realtimeapi => realtime-api}/api-configuration.md (100%) rename docs/deployments/{realtimeapi => realtime-api}/autoscaling.md (100%) rename docs/deployments/{realtimeapi => realtime-api}/deployment.md (100%) rename docs/deployments/{realtimeapi => realtime-api}/parallelism.md (100%) rename docs/deployments/{realtimeapi => realtime-api}/prediction-monitoring.md (100%) rename docs/deployments/{realtimeapi => realtime-api}/predictors.md (100%) rename docs/deployments/{realtimeapi => realtime-api}/statuses.md (100%) rename docs/deployments/{realtimeapi => realtime-api}/traffic-splitter.md (100%) diff --git a/cli/cmd/lib_realtime_apis.go b/cli/cmd/lib_realtime_apis.go index 1c835858fe..a38e381465 100644 --- a/cli/cmd/lib_realtime_apis.go +++ b/cli/cmd/lib_realtime_apis.go @@ -47,6 +47,9 @@ func realtimeAPITable(realtimeAPI *schema.RealtimeAPI, env cliconfig.Environment t := realtimeAPIsTable([]schema.RealtimeAPI{*realtimeAPI}, []string{env.Name}) t.FindHeaderByTitle(_titleEnvironment).Hidden = true t.FindHeaderByTitle(_titleRealtimeAPI).Hidden = true + if env.Provider == types.LocalProviderType { + hideReplicaCountColumns(&t) + } out += t.MustFormat() diff --git a/dev/versions.md b/dev/versions.md index 
38ff02d6f0..2818278b9c 100644 --- a/dev/versions.md +++ b/dev/versions.md @@ -144,7 +144,7 @@ Note: it's ok if example training notebooks aren't upgraded, as long as the expo 1. Update versions in `images/python-predictor-*/Dockerfile`, `images/tensorflow-predictor/Dockerfile`, and `images/onnx-predictor-*/Dockerfile` 1. To determine the versions used in `images/python-predictor-inf/Dockerfile`, run `pip install --extra-index-url https://pip.repos.neuron.amazonaws.com neuron-cc tensorflow-neuron torch-neuron` from a clean environment and check what versions of all the dependencies are installed. 1. Update versions in `pkg/workloads/cortex/serve/requirements.txt` and `pkg/workloads/cortex/downloader/requirements.txt` -1. Update the versions listed in "Pre-installed packages" in `realtimeapi/predictors.md` and `batchapi/predictors.md` (look at the diff carefully since some packages are not shown, and e.g. `tensorflow-cpu` -> `tensorflow`) +1. Update the versions listed in "Pre-installed packages" in `realtime-api/predictors.md` and `batch-api/predictors.md` (look at the diff carefully since some packages are not shown, and e.g. `tensorflow-cpu` -> `tensorflow`) 1. Rerun all examples and check their logs ## Istio diff --git a/docs/cluster-management/config.md b/docs/cluster-management/config.md index 5f4d2cdbbd..f38c077851 100644 --- a/docs/cluster-management/config.md +++ b/docs/cluster-management/config.md @@ -85,7 +85,7 @@ spot: false ssl_certificate_arn: ``` -The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [Realtime API configuration](../deployments/realtimeapi/api-configuration.md) and in your [Batch API configuration](../deployments/batchapi/api-configuration.md). 
+The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and in your [Batch API configuration](../deployments/batch-api/api-configuration.md). The docker images used by the Cortex cluster can also be overridden, although this is not common. They can be configured by adding any of these keys to your cluster configuration file (default values are shown): diff --git a/docs/deployments/batchapi.md b/docs/deployments/batch-api.md similarity index 91% rename from docs/deployments/batchapi.md rename to docs/deployments/batch-api.md index 1e8786fd34..90b1700c29 100644 --- a/docs/deployments/batchapi.md +++ b/docs/deployments/batch-api.md @@ -13,7 +13,7 @@ You may want to deploy your model as a Batch API if any of the following scenari * inference is a part of internal data pipelines that may be chained together * a small number of requests are received, but each request takes minutes or hours to complete -You may want to consider deploying your model as a [Realtime API](realtimeapi.md) if these scenarios don't apply to you. +You may want to consider deploying your model as a [Realtime API](realtime-api.md) if these scenarios don't apply to you. A Batch API deployed in Cortex will create/support the following: @@ -39,5 +39,5 @@ At any point, you can use the Job ID that was provided upon job submission to ma * Try the [tutorial](../../examples/batch/image-classifier/README.md) to deploy a Batch API on your Cortex cluster. * See our [exporting guide](../guides/exporting.md) for how to export your model to use in a Batch API. -* See the [Predictor docs](batchapi/predictors.md) for how to implement a Predictor class. -* See the [API configuration docs](batchapi/api-configuration.md) for a full list of features that can be used to deploy your Batch API. 
+* See the [Predictor docs](batch-api/predictors.md) for how to implement a Predictor class. +* See the [API configuration docs](batch-api/api-configuration.md) for a full list of features that can be used to deploy your Batch API. diff --git a/docs/deployments/batchapi/api-configuration.md b/docs/deployments/batch-api/api-configuration.md similarity index 100% rename from docs/deployments/batchapi/api-configuration.md rename to docs/deployments/batch-api/api-configuration.md diff --git a/docs/deployments/batchapi/deployment.md b/docs/deployments/batch-api/deployment.md similarity index 100% rename from docs/deployments/batchapi/deployment.md rename to docs/deployments/batch-api/deployment.md diff --git a/docs/deployments/batchapi/endpoints.md b/docs/deployments/batch-api/endpoints.md similarity index 100% rename from docs/deployments/batchapi/endpoints.md rename to docs/deployments/batch-api/endpoints.md diff --git a/docs/deployments/batchapi/predictors.md b/docs/deployments/batch-api/predictors.md similarity index 100% rename from docs/deployments/batchapi/predictors.md rename to docs/deployments/batch-api/predictors.md diff --git a/docs/deployments/batchapi/statuses.md b/docs/deployments/batch-api/statuses.md similarity index 100% rename from docs/deployments/batchapi/statuses.md rename to docs/deployments/batch-api/statuses.md diff --git a/docs/deployments/inferentia.md b/docs/deployments/inferentia.md index 0981491e5b..769e2f90ea 100644 --- a/docs/deployments/inferentia.md +++ b/docs/deployments/inferentia.md @@ -22,7 +22,7 @@ Each Inferentia ASIC comes with 4 NeuronCores and 8GB of cache memory. To better A [NeuronCore Group](https://github.com/aws/aws-neuron-sdk/blob/master/docs/tensorflow-neuron/tutorial-NeuronCore-Group.md) (NCG) is a set of NeuronCores that is used to load and run a compiled model. NCGs exist to aggregate NeuronCores to improve hardware performance. 
Models can be shared within an NCG, but this would require the device driver to dynamically context switch between each model, which degrades performance. Therefore we've decided to only allow one model per NCG (unless you are using a [multi-model endpoint](../guides/multi-model.md), in which case there will be multiple models on a single NCG, and there will be context switching). -Each Cortex API process will have its own copy of the model and will run on its own NCG (the number of API processes is configured by the [`processes_per_replica`](realtimeapi/autoscaling.md#replica-parallelism) for Realtime APIs field in the API configuration). Each NCG will have an equal share of NeuronCores. Therefore, the size of each NCG will be `4 * inf / processes_per_replica` (`inf` refers to your API's `compute` request, and it's multiplied by 4 because there are 4 NeuronCores per Inferentia chip). +Each Cortex API process will have its own copy of the model and will run on its own NCG (the number of API processes is configured by the [`processes_per_replica`](realtime-api/autoscaling.md#replica-parallelism) for Realtime APIs field in the API configuration). Each NCG will have an equal share of NeuronCores. Therefore, the size of each NCG will be `4 * inf / processes_per_replica` (`inf` refers to your API's `compute` request, and it's multiplied by 4 because there are 4 NeuronCores per Inferentia chip). For example, if your API requests 2 `inf` chips, there will be 8 NeuronCores available. If you set `processes_per_replica` to 1, there will be one copy of your model running on a single NCG of size 8 NeuronCores. If `processes_per_replica` is 2, there will be two copies of your model, each running on a separate NCG of size 4 NeuronCores. If `processes_per_replica` is 4, there will be 4 NCGs of size 2 NeuronCores, and if If `processes_per_replica` is 8, there will be 8 NCGs of size 1 NeuronCores. In this scenario, these are the only valid values for `processes_per_replica`. 
In other words the total number of requested NeuronCores (which equals 4 * the number of requested Inferentia chips) must be divisible by `processes_per_replica`. @@ -64,7 +64,7 @@ model_neuron = torch.neuron.trace( model_neuron.save(compiled_model) ``` -The versions of `tensorflow-neuron` and `torch-neuron` that are used by Cortex are found in the [Realtime API pre-installed packages list](realtimeapi/predictors.md#inferentia-equipped-apis) and [Batch API pre-installed packages list](batchapi/predictors.md#inferentia-equipped-apis). When installing these packages with `pip` to compile models of your own, use the extra index URL `--extra-index-url=https://pip.repos.neuron.amazonaws.com`. +The versions of `tensorflow-neuron` and `torch-neuron` that are used by Cortex are found in the [Realtime API pre-installed packages list](realtime-api/predictors.md#inferentia-equipped-apis) and [Batch API pre-installed packages list](batch-api/predictors.md#inferentia-equipped-apis). When installing these packages with `pip` to compile models of your own, use the extra index URL `--extra-index-url=https://pip.repos.neuron.amazonaws.com`. See AWS's [TensorFlow](https://github.com/aws/aws-neuron-sdk/blob/master/docs/tensorflow-neuron/tutorial-compile-infer.md#step-3-compile-on-compilation-instance) and [PyTorch](https://github.com/aws/aws-neuron-sdk/blob/master/docs/pytorch-neuron/tutorial-compile-infer.md#step-3-compile-on-compilation-instance) guides on how to compile models for Inferentia. 
Here are 2 examples implemented with Cortex: diff --git a/docs/deployments/networking.md b/docs/deployments/networking.md index 1d296752f9..1f75d80f77 100644 --- a/docs/deployments/networking.md +++ b/docs/deployments/networking.md @@ -4,7 +4,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t ![api architecture diagram](https://user-images.githubusercontent.com/808475/84695323-8507dd00-aeff-11ea-8b32-5a55cef76c79.png) -APIs are deployed with a public API Gateway by default (the API Gateway forwards requests to the API load balancer). Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [Realtime API configuration](realtimeapi/api-configuration.md) and [Batch API configuration](batchapi/api-configuration.md). If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29 second request timeout, or if you are keeping your APIs private to your VPC. See below for common configurations. To disable API Gateway cluster-wide (thereby enforcing that all APIs cannot create API Gateway endpoints), set `api_gateway: disabled` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). +APIs are deployed with a public API Gateway by default (the API Gateway forwards requests to the API load balancer). Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [Realtime API configuration](realtime-api/api-configuration.md) and [Batch API configuration](batch-api/api-configuration.md). 
If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29 second request timeout, or if you are keeping your APIs private to your VPC. See below for common configurations. To disable API Gateway cluster-wide (thereby enforcing that all APIs cannot create API Gateway endpoints), set `api_gateway: disabled` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). By default, the API load balancer is public. You can configure your API load balancer to be private by setting `api_load_balancer_scheme: internal` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). This will force external traffic to go through your API Gateway endpoint, or if you disabled API Gateway for your API, it will make your API only accessible through VPC Peering. Note that if API Gateway is used, endpoints will be public regardless of `api_load_balancer_scheme`. See below for common configurations. diff --git a/docs/deployments/python-packages.md b/docs/deployments/python-packages.md index 8c740396e7..43118d3c38 100644 --- a/docs/deployments/python-packages.md +++ b/docs/deployments/python-packages.md @@ -16,7 +16,7 @@ You can install your required PyPI packages and import them in your Python files If you want to use `conda` to install your python packages, see the [Conda section](#conda-packages) below. -Note that some packages are pre-installed by default (see "pre-installed packages" for your Predictor type in the [Realtime API Predictor documentation](realtimeapi/predictors.md) and [Batch API Predictor documentation](batchapi/predictors.md)). 
+Note that some packages are pre-installed by default (see "pre-installed packages" for your Predictor type in the [Realtime API Predictor documentation](realtime-api/predictors.md) and [Batch API Predictor documentation](batch-api/predictors.md)). ## Private PyPI packages diff --git a/docs/deployments/realtimeapi.md b/docs/deployments/realtime-api.md similarity index 89% rename from docs/deployments/realtimeapi.md rename to docs/deployments/realtime-api.md index e6f504a76b..4971779b99 100644 --- a/docs/deployments/realtimeapi.md +++ b/docs/deployments/realtime-api.md @@ -13,7 +13,7 @@ You may want to deploy your model as a Realtime API if any of the following scen * predictions need to be made on an individual basis * predictions are served directly to consumers -You may want to consider deploying your model as a [Batch API](batchapi.md) if these scenarios don't apply to you. +You may want to consider deploying your model as a [Batch API](batch-api.md) if these scenarios don't apply to you. A Realtime API deployed in Cortex has the following features: @@ -42,5 +42,5 @@ The Cortex Cluster will automatically scale based on the incoming traffic and th * Try the [tutorial](../../examples/sklearn/iris-classifier/README.md) to deploy a Realtime API locally or on AWS. * See our [exporting guide](../guides/exporting.md) for how to export your model to use in a Realtime API. -* See the [Predictor docs](realtimeapi/predictors.md) for how to implement a Predictor class. -* See the [API configuration docs](realtimeapieapi/api-configuration.md) for a full list of features that can be used to deploy your Realtime API. +* See the [Predictor docs](realtime-api/predictors.md) for how to implement a Predictor class. +* See the [API configuration docs](realtime-api/api-configuration.md) for a full list of features that can be used to deploy your Realtime API. 
diff --git a/docs/deployments/realtimeapi/api-configuration.md b/docs/deployments/realtime-api/api-configuration.md similarity index 100% rename from docs/deployments/realtimeapi/api-configuration.md rename to docs/deployments/realtime-api/api-configuration.md diff --git a/docs/deployments/realtimeapi/autoscaling.md b/docs/deployments/realtime-api/autoscaling.md similarity index 100% rename from docs/deployments/realtimeapi/autoscaling.md rename to docs/deployments/realtime-api/autoscaling.md diff --git a/docs/deployments/realtimeapi/deployment.md b/docs/deployments/realtime-api/deployment.md similarity index 100% rename from docs/deployments/realtimeapi/deployment.md rename to docs/deployments/realtime-api/deployment.md diff --git a/docs/deployments/realtimeapi/parallelism.md b/docs/deployments/realtime-api/parallelism.md similarity index 100% rename from docs/deployments/realtimeapi/parallelism.md rename to docs/deployments/realtime-api/parallelism.md diff --git a/docs/deployments/realtimeapi/prediction-monitoring.md b/docs/deployments/realtime-api/prediction-monitoring.md similarity index 100% rename from docs/deployments/realtimeapi/prediction-monitoring.md rename to docs/deployments/realtime-api/prediction-monitoring.md diff --git a/docs/deployments/realtimeapi/predictors.md b/docs/deployments/realtime-api/predictors.md similarity index 100% rename from docs/deployments/realtimeapi/predictors.md rename to docs/deployments/realtime-api/predictors.md diff --git a/docs/deployments/realtimeapi/statuses.md b/docs/deployments/realtime-api/statuses.md similarity index 100% rename from docs/deployments/realtimeapi/statuses.md rename to docs/deployments/realtime-api/statuses.md diff --git a/docs/deployments/realtimeapi/traffic-splitter.md b/docs/deployments/realtime-api/traffic-splitter.md similarity index 100% rename from docs/deployments/realtimeapi/traffic-splitter.md rename to docs/deployments/realtime-api/traffic-splitter.md diff --git a/docs/summary.md 
b/docs/summary.md index bf76b52dca..e798e68a51 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -11,22 +11,22 @@ ## Deployments -* [Realtime API](deployments/realtime.md) - * [Predictor implementation](deployments/realtime/predictors.md) - * [API configuration](deployments/realtime/api-configuration.md) - * [API deployment](deployments/realtime/deployment.md) - * [API statuses](deployments/realtime/statuses.md) - * [Parallelism](deployments/realtime/parallelism.md) - * [Autoscaling](deployments/realtime/autoscaling.md) - * [Prediction monitoring](deployments/realtime/prediction-monitoring.md) +* [Realtime API](deployments/realtime-api/.md) + * [Predictor implementation](deployments/realtime-api//predictors.md) + * [API configuration](deployments/realtime-api//api-configuration.md) + * [API deployment](deployments/realtime-api//deployment.md) + * [API statuses](deployments/realtime-api//statuses.md) + * [Parallelism](deployments/realtime-api//parallelism.md) + * [Autoscaling](deployments/realtime-api//autoscaling.md) + * [Prediction monitoring](deployments/realtime-api//prediction-monitoring.md) * [Tutorial](../examples/sklearn/iris-classifier/README.md) - * [Traffic Splitter](deployments/realtime/traffic-splitter.md) -* [Batch API](deployments/batchapi.md) - * [Predictor implementation](deployments/batchapi/predictors.md) - * [API configuration](deployments/batchapi/api-configuration.md) - * [API deployment](deployments/batchapi/deployment.md) - * [Endpoints](deployments/batchapi/endpoints.md) - * [Job statuses](deployments/batchapi/statuses.md) + * [Traffic Splitter](deployments/realtime-api//traffic-splitter.md) +* [Batch API](deployments/batch-api.md) + * [Predictor implementation](deployments/batch-api/predictors.md) + * [API configuration](deployments/batch-api/api-configuration.md) + * [API deployment](deployments/batch-api/deployment.md) + * [Endpoints](deployments/batch-api/endpoints.md) + * [Job statuses](deployments/batch-api/statuses.md) * 
[Tutorial](../examples/batch/image-classifier/README.md) ## Advanced diff --git a/docs/troubleshooting/server-side-batching-errors.md b/docs/troubleshooting/server-side-batching-errors.md index a674a09e93..4740d903fa 100644 --- a/docs/troubleshooting/server-side-batching-errors.md +++ b/docs/troubleshooting/server-side-batching-errors.md @@ -2,7 +2,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -When `max_batch_size` and `batch_interval` fields are set for the [Realtime API TensorFlow Predictor](../deployments/realtimeapi/predictors.md#tensorflow-predictor), errors can be encountered if the associated model hasn't been built for batching. +When `max_batch_size` and `batch_interval` fields are set for the [Realtime API TensorFlow Predictor](../deployments/realtime-api/predictors.md#tensorflow-predictor), errors can be encountered if the associated model hasn't been built for batching. The following error is an example of what happens when the input shape doesn't accommodate batching - e.g. 
when its shape is `[height, width, 3]` instead of `[batch_size, height, width, 3]`: diff --git a/docs/troubleshooting/tf-session-in-predict.md b/docs/troubleshooting/tf-session-in-predict.md index bba6e090e0..c8e1d56218 100644 --- a/docs/troubleshooting/tf-session-in-predict.md +++ b/docs/troubleshooting/tf-session-in-predict.md @@ -2,7 +2,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -When doing inferences with TensorFlow using the [Realtime API Python Predictor](../deployments/realtimeapi/predictors.md#python-predictor) or [Batch API Python Predictor](../deployments/batchapi/predictors.md#python-predictor), it should be noted that your Python Predictor's `__init__()` constructor is only called on one thread, whereas its `predict()` method can run on any of the available threads (which is configured via the `threads_per_process` field in the API's `predictor` configuration). If `threads_per_process` is set to `1` (the default value), then there is no concern, since `__init__()` and `predict()` will run on the same thread. However, if `threads_per_process` is greater than `1`, then only one of the inference threads will have executed the `__init__()` function. This can cause issues with TensorFlow because the default graph is a property of the current thread, so if `__init__()` initializes the TensorFlow graph, only the thread that executed `__init__()` will have the default graph set. +When doing inferences with TensorFlow using the [Realtime API Python Predictor](../deployments/realtime-api/predictors.md#python-predictor) or [Batch API Python Predictor](../deployments/batch-api/predictors.md#python-predictor), it should be noted that your Python Predictor's `__init__()` constructor is only called on one thread, whereas its `predict()` method can run on any of the available threads (which is configured via the `threads_per_process` field in the API's `predictor` configuration). 
If `threads_per_process` is set to `1` (the default value), then there is no concern, since `__init__()` and `predict()` will run on the same thread. However, if `threads_per_process` is greater than `1`, then only one of the inference threads will have executed the `__init__()` function. This can cause issues with TensorFlow because the default graph is a property of the current thread, so if `__init__()` initializes the TensorFlow graph, only the thread that executed `__init__()` will have the default graph set. The error you may see if the default graph is not set (as a consequence of `__init__()` and `predict()` running in separate threads) is: diff --git a/examples/batch/image-classifier/README.md b/examples/batch/image-classifier/README.md index 2ce6092c61..ac8e508a68 100644 --- a/examples/batch/image-classifier/README.md +++ b/examples/batch/image-classifier/README.md @@ -106,7 +106,7 @@ class PythonPredictor: ) ``` -Here are the complete [Predictor docs](../../../docs/deployments/batchapi/predictors.md). +Here are the complete [Predictor docs](../../../docs/deployments/batch-api/predictors.md).
@@ -141,7 +141,7 @@ Create a `cortex.yaml` file and add the configuration below. An `api` with `kind cpu: 1 ``` -Here are the complete [API configuration docs](../../../docs/deployments/batchapi/api-configuration.md). +Here are the complete [API configuration docs](../../../docs/deployments/batch-api/api-configuration.md).
diff --git a/examples/batch/onnx/README.md b/examples/batch/onnx/README.md index 799ec688f1..e971b5b909 100644 --- a/examples/batch/onnx/README.md +++ b/examples/batch/onnx/README.md @@ -3,4 +3,4 @@ _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub)_ -Please refer to the [tutorial](https://docs.cortex.dev/v/master/batchapi/image-classifier#deploy-your-batch-api) to see how to deploy a Batch API with Cortex. +Please refer to the [tutorial](https://docs.cortex.dev/v/master/batch-api/image-classifier#deploy-your-batch-api) to see how to deploy a Batch API with Cortex. diff --git a/examples/batch/tensorflow/README.md b/examples/batch/tensorflow/README.md index 01950b073d..bdbe399fe2 100644 --- a/examples/batch/tensorflow/README.md +++ b/examples/batch/tensorflow/README.md @@ -3,4 +3,4 @@ _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub)_ -Please refer to the [tutorial](https://docs.cortex.dev/v/master/batchapi/image-classifier#deploy-your-batch-api) to see how to deploy a Batch API with Cortex. +Please refer to the [tutorial](https://docs.cortex.dev/v/master/batch-api/image-classifier#deploy-your-batch-api) to see how to deploy a Batch API with Cortex. diff --git a/examples/sklearn/iris-classifier/README.md b/examples/sklearn/iris-classifier/README.md index 0a0452301a..a03ab30ced 100644 --- a/examples/sklearn/iris-classifier/README.md +++ b/examples/sklearn/iris-classifier/README.md @@ -90,7 +90,7 @@ class PythonPredictor: return labels[label_id] ``` -Here are the complete [Predictor docs](../../../docs/deployments/realtimeapi/predictors.md). +Here are the complete [Predictor docs](../../../docs/deployments/realtime-api/predictors.md).
@@ -104,7 +104,7 @@ Create a `requirements.txt` file to specify the dependencies needed by `predicto boto3 ``` -You can skip dependencies that are [pre-installed](../../../docs/deployments/realtimeapi/predictors.md) to speed up the deployment process. Note that `pickle` is part of the Python standard library so it doesn't need to be included. +You can skip dependencies that are [pre-installed](../../../docs/deployments/realtime-api/predictors.md) to speed up the deployment process. Note that `pickle` is part of the Python standard library so it doesn't need to be included.
@@ -125,7 +125,7 @@ Create a `cortex.yaml` file and add the configuration below and replace `cortex- key: sklearn/iris-classifier/model.pkl ``` -Here are the complete [API configuration docs](../../../docs/deployments/realtimeapi/api-configuration.md). +Here are the complete [API configuration docs](../../../docs/deployments/realtime-api/api-configuration.md).
diff --git a/pkg/types/spec/errors.go b/pkg/types/spec/errors.go index 759323eea7..753f7569e4 100644 --- a/pkg/types/spec/errors.go +++ b/pkg/types/spec/errors.go @@ -70,7 +70,7 @@ const ( ErrInvalidNumberOfInfs = "spec.invalid_number_of_infs" ErrInsufficientBatchConcurrencyLevel = "spec.insufficient_batch_concurrency_level" ErrInsufficientBatchConcurrencyLevelInf = "spec.insufficient_batch_concurrency_level_inf" - ErrIncorrectTrafficSplitterWeight = "spec.incorrect_traffic_splitters_weight" + ErrIncorrectTrafficSplitterWeight = "spec.incorrect_traffic_splitter_weight" ErrTrafficSplitterAPIsNotUnique = "spec.traffic_splitter_apis_not_unique" ) @@ -397,7 +397,6 @@ func ErrorInsufficientBatchConcurrencyLevelInf(maxBatchSize int32, threadsPerPro }) } -// TODO test this error func ErrorIncorrectTrafficSplitterWeightTotal(totalWeight int) error { return errors.WithStack(&errors.Error{ Kind: ErrIncorrectTrafficSplitterWeight, @@ -405,7 +404,6 @@ func ErrorIncorrectTrafficSplitterWeightTotal(totalWeight int) error { }) } -// TODO test this error func ErrorTrafficSplitterAPIsNotUnique(names []string) error { return errors.WithStack(&errors.Error{ Kind: ErrTrafficSplitterAPIsNotUnique, From e41533eb144bf09cafb2e40b75a62d1f8a0a957b Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 19 Aug 2020 08:50:49 -0400 Subject: [PATCH 05/12] Update summary.md --- docs/summary.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/summary.md b/docs/summary.md index e798e68a51..31847e2ac9 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -11,16 +11,16 @@ ## Deployments -* [Realtime API](deployments/realtime-api/.md) - * [Predictor implementation](deployments/realtime-api//predictors.md) - * [API configuration](deployments/realtime-api//api-configuration.md) - * [API deployment](deployments/realtime-api//deployment.md) - * [API statuses](deployments/realtime-api//statuses.md) - * [Parallelism](deployments/realtime-api//parallelism.md) - * 
[Autoscaling](deployments/realtime-api//autoscaling.md) - * [Prediction monitoring](deployments/realtime-api//prediction-monitoring.md) +* [Realtime API](deployments/realtime-api.md) + * [Predictor implementation](deployments/realtime-api/predictors.md) + * [API configuration](deployments/realtime-api/api-configuration.md) + * [API deployment](deployments/realtime-api/deployment.md) + * [API statuses](deployments/realtime-api/statuses.md) + * [Parallelism](deployments/realtime-api/parallelism.md) + * [Autoscaling](deployments/realtime-api/autoscaling.md) + * [Prediction monitoring](deployments/realtime-api/prediction-monitoring.md) * [Tutorial](../examples/sklearn/iris-classifier/README.md) - * [Traffic Splitter](deployments/realtime-api//traffic-splitter.md) + * [Traffic Splitter](deployments/realtime-api/traffic-splitter.md) * [Batch API](deployments/batch-api.md) * [Predictor implementation](deployments/batch-api/predictors.md) * [API configuration](deployments/batch-api/api-configuration.md) From 63b504edc4c70fb40224c9c8907106b4758a776c Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 19 Aug 2020 08:59:56 -0400 Subject: [PATCH 06/12] Reorder tutorial and traffic splitter --- docs/summary.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/summary.md b/docs/summary.md index 31847e2ac9..b916ce74eb 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -19,8 +19,8 @@ * [Parallelism](deployments/realtime-api/parallelism.md) * [Autoscaling](deployments/realtime-api/autoscaling.md) * [Prediction monitoring](deployments/realtime-api/prediction-monitoring.md) - * [Tutorial](../examples/sklearn/iris-classifier/README.md) * [Traffic Splitter](deployments/realtime-api/traffic-splitter.md) + * [Tutorial](../examples/sklearn/iris-classifier/README.md) * [Batch API](deployments/batch-api.md) * [Predictor implementation](deployments/batch-api/predictors.md) * [API configuration](deployments/batch-api/api-configuration.md) From 
39b923eb7d0df44fbcf642779786ac8dd9e4639a Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 19 Aug 2020 10:01:21 -0400 Subject: [PATCH 07/12] Update endpoints.md --- docs/deployments/batch-api/endpoints.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/deployments/batch-api/endpoints.md b/docs/deployments/batch-api/endpoints.md index f520c01dbd..0d5550fc69 100644 --- a/docs/deployments/batch-api/endpoints.md +++ b/docs/deployments/batch-api/endpoints.md @@ -2,7 +2,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -Once your model is [exported](../../guides/exporting.md), you've implemented a [Predictor](predictors.md), you've [configured your API](api-configuration.md), and you've [deployed an api](endpoints.md), you can submit and manage jobs by making HTTP requests to your Batch API endpoint. +Once your model is [exported](../../guides/exporting.md), you've implemented a [Predictor](predictors.md), you've [configured your API](api-configuration.md), and you've [deployed an api](deployment.md), you can submit and manage jobs by making HTTP requests to your Batch API endpoint. 
A deployed Batch API endpoint supports the following: From 4bf27072b2767fa353d02bd0727a69739d1a8571 Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 19 Aug 2020 10:10:30 -0400 Subject: [PATCH 08/12] Update documentation in Go --- cli/cmd/errors.go | 2 +- cli/local/deploy.go | 2 +- pkg/operator/endpoints/submit_job.go | 2 +- pkg/operator/resources/batchapi/validations.go | 2 +- pkg/operator/resources/resources.go | 2 +- pkg/types/spec/errors.go | 4 ++-- pkg/types/spec/validations.go | 10 +++++----- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cli/cmd/errors.go b/cli/cmd/errors.go index 832dd02356..7358d4a3cf 100644 --- a/cli/cmd/errors.go +++ b/cli/cmd/errors.go @@ -347,6 +347,6 @@ func ErrorDeployFromTopLevelDir(genericDirName string, providerType types.Provid } return errors.WithStack(&errors.Error{ Kind: ErrDeployFromTopLevelDir, - Message: fmt.Sprintf("cannot deploy from your %s directory - when deploying your API, cortex sends all files in your project directory (i.e. the directory which contains cortex.yaml) to your %s (see https://docs.cortex.dev/v/%s/deployments/realtimeapi/predictors#project-files for Realtime API and https://docs.cortex.dev/v/%s/deployments/batchapi/predictors#project-files for Batch API); therefore it is recommended to create a subdirectory for your project files", genericDirName, targetStr, consts.CortexVersionMinor, consts.CortexVersionMinor), + Message: fmt.Sprintf("cannot deploy from your %s directory - when deploying your API, cortex sends all files in your project directory (i.e.
the directory which contains cortex.yaml) to your %s (see https://docs.cortex.dev/v/%s/deployments/realtime-api/predictors#project-files for Realtime API and https://docs.cortex.dev/v/%s/deployments/batch-api/predictors#project-files for Batch API); therefore it is recommended to create a subdirectory for your project files", genericDirName, targetStr, consts.CortexVersionMinor, consts.CortexVersionMinor), }) } diff --git a/cli/local/deploy.go b/cli/local/deploy.go index b12f6bcb0f..75daa45214 100644 --- a/cli/local/deploy.go +++ b/cli/local/deploy.go @@ -69,7 +69,7 @@ func Deploy(env cliconfig.Environment, configPath string, projectFileList []stri err = ValidateLocalAPIs(apiConfigs, projectFiles, awsClient) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration", consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration", consts.CortexVersionMinor)) return schema.DeployResponse{}, err } diff --git a/pkg/operator/endpoints/submit_job.go b/pkg/operator/endpoints/submit_job.go index 6e380f2e8a..44bc1f606c 100644 --- a/pkg/operator/endpoints/submit_job.go +++ b/pkg/operator/endpoints/submit_job.go @@ -60,7 +60,7 @@ func SubmitJob(w http.ResponseWriter, r *http.Request) { err = json.Unmarshal(bodyBytes, &submission) if err != nil { - respondError(w, r, errors.Append(err, fmt.Sprintf("\n\njob submission schema can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/endpoints", consts.CortexVersionMinor))) + respondError(w, r, errors.Append(err, fmt.Sprintf("\n\njob submission schema can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/endpoints", consts.CortexVersionMinor))) return } diff --git a/pkg/operator/resources/batchapi/validations.go 
b/pkg/operator/resources/batchapi/validations.go index 744a6c250e..13323161b6 100644 --- a/pkg/operator/resources/batchapi/validations.go +++ b/pkg/operator/resources/batchapi/validations.go @@ -86,7 +86,7 @@ func validateJobSubmissionSchema(submission *schema.JobSubmission) error { func validateJobSubmission(submission *schema.JobSubmission) error { err := validateJobSubmissionSchema(submission) if err != nil { - return errors.Append(err, fmt.Sprintf("\n\njob submission schema can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/endpoints", consts.CortexVersionMinor)) + return errors.Append(err, fmt.Sprintf("\n\njob submission schema can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/endpoints", consts.CortexVersionMinor)) } if submission.FilePathLister != nil { diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go index fcf2ec7e84..a962bf770c 100644 --- a/pkg/operator/resources/resources.go +++ b/pkg/operator/resources/resources.go @@ -93,7 +93,7 @@ func Deploy(projectBytes []byte, configFileName string, configBytes []byte, forc err = ValidateClusterAPIs(apiConfigs, projectFiles) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nRealtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration\nTraffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/trafficsplitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nRealtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration\nTraffic Splitter can be found at 
https://docs.cortex.dev/v/%s/deployments/realtime-api/trafficsplitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) return nil, err } diff --git a/pkg/types/spec/errors.go b/pkg/types/spec/errors.go index 753f7569e4..59d9ce33b2 100644 --- a/pkg/types/spec/errors.go +++ b/pkg/types/spec/errors.go @@ -77,14 +77,14 @@ const ( func ErrorMalformedConfig() error { return errors.WithStack(&errors.Error{ Kind: ErrMalformedConfig, - Message: fmt.Sprintf("cortex YAML configuration files must contain a list of maps (see https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration for Realtime API documentation and see https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration for Batch API documentation)", consts.CortexVersionMinor, consts.CortexVersionMinor), + Message: fmt.Sprintf("cortex YAML configuration files must contain a list of maps (see https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration for Realtime API documentation and see https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration for Batch API documentation)", consts.CortexVersionMinor, consts.CortexVersionMinor), }) } func ErrorNoAPIs() error { return errors.WithStack(&errors.Error{ Kind: ErrNoAPIs, - Message: fmt.Sprintf("at least one API must be configured (see https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration for Realtime API documentation and see https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration for Batch API documentation)", consts.CortexVersionMinor, consts.CortexVersionMinor), + Message: fmt.Sprintf("at least one API must be configured (see https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration for Realtime API documentation and see https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration for Batch API documentation)", consts.CortexVersionMinor, consts.CortexVersionMinor), }) } diff --git a/pkg/types/spec/validations.go b/pkg/types/spec/validations.go 
index 8a14dd6967..c4dcc877a7 100644 --- a/pkg/types/spec/validations.go +++ b/pkg/types/spec/validations.go @@ -595,9 +595,9 @@ func ExtractAPIConfigs(configBytes []byte, provider types.ProviderType, configFi err = errors.Wrap(errors.FirstError(errs...), userconfig.IdentifyAPI(configFileName, name, kind, i)) switch provider { case types.LocalProviderType: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration", consts.CortexVersionMinor)) + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration", consts.CortexVersionMinor)) case types.AWSProviderType: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nRealtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration\nTraffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/traffic-splitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nRealtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration\nTraffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/traffic-splitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) } } @@ -615,11 +615,11 @@ func ExtractAPIConfigs(configBytes []byte, provider types.ProviderType, configFi err = errors.Wrap(errors.FirstError(errs...), userconfig.IdentifyAPI(configFileName, name, kind, i)) switch kind { case userconfig.RealtimeAPIKind: - 
return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/api-configuration", consts.CortexVersionMinor)) + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Realtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration", consts.CortexVersionMinor)) case userconfig.BatchAPIKind: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Batch API can be found at https://docs.cortex.dev/v/%s/deployments/batchapi/api-configuration", consts.CortexVersionMinor)) + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Batch API can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration", consts.CortexVersionMinor)) case userconfig.TrafficSplitterKind: - return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Traffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtimeapi/traffic-splitter", consts.CortexVersionMinor)) + return nil, errors.Append(err, fmt.Sprintf("\n\napi configuration schema for Traffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/traffic-splitter", consts.CortexVersionMinor)) } } From 10938e227394776155a73c499270d8044a01fc79 Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 19 Aug 2020 10:22:38 -0400 Subject: [PATCH 09/12] Update errors.go --- pkg/operator/resources/errors.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/operator/resources/errors.go b/pkg/operator/resources/errors.go index b7d9a54356..1fcacfb43c 100644 --- a/pkg/operator/resources/errors.go +++ b/pkg/operator/resources/errors.go @@ -32,7 +32,7 @@ const ( ErrCannotChangeTypeOfDeployedAPI = "resources.cannot_change_kind_of_deployed_api" ErrNoAvailableNodeComputeLimit = "resources.no_available_node_compute_limit" ErrJobIDRequired = "resources.job_id_required" 
- ErrAPIUsedByTrafficSplitter = "resources.realtimeapi_used_by_traffic_splitter" + ErrAPIUsedByTrafficSplitter = "resources.realtime_api_used_by_traffic_splitter" ErrNotDeployedAPIsTrafficSplitter = "resources.trafficsplit_apis_not_deployed" ErrAPIGatewayDisabled = "resources.api_gateway_disabled" ) From b2b2d2c3c7aae0da199dedbe83813d6558ae8697 Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 19 Aug 2020 10:39:05 -0400 Subject: [PATCH 10/12] Update resources.go --- pkg/operator/resources/resources.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go index a962bf770c..e63d056d5f 100644 --- a/pkg/operator/resources/resources.go +++ b/pkg/operator/resources/resources.go @@ -93,7 +93,7 @@ func Deploy(projectBytes []byte, configFileName string, configBytes []byte, forc err = ValidateClusterAPIs(apiConfigs, projectFiles) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nRealtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration\nTraffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/trafficsplitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\napi configuration schema for:\n\nRealtime API can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/api-configuration\nBatch API can be found at https://docs.cortex.dev/v/%s/deployments/batch-api/api-configuration\nTraffic Splitter can be found at https://docs.cortex.dev/v/%s/deployments/realtime-api/traffic-splitter", consts.CortexVersionMinor, consts.CortexVersionMinor, consts.CortexVersionMinor)) return nil, err } From 0bb54419b5e42a834237e27447be5b5144325688 Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 19 Aug 2020 15:35:46 -0400 
Subject: [PATCH 11/12] Respond to PR comments --- examples/onnx/iris-classifier/cortex.yaml | 2 +- .../onnx/multi-model-classifier/cortex.yaml | 2 +- pkg/operator/resources/errors.go | 26 +++++++++---------- pkg/operator/resources/validations.go | 2 +- pkg/operator/schema/schema.go | 2 +- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/onnx/iris-classifier/cortex.yaml b/examples/onnx/iris-classifier/cortex.yaml index 78138f5d84..fead84f756 100644 --- a/examples/onnx/iris-classifier/cortex.yaml +++ b/examples/onnx/iris-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: iris-classifier - kind: RealtimeAPi + kind: RealtimeAPI predictor: type: onnx path: predictor.py diff --git a/examples/onnx/multi-model-classifier/cortex.yaml b/examples/onnx/multi-model-classifier/cortex.yaml index 6a4174a9f6..1c7cd44e1b 100644 --- a/examples/onnx/multi-model-classifier/cortex.yaml +++ b/examples/onnx/multi-model-classifier/cortex.yaml @@ -1,7 +1,7 @@ # WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. 
for version 0.18.*, run `git checkout -b 0.18` or switch to the `0.18` branch on GitHub) - name: multi-model-classifier - kind: RealtimeApi + kind: RealtimeAPI predictor: type: onnx path: predictor.py diff --git a/pkg/operator/resources/errors.go b/pkg/operator/resources/errors.go index 1fcacfb43c..bab90d8612 100644 --- a/pkg/operator/resources/errors.go +++ b/pkg/operator/resources/errors.go @@ -27,14 +27,14 @@ import ( ) const ( - ErrOperationIsOnlySupportedForKind = "resources.operation_is_only_supported_for_kind" - ErrAPINotDeployed = "resources.api_not_deployed" - ErrCannotChangeTypeOfDeployedAPI = "resources.cannot_change_kind_of_deployed_api" - ErrNoAvailableNodeComputeLimit = "resources.no_available_node_compute_limit" - ErrJobIDRequired = "resources.job_id_required" - ErrAPIUsedByTrafficSplitter = "resources.realtime_api_used_by_traffic_splitter" - ErrNotDeployedAPIsTrafficSplitter = "resources.trafficsplit_apis_not_deployed" - ErrAPIGatewayDisabled = "resources.api_gateway_disabled" + ErrOperationIsOnlySupportedForKind = "resources.operation_is_only_supported_for_kind" + ErrAPINotDeployed = "resources.api_not_deployed" + ErrCannotChangeTypeOfDeployedAPI = "resources.cannot_change_kind_of_deployed_api" + ErrNoAvailableNodeComputeLimit = "resources.no_available_node_compute_limit" + ErrJobIDRequired = "resources.job_id_required" + ErrRealtimeAPIUsedByTrafficSplitter = "resources.realtime_api_used_by_traffic_splitter" + ErrAPIsNotDeployed = "resources.apis_not_deployed" + ErrAPIGatewayDisabled = "resources.api_gateway_disabled" ) func ErrorOperationIsOnlySupportedForKind(resource operator.DeployedResource, supportedKind userconfig.Kind, supportedKinds ...userconfig.Kind) error { @@ -78,18 +78,18 @@ func ErrorNoAvailableNodeComputeLimit(resource string, reqStr string, maxStr str func ErrorAPIUsedByTrafficSplitter(trafficSplitters []string) error { return errors.WithStack(&errors.Error{ - Kind: ErrAPIUsedByTrafficSplitter, + Kind: 
ErrRealtimeAPIUsedByTrafficSplitter, Message: fmt.Sprintf("cannot delete api because it is used by the following %s: %s", strings.PluralS("TrafficSplitter", len(trafficSplitters)), strings.StrsSentence(trafficSplitters, "")), }) } -func ErrorNotDeployedAPIsTrafficSplitter(notDeployedAPIs []string) error { - message := fmt.Sprintf("apis %s were either not found or are not RealtimeAPI kind", strings.StrsAnd(notDeployedAPIs)) +func ErrorAPIsNotDeployed(notDeployedAPIs []string) error { + message := fmt.Sprintf("apis %s were either not found or are not RealtimeAPI", strings.StrsAnd(notDeployedAPIs)) if len(notDeployedAPIs) == 1 { - message = fmt.Sprintf("api %s was either not found or is not RealtimeAPI kind", notDeployedAPIs[0]) + message = fmt.Sprintf("api %s was either not found or is not a RealtimeAPI", notDeployedAPIs[0]) } return errors.WithStack(&errors.Error{ - Kind: ErrNotDeployedAPIsTrafficSplitter, + Kind: ErrAPIsNotDeployed, Message: message, }) } diff --git a/pkg/operator/resources/validations.go b/pkg/operator/resources/validations.go index b9487c33e9..c8265ff97d 100644 --- a/pkg/operator/resources/validations.go +++ b/pkg/operator/resources/validations.go @@ -319,7 +319,7 @@ func checkIfAPIExists(trafficSplitterAPIs []*userconfig.TrafficSplit, apis []use } } if len(missingAPIs) != 0 { - return ErrorNotDeployedAPIsTrafficSplitter(missingAPIs) + return ErrorAPIsNotDeployed(missingAPIs) } return nil diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go index d8a849d8ec..eaf70696ad 100644 --- a/pkg/operator/schema/schema.go +++ b/pkg/operator/schema/schema.go @@ -73,7 +73,7 @@ type TrafficSplitter struct { type GetAPIResponse struct { RealtimeAPI *RealtimeAPI `json:"realtime_api"` BatchAPI *BatchAPI `json:"batch_api"` - TrafficSplitter *TrafficSplitter `json:"traffic_splitters"` + TrafficSplitter *TrafficSplitter `json:"traffic_splitter"` } type BatchAPI struct { From 75db39801bd7c318434d5dc3e5c8975c62c32bcc Mon Sep 17 00:00:00 2001 
From: vishal Date: Wed, 19 Aug 2020 15:36:47 -0400 Subject: [PATCH 12/12] Update errors.go --- pkg/operator/resources/errors.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/operator/resources/errors.go b/pkg/operator/resources/errors.go index bab90d8612..cd96120f92 100644 --- a/pkg/operator/resources/errors.go +++ b/pkg/operator/resources/errors.go @@ -84,7 +84,7 @@ func ErrorAPIUsedByTrafficSplitter(trafficSplitters []string) error { } func ErrorAPIsNotDeployed(notDeployedAPIs []string) error { - message := fmt.Sprintf("apis %s were either not found or are not RealtimeAPI", strings.StrsAnd(notDeployedAPIs)) + message := fmt.Sprintf("apis %s were either not found or are not RealtimeAPIs", strings.StrsAnd(notDeployedAPIs)) if len(notDeployedAPIs) == 1 { message = fmt.Sprintf("api %s was either not found or is not a RealtimeAPI", notDeployedAPIs[0]) }