diff --git a/README.md b/README.md
index c2ebb2810a..90cb13018a 100644
--- a/README.md
+++ b/README.md
@@ -106,8 +106,8 @@ The CLI sends configuration and code to the cluster every time you run `cortex d
 ## Examples
-
-- [Sentiment analysis](https://github.com/cortexlabs/cortex/tree/0.10/examples/tensorflow/sentiment-analysis) in TensorFlow with BERT
+
+- [Sentiment analysis](https://github.com/cortexlabs/cortex/tree/0.10/examples/tensorflow/sentiment-analyzer) in TensorFlow with BERT
 - [Image classification](https://github.com/cortexlabs/cortex/tree/0.10/examples/tensorflow/image-classifier) in TensorFlow with Inception
 - [Text generation](https://github.com/cortexlabs/cortex/tree/0.10/examples/pytorch/text-generator) in PyTorch with DistilGPT2
 - [Reading comprehension](https://github.com/cortexlabs/cortex/tree/0.10/examples/pytorch/reading-comprehender) in PyTorch with ELMo-BiDAF
diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
index 4bf068accf..ffa7d2c5be 100644
--- a/cli/cmd/cluster.go
+++ b/cli/cmd/cluster.go
@@ -57,20 +57,18 @@ func init() {
 }

 func addClusterConfigFlag(cmd *cobra.Command) {
-	cmd.PersistentFlags().StringVarP(&flagClusterConfig, "config", "c", "", "path to a Cortex cluster configuration file")
+	cmd.PersistentFlags().StringVarP(&flagClusterConfig, "config", "c", "", "path to a cluster configuration file")
 	cmd.PersistentFlags().SetAnnotation("config", cobra.BashCompFilenameExt, configFileExts)
 }

 var clusterCmd = &cobra.Command{
 	Use:   "cluster",
-	Short: "manage a Cortex cluster",
-	Long:  "Manage a Cortex cluster",
+	Short: "manage a cluster",
 }

 var upCmd = &cobra.Command{
 	Use:   "up",
-	Short: "spin up a Cortex cluster",
-	Long:  `This command spins up a Cortex cluster on your AWS account.`,
+	Short: "spin up a cluster",
 	Args:  cobra.NoArgs,
 	Run: func(cmd *cobra.Command, args []string) {
 		if err := checkDockerRunning(); err != nil {
@@ -97,8 +95,7 @@ var upCmd = &cobra.Command{

 var updateCmd = &cobra.Command{
 	Use:   "update",
-	Short: "update a Cortex cluster",
-	Long:  `This command updates a Cortex cluster.`,
+	Short: "update a cluster",
 	Args:  cobra.NoArgs,
 	Run: func(cmd *cobra.Command, args []string) {
 		if err := checkDockerRunning(); err != nil {
@@ -126,8 +123,7 @@ var updateCmd = &cobra.Command{

 var infoCmd = &cobra.Command{
 	Use:   "info",
-	Short: "get information about a Cortex cluster",
-	Long:  `This command gets information about a Cortex cluster.`,
+	Short: "get information about a cluster",
 	Args:  cobra.NoArgs,
 	Run: func(cmd *cobra.Command, args []string) {
 		if err := checkDockerRunning(); err != nil {
@@ -145,7 +141,9 @@ var infoCmd = &cobra.Command{
 		if err != nil {
 			errors.Exit(err)
 		}
-		if strings.Contains(out, "there isn't a cortex cluster") {
+
+		// note: if modifying this string, search the codebase for it and change all occurrences
+		if strings.Contains(out, "there is no cluster") {
 			errors.Exit()
 		}

@@ -176,8 +174,7 @@ var infoCmd = &cobra.Command{

 var downCmd = &cobra.Command{
 	Use:   "down",
-	Short: "spin down a Cortex cluster",
-	Long:  `This command spins down a Cortex cluster.`,
+	Short: "spin down a cluster",
 	Args:  cobra.NoArgs,
 	Run: func(cmd *cobra.Command, args []string) {
 		if err := checkDockerRunning(); err != nil {
@@ -267,14 +264,16 @@ func refreshCachedClusterConfig(awsCreds *AWSCredentials) *clusterconfig.Cluster
 	}

 	if userClusterConfig.Region == nil {
-		errors.Exit(fmt.Sprintf("unable to find an existing cortex cluster; please configure \"%s\" to the s3 region of an existing cortex cluster or create a cortex cluster with `cortex cluster up`",
clusterconfig.RegionKey)) + errors.Exit(fmt.Sprintf("unable to find an existing cluster; please configure \"%s\" to the s3 region of an existing cluster or create a cluster with `cortex cluster up`", clusterconfig.RegionKey)) } out, err := runRefreshClusterConfig(userClusterConfig, awsCreds) if err != nil { errors.Exit(err) } - if strings.Contains(out, "there isn't a cortex cluster") { + + // note: if modifying this string, search the codebase for it and change all occurrences + if strings.Contains(out, "there is no cluster") { errors.Exit() } diff --git a/cli/cmd/completion.go b/cli/cmd/completion.go index 6ffc03b8be..f184fea320 100644 --- a/cli/cmd/completion.go +++ b/cli/cmd/completion.go @@ -26,16 +26,15 @@ import ( var completionCmd = &cobra.Command{ Use: "completion", Short: "generate bash completion scripts", - Long: `Generate bash completion scripts. + Long: `generate bash completion scripts -Add this to your bashrc or bash profile: +add this to your bashrc or bash profile: source <(cortex completion) -Or run: - echo 'source <(cortex completion)' >> ~/.bash_profile # Mac - echo 'source <(cortex completion)' >> ~/.bashrc # Linux +or run: + echo 'source <(cortex completion)' >> ~/.bash_profile # mac + echo 'source <(cortex completion)' >> ~/.bashrc # linux -This will also add the "cx" alias. -Note: Cortex CLI completion requires the bash_completion package to be installed on your system. +this will also add the "cx" alias (note: cli completion requires the bash_completion package to be installed on your system) `, Args: cobra.NoArgs, Run: func(cmd *cobra.Command, args []string) { diff --git a/cli/cmd/configure.go b/cli/cmd/configure.go index 86026ef59d..570840ec3b 100644 --- a/cli/cmd/configure.go +++ b/cli/cmd/configure.go @@ -33,11 +33,8 @@ func init() { var configureCmd = &cobra.Command{ Use: "configure", - Short: "configure the CLI", - Long: `This command configures the Cortex URL and AWS credentials -in order to authenticate and send requests to Cortex. -The configuration is stored in ~/.cortex.`, - Args: cobra.NoArgs, + Short: "configure the cli", + Args: cobra.NoArgs, Run: func(cmd *cobra.Command, args []string) { if flagPrint { cliConfig := getDefaults() diff --git a/cli/cmd/delete.go b/cli/cmd/delete.go index db46455763..98dcd9579d 100644 --- a/cli/cmd/delete.go +++ b/cli/cmd/delete.go @@ -38,7 +38,6 @@ func init() { var deleteCmd = &cobra.Command{ Use: "delete [DEPLOYMENT_NAME]", Short: "delete a deployment", - Long: `This command deletes a deployment from the cluster.`, Args: cobra.MaximumNArgs(1), Run: func(cmd *cobra.Command, args []string) { var appName string diff --git a/cli/cmd/deploy.go b/cli/cmd/deploy.go index 14ca490be1..30552583ae 100644 --- a/cli/cmd/deploy.go +++ b/cli/cmd/deploy.go @@ -38,16 +38,14 @@ var flagDeployRefresh bool func init() { deployCmd.PersistentFlags().BoolVarP(&flagDeployForce, "force", "f", false, "override the in-progress deployment update") - deployCmd.PersistentFlags().BoolVarP(&flagDeployRefresh, "refresh", "r", false, "re-deploy all APIs with cleared cache and rolling updates") + deployCmd.PersistentFlags().BoolVarP(&flagDeployRefresh, "refresh", "r", false, "re-deploy all apis with cleared cache and rolling updates") addEnvFlag(deployCmd) } var deployCmd = &cobra.Command{ Use: "deploy", Short: "create or update a deployment", - Long: `This command sends all project configuration and code to Cortex. 
-If validations pass, Cortex will attempt to create the desired state.`, - Args: cobra.NoArgs, + Args: cobra.NoArgs, Run: func(cmd *cobra.Command, args []string) { deploy(flagDeployForce, flagDeployRefresh) }, diff --git a/cli/cmd/errors.go b/cli/cmd/errors.go index 549b46c846..0b11f92743 100644 --- a/cli/cmd/errors.go +++ b/cli/cmd/errors.go @@ -129,7 +129,7 @@ func ErrorFailedConnectURL(url url.URL) error { func ErrorFailedToConnectOperator(urlStr string) error { return Error{ Kind: ErrFailedToConnectOperator, - message: fmt.Sprintf("failed to connect to the operator (%s), run `cortex configure` if you need to update the operator URL", urlStr), + message: fmt.Sprintf("failed to connect to the operator (%s), run `cortex configure` if you need to update the operator endpoint", urlStr), } } diff --git a/cli/cmd/get.go b/cli/cmd/get.go index 86810ea99e..4140c9e814 100644 --- a/cli/cmd/get.go +++ b/cli/cmd/get.go @@ -57,10 +57,8 @@ func init() { var getCmd = &cobra.Command{ Use: "get [API_NAME]", - Short: "get information about APIs", - Long: `This command displays information about APIs. -Adding the -v or --verbose flag displays additional information.`, - Args: cobra.RangeArgs(0, 1), + Short: "get information about deployments", + Args: cobra.RangeArgs(0, 1), Run: func(cmd *cobra.Command, args []string) { rerun(func() (string, error) { return runGet(cmd, args) @@ -424,7 +422,7 @@ func classificationMetricsTable(apiMetrics schema.APIMetrics) string { func describeModelInput(groupStatus *resource.APIGroupStatus, apiEndpoint string) string { if groupStatus.ReadyUpdated+groupStatus.ReadyStaleCompute == 0 { - return "the model's input schema will be available when the API is live" + return "the model's input schema will be available when the api is live" } apiSummary, err := getAPISummary(apiEndpoint) @@ -462,7 +460,7 @@ func describeModelInput(groupStatus *resource.APIGroupStatus, apiEndpoint string func getAPISummary(apiEndpoint string) (*schema.APISummary, error) { req, err := http.NewRequest("GET", apiEndpoint, nil) if err != nil { - return nil, errors.Wrap(err, "unable to request API summary") + return nil, errors.Wrap(err, "unable to request api summary") } req.Header.Set("Content-Type", "application/json") response, err := httpsNoVerifyClient.makeRequest(req) @@ -473,7 +471,7 @@ func getAPISummary(apiEndpoint string) (*schema.APISummary, error) { var apiSummary schema.APISummary err = json.DecodeWithNumber(response, &apiSummary) if err != nil { - return nil, errors.Wrap(err, "unable to parse API summary response") + return nil, errors.Wrap(err, "unable to parse api summary response") } for _, featureSignature := range apiSummary.ModelSignature { diff --git a/cli/cmd/logs.go b/cli/cmd/logs.go index 97421ed9a1..ecd6bb5c8d 100644 --- a/cli/cmd/logs.go +++ b/cli/cmd/logs.go @@ -31,8 +31,7 @@ func init() { var logsCmd = &cobra.Command{ Use: "logs API_NAME", - Short: "get logs for an API", - Long: `This command streams logs from a deployed API.`, + Short: "stream logs from an api", Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { resourceName := args[0] diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go index 13937ac0e1..6a5e9bf962 100644 --- a/cli/cmd/predict.go +++ b/cli/cmd/predict.go @@ -35,15 +35,13 @@ var predictDebug bool func init() { addAppNameFlag(predictCmd) addEnvFlag(predictCmd) - predictCmd.Flags().BoolVar(&predictDebug, "debug", false, "Predict with debug mode") + predictCmd.Flags().BoolVar(&predictDebug, "debug", false, "predict with debug mode") } var 
predictCmd = &cobra.Command{ Use: "predict API_NAME SAMPLE_FILE", - Short: "make a prediction request", - Long: `This command makes a prediction request using -a JSON file and displays the response.`, - Args: cobra.ExactArgs(2), + Short: "make a prediction request using a json file", + Args: cobra.ExactArgs(2), Run: func(cmd *cobra.Command, args []string) { apiName := args[0] sampleJSONPath := args[1] diff --git a/cli/cmd/root.go b/cli/cmd/root.go index 0169980560..77ffe002e4 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -31,7 +31,6 @@ import ( "github.com/cortexlabs/cortex/pkg/lib/slices" s "github.com/cortexlabs/cortex/pkg/lib/strings" libtime "github.com/cortexlabs/cortex/pkg/lib/time" - "github.com/cortexlabs/cortex/pkg/operator/api/resource" ) var cmdStr string @@ -70,7 +69,6 @@ var rootCmd = &cobra.Command{ Use: "cortex", Aliases: []string{"cx"}, Short: "deploy machine learning models in production", - Long: `Deploy machine learning models in production`, } func Execute() { @@ -107,10 +105,10 @@ func updateRootUsage() { } usage = strings.Replace(usage, "Usage:\n cortex [command]\n\nAliases:\n cortex, cx\n\n", "", 1) - usage = strings.Replace(usage, "Available Commands:", "Deployment commands:", 1) - usage = strings.Replace(usage, "\n cluster", "\n\nCluster commands:\n cluster", 1) - usage = strings.Replace(usage, "\n configure", "\n\nOther commands:\n configure", 1) - usage = strings.Replace(usage, "\nUse \"cortex", " help help about any command\n\nFlags:\n -h, --help help for cortex\n\nUse \"cortex", 1) + usage = strings.Replace(usage, "Available Commands:", "deployment commands:", 1) + usage = strings.Replace(usage, "\n cluster", "\n\ncluster commands:\n cluster", 1) + usage = strings.Replace(usage, "\n configure", "\n\nother commands:\n configure", 1) + usage = strings.Replace(usage, "\n\nUse \"cortex [command] --help\" for more information about a command.", "", 1) cmd.Print(usage) @@ -136,14 +134,6 @@ func addAppNameFlag(cmd *cobra.Command) { cmd.PersistentFlags().StringVarP(&flagAppName, "deployment", "d", "", "deployment name") } -var resourceTypesHelp = fmt.Sprintf("\nResource Types:\n %s\n", strings.Join(resource.VisibleTypes.StringList(), "\n ")) - -func addResourceTypesToHelp(cmd *cobra.Command) { - usage := cmd.UsageTemplate() - usage = strings.Replace(usage, "\nFlags:\n", resourceTypesHelp+"\nFlags:\n", 1) - cmd.SetUsageTemplate(usage) -} - func getTerminalWidth() int { cmd := exec.Command("stty", "size") cmd.Stdin = os.Stdin diff --git a/cli/cmd/support.go b/cli/cmd/support.go index 914f6570a9..e7a713ceaf 100644 --- a/cli/cmd/support.go +++ b/cli/cmd/support.go @@ -66,8 +66,7 @@ var supportPrompValidation = &cr.PromptValidation{ var supportCmd = &cobra.Command{ Use: "support", - Short: "send a support request to Cortex maintainers", - Long: `This command sends a support request to the Cortex maintainers`, + Short: "send a support request to the maintainers", Run: func(cmd *cobra.Command, args []string) { supportRequest := &SupportRequest{} err := cr.ReadPrompt(supportRequest, supportPrompValidation) diff --git a/cli/cmd/version.go b/cli/cmd/version.go index 35efe015e7..1ae69301d8 100644 --- a/cli/cmd/version.go +++ b/cli/cmd/version.go @@ -33,29 +33,28 @@ func init() { var versionCmd = &cobra.Command{ Use: "version", - Short: "print the version of the CLI and cluster", - Long: `This command prints the version of the CLI and cluster`, + Short: "print the cli and cluster versions", Args: cobra.NoArgs, Run: func(cmd *cobra.Command, args []string) { if 
!isCLIConfigured() { - fmt.Println("CLI version: " + consts.CortexVersion + "\n") - fmt.Println("Run `cortex configure` to connect the CLI to a Cortex cluster") + fmt.Println("cli version: " + consts.CortexVersion + "\n") + fmt.Println("run `cortex configure` to connect the cli to a cluster") return } httpResponse, err := HTTPGet("/info") if err != nil { - fmt.Println("CLI version: " + consts.CortexVersion + "\n") + fmt.Println("cli version: " + consts.CortexVersion + "\n") errors.Exit(err) } var infoResponse schema.InfoResponse err = json.Unmarshal(httpResponse, &infoResponse) if err != nil { - fmt.Println("CLI version: " + consts.CortexVersion + "\n") + fmt.Println("cli version: " + consts.CortexVersion + "\n") errors.Exit(err, "/info", string(httpResponse)) } - fmt.Println("CLI version: " + consts.CortexVersion) - fmt.Println("Cluster version: " + infoResponse.ClusterConfig.APIVersion) + fmt.Println("cli version: " + consts.CortexVersion) + fmt.Println("cluster version: " + infoResponse.ClusterConfig.APIVersion) }, } diff --git a/docs/cluster/cli.md b/docs/cluster/cli.md index 076728266d..da2e7100e6 100644 --- a/docs/cluster/cli.md +++ b/docs/cluster/cli.md @@ -3,8 +3,7 @@ ## deploy ```text -This command sends all project configuration and code to Cortex. -If validations pass, Cortex will attempt to create the desired state. +create or update a deployment Usage: cortex deploy [flags] @@ -13,16 +12,16 @@ Flags: -e, --env string environment (default "default") -f, --force override the in-progress deployment update -h, --help help for deploy - -r, --refresh re-deploy all APIs with cleared cache and rolling updates + -r, --refresh re-deploy all apis with cleared cache and rolling updates ``` ## get ```text -Get information about resources. +get information about deployments Usage: - cortex get [RESOURCE_NAME] [flags] + cortex get [API_NAME] [flags] Flags: -a, --all-deployments list all deployments @@ -37,7 +36,7 @@ Flags: ## logs ```text -This command streams logs from a deployed API. +stream logs from an api Usage: cortex logs API_NAME [flags] @@ -51,8 +50,7 @@ Flags: ## predict ```text -This command makes a prediction request using -a JSON file and displays the response. +make a prediction request using a json file Usage: cortex predict API_NAME SAMPLE_FILE [flags] @@ -67,7 +65,7 @@ Flags: ## delete ```text -This command deletes a deployment from the cluster. +delete a deployment Usage: cortex delete [DEPLOYMENT_NAME] [flags] @@ -81,59 +79,59 @@ Flags: ## cluster up ```text -This command spins up a Cortex cluster on your AWS account. +spin up a cluster Usage: cortex cluster up [flags] Flags: - -c, --config string path to a Cortex cluster configuration file + -c, --config string path to a cortex cluster configuration file -h, --help help for up ``` ## cluster info ```text -This command gets information about a Cortex cluster. +get information about a cluster Usage: cortex cluster info [flags] Flags: - -c, --config string path to a Cortex cluster configuration file + -c, --config string path to a cortex cluster configuration file -h, --help help for info ``` ## cluster update ```text -This command updates a Cortex cluster. +update a cluster Usage: cortex cluster update [flags] Flags: - -c, --config string path to a Cortex cluster configuration file + -c, --config string path to a cortex cluster configuration file -h, --help help for update ``` ## cluster down ```text -This command spins down a Cortex cluster. 
+spin down a cluster

 Usage:
   cortex cluster down [flags]

 Flags:
-  -c, --config string   path to a Cortex cluster configuration file
+  -c, --config string   path to a cortex cluster configuration file
   -h, --help            help for down
 ```

 ## version

 ```text
-This command prints the version of the CLI and cluster.
+print the cli and cluster versions

 Usage:
   cortex version [flags]
@@ -146,9 +144,7 @@ Flags:
 ## configure

 ```text
-This command configures the Cortex URL and AWS credentials
-in order to authenticate and send requests to Cortex.
-The configuration is stored in ~/.cortex.
+configure the cli

 Usage:
   cortex configure [flags]
@@ -162,7 +158,7 @@ Flags:
 ## support

 ```text
-This command sends a support request to the Cortex maintainers
+send a support request to the maintainers

 Usage:
   cortex support [flags]
@@ -174,16 +170,15 @@
 ## completion

 ```text
-Generate bash completion scripts.
+generate bash completion scripts

-Add this to your bashrc or bash profile:
+add this to your bashrc or bash profile:
     source <(cortex completion)
-Or run:
-    echo 'source <(cortex completion)' >> ~/.bash_profile  # Mac
-    echo 'source <(cortex completion)' >> ~/.bashrc  # Linux
+or run:
+    echo 'source <(cortex completion)' >> ~/.bash_profile  # mac
+    echo 'source <(cortex completion)' >> ~/.bashrc  # linux

-This will also add the "cx" alias.
-Note: Cortex CLI completion requires the bash_completion package to be installed on your system.
+this will also add the "cx" alias (note: cli completion requires the bash_completion package to be installed on your system)

 Usage:
   cortex completion [flags]
diff --git a/docs/cluster/install.md b/docs/cluster/install.md
index 934ce86e08..199a00d068 100644
--- a/docs/cluster/install.md
+++ b/docs/cluster/install.md
@@ -42,7 +42,7 @@ cortex get classifier

 # Classify a sample
 curl -X POST -H "Content-Type: application/json" \
-    -d '{ "sepal_length": 5.2, "sepal_width": 3.6, "petal_length": 1.4, "petal_width": 0.3 }' \
+    -d '{ "sepal_length": 5.2, "sepal_width": 3.6, "petal_length": 1.4, "petal_width": 0.3 }' \
 ```
diff --git a/docs/cluster/uninstall.md b/docs/cluster/uninstall.md
index a811e4ca39..d8abe08290 100644
--- a/docs/cluster/uninstall.md
+++ b/docs/cluster/uninstall.md
@@ -5,6 +5,7 @@
 1. [AWS credentials](aws.md)
 2. [Docker](https://docs.docker.com/install)
 3. [Cortex CLI](install.md)
+4. [AWS CLI](https://aws.amazon.com/cli)

 ## Uninstalling Cortex
diff --git a/docs/packaging/tensorflow.md b/docs/packaging/tensorflow.md
index 54ce920f13..93fe0b8e35 100644
--- a/docs/packaging/tensorflow.md
+++ b/docs/packaging/tensorflow.md
@@ -1,7 +1,7 @@
 # Packaging TensorFlow models

-Export your trained model and upload the export directory, or a checkpoint directory containing the export directory (which is usually the case if you used `estimator.train_and_evaluate`). An example is shown below (here is the [complete example](https://github.com/cortexlabs/cortex/blob/master/examples/tensorflow/sentiment-analysis)):
+Export your trained model and upload the export directory, or a checkpoint directory containing the export directory (which is usually the case if you used `estimator.train_and_evaluate`).
An example is shown below (here is the [complete example](https://github.com/cortexlabs/cortex/blob/master/examples/tensorflow/sentiment-analyzer)): ```Python import tensorflow as tf diff --git a/examples/README.md b/examples/README.md index d23d39fb17..5eb1d127a9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -8,7 +8,7 @@ - [Image classification with Inception V3](tensorflow/image-classifier): deploy an Inception V3 model to classify images. -- [Sentiment analysis with BERT](tensorflow/sentiment-analysis): deploy a BERT model to classify sentiment. +- [Sentiment analysis with BERT](tensorflow/sentiment-analyzer): deploy a BERT model for sentiment analysis. ## PyTorch @@ -32,4 +32,6 @@ ## scikit-learn -- [MPG estimation with scikit-learn](sklearn/mpg-estimation): deploy a linear regression model to estimate MPG. +- [Iris classification](sklearn/iris-classifier): deploy a model to classify iris flowers. + +- [MPG estimation](sklearn/mpg-estimator): deploy a linear regression model to estimate MPG. diff --git a/examples/pytorch/answer-generator/cortex.yaml b/examples/pytorch/answer-generator/cortex.yaml index bd546df0b3..85c673b897 100644 --- a/examples/pytorch/answer-generator/cortex.yaml +++ b/examples/pytorch/answer-generator/cortex.yaml @@ -6,7 +6,8 @@ predictor: path: predictor.py metadata: - device: cpu # use "cuda" to run on GPUs + device: cuda # use "cpu" to run on CPUs compute: cpu: 1 + gpu: 1 mem: 4G diff --git a/examples/pytorch/image-classifier/README.md b/examples/pytorch/image-classifier/README.md deleted file mode 100644 index 12069c50e1..0000000000 --- a/examples/pytorch/image-classifier/README.md +++ /dev/null @@ -1,107 +0,0 @@ -# Deploy Image Classification as an API - -This example shows how to deploy a pretrained image classifier from TorchVision. - -## Predictor - -We implement Cortex's Predictor interface to load the model and make predictions. Cortex will use this implementation to serve the model as an autoscaling API. - -### Initialization - -We can place our code to download and initialize the model in the body of the implementation: - -```python -# predictor.py - -# download the pretrained AlexNet model -model = torchvision.models.alexnet(pretrained=True) -model.eval() - -# declare the necessary image preprocessing -normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) -preprocess = transforms.Compose( - [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize] -) - -# download the labels -labels = requests.get( - "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt" -).text.split("\n")[1:] -``` - -### Predict - -The `predict()` function will be triggered once per request. The AlexNet model requires a 2-dimensional array of 3-valued tuples representing the RGB values for each pixel in the image, but the API should accept a simple input format such as a URL to an image. Also, instead of returning the model's output as an array of probabilities, the API should return the class name with the highest probability. 
We use the `predict()` function to download the image specified by the url in the request, process it, feed it to the model, convert the model output weights to a label, and return the label: - -```python -# predictor.py - -def predict(sample, metadata): - image = requests.get(sample["url"]).content - img_pil = Image.open(BytesIO(image)) - img_tensor = preprocess(img_pil) - img_tensor.unsqueeze_(0) - with torch.no_grad(): - prediction = model(img_tensor) - _, index = prediction[0].max(0) - return labels[index] -``` - -See [predictor.py](./predictor.py) for the complete code. - -## Define a deployment - -A `deployment` specifies a set of resources that are deployed together. An `api` makes our implementation available as a web service that can serve real-time predictions. This configuration will deploy the implementation specified in `predictor.py`: - -```yaml -# cortex.yaml - -- kind: deployment - name: image - -- kind: api - name: classifier - predictor: - path: predictor.py -``` - -## Deploy to AWS - -`cortex deploy` takes the declarative configuration from `cortex.yaml` and creates it on the cluster: - -```bash -$ cortex deploy - -creating classifier -``` - -Behind the scenes, Cortex containerizes our implementation, makes it servable using Flask, exposes the endpoint with a load balancer, and orchestrates the workload on Kubernetes. - -We can track the statuses of the APIs using `cortex get`: - -```bash -$ cortex get classifier --watch - -status up-to-date available requested last update avg latency -live 1 1 1 12s - -``` - -The output above indicates that one replica of the API was requested and is available to serve predictions. Cortex will automatically launch more replicas if the load increases and spin down replicas if there is unused capacity. - -## Serve real-time predictions - -We can use `curl` to test our prediction service: - -```bash -$ cortex get classifier - -endpoint: http://***.amazonaws.com/image/classifier - -$ curl http://***.amazonaws.com/image/classifier \ - -X POST -H "Content-Type: application/json" \ - -d '{"url": "https://i.imgur.com/PzXprwl.jpg"}' - -"hotdog" -``` - -Any questions? [chat with us](https://gitter.im/cortexlabs/cortex). diff --git a/examples/pytorch/image-classifier/cortex.yaml b/examples/pytorch/image-classifier/cortex.yaml index 0782b8a621..2a649e7403 100644 --- a/examples/pytorch/image-classifier/cortex.yaml +++ b/examples/pytorch/image-classifier/cortex.yaml @@ -5,3 +5,7 @@ name: classifier predictor: path: predictor.py + compute: + cpu: 1 + gpu: 1 + mem: 4G diff --git a/examples/pytorch/iris-classifier/README.md b/examples/pytorch/iris-classifier/README.md deleted file mode 100644 index bda16d547b..0000000000 --- a/examples/pytorch/iris-classifier/README.md +++ /dev/null @@ -1,114 +0,0 @@ -# Deploy a PyTorch iris classifier - -This example shows how to deploy a classifier trained on the famous [iris data set](https://archive.ics.uci.edu/ml/datasets/iris) in PyTorch. The PyTorch model being deployed can be found [here](./src/my_model.py). - -## Predictor - -We implement Cortex's Predictor interface to load the model and make predictions. Cortex will use this implementation to serve the model as an autoscaling API. - -### Initialization - -We can place our code to download and initialize the model in the `init()` function. The PyTorch model class is defined in [src/model.py](./src/model.py), and we assume that we've already trained the model and uploaded the state_dict (weights) to S3. 
- -```python -# predictor.py - -from model import IrisNet - -# instantiate the model -model = IrisNet() - -# define the labels -labels = ["iris-setosa", "iris-versicolor", "iris-virginica"] - -def init(model_path, metadata): - # model_path is a local path pointing to your model weights file - model.load_state_dict(torch.load(model_path)) - model.eval() -``` - -### Predict - -The `predict()` function will be triggered once per request. We extract the features from the sample sent in the request, feed them to the model, and respond with a human-readable label: - -```python -# predictor.py - -def predict(sample, metadata): - input_tensor = torch.FloatTensor( - [ - [ - sample["sepal_length"], - sample["sepal_width"], - sample["petal_length"], - sample["petal_width"], - ] - ] - ) - - output = model(input_tensor) - return labels[torch.argmax(output[0])] -``` - -See [predictor.py](./src/predictor.py) for the complete code. - -## Define a deployment - -A `deployment` specifies a set of resources that are deployed together. An `api` makes our implementation available as a web service that can serve real-time predictions. This configuration will deploy the implementation specified in `predictor.py`. Note that the `metadata` will be passed into the `init()` function, and we specify the `python_path` in the so we can import our model as `from my_model import IrisNet` instead of `from src.my_model import IrisNet` in `predictor.py`. - -```yaml -# cortex.yaml - -- kind: deployment - name: iris - -- kind: api - name: classifier - predictor: - path: src/predictor.py - python_path: src/ - model: s3://cortex-examples/pytorch/iris-classifier/weights.pth - tracker: - model_type: classification -``` - -## Deploy to AWS - -`cortex deploy` takes the declarative configuration from `cortex.yaml` and creates it on the cluster: - -```bash -$ cortex deploy - -creating classifier -``` - -Behind the scenes, Cortex containerizes our implementation, makes it servable using Flask, exposes the endpoint with a load balancer, and orchestrates the workload on Kubernetes. - -We can track the status of a deployment using `cortex get`: - -```bash -$ cortex get classifier --watch - -status up-to-date available requested last update avg latency -live 1 1 1 8s - -``` - -The output above indicates that one replica of the API was requested and is available to serve predictions. Cortex will automatically launch more replicas if the load increases and spin down replicas if there is unused capacity. - -## Serve real-time predictions - -We can use `curl` to test our prediction service: - -```bash -$ cortex get classifier - -endpoint: http://***.amazonaws.com/iris/classifier - -$ curl http://***.amazonaws.com/iris/classifier \ - -X POST -H "Content-Type: application/json" \ - -d '{"sepal_length": 5.2, "sepal_width": 3.6, "petal_length": 1.4, "petal_width": 0.3}' - -"iris-setosa" -``` - -Any questions? [chat with us](https://gitter.im/cortexlabs/cortex). 
diff --git a/examples/pytorch/iris-classifier/cortex.yaml b/examples/pytorch/iris-classifier/cortex.yaml index 937c38d7a9..3c0ecbdb66 100644 --- a/examples/pytorch/iris-classifier/cortex.yaml +++ b/examples/pytorch/iris-classifier/cortex.yaml @@ -4,8 +4,7 @@ - kind: api name: classifier predictor: - path: src/predictor.py - python_path: src/ + path: predictor.py model: s3://cortex-examples/pytorch/iris-classifier/weights.pth tracker: model_type: classification diff --git a/examples/pytorch/iris-classifier/src/model.py b/examples/pytorch/iris-classifier/model.py similarity index 100% rename from examples/pytorch/iris-classifier/src/model.py rename to examples/pytorch/iris-classifier/model.py diff --git a/examples/pytorch/iris-classifier/src/predictor.py b/examples/pytorch/iris-classifier/predictor.py similarity index 100% rename from examples/pytorch/iris-classifier/src/predictor.py rename to examples/pytorch/iris-classifier/predictor.py diff --git a/examples/pytorch/language-identifier/cortex.yaml b/examples/pytorch/language-identifier/cortex.yaml index 7473c7382c..9f5609ae56 100644 --- a/examples/pytorch/language-identifier/cortex.yaml +++ b/examples/pytorch/language-identifier/cortex.yaml @@ -7,3 +7,7 @@ path: predictor.py tracker: model_type: classification + compute: + cpu: 1 + gpu: 1 + mem: 4G diff --git a/examples/pytorch/language-identifier/predictor.py b/examples/pytorch/language-identifier/predictor.py index 19b934dc0e..2cf8ffc951 100644 --- a/examples/pytorch/language-identifier/predictor.py +++ b/examples/pytorch/language-identifier/predictor.py @@ -1,10 +1,9 @@ import wget import fasttext -wget.download( - "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin", "lid.176.bin" -) -model = fasttext.load_model("lid.176.bin") + +wget.download("https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin", "model") +model = fasttext.load_model("model") def predict(sample, metadata): diff --git a/examples/pytorch/reading-comprehender/cortex.yaml b/examples/pytorch/reading-comprehender/cortex.yaml index e5600560b1..0477bbd898 100644 --- a/examples/pytorch/reading-comprehender/cortex.yaml +++ b/examples/pytorch/reading-comprehender/cortex.yaml @@ -7,4 +7,5 @@ path: predictor.py compute: cpu: 1 + gpu: 1 mem: 4G diff --git a/examples/pytorch/text-generator/README.md b/examples/pytorch/text-generator/README.md deleted file mode 100644 index 6d7cb6458c..0000000000 --- a/examples/pytorch/text-generator/README.md +++ /dev/null @@ -1,104 +0,0 @@ -# Self-host HuggingFace's GPT-2 as a service - -This example shows how to deploy [HuggingFace's DistilGPT2](https://github.com/huggingface/transformers/tree/master/examples/distillation) model as a service on AWS. DistilGPT2 is a compressed version of OpenAI's GPT-2. - -## Predictor - -We implement Cortex's Predictor interface to load the model and make predictions. Cortex will use this implementation to serve the model as an autoscaling API. 
- -### Initialization - -We can place our code to download and initialize the model in the body of the implementation, and we can load it onto a GPU in the `init()` function: - -```python -# predictor.py - -# download the pretrained DistilGPT2 model and set it to evaluation -model = GPT2LMHeadModel.from_pretrained("distilgpt2") -model.eval() - -# download the tokenizer -tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2") - -def init(model_path, metadata): - # load the model onto the device specified in the metadata field of our api configuration - model.to(metadata["device"]) -``` - -### Predict - -The `predict()` function will be triggered once per request. We tokenize the input, run it through the model, decode the output, and respond with the generated text. - -```python -# predictor.py - -def predict(sample, metadata): - indexed_tokens = tokenizer.encode(sample["text"]) - output = sample_sequence(model, metadata['num_words'], indexed_tokens, device=metadata['device']) - return tokenizer.decode( - output[0, 0:].tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True - ) -``` - -See [predictor.py](./predictor.py) for the complete code. - -## Define a deployment - -A `deployment` specifies a set of resources that are deployed together. An `api` makes our implementation available as a web service that can serve real-time predictions. This configuration will deploy the implementation specified in `predictor.py`: - -```yaml -# cortex.yaml - -- kind: deployment - name: text - -- kind: api - name: generator - predictor: - path: predictor.py - metadata: - num_words: 50 # generate 50 words per request - device: cuda # run on GPU - compute: - gpu: 1 - cpu: 1 -``` - -## Deploy to AWS - -`cortex deploy` takes the declarative configuration from `cortex.yaml` and creates it on the cluster. - -```bash -$ cortex deploy - -creating generator -``` - -Behind the scenes, Cortex containerizes our implementation, makes it servable using Flask, exposes the endpoint with a load balancer, and orchestrates the workload on Kubernetes. - -We can track the status of a deployment using `cortex get`: - -```bash -$ cortex get generator --watch - -status up-to-date available requested last update avg latency -live 1 1 1 9m - -``` - -The output above indicates that one replica of the API was requested and is available to serve predictions. Cortex will automatically launch more replicas if the load increases and spin down replicas if there is unused capacity. - -## Serve real-time predictions - -We can use `curl` to test our prediction service: - -```bash -$ cortex get generator - -endpoint: http://***.amazonaws.com/text/generator - -$ curl http://***.amazonaws.com/text/generator \ - -X POST -H "Content-Type: application/json" \ - -d '{"text": "machine learning"}' -``` - -Any questions? [chat with us](https://gitter.im/cortexlabs/cortex). 
diff --git a/examples/pytorch/text-generator/cortex.yaml b/examples/pytorch/text-generator/cortex.yaml index 7867b28029..6bfa106b37 100644 --- a/examples/pytorch/text-generator/cortex.yaml +++ b/examples/pytorch/text-generator/cortex.yaml @@ -9,5 +9,5 @@ num_words: 50 device: cuda # use "cpu" to run on CPUs compute: - gpu: 1 cpu: 1 + gpu: 1 diff --git a/examples/sklearn/mpg-estimation/cortex.yaml b/examples/sklearn/mpg-estimator/cortex.yaml similarity index 50% rename from examples/sklearn/mpg-estimation/cortex.yaml rename to examples/sklearn/mpg-estimator/cortex.yaml index f98f689ebb..7e7b773225 100644 --- a/examples/sklearn/mpg-estimation/cortex.yaml +++ b/examples/sklearn/mpg-estimator/cortex.yaml @@ -1,8 +1,8 @@ - kind: deployment - name: auto + name: mpg - kind: api - name: mpg + name: estimator predictor: path: predictor.py - model: s3://cortex-examples/sklearn/mpg-estimation/linreg + model: s3://cortex-examples/sklearn/mpg-estimator/linreg diff --git a/examples/sklearn/mpg-estimation/predictor.py b/examples/sklearn/mpg-estimator/predictor.py similarity index 100% rename from examples/sklearn/mpg-estimation/predictor.py rename to examples/sklearn/mpg-estimator/predictor.py diff --git a/examples/sklearn/mpg-estimation/requirements.txt b/examples/sklearn/mpg-estimator/requirements.txt similarity index 100% rename from examples/sklearn/mpg-estimation/requirements.txt rename to examples/sklearn/mpg-estimator/requirements.txt index 5d36e22b63..99c556c375 100644 --- a/examples/sklearn/mpg-estimation/requirements.txt +++ b/examples/sklearn/mpg-estimator/requirements.txt @@ -1,4 +1,4 @@ mlflow -scikit-learn pandas numpy +scikit-learn diff --git a/examples/sklearn/mpg-estimation/sample.json b/examples/sklearn/mpg-estimator/sample.json similarity index 100% rename from examples/sklearn/mpg-estimation/sample.json rename to examples/sklearn/mpg-estimator/sample.json diff --git a/examples/sklearn/mpg-estimation/model.py b/examples/sklearn/mpg-estimator/trainer.py similarity index 64% rename from examples/sklearn/mpg-estimation/model.py rename to examples/sklearn/mpg-estimator/trainer.py index 4a85c0a069..7cec1e1925 100644 --- a/examples/sklearn/mpg-estimation/model.py +++ b/examples/sklearn/mpg-estimator/trainer.py @@ -11,12 +11,13 @@ df = df.replace("?", np.nan) df = df.dropna() df = df.drop(["name", "origin", "year"], axis=1) # drop categorical variables for simplicity -X = df.drop("mpg", axis=1) -y = df[["mpg"]] - -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1) +data = df.drop("mpg", axis=1) +labels = df[["mpg"]] +training_data, test_data, training_labels, test_labels = train_test_split(data, labels) model = LinearRegression() -model.fit(X_train, y_train) +model.fit(training_data, training_labels) +accuracy = model.score(test_data, test_labels) +print("accuracy: {:.2f}".format(accuracy)) mlflow.sklearn.save_model(model, "linreg") diff --git a/examples/tensorflow/iris-classifier/README.md b/examples/tensorflow/iris-classifier/README.md deleted file mode 100644 index 274007a666..0000000000 --- a/examples/tensorflow/iris-classifier/README.md +++ /dev/null @@ -1,80 +0,0 @@ -# Deploy an iris classifier - -This example shows how to deploy a classifier trained on the famous [iris data set](https://archive.ics.uci.edu/ml/datasets/iris). - -## Define a deployment - -A `deployment` specifies a set of resources that are deployed together. An `api` makes our exported model available as a web service that can serve real-time predictions. 
This configuration will deploy our model from the `cortex-examples` S3 bucket: - -```yaml -# cortex.yaml - -- kind: deployment - name: iris - -- kind: api - name: classifier - tensorflow: - model: s3://cortex-examples/tensorflow/iris-classifier/nn - request_handler: handler.py - tracker: - model_type: classification -``` - - -You can run the code that generated the exported model used in this example [here](https://colab.research.google.com/github/cortexlabs/cortex/blob/master/examples/tensorflow/iris-classifier/tensorflow.ipynb). - -## Add request handling - -The API should convert the model’s prediction to a human readable label before responding. This can be implemented in a request handler file: - -```python -# handler.py - -labels = ["iris-setosa", "iris-versicolor", "iris-virginica"] - -def post_inference(prediction, metadata): - label_index = int(prediction["class_ids"][0]) - return labels[label_index] -``` - -## Deploy to AWS - -`cortex deploy` takes the declarative configuration from `cortex.yaml` and creates it on the cluster: - -```bash -$ cortex deploy - -creating classifier -``` - -Behind the scenes, Cortex containerizes our implementation, makes it servable using Flask, exposes the endpoint with a load balancer, and orchestrates the workload on Kubernetes. - -We can track the status of a deployment using `cortex get`: - -```bash -$ cortex get classifier --watch - -status up-to-date available requested last update avg latency -live 1 1 1 8s - -``` - -The output above indicates that one replica of the API was requested and is available to serve predictions. Cortex will automatically launch more replicas if the load increases and spin down replicas if there is unused capacity. - -## Serve real-time predictions - -We can use `curl` to test our prediction service: - -```bash -$ cortex get classifier - -endpoint: http://***.amazonaws.com/iris/classifier - -$ curl http://***.amazonaws.com/iris/classifier \ - -X POST -H "Content-Type: application/json" \ - -d '{"sepal_length": 5.2, "sepal_width": 3.6, "petal_length": 1.4, "petal_width": 0.3}' - -"iris-setosa" -``` - -Any questions? [chat with us](https://gitter.im/cortexlabs/cortex). diff --git a/examples/tensorflow/sentiment-analysis/README.md b/examples/tensorflow/sentiment-analysis/README.md deleted file mode 100644 index 79f1812d3e..0000000000 --- a/examples/tensorflow/sentiment-analysis/README.md +++ /dev/null @@ -1,97 +0,0 @@ -# Deploy a BERT sentiment analysis API - -This example shows how to deploy a sentiment analysis classifier trained using [BERT](https://github.com/google-research/bert). - -## Define a deployment - -A `deployment` specifies a set of resources that are deployed together. An `api` makes our exported model available as a web service that can serve real-time predictions. This configuration will download the model from the `cortex-examples` S3 bucket, and will preprocess the payload and postprocess the inference with functions defined in `handler.py`. - -```yaml -# cortex.yaml - -- kind: deployment - name: sentiment - -- kind: api - name: classifier - tensorflow: - model: s3://cortex-examples/tensorflow/sentiment-analysis/bert - request_handler: handler.py - tracker: - model_type: classification -``` - - -You can run the code that generated the exported BERT model [here](https://colab.research.google.com/github/cortexlabs/cortex/blob/master/examples/tensorflow/sentiment-analysis/bert.ipynb). 
- -## Add request handling - -The model requires tokenized input for inference, but the API should accept strings of natural language as input. It should also map the model’s integer predictions to the actual sentiment label. This can be implemented in a request handler file. Define a `pre_inference()` function to tokenize request payloads and a `post_inference()` function to map inference output to labels before responding to the client: - -```python -# handler.py - -import tensorflow as tf -import tensorflow_hub as hub -from bert import tokenization, run_classifier - -labels = ["negative", "positive"] - -with tf.Graph().as_default(): - bert_module = hub.Module("https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1") - info = bert_module(signature="tokenization_info", as_dict=True) - with tf.Session() as sess: - vocab_file, do_lower_case = sess.run([info["vocab_file"], info["do_lower_case"]]) -tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case) - - -def pre_inference(sample, signature, metadata): - input_example = run_classifier.InputExample(guid="", text_a=sample["review"], label=0) - input_feature = run_classifier.convert_single_example(0, input_example, [0, 1], 128, tokenizer) - return {"input_ids": [input_feature.input_ids]} - - -def post_inference(prediction, signature, metadata): - return labels[prediction["labels"][0]] -``` - -## Deploy to AWS - -`cortex deploy` takes the declarative configuration from `cortex.yaml` and creates it on the cluster: - -```bash -$ cortex deploy - -creating classifier -``` - -Behind the scenes, Cortex containerizes our implementation, makes it servable using Flask, exposes the endpoint with a load balancer, and orchestrates the workload on Kubernetes. - -We can track the status of a deployment using `cortex get`: - -```bash -$ cortex get classifier --watch - -status up-to-date available requested last update avg latency -live 1 1 1 8s - -``` - -The output above indicates that one replica of the API was requested and is available to serve predictions. Cortex will automatically launch more replicas if the load increases and spin down replicas if there is unused capacity. - -## Serve real-time predictions - -We can use `curl` to test our prediction service: - -```bash -$ cortex get classifier - -endpoint: http://***.amazonaws.com/sentiment/classifier - -$ curl http://***.amazonaws.com/sentiment/classifier \ - -X POST -H "Content-Type: application/json" \ - -d '{"review": "The movie was great!"}' - -"positive" -``` - -Any questions? [chat with us](https://gitter.im/cortexlabs/cortex). diff --git a/examples/tensorflow/sentiment-analysis/sample.json b/examples/tensorflow/sentiment-analysis/sample.json deleted file mode 100644 index 6312e4643b..0000000000 --- a/examples/tensorflow/sentiment-analysis/sample.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "review": "The movie was great!" 
-} diff --git a/examples/tensorflow/sentiment-analysis/bert.ipynb b/examples/tensorflow/sentiment-analyzer/bert.ipynb similarity index 99% rename from examples/tensorflow/sentiment-analysis/bert.ipynb rename to examples/tensorflow/sentiment-analyzer/bert.ipynb index 41a0472a1b..b307e282b6 100644 --- a/examples/tensorflow/sentiment-analysis/bert.ipynb +++ b/examples/tensorflow/sentiment-analyzer/bert.ipynb @@ -933,7 +933,7 @@ "source": [ "AWS_ACCESS_KEY_ID = \"\" #@param {type:\"string\"}\n", "AWS_SECRET_ACCESS_KEY = \"\" #@param {type:\"string\"}\n", - "S3_UPLOAD_PATH = \"s3://my-bucket/sentiment-analysis/bert\" #@param {type:\"string\"}\n", + "S3_UPLOAD_PATH = \"s3://my-bucket/sentiment-analyzer/bert\" #@param {type:\"string\"}\n", "\n", "import sys\n", "import re\n", @@ -997,7 +997,7 @@ }, "source": [ "\n", - "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/sentiment-analysis) for how to deploy the model as an API." + "That's it! See the [example on GitHub](https://github.com/cortexlabs/cortex/tree/master/examples/sentiment-analyzer) for how to deploy the model as an API." ] } ] diff --git a/examples/tensorflow/sentiment-analysis/cortex.yaml b/examples/tensorflow/sentiment-analyzer/cortex.yaml similarity index 62% rename from examples/tensorflow/sentiment-analysis/cortex.yaml rename to examples/tensorflow/sentiment-analyzer/cortex.yaml index 76bf2db4a0..22bc740c60 100644 --- a/examples/tensorflow/sentiment-analysis/cortex.yaml +++ b/examples/tensorflow/sentiment-analyzer/cortex.yaml @@ -2,9 +2,9 @@ name: sentiment - kind: api - name: classifier + name: analyzer tensorflow: - model: s3://cortex-examples/tensorflow/sentiment-analysis/bert + model: s3://cortex-examples/tensorflow/sentiment-analyzer/bert request_handler: handler.py tracker: model_type: classification diff --git a/examples/tensorflow/sentiment-analysis/handler.py b/examples/tensorflow/sentiment-analyzer/handler.py similarity index 100% rename from examples/tensorflow/sentiment-analysis/handler.py rename to examples/tensorflow/sentiment-analyzer/handler.py diff --git a/examples/tensorflow/sentiment-analysis/requirements.txt b/examples/tensorflow/sentiment-analyzer/requirements.txt similarity index 100% rename from examples/tensorflow/sentiment-analysis/requirements.txt rename to examples/tensorflow/sentiment-analyzer/requirements.txt diff --git a/examples/tensorflow/sentiment-analyzer/sample.json b/examples/tensorflow/sentiment-analyzer/sample.json new file mode 100644 index 0000000000..c433e33216 --- /dev/null +++ b/examples/tensorflow/sentiment-analyzer/sample.json @@ -0,0 +1,3 @@ +{ + "review": "the movie was amazing!" +} diff --git a/examples/tensorflow/text-generator/README.md b/examples/tensorflow/text-generator/README.md deleted file mode 100644 index 1d9c96653f..0000000000 --- a/examples/tensorflow/text-generator/README.md +++ /dev/null @@ -1,86 +0,0 @@ -# Self-host OpenAI's GPT-2 as a service - -This example shows how to deploy OpenAI's GPT-2 model as a service on AWS. - -## Define a deployment - -A `deployment` specifies a set of resources that are deployed together. An `api` makes our exported model available as a web service that can serve real-time predictions. 
This configuration will download the 124M GPT-2 model from the `cortex-examples` S3 bucket, preprocess the payload and postprocess the inference with functions defined in `handler.py`, and deploy each replica of the API on 1 GPU: - -```yaml -# cortex.yaml - -- kind: deployment - name: text - -- kind: api - name: generator - tensorflow: - model: s3://cortex-examples/text-generator/gpt-2/124M - request_handler: handler.py - compute: - cpu: 1 - gpu: 1 -``` - - -You can run the code that generated the exported GPT-2 model [here](https://colab.research.google.com/github/cortexlabs/cortex/blob/master/examples/tensorflow/text-generator/gpt-2.ipynb). - -## Add request handling - -The model requires encoded data for inference, but the API should accept strings of natural language as input. It should also decode the inference output as human-readable text. - -```python -# handler.py - -from encoder import get_encoder -encoder = get_encoder() - -def pre_inference(sample, signature, metadata): - context = encoder.encode(sample["text"]) - return {"context": [context]} - -def post_inference(prediction, signature, metadata): - response = prediction["sample"] - return encoder.decode(response) -``` - -## Deploy to AWS - -`cortex deploy` takes the declarative configuration from `cortex.yaml` and creates it on the cluster. - -```bash -$ cortex deploy - -creating generator -``` - -Behind the scenes, Cortex containerizes our implementation, makes it servable using Flask, exposes the endpoint with a load balancer, and orchestrates the workload on Kubernetes. - -We can track the status of a deployment using `cortex get`: - -```bash -$ cortex get generator --watch - -status up-to-date available requested last update avg latency -live 1 1 1 8s - -``` - -The output above indicates that one replica of the API was requested and is available to serve predictions. Cortex will automatically launch more replicas if the load increases and spin down replicas if there is unused capacity. - -## Serve real-time predictions - -```bash -$ cortex get generator - -We can use `curl` to test our prediction service: - -endpoint: http://***.amazonaws.com/text/generator - -$ curl http://***.amazonaws.com/text/generator \ - -X POST -H "Content-Type: application/json" \ - -d '{"text": "machine learning"}' - -Machine learning, with more than one thousand researchers around the world today, are looking to create computer-driven machine learning algorithms that can also be applied to human and social problems, such as education, health care, employment, medicine, politics, or the environment... -``` - -Any questions? [chat with us](https://gitter.im/cortexlabs/cortex). diff --git a/examples/xgboost/iris-classifier/README.md b/examples/xgboost/iris-classifier/README.md deleted file mode 100644 index e276414604..0000000000 --- a/examples/xgboost/iris-classifier/README.md +++ /dev/null @@ -1,80 +0,0 @@ -# Deploy an iris classifier - -This example shows how to deploy a classifier trained on the famous [iris data set](https://archive.ics.uci.edu/ml/datasets/iris). - -## Define a deployment - -A `deployment` specifies a set of resources that are deployed together. An `api` makes our exported model available as a web service that can serve real-time predictions. 
This configuration will deploy our model from the `cortex-examples` S3 bucket: - -```yaml -# cortex.yaml - -- kind: deployment - name: iris - -- kind: api - name: xgboost - onnx: - model: s3://cortex-examples/xgboost/iris-classifier/gbtree.onnx - request_handler: handler.py - tracker: - model_type: classification -``` - - -You can run the code that generated the exported model used in this example [here](https://colab.research.google.com/github/cortexlabs/cortex/blob/master/examples/xgboost/iris-classifier/xgboost.ipynb). - -## Add request handling - -The API should convert the model’s prediction to a human readable label before responding. This can be implemented in a request handler file: - -```python -# handler.py - -labels = ["iris-setosa", "iris-versicolor", "iris-virginica"] - -def post_inference(prediction, metadata): - label_index = int(prediction["class_ids"][0]) - return labels[label_index] -``` - -## Deploy to AWS - -`cortex deploy` takes the declarative configuration from `cortex.yaml` and creates it on the cluster: - -```bash -$ cortex deploy - -creating classifier -``` - -Behind the scenes, Cortex containerizes our implementation, makes it servable using Flask, exposes the endpoint with a load balancer, and orchestrates the workload on Kubernetes. - -We can track the status of a deployment using `cortex get`: - -```bash -$ cortex get classifier --watch - -status up-to-date available requested last update avg latency -live 1 1 1 8s - -``` - -The output above indicates that one replica of the API was requested and is available to serve predictions. Cortex will automatically launch more replicas if the load increases and spin down replicas if there is unused capacity. - -## Serve real-time predictions - -We can use `curl` to test our prediction service: - -```bash -$ cortex get classifier - -endpoint: http://***.amazonaws.com/iris/classifier - -$ curl http://***.amazonaws.com/iris/classifier \ - -X POST -H "Content-Type: application/json" \ - -d '{"sepal_length": 5.2, "sepal_width": 3.6, "petal_length": 1.4, "petal_width": 0.3}' - -"iris-setosa" -``` - -Any questions? [chat with us](https://gitter.im/cortexlabs/cortex). diff --git a/manager/info.sh b/manager/info.sh index cb5f140e4d..2bf61f8ab6 100755 --- a/manager/info.sh +++ b/manager/info.sh @@ -27,7 +27,8 @@ function get_apis_endpoint() { } if ! 
eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION >/dev/null 2>&1; then
-  echo "error: there isn't a cortex cluster named \"$CORTEX_CLUSTER_NAME\" in $CORTEX_REGION; please update your configuration to point to an existing cortex cluster or create a cortex cluster with \`cortex cluster up\`"
+  # note: if modifying this string, search the codebase for it and change all occurrences
+  echo "error: there is no cluster named \"$CORTEX_CLUSTER_NAME\" in $CORTEX_REGION; please update your configuration to point to an existing cortex cluster or create a cortex cluster with \`cortex cluster up\`"
   exit 1
 fi
diff --git a/manager/install.sh b/manager/install.sh
index b914b6eaa8..43b6094c44 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -28,7 +28,8 @@ function ensure_eks() {
   # No cluster
   if [ $cluster_info_exit_code -ne 0 ]; then
     if [ "$arg1" = "--update" ]; then
-      echo "error: there isn't a cortex cluster named \"$CORTEX_CLUSTER_NAME\" in $CORTEX_REGION; please update your configuration to point to an existing cortex cluster or create a cortex cluster with \`cortex cluster up\`"
+      # note: if modifying this string, search the codebase for it and change all occurrences
+      echo "error: there is no cluster named \"$CORTEX_CLUSTER_NAME\" in $CORTEX_REGION; please update your configuration to point to an existing cortex cluster or create a cortex cluster with \`cortex cluster up\`"
       exit 1
     fi
diff --git a/manager/refresh.sh b/manager/refresh.sh
index c0a690ccff..c8e73462ee 100755
--- a/manager/refresh.sh
+++ b/manager/refresh.sh
@@ -18,7 +18,8 @@ set -e

 if ! eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION >/dev/null 2>&1; then
-  echo "error: there isn't a cortex cluster named \"$CORTEX_CLUSTER_NAME\" in $CORTEX_REGION; please update your configuration to point to an existing cortex cluster or create a cortex cluster with \`cortex cluster up\`"
+  # note: if modifying this string, search the codebase for it and change all occurrences
+  echo "error: there is no cluster named \"$CORTEX_CLUSTER_NAME\" in $CORTEX_REGION; please update your configuration to point to an existing cortex cluster or create a cortex cluster with \`cortex cluster up\`"
   exit 1
 fi