diff --git a/.gitbook.yaml b/.gitbook.yaml index f11605f22c..8e0ed9a10b 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -8,3 +8,6 @@ redirects: tutorial: ../examples/pytorch/text-generator/README.md tutorial/realtime: ../examples/pytorch/text-generator/README.md tutorial/batch: ../examples/batch/image-classifier/README.md + install: ./aws/install.md + uninstall: ./aws/uninstall.md + update: ./aws/update.md diff --git a/cli/cluster/errors.go b/cli/cluster/errors.go index 206dd10a0d..a0a953428b 100644 --- a/cli/cluster/errors.go +++ b/cli/cluster/errors.go @@ -21,6 +21,7 @@ import ( "net/url" "strings" + "github.com/cortexlabs/cortex/pkg/consts" "github.com/cortexlabs/cortex/pkg/lib/errors" "github.com/cortexlabs/cortex/pkg/lib/urls" ) @@ -61,8 +62,7 @@ func ErrorFailedToConnectOperator(originalError error, envName string, operatorU msg += fmt.Sprintf(" → otherwise you can ignore this message, and prevent it in the future with `cortex env delete %s`\n", envName) msg += "\nif you have a cluster running:\n" msg += fmt.Sprintf(" → run `cortex cluster info --configure-env %s` to update your environment (include `--config ` if you have a cluster configuration file)\n", envName) - // CORTEX_VERSION_MINOR - msg += " → if you set `operator_load_balancer_scheme: internal` in your cluster configuration file, your CLI must run from within a VPC that has access to your cluster's VPC (see https://docs.cortex.dev/v/master/guides/vpc-peering)\n" + msg += fmt.Sprintf(" → if you set `operator_load_balancer_scheme: internal` in your cluster configuration file, your CLI must run from within a VPC that has access to your cluster's VPC (see https://docs.cortex.dev/v/%s/aws/vpc-peering)\n", consts.CortexVersionMinor) } return errors.WithStack(&errors.Error{ diff --git a/cli/cmd/errors.go b/cli/cmd/errors.go index db513dff84..7096c5af80 100644 --- a/cli/cmd/errors.go +++ b/cli/cmd/errors.go @@ -248,7 +248,7 @@ func ErrorMissingAWSCredentials() error { func 
ErrorCredentialsInClusterConfig(cmd string, path string) error { return errors.WithStack(&errors.Error{ Kind: ErrCredentialsInClusterConfig, - Message: fmt.Sprintf("specifying credentials in the cluster configuration is no longer supported, please specify aws credentials using flags (e.g. cortex cluster %s --config %s --aws-key --aws-secret ) or set environment variables; see https://docs.cortex.dev/v/%s/miscellaneous/security#iam-permissions for more information", cmd, path, consts.CortexVersionMinor), + Message: fmt.Sprintf("specifying credentials in the cluster configuration is no longer supported, please specify aws credentials using flags (e.g. cortex cluster %s --config %s --aws-key --aws-secret ) or set environment variables; see https://docs.cortex.dev/v/%s/aws/security#iam-permissions for more information", cmd, path, consts.CortexVersionMinor), }) } diff --git a/cli/cmd/lib_aws_creds.go b/cli/cmd/lib_aws_creds.go index 3b1872a814..d2a8866393 100644 --- a/cli/cmd/lib_aws_creds.go +++ b/cli/cmd/lib_aws_creds.go @@ -21,6 +21,7 @@ import ( "os" "path/filepath" + "github.com/cortexlabs/cortex/pkg/consts" "github.com/cortexlabs/cortex/pkg/lib/aws" "github.com/cortexlabs/cortex/pkg/lib/errors" libjson "github.com/cortexlabs/cortex/pkg/lib/json" @@ -68,7 +69,7 @@ func promptIfNotAdmin(awsClient *aws.Client, disallowPrompt bool) { } if !awsClient.IsAdmin() { - warningStr := fmt.Sprintf("warning: your IAM user%s does not have administrator access. This will likely prevent Cortex from installing correctly, so it is recommended to attach the AdministratorAccess policy to your IAM user (or to a group that your IAM user belongs to) via the AWS IAM console. If you'd like, you may provide separate credentials for your cluster to use after it's running (see https://docs.cortex.dev/miscellaneous/security for instructions).\n\n", accessKeyMsg) + warningStr := fmt.Sprintf("warning: your IAM user%s does not have administrator access. 
This will likely prevent Cortex from installing correctly, so it is recommended to attach the AdministratorAccess policy to your IAM user (or to a group that your IAM user belongs to) via the AWS IAM console. If you'd like, you may provide separate credentials for your cluster to use after it's running (see https://docs.cortex.dev/v/%s/aws/security for instructions).\n\n", accessKeyMsg, consts.CortexVersionMinor) if disallowPrompt { fmt.Print(warningStr) } else { diff --git a/cli/cmd/lib_cluster_config.go b/cli/cmd/lib_cluster_config.go index fe3b85f2e4..84ca04f1c4 100644 --- a/cli/cmd/lib_cluster_config.go +++ b/cli/cmd/lib_cluster_config.go @@ -70,7 +70,7 @@ func readCachedClusterConfigFile(clusterConfig *clusterconfig.Config, filePath s func readUserClusterConfigFile(clusterConfig *clusterconfig.Config) error { errs := cr.ParseYAMLFile(clusterConfig, clusterconfig.UserValidation, _flagClusterConfig) if errors.HasError(errs) { - return errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/cluster-management/config", consts.CortexVersionMinor)) + return errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) } return nil @@ -85,7 +85,7 @@ func getNewClusterAccessConfig(disallowPrompt bool) (*clusterconfig.AccessConfig if _flagClusterConfig != "" { errs := cr.ParseYAMLFile(accessConfig, clusterconfig.AccessValidation, _flagClusterConfig) if errors.HasError(errs) { - return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/cluster-management/config", consts.CortexVersionMinor)) + return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) } } @@ 
-121,7 +121,7 @@ func getClusterAccessConfigWithCache(disallowPrompt bool) (*clusterconfig.Access if _flagClusterConfig != "" { errs := cr.ParseYAMLFile(accessConfig, clusterconfig.AccessValidation, _flagClusterConfig) if errors.HasError(errs) { - return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/cluster-management/config", consts.CortexVersionMinor)) + return nil, errors.Append(errors.FirstError(errs...), fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) } } @@ -206,7 +206,7 @@ func getInstallClusterConfig(awsCreds AWSCredentials, accessConfig clusterconfig err = clusterConfig.Validate(awsClient) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/cluster-management/config", consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) if _flagClusterConfig != "" { err = errors.Wrap(err, _flagClusterConfig) } @@ -272,7 +272,7 @@ func getConfigureClusterConfig(cachedClusterConfig clusterconfig.Config, awsCred err = userClusterConfig.Validate(awsClient) if err != nil { - err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/cluster-management/config", consts.CortexVersionMinor)) + err = errors.Append(err, fmt.Sprintf("\n\ncluster configuration schema can be found here: https://docs.cortex.dev/v/%s/aws/install", consts.CortexVersionMinor)) if _flagClusterConfig != "" { err = errors.Wrap(err, _flagClusterConfig) } @@ -556,23 +556,23 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsCreds A fmt.Printf("cortex will also create an s3 bucket (%s) and a cloudwatch log group (%s)%s\n\n", 
clusterConfig.Bucket, clusterConfig.ClusterName, privateSubnetMsg) if clusterConfig.APIGatewaySetting == clusterconfig.NoneAPIGatewaySetting { - fmt.Print("warning: you've disabled API Gateway cluster-wide, so APIs will not be able to create API Gateway endpoints (they will still be reachable via the API load balancer; see https://docs.cortex.dev/deployments/networking for more information)\n\n") + fmt.Print(fmt.Sprintf("warning: you've disabled API Gateway cluster-wide, so APIs will not be able to create API Gateway endpoints (they will still be reachable via the API load balancer; see https://docs.cortex.dev/v/%s/aws/networking for more information)\n\n", consts.CortexVersionMinor)) } if clusterConfig.OperatorLoadBalancerScheme == clusterconfig.InternalLoadBalancerScheme { - fmt.Print("warning: you've configured the operator load balancer to be internal; you must configure VPC Peering to connect your CLI to your cluster operator (see https://docs.cortex.dev/guides/vpc-peering)\n\n") + fmt.Print(fmt.Sprintf("warning: you've configured the operator load balancer to be internal; you must configure VPC Peering to connect your CLI to your cluster operator (see https://docs.cortex.dev/v/%s/aws/vpc-peering)\n\n", consts.CortexVersionMinor)) } if isSpot && clusterConfig.SpotConfig.OnDemandBackup != nil && !*clusterConfig.SpotConfig.OnDemandBackup { if *clusterConfig.SpotConfig.OnDemandBaseCapacity == 0 && *clusterConfig.SpotConfig.OnDemandPercentageAboveBaseCapacity == 0 { - fmt.Printf("warning: you've disabled on-demand instances (%s=0 and %s=0); spot instances are not guaranteed to be available so please take that into account for production clusters; see https://docs.cortex.dev/v/%s/cluster-management/spot-instances for more information\n\n", clusterconfig.OnDemandBaseCapacityKey, clusterconfig.OnDemandPercentageAboveBaseCapacityKey, consts.CortexVersionMinor) + fmt.Printf("warning: you've disabled on-demand instances (%s=0 and %s=0); spot instances are not guaranteed 
to be available so please take that into account for production clusters; see https://docs.cortex.dev/v/%s/aws/spot for more information\n\n", clusterconfig.OnDemandBaseCapacityKey, clusterconfig.OnDemandPercentageAboveBaseCapacityKey, consts.CortexVersionMinor) } else { - fmt.Printf("warning: you've enabled spot instances; spot instances are not guaranteed to be available so please take that into account for production clusters; see https://docs.cortex.dev/v/%s/cluster-management/spot-instances for more information\n\n", consts.CortexVersionMinor) + fmt.Printf("warning: you've enabled spot instances; spot instances are not guaranteed to be available so please take that into account for production clusters; see https://docs.cortex.dev/v/%s/aws/spot for more information\n\n", consts.CortexVersionMinor) } } if !disallowPrompt { - exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/cluster-management/config for more information", consts.CortexVersionMinor) + exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/aws/install for more information", consts.CortexVersionMinor) prompt.YesOrExit("would you like to continue?", "", exitMessage) } } @@ -581,7 +581,7 @@ func confirmConfigureClusterConfig(clusterConfig clusterconfig.Config, awsCreds fmt.Println(clusterConfigConfirmationStr(clusterConfig, awsCreds, awsClient)) if !disallowPrompt { - exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/cluster-management/config for more information", consts.CortexVersionMinor) + exitMessage := fmt.Sprintf("cluster configuration can be modified via the cluster config file; see https://docs.cortex.dev/v/%s/aws/install for more information", consts.CortexVersionMinor) prompt.YesOrExit(fmt.Sprintf("your cluster named \"%s\" in %s will be updated according to the 
configuration above, are you sure you want to continue?", clusterConfig.ClusterName, *clusterConfig.Region), "", exitMessage) } } diff --git a/cli/local/api.go b/cli/local/api.go index 66923103fa..0c6a9dd85b 100644 --- a/cli/local/api.go +++ b/cli/local/api.go @@ -56,9 +56,9 @@ func UpdateAPI(apiConfig *userconfig.API, models []spec.CuratedModelResource, co fmt.Sprintf( "api %s was deployed using CLI version %s but the current CLI version is %s; "+ "re-deploying %s with current CLI version %s might yield an unexpected outcome; any cached models won't be deleted\n\n"+ - "it is recommended to download version %s of the CLI from https://docs.cortex.dev/v/%s/install, delete the API using version %s of the CLI and then re-deploy the API using the latest version of the CLI\n\n"+ + "it is recommended to install version %s of the CLI (pip install cortex==%s), delete the API using version %s of the CLI, and then re-deploy the API using the latest version of the CLI\n\n"+ "do you still want to re-deploy?", - apiConfig.Name, incompatibleMinorVersion, consts.CortexVersionMinor, apiConfig.Name, consts.CortexVersionMinor, incompatibleMinorVersion, incompatibleMinorVersion, incompatibleMinorVersion), + apiConfig.Name, incompatibleMinorVersion, consts.CortexVersionMinor, apiConfig.Name, consts.CortexVersionMinor, incompatibleMinorVersion, incompatibleVersion, incompatibleMinorVersion), "", "", ) } diff --git a/cli/local/delete.go b/cli/local/delete.go index b1355a76cc..8d84245553 100644 --- a/cli/local/delete.go +++ b/cli/local/delete.go @@ -48,9 +48,9 @@ func Delete(apiName string, keepCache, deleteForce bool) (schema.DeleteResponse, fmt.Sprintf( "api %s was deployed using CLI version %s but the current CLI version is %s; "+ "deleting %s with current CLI version %s might lead to an unexpected state; any cached models won't be deleted\n\n"+ - "it is recommended to download version %s of the CLI from https://docs.cortex.dev/v/%s/install, delete the API using version %s of the CLI 
and then re-deploy the API using the latest version of the CLI\n\n"+ + "it is recommended to download version %s of the CLI (pip install cortex==%s), delete the API using version %s of the CLI, and then re-deploy the API using the latest version of the CLI\n\n"+ "do you still want to delete?", - apiName, incompatibleMinorVersion, consts.CortexVersionMinor, apiName, consts.CortexVersionMinor, incompatibleMinorVersion, incompatibleMinorVersion, incompatibleMinorVersion), + apiName, incompatibleMinorVersion, consts.CortexVersionMinor, apiName, consts.CortexVersionMinor, incompatibleMinorVersion, incompatibleVersion, incompatibleMinorVersion), "", "", ) } diff --git a/cli/local/validations.go b/cli/local/validations.go index 5e90e9c196..9a77597b8f 100644 --- a/cli/local/validations.go +++ b/cli/local/validations.go @@ -39,7 +39,7 @@ import ( "github.com/cortexlabs/cortex/pkg/types/userconfig" ) -var _startingPort = 8890 +var _startingPort = 8889 type ProjectFiles struct { relFilePaths []string diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index a017e6f458..0000000000 --- a/docs/README.md +++ /dev/null @@ -1 +0,0 @@ -Please refer to [cortex.dev](https://cortex.dev) for documentation on the latest stable version. diff --git a/docs/aws/credentials.md b/docs/aws/credentials.md new file mode 100644 index 0000000000..0e62e63c2d --- /dev/null +++ b/docs/aws/credentials.md @@ -0,0 +1,7 @@ +# Credentials + +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + +1. Follow this [tutorial](https://aws.amazon.com/premiumsupport/knowledge-center/create-access-key) to create an access key. +1. Enable programmatic access for the IAM user, and attach the built-in `AdministratorAccess` policy to your IAM user. +1. See [security](security.md) if you'd like to use less privileged credentials after spinning up your cluster. 
diff --git a/docs/guides/custom-domain.md b/docs/aws/custom-domain.md similarity index 100% rename from docs/guides/custom-domain.md rename to docs/aws/custom-domain.md diff --git a/docs/aws/install.md b/docs/aws/install.md new file mode 100644 index 0000000000..67bda7563b --- /dev/null +++ b/docs/aws/install.md @@ -0,0 +1,115 @@ +# Install + +_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ + +## Spin up Cortex on your AWS account + +Make sure [Docker](https://docs.docker.com/install) is running on your machine. + +If you're using GPUs, subscribe to the [EKS-optimized AMI with GPU Support](https://aws.amazon.com/marketplace/pp/B07GRHFXGM) before creating your cluster. + +```bash +# install the CLI +pip install cortex + +# spin up Cortex on your AWS account +cortex cluster up # or: cortex cluster up --config cluster.yaml (see configuration options below) + +# set the default environment +cortex env default aws +``` + + +Try the [tutorial](../../examples/pytorch/text-generator/README.md) or deploy one of our [examples](https://github.com/cortexlabs/cortex/tree/master/examples). + +## Configure Cortex + + +```yaml +# cluster.yaml + +# EKS cluster name +cluster_name: cortex + +# AWS region +region: us-east-1 + +# list of availability zones for your region +availability_zones: # default: 3 random availability zones in your region, e.g. 
[us-east-1a, us-east-1b, us-east-1c] + +# instance type +instance_type: m5.large + +# minimum number of instances +min_instances: 1 + +# maximum number of instances +max_instances: 5 + +# disk storage size per instance (GB) +instance_volume_size: 50 + +# instance volume type [gp2 | io1 | st1 | sc1] +instance_volume_type: gp2 + +# instance volume iops (only applicable to io1) +# instance_volume_iops: 3000 + +# subnet visibility [public (instances will have public IPs) | private (instances will not have public IPs)] +subnet_visibility: public + +# NAT gateway (required when using private subnets) [none | single | highly_available (a NAT gateway per availability zone)] +nat_gateway: none + +# API load balancer scheme [internet-facing | internal] +api_load_balancer_scheme: internet-facing + +# operator load balancer scheme [internet-facing | internal] +# note: if using "internal", you must configure VPC Peering to connect your CLI to your cluster operator (https://docs.cortex.dev/v/master/aws/vpc-peering) +operator_load_balancer_scheme: internet-facing + +# API Gateway [public (API Gateway will be used by default, can be disabled per API) | none (API Gateway will be disabled for all APIs)] +api_gateway: public + +# additional tags to assign to AWS resources (all resources will automatically be tagged with cortex.dev/cluster-name: ) +tags: # : map of key/value pairs + +# enable spot instances +spot: false + +# SSL certificate ARN (only necessary when using a custom domain without API Gateway) +ssl_certificate_arn: + +# primary CIDR block for the cluster's VPC +vpc_cidr: 192.168.0.0/16 +``` + +The docker images used by the Cortex cluster can also be overridden, although this is not common. 
They can be configured by adding any of these keys to your cluster configuration file (default values are shown): + + +```yaml +image_operator: quay.io/cortexlabs/operator:master +image_manager: quay.io/cortexlabs/manager:master +image_downloader: quay.io/cortexlabs/downloader:master +image_request_monitor: quay.io/cortexlabs/request-monitor:master +image_cluster_autoscaler: quay.io/cortexlabs/cluster-autoscaler:master +image_metrics_server: quay.io/cortexlabs/metrics-server:master +image_inferentia: quay.io/cortexlabs/inferentia:master +image_neuron_rtd: quay.io/cortexlabs/neuron-rtd:master +image_nvidia: quay.io/cortexlabs/nvidia:master +image_fluentd: quay.io/cortexlabs/fluentd:master +image_statsd: quay.io/cortexlabs/statsd:master +image_istio_proxy: quay.io/cortexlabs/istio-proxy:master +image_istio_pilot: quay.io/cortexlabs/istio-pilot:master +``` + +The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and in your [Batch API configuration](../deployments/batch-api/api-configuration.md). 
+ +## Advanced + +* [Security](security.md) +* [VPC peering](vpc-peering.md) +* [Custom domain](custom-domain.md) +* [REST API Gateway](rest-api-gateway.md) +* [Spot instances](spot.md) +* [SSH into instances](ssh.md) diff --git a/docs/deployments/networking.md b/docs/aws/networking.md similarity index 66% rename from docs/deployments/networking.md rename to docs/aws/networking.md index 2d9968112b..eddddb4a31 100644 --- a/docs/deployments/networking.md +++ b/docs/aws/networking.md @@ -4,17 +4,17 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t ![api architecture diagram](https://user-images.githubusercontent.com/808475/84695323-8507dd00-aeff-11ea-8b32-5a55cef76c79.png) -APIs are deployed with a public API Gateway by default (the API Gateway forwards requests to the API load balancer). Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [Realtime API configuration](realtime-api/api-configuration.md) and [Batch API configuration](batch-api/api-configuration.md). If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29 second request timeout, or if you are keeping your APIs private to your VPC. See below for common configurations. To disable API Gateway cluster-wide (thereby enforcing that all APIs cannot create API Gateway endpoints), set `api_gateway: none` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). +APIs are deployed with a public API Gateway by default (the API Gateway forwards requests to the API load balancer). 
Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and [Batch API configuration](../deployments/batch-api/api-configuration.md). If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29 second request timeout, or if you are keeping your APIs private to your VPC. See below for common configurations. To disable API Gateway cluster-wide (thereby enforcing that all APIs cannot create API Gateway endpoints), set `api_gateway: none` in your [cluster configuration](install.md) file (before creating your cluster). -By default, the API load balancer is public. You can configure your API load balancer to be private by setting `api_load_balancer_scheme: internal` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). This will force external traffic to go through your API Gateway endpoint, or if you disabled API Gateway for your API, it will make your API only accessible through VPC Peering. Note that if API Gateway is used, endpoints will be public regardless of `api_load_balancer_scheme`. See below for common configurations. +By default, the API load balancer is public. You can configure your API load balancer to be private by setting `api_load_balancer_scheme: internal` in your [cluster configuration](install.md) file (before creating your cluster). This will force external traffic to go through your API Gateway endpoint, or if you disabled API Gateway for your API, it will make your API only accessible through VPC Peering. 
Note that if API Gateway is used, endpoints will be public regardless of `api_load_balancer_scheme`. See below for common configurations. -The API Gateway that Cortex creates in AWS is the "HTTP" type. If you need to use AWS's "REST" API Gateway, see [here](../guides/rest-api-gateway.md). +The API Gateway that Cortex creates in AWS is the "HTTP" type. If you need to use AWS's "REST" API Gateway, see [here](rest-api-gateway.md). ## Common API networking configurations ### Public https endpoint (with API Gateway) -This is the most common configuration for public APIs. [Custom domains](../guides/custom-domain.md) can be used with this setup, but are not required. +This is the most common configuration for public APIs. [Custom domains](custom-domain.md) can be used with this setup, but are not required. ```yaml # cluster.yaml @@ -33,9 +33,9 @@ api_load_balancer_scheme: internal ### Private https endpoint -You can configure your API to be private. If you do this, you must use [VPC Peering](../guides/vpc-peering.md) to connect to your APIs. +You can configure your API to be private. If you do this, you must use [VPC Peering](vpc-peering.md) to connect to your APIs. -The SSL certificate on the API load balancer is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`). Alternatively, you can set up a [custom domain](../guides/custom-domain.md), which will use ACM to provision SSL certs for your domain. +The SSL certificate on the API load balancer is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`). Alternatively, you can set up a [custom domain](custom-domain.md), which will use ACM to provision SSL certs for your domain. 
```yaml # cluster.yaml @@ -58,7 +58,7 @@ ssl_certificate_arn: arn:aws:acm:us-west-2:***:certificate/*** ### Private http endpoint -You can configure your API to be private. If you do this, you must use [VPC Peering](../guides/vpc-peering.md) to connect to your APIs. +You can configure your API to be private. If you do this, you must use [VPC Peering](vpc-peering.md) to connect to your APIs. ```yaml # cluster.yaml @@ -79,7 +79,7 @@ api_load_balancer_scheme: internal # this is the default, so can be omitted API gateway is generally recommended for public https APIs, but there may be a situation where you don't wish to use it (e.g. requests take longer than 29 seconds to complete, which is the max for API Gateway). In this case, clients can connect directly to the API load balancer. -The SSL certificate on the API load balancer is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`). Alternatively, you can set up a [custom domain](../guides/custom-domain.md), which will use ACM to provision SSL certs for your domain. +The SSL certificate on the API load balancer is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`). Alternatively, you can set up a [custom domain](custom-domain.md), which will use ACM to provision SSL certs for your domain. 
```yaml # cluster.yaml diff --git a/docs/guides/rest-api-gateway.md b/docs/aws/rest-api-gateway.md similarity index 96% rename from docs/guides/rest-api-gateway.md rename to docs/aws/rest-api-gateway.md index e0e0be62ef..cc5a5ced37 100644 --- a/docs/guides/rest-api-gateway.md +++ b/docs/aws/rest-api-gateway.md @@ -1,8 +1,8 @@ -# Set up REST API Gateway +# REST API Gateway _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -When `api_gateway: public` is set in your API's `networking` configuration (which is the default setting), Cortex will create an "HTTP" API Gateway in AWS for your API (see the [networking docs](../deployments/networking.md) for more information). +When `api_gateway: public` is set in your API's `networking` configuration (which is the default setting), Cortex will create an "HTTP" API Gateway in AWS for your API (see the [networking docs](networking.md) for more information). However, there may be situations where you need to use AWS's "REST" API Gateway, e.g. to enforce IAM-based auth. Until [#1197](https://github.com/cortexlabs/cortex/issues/1197) is resolved, a REST API Gateway can be used by following these steps. @@ -16,7 +16,7 @@ If your API load balancer is internal (i.e. you set `api_load_balancer_scheme: i Disable the default API Gateway: -* If you haven't created your cluster yet, you can set `api_gateway: none` in your [cluster configuration file](../cluster-management/config.md) before creating your cluster. +* If you haven't created your cluster yet, you can set `api_gateway: none` in your [cluster configuration file](install.md) before creating your cluster. * If you have already created your cluster, you can set `api_gateway: none` in the `networking` field of your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and/or [Batch API configuration](../deployments/batch-api/api-configuration.md), and then re-deploy your API. 
### Step 2 @@ -95,7 +95,7 @@ Delete the API Gateway before spinning down your Cortex cluster: Disable the default API Gateway: -* If you haven't created your cluster yet, you can set `api_gateway: none` in your [cluster configuration file](../cluster-management/config.md) before creating your cluster. +* If you haven't created your cluster yet, you can set `api_gateway: none` in your [cluster configuration file](install.md) before creating your cluster. * If you have already created your cluster, you can set `api_gateway: none` in the `networking` field of your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and/or [Batch API configuration](../deployments/batch-api/api-configuration.md), and then re-deploy your API. ### Step 2 diff --git a/docs/miscellaneous/security.md b/docs/aws/security.md similarity index 92% rename from docs/miscellaneous/security.md rename to docs/aws/security.md index 6795bb3fed..a84faa63d6 100644 --- a/docs/miscellaneous/security.md +++ b/docs/aws/security.md @@ -6,15 +6,15 @@ _The information on this page assumes you are running Cortex on AWS. If you're o ## Private cluster subnets -By default, instances are created in public subnets and are assigned public IP addresses. You can configure all instances in your cluster to use private subnets by setting `subnet_visibility: private` in your [cluster configuration](../cluster-management/config.md) file before creating your cluster. If private subnets are used, instances will not have public IP addresses, and Cortex will create a NAT gateway to allow outgoing network requests. +By default, instances are created in public subnets and are assigned public IP addresses. You can configure all instances in your cluster to use private subnets by setting `subnet_visibility: private` in your [cluster configuration](install.md) file before creating your cluster. 
If private subnets are used, instances will not have public IP addresses, and Cortex will create a NAT gateway to allow outgoing network requests. ## Private APIs -See [networking](../deployments/networking.md) for a discussion of API visibility. +See [networking](networking.md) for a discussion of API visibility. ## Private operator -By default, the Cortex cluster operator's load balancer is internet-facing, and therefore publicly accessible (the operator is what the `cortex` CLI connects to). The operator validates that the CLI user is an active IAM user in the same AWS account as the Cortex cluster (see [below](#cli)). Therefore it is usually unnecessary to configure the operator's load balancer to be private, but this can be done by by setting `operator_load_balancer_scheme: internal` in your [cluster configuration](../cluster-management/config.md) file. If you do this, you will need to configure [VPC Peering](../guides/vpc-peering.md) to allow your CLI to connect to the Cortex operator (this will be necessary to run any `cortex` commands). +By default, the Cortex cluster operator's load balancer is internet-facing, and therefore publicly accessible (the operator is what the `cortex` CLI connects to). The operator validates that the CLI user is an active IAM user in the same AWS account as the Cortex cluster (see [below](#cli)). Therefore it is usually unnecessary to configure the operator's load balancer to be private, but this can be done by setting `operator_load_balancer_scheme: internal` in your [cluster configuration](install.md) file. If you do this, you will need to configure [VPC Peering](vpc-peering.md) to allow your CLI to connect to the Cortex operator (this will be necessary to run any `cortex` commands). 
## IAM permissions diff --git a/docs/cluster-management/spot-instances.md b/docs/aws/spot.md similarity index 88% rename from docs/cluster-management/spot-instances.md rename to docs/aws/spot.md index e68af55a04..169ddf8466 100644 --- a/docs/cluster-management/spot-instances.md +++ b/docs/aws/spot.md @@ -2,12 +2,12 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -[Spot instances](https://aws.amazon.com/ec2/spot/) are spare capacity that AWS sells at a discount (up to 90%). The caveat is that spot instances may not always be available, and can be recalled by AWS at anytime. Cortex allows you to use spot instances in your cluster to take advantage of the discount while ensuring uptime and reliability of APIs. You can configure your cluster to use spot instances using the configuration below: +[Spot instances](https://aws.amazon.com/ec2/spot) are spare capacity that AWS sells at a discount (up to 90%). The caveat is that spot instances may not always be available, and can be recalled by AWS at any time. Cortex allows you to use spot instances in your cluster to take advantage of the discount while ensuring uptime and reliability of APIs. 
You can configure your cluster to use spot instances using the configuration below: ```yaml # cluster.yaml -# whether to use spot instances in the cluster; spot instances are not guaranteed to be available so please take that into account for production clusters (default: false) +# whether to use spot instances in the cluster (default: false) spot: false spot_config: diff --git a/docs/guides/ssh-instance.md b/docs/aws/ssh.md similarity index 96% rename from docs/guides/ssh-instance.md rename to docs/aws/ssh.md index 4aeeb95033..7c716b87fc 100644 --- a/docs/guides/ssh-instance.md +++ b/docs/aws/ssh.md @@ -1,8 +1,8 @@ -# SSH into worker instance +# SSH into instances _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -There are some cases when SSH-ing into an AWS Cortex instance may be necessary. +There are some cases when SSH-ing into an EC2 instance may be necessary. This can be done via the AWS web UI or via the terminal. The first 5 steps are identical for both approaches. diff --git a/docs/troubleshooting/cluster-down.md b/docs/aws/uninstall.md similarity index 62% rename from docs/troubleshooting/cluster-down.md rename to docs/aws/uninstall.md index aee3e666f9..17d86e9d9e 100644 --- a/docs/troubleshooting/cluster-down.md +++ b/docs/aws/uninstall.md @@ -1,7 +1,45 @@ -# Cluster down failures +# Uninstall _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ +## Spin down Cortex + +```bash +# spin down Cortex +cortex cluster down + +# uninstall the CLI +pip uninstall cortex +rm -rf ~/.cortex +``` + +If you modified your bash profile, you may wish to remove `source <(cortex completion bash)` from it (or remove `source <(cortex completion zsh)` for `zsh`). 
+ +## Delete metadata and log groups + +Since you may wish to have access to your data after spinning down your cluster, Cortex's bucket and log groups are not automatically deleted when running `cortex cluster down`. + +To delete them: + +```bash +# set AWS credentials +export AWS_ACCESS_KEY_ID=*** +export AWS_SECRET_ACCESS_KEY=*** + +# identify the name of your cortex S3 bucket +aws s3 ls + +# delete the S3 bucket +aws s3 rb --force s3:// + +# delete the log group (replace with the name of your cluster, default: cortex) +aws logs describe-log-groups --log-group-name-prefix= --query logGroups[*].[logGroupName] --output text | xargs -I {} aws logs delete-log-group --log-group-name {} +``` + +If you've configured a custom domain for your APIs, you can remove the SSL Certificate and Hosted Zone for the domain by following these [instructions](custom-domain.md#cleanup). + +## Troubleshooting + On rare occasions, `cortex cluster down` may not be able to spin down your Cortex cluster. When this happens, follow these steps: 1. If you've manually created any AWS networking resources that are pointed to the cluster or its VPC (e.g. API Gateway VPC links, custom domains, etc), delete them from the AWS console. diff --git a/docs/cluster-management/update.md b/docs/aws/update.md similarity index 90% rename from docs/cluster-management/update.md rename to docs/aws/update.md index 65f118dfca..0591bc8a1f 100644 --- a/docs/cluster-management/update.md +++ b/docs/aws/update.md @@ -2,15 +2,13 @@ _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ -## Updating your cluster configuration - -See [cluster configuration](config.md) to learn how you can customize your cluster. 
+## Update Cortex configuration ```bash -cortex cluster configure +cortex cluster configure # or: cortex cluster configure --config cluster.yaml ``` -## Upgrading to a newer version of Cortex +## Upgrade to a newer version of Cortex @@ -28,6 +26,8 @@ cortex version cortex cluster up ``` +## Upgrade without downtime + In production environments, you can upgrade your cluster without downtime if you have a backend service or DNS in front of your Cortex cluster: 1. Spin up a new cluster. For example: `cortex cluster up --config new-cluster.yaml --configure-env new` (this will create a CLI environment named `new` for accessing the new cluster). diff --git a/docs/guides/vpc-peering.md b/docs/aws/vpc-peering.md similarity index 99% rename from docs/guides/vpc-peering.md rename to docs/aws/vpc-peering.md index 1a44d538dc..6f0892b831 100644 --- a/docs/guides/vpc-peering.md +++ b/docs/aws/vpc-peering.md @@ -1,4 +1,4 @@ -# Set up VPC peering +# VPC peering _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ diff --git a/docs/cluster-management/aws-credentials.md b/docs/cluster-management/aws-credentials.md deleted file mode 100644 index 28dc4c6c79..0000000000 --- a/docs/cluster-management/aws-credentials.md +++ /dev/null @@ -1,7 +0,0 @@ -# AWS credentials - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -As of now, Cortex only runs locally or on AWS. We plan to support other cloud providers in the future. If you don't have an AWS account you can get started with one [here](https://portal.aws.amazon.com/billing/signup#/start). - -Follow this [tutorial](https://aws.amazon.com/premiumsupport/knowledge-center/create-access-key) to create an access key. Enable programmatic access for the IAM user, and attach the built-in `AdministratorAccess` policy to your IAM user. 
If you'd like to use less privileged credentials once the Cortex cluster has been created, see [security](../miscellaneous/security.md). diff --git a/docs/cluster-management/config.md b/docs/cluster-management/config.md deleted file mode 100644 index 580c85227d..0000000000 --- a/docs/cluster-management/config.md +++ /dev/null @@ -1,100 +0,0 @@ -# Cluster configuration - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -The Cortex cluster may be configured by providing a configuration file to `cortex cluster up` or `cortex cluster configure` via the `--config` flag (e.g. `cortex cluster up --config cluster.yaml`). Below is the schema for the cluster configuration file, with default values shown (unless otherwise specified): - - -```yaml -# cluster.yaml - -# EKS cluster name for cortex (default: cortex) -cluster_name: cortex - -# AWS region -region: us-east-1 - -# S3 bucket (default: -) -# note: your cortex cluster uses this bucket for metadata storage, and it should not be accessed directly (a separate bucket should be used for your models) -bucket: # cortex- - -# list of availability zones for your region (default: 3 random availability zones from the specified region) -availability_zones: # e.g. 
[us-east-1a, us-east-1b, us-east-1c] - -# instance type -instance_type: m5.large - -# minimum number of instances (must be >= 0) -min_instances: 1 - -# maximum number of instances (must be >= 1) -max_instances: 5 - -# disk storage size per instance (GB) (default: 50) -instance_volume_size: 50 - -# instance volume type [gp2, io1, st1, sc1] (default: gp2) -instance_volume_type: gp2 - -# instance volume iops (only applicable to io1 storage type) (default: 3000) -# instance_volume_iops: 3000 - -# whether the subnets used for EC2 instances should be public or private (default: "public") -# if "public", instances will be assigned public IP addresses; if "private", instances won't have public IPs and a NAT gateway will be created to allow outgoing network requests -# see https://docs.cortex.dev/v/master/miscellaneous/security#private-cluster for more information -subnet_visibility: public # must be "public" or "private" - -# whether to include a NAT gateway with the cluster (a NAT gateway is necessary when using private subnets) -# default value is "none" if subnet_visibility is set to "public"; "single" if subnet_visibility is "private" -nat_gateway: none # must be "none", "single", or "highly_available" (highly_available means one NAT gateway per availability zone) - -# whether the API load balancer should be internet-facing or internal (default: "internet-facing") -# note: if using "internal", APIs will still be accessible via the public API Gateway endpoint unless you also disable API Gateway in your API's configuration (if you do that, you must configure VPC Peering to connect to your APIs) -# see https://docs.cortex.dev/v/master/miscellaneous/security#private-cluster for more information -api_load_balancer_scheme: internet-facing # must be "internet-facing" or "internal" - -# whether the operator load balancer should be internet-facing or internal (default: "internet-facing") -# note: if using "internal", you must configure VPC Peering to connect your CLI to your 
cluster operator (https://docs.cortex.dev/v/master/guides/vpc-peering) -# see https://docs.cortex.dev/v/master/miscellaneous/security#private-operator for more information -operator_load_balancer_scheme: internet-facing # must be "internet-facing" or "internal" - -# whether to disable API gateway cluster-wide -# if set to "public" (the default), each API can specify whether to use API Gateway -# if set to "none", no APIs will be allowed to use API Gateway -api_gateway: public # must be "public" or "none" - -# additional tags to assign to aws resources for labelling and cost allocation (by default, all resources will be tagged with cortex.dev/cluster-name=) -tags: # : map of key/value pairs - -# whether to use spot instances in the cluster (default: false) -# see https://docs.cortex.dev/v/master/cluster-management/spot-instances for additional details on spot configuration -spot: false - -# see https://docs.cortex.dev/v/master/guides/custom-domain for instructions on how to set up a custom domain -ssl_certificate_arn: - -# primary CIDR block for the cluster's VPC (default: 192.168.0.0/16) -# vpc_cidr: 192.168.0.0/16 -``` - -The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and in your [Batch API configuration](../deployments/batch-api/api-configuration.md). - -The docker images used by the Cortex cluster can also be overridden, although this is not common. 
They can be configured by adding any of these keys to your cluster configuration file (default values are shown): - - -```yaml -# docker image paths -image_operator: quay.io/cortexlabs/operator:master -image_manager: quay.io/cortexlabs/manager:master -image_downloader: quay.io/cortexlabs/downloader:master -image_request_monitor: quay.io/cortexlabs/request-monitor:master -image_cluster_autoscaler: quay.io/cortexlabs/cluster-autoscaler:master -image_metrics_server: quay.io/cortexlabs/metrics-server:master -image_inferentia: quay.io/cortexlabs/inferentia:master -image_neuron_rtd: quay.io/cortexlabs/neuron-rtd:master -image_nvidia: quay.io/cortexlabs/nvidia:master -image_fluentd: quay.io/cortexlabs/fluentd:master -image_statsd: quay.io/cortexlabs/statsd:master -image_istio_proxy: quay.io/cortexlabs/istio-proxy:master -image_istio_pilot: quay.io/cortexlabs/istio-pilot:master -``` diff --git a/docs/cluster-management/ec2-instances.md b/docs/cluster-management/ec2-instances.md deleted file mode 100644 index 206a8d2790..0000000000 --- a/docs/cluster-management/ec2-instances.md +++ /dev/null @@ -1,21 +0,0 @@ -# EC2 instances - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -There are a variety of instance types to choose from when creating a Cortex cluster. If you are unsure about which instance to pick, review these options as a starting point. - -This is not a comprehensive guide so please refer to the [AWS's documentation](https://aws.amazon.com/ec2/instance-types/) for more information. - -Note: There is an instance limit associated with your AWS account for each instance family in each region, for on-demand and for spot instances. 
You can check your current limit and request an increase [here](https://console.aws.amazon.com/servicequotas/home?#!/services/ec2/quotas) (set the region in the upper right corner to your desired region, type "on-demand" or "spot" in the search bar, and click on the quota that matches your instance type). Note that the quota values indicate the number of vCPUs available, not the number of instances; different instances have a different numbers of vCPUs, which can be seen [here](https://aws.amazon.com/ec2/instance-types/). - -| Instance Type | CPU | Memory | GPU Memory | Starting price per hour* | Notes | -| :--- | :--- | :--- | :--- | :--- | :--- | -| [T3](https://aws.amazon.com/ec2/instance-types/t3/) | low | low | - | $0.0416 (t3.medium) | good for dev clusters | -| [M5](https://aws.amazon.com/ec2/instance-types/m5/) | medium | medium | - | $0.096 (m5.large) | standard cpu-based | -| [C5](https://aws.amazon.com/ec2/instance-types/c5/) | high | medium | - | $0.085 (c5.large) | high cpu | -| [R5](https://aws.amazon.com/ec2/instance-types/r5/) | medium | high | - | $0.126 (r5.large) | high memory | -| [G4](https://aws.amazon.com/ec2/instance-types/g4/) | high | high | ~15GB (g4dn.xlarge) | $0.526 (g4dn.xlarge) | standard gpu-based | -| [P2](https://aws.amazon.com/ec2/instance-types/p2/) | high | very high | ~12GB (p2.xlarge) | $0.90 (p2.xlarge) | high host memory gpu-based | -| [Inf1](https://aws.amazon.com/ec2/instance-types/inf1/) | high | medium | ~8GB (inf1.xlarge) | $0.368 (inf1.xlarge) | very good price/performance ratio | - -* on-demand pricing for the US West (Oregon) AWS region. 
diff --git a/docs/cluster-management/install.md b/docs/cluster-management/install.md deleted file mode 100644 index 1b67d39d57..0000000000 --- a/docs/cluster-management/install.md +++ /dev/null @@ -1,107 +0,0 @@ -# Install - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -## Install the CLI - -```bash -pip install cortex -``` - -You must have [Docker](https://docs.docker.com/install) installed to run Cortex locally or to create a cluster on AWS. - -See [here](../miscellaneous/cli.md#install-cortex-cli-without-python-client) to install Cortex CLI without the Python Client. - -## Deploy an example - - -```bash -# clone the Cortex repository -git clone -b master https://github.com/cortexlabs/cortex.git - -# navigate to the Pytorch text generator example -cd cortex/examples/pytorch/text-generator -``` - -### Using the CLI - -```bash -# deploy the model as a realtime api -cortex deploy - -# view the status of the api -cortex get --watch - -# stream logs from the api -cortex logs text-generator - -# get the api's endpoint -cortex get text-generator - -# generate text -curl \ - -X POST -H "Content-Type: application/json" \ - -d '{"text": "machine learning is"}' - -# delete the api -cortex delete text-generator -``` - -### In Python - -```python -import cortex -import requests - -local_client = cortex.client("local") - -# deploy the model as a realtime api and wait for it to become active - -api_spec={ - "name": "iris-classifier", - "kind": "RealtimeAPI", - "predictor": { - "type": "python", - "path": "predictor.py", - "config": { - "model": "s3://cortex-examples/pytorch/iris-classifier/weights.pth" - } - } -} - -deployments = local_client.deploy(api_spec, project_dir=".", wait=True) - -# get the api's endpoint -url = deployments[0]["api"]["endpoint"] - -# generate text -print(requests.post(url, json={"text": "machine learning is"}).text) - -# delete the api -local_client.delete_api("text-generator") 
-``` - -## Running at scale on AWS - -Run the command below to create a cluster with basic configuration, or see [cluster configuration](config.md) to learn how you can customize your cluster with `cluster.yaml`. - -See [EC2 instances](ec2-instances.md) for an overview of several EC2 instance types. To use GPU nodes, you may need to subscribe to the [EKS-optimized AMI with GPU Support](https://aws.amazon.com/marketplace/pp/B07GRHFXGM) and [file an AWS support ticket](https://console.aws.amazon.com/support/cases#/create?issueType=service-limit-increase&limitType=ec2-instances) to increase the limit for your desired instance type. - -```bash -# create a Cortex cluster on your AWS account -cortex cluster up - -# set the default CLI environment (optional) -cortex env default aws -``` - -You can now run the same commands shown above to deploy the text generator to AWS (if you didn't set the default CLI environment, add `--env aws` to the `cortex` commands). - -## Next steps - - -* Try the [tutorial](../../examples/pytorch/text-generator/README.md) to learn more about how to use Cortex. -* Deploy one of our [examples](https://github.com/cortexlabs/cortex/tree/master/examples). -* See our [exporting guide](../guides/exporting.md) for how to export your model to use in an API. -* View documentation for [realtime APIs](../deployments/realtime-api.md) or [batch APIs](../deployments/batch-api.md). -* See [uninstall](uninstall.md) if you'd like to spin down your cluster. diff --git a/docs/cluster-management/uninstall.md b/docs/cluster-management/uninstall.md deleted file mode 100644 index 7482474dc5..0000000000 --- a/docs/cluster-management/uninstall.md +++ /dev/null @@ -1,46 +0,0 @@ -# Uninstall - -_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_ - -## Prerequisites - -1. [AWS credentials](aws-credentials.md) -2. [Docker](https://docs.docker.com/install) -3. [Cortex CLI](install.md) -4. 
[AWS CLI](https://aws.amazon.com/cli) - -## Uninstalling Cortex - -```bash -# spin down the cluster -cortex cluster down - -# uninstall the CLI -sudo rm /usr/local/bin/cortex -rm -rf ~/.cortex -``` - -If you modified your bash profile, you may wish to remove `source <(cortex completion bash)` from it (or remove `source <(cortex completion zsh)` for `zsh`). - -## Cleaning up AWS - -Since you may wish to have access to your data after spinning down your cluster, Cortex's bucket and log groups are not automatically deleted when running `cortex cluster down`. - -To delete them: - -```bash -# set AWS credentials -export AWS_ACCESS_KEY_ID=*** -export AWS_SECRET_ACCESS_KEY=*** - -# identify the name of your cortex S3 bucket -aws s3 ls - -# delete the S3 bucket -aws s3 rb --force s3:// - -# delete the log group (replace with the name of your cluster, default: cortex) -aws logs describe-log-groups --log-group-name-prefix= --query logGroups[*].[logGroupName] --output text | xargs -I {} aws logs delete-log-group --log-group-name {} -``` - -If you've configured a custom domain for your APIs, you may wish to remove the SSL Certificate and Hosted Zone for the domain by following these [instructions](../guides/custom-domain.md#cleanup). diff --git a/docs/miscellaneous/contact-us.md b/docs/contact.md similarity index 91% rename from docs/miscellaneous/contact-us.md rename to docs/contact.md index 8273ef261a..70a9748f34 100644 --- a/docs/miscellaneous/contact-us.md +++ b/docs/contact.md @@ -12,7 +12,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t ## Contributing -Find instructions for how to set up your development environment in the [development guide](../contributing/development.md). +Find instructions for how to set up your development environment in the [development guide](contributing/development.md). 
## We're hiring diff --git a/docs/contributing/development.md b/docs/contributing/development.md index 5cce28dff8..86f4cd0833 100644 --- a/docs/contributing/development.md +++ b/docs/contributing/development.md @@ -2,7 +2,7 @@ ## Remote development -Unless your internet connection is very fast or you will only be working on the CLI, it is recommended to run your development environment on a cloud instance, e.g. an AWS EC2 instance or GCP VM (due to frequent docker registry pushing). There are a variety of ways to develop on a remote VM, feel free to reach out on our [gitter](https://gitter.im/cortexlabs/cortex) and we can point you in the right direction based on your operating system and editor preferences. +We recommend that you run your development environment on a cloud instance, e.g. an AWS EC2 instance or GCP VM (due to frequent docker registry pushing). There are a variety of ways to develop on a remote VM, feel free to reach out on [gitter](https://gitter.im/cortexlabs/cortex) and we can point you in the right direction based on your operating system and editor preferences. ## Prerequisites diff --git a/docs/deployments/batch-api/api-configuration.md b/docs/deployments/batch-api/api-configuration.md index 8079ee944c..eda8e9d067 100644 --- a/docs/deployments/batch-api/api-configuration.md +++ b/docs/deployments/batch-api/api-configuration.md @@ -29,7 +29,7 @@ Reference the section below which corresponds to your Predictor type: [Python](# mem: # memory request per worker, e.g. 200Mi or 1Gi (default: Null) ``` -See additional documentation for [compute](../compute.md), [networking](../networking.md), and [overriding API images](../system-packages.md). +See additional documentation for [compute](../compute.md), [networking](../../aws/networking.md), and [overriding API images](../system-packages.md). ## TensorFlow Predictor @@ -65,7 +65,7 @@ See additional documentation for [compute](../compute.md), [networking](../netwo mem: # memory request per worker, e.g. 
200Mi or 1Gi (default: Null) ``` -See additional documentation for [compute](../compute.md), [networking](../networking.md), and [overriding API images](../system-packages.md). +See additional documentation for [compute](../compute.md), [networking](../../aws/networking.md), and [overriding API images](../system-packages.md). ## ONNX Predictor @@ -95,4 +95,4 @@ See additional documentation for [compute](../compute.md), [networking](../netwo mem: # memory request per worker, e.g. 200Mi or 1Gi (default: Null) ``` -See additional documentation for [compute](../compute.md), [networking](../networking.md), and [overriding API images](../system-packages.md). +See additional documentation for [compute](../compute.md), [networking](../../aws/networking.md), and [overriding API images](../system-packages.md). diff --git a/docs/deployments/gpus.md b/docs/deployments/gpus.md index c453c20db3..cc7af572b7 100644 --- a/docs/deployments/gpus.md +++ b/docs/deployments/gpus.md @@ -5,7 +5,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t To use GPUs: 1. Make sure your AWS account is subscribed to the [EKS-optimized AMI with GPU Support](https://aws.amazon.com/marketplace/pp/B07GRHFXGM). -2. You may need to [file an AWS support ticket](https://console.aws.amazon.com/support/cases#/create?issueType=service-limit-increase&limitType=ec2-instances) to increase the limit for your desired instance type. +2. You may need to [request a limit increase](https://console.aws.amazon.com/servicequotas/home?#!/services/ec2/quotas) for your desired instance type. 3. Set instance type to an AWS GPU instance (e.g. `g4dn.xlarge`) when installing Cortex. 4. Set the `gpu` field in the `compute` configuration for your API. One unit of GPU corresponds to one virtual GPU. Fractional requests are not allowed. 
diff --git a/docs/deployments/inferentia.md b/docs/deployments/inferentia.md index c530391c27..34390f3a57 100644 --- a/docs/deployments/inferentia.md +++ b/docs/deployments/inferentia.md @@ -4,7 +4,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t To use [Inferentia ASICs](https://aws.amazon.com/machine-learning/inferentia/): -1. You may need to [file an AWS support ticket](https://console.aws.amazon.com/support/cases#/create?issueType=service-limit-increase&limitType=ec2-instances) to increase the limit for your desired instance type. +1. You may need to [request a limit increase](https://console.aws.amazon.com/servicequotas/home?#!/services/ec2/quotas) for running Inferentia instances. 1. Set the instance type to an AWS Inferentia instance (e.g. `inf1.xlarge`) when creating your Cortex cluster. 1. Set the `inf` field in the `compute` configuration for your API. One unit of `inf` corresponds to one Inferentia ASIC with 4 NeuronCores *(not the same thing as `cpu`)* and 8GB of cache memory *(not the same thing as `mem`)*. Fractional requests are not allowed. diff --git a/docs/deployments/realtime-api/api-configuration.md b/docs/deployments/realtime-api/api-configuration.md index 51c09f84bc..21f1312477 100644 --- a/docs/deployments/realtime-api/api-configuration.md +++ b/docs/deployments/realtime-api/api-configuration.md @@ -60,7 +60,7 @@ Reference the section below which corresponds to your Predictor type: [Python](# max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) ``` -See additional documentation for [models](models.md), [parallelism](parallelism.md), [autoscaling](autoscaling.md), [compute](../compute.md), [networking](../networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](../system-packages.md). 
+See additional documentation for [models](models.md), [parallelism](parallelism.md), [autoscaling](autoscaling.md), [compute](../compute.md), [networking](../../aws/networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](../system-packages.md). ## TensorFlow Predictor @@ -123,7 +123,7 @@ See additional documentation for [models](models.md), [parallelism](parallelism. max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) ``` -See additional documentation for [models](models.md), [parallelism](parallelism.md), [autoscaling](autoscaling.md), [compute](../compute.md), [networking](../networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](../system-packages.md). +See additional documentation for [models](models.md), [parallelism](parallelism.md), [autoscaling](autoscaling.md), [compute](../compute.md), [networking](../../aws/networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](../system-packages.md). ## ONNX Predictor @@ -179,4 +179,4 @@ See additional documentation for [models](models.md), [parallelism](parallelism. max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) ``` -See additional documentation for [models](models.md), [parallelism](parallelism.md), [autoscaling](autoscaling.md), [compute](../compute.md), [networking](../networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](../system-packages.md). 
+See additional documentation for [models](models.md), [parallelism](parallelism.md), [autoscaling](autoscaling.md), [compute](../compute.md), [networking](../../aws/networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](../system-packages.md). diff --git a/docs/deployments/realtime-api/autoscaling.md b/docs/deployments/realtime-api/autoscaling.md index 16993620d2..81907633cd 100644 --- a/docs/deployments/realtime-api/autoscaling.md +++ b/docs/deployments/realtime-api/autoscaling.md @@ -60,7 +60,7 @@ For example, setting `target_replica_concurrency` to `processes_per_replica` * ` ## Autoscaling Instances -Cortex spins up and down instances based on the aggregate resource requests of all APIs. The number of instances will be at least `min_instances` and no more than `max_instances` ([configured during installation](../../cluster-management/config.md) and modifiable via `cortex cluster configure`). +Cortex spins up and down instances based on the aggregate resource requests of all APIs. The number of instances will be at least `min_instances` and no more than `max_instances` ([configured during installation](../../aws/install.md) and modifiable via `cortex cluster configure`). ## Overprovisioning diff --git a/docs/guides/docker-hub-rate-limiting.md b/docs/guides/docker-hub-rate-limiting.md index b2f1c11567..bcb574a994 100644 --- a/docs/guides/docker-hub-rate-limiting.md +++ b/docs/guides/docker-hub-rate-limiting.md @@ -29,7 +29,7 @@ It is possible to configure Cortex to use the images from Quay instead of Docker ### Update your cluster configuration file -Add the following to your [cluster configuration file](../cluster-management/config.md) (e.g. `cluster.yaml`). In the image paths below, make sure to set `` to your cluster's version. +Add the following to your [cluster configuration file](../aws/install.md) (e.g. `cluster.yaml`). In the image paths below, make sure to set `` to your cluster's version. 
```yaml # cluster.yaml diff --git a/docs/guides/low-cost-clusters.md b/docs/guides/low-cost-clusters.md index 04c6ed7e19..5516a4e5a0 100644 --- a/docs/guides/low-cost-clusters.md +++ b/docs/guides/low-cost-clusters.md @@ -4,7 +4,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t Here are some tips for keeping costs down when running small clusters: -* Consider using [spot instances](../cluster-management/spot-instances.md). +* Consider using [spot instances](../aws/spot.md). * CPUs are cheaper than GPUs, so if there is low request volume and low latency is not critical, running on CPU instances will be more cost effective. diff --git a/docs/guides/production.md b/docs/guides/production.md index 9d87755983..bd2f259826 100644 --- a/docs/guides/production.md +++ b/docs/guides/production.md @@ -4,7 +4,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t **Tips for batch and realtime APIs:** -* Consider using [spot instances](../cluster-management/spot-instances.md) to reduce cost. +* Consider using [spot instances](../aws/spot.md) to reduce cost. * If you're using multiple clusters and/or multiple developers are interacting with your cluster(s), see our documention on [environments](../miscellaneous/environments.md) @@ -30,4 +30,4 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t * Consider setting a low value for `max_replica_concurrency`, since if there are many requests in the queue, it will take a long time until newly received requests are processed. See [autoscaling docs](../deployments/realtime-api/autoscaling.md) for more details. 
-* Keep in mind that API Gateway has a 29 second timeout; if your requests take longer (due to a long inference time and/or long request queues), you will need to disable API Gateway for your API by setting `api_gateway: none` in the `networking` config in your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and/or [Batch API configuration](../deployments/batch-api/api-configuration.md). Alternatively, you can disable API gateway for all APIs in your cluster by setting `api_gateway: none` in your [cluster configuration file](../cluster-management/config.md) before creating your cluster. +* Keep in mind that API Gateway has a 29 second timeout; if your requests take longer (due to a long inference time and/or long request queues), you will need to disable API Gateway for your API by setting `api_gateway: none` in the `networking` config in your [Realtime API configuration](../deployments/realtime-api/api-configuration.md) and/or [Batch API configuration](../deployments/batch-api/api-configuration.md). Alternatively, you can disable API gateway for all APIs in your cluster by setting `api_gateway: none` in your [cluster configuration file](../aws/install.md) before creating your cluster. diff --git a/docs/guides/self-hosted-images.md b/docs/guides/self-hosted-images.md index e0691cee10..ffeb2f7e44 100644 --- a/docs/guides/self-hosted-images.md +++ b/docs/guides/self-hosted-images.md @@ -128,7 +128,7 @@ done echo "-----------------------------------------------" ``` -The first list of images that were printed (the cluster images) can be directly copy-pasted in your [cluster configuration file](../cluster-management/config.md) before spinning up your cluster. +The first list of images that were printed (the cluster images) can be directly copy-pasted in your [cluster configuration file](../aws/install.md) before spinning up your cluster. 
The second list of images that were printed (the API images) can be used in your [API configuration files](../deployments/realtime-api/api-configuration.md). The image paths are specified in `predictor.image` (and `predictor.tensorflow_serving_image` for APIs with `kind: tensorflow`). Be advised that by default, the public images offered by Cortex are used for your predictors, so you will need to specify your ECR image paths for all of your APIs. diff --git a/docs/miscellaneous/environments.md b/docs/miscellaneous/environments.md index 95857a7ad8..a21ec980d8 100644 --- a/docs/miscellaneous/environments.md +++ b/docs/miscellaneous/environments.md @@ -87,7 +87,7 @@ On your new machine, run: cortex env configure ``` -This will prompt for the necessary configuration. Note that the AWS credentials that you use here do not need any IAM permissions attached. If you will be running any `cortex cluster` commands specify the preferred AWS credentials using cli flags `--aws-key AWS_ACCESS_KEY_ID --aws-secret AWS_SECRET_ACCESS_KEY`. See [IAM permissions](security.md#iam-permissions) for more details. +This will prompt for the necessary configuration. Note that the AWS credentials that you use here do not need any IAM permissions attached. If you will be running any `cortex cluster` commands specify the preferred AWS credentials using cli flags `--aws-key AWS_ACCESS_KEY_ID --aws-secret AWS_SECRET_ACCESS_KEY`. See [IAM permissions](../aws/security.md#iam-permissions) for more details. 
## Environments overview diff --git a/docs/summary.md b/docs/summary.md index cb926277ec..e92d69a771 100644 --- a/docs/summary.md +++ b/docs/summary.md @@ -1,11 +1,24 @@ # Table of contents * [Deploy machine learning models to production](../README.md) -* [Install](cluster-management/install.md) +* [Install](aws/install.md) * [Tutorial](https://docs.cortex.dev/v/master/deployments/realtime-api/text-generator) * [GitHub](https://github.com/cortexlabs/cortex) * [Examples](https://github.com/cortexlabs/cortex/tree/master/examples) -* [Contact us](miscellaneous/contact-us.md) +* [Contact us](contact.md) + +## Running Cortex on AWS + +* [Credentials](aws/credentials.md) +* [Security](aws/security.md) +* [Spot instances](aws/spot.md) +* [Networking](aws/networking.md) +* [VPC peering](aws/vpc-peering.md) +* [Custom domain](aws/custom-domain.md) +* [SSH into instances](aws/ssh.md) +* [REST API Gateway](aws/rest-api-gateway.md) +* [Update](aws/update.md) +* [Uninstall](aws/uninstall.md) ## Deployments @@ -35,16 +48,6 @@ * [Using Inferentia](deployments/inferentia.md) * [Python packages](deployments/python-packages.md) * [System packages](deployments/system-packages.md) -* [Networking](deployments/networking.md) - -## Cluster management - -* [Cluster configuration](cluster-management/config.md) -* [AWS credentials](cluster-management/aws-credentials.md) -* [EC2 instances](cluster-management/ec2-instances.md) -* [Spot instances](cluster-management/spot-instances.md) -* [Update](cluster-management/update.md) -* [Uninstall](cluster-management/uninstall.md) ## Miscellaneous @@ -52,7 +55,6 @@ * [Python client](miscellaneous/python-client.md) * [Environments](miscellaneous/environments.md) * [Architecture diagram](miscellaneous/architecture.md) -* [Security](miscellaneous/security.md) * [Telemetry](miscellaneous/telemetry.md) ## Troubleshooting @@ -62,7 +64,6 @@ * [NVIDIA runtime not found](troubleshooting/nvidia-container-runtime-not-found.md) * [TF session in 
predict()](troubleshooting/tf-session-in-predict.md) * [Serving-side batching errors](troubleshooting/server-side-batching-errors.md) -* [Cluster down failures](troubleshooting/cluster-down.md) ## Guides @@ -71,15 +72,11 @@ * [View API metrics](guides/metrics.md) * [Running in production](guides/production.md) * [Low-cost clusters](guides/low-cost-clusters.md) -* [Set up a custom domain](guides/custom-domain.md) -* [Set up VPC peering](guides/vpc-peering.md) -* [SSH into worker instance](guides/ssh-instance.md) * [Single node deployment](guides/single-node-deployment.md) * [Set up kubectl](guides/kubectl-setup.md) * [Self-hosted Docker images](guides/self-hosted-images.md) * [Docker Hub rate limiting](guides/docker-hub-rate-limiting.md) * [Private docker registry](guides/private-docker.md) -* [Set up REST API Gateway](guides/rest-api-gateway.md) * [Install CLI on Windows](guides/windows-cli.md) ## Contributing diff --git a/docs/troubleshooting/api-request-errors.md b/docs/troubleshooting/api-request-errors.md index 6ed1929f85..5b575bac78 100644 --- a/docs/troubleshooting/api-request-errors.md +++ b/docs/troubleshooting/api-request-errors.md @@ -8,4 +8,4 @@ When making prediction requests to your API, it's possible to get a `{"message": 1. Your API may have errored during initialization or while responding to a previous request. `cortex get API_NAME` will show the status of your API, and you can view the logs with `cortex logs API_NAME`. 1. If `cortex get API_NAME` shows your API's status as "updating" for a while and if `cortex logs API_NAME` doesn't shed any light onto what may be wrong, please see the [API is stuck updating](stuck-updating.md) troubleshooting guide. -It is also possible to receive a `{"message":"Service Unavailable"}` error message (with HTTP status code `503`) if you are using an API Gateway endpoint for your API and if your request exceeds API Gateway's 29 second timeout. 
If you don't know whether you are using API Gateway, you can run `cortex get ` and check if `networking.api_gateway` is not set to `none` in the api configuration. If the request is exceeding the API Gateway timeout, your client should receive the `{"message":"Service Unavailable"}` response ~29 seconds after making the request. To confirm that this is the issue, you can modify your `predict()` function to immediately return a response (e.g. `return "ok"`), re-deploy your API, wait for the update to complete, and try making a request. If your client successfully receives the "ok" response, it is likely that the API Gateway timeout is occurring. You can either modify your `predict()` implementation to take less time, run on faster hardware (e.g. GPUs), or disable API Gateway for this API by setting `api_gateway: none` in the `networking` field of the [api configuration](api-configuration.md) (see [networking](../deployments/networking.md) for more details). +It is also possible to receive a `{"message":"Service Unavailable"}` error message (with HTTP status code `503`) if you are using an API Gateway endpoint for your API and if your request exceeds API Gateway's 29 second timeout. If you don't know whether you are using API Gateway, you can run `cortex get ` and check if `networking.api_gateway` is not set to `none` in the api configuration. If the request is exceeding the API Gateway timeout, your client should receive the `{"message":"Service Unavailable"}` response ~29 seconds after making the request. To confirm that this is the issue, you can modify your `predict()` function to immediately return a response (e.g. `return "ok"`), re-deploy your API, wait for the update to complete, and try making a request. If your client successfully receives the "ok" response, it is likely that the API Gateway timeout is occurring. You can either modify your `predict()` implementation to take less time, run on faster hardware (e.g. 
GPUs), or disable API Gateway for this API by setting `api_gateway: none` in the `networking` field of the [api configuration](api-configuration.md) (see [networking](../aws/networking.md) for more details). diff --git a/docs/troubleshooting/stuck-updating.md b/docs/troubleshooting/stuck-updating.md index cd107898ba..e19531326b 100644 --- a/docs/troubleshooting/stuck-updating.md +++ b/docs/troubleshooting/stuck-updating.md @@ -39,7 +39,7 @@ On the old UI: The most common reason AWS is unable to provision instances is that you have reached your instance limit. There is an instance limit associated with your AWS account for each instance family in each region, for on-demand and for spot instances. You can check your current limit and request an increase [here](https://console.aws.amazon.com/servicequotas/home?#!/services/ec2/quotas) (set the region in the upper right corner to your desired region, type "on-demand" or "spot" in the search bar, and click on the quota that matches your instance type). Note that the quota values indicate the number of vCPUs available, not the number of instances; different instances have a different numbers of vCPUs, which can be seen [here](https://aws.amazon.com/ec2/instance-types/). -If you are using spot instances and don't have `on_demand_backup` set to true, it is also possible that AWS has run out of spot instances for your requested instance type and region. You can enable `on_demand_backup` to allow Cortex to fall back to on-demand instances when spot instances are unavailable, or you can try adding additional alternative instance types in `instance_distribution`. See our [spot documentation](../cluster-management/spot-instances.md). +If you are using spot instances and don't have `on_demand_backup` set to true, it is also possible that AWS has run out of spot instances for your requested instance type and region. 
You can enable `on_demand_backup` to allow Cortex to fall back to on-demand instances when spot instances are unavailable, or you can try adding additional alternative instance types in `instance_distribution`. See our [spot documentation](../aws/spot.md). ## Disabling rolling updates diff --git a/examples/batch/image-classifier/README.md b/examples/batch/image-classifier/README.md index dff1ac5301..ce328bfdac 100644 --- a/examples/batch/image-classifier/README.md +++ b/examples/batch/image-classifier/README.md @@ -4,12 +4,11 @@ _WARNING: you are on the master branch; please refer to examples on the branch c This example shows how to deploy a batch image classification api that accepts a list of image urls as input, downloads the images, classifies them, and writes the results to S3. -**Batch APIs are only supported on a Cortex cluster (in AWS).** You can find cluster installation documentation [here](../../../docs/cluster-management/install.md#running-at-scale-on-aws). +**Batch APIs are only supported on a Cortex cluster (in AWS).** You can find cluster installation documentation [here](../../../docs/aws/install.md). ## Pre-requisites -* [Install Cortex CLI](../../../docs/cluster-management/install.md#install-the-cli) -* [Create a Cortex Cluster](../../../docs/cluster-management/install.md#running-at-scale-on-aws) +* [Install](../../../docs/aws/install.md) Cortex and create a cluster * Create an S3 bucket/directory to store the results of the batch job * AWS CLI (optional) @@ -578,4 +577,4 @@ Running `cortex delete` will stop all in progress jobs for the API and will dele * Deploy another one of our [batch examples](https://github.com/cortexlabs/cortex/tree/master/examples/batch). * See our [exporting guide](../../../docs/guides/exporting.md) for how to export your model to use in an API. * Try the [realtime API tutorial](../../pytorch/text-generator/README.md) to learn how to deploy realtime APIs in Cortex. 
-* See [uninstall](../../../docs/cluster-management/uninstall.md) if you'd like to spin down your cluster. +* See [uninstall](../../../docs/aws/uninstall.md) if you'd like to spin down your cluster. diff --git a/examples/pytorch/text-generator/README.md b/examples/pytorch/text-generator/README.md index 4532ca6615..e737106664 100644 --- a/examples/pytorch/text-generator/README.md +++ b/examples/pytorch/text-generator/README.md @@ -1,4 +1,4 @@ -# Deploy models as Realtime APIs +# Deploy machine learning models to production _WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.22.*, run `git checkout -b 0.22` or switch to the `0.22` branch on GitHub)_ @@ -52,7 +52,7 @@ transformers==3.0.* ## Configure your API -Create a `cortex.yaml` file and add the configuration below. A `RealtimeAPI` provides a runtime for inference and makes your `predictor.py` implementation available as a web service that can serve real-time predictions: +Create a `cortex.yaml` file and add the configuration below. A `RealtimeAPI` provides a runtime for inference and makes your `predictor.py` implementation available as a web service that can serve realtime predictions: ```yaml # cortex.yaml @@ -94,7 +94,7 @@ $ cortex get text-generator status last update avg request 2XX live 1m - - -endpoint: http://localhost:8888 +endpoint: http://localhost:8889 ... ``` @@ -109,7 +109,7 @@ $ cortex logs text-generator Once your API is live, use `curl` to test your API (it will take a few seconds to generate the text): ```bash -$ curl http://localhost:8888 \ +$ curl http://localhost:8889 \ -X POST -H "Content-Type: application/json" \ -d '{"text": "machine learning is"}' @@ -295,12 +295,3 @@ deleting text-generator ``` Running `cortex delete` will free up cluster resources and allow Cortex to scale down to the minimum number of instances you specified during cluster creation. It will not spin down your cluster. 
- -## Next steps - - -* Deploy another one of our [examples](https://github.com/cortexlabs/cortex/tree/master/examples). -* See our [exporting guide](../../../docs/guides/exporting.md) for how to export your model to use in an API. -* Try the [batch API tutorial](../../batch/image-classifier/README.md) to learn how to deploy batch APIs in Cortex. -* See our [traffic splitter example](../../traffic-splitter/README.md) for how to deploy multiple APIs and set up a traffic splitter. -* See [uninstall](../../../docs/cluster-management/uninstall.md) if you'd like to spin down your cluster. diff --git a/manager/check_cortex_version.sh b/manager/check_cortex_version.sh index 39d1866228..4d36e470e2 100755 --- a/manager/check_cortex_version.sh +++ b/manager/check_cortex_version.sh @@ -19,6 +19,6 @@ set -e CORTEX_VERSION=master if [ "$CORTEX_VERSION" != "$CORTEX_CLI_VERSION" ]; then - echo "error: your CLI version ($CORTEX_CLI_VERSION) doesn't match your Cortex manager image version ($CORTEX_VERSION); please update your CLI by following the instructions at https://docs.cortex.dev/install, or update your Cortex manager image by modifying the value for \`image_manager\` in your cluster configuration file (e.g. cluster.yaml) and running \`cortex cluster configure --config cluster.yaml\` (update other image paths in cluster.yaml as well if necessary)" + echo "error: your CLI version ($CORTEX_CLI_VERSION) doesn't match your Cortex manager image version ($CORTEX_VERSION); please update your CLI (pip install cortex==$CORTEX_VERSION), or update your Cortex manager image by modifying the value for \`image_manager\` in your cluster configuration file and running \`cortex cluster configure --config cluster.yaml\` (update other image paths in cluster.yaml as well if necessary)" exit 1 fi diff --git a/manager/debug.sh b/manager/debug.sh index 3275ed0424..46c3a6e03c 100755 --- a/manager/debug.sh +++ b/manager/debug.sh @@ -27,7 +27,7 @@ if ! 
eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTE fi eksctl utils write-kubeconfig --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION | grep -v "saved kubeconfig as" | grep -v "using region" | grep -v "eksctl version" || true -out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/miscellaneous/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi +out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi echo -n "gathering cluster data" diff --git a/manager/info.sh b/manager/info.sh index 2878411af2..c754737605 100755 --- a/manager/info.sh +++ b/manager/info.sh @@ -36,7 +36,7 @@ if ! 
eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTE fi eksctl utils write-kubeconfig --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION | grep -v "saved kubeconfig as" | grep -v "using region" | grep -v "eksctl version" || true -out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/miscellaneous/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi +out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi operator_endpoint=$(get_operator_endpoint) api_load_balancer_endpoint=$(get_api_load_balancer_endpoint) diff --git a/manager/install.sh b/manager/install.sh index dd332adaaf..b4fea28f23 100755 --- a/manager/install.sh +++ b/manager/install.sh @@ -89,7 +89,7 @@ function cluster_up() { echo -e "\ncortex is ready!" 
if [ "$CORTEX_OPERATOR_LOAD_BALANCER_SCHEME" == "internal" ]; then - echo -e "note: you will need to configure VPC Peering to connect to your cluster: https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/guides/vpc-peering" + echo -e "note: you will need to configure VPC Peering to connect to your cluster: https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/vpc-peering" fi print_endpoints @@ -204,7 +204,7 @@ function check_eks() { function write_kubeconfig() { eksctl utils write-kubeconfig --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION | grep -v "saved kubeconfig as" | grep -v "using region" | grep -v "eksctl version" || true - out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/miscellaneous/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi + out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi } function setup_configmap() { diff --git a/manager/refresh.sh b/manager/refresh.sh index 576e465ee9..ce1389cdd5 100755 --- a/manager/refresh.sh +++ b/manager/refresh.sh @@ -27,7 +27,7 @@ if ! 
eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTE fi eksctl utils write-kubeconfig --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION | grep -v "saved kubeconfig as" | grep -v "using region" | grep -v "eksctl version" || true -out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/miscellaneous/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi +out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/aws/security#running-cortex-cluster-commands-from-different-iam-users"; exit 1; fi kubectl get -n=default configmap cluster-config -o yaml >> cluster_configmap.yaml python refresh_cluster_config.py cluster_configmap.yaml tmp_cluster_config.yaml diff --git a/pkg/lib/configreader/errors.go b/pkg/lib/configreader/errors.go index 51f30c3779..208cbee491 100644 --- a/pkg/lib/configreader/errors.go +++ b/pkg/lib/configreader/errors.go @@ -420,6 +420,6 @@ func ErrorCortexResourceNotAllowed(resourceName string) error { func ErrorImageVersionMismatch(image, tag, cortexVersion string) error { return errors.WithStack(&errors.Error{ Kind: ErrImageVersionMismatch, - Message: fmt.Sprintf("the specified image (%s) has a tag (%s) which does not match your Cortex version (%s); please update the image tag, remove the image registry path from your configuration file (to use the default value), or update your CLI by following the instructions at https://docs.cortex.dev/install", image, tag, cortexVersion), + Message: fmt.Sprintf("the specified image (%s) has a tag (%s) which does not match your Cortex version (%s); please update the image tag, remove the image registry 
path from your configuration file (to use the default value), or update your CLI (pip install cortex==%s)", image, tag, cortexVersion, cortexVersion), }) } diff --git a/pkg/operator/endpoints/errors.go b/pkg/operator/endpoints/errors.go index b9cad741fa..061df2e0bf 100644 --- a/pkg/operator/endpoints/errors.go +++ b/pkg/operator/endpoints/errors.go @@ -42,7 +42,7 @@ const ( func ErrorAPIVersionMismatch(operatorVersion string, clientVersion string) error { return errors.WithStack(&errors.Error{ Kind: ErrAPIVersionMismatch, - Message: fmt.Sprintf("your CLI version (%s) doesn't match your Cortex operator version (%s); please update your cluster by following the instructions at https://docs.cortex.dev/cluster-management/update, or update your CLI by following the instructions at https://docs.cortex.dev/install", clientVersion, operatorVersion), + Message: fmt.Sprintf("your CLI version (%s) doesn't match your Cortex operator version (%s); please update your cluster by following the instructions at https://docs.cortex.dev/update, or update your CLI (pip install cortex==%s)", clientVersion, operatorVersion, operatorVersion), }) } diff --git a/pkg/workloads/cortex/client/README.md b/pkg/workloads/cortex/client/README.md index b4cab5c27f..17ba782fc4 100644 --- a/pkg/workloads/cortex/client/README.md +++ b/pkg/workloads/cortex/client/README.md @@ -1,104 +1 @@ -Cortex makes it simple to deploy machine learning models in production. - -### Deploy - -* Deploy TensorFlow, PyTorch, ONNX, scikit-learn, and other models. -* Define preprocessing and postprocessing steps in Python. -* Configure APIs as realtime or batch. -* Deploy multiple models per API. - -### Manage - -* Monitor API performance and track predictions. -* Update APIs with no downtime. -* Stream logs from APIs. -* Perform A/B tests. - -### Scale - -* Test locally, scale on your AWS account. -* Autoscale to handle production traffic. -* Reduce cost with spot instances. 
- - -[documentation](https://docs.cortex.dev) • [tutorial](https://docs.cortex.dev/deployments/realtime-api/text-generator) • [examples](https://github.com/cortexlabs/cortex/tree/0.22/examples) • [chat with us](https://gitter.im/cortexlabs/cortex) - -## Install the CLI - -```bash -pip install cortex -``` - -You must have [Docker](https://docs.docker.com/install) installed to run Cortex locally or to create a cluster on AWS. - -## Deploy an example - - -```bash -# clone the Cortex repository -git clone -b master https://github.com/cortexlabs/cortex.git - -# navigate to the Pytorch text generator example -cd cortex/examples/pytorch/text-generator -``` - -### In Python - -```python -import cortex -import requests - -local_client = cortex.client("local") - -# deploy the model as a realtime api and wait for it to become active -deployments = local_client.deploy_project(config_file="./cortex.yaml", wait=True) - -# get the api's endpoint -url = deployments[0]["api"]["endpoint"] - -# generate text -print(requests.post(url, json={"text": "machine learning is"}).text) - -# delete the api -local_client.delete_api("text-generator") -``` - -### Using the CLI - -```bash -# deploy the model as a realtime api -cortex deploy - -# view the status of the api -cortex get --watch - -# stream logs from the api -cortex logs text-generator - -# get the api's endpoint -cortex get text-generator - -# generate text -curl \ - -X POST -H "Content-Type: application/json" \ - -d '{"text": "machine learning is"}' - -# delete the api -cortex delete text-generator -``` - -## Running at scale on AWS - -Run the command below to create a cluster with basic configuration, or see [cluster configuration](config.md) to learn how you can customize your cluster with `cluster.yaml`. - -See [EC2 instances](ec2-instances.md) for an overview of several EC2 instance types. 
To use GPU nodes, you may need to subscribe to the [EKS-optimized AMI with GPU Support](https://aws.amazon.com/marketplace/pp/B07GRHFXGM) and [file an AWS support ticket](https://console.aws.amazon.com/support/cases#/create?issueType=service-limit-increase&limitType=ec2-instances) to increase the limit for your desired instance type. - -```bash -# create a Cortex cluster on your AWS account -cortex cluster up - -# set the default CLI environment (optional) -cortex env default aws -``` - -You can now run the same commands shown above to deploy the text generator to AWS (if you didn't set the default CLI environment, add `--env aws` to the `cortex` commands). +Deploy machine learning models to production - [cortex.dev](https://www.cortex.dev) diff --git a/pkg/workloads/cortex/serve/init/bootloader.sh b/pkg/workloads/cortex/serve/init/bootloader.sh index 97defa100a..cd4b91d716 100755 --- a/pkg/workloads/cortex/serve/init/bootloader.sh +++ b/pkg/workloads/cortex/serve/init/bootloader.sh @@ -21,9 +21,9 @@ export EXPECTED_CORTEX_VERSION=master if [ "$CORTEX_VERSION" != "$EXPECTED_CORTEX_VERSION" ]; then if [ "$CORTEX_PROVIDER" == "local" ]; then - echo "error: your Cortex CLI version ($CORTEX_VERSION) doesn't match your predictor image version ($EXPECTED_CORTEX_VERSION); please update your predictor image by modifying the \`image\` field in your API configuration file (e.g. cortex.yaml) and re-running \`cortex deploy\`, or update your CLI by following the instructions at https://docs.cortex.dev/cluster-management/update#upgrading-to-a-newer-version-of-cortex" + echo "error: your Cortex CLI version ($CORTEX_VERSION) doesn't match your predictor image version ($EXPECTED_CORTEX_VERSION); please update your predictor image by modifying the \`image\` field in your API configuration file (e.g. 
cortex.yaml) and re-running \`cortex deploy\`, or update your CLI by following the instructions at https://docs.cortex.dev/update" else - echo "error: your Cortex operator version ($CORTEX_VERSION) doesn't match your predictor image version ($EXPECTED_CORTEX_VERSION); please update your predictor image by modifying the \`image\` field in your API configuration file (e.g. cortex.yaml) and re-running \`cortex deploy\`, or update your cluster by following the instructions at https://docs.cortex.dev/cluster-management/update#upgrading-to-a-newer-version-of-cortex" + echo "error: your Cortex operator version ($CORTEX_VERSION) doesn't match your predictor image version ($EXPECTED_CORTEX_VERSION); please update your predictor image by modifying the \`image\` field in your API configuration file (e.g. cortex.yaml) and re-running \`cortex deploy\`, or update your cluster by following the instructions at https://docs.cortex.dev/update" fi exit 1 fi