Skip to content

Commit

Permalink
Delete API Gateway if cluster up fails (#1172)
Browse files Browse the repository at this point in the history
(cherry picked from commit b17c225)
  • Loading branch information
deliahu committed Jun 28, 2020
1 parent d9b5785 commit 940811a
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 28 deletions.
58 changes: 48 additions & 10 deletions cli/cmd/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ var _upCmd = &cobra.Command{
exit.Error(err)
}

err = CreateBucketIfNotFound(awsClient, clusterConfig.Bucket)
err = createBucketIfNotFound(awsClient, clusterConfig.Bucket)
if err != nil {
exit.Error(err)
}
Expand All @@ -147,7 +147,7 @@ var _upCmd = &cobra.Command{
exit.Error(err)
}

err = CreateLogGroupIfNotFound(awsClient, clusterConfig.LogGroup)
err = createLogGroupIfNotFound(awsClient, clusterConfig.LogGroup)
if err != nil {
exit.Error(err)
}
Expand All @@ -156,16 +156,25 @@ var _upCmd = &cobra.Command{
exit.Error(err)
}

err = createDashboard(awsClient, clusterConfig.ClusterName)
err = createOrClearDashboard(awsClient, clusterConfig.ClusterName)
if err != nil {
exit.Error(err)
}

err = createOrReplaceAPIGateway(awsClient, clusterConfig.ClusterName, clusterConfig.Tags)
if err != nil {
exit.Error(err)
}

out, exitCode, err := runManagerUpdateCommand("/root/install.sh", clusterConfig, awsCreds, _flagClusterEnv)
if err != nil {
awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion
awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion
exit.Error(err)
}
if exitCode == nil || *exitCode != 0 {
awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion
awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion
helpStr := "\nDebugging tips (may or may not apply to this error):"
helpStr += fmt.Sprintf("\n* if your cluster started spinning up but was unable to provision instances, additional error information may be found in the activity history of your cluster's autoscaling groups (select each autoscaling group and click the \"Activity History\" tab): https://console.aws.amazon.com/ec2/autoscaling/home?region=%s#AutoScalingGroups:", *clusterConfig.Region)
helpStr += fmt.Sprintf("\n* if your cluster started spinning up, please ensure that your CloudFormation stacks for this cluster have been fully deleted before trying to spin up this cluster again (you can delete your CloudFormation stacks from the AWS console: %s)", getCloudFormationURL(clusterConfig.ClusterName, *clusterConfig.Region))
Expand Down Expand Up @@ -323,11 +332,11 @@ var _downCmd = &cobra.Command{
_, errAPIGateway := awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName)
_, errVPCLink := awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName)
if errAPIGateway != nil {
fmt.Print("\n\nunable to delete cortex's api gateway (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/apis\n")
fmt.Printf("\n\nunable to delete cortex's api gateway (see error below); if it still exists after the cluster has been deleted, please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/apis\n", *accessConfig.Region)
errors.PrintError(errAPIGateway)
}
if errVPCLink != nil {
fmt.Print("\n\nunable to delete cortex's vpc link (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/vpc-links\n")
fmt.Printf("\n\nunable to delete cortex's vpc link (see error below); if it still exists after the cluster has been deleted, please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/vpc-links\n", *accessConfig.Region)
errors.PrintError(errVPCLink)
}
if errAPIGateway == nil && errVPCLink == nil {
Expand All @@ -339,7 +348,7 @@ var _downCmd = &cobra.Command{
fmt.Print("○ deleting dashboard ")
err = awsClient.DeleteDashboard(*accessConfig.ClusterName)
if err != nil {
fmt.Print("\n\nunable to delete cortex's api dashboard (see error below); if it still exists after the cluster has been deleted, please delete it manually via the cloudwatch console: https://console.aws.amazon.com/cloudwatch/home#dashboards:\n")
fmt.Printf("\n\nunable to delete cortex's api dashboard (see error below); if it still exists after the cluster has been deleted, please delete it via the cloudwatch console: https://%s.console.aws.amazon.com/cloudwatch/home#dashboards:\n", *accessConfig.Region)
errors.PrintError(err)
fmt.Println()
} else {
Expand Down Expand Up @@ -716,7 +725,7 @@ func getCloudFormationURLWithAccessConfig(accessConfig *clusterconfig.AccessConf
return getCloudFormationURL(*accessConfig.ClusterName, *accessConfig.Region)
}

func CreateBucketIfNotFound(awsClient *aws.Client, bucket string) error {
func createBucketIfNotFound(awsClient *aws.Client, bucket string) error {
bucketFound, err := awsClient.DoesBucketExist(bucket)
if err != nil {
return err
Expand All @@ -725,6 +734,7 @@ func CreateBucketIfNotFound(awsClient *aws.Client, bucket string) error {
fmt.Print("○ creating a new s3 bucket: ", bucket)
err = awsClient.CreateBucket(bucket)
if err != nil {
fmt.Print("\n\n")
return err
}
fmt.Println(" ✓")
Expand All @@ -734,7 +744,7 @@ func CreateBucketIfNotFound(awsClient *aws.Client, bucket string) error {
return nil
}

func CreateLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error {
func createLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error {
logGroupFound, err := awsClient.DoesLogGroupExist(logGroup)
if err != nil {
return err
Expand All @@ -743,6 +753,7 @@ func CreateLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error {
fmt.Print("○ creating a new cloudwatch log group: ", logGroup)
err = awsClient.CreateLogGroup(logGroup)
if err != nil {
fmt.Print("\n\n")
return err
}
fmt.Println(" ✓")
Expand All @@ -753,8 +764,8 @@ func CreateLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error {
return nil
}

// createDashboard creates a new dashboard (or clears an existing one if it already exists)
func createDashboard(awsClient *aws.Client, dashboardName string) error {
// createOrClearDashboard creates a new dashboard (or clears an existing one if it already exists)
func createOrClearDashboard(awsClient *aws.Client, dashboardName string) error {
dashboardFound, err := awsClient.DoesDashboardExist(dashboardName)
if err != nil {
return err
Expand All @@ -768,10 +779,37 @@ func createDashboard(awsClient *aws.Client, dashboardName string) error {

err = awsClient.CreateDashboard(dashboardName, consts.DashboardTitle)
if err != nil {
fmt.Print("\n\n")
return err
}

fmt.Println(" ✓")

return nil
}

// createOrReplaceAPIGateway creates the API gateway for the cluster, replacing
// whatever was there before: it first deletes the VPC link and then the API
// gateway tagged with this cluster's name, and only then creates a new gateway
// named clusterName carrying the provided tags.
//
// A progress line is printed to stdout and finished with a check mark on
// success. On any failure, two newlines are printed to terminate the progress
// line and the error is returned; for the two deletion steps the error is
// extended with instructions for deleting the leftover resource manually via
// the API Gateway console of the client's region.
func createOrReplaceAPIGateway(awsClient *aws.Client, clusterName string, tags map[string]string) error {
	fmt.Print("○ creating api gateway: ", clusterName)

	// Remove the existing VPC link first, then the API gateway itself.
	if _, vpcLinkErr := awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, clusterName); vpcLinkErr != nil {
		fmt.Print("\n\n")
		vpcLinkHelp := fmt.Sprintf("\n\nunable to delete existing vpc link with tag %s=%s; please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/vpc-links", clusterconfig.ClusterNameTag, clusterName, awsClient.Region)
		return errors.Append(vpcLinkErr, vpcLinkHelp)
	}

	if _, gatewayErr := awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, clusterName); gatewayErr != nil {
		fmt.Print("\n\n")
		gatewayHelp := fmt.Sprintf("\n\nunable to delete existing api gateway with tag %s=%s; please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/apis", clusterconfig.ClusterNameTag, clusterName, awsClient.Region)
		return errors.Append(gatewayErr, gatewayHelp)
	}

	// The new gateway's ID is not needed here; only the outcome matters.
	if _, createErr := awsClient.CreateAPIGateway(clusterName, tags); createErr != nil {
		fmt.Print("\n\n")
		return createErr
	}

	fmt.Println(" ✓")
	return nil
}
36 changes: 36 additions & 0 deletions manager/get_api_gateway_endpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2020 Cortex Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import boto3
import os


def get_api_gateway_endpoint():
    """Return the endpoint of the API gateway tagged with this cluster's name.

    The cluster name and region are read from the CORTEX_CLUSTER_NAME and
    CORTEX_REGION environment variables. API gateways in that region are
    listed page by page through the "apigatewayv2" client, and the
    "ApiEndpoint" of the first one whose "cortex.dev/cluster-name" tag equals
    the cluster name is returned.

    Raises:
        KeyError: if either environment variable is not set.
        Exception: if no listed API gateway carries the matching tag.
    """
    cluster_name = os.environ["CORTEX_CLUSTER_NAME"]
    region = os.environ["CORTEX_REGION"]

    pages = (
        boto3.client("apigatewayv2", region_name=region).get_paginator("get_apis").paginate()
    )

    # Lazy scan: pages are only requested until the first match is found.
    matching_endpoints = (
        api["ApiEndpoint"]
        for page in pages
        for api in page["Items"]
        if api["Tags"].get("cortex.dev/cluster-name") == cluster_name
    )
    for endpoint in matching_endpoints:
        return endpoint

    raise Exception(
        f"your cluster's api gateway (in {region} with tag cortex.dev/cluster-name={cluster_name}) does not exist"
    )


if __name__ == "__main__":
    # Emit the endpoint without a trailing newline.
    print(get_api_gateway_endpoint(), end="")
36 changes: 36 additions & 0 deletions manager/get_api_gateway_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2020 Cortex Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import boto3
import os


def get_api_gateway_id():
    """Return the ID of the API gateway tagged with this cluster's name.

    The cluster name and region are read from the CORTEX_CLUSTER_NAME and
    CORTEX_REGION environment variables. API gateways in that region are
    listed page by page through the "apigatewayv2" client, and the "ApiId" of
    the first one whose "cortex.dev/cluster-name" tag equals the cluster name
    is returned.

    Raises:
        KeyError: if either environment variable is not set.
        Exception: if no listed API gateway carries the matching tag.
    """
    cluster_name = os.environ["CORTEX_CLUSTER_NAME"]
    region = os.environ["CORTEX_REGION"]

    apigateway = boto3.client("apigatewayv2", region_name=region)
    api_pages = apigateway.get_paginator("get_apis").paginate()

    for page in api_pages:
        for api in page["Items"]:
            # Skip gateways that belong to other clusters (or carry no cluster tag).
            if api["Tags"].get("cortex.dev/cluster-name") != cluster_name:
                continue
            return api["ApiId"]

    raise Exception(
        f"your cluster's api gateway (in {region} with tag cortex.dev/cluster-name={cluster_name}) does not exist"
    )


if __name__ == "__main__":
    # Emit the ID without a trailing newline.
    print(get_api_gateway_id(), end="")
2 changes: 1 addition & 1 deletion manager/info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ function get_api_load_balancer_endpoint() {
}

function get_api_gateway_endpoint() {
aws apigatewayv2 get-apis --region $CORTEX_REGION | jq ".Items[] | select(.Name == \"${CORTEX_CLUSTER_NAME}\") | .ApiEndpoint" | tr -d '"'
python get_api_gateway_endpoint.py
}

if ! eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION >/dev/null 2>&1; then
Expand Down
13 changes: 1 addition & 12 deletions manager/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -163,18 +163,6 @@ function ensure_eks() {
function main() {
mkdir -p $CORTEX_CLUSTER_WORKSPACE

# create API Gateway
if [ "$arg1" != "--update" ]; then
create_api_output=$(aws apigatewayv2 create-api --tags $CORTEX_TAGS --region $CORTEX_REGION --name $CORTEX_CLUSTER_NAME --protocol-type HTTP)
api_id=$(echo $create_api_output | jq .ApiId | tr -d '"')
if [ "$api_id" = "" ] || [ "$api_id" = "null" ]; then
echo -e "unable to extract api gateway ID from create-api output:\n$create_api_output"
exit 1
fi
# create default stage; ignore error because default stage is supposed to be already created, but currently it isn't because of a possible bug in create-api
aws apigatewayv2 create-stage --region $CORTEX_REGION --tags $CORTEX_TAGS --api-id $api_id --auto-deploy --stage-name \$default &>/dev/null || true
fi

# create cluster (if it doesn't already exist)
ensure_eks

Expand Down Expand Up @@ -261,6 +249,7 @@ function main() {
# add VPC Link integration to API Gateway
if [ "$arg1" != "--update" ] && [ "$CORTEX_API_LOAD_BALANCER_SCHEME" == "internal" ]; then
echo -n "○ creating api gateway vpc link integration "
api_id=$(python get_api_gateway_id.py)
python create_gateway_integration.py $api_id $vpc_link_id
echo ""
echo -n "○ waiting for api gateway vpc link integration "
Expand Down
48 changes: 43 additions & 5 deletions pkg/lib/aws/apigateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,34 @@ import (
"github.com/cortexlabs/cortex/pkg/lib/errors"
)

// CreateAPIGateway creates a new HTTP API Gateway with the given name and tags,
// then creates its auto-deploying "$default" stage with the same tags.
//
// It returns the ID of the new API Gateway. If creating the stage fails, a
// best-effort attempt is made to delete the API Gateway that was just created
// (the outcome of that deletion is ignored) and the stage error is returned.
func (c *Client) CreateAPIGateway(name string, tags map[string]string) (string, error) {
	apiInput := &apigatewayv2.CreateApiInput{
		Name:         aws.String(name),
		ProtocolType: aws.String(apigatewayv2.ProtocolTypeHttp),
		Tags:         aws.StringMap(tags),
	}

	apiOutput, err := c.APIGatewayV2().CreateApi(apiInput)
	switch {
	case err != nil:
		return "", errors.Wrap(err, "failed to create api gateway")
	case apiOutput.ApiId == nil:
		// The call succeeded but no ID came back, so there is nothing to return.
		return "", errors.ErrorUnexpected("failed to create api gateway")
	}
	apiID := *apiOutput.ApiId

	stageInput := &apigatewayv2.CreateStageInput{
		ApiId:      apiOutput.ApiId,
		AutoDeploy: aws.Bool(true),
		StageName:  aws.String("$default"),
		Tags:       aws.StringMap(tags),
	}

	if _, stageErr := c.APIGatewayV2().CreateStage(stageInput); stageErr != nil {
		c.DeleteAPIGateway(apiID) // best effort cleanup
		return "", errors.Wrap(stageErr, "failed to create $default api gateway stage")
	}

	return apiID, nil
}

// GetVPCLinkByTag Gets a VPC Link by tag (returns nil if there are no matches)
func (c *Client) GetVPCLinkByTag(tagName string, tagValue string) (*apigatewayv2.VpcLink, error) {
var nextToken *string
Expand Down Expand Up @@ -111,20 +139,30 @@ func (c *Client) DeleteAPIGatewayByTag(tagName string, tagValue string) (*apigat
return nil, nil
}

// Delete mappings in case user added a custom domain name (otherwise this will block API Gateway deletion)
err = c.DeleteAPIGatewayMappings(*apiGateway.ApiId)
err = c.DeleteAPIGateway(*apiGateway.ApiId)
if err != nil {
return nil, err
}

return apiGateway, nil
}

// DeleteAPIGateway Deletes an API Gateway by ID (returns an error if the API Gateway does not exist)
func (c *Client) DeleteAPIGateway(apiGatewayID string) error {
// Delete mappings in case user added a custom domain name (otherwise this will block API Gateway deletion)
err := c.DeleteAPIGatewayMappings(apiGatewayID)
if err != nil {
return err
}

_, err = c.APIGatewayV2().DeleteApi(&apigatewayv2.DeleteApiInput{
ApiId: apiGateway.ApiId,
ApiId: aws.String(apiGatewayID),
})
if err != nil {
return nil, errors.Wrap(err, "failed to delete api gateway "+*apiGateway.ApiId)
return errors.Wrap(err, "failed to delete api gateway "+apiGatewayID)
}

return apiGateway, nil
return nil
}

// DeleteAPIGatewayMappingsForDomainName deletes all API mappings that point to the provided api gateway from the provided domain name
Expand Down

0 comments on commit 940811a

Please sign in to comment.