From e42ead6e5c9c288970063279fd2ce6def8137d4a Mon Sep 17 00:00:00 2001 From: chrisammon3000 Date: Mon, 14 Aug 2023 14:20:53 -0400 Subject: [PATCH 01/14] Parameterize VPC, subnet and update DNS A record (#87) * update gitignore * renew gfe-db SSL certificate with SSM Run Command * parameterize VPC and public subnet * update existing DNS A record using Makefile target * pin urllib to avoid Lambda errors * add max errors for build * exit status 1 for max errors --- .gitignore | 5 +- Makefile | 3 +- README.md | 39 +++++--- gfe-db/database/Makefile | 1 + gfe-db/database/scripts/start_task.sh | 4 +- gfe-db/database/template.yaml | 10 -- gfe-db/infrastructure/Makefile | 19 +++- gfe-db/infrastructure/change-batch.json | 17 ++++ gfe-db/infrastructure/template.yaml | 94 ++++++++++--------- .../invoke_pipeline/src/requirements.txt | 1 + gfe-db/pipeline/jobs/build/src/app.py | 12 ++- gfe-db/pipeline/template.yaml | 1 + 12 files changed, 129 insertions(+), 77 deletions(-) create mode 100644 gfe-db/infrastructure/change-batch.json diff --git a/.gitignore b/.gitignore index e0850417..9e1bc163 100755 --- a/.gitignore +++ b/.gitignore @@ -88,6 +88,8 @@ packaged.yaml samconfig.toml # Project files +change-batch.json.tmp +_cache/ tags data/ *.pem @@ -104,5 +106,4 @@ ___* gfedb.zip gfe-db/pipeline/jobs/build/event.json gfe-db/pipeline/statemachines/test* -reports/ -_cache/ \ No newline at end of file +reports/ \ No newline at end of file diff --git a/Makefile b/Makefile index b6b3d7be..91fb5650 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,7 @@ export AWS_ACCOUNT ?= $(shell aws sts get-caller-identity --query Account --outp export ROOT_DIR := $(shell pwd) export DATABASE_DIR := ${ROOT_DIR}/${APP_NAME}/database +export INFRA_DIR := ${ROOT_DIR}/${APP_NAME}/infrastructure export LOGS_DIR := $(shell echo "${ROOT_DIR}/logs") export CFN_LOG_PATH := $(shell echo "${LOGS_DIR}/cfn/logs.txt") export PURGE_LOGS := false @@ -82,7 +83,7 @@ check.dependencies: $(MAKE) check.dependencies.jq 
check.dependencies.docker: - @if ! docker info >/dev/null 2>&1; then \ + @if docker info 2>&1 | grep -q 'Is the docker daemon running?'; then \ echo "**** Docker is not running. Please start Docker before deploying. ****" && \ echo "**** Please refer to the documentation for a list of prerequisistes. ****" && \ exit 1; \ diff --git a/README.md b/README.md index 07acee12..1e6c97c1 100755 --- a/README.md +++ b/README.md @@ -126,16 +126,23 @@ The data pipeline layer automates integration of newly released IMGT/HLA data in Follow the steps to build and deploy the application to AWS. ### Quick Start +1. Retrieve the VPC ID and subnet ID from the AWS console or using the AWS CLI. This list outlines the basic steps for deployment. For more details please see the following sections. -1. Purchase or designate a domain in Route53 and create a hosted zone -2. Acquire a subscription for the Bitnami Neo4j AMI through [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-v47qqrn2yy7ie?sr=0-4&ref_=beagle&applicationId=AWSMPContessa) -3. [Install prerequisites](#Prerequisites) -4. [Set environment variables](#environment-variables) -5. Check the [config JSONs](#data-pipeline-config) (parameters and state) and edit the values as desired -6. Run `make deploy` to deploy the stacks to AWS -7. Run `make database.load.run releases=` to load the Neo4j, or `make database.load.run releases= limit=` to run with a limited number of alleles -8. Run `make database.get-credentials` to get the username and password for Neo4j -9. Run `make database.get-url` to get the URL for Neo4j and navigate to the Neo4j browser at the subdomain and host domain, for example `https://gfe-db.cloudftl.com:7473/browser/` +2. Purchase or designate a domain in Route53 and create a hosted zone with an A record for the subdomain. You can use the VPC's IP address for the A record because it will be updated later by the deployment script. +3. 
Acquire a subscription for the Bitnami Neo4j AMI through [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-v47qqrn2yy7ie?sr=0-4&ref_=beagle&applicationId=AWSMPContessa). +4. [Install prerequisites](#Prerequisites). +5. [Set environment variables](#environment) including the ones from the previous steps: + - VPC_ID (step 1) + - PUBLIC_SUBNET_ID (step 1) + - HOSTED_ZONE_ID (step 2) + - HOST_DOMAIN (step 2) + - SUBDOMAIN (step 2) + - NEO4J_AMI_ID (step 3) +6. Check the [config JSONs](#data-pipeline-config) (parameters and state) and edit the values as desired. +7. Run `make deploy` to deploy the stacks to AWS. +8. Run `make database.load.run releases=` to load the Neo4j, or `make database.load.run releases= limit=` to run with a limited number of alleles. +9. Run `make database.get-credentials` to get the username and password for Neo4j. +10. Run `make database.get-url` to get the URL for Neo4j and navigate to the Neo4j browser at the subdomain and host domain, for example `https://gfe-db.cloudftl.com:7473/browser/`. ### Prerequisites Please refer to the respective documentation for specific installation instructions. @@ -168,15 +175,18 @@ These variables must be defined before running Make. The best way to set these v # .env STAGE= APP_NAME=gfe-db -AWS_REGION= +AWS_REGION= +VPC_ID= # Available through the console or CLI +PUBLIC_SUBNET_ID= # Available through the console or CLI; Public subnets have a route to an internet gateway GITHUB_PERSONAL_ACCESS_TOKEN= -HOST_DOMAIN= +HOSTED_ZONE_ID= # Available through the console or CLI +HOST_DOMAIN= SUBDOMAIN= ADMIN_EMAIL= SUBSCRIBE_EMAILS=,,,... APOC_VERSION=4.4.0.3 GDS_VERSION=2.0.1 -NEO4J_AMI_ID= +NEO4J_AMI_ID= # Requires AWS Marketplace subscription ``` | Variable Name | Example Value | Type | Description | @@ -184,8 +194,11 @@ NEO4J_AMI_ID= | STAGE | dev | string | The stage of the application. | | APP_NAME | gfe-db | string | The name of the application. 
| | AWS_REGION | us-east-1 | string | The AWS region to deploy to. | +| VPC_ID | vpc-1234567890abcdef | string | The ID of the VPC to deploy to. | +| PUBLIC_SUBNET_ID | subnet-1234567890abcdef | string | The ID of the public subnet to deploy to. | | GITHUB_PERSONAL_ACCESS_TOKEN | | string | GitHub PAT for repository access | -| HOST_DOMAIN | mydomain.com | string | The domain to deploy to. | +| HOSTED_ZONE_ID | Z1234567890ABCDEF | string | The ID of the hosted zone to deploy to. | +| HOST_DOMAIN | example.com | string | The domain to deploy to. | | SUBDOMAIN | gfe-db | string | The subdomain to deploy to. | | ADMIN_EMAIL | user@company.com | string | Admin's email required for SSL certificate | | SUBSCRIBE_EMAILS | user@company.com,user2@company.com | string | Comma-separated list of emails for notifications | diff --git a/gfe-db/database/Makefile b/gfe-db/database/Makefile index 30d328dc..de42feda 100644 --- a/gfe-db/database/Makefile +++ b/gfe-db/database/Makefile @@ -33,6 +33,7 @@ service.config.neo4j.deploy: service.config.update-dns @aws s3 cp neo4j/neo4j.conf s3://$$DATA_BUCKET_NAME/config/neo4j/neo4j.conf 2>&1 | tee -a $$CFN_LOG_PATH @aws s3 cp --recursive neo4j/cypher/ s3://$$DATA_BUCKET_NAME/config/neo4j/cypher/ 2>&1 | tee -a $$CFN_LOG_PATH +# This target will configure the database to use the correct DNS name using env vars service.config.update-dns: @[ "${HOST_DOMAIN}" != "" ] && \ cat neo4j/neo4j.template | \ diff --git a/gfe-db/database/scripts/start_task.sh b/gfe-db/database/scripts/start_task.sh index 5ca311b8..9046d2ec 100644 --- a/gfe-db/database/scripts/start_task.sh +++ b/gfe-db/database/scripts/start_task.sh @@ -22,7 +22,7 @@ send_result () { fi } -trap 'cause="Error on line $LINENO" && error=$? && send_result && kill 0' ERR +trap 'cause="Script failed due to error on line $LINENO. Please see logs in System Manager Run Command history for more details" && error=$? 
&& send_result && kill 0' ERR export AWS_REGION=$(curl --silent http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r '.region') @@ -88,7 +88,7 @@ echo "$(date -u +'%Y-%m-%d %H:%M:%S.%3N') - Task exit status: $TASK_EXIT_STATUS" if [[ $TASK_EXIT_STATUS != "0" ]]; then status="FAILED" error="$TASK_EXIT_STATUS" - cause="Error on line $LINENO" + cause="Task failed due to error on line $LINENO. Please see logs in System Manager Run Command history for more details." send_result kill 0 else diff --git a/gfe-db/database/template.yaml b/gfe-db/database/template.yaml index 36d37f41..45318065 100644 --- a/gfe-db/database/template.yaml +++ b/gfe-db/database/template.yaml @@ -30,16 +30,6 @@ Parameters: Type: String Resources: - Neo4jDNSRecord: - Type: AWS::Route53::RecordSet - Properties: - HostedZoneName: !Sub "${HostDomain}." - Comment: !Sub 'DNS name for ${Stage}-${AppName} server' - Name: !Sub ${Subdomain}.${HostDomain}. - Type: A - TTL: 300 - ResourceRecords: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseEndpoint}}' Neo4jCredentialsSecret: Type: AWS::SecretsManager::Secret diff --git a/gfe-db/infrastructure/Makefile b/gfe-db/infrastructure/Makefile index 82c2689e..d84f0e61 100644 --- a/gfe-db/infrastructure/Makefile +++ b/gfe-db/infrastructure/Makefile @@ -9,6 +9,17 @@ deploy: $(MAKE) service.deploy $(MAKE) service.monitoring.create-subscriptions +service.deploy.update-dns: + @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Updating DNS records" 2>&1 | tee -a $${CFN_LOG_PATH} + @config_path=${INFRA_DIR}/change-batch.json && \ + elastic_ip=$$(aws ssm get-parameters \ + --names "/$${APP_NAME}/$${STAGE}/$${AWS_REGION}/Neo4jDatabaseEndpoint" \ + --with-decryption \ + --query "Parameters[0].Value" \ + --output text) && \ + sed -e "s//${SUBDOMAIN}.${HOST_DOMAIN}./g" -e "s//$$elastic_ip/g" $$config_path > $$config_path.tmp && \ + aws route53 change-resource-record-sets --hosted-zone-id $${HOSTED_ZONE_ID} --change-batch 
file://$$config_path.tmp + # TODO test AWSCLI output and validate the stack was created successfully service.deploy: @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying CloudFormation" 2>&1 | tee -a $${CFN_LOG_PATH} @@ -19,9 +30,11 @@ service.deploy: --parameter-overrides \ Stage="$${STAGE}" \ AppName="$${APP_NAME}" \ - DataBucketName="$$DATA_BUCKET_NAME" \ - GitHubPersonalAccessToken="$$GITHUB_PERSONAL_ACCESS_TOKEN" \ - HostedZoneId="$${HOSTED_ZONE_ID}" 2>&1 | tee -a $${CFN_LOG_PATH} || true + VpcId="$${VPC_ID}" \ + PublicSubnetId="$${PUBLIC_SUBNET_ID}" \ + DataBucketName="$${DATA_BUCKET_NAME}" \ + GitHubPersonalAccessToken="$$GITHUB_PERSONAL_ACCESS_TOKEN" 2>&1 | tee -a $${CFN_LOG_PATH} || true + $(MAKE) service.deploy.update-dns service.monitoring.create-subscriptions: @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Creating SNS topic subscriptions" 2>&1 | tee -a $${CFN_LOG_PATH} diff --git a/gfe-db/infrastructure/change-batch.json b/gfe-db/infrastructure/change-batch.json new file mode 100644 index 00000000..3ea3589d --- /dev/null +++ b/gfe-db/infrastructure/change-batch.json @@ -0,0 +1,17 @@ +{ + "Changes": [ + { + "Action": "UPSERT", + "ResourceRecordSet": { + "Name": "", + "Type": "A", + "TTL": 300, + "ResourceRecords": [ + { + "Value": "" + } + ] + } + } + ] +} \ No newline at end of file diff --git a/gfe-db/infrastructure/template.yaml b/gfe-db/infrastructure/template.yaml index 612a6365..68af4632 100644 --- a/gfe-db/infrastructure/template.yaml +++ b/gfe-db/infrastructure/template.yaml @@ -9,6 +9,10 @@ Parameters: Type: String DataBucketName: Type: String + VpcId: + Type: String + PublicSubnetId: + Type: String GitHubPersonalAccessToken: Type: String NoEcho: true @@ -32,53 +36,53 @@ Mappings: AvailabilityZone: eu-west-3a Resources: - Vpc: - Type: AWS::EC2::VPC - Properties: - CidrBlock: 10.0.0.0/16 - EnableDnsHostnames: true - EnableDnsSupport: true - Tags: - - Key: Name - Value: !Sub '${Stage}-${AppName}-${AWS::Region}-vpc' + # Vpc: + # Type: 
AWS::EC2::VPC + # Properties: + # CidrBlock: 10.0.0.0/16 + # EnableDnsHostnames: true + # EnableDnsSupport: true + # Tags: + # - Key: Name + # Value: !Sub '${Stage}-${AppName}-${AWS::Region}-vpc' - PublicSubnet: - Type: AWS::EC2::Subnet - Properties: - CidrBlock: 10.0.0.0/24 - VpcId: !Ref Vpc - AvailabilityZone: !FindInMap [AvailabilityZoneMap, !Ref AWS::Region, AvailabilityZone] - MapPublicIpOnLaunch: true + # PublicSubnet: + # Type: AWS::EC2::Subnet + # Properties: + # CidrBlock: 10.0.0.0/24 + # VpcId: !Ref Vpc + # AvailabilityZone: !FindInMap [AvailabilityZoneMap, !Ref AWS::Region, AvailabilityZone] + # MapPublicIpOnLaunch: true - InternetGateway: - Type: AWS::EC2::InternetGateway + # InternetGateway: + # Type: AWS::EC2::InternetGateway - RouteTable: - Type: AWS::EC2::RouteTable - Properties: - VpcId: !Ref Vpc + # RouteTable: + # Type: AWS::EC2::RouteTable + # Properties: + # VpcId: !Ref Vpc - VpcGatewayAttachment: - Type: AWS::EC2::VPCGatewayAttachment - Properties: - VpcId: !Ref Vpc - InternetGatewayId: !Ref InternetGateway + # VpcGatewayAttachment: + # Type: AWS::EC2::VPCGatewayAttachment + # Properties: + # VpcId: !Ref Vpc + # InternetGatewayId: !Ref InternetGateway - Route: - Type: AWS::EC2::Route - DependsOn: - - InternetGateway - - VpcGatewayAttachment - Properties: - RouteTableId: !Ref RouteTable - DestinationCidrBlock: 0.0.0.0/0 - GatewayId: !Ref InternetGateway + # Route: + # Type: AWS::EC2::Route + # DependsOn: + # - InternetGateway + # - VpcGatewayAttachment + # Properties: + # RouteTableId: !Ref RouteTable + # DestinationCidrBlock: 0.0.0.0/0 + # GatewayId: !Ref InternetGateway - SubnetRouteTableAssociation: - Type: AWS::EC2::SubnetRouteTableAssociation - Properties: - RouteTableId: !Ref RouteTable - SubnetId: !Ref PublicSubnet + # SubnetRouteTableAssociation: + # Type: AWS::EC2::SubnetRouteTableAssociation + # Properties: + # RouteTableId: !Ref RouteTable + # SubnetId: !Ref PublicSubnet DataBucket: Type: AWS::S3::Bucket @@ -91,16 +95,16 @@ 
Resources: Properties: Type: String Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/VpcID' - Description: "Name of gfe-db VPC network" - Value: !Ref Vpc + Description: !Sub "Name of VPC network for ${AppName}" + Value: !Ref VpcId PublicSubnetIDParameter: Type: AWS::SSM::Parameter Properties: Type: String Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/PublicSubnetID' - Description: "Public Subnet for the gfe-db Neo4j server" - Value: !Ref PublicSubnet + Description: !Sub "Public Subnet for the ${AppName} Neo4j server" + Value: !Ref PublicSubnetId DataBucketNameParameter: Type: AWS::SSM::Parameter diff --git a/gfe-db/pipeline/functions/invoke_pipeline/src/requirements.txt b/gfe-db/pipeline/functions/invoke_pipeline/src/requirements.txt index 3e734911..547c6318 100644 --- a/gfe-db/pipeline/functions/invoke_pipeline/src/requirements.txt +++ b/gfe-db/pipeline/functions/invoke_pipeline/src/requirements.txt @@ -1,2 +1,3 @@ +urllib3<2 requests numpy diff --git a/gfe-db/pipeline/jobs/build/src/app.py b/gfe-db/pipeline/jobs/build/src/app.py index 3a52abc4..f7c9057e 100755 --- a/gfe-db/pipeline/jobs/build/src/app.py +++ b/gfe-db/pipeline/jobs/build/src/app.py @@ -505,7 +505,7 @@ def process_allele(allele, alignments_dict, csv_path=None): help="Option for running in verbose", action="store_true") - # TO DO: add option to specify last n releases UPDATE: instead of having this script handle multiple releases, + # TODO: add option to specify last n releases UPDATE: instead of having this script handle multiple releases, # have it handle one release and just call it multiple times for an array or queue of releases # parser.add_argument("-n", "--number", # required=False, @@ -578,6 +578,8 @@ def process_allele(allele, alignments_dict, csv_path=None): store_features=True, loci=load_loci) + errors = [] + max_errors = 10 for idx, allele in enumerate(alleles): if idx == limit: break @@ -611,6 +613,7 @@ def process_allele(allele, alignments_dict, csv_path=None): else: 
logger.warn(f'Skipping allele {hla_name} for locus {locus}') except: + errors.append(allele.id) try: logger.info(f'Sending message to {failed_alleles_queue_name}') response = sqs.send_message( @@ -634,7 +637,14 @@ def process_allele(allele, alignments_dict, csv_path=None): except Exception as err: logger.error("Failed to send message") raise err + + if len(errors) > max_errors: + logger.error(f'Max errors ({max_errors}) reached. Exiting...') + break logging.info(f'Finished build for version {imgt_release}') + if len(errors) > 0: + logging.info(f'{len(errors)} errors: {errors}') + exit(1) end = time.time() logging.info(f'****** Build finished in {round(end - start, 2)} seconds ******') diff --git a/gfe-db/pipeline/template.yaml b/gfe-db/pipeline/template.yaml index 10615920..d228704e 100644 --- a/gfe-db/pipeline/template.yaml +++ b/gfe-db/pipeline/template.yaml @@ -190,6 +190,7 @@ Resources: MaxvCpus: 32 InstanceTypes: - c5d.2xlarge + # TODO use private subnet? Subnets: - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/PublicSubnetID}}' SecurityGroupIds: From 92a19682a5da3ce816fa5e99a798af1ed78f56c4 Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Wed, 16 Aug 2023 14:27:43 -0700 Subject: [PATCH 02/14] conditionally set VPC variable --- Makefile | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 91fb5650..0f00b736 100644 --- a/Makefile +++ b/Makefile @@ -40,8 +40,26 @@ target: $(info ${HELP_MESSAGE}) @exit 0 -deploy: logs.purge check.env ##=> Deploy services +var.vpc.set: +ifeq ($(vpc),true) + $(eval VPC := true) +else ifeq ($(vpc),false) + $(eval VPC := false) +else ifeq ($(vpc),) + $(eval VPC := false) +else + $(error Invalid value for vpc: must be true or false) +endif + +# var.vpc.echo: var.vpc.sets +# @echo ${VPC} + +deploy: logs.purge check.env var.vpc.set ##=> vpc=true/false ##=> Deploy all services @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying ${APP_NAME} to 
${AWS_ACCOUNT}" 2>&1 | tee -a ${CFN_LOG_PATH} +ifeq ($(vpc),true) + @echo "Deploying VPC" + $(MAKE) vpc.deploy # TODO catch error and abort +endif $(MAKE) infrastructure.deploy $(MAKE) database.deploy $(MAKE) pipeline.deploy @@ -111,6 +129,9 @@ check.dependencies.jq: fi # Deploy specific stacks +vpc.deploy: + $(MAKE) -C ${APP_NAME}/vpc/ deploy + infrastructure.deploy: $(MAKE) -C ${APP_NAME}/infrastructure/ deploy From 54020e0702af3ce7f8dca7dbe2ed4c552ba29108 Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Wed, 16 Aug 2023 17:58:47 -0700 Subject: [PATCH 03/14] validate vpc param and env vars before deployment --- Makefile | 70 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 0f00b736..33c6a89f 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,9 @@ include .env export # Base settings, these should almost never change -export AWS_ACCOUNT ?= $(shell aws sts get-caller-identity --query Account --output text) +export AWS_ACCOUNT ?= $(shell aws sts get-caller-identity \ + --query Account \ + --output text) export ROOT_DIR := $(shell pwd) export DATABASE_DIR := ${ROOT_DIR}/${APP_NAME}/database @@ -36,33 +38,80 @@ export PIPELINE_STATE_PATH := config/IMGTHLA-repository-state.json export PIPELINE_PARAMS_PATH := config/pipeline-input.json export FUNCTIONS_PATH := ${APP_NAME}/pipeline/functions +# print colors +define blue + @tput setaf 4 + @echo $1 + @tput sgr0 +endef + +define green + @tput setaf 2 + @echo $1 + @tput sgr0 +endef + +define yellow + @tput setaf 3 + @echo $1 + @tput sgr0 +endef + +define red + @tput setaf 1 + @echo $1 + @tput sgr0 +endef + target: $(info ${HELP_MESSAGE}) @exit 0 +var.vpc_id.check: +ifeq ($(VPC_ID),) + $(call red, "VPC_ID must be set as an environment variable when \`vpc\` is false") + @exit 1 +else + $(call green, "Found VpcId: ${VPC_ID}") +endif + +var.public_subnet_id.check: +ifeq ($(PUBLIC_SUBNET_ID),) + $(call red, "PUBLIC_SUBNET_ID must be 
set as an environment variable when \`vpc\` is false") + @exit 1 +else + $(call green, "Found PublicSubnetId: ${PUBLIC_SUBNET_ID}") +endif + var.vpc.set: ifeq ($(vpc),true) + @echo "vpc=$$vpc" + $(call blue, "Creating VPC for this deployment") $(eval VPC := true) else ifeq ($(vpc),false) + @echo "vpc=$$vpc" + $(MAKE) var.vpc_id.check + $(MAKE) var.public_subnet_id.check $(eval VPC := false) else ifeq ($(vpc),) + @echo "vpc not set, defaulting to false" + $(MAKE) var.vpc_id.check + $(MAKE) var.public_subnet_id.check $(eval VPC := false) else - $(error Invalid value for vpc: must be true or false) + $(call red, "Invalid value for \`vpc\`: must be \`true\` or \`false\`") + @exit 1 endif -# var.vpc.echo: var.vpc.sets -# @echo ${VPC} - deploy: logs.purge check.env var.vpc.set ##=> vpc=true/false ##=> Deploy all services @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying ${APP_NAME} to ${AWS_ACCOUNT}" 2>&1 | tee -a ${CFN_LOG_PATH} ifeq ($(vpc),true) @echo "Deploying VPC" $(MAKE) vpc.deploy # TODO catch error and abort endif - $(MAKE) infrastructure.deploy - $(MAKE) database.deploy - $(MAKE) pipeline.deploy + # $(MAKE) infrastructure.deploy + # $(MAKE) database.deploy + # $(MAKE) pipeline.deploy @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Finished deploying ${APP_NAME}" 2>&1 | tee -a ${CFN_LOG_PATH} logs.purge: logs.dirs @@ -77,6 +126,9 @@ logs.dirs: "${LOGS_DIR}/database/bootstrap" || true check.env: check.dependencies +ifndef AWS_ACCOUNT +$(error AWS_ACCOUNT is not set. Please add AWS_ACCOUNT to the environment variables.) +endif ifndef AWS_REGION $(error AWS_REGION is not set. Please add AWS_REGION to the environment variables.) 
endif @@ -132,7 +184,7 @@ check.dependencies.jq: vpc.deploy: $(MAKE) -C ${APP_NAME}/vpc/ deploy -infrastructure.deploy: +infrastructure.deploy: $(MAKE) -C ${APP_NAME}/infrastructure/ deploy database.deploy: From fc944b706ded6846d26640d52c0651742a321a36 Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Thu, 17 Aug 2023 14:59:54 -0700 Subject: [PATCH 04/14] conditionally create VPC --- Makefile | 29 +++--- gfe-db/infrastructure/Makefile | 10 +- gfe-db/infrastructure/template.yaml | 146 +++++++++++++++++++--------- 3 files changed, 125 insertions(+), 60 deletions(-) diff --git a/Makefile b/Makefile index 33c6a89f..15fbc689 100644 --- a/Makefile +++ b/Makefile @@ -67,6 +67,7 @@ target: $(info ${HELP_MESSAGE}) @exit 0 +# Only called when vpc=false, checks for VPC_ID in environment var.vpc_id.check: ifeq ($(VPC_ID),) $(call red, "VPC_ID must be set as an environment variable when \`vpc\` is false") @@ -75,6 +76,7 @@ else $(call green, "Found VpcId: ${VPC_ID}") endif +# Only called when vpc=false, checks for PUBLIC_SUBNET_ID in environment var.public_subnet_id.check: ifeq ($(PUBLIC_SUBNET_ID),) $(call red, "PUBLIC_SUBNET_ID must be set as an environment variable when \`vpc\` is false") @@ -83,34 +85,39 @@ else $(call green, "Found PublicSubnetId: ${PUBLIC_SUBNET_ID}") endif +# If vpc=true, VPC_ID & PUBLIC_SUBNET_ID are ignored because they will be created +# If vpc=false, VPC_ID & PUBLIC_SUBNET_ID are required in the environment +# Both variables are referenced as SSM Parameters in the CloudFormation templates var.vpc.set: ifeq ($(vpc),true) @echo "vpc=$$vpc" $(call blue, "Creating VPC for this deployment") - $(eval VPC := true) + $(eval CREATE_VPC := true) else ifeq ($(vpc),false) @echo "vpc=$$vpc" $(MAKE) var.vpc_id.check $(MAKE) var.public_subnet_id.check - $(eval VPC := false) + $(eval CREATE_VPC := false) else ifeq ($(vpc),) @echo "vpc not set, defaulting to false" $(MAKE) var.vpc_id.check $(MAKE) var.public_subnet_id.check - $(eval VPC := false) + $(eval 
CREATE_VPC := false) else $(call red, "Invalid value for \`vpc\`: must be \`true\` or \`false\`") @exit 1 -endif - +endif + +# TODO BOOKMARK 8/16/23 +# TODO Test optional VPC deployment using ONLY vpc=true/false, should deploy smoothly for both +# TODO use conditional deployment +# TODO parameterize the deployment environment +# TODO add user confirmation before deploying +# TODO need stateful deploy target, subsequent calls to `make deploy` should have the same `vpc` value true/false as the initial deploy: logs.purge check.env var.vpc.set ##=> vpc=true/false ##=> Deploy all services @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying ${APP_NAME} to ${AWS_ACCOUNT}" 2>&1 | tee -a ${CFN_LOG_PATH} -ifeq ($(vpc),true) - @echo "Deploying VPC" - $(MAKE) vpc.deploy # TODO catch error and abort -endif - # $(MAKE) infrastructure.deploy - # $(MAKE) database.deploy + $(MAKE) infrastructure.deploy + $(MAKE) database.deploy # $(MAKE) pipeline.deploy @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Finished deploying ${APP_NAME}" 2>&1 | tee -a ${CFN_LOG_PATH} diff --git a/gfe-db/infrastructure/Makefile b/gfe-db/infrastructure/Makefile index d84f0e61..505fb659 100644 --- a/gfe-db/infrastructure/Makefile +++ b/gfe-db/infrastructure/Makefile @@ -18,11 +18,18 @@ service.deploy.update-dns: --query "Parameters[0].Value" \ --output text) && \ sed -e "s//${SUBDOMAIN}.${HOST_DOMAIN}./g" -e "s//$$elastic_ip/g" $$config_path > $$config_path.tmp && \ - aws route53 change-resource-record-sets --hosted-zone-id $${HOSTED_ZONE_ID} --change-batch file://$$config_path.tmp + echo "Updating DNS records with:" && \ + cat $$config_path.tmp && \ + res=$$(aws route53 change-resource-record-sets --hosted-zone-id $${HOSTED_ZONE_ID} --change-batch file://$$config_path.tmp) && \ + echo && \ + echo "Response:" && \ + echo $$res | jq -r # TODO test AWSCLI output and validate the stack was created successfully +# Add stateful check from SSM Param of vpc=true/false to correctly set CREATE_VPC 
service.deploy: @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying CloudFormation" 2>&1 | tee -a $${CFN_LOG_PATH} + @echo "${CREATE_VPC}" @aws cloudformation deploy \ --region $${AWS_REGION} \ --template-file template.yaml \ @@ -30,6 +37,7 @@ service.deploy: --parameter-overrides \ Stage="$${STAGE}" \ AppName="$${APP_NAME}" \ + createVpc="$${CREATE_VPC}" \ VpcId="$${VPC_ID}" \ PublicSubnetId="$${PUBLIC_SUBNET_ID}" \ DataBucketName="$${DATA_BUCKET_NAME}" \ diff --git a/gfe-db/infrastructure/template.yaml b/gfe-db/infrastructure/template.yaml index 68af4632..9469e276 100644 --- a/gfe-db/infrastructure/template.yaml +++ b/gfe-db/infrastructure/template.yaml @@ -7,16 +7,30 @@ Parameters: Description: Stage of production AppName: Type: String + createVpc: + Type: String + Default: 'false' + AllowedValues: + - 'true' + - 'false' DataBucketName: Type: String VpcId: Type: String + Description: Required if createVpc is false + Default: '' PublicSubnetId: Type: String + Description: Required if createVpc is false + Default: '' GitHubPersonalAccessToken: Type: String NoEcho: true - + +Conditions: + CreateVpc: !Equals [!Ref createVpc, 'true'] + UseExternalVpc: !Equals [!Ref createVpc, 'false'] + Mappings: # AvailabilityZoneMap defines availability zones where an m5d.xlarge instance is available (used for Neo4j server). 
AvailabilityZoneMap: @@ -36,76 +50,112 @@ Mappings: AvailabilityZone: eu-west-3a Resources: - # Vpc: - # Type: AWS::EC2::VPC - # Properties: - # CidrBlock: 10.0.0.0/16 - # EnableDnsHostnames: true - # EnableDnsSupport: true - # Tags: - # - Key: Name - # Value: !Sub '${Stage}-${AppName}-${AWS::Region}-vpc' + + CreateVpcParameter: + Type: AWS::SSM::Parameter + Condition: CreateVpc + Properties: + Type: String + Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/CreateVpc' + Description: !Sub "True if stack creates a VPC, false if stack uses an existing VPC" + Value: !Ref createVpc + + Vpc: + Type: AWS::EC2::VPC + Condition: CreateVpc + Properties: + CidrBlock: 10.0.0.0/16 + EnableDnsHostnames: true + EnableDnsSupport: true + Tags: + - Key: Name + Value: !Sub '${Stage}-${AppName}-${AWS::Region}-vpc' - # PublicSubnet: - # Type: AWS::EC2::Subnet - # Properties: - # CidrBlock: 10.0.0.0/24 - # VpcId: !Ref Vpc - # AvailabilityZone: !FindInMap [AvailabilityZoneMap, !Ref AWS::Region, AvailabilityZone] - # MapPublicIpOnLaunch: true + PublicSubnet: + Type: AWS::EC2::Subnet + Condition: CreateVpc + Properties: + CidrBlock: 10.0.0.0/24 + VpcId: !Ref Vpc + AvailabilityZone: !FindInMap [AvailabilityZoneMap, !Ref AWS::Region, AvailabilityZone] + MapPublicIpOnLaunch: true - # InternetGateway: - # Type: AWS::EC2::InternetGateway + InternetGateway: + Type: AWS::EC2::InternetGateway + Condition: CreateVpc - # RouteTable: - # Type: AWS::EC2::RouteTable - # Properties: - # VpcId: !Ref Vpc + RouteTable: + Type: AWS::EC2::RouteTable + Condition: CreateVpc + Properties: + VpcId: !Ref Vpc - # VpcGatewayAttachment: - # Type: AWS::EC2::VPCGatewayAttachment - # Properties: - # VpcId: !Ref Vpc - # InternetGatewayId: !Ref InternetGateway + VpcGatewayAttachment: + Type: AWS::EC2::VPCGatewayAttachment + Condition: CreateVpc + Properties: + VpcId: !Ref Vpc + InternetGatewayId: !Ref InternetGateway - # Route: - # Type: AWS::EC2::Route - # DependsOn: - # - InternetGateway - # - VpcGatewayAttachment - # 
Properties: - # RouteTableId: !Ref RouteTable - # DestinationCidrBlock: 0.0.0.0/0 - # GatewayId: !Ref InternetGateway + Route: + Type: AWS::EC2::Route + Condition: CreateVpc + DependsOn: + - InternetGateway + - VpcGatewayAttachment + Properties: + RouteTableId: !Ref RouteTable + DestinationCidrBlock: 0.0.0.0/0 + GatewayId: !Ref InternetGateway - # SubnetRouteTableAssociation: - # Type: AWS::EC2::SubnetRouteTableAssociation - # Properties: - # RouteTableId: !Ref RouteTable - # SubnetId: !Ref PublicSubnet + SubnetRouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + Condition: CreateVpc + Properties: + RouteTableId: !Ref RouteTable + SubnetId: !Ref PublicSubnet - DataBucket: - Type: AWS::S3::Bucket - # Condition: CreateDataBucket + CreateVpcConditionVpcIDParameter: + Type: AWS::SSM::Parameter + Condition: CreateVpc Properties: - BucketName: !Ref DataBucketName + Type: String + Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/VpcID' + Description: !Sub "Name of VPC network for ${AppName}" + Value: !Ref Vpc + + CreateVpcConditionPublicSubnetIDParameter: + Type: AWS::SSM::Parameter + Condition: CreateVpc + Properties: + Type: String + Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/PublicSubnetID' + Description: !Sub "Public Subnet for the ${AppName} Neo4j server" + Value: !Ref PublicSubnet - VpcIDParameter: + UseExternalVpcConditionVpcIDParameter: Type: AWS::SSM::Parameter + Condition: UseExternalVpc Properties: Type: String Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/VpcID' Description: !Sub "Name of VPC network for ${AppName}" Value: !Ref VpcId - PublicSubnetIDParameter: + UseExternalVpcConditionPublicSubnetIDParameter: Type: AWS::SSM::Parameter + Condition: UseExternalVpc Properties: Type: String Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/PublicSubnetID' Description: !Sub "Public Subnet for the ${AppName} Neo4j server" Value: !Ref PublicSubnetId + DataBucket: + Type: AWS::S3::Bucket + Properties: + BucketName: !Ref DataBucketName + 
DataBucketNameParameter: Type: AWS::SSM::Parameter Properties: From 86d4d788facc900eefe9bdad8d0be3870f209d7b Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Fri, 18 Aug 2023 16:29:22 -0700 Subject: [PATCH 05/14] use environment variable instead of command line arg --- Makefile | 116 ++++++++++++++------------------- gfe-db/infrastructure/Makefile | 2 +- 2 files changed, 50 insertions(+), 68 deletions(-) diff --git a/Makefile b/Makefile index 15fbc689..c27b66ff 100644 --- a/Makefile +++ b/Makefile @@ -67,57 +67,17 @@ target: $(info ${HELP_MESSAGE}) @exit 0 -# Only called when vpc=false, checks for VPC_ID in environment -var.vpc_id.check: -ifeq ($(VPC_ID),) - $(call red, "VPC_ID must be set as an environment variable when \`vpc\` is false") - @exit 1 -else - $(call green, "Found VpcId: ${VPC_ID}") -endif - -# Only called when vpc=false, checks for PUBLIC_SUBNET_ID in environment -var.public_subnet_id.check: -ifeq ($(PUBLIC_SUBNET_ID),) - $(call red, "PUBLIC_SUBNET_ID must be set as an environment variable when \`vpc\` is false") - @exit 1 -else - $(call green, "Found PublicSubnetId: ${PUBLIC_SUBNET_ID}") -endif - -# If vpc=true, VPC_ID & PUBLIC_SUBNET_ID are ignored because they will be created -# If vpc=false, VPC_ID & PUBLIC_SUBNET_ID are required in the environment -# Both variables are referenced as SSM Parameters in the CloudFormation templates -var.vpc.set: -ifeq ($(vpc),true) - @echo "vpc=$$vpc" - $(call blue, "Creating VPC for this deployment") - $(eval CREATE_VPC := true) -else ifeq ($(vpc),false) - @echo "vpc=$$vpc" - $(MAKE) var.vpc_id.check - $(MAKE) var.public_subnet_id.check - $(eval CREATE_VPC := false) -else ifeq ($(vpc),) - @echo "vpc not set, defaulting to false" - $(MAKE) var.vpc_id.check - $(MAKE) var.public_subnet_id.check - $(eval CREATE_VPC := false) -else - $(call red, "Invalid value for \`vpc\`: must be \`true\` or \`false\`") - @exit 1 -endif - # TODO BOOKMARK 8/16/23 # TODO Test optional VPC deployment using ONLY vpc=true/false, should 
deploy smoothly for both # TODO use conditional deployment # TODO parameterize the deployment environment # TODO add user confirmation before deploying # TODO need stateful deploy target, subsequent calls to `make deploy` should have the same `vpc` value true/false as the initial -deploy: logs.purge check.env var.vpc.set ##=> vpc=true/false ##=> Deploy all services +deploy: logs.purge env.validate ##=> vpc=true/false ##=> Deploy all services @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying ${APP_NAME} to ${AWS_ACCOUNT}" 2>&1 | tee -a ${CFN_LOG_PATH} + @echo "(deploy) CREATE_VPC: ${CREATE_VPC}" $(MAKE) infrastructure.deploy - $(MAKE) database.deploy + # $(MAKE) database.deploy # $(MAKE) pipeline.deploy @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Finished deploying ${APP_NAME}" 2>&1 | tee -a ${CFN_LOG_PATH} @@ -132,27 +92,6 @@ logs.dirs: "${LOGS_DIR}/pipeline/load" \ "${LOGS_DIR}/database/bootstrap" || true -check.env: check.dependencies -ifndef AWS_ACCOUNT -$(error AWS_ACCOUNT is not set. Please add AWS_ACCOUNT to the environment variables.) -endif -ifndef AWS_REGION -$(error AWS_REGION is not set. Please add AWS_REGION to the environment variables.) -endif -ifndef AWS_PROFILE -$(error AWS_PROFILE is not set. Please select an AWS profile to use.) -endif -ifndef GITHUB_PERSONAL_ACCESS_TOKEN -$(error GITHUB_PERSONAL_ACCESS_TOKEN is not set. Please add GITHUB_PERSONAL_ACCESS_TOKEN to the environment variables.) -endif -ifndef HOST_DOMAIN -$(error HOST_DOMAIN is not set. Please add HOST_DOMAIN to the environment variables.) -endif -ifndef ADMIN_EMAIL -$(error ADMIN_EMAIL is not set. Please add ADMIN_EMAIL to the environment variables.) 
-endif - @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Found environment variables" 2>&1 | tee -a ${CFN_LOG_PATH} - check.dependencies: $(MAKE) check.dependencies.docker $(MAKE) check.dependencies.awscli @@ -187,9 +126,52 @@ check.dependencies.jq: exit 1; \ fi -# Deploy specific stacks -vpc.deploy: - $(MAKE) -C ${APP_NAME}/vpc/ deploy +env.validate.no-vpc: +ifeq ($(VPC_ID),) + $(call red, "VPC_ID must be set as an environment variable when \`CREATE_VPC\` is false") + @exit 1 +else + $(call green, "Found VPC_ID: ${VPC_ID}") +endif +ifeq ($(PUBLIC_SUBNET_ID),) + $(call red, "PUBLIC_SUBNET_ID must be set as an environment variable when \`CREATE_VPC\` is false") + @exit 1 +else + $(call green, "Found PUBLIC_SUBNET_ID: ${PUBLIC_SUBNET_ID}") +endif + +env.validate: check.dependencies +ifndef AWS_ACCOUNT + $(error AWS_ACCOUNT is not set. Please add AWS_ACCOUNT to the environment variables.) +endif +ifndef AWS_REGION + $(error AWS_REGION is not set. Please add AWS_REGION to the environment variables.) +endif +ifndef AWS_PROFILE + $(error AWS_PROFILE is not set. Please select an AWS profile to use.) +endif +ifndef GITHUB_PERSONAL_ACCESS_TOKEN + $(error GITHUB_PERSONAL_ACCESS_TOKEN is not set. Please add GITHUB_PERSONAL_ACCESS_TOKEN to the environment variables.) +endif +ifndef HOST_DOMAIN + $(error HOST_DOMAIN is not set. Please add HOST_DOMAIN to the environment variables.) +endif +ifndef ADMIN_EMAIL + $(error ADMIN_EMAIL is not set. Please add ADMIN_EMAIL to the environment variables.) +endif +ifndef CREATE_VPC + $(info 'CREATE_VPC' is not set. 
Defaulting to 'false') + $(eval export CREATE_VPC := false) + $(call blue, "This deployment uses an existing VPC") + $(MAKE) env.validate.no-vpc +endif +ifeq ($(CREATE_VPC),false) + $(call blue, "This deployment uses an existing VPC") + $(MAKE) env.validate.no-vpc +else ifeq ($(CREATE_VPC),true) + $(call blue, "This deployment includes a VPC") +endif + @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Found environment variables" 2>&1 | tee -a ${CFN_LOG_PATH} infrastructure.deploy: $(MAKE) -C ${APP_NAME}/infrastructure/ deploy diff --git a/gfe-db/infrastructure/Makefile b/gfe-db/infrastructure/Makefile index 505fb659..43e17105 100644 --- a/gfe-db/infrastructure/Makefile +++ b/gfe-db/infrastructure/Makefile @@ -29,7 +29,7 @@ service.deploy.update-dns: # Add stateful check from SSM Param of vpc=true/false to correctly set CREATE_VPC service.deploy: @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying CloudFormation" 2>&1 | tee -a $${CFN_LOG_PATH} - @echo "${CREATE_VPC}" + @echo "(infra) CREATE_VPC: ${CREATE_VPC}" @aws cloudformation deploy \ --region $${AWS_REGION} \ --template-file template.yaml \ From 874710a8b3c955d41decddf09e7e143c69b708d0 Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Fri, 18 Aug 2023 16:45:15 -0700 Subject: [PATCH 06/14] source environment variables by stage --- .gitignore | 2 +- Makefile | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 9e1bc163..8ef8fd64 100755 --- a/.gitignore +++ b/.gitignore @@ -76,7 +76,7 @@ fil* summary_* # Environment variables -.env +.env* env.sh # PyCharm diff --git a/Makefile b/Makefile index c27b66ff..4308fc6d 100644 --- a/Makefile +++ b/Makefile @@ -2,11 +2,11 @@ # Bootstrapping variables ########################## -# Application specific environment variables +# Environment variables include .env +include .env.${STAGE} export -# Base settings, these should almost never change export AWS_ACCOUNT ?= $(shell aws sts get-caller-identity \ --query Account \ 
--output text) From 5b9383b209a55a1e5cafa0c5730be751bcba0986 Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Fri, 18 Aug 2023 17:29:56 -0700 Subject: [PATCH 07/14] validate stage value matches deployment environment --- Makefile | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 4308fc6d..01cae82b 100644 --- a/Makefile +++ b/Makefile @@ -68,12 +68,10 @@ target: @exit 0 # TODO BOOKMARK 8/16/23 -# TODO Test optional VPC deployment using ONLY vpc=true/false, should deploy smoothly for both -# TODO use conditional deployment -# TODO parameterize the deployment environment +# TODO use conditional deployment ✅ +# TODO parameterize the deployment environment ✅ # TODO add user confirmation before deploying -# TODO need stateful deploy target, subsequent calls to `make deploy` should have the same `vpc` value true/false as the initial -deploy: logs.purge env.validate ##=> vpc=true/false ##=> Deploy all services +deploy: logs.purge env.validate.stage env.validate ##=> Deploy all services @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying ${APP_NAME} to ${AWS_ACCOUNT}" 2>&1 | tee -a ${CFN_LOG_PATH} @echo "(deploy) CREATE_VPC: ${CREATE_VPC}" $(MAKE) infrastructure.deploy @@ -126,6 +124,23 @@ check.dependencies.jq: exit 1; \ fi +# TODO use cloudformation list-stacks as alternative to SSM parameter +env.validate.stage: + @res=$$(aws ssm get-parameters \ + --names "/${APP_NAME}/${STAGE}/${AWS_REGION}/Stage" \ + --output json \ + | jq -r '.Parameters[0].Value') && \ + echo "Found stage: $${res}" && \ + if [ "$${res}" = "null" ]; then \ + echo "\033[0;32m**** Starting new deployment. ****\033[0m"; \ + elif [ "$${res}" = "${STAGE}" ]; then \ + echo "\033[0;32m**** Found existing deployment for \`${STAGE}\` ****\033[0m"; \ + else \ + echo "\033[0;31m**** STAGE mismatch or bad credential configuration. ****\033[0m" && \ + echo "\033[0;31m**** Please refer to the documentation for a list of prerequisites. 
****\033[0m" && \ + exit 1; \ + fi + env.validate.no-vpc: ifeq ($(VPC_ID),) $(call red, "VPC_ID must be set as an environment variable when \`CREATE_VPC\` is false") From 0efe52271ea06510c4b8100531f3e0ce02728725 Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Sat, 19 Aug 2023 19:44:57 -0700 Subject: [PATCH 08/14] store stage, app variables in SSM Parameter Store --- gfe-db/infrastructure/template.yaml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/gfe-db/infrastructure/template.yaml b/gfe-db/infrastructure/template.yaml index 9469e276..3010acb3 100644 --- a/gfe-db/infrastructure/template.yaml +++ b/gfe-db/infrastructure/template.yaml @@ -51,9 +51,24 @@ Mappings: Resources: + StageParameter: + Type: AWS::SSM::Parameter + Properties: + Type: String + Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/Stage' + Description: "Stage of production" + Value: !Ref Stage + + AppParameter: + Type: AWS::SSM::Parameter + Properties: + Type: String + Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/AppName' + Description: "Name of application" + Value: !Ref AppName + CreateVpcParameter: Type: AWS::SSM::Parameter - Condition: CreateVpc Properties: Type: String Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/CreateVpc' From 58e1e9c01dca033151cf2fb08b19e775f53ce1ea Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Sat, 19 Aug 2023 19:45:48 -0700 Subject: [PATCH 09/14] require user confirmation before deployment --- Makefile | 39 +++++++++++++++++++++++----------- gfe-db/infrastructure/Makefile | 3 +-- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 01cae82b..d481ff04 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ ########################## # Environment variables -include .env +# include .env # Optional, include STAGE and AWS_PROFILE include .env.${STAGE} export @@ -67,16 +67,31 @@ target: $(info ${HELP_MESSAGE}) @exit 0 -# TODO BOOKMARK 8/16/23 -# TODO use conditional deployment ✅ -# TODO 
parameterize the deployment environment ✅ -# TODO add user confirmation before deploying -deploy: logs.purge env.validate.stage env.validate ##=> Deploy all services +app.print: + @echo "\033[0;34m " + @echo "\033[0;34m ____ __ __ " + @echo "\033[0;34m ____ _ / __/___ ____/ // /_ " + @echo "\033[0;32m / __ \`// /_ / _ \ ______ / __ // __ \\" + @echo "\033[0;32m / /_/ // __// __/ /_____/ / /_/ // /_/ /" + @echo "\033[0;34m \__, //_/ \___/ \____//_____/ " + @echo "\033[0;34m/____/ \033[0m" + @echo "\033[0;34m \033[0m" + + +env.print: + @echo "\033[0;33mReview the contents of the .env file:\033[0m" + @echo "+---------------------------------------------------------------------------------+" + @awk '{ if (substr($$0, 1, 1) != "#") { line = substr($$0, 1, 76); if (length($$0) > 76) line = line "..."; printf "| %-79s |\n", line }}' .env.${STAGE} + @echo "+---------------------------------------------------------------------------------+" + @echo "\033[0;33mPlease confirm the above values are correct.\033[0m" + +deploy: app.print logs.purge env.validate.stage env.validate ##=> Deploy all services @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying ${APP_NAME} to ${AWS_ACCOUNT}" 2>&1 | tee -a ${CFN_LOG_PATH} - @echo "(deploy) CREATE_VPC: ${CREATE_VPC}" + $(MAKE) env.print + @echo "Deploy stack to the \`${STAGE}\` environment? [y/N] \c " && read ans && [ $${ans:-N} = y ] $(MAKE) infrastructure.deploy - # $(MAKE) database.deploy - # $(MAKE) pipeline.deploy + $(MAKE) database.deploy + $(MAKE) pipeline.deploy @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Finished deploying ${APP_NAME}" 2>&1 | tee -a ${CFN_LOG_PATH} logs.purge: logs.dirs @@ -177,14 +192,14 @@ endif ifndef CREATE_VPC $(info 'CREATE_VPC' is not set. 
Defaulting to 'false') $(eval export CREATE_VPC := false) - $(call blue, "This deployment uses an existing VPC") + $(call blue, "**** This deployment uses an existing VPC**** ") $(MAKE) env.validate.no-vpc endif ifeq ($(CREATE_VPC),false) - $(call blue, "This deployment uses an existing VPC") + $(call blue, "**** This deployment uses an existing VPC**** ") $(MAKE) env.validate.no-vpc else ifeq ($(CREATE_VPC),true) - $(call blue, "This deployment includes a VPC") + $(call blue, "**** This deployment includes a VPC**** ") endif @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Found environment variables" 2>&1 | tee -a ${CFN_LOG_PATH} diff --git a/gfe-db/infrastructure/Makefile b/gfe-db/infrastructure/Makefile index 43e17105..f6b85ee7 100644 --- a/gfe-db/infrastructure/Makefile +++ b/gfe-db/infrastructure/Makefile @@ -19,7 +19,7 @@ service.deploy.update-dns: --output text) && \ sed -e "s//${SUBDOMAIN}.${HOST_DOMAIN}./g" -e "s//$$elastic_ip/g" $$config_path > $$config_path.tmp && \ echo "Updating DNS records with:" && \ - cat $$config_path.tmp && \ + cat $$config_path.tmp | jq -r && \ res=$$(aws route53 change-resource-record-sets --hosted-zone-id $${HOSTED_ZONE_ID} --change-batch file://$$config_path.tmp) && \ echo && \ echo "Response:" && \ @@ -29,7 +29,6 @@ service.deploy.update-dns: # Add stateful check from SSM Param of vpc=true/false to correctly set CREATE_VPC service.deploy: @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying CloudFormation" 2>&1 | tee -a $${CFN_LOG_PATH} - @echo "(infra) CREATE_VPC: ${CREATE_VPC}" @aws cloudformation deploy \ --region $${AWS_REGION} \ --template-file template.yaml \ From 0f3dae11183cf16f06cb7213d3a240c6fa8eaf3f Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Sat, 19 Aug 2023 20:28:29 -0700 Subject: [PATCH 10/14] update README --- README.md | 179 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 104 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index 1e6c97c1..39fd2c31 100755 --- 
a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Graph database representing IPD-IMGT/HLA sequence data as GFE. ├── Makefile # Use the root Makefile to deploy, delete and manage resources and configuration ├── README.md ├── docs # Sphinx documentation -├── (-gfe-db--neo4j-key.pem) # EC2 key pair for SSH access to Neo4j server, ccreated on deployment +├── (-gfe-db--neo4j-key.pem) # EC2 key pair for SSH access to Neo4j server, created on deployment ├── requirements-dev.txt # Python requirements for local development ├── requirements-docs.txt # Python requirements for documentation └── gfe-db @@ -110,43 +110,63 @@ The `gfe-db` represents IPD-IMGT/HLA sequence data as GFE nodes and relationship This allows the database and pipeline layers to be decoupled from each other and deployed or destroyed independently without affecting the other. Common configuration parameters are shared between resources using environment variables, JSON files, AWS SSM Paramter Store and Secrets Manager. ### Base Infrastructure -The base infrastructure layer deploys a VPC, public subnet, S3 bucket, Elastic IP and common SSM parameters and secrets for the other services to use. +The base infrastructure layer deploys a VPC (optional), public subnet (optional), S3 bucket, Elastic IP and common SSM parameters and secrets for the other services to use. ### Database -The database layer deploys an EC2 instance running the Bitnami Neo4j AMI (Ubuntu 18.04) into a public subnet. CloudFormation also creates an A record for a Route53 domain under a pre-existing Route53 domain and hosted zone so that SSL can be used to connect to Neo4j. During database deploymeny the SSL certificate is created and Cypher queries are run to create constraints and indexes, which help speed up loading and ensure data integrity. Neo4j is ready to be accessed through a browser once the instance has booted sucessfully. +The database layer deploys an EC2 instance running the Bitnami Neo4j AMI (Ubuntu 18.04) into a public subnet. 
An A record is required for a pre-existing Route53 domain and hosted zone so that SSL can be used to connect to Neo4j. During database deployment the SSL certificate is created and Cypher queries are run to create constraints and indexes, which help speed up loading and ensure data integrity. Neo4j is ready to be accessed through a browser once the instance has booted successfully. -During loading, the `invoke_load_script` Lambda function uses SSM Run Command to execute bash scripts on the daatabase instance. These scripts communicate with the Step Functions API to retrieve the job parameters, fetch the CSVs from S3 and load the alleles into Neo4j. +During loading, a Lambda function calls the SSM Run Command API to execute bash scripts on the database instance. These scripts communicate with the Step Functions API to retrieve the job parameters, fetch the CSVs from S3 and populate the graph in Neo4j. It is also possible to backup & restore to and from S3 by specific date checkpoints. ### Data Pipeline -The data pipeline layer automates integration of newly released IMGT/HLA data into Neo4j using a scheduled Lambda which watches the source data repository and invokes the build and load processes when it detects a new IMGT/HLA version. The pipeline consists of a Step Functions state machine which orchestrates two basic processes: build and load. The build process employs a Batch job which produces an intermediate set of CSV files. The load process leverages SSM Run Command to copy the CSV files to the Neo4j server and execute Cypher statements directly on the server (server-side loading). When loading the full dataset of 35,000+ alleles, the build step will generally take around 15 minutes, however the load step can take an hour or more. 
+The data pipeline layer automates integration of newly released IMGT/HLA data into Neo4j using a scheduled Lambda which watches the source data repository and invokes the build and load processes when it detects a new IMGT/HLA version in the upstream repository. The pipeline consists of a Step Functions state machine which orchestrates the build and load stages. The build process employs a Batch job to generate an intermediate set of CSV files. The load process leverages SSM Run Command to copy the CSV files to the Neo4j server and execute Cypher statements directly on the server (server-side loading). When loading the full dataset of 35,000+ alleles, the build step will generally take around 15 minutes, however the load step can take an hour or more. ## Deployment -Follow the steps to build and deploy the application to AWS. +It is possible to deploy gfe-db within its own VPC, or to connect it to an external VPC by specifying `CREATE_VPC=true/false`. ### Quick Start +These lists outline the basic steps for deployment. For more details please see the following sections. + +**Using external VPC** 1. Retrieve the VPC ID and subnet ID from the AWS console or using the AWS CLI. -This list outlines the basic steps for deployment. For more details please see the following sections. -2. Purchase or designate a domain in Route53 and create a hosted zone with an A record for the subdomain. You can use the VPC's IP address for the A record because it will be updated later by the deployment script. +2. Purchase or designate a domain in Route53 and create a hosted zone with an A record for the subdomain. You can use `0.0.0.0` for the A record because it will be updated later by the deployment script. 3. Acquire a subscription for the Bitnami Neo4j AMI through [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-v47qqrn2yy7ie?sr=0-4&ref_=beagle&applicationId=AWSMPContessa). 4. [Install prerequisites](#Prerequisites). -5. 
[Set environment variables](#environment) including the ones from the previous steps: - - VPC_ID (step 1) - - PUBLIC_SUBNET_ID (step 1) - - HOSTED_ZONE_ID (step 2) - - HOST_DOMAIN (step 2) - - SUBDOMAIN (step 2) - - NEO4J_AMI_ID (step 3) +5. [Set environment variables](#environment) including the ones from the previous steps. You must store these in a file named `.env.`, for example `.env.dev` or `.env.prod`: + - CREATE_VPC=false + - VPC_ID + - PUBLIC_SUBNET_ID + - HOSTED_ZONE_ID + - HOST_DOMAIN + - SUBDOMAIN + - NEO4J_AMI_ID 6. Check the [config JSONs](#data-pipeline-config) (parameters and state) and edit the values as desired. -7. Run `make deploy` to deploy the stacks to AWS. -8. Run `make database.load.run releases=` to load the Neo4j, or `make database.load.run releases= limit=` to run with a limited number of alleles. -9. Run `make database.get-credentials` to get the username and password for Neo4j. -10. Run `make database.get-url` to get the URL for Neo4j and navigate to the Neo4j browser at the subdomain and host domain, for example `https://gfe-db.cloudftl.com:7473/browser/`. +7. Run `STAGE= make deploy` to deploy the stacks to AWS. +8. Run `STAGE= make database.load.run releases=` to load the Neo4j, or `STAGE= make database.load.run releases= limit=` to run with a limited number of alleles. +9. Run `STAGE= make database.get-credentials` to get the username and password for Neo4j. +10. Run `STAGE= make database.get.endpoint` to get the URL for Neo4j and navigate to the Neo4j browser at the subdomain and host domain, for example `https://gfe-db.cloudftl.com:7473/browser/`. + +**Creating a new VPC** +1. Purchase or designate a domain in Route53 and create a hosted zone with an A record for the subdomain. You can use `0.0.0.0` for the A record because it will be updated later by the deployment script. +2. 
Acquire a subscription for the Bitnami Neo4j AMI through [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-v47qqrn2yy7ie?sr=0-4&ref_=beagle&applicationId=AWSMPContessa). +3. [Install prerequisites](#Prerequisites). +4. [Set environment variables](#environment) including the ones from the previous steps. You must store these in a file named `.env.`, for example `.env.dev` or `.env.prod`: + - CREATE_VPC=true + - HOSTED_ZONE_ID + - HOST_DOMAIN + - SUBDOMAIN + - NEO4J_AMI_ID +5. Check the [config JSONs](#data-pipeline-config) (parameters and state) and edit the values as desired. +6. Run `STAGE= make deploy` to deploy the stacks to AWS. +7. Run `STAGE= make database.load.run releases=` to load the Neo4j, or `STAGE= make database.load.run releases= limit=` to run with a limited number of alleles. +8. Run `STAGE= make database.get-credentials` to get the username and password for Neo4j. +9. Run `STAGE= make database.get.endpoint` to get the URL for Neo4j and navigate to the Neo4j browser at the subdomain and host domain, for example `https://gfe-db.cloudftl.com:7473/browser/`. ### Prerequisites Please refer to the respective documentation for specific installation instructions. -* Route53 domain and hosted zone +* Route53 domain, hosted zone, and A record +* VPC & Public Subnet (if using external VPC) * Bitnami Neo4j AMI subscription and AMI ID * GNU Make 3.81 * coreutils (optional but recommended) @@ -154,7 +174,7 @@ Please refer to the respective documentation for specific installation instructi * SAM CLI * Docker * jq -* Python 3.9 (if developing locally) +* Python 3.9+ (if developing locally) ### Environment @@ -170,56 +190,68 @@ For more information visit the documentation page: [Configuration and credential file settings](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) #### Shell Variables -These variables must be defined before running Make. 
The best way to set these variables is with a `.env` file following this structure. +These variables must be defined before running Make. The best way to set these variables is with a `.env.` file following this structure. ```bash -# .env +# .env. +AWS_PROFILE= STAGE= -APP_NAME=gfe-db +APP_NAME= AWS_REGION= -VPC_ID= # Available through the console or CLI -PUBLIC_SUBNET_ID= # Available through the console or CLI; Public subnets have a route to an internet gateway -GITHUB_PERSONAL_ACCESS_TOKEN= -HOSTED_ZONE_ID= # Available through the console or CLI -HOST_DOMAIN= -SUBDOMAIN= ADMIN_EMAIL= SUBSCRIBE_EMAILS=,,,... -APOC_VERSION=4.4.0.3 -GDS_VERSION=2.0.1 +GITHUB_REPOSITORY_OWNER= +GITHUB_REPOSITORY_NAME= +HOST_DOMAIN= +CREATE_VPC= +VPC_ID= # if CREATE_VPC=false +PUBLIC_SUBNET_ID= # if CREATE_VPC=false +HOSTED_ZONE_ID= +SUBDOMAIN= NEO4J_AMI_ID= # Requires AWS Marketplace subscription +APOC_VERSION= +GDS_VERSION= +GITHUB_PERSONAL_ACCESS_TOKEN= ``` | Variable Name | Example Value | Type | Description | | ---------------------------- | ---------------------------------- | ------ | ------------------------------------------------ | +| AWS_PROFILE | | string | AWS profile for deployment. | | STAGE | dev | string | The stage of the application. | | APP_NAME | gfe-db | string | The name of the application. | | AWS_REGION | us-east-1 | string | The AWS region to deploy to. | -| VPC_ID | vpc-1234567890abcdef | string | The ID of the VPC to deploy to. | -| PUBLIC_SUBNET_ID | subnet-1234567890abcdef | string | The ID of the public subnet to deploy to. | -| GITHUB_PERSONAL_ACCESS_TOKEN | | string | GitHub PAT for repository access | -| HOSTED_ZONE_ID | Z1234567890ABCDEF | string | The ID of the hosted zone to deploy to. | +| ADMIN_EMAIL | user@company.com | string | Admin's email required for SSL certificate. 
| +| SUBSCRIBE_EMAILS | user@company.com,user2@company.com | string | Comma-separated list of emails for notifications | +| GITHUB_REPOSITORY_OWNER | | string | GitHub repository owner. | +| GITHUB_REPOSITORY_NAME | | string | GitHub repository name. | | HOST_DOMAIN | example.com | string | The domain to deploy to. | +| CREATE_VPC | true or false | string | Whether to create a new VPC. | +| HOSTED_ZONE_ID | Z1234567890ABCDEF | string | The ID of the hosted zone to deploy to. | | SUBDOMAIN | gfe-db | string | The subdomain to deploy to. | -| ADMIN_EMAIL | user@company.com | string | Admin's email required for SSL certificate | -| SUBSCRIBE_EMAILS | user@company.com,user2@company.com | string | Comma-separated list of emails for notifications | -| APOC_VERSION | 4.4.0.3 | string | APOC version for Neo4j | -| GDS_VERSION | 2.0.1 | string | GDS version for Neo4j | -| NEO4J_AMI_ID | ami-0b9a2b6b1c5b8b5b9 | string | Bitnami Neo4j AMI ID | +| NEO4J_AMI_ID | ami-0b9a2b6b1c5b8b5b9 | string | Bitnami Neo4j AMI ID. | +| APOC_VERSION | 4.4.0.3 | string | APOC version for Neo4j. | +| GDS_VERSION | 2.0.1 | string | GDS version for Neo4j. | +| GITHUB_PERSONAL_ACCESS_TOKEN | | string | GitHub PAT for repository access. | ***Important**:* *Always use a `.env` file or AWS SSM Parameter Store or Secrets Manager for sensitive variables like credentials and API keys. Never hard-code them, including when developing. AWS will quarantine an account if any credentials get accidentally exposed and this will cause problems. Make sure to update `.gitignore` to avoid pushing sensitive data to public repositories.* ### Makefile Usage -Once an AWS profile is configured and environment variables are exported, the application can be deployed using `make`. +Once an AWS profile is configured and environment variables are exported, the application can be deployed using `make`. 
You are required to specify the `STAGE` variable everytime `make` is called to ensure that the correct environment is selected when there are multiple deployments. ```bash -make deploy +STAGE= make deploy ``` It is also possible to deploy or update the database or pipeline services. ```bash # Deploy/update only the database service -make database.deploy +STAGE= make database.deploy # Deploy/update only the pipeline service -make pipeline.deploy +STAGE= make pipeline.deploy + +# Deploy/update only the pipeline serverless stack +STAGE= make pipeline.functions.deploy + +# Deploy/update only the Docker image for the build job +STAGE= make pipeline.jobs.deploy ``` *Note:* It is recommended to only deploy from the project root. This is because common parameters are passed from the root Makefile to nested Makefiles. If a stack has not been changed, the deployment script will continue until it reaches a stack with changes and deploy that. @@ -227,40 +259,40 @@ make pipeline.deploy To see a list of possible commands using Make, run `make` on the command line. You can also refer to the `Makefile Usage` section in the [Sphinx documentation](#documentation). 
```bash # Deploy all CloudFormation based services -make deploy +STAGE= make deploy # Deploy config files and scripts to S3 -make config.deploy +STAGE= make config.deploy # Run the StepFunctions State Machine to load Neo4j -make database.load.run releases= align= kir= limit= +STAGE= make database.load.run releases= align= kir= limit= # Retrieve Neo4j credentials after deployment -make database.get-credentials +STAGE= make database.get.credentials # Retrieve Neo4j URL after deployment -make database.get-url +STAGE= make database.get.endpoint # Download logs from EC2 -make get.logs +STAGE= make get.logs # Download CSV data from S3 -make get.data +STAGE= make get.data # Delete all CloudFormation based services and data -make delete +STAGE= make delete # Delete a specific layer -make pipeline.delete +STAGE= make pipeline.delete # Subscribe an email for notifications (unsubscribe using console) -make monitoring.subscribe-email email= +STAGE= make monitoring.subscribe-email email= ``` ## Managing Configuration Configuration is managed using JSON files, SSM Parameter Store, Secrets Manager, and shell variables. To deploy changes in these files, run the command. ```bash -make config.deploy +STAGE= make config.deploy ``` ### Database Configuration @@ -282,13 +314,10 @@ gfe-db/database/scripts └── start_task.sh # Coordinates database loading with the Step Functions API ``` -To update shell scripts on the Neo4j instance, run the following commands in sequence. +To update shell scripts on the Neo4j instance, run the following command. 
```bash -# sync the scripts to S3 -make config.deploy - # sync the scripts from S3 to the instance (using Systems Manager Run Command) -make database.sync-scripts +STAGE= make database.sync-scripts ``` #### Cypher Scripts @@ -330,7 +359,7 @@ Base input parameters (excluding the `releases` value) are passed to the Step Fu The data pipeline can also be invoked from the command line: ```bash -make database.load.run releases= align= kir= limit= +STAGE= make database.load.run releases= align= kir= limit= ``` #### IMGT/HLA Release Versions State @@ -358,19 +387,19 @@ The application's state tracks which releases have been processed and added to t ## Loading Neo4j For each invocation the data pipeline will download raw data from [ANHIG/IMGTHLA](https://github.com/ANHIG/IMGTHLA) GitHub repository, build a set of intermediate CSV files and load these into Neo4j via S3. To invoke the pipeline, run the following command. ```bash -make database.load.run releases="" +STAGE= make database.load.run releases="" # Example for single version -make database.load.run releases="3510" +STAGE= make database.load.run releases="3510" # Example for multiple versions -make database.load.run releases="3490,3500,3510" +STAGE= make database.load.run releases="3490,3500,3510" # Example with limit -make database.load.run releases="3510" limit="1000" +STAGE= make database.load.run releases="3510" limit="1000" # Example with all arguments included -make database.load.run releases="3510" limit="" align="False" kir="False" +STAGE= make database.load.run releases="3510" limit="" align="False" kir="False" ``` These commands build an event payload to send to the `invoke-gfe-db-pipeline` Lambda. @@ -407,15 +436,15 @@ The Lambda function returns the following object which can be viewed in CloudWat ### Clean Up To tear down resources run the command. You will need to manually delete the data in the S3 bucket first to avoid an error in CloudFormation. 
```bash -make delete +STAGE= make delete ``` Use the following commands to tear down individual services. Make sure to [backup](#backup--restore) your data first. ```bash # Delete only the database service -make database.delete +STAGE= make database.delete # Delete only the pipeline service -make pipeline.delete +STAGE= make pipeline.delete ``` ## Backup & Restore @@ -425,7 +454,7 @@ make pipeline.delete Backups are orchestrated by Systems Manager and automated everyday at midnight US/Central time by default. To create a backup, run the command. ```bash -make database.backup +STAGE= make database.backup ``` This will create a backup of the Neo4j database and store it in S3 under the path `s3:///backups/neo4j/YYYY/MM/DD/HH/gfedb.zip`. @@ -435,13 +464,13 @@ This will create a backup of the Neo4j database and store it in S3 under the pat To see a list of available backup dates that can be restored, run the command. ```bash -make database.backup.list +STAGE= make database.backup.list ``` To restore from a backup, pass the date of the backup you wish to restore using the format YYYY/MM/DD/HH. ```bash -make database.restore from_date= +STAGE= make database.restore from_date= ``` ## Local Development @@ -471,7 +500,7 @@ jupyter kernelspec uninstall It is not necessary to install Sphinx to view `gfe-db` documentation because it is already built and available in the `docs/` folder, but you will need it to edit them. To get the local `index.html` path run the command and navigate to the URL in a browser. ```bash -make docs.url +STAGE= make docs.url ``` ### Editing and Building the Documentation @@ -485,7 +514,7 @@ pip install -r requirements-docs.txt After making your edits, you can build the HTML assets by running the command. 
```bash -make docs.build +STAGE= make docs.build ``` ## Troubleshooting From 3a78f12057b99dde4d6221f7ed44bb2f55f018cc Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Sun, 20 Aug 2023 13:43:24 -0700 Subject: [PATCH 11/14] optionally delete S3 data --- Makefile | 11 ++++++++--- README.md | 8 ++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index d481ff04..c4d66c3b 100644 --- a/Makefile +++ b/Makefile @@ -67,7 +67,7 @@ target: $(info ${HELP_MESSAGE}) @exit 0 -app.print: +splash-screen: @echo "\033[0;34m " @echo "\033[0;34m ____ __ __ " @echo "\033[0;34m ____ _ / __/___ ____/ // /_ " @@ -85,7 +85,7 @@ env.print: @echo "+---------------------------------------------------------------------------------+" @echo "\033[0;33mPlease confirm the above values are correct.\033[0m" -deploy: app.print logs.purge env.validate.stage env.validate ##=> Deploy all services +deploy: splash-screen logs.purge env.validate.stage env.validate ##=> Deploy all services @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying ${APP_NAME} to ${AWS_ACCOUNT}" 2>&1 | tee -a ${CFN_LOG_PATH} $(MAKE) env.print @echo "Deploy stack to the \`${STAGE}\` environment? [y/N] \c " && read ans && [ $${ans:-N} = y ] @@ -316,9 +316,14 @@ database.get.credentials: database.get.instance-id: @echo "${INSTANCE_ID}" -# TODO add confirmation to proceed +# TODO add confirmation to proceed BOOKMARK delete: # data=true/false ##=> Delete services @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deleting ${APP_NAME} in ${AWS_ACCOUNT}" 2>&1 | tee -a ${CFN_LOG_PATH} + @[[ $$data != true ]] && echo "Data will not be deleted. To delete pass \`data=true\`" || true + @echo "Delete all stacks from the \`${STAGE}\` environment? 
[y/N] \c " && read ans && [ $${ans:-N} = y ] && \ + if [ "${data}" = "true" ]; then \ + aws s3 rm --recursive s3://${DATA_BUCKET_NAME}; \ + fi $(MAKE) pipeline.delete $(MAKE) database.delete $(MAKE) infrastructure.delete diff --git a/README.md b/README.md index 39fd2c31..a603c5f4 100755 --- a/README.md +++ b/README.md @@ -193,7 +193,7 @@ For more information visit the documentation page: These variables must be defined before running Make. The best way to set these variables is with a `.env.` file following this structure. ```bash # .env. -AWS_PROFILE= +AWS_PROFILE= # Include profile if stacks are in a different accounts STAGE= APP_NAME= AWS_REGION= @@ -279,8 +279,8 @@ STAGE= make get.logs # Download CSV data from S3 STAGE= make get.data -# Delete all CloudFormation based services and data -STAGE= make delete +# Delete all CloudFormation based services and data, default is data=false +STAGE= make delete data= # Delete a specific layer STAGE= make pipeline.delete @@ -436,7 +436,7 @@ The Lambda function returns the following object which can be viewed in CloudWat ### Clean Up To tear down resources run the command. You will need to manually delete the data in the S3 bucket first to avoid an error in CloudFormation. ```bash -STAGE= make delete +STAGE= make delete data= ``` Use the following commands to tear down individual services. Make sure to [backup](#backup--restore) your data first. ```bash From a0703f243a5e4587015a4d3c7c3b447adc5667bf Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Sun, 20 Aug 2023 14:05:42 -0700 Subject: [PATCH 12/14] typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a603c5f4..2c9ee2ac 100755 --- a/README.md +++ b/README.md @@ -144,7 +144,7 @@ These list outline the basic steps for deployments. For more details please see 6. Check the [config JSONs](#data-pipeline-config) (parameters and state) and edit the values as desired. 7. 
Run `STAGE= make deploy` to deploy the stacks to AWS. 8. Run `STAGE= make database.load.run releases=` to load the Neo4j, or `STAGE= make database.load.run releases= limit=` to run with a limited number of alleles. -9. Run `STAGE= make database.get-credentials` to get the username and password for Neo4j. +9. Run `STAGE= make database.get.credentials` to get the username and password for Neo4j. 10. Run `STAGE= make database.get.endpoint` to get the URL for Neo4j and navigate to the Neo4j browser at the subdomain and host domain, for example `https://gfe-db.cloudftl.com:7473/browser/`. **Creating a new VPC** From aa02dd62e52d12c66f68370a50ca716fd7f2533c Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Tue, 22 Aug 2023 12:48:07 -0700 Subject: [PATCH 13/14] fix missing parameter --- gfe-db/infrastructure/Makefile | 3 ++- gfe-db/infrastructure/template.yaml | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/gfe-db/infrastructure/Makefile b/gfe-db/infrastructure/Makefile index 2e989604..54b12800 100644 --- a/gfe-db/infrastructure/Makefile +++ b/gfe-db/infrastructure/Makefile @@ -41,7 +41,8 @@ service.deploy: VpcId="$${VPC_ID}" \ PublicSubnetId="$${PUBLIC_SUBNET_ID}" \ DataBucketName="$${DATA_BUCKET_NAME}" \ - GitHubPersonalAccessToken="$$GITHUB_PERSONAL_ACCESS_TOKEN" 2>&1 | tee -a $${CFN_LOG_PATH} || true + GitHubPersonalAccessToken="$$GITHUB_PERSONAL_ACCESS_TOKEN" \ + AdminEmail="$${ADMIN_EMAIL}" 2>&1 | tee -a $${CFN_LOG_PATH} || true $(MAKE) service.deploy.update-dns service.monitoring.create-subscriptions: diff --git a/gfe-db/infrastructure/template.yaml b/gfe-db/infrastructure/template.yaml index 0f2055ae..74f3b6dc 100644 --- a/gfe-db/infrastructure/template.yaml +++ b/gfe-db/infrastructure/template.yaml @@ -26,6 +26,9 @@ Parameters: GitHubPersonalAccessToken: Type: String NoEcho: true + AdminEmail: + Type: String + Description: Email address for Let's Encrypt SSL certificates Conditions: CreateVpc: !Equals [!Ref createVpc, 'true'] From 
9f596aeb625bcbae1c28bc8b6dd09dd76dcd1098 Mon Sep 17 00:00:00 2001 From: Gregory Lindsey Date: Tue, 22 Aug 2023 14:11:32 -0700 Subject: [PATCH 14/14] fetch github branches --- notebooks/0.2-github-api-eda.ipynb | 112 ++++++++++++++++++++++------- notebooks/requirements.txt | 2 + 2 files changed, 89 insertions(+), 25 deletions(-) create mode 100644 notebooks/requirements.txt diff --git a/notebooks/0.2-github-api-eda.ipynb b/notebooks/0.2-github-api-eda.ipynb index 8f2f556b..42404ae8 100644 --- a/notebooks/0.2-github-api-eda.ipynb +++ b/notebooks/0.2-github-api-eda.ipynb @@ -5,19 +5,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# GitHub API EDA\n", - "Using NetworkX for exploration of graph data from the GitHub API." + "# GitHub API EDA" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import os\n", "from dotenv import load_dotenv, find_dotenv\n", - "load_dotenv(find_dotenv());\n", + "load_dotenv('/Users/ammon/Projects/nmdp-bioinformatics/02-Repositories/gfe-db/.env.dev');\n", "from itertools import chain, starmap\n", "import json\n", "import requests\n", @@ -32,7 +31,27 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/ammon/Projects/nmdp-bioinformatics/02-Repositories/gfe-db/notebooks'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -40,13 +59,12 @@ "AWS_REGION = os.environ[\"AWS_REGION\"] \n", "GITHUB_PERSONAL_ACCESS_TOKEN = os.environ[\"GITHUB_PERSONAL_ACCESS_TOKEN\"]\n", "GITHUB_REPOSITORY_OWNER = \"ANHIG\" # os.environ[\"GITHUB_REPOSITORY_OWNER\"]\n", - "GITHUB_REPOSITORY_NAME = \"IMGTHLA\" # os.environ[\"GITHUB_REPOSITORY_NAME\"]\n", - "GFE_BUCKET = os.environ[\"GFE_BUCKET\"]" + 
"GITHUB_REPOSITORY_NAME = \"IMGTHLA\" # os.environ[\"GITHUB_REPOSITORY_NAME\"]\n" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -88,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -117,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -145,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -174,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -203,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -237,47 +255,91 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ - "# branches = get_branches(GITHUB_REPOSITORY_OWNER, GITHUB_REPOSITORY_NAME)" + "from pygethub import list_branches, GitHubPaginator" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ - "# branch = get_branch(GITHUB_REPOSITORY_OWNER, GITHUB_REPOSITORY_NAME, \"Latest\")" + "paginator = GitHubPaginator(GITHUB_PERSONAL_ACCESS_TOKEN)" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ - "# branch['commit']['sha']" + "# Get all branches\n", + "\n", + "branches_gen = paginator.get_paginator(list_branches, owner=GITHUB_REPOSITORY_OWNER, repo=GITHUB_REPOSITORY_NAME)" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 32, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Page 1: 55 items\n" + ] + } + ], + "source": [ + 
"all_branches = list(branches_gen)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Page 1: 55 items\n" + ] + } + ], "source": [ - "# branches_df = pd.DataFrame([flatten_json(branch) for branch in branches])" + "branches_df = pd.DataFrame([flatten_json(branch) for branch in branches])" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ - "# branches_df" + "releases = list(branches_df['name'].unique())[:-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "300,310,320,330,340,350,360,370,380,390,3100,3110,3120,3130,3140,3150,3160,3170,3180,3190,3200,3210,3220,3230,3240,3250,3260,3270,3280,3290,3300,3310,3320,3330,3340,3350,3360,3370,3380,3390,3400,3410,3420,3430,3440,3450,3460,3470,3480,3490,3500,3510,3520,3530\n" + ] + } + ], + "source": [ + "print(\",\".join(releases))" ] }, { @@ -1283,7 +1345,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.12" }, "orig_nbformat": 4 }, diff --git a/notebooks/requirements.txt b/notebooks/requirements.txt new file mode 100644 index 00000000..a94cf69e --- /dev/null +++ b/notebooks/requirements.txt @@ -0,0 +1,2 @@ +requests +pandas \ No newline at end of file