From 3c998f90313eab249d1272d4fc7b5225fe11ae99 Mon Sep 17 00:00:00 2001 From: CJ Obermaier Date: Wed, 10 Sep 2025 10:35:54 -0500 Subject: [PATCH 01/18] build preview --- .../data/docs-nav-data.json | 9 ++ .../automate/cicd.mdx | 3 +- .../lifecycle-management/data-management.mdx | 92 +++++++++++++++++++ .../decommission-infrastructure.mdx | 0 .../track-infrastructure.mdx | 0 5 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx create mode 100644 content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx create mode 100644 content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/track-infrastructure.mdx diff --git a/content/well-architected-framework/data/docs-nav-data.json b/content/well-architected-framework/data/docs-nav-data.json index 2ecb1dd927..5a89a48a14 100644 --- a/content/well-architected-framework/data/docs-nav-data.json +++ b/content/well-architected-framework/data/docs-nav-data.json @@ -415,6 +415,15 @@ "title": "Overview", "path": "optimize-systems" }, + { + "title": "Lifecycle management", + "routes": [ + { + "title": "Implement data retention policies", + "path": "optimize-systems/lifecycle-management/data-management" + } + ] + }, { "title": "Monitor system health", "routes": [ diff --git a/content/well-architected-framework/docs/docs/define-and-automate-processes/automate/cicd.mdx b/content/well-architected-framework/docs/docs/define-and-automate-processes/automate/cicd.mdx index 2f2a81a940..c0cdef9ec7 100644 --- a/content/well-architected-framework/docs/docs/define-and-automate-processes/automate/cicd.mdx +++ b/content/well-architected-framework/docs/docs/define-and-automate-processes/automate/cicd.mdx @@ -32,4 +32,5 @@ In this section of Automate your workflows, you learned how to implement CI/CD p Visit the following documents to learn more about the automation workflow: - [Automate testing](/well-architected-framework/define-and-automate-processes/automate/testing) - Implement automated testing in your CI/CD pipeline -- [Automate deployments](/well-architected-framework/define-and-automate-processes/automate/deployments) - Deploy applications through your CI/CD pipeline \ No newline at end of file +- [Automate deployments](/well-architected-framework/define-and-automate-processes/automate/deployments) - Deploy applications through your CI/CD pipeline +- [Running Terraform in automation](/terraform/tutorials/automation/automate-terraform) \ No newline at end of file diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx new file mode 100644 index 0000000000..f54d2b82e4 --- /dev/null +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx @@ -0,0 +1,92 @@ +--- +page_title: Implement data management policies +description: Implement automated data management policies to reduce storage costs, ensure compliance, and manage data lifecycle across cloud environments +--- + +# Implement data management policies + +You can use data management policies to manage the lifecycle of your organization's data. When you store data either in the cloud or on-premise, it is important to define and automate the policies around managing that data. 
Defining management with infrastructure as code tools, such as Terraform, ensures you consistently apply these policies across all environments and resources. + +## Why you should use lifecycle policies + +Most major cloud providers offer lifecycle management features for their storage services. These features allow you to define rules that automatically transition data between different storage classes based on age or access patterns, and delete data that has reached the end of its retention period. + +When you implement data management policies, you gain the following benefits: +- Reduce storage costs by automatically deleting data that is no longer needed. +- Reduce storage costs by storing data in the most cost-effective storage class based on access patterns and retention requirements. +- Ensure compliance with legal and regulatory requirements for data retention. +- Remove sensitive data after a defined period to minimize security risks. + +## Automate policy management with infrastructure as code + +You can use Terraform to define and manage lifecycle policies with infrastructure as code, and implement the policy configurations across your organization. You can create Terraform modules to create data management policies for different data types and compliance requirements. These modules can automatically apply appropriate lifecycle rules, storage class transitions, and deletion policies to new or existing storage resources. + +The following is an example of using Terraform to [create a data lifecycle policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration#specifying-a-filter-based-on-object-size) to move AWS S3 data to Glacier Instant Retrieval after 365 days: + +```hcl +resource "aws_s3_bucket_lifecycle_configuration" "example" { + bucket = aws_s3_bucket.bucket.id + + rule { + id = "Allow small object transitions" + + filter { + object_size_greater_than = 1 + } + + status = "Enabled" + + transition { + days = 365 + storage_class = "GLACIER_IR" + } + } +} +``` + +Terraform can also tag resources with appropriate retention metadata. These tags can include creation dates, data classifications, and retention periods. + +For example, you can use the [tags block](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration#specifying-a-filter-based-on-an-object-tag) with AWS S3 to automatically apply tags to all resources created by Terraform. The S3 lifecycle rule specifies a filter based on a tag key and value. The rule then applies only to a subset of objects with the specific tag. + +```hcl +resource "aws_s3_bucket_lifecycle_configuration" "example" { + bucket = aws_s3_bucket.bucket.id + + rule { + id = "rule-1" + + filter { + tag { + key = "Name" + value = "Staging" + } + } + + transition { + days = 30 + storage_class = "GLACIER" + } + + status = "Enabled" + } +} +``` + +For more information on resource tagging, see our [Track infrastructure documentation](To be written). + +It is important to note that in the previous examples, other cloud providers, such as Google Cloud Platform and Microsoft Azure, offer similar lifecycle management features for their storage services. You can use Terraform to manage lifecycle policies across multiple cloud providers, ensuring consistent data management practices regardless of where your data resides. 
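
For instance, a minimal sketch of the same pattern for a Google Cloud Storage bucket might look like the following; the bucket name, location, age thresholds, and target storage class are illustrative placeholders rather than recommendations:

```hcl
resource "google_storage_bucket" "example" {
  name     = "example-data-retention-bucket"
  location = "US"

  # Move objects older than a year to colder storage.
  lifecycle_rule {
    condition {
      age = 365
    }
    action {
      type          = "SetStorageClass"
      storage_class = "ARCHIVE"
    }
  }

  # Delete objects once they pass the retention period.
  lifecycle_rule {
    condition {
      age = 2555
    }
    action {
      type = "Delete"
    }
  }
}
```

The analogous Azure resource is `azurerm_storage_management_policy`, which attaches lifecycle rules to a storage account.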
+ +HashiCorp resources: +- Search the [Terraform Registry](https://registry.terraform.io/browse/providers) for the [cloud](https://registry.terraform.io/browse/providers?category=public-cloud) or [database](https://registry.terraform.io/browse/providers?category=database) provider you use. + +External resources: +- Cloud storage: [AWS](https://aws.amazon.com/products/storage/), [GCP](https://cloud.google.com/products/storage), and [Azure](https://azure.microsoft.com/en-us/products/category/storage) +- [Learn how to set the lifecycle configuration for a Google Cloud Bucket](https://cloud.google.com/storage/docs/samples/storage-create-lifecycle-setting-tf) with Terraform. + +## Next steps + +In this section of Lifecycle management, you learned about implementing data management policies, including why you should use lifecycle policies and how to automate policy management with infrastructure as code. Implement data management policies is part of the [Optimize systems](/well-architected-framework/optimize-systems) pillar. + +To learn more about infrastructure and resource management, see the following resources: +- [Track infrastructure](/well-architected-framework/optimize-systems/lifecycle-management/track-infrastructure) +- [Automate infrastructure provisioning](/well-architected-framework/define-and-automate-processes/process-automation/process-automation-workflow) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx new file mode 100644 index 0000000000..e69de29bb2 diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/track-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/track-infrastructure.mdx new file mode 100644 index 0000000000..e69de29bb2 From 99cc42323d1e124fa18cb7c48d6f27689ce7a3f0 Mon Sep 17 00:00:00 2001 From: CJ Obermaier Date: Thu, 11 Sep 2025 15:29:01 -0500 Subject: [PATCH 02/18] rought draft --- .../data/docs-nav-data.json | 4 + .../lifecycle-management/data-management.mdx | 5 +- .../decommission-infrastructure.mdx | 119 ++++++++++++++++++ .../track-infrastructure.mdx | 0 4 files changed, 126 insertions(+), 2 deletions(-) delete mode 100644 content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/track-infrastructure.mdx diff --git a/content/well-architected-framework/data/docs-nav-data.json b/content/well-architected-framework/data/docs-nav-data.json index 5a89a48a14..ffe0fc786b 100644 --- a/content/well-architected-framework/data/docs-nav-data.json +++ b/content/well-architected-framework/data/docs-nav-data.json @@ -421,6 +421,10 @@ { "title": "Implement data retention policies", "path": "optimize-systems/lifecycle-management/data-management" + }, + { + "title": "Decommission Infrastructure", + "path": "optimize-systems/lifecycle-management/decommission-infrastructure" } ] }, diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx index f54d2b82e4..08779b0be0 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx @@ -5,7 +5,7 @@ 
description: Implement automated data management policies to reduce storage cost # Implement data management policies -You can use data management policies to manage the lifecycle of your organization's data. When you store data either in the cloud or on-premise, it is important to define and automate the policies around managing that data. Defining management with infrastructure as code tools, such as Terraform, ensures you consistently apply these policies across all environments and resources. +You can use data management policies to manage the lifecycle of your organization's data. When you store data either in the cloud or on-premises, it is important to define and automate the policies around managing that data. Defining management with infrastructure as code tools, such as Terraform, ensures you consistently apply these policies across all environments and resources. ## Why you should use lifecycle policies @@ -19,7 +19,7 @@ When you implement data management policies, you gain the following benefits: ## Automate policy management with infrastructure as code -You can use Terraform to define and manage lifecycle policies with infrastructure as code, and implement the policy configurations across your organization. You can create Terraform modules to create data management policies for different data types and compliance requirements. These modules can automatically apply appropriate lifecycle rules, storage class transitions, and deletion policies to new or existing storage resources. +You can use Terraform to define and manage lifecycle policies with infrastructure as code and implement the policy across your organization. You can create Terraform modules to create data management policies for different data types and compliance requirements. These modules can automatically apply appropriate lifecycle rules, storage class transitions, and deletion policies to new or existing storage resources. The following is an example of using Terraform to [create a data lifecycle policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration#specifying-a-filter-based-on-object-size) to move AWS S3 data to Glacier Instant Retrieval after 365 days: @@ -82,6 +82,7 @@ HashiCorp resources: External resources: - Cloud storage: [AWS](https://aws.amazon.com/products/storage/), [GCP](https://cloud.google.com/products/storage), and [Azure](https://azure.microsoft.com/en-us/products/category/storage) - [Learn how to set the lifecycle configuration for a Google Cloud Bucket](https://cloud.google.com/storage/docs/samples/storage-create-lifecycle-setting-tf) with Terraform. 
+- AWS [Enforce data retention policies](https://docs.aws.amazon.com/wellarchitected/latest/framework/cost_decomissioning_resources_data_retention.html) ## Next steps diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index e69de29bb2..e303483b1d 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -0,0 +1,119 @@ +--- +page_title: Decommission resources +description: Learn how to safely retire infrastructure components while maintaining system integrity and avoiding disruptions through proper planning and automation. +--- + +# Decommission resources + +Resource decommissioning is the process of safely removing or deleting infrastructure components, applications, or services that are no longer needed or have reached end-of-life. You should remove unused or obsolete resources such as servers, databases, old images, IAM, and other infrastructure components. + +When you decommission unused resources, you gain the following benefits: +- Reduce costs by removing charges associated with unused resources. +- Minimize security risks by removing outdated or vulnerable resources that bad actors could exploit. +- Reduce configuration drift by only running necessary resources. +- Improve audit and compliance by maintaining a smaller infrastructure footprint. + +To successfully decommission resources, you need to create a well-defined plan that includes dependency analysis, stakeholder communication, and a gradual removal process. Depending on your infrastructure implementation, such as if you manually or automate your infrastructure creation, you may need to adjust your decommissioning approach accordingly. + +## Find resources to decommission + +Before you begin decommissioning resources, you need to identify which resources exist in your environment and determine which ones are candidates for removal. This discovery phase helps you avoid accidentally removing resources that are still in use and ensures you target the right components for decommissioning. + +Start by creating an inventory of your infrastructure. Most cloud providers offer resource tagging and billing reports that can help identify unused or underutilized resources. Pay particular attention to resources that were created for temporary purposes, like testing or proof-of-concepts, but may have been left running. + +Terraform tracks all infrastructure it manages with state files. You can use the `terraform state list` to see all managed resources and `terraform show` to examine their current configurations. This list of resources will help you identify which resources are still in use and which ones you can decommission. + +## Create a communication plan + +Your plan should outline how you will inform stakeholders about the decommissioning process, including timelines and potential impacts. Effective communication prevents surprises and ensures all affected teams can prepare for the changes. + +Start by identifying all stakeholders who might be affected by the decommissioning, including development teams, operations staff, end users, and business owners. Create a notification timeline that provides adequate warning. 
Your communications should explain what resources you are removing, when the decommissioning will occur, and what actions stakeholders need to take. + +## Create a dependency plan + + Your plan should analyze which services, applications, or other resources rely on the components you plan to remove. Your plan will lower the risk of unexpected outages by identifying and addressing dependencies before decommissioning. + +If you are using infrastructure as code tools like Terraform, you can use a dependency graph to understand resource relationships. This graph helps you visualize how resources are connected and identify potential impacts of removing specific components. + +You can create a dependency graph of your Terraform resources by running the following CLI command: + +```bash +terraform graph -type=plan | dot -Tpng >graph.png +``` + + + +The `terraform graph` command requires Graphviz to be installed on your system to generate visualizations. For more information on installing Graphviz, refer to the [Graphviz installation guide](https://graphviz.org/download/). + + + +HashiCorp resources: +- [Terraform Graph Command](/terraform/docs/cli/commands/graph) + +## Create backups + +Before decommissioning, confirm that you have backups of any critical data or configurations associated with the resources you are removing. Backups provide a safety net in case you need to roll back changes. + +The following are some of the resources you may want to back up: +- Servers in the form of machine images +- Database snapshots +- Configuration files +- Metadata + +Since Terraform uses infrastructure as code to manage resources, you can redeploy resources that you have previously decommissioned by reapplying your Terraform configuration. This capability allows you to recover resources quickly if needed. + +For example, if you backed up a server, you can also redeploy it by updating the AMI in your Terraform with the backed-up AMI ID. In the following example, you can change the `ami` attribute to the ID of your backed-up AMI: + +```hcl +resource "aws_instance" "example" { + ami = "ami-0c55b159cbfafe1f0" + instance_type = "t2.micro" +} +``` + +## Gradual removal of resources + +Implement a phased approach to removing resources instead of all at once. Start by redirecting traffic away from the resource, and monitor user traffic to ensure you don't negatively impact users. + +You can use `terraform plan` to preview the changes that will occur when you remove resources from your configuration. This command helps you understand the impact of your changes before applying them. + +You can also set safeguards so you only decommission resources when you are ready. You can use Terraform's `lifecycle` block with `prevent_destroy = true` to prevent accidental deletion of critical resources. The [lifecycle](https://www.terraform.io/docs/language/meta-arguments#lifecycle) setting ensures that you won't destroy resources unless you explicitly remove the `prevent_destroy` attribute. + +```hcl +resource "aws_instance" "example" { + ami = "ami-0c55b159cbfafe1f0" + instance_type = "t2.micro" +} + +lifecycle { + prevent_destroy = true +} +``` + +Consul can help you gradually remove resources by directing traffic away from services you are decommissioning. You can use Consul's service discovery and health checking features to monitor the status of services and ensure that dependent services are not affected during the decommissioning process. 
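
As a concrete illustration (not part of the original workflow), the following commands use Consul's maintenance mode to take a service out of service discovery before you remove it; the service ID `web` and the reason text are placeholders:

```shell-session
$ consul maint -enable -service=web -reason "Decommissioning the legacy web tier"
$ consul maint -disable -service=web
```

Maintenance mode registers a critical health check for the service, so Consul DNS stops returning the instance while you watch dependent services for errors. Disable maintenance mode if you need to roll the change back before deleting the underlying infrastructure.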
+ +If you are using orchestration tools like Nomad or Kubernetes, you can use their built-in capabilities to drain workloads before decommissioning nodes gracefully. Nomad provides node drain functionality through the `nomad node drain` command, which prevents new allocations from being scheduled on a node while safely migrating existing jobs to other available nodes. The Kubernetes `kubectl drain` command safely removes pods from nodes while respecting Pod Disruption Budgets, which ensure that a minimum number of application replicas remain available throughout the process. + +Review our [Zero-downtime deployments](/well-architected-framework/define-and-automate-processes/deploy/zero-downtime-deployments) documentation for strategies on how to redirect traffic and disable functions gradually. + +## Verify health of infrastructure and applications + +After the decommissioning process, verify that the remaining infrastructure and applications are functioning correctly. Monitor system performance and user feedback to ensure there are no negative impacts. + +The following is validation you should do after decommissioning: +- Validate APIs are functioning +- Check application performance +- Monitor system logs for errors + +HashiCorp resources: +- [Learn to setup monitoring agents](/well-architected-framework/define-and-automate-processes/monitor/setup-monitoring-agents) and [dashboards and alerts](/well-architected-framework/define-and-automate-processes/monitor/dashboards-alerts). + +External resources: +- AWS [Implement a decommissioning process](https://docs.aws.amazon.com/wellarchitected/latest/framework/cost_decomissioning_resources_implement_process.html) + +## Next steps + +In this section of Lifecycle management, you learned about decommissioning resources, including why you should plan decommissioning and how to safely execute the process. Decommission resources is part of the [Optimize systems](/well-architected-framework/optimize-systems) pillar. 
+ +To learn more about infrastructure and resource management, see the following resources: +- [Data management](/well-architected-framework/optimize-systems/lifecycle-management/data-management) \ No newline at end of file diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/track-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/track-infrastructure.mdx deleted file mode 100644 index e69de29bb2..0000000000 From 30abe4b54927d257e00763f99a5db3135a737e40 Mon Sep 17 00:00:00 2001 From: CJ Obermaier Date: Thu, 11 Sep 2025 15:39:53 -0500 Subject: [PATCH 03/18] vale suggestions --- .../lifecycle-management/data-management.mdx | 4 +--- .../decommission-infrastructure.mdx | 12 ++++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx index 08779b0be0..ee9e0149a4 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx @@ -1,6 +1,6 @@ --- page_title: Implement data management policies -description: Implement automated data management policies to reduce storage costs, ensure compliance, and manage data lifecycle across cloud environments +description: Implement data management policies to reduce storage costs, ensure compliance, and manage data lifecycles with infrastructure as code. --- # Implement data management policies @@ -72,8 +72,6 @@ resource "aws_s3_bucket_lifecycle_configuration" "example" { } ``` -For more information on resource tagging, see our [Track infrastructure documentation](To be written). - It is important to note that in the previous examples, other cloud providers, such as Google Cloud Platform and Microsoft Azure, offer similar lifecycle management features for their storage services. You can use Terraform to manage lifecycle policies across multiple cloud providers, ensuring consistent data management practices regardless of where your data resides. HashiCorp resources: diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index e303483b1d..8d62ef5d5b 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -1,6 +1,6 @@ --- page_title: Decommission resources -description: Learn how to safely retire infrastructure components while maintaining system integrity and avoiding disruptions through proper planning and automation. +description: Learn how to decommission infrastructure components while maintaining system integrity and avoiding disruptions through proper planning and automation. --- # Decommission resources @@ -9,11 +9,11 @@ Resource decommissioning is the process of safely removing or deleting infrastru When you decommission unused resources, you gain the following benefits: - Reduce costs by removing charges associated with unused resources. 
-- Minimize security risks by removing outdated or vulnerable resources that bad actors could exploit. +- Minimize security risks by removing outdated or vulnerable resources that bad actors can exploit. - Reduce configuration drift by only running necessary resources. - Improve audit and compliance by maintaining a smaller infrastructure footprint. -To successfully decommission resources, you need to create a well-defined plan that includes dependency analysis, stakeholder communication, and a gradual removal process. Depending on your infrastructure implementation, such as if you manually or automate your infrastructure creation, you may need to adjust your decommissioning approach accordingly. +To successfully decommission resources, you need to create a well-defined plan that includes dependency analysis, stakeholder communication, and a gradual removal process. Depending on your infrastructure implementation, such as if you manually or automate your infrastructure creation, you may need to adjust your decommissioning approach. ## Find resources to decommission @@ -27,7 +27,7 @@ Terraform tracks all infrastructure it manages with state files. You can use the Your plan should outline how you will inform stakeholders about the decommissioning process, including timelines and potential impacts. Effective communication prevents surprises and ensures all affected teams can prepare for the changes. -Start by identifying all stakeholders who might be affected by the decommissioning, including development teams, operations staff, end users, and business owners. Create a notification timeline that provides adequate warning. Your communications should explain what resources you are removing, when the decommissioning will occur, and what actions stakeholders need to take. +Start by identifying all stakeholders who you might affect by the decommissioning, including development teams, operations staff, end users, and business owners. Create a notification timeline that provides adequate warning. Your communications should explain what resources you are removing, when the decommissioning will occur, and what actions stakeholders need to take. ## Create a dependency plan @@ -43,7 +43,7 @@ terraform graph -type=plan | dot -Tpng >graph.png -The `terraform graph` command requires Graphviz to be installed on your system to generate visualizations. For more information on installing Graphviz, refer to the [Graphviz installation guide](https://graphviz.org/download/). +You need to install Graphviz on your system to use the `terraform graph` command and generate visualizations. For more information on installing Graphviz, refer to the [Graphviz installation guide](https://graphviz.org/download/). @@ -54,7 +54,7 @@ HashiCorp resources: Before decommissioning, confirm that you have backups of any critical data or configurations associated with the resources you are removing. Backups provide a safety net in case you need to roll back changes. 
-The following are some of the resources you may want to back up: +The following are some resources you may want to back up: - Servers in the form of machine images - Database snapshots - Configuration files From 4ca1910d8bb110305098f8b0bdfed3d110c6810e Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Fri, 12 Sep 2025 09:10:21 -0500 Subject: [PATCH 04/18] Apply suggestions from code review Co-authored-by: Jonathan Frappier <92055993+jonathanfrappier@users.noreply.github.com> --- .../lifecycle-management/decommission-infrastructure.mdx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index 8d62ef5d5b..b8e887b634 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -19,7 +19,7 @@ To successfully decommission resources, you need to create a well-defined plan t Before you begin decommissioning resources, you need to identify which resources exist in your environment and determine which ones are candidates for removal. This discovery phase helps you avoid accidentally removing resources that are still in use and ensures you target the right components for decommissioning. -Start by creating an inventory of your infrastructure. Most cloud providers offer resource tagging and billing reports that can help identify unused or underutilized resources. Pay particular attention to resources that were created for temporary purposes, like testing or proof-of-concepts, but may have been left running. +Start by creating an inventory of your infrastructure. Most cloud providers offer resource tagging and billing reports that help identify unused or underutilized resources. Pay particular attention to active resources created for temporary purposes, like testing or proof-of-concepts. Terraform tracks all infrastructure it manages with state files. You can use the `terraform state list` to see all managed resources and `terraform show` to examine their current configurations. This list of resources will help you identify which resources are still in use and which ones you can decommission. @@ -33,7 +33,7 @@ Start by identifying all stakeholders who you might affect by the decommissionin Your plan should analyze which services, applications, or other resources rely on the components you plan to remove. Your plan will lower the risk of unexpected outages by identifying and addressing dependencies before decommissioning. -If you are using infrastructure as code tools like Terraform, you can use a dependency graph to understand resource relationships. This graph helps you visualize how resources are connected and identify potential impacts of removing specific components. +If you are using infrastructure as code tools like Terraform, you can use a dependency graph to understand resource relationships. This graph helps you visualize connectivity between resources and identify potential impacts of removing specific components. 
You can create a dependency graph of your Terraform resources by running the following CLI command: @@ -92,9 +92,9 @@ lifecycle { Consul can help you gradually remove resources by directing traffic away from services you are decommissioning. You can use Consul's service discovery and health checking features to monitor the status of services and ensure that dependent services are not affected during the decommissioning process. -If you are using orchestration tools like Nomad or Kubernetes, you can use their built-in capabilities to drain workloads before decommissioning nodes gracefully. Nomad provides node drain functionality through the `nomad node drain` command, which prevents new allocations from being scheduled on a node while safely migrating existing jobs to other available nodes. The Kubernetes `kubectl drain` command safely removes pods from nodes while respecting Pod Disruption Budgets, which ensure that a minimum number of application replicas remain available throughout the process. +If you are using orchestration tools like Nomad or Kubernetes, you can use their built-in capabilities to drain workloads before decommissioning nodes gracefully. Nomad provides node drain functionality through the `nomad node drain` command, which prevents new scheduling new allocations on a node while safely migrating existing jobs to other available nodes. The Kubernetes `kubectl drain` command safely removes pods from nodes while respecting Pod Disruption Budgets, which ensure that a minimum number of application replicas remain available throughout the process. -Review our [Zero-downtime deployments](/well-architected-framework/define-and-automate-processes/deploy/zero-downtime-deployments) documentation for strategies on how to redirect traffic and disable functions gradually. +Review the [Zero-downtime deployments](/well-architected-framework/define-and-automate-processes/deploy/zero-downtime-deployments) documentation for strategies on how to redirect traffic and disable functions gradually. ## Verify health of infrastructure and applications From c9117e71253b0d633b41b7d885f0ce61964046a8 Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Fri, 12 Sep 2025 10:44:44 -0500 Subject: [PATCH 05/18] Update content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx Co-authored-by: Jonathan Frappier <92055993+jonathanfrappier@users.noreply.github.com> --- .../lifecycle-management/decommission-infrastructure.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index b8e887b634..6cf5d4d49b 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -5,7 +5,7 @@ description: Learn how to decommission infrastructure components while maintaini # Decommission resources -Resource decommissioning is the process of safely removing or deleting infrastructure components, applications, or services that are no longer needed or have reached end-of-life. You should remove unused or obsolete resources such as servers, databases, old images, IAM, and other infrastructure components. 
+Resource decommissioning is the process of safely removing or deleting infrastructure components, applications, or services that are no longer needed or have reached end-of-life. You should remove unused or obsolete resources such as servers, databases, images, IAM, and other infrastructure components. When you decommission unused resources, you gain the following benefits: - Reduce costs by removing charges associated with unused resources. From 1a7398d51ed39803ddf81324047d57f68b173be6 Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Fri, 12 Sep 2025 14:17:59 -0500 Subject: [PATCH 06/18] Apply suggestions from code review Co-authored-by: Anthony --- .../lifecycle-management/data-management.mdx | 6 ++--- .../decommission-infrastructure.mdx | 22 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx index ee9e0149a4..670709dae4 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx @@ -15,11 +15,11 @@ When you implement data management policies, you gain the following benefits: - Reduce storage costs by automatically deleting data that is no longer needed. - Reduce storage costs by storing data in the most cost-effective storage class based on access patterns and retention requirements. - Ensure compliance with legal and regulatory requirements for data retention. -- Remove sensitive data after a defined period to minimize security risks. +- Minimize security risks by removing sensitive data after a defined period of time. ## Automate policy management with infrastructure as code -You can use Terraform to define and manage lifecycle policies with infrastructure as code and implement the policy across your organization. You can create Terraform modules to create data management policies for different data types and compliance requirements. These modules can automatically apply appropriate lifecycle rules, storage class transitions, and deletion policies to new or existing storage resources. +You can use Terraform to define and manage lifecycle policies and implement those policies across your organization. You can create Terraform modules to create data management policies for different data types and compliance requirements. These modules can automatically apply appropriate lifecycle rules, storage class transitions, and deletion policies to new or existing storage resources. The following is an example of using Terraform to [create a data lifecycle policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration#specifying-a-filter-based-on-object-size) to move AWS S3 data to Glacier Instant Retrieval after 365 days: @@ -72,7 +72,7 @@ resource "aws_s3_bucket_lifecycle_configuration" "example" { } ``` -It is important to note that in the previous examples, other cloud providers, such as Google Cloud Platform and Microsoft Azure, offer similar lifecycle management features for their storage services. You can use Terraform to manage lifecycle policies across multiple cloud providers, ensuring consistent data management practices regardless of where your data resides. 
+It is important to note that other cloud providers, such as [Google Cloud Platform](https://registry.terraform.io/providers/hashicorp/google/5.0.0/docs/resources/storage_bucket.html#example-usage---life-cycle-settings-for-storage-bucket-objects) and [Microsoft Azure](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_management_policy), offer similar lifecycle management features for their storage services. You can use Terraform to manage lifecycle policies across multiple cloud providers, ensuring consistent data management practices regardless of where your data resides. HashiCorp resources: - Search the [Terraform Registry](https://registry.terraform.io/browse/providers) for the [cloud](https://registry.terraform.io/browse/providers?category=public-cloud) or [database](https://registry.terraform.io/browse/providers?category=database) provider you use. diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index 6cf5d4d49b..080f966e78 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -13,7 +13,7 @@ When you decommission unused resources, you gain the following benefits: - Reduce configuration drift by only running necessary resources. - Improve audit and compliance by maintaining a smaller infrastructure footprint. -To successfully decommission resources, you need to create a well-defined plan that includes dependency analysis, stakeholder communication, and a gradual removal process. Depending on your infrastructure implementation, such as if you manually or automate your infrastructure creation, you may need to adjust your decommissioning approach. +To successfully decommission resources, you need to create a well-defined plan that includes dependency analysis, stakeholder communication, and a gradual removal process. Depending on how your infrastructure implementation is done, either manually or automatically, you may need to adjust your decommissioning approach. ## Find resources to decommission @@ -27,18 +27,18 @@ Terraform tracks all infrastructure it manages with state files. You can use the Your plan should outline how you will inform stakeholders about the decommissioning process, including timelines and potential impacts. Effective communication prevents surprises and ensures all affected teams can prepare for the changes. -Start by identifying all stakeholders who you might affect by the decommissioning, including development teams, operations staff, end users, and business owners. Create a notification timeline that provides adequate warning. Your communications should explain what resources you are removing, when the decommissioning will occur, and what actions stakeholders need to take. +Start by identifying all stakeholders who might be affected by the decommissioning, including development teams, operations staff, end users, and business owners. Create a notification timeline that provides adequate warning. Your communications should explain what resources you are removing, when the decommissioning will occur, and what actions stakeholders need to take. 
## Create a dependency plan - Your plan should analyze which services, applications, or other resources rely on the components you plan to remove. Your plan will lower the risk of unexpected outages by identifying and addressing dependencies before decommissioning. +Your plan should analyze which services, applications, or other resources rely on the components you plan to remove. Your plan will lower the risk of unexpected outages by identifying and addressing dependencies before decommissioning. -If you are using infrastructure as code tools like Terraform, you can use a dependency graph to understand resource relationships. This graph helps you visualize connectivity between resources and identify potential impacts of removing specific components. +If you are using infrastructure as code tools like Terraform, you can use a dependency graph to understand resource relationships. This graph can help you visualize connections between resources and identify potential impacts of removing specific components. You can create a dependency graph of your Terraform resources by running the following CLI command: ```bash -terraform graph -type=plan | dot -Tpng >graph.png +terraform graph -type=plan | dot -Tpng > graph.png ``` @@ -73,7 +73,7 @@ resource "aws_instance" "example" { ## Gradual removal of resources -Implement a phased approach to removing resources instead of all at once. Start by redirecting traffic away from the resource, and monitor user traffic to ensure you don't negatively impact users. +Implement a phased approach to removing resources instead of doing it all at once. Start by redirecting traffic away from the resource, and monitor user traffic to ensure you don't negatively impact users. You can use `terraform plan` to preview the changes that will occur when you remove resources from your configuration. This command helps you understand the impact of your changes before applying them. @@ -98,12 +98,12 @@ Review the [Zero-downtime deployments](/well-architected-framework/define-and-au ## Verify health of infrastructure and applications -After the decommissioning process, verify that the remaining infrastructure and applications are functioning correctly. Monitor system performance and user feedback to ensure there are no negative impacts. +After the decommissioning process, verify that the remaining infrastructure and applications are functioning correctly. Monitor system performance and user feedback to ensure that there are no negative impacts. -The following is validation you should do after decommissioning: -- Validate APIs are functioning -- Check application performance -- Monitor system logs for errors +The following are steps that you should do after decommissioning: +- Validate APIs are functioning. +- Check application performance. +- Monitor system logs for errors. HashiCorp resources: - [Learn to setup monitoring agents](/well-architected-framework/define-and-automate-processes/monitor/setup-monitoring-agents) and [dashboards and alerts](/well-architected-framework/define-and-automate-processes/monitor/dashboards-alerts). 
From d3f3d9b4f12cf21154080cabc7304113ff063910 Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Fri, 12 Sep 2025 14:18:13 -0500 Subject: [PATCH 07/18] Update content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx Co-authored-by: Anthony --- .../lifecycle-management/decommission-infrastructure.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index 080f966e78..ff16c314da 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -115,5 +115,5 @@ External resources: In this section of Lifecycle management, you learned about decommissioning resources, including why you should plan decommissioning and how to safely execute the process. Decommission resources is part of the [Optimize systems](/well-architected-framework/optimize-systems) pillar. -To learn more about infrastructure and resource management, see the following resources: +To learn more about infrastructure and resource management, see the following resource: - [Data management](/well-architected-framework/optimize-systems/lifecycle-management/data-management) \ No newline at end of file From e741fcfe77fcacdfce3cda9d34eb826944a27190 Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:33:30 -0500 Subject: [PATCH 08/18] Add CODEOWNERS entry for Well-architected Framework --- CODEOWNERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CODEOWNERS b/CODEOWNERS index d0bd03c3f8..b33fcba060 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -31,3 +31,7 @@ /content/vault/ @hashicorp/vault-education-approvers +# Well-architected Framework + +content/well-architected-framework/ @hashicorp/well-architected-education-approvers + From 6e1ae1390a3224751492969a0961738644fa7313 Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:33:45 -0500 Subject: [PATCH 09/18] Update CODEOWNERS --- CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index b33fcba060..af4196387a 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -33,5 +33,5 @@ # Well-architected Framework -content/well-architected-framework/ @hashicorp/well-architected-education-approvers +/content/well-architected-framework/ @hashicorp/well-architected-education-approvers From b16fb3bf9d9d0c78c1eae68ecd000a425e057cde Mon Sep 17 00:00:00 2001 From: CJ Obermaier Date: Thu, 18 Sep 2025 10:27:03 -0500 Subject: [PATCH 10/18] added some resources --- .../lifecycle-management/data-management.mdx | 5 ++++- .../decommission-infrastructure.mdx | 14 +++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx index 670709dae4..639919bc1d 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx +++ 
b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx @@ -75,9 +75,13 @@ resource "aws_s3_bucket_lifecycle_configuration" "example" { It is important to note that other cloud providers, such as [Google Cloud Platform](https://registry.terraform.io/providers/hashicorp/google/5.0.0/docs/resources/storage_bucket.html#example-usage---life-cycle-settings-for-storage-bucket-objects) and [Microsoft Azure](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_management_policy), offer similar lifecycle management features for their storage services. You can use Terraform to manage lifecycle policies across multiple cloud providers, ensuring consistent data management practices regardless of where your data resides. HashiCorp resources: + - Search the [Terraform Registry](https://registry.terraform.io/browse/providers) for the [cloud](https://registry.terraform.io/browse/providers?category=public-cloud) or [database](https://registry.terraform.io/browse/providers?category=database) provider you use. +- Learn best practices for writing Terraform with the Terraform [style guide](/terraform/language/style). + External resources: + - Cloud storage: [AWS](https://aws.amazon.com/products/storage/), [GCP](https://cloud.google.com/products/storage), and [Azure](https://azure.microsoft.com/en-us/products/category/storage) - [Learn how to set the lifecycle configuration for a Google Cloud Bucket](https://cloud.google.com/storage/docs/samples/storage-create-lifecycle-setting-tf) with Terraform. - AWS [Enforce data retention policies](https://docs.aws.amazon.com/wellarchitected/latest/framework/cost_decomissioning_resources_data_retention.html) @@ -87,5 +91,4 @@ External resources: In this section of Lifecycle management, you learned about implementing data management policies, including why you should use lifecycle policies and how to automate policy management with infrastructure as code. Implement data management policies is part of the [Optimize systems](/well-architected-framework/optimize-systems) pillar. To learn more about infrastructure and resource management, see the following resources: -- [Track infrastructure](/well-architected-framework/optimize-systems/lifecycle-management/track-infrastructure) - [Automate infrastructure provisioning](/well-architected-framework/define-and-automate-processes/process-automation/process-automation-workflow) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index ff16c314da..7472ad99de 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -48,6 +48,7 @@ You need to install Graphviz on your system to use the `terraform graph` command HashiCorp resources: + - [Terraform Graph Command](/terraform/docs/cli/commands/graph) ## Create backups @@ -90,25 +91,31 @@ lifecycle { } ``` -Consul can help you gradually remove resources by directing traffic away from services you are decommissioning. You can use Consul's service discovery and health checking features to monitor the status of services and ensure that dependent services are not affected during the decommissioning process. 
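
For example, a minimal Consul service registration with an HTTP health check might look like the following sketch; the service name, port, and `/health` endpoint are assumptions for illustration, not values defined elsewhere in this guide:

```hcl
service {
  name = "billing-api"
  port = 8080

  check {
    id       = "billing-api-http"
    name     = "billing-api HTTP health check"
    http     = "http://localhost:8080/health"
    interval = "10s"
    timeout  = "2s"
  }
}
```

Because downstream services resolve `billing-api` through Consul rather than a hard-coded address, you can watch this check and Consul's catalog while you wind the service down, and dependent services stop receiving the instance once you deregister it.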
+[Consul](/consul) can help you gradually remove resources by directing traffic away from services you are decommissioning. You can use Consul's service discovery and health checking features to monitor the status of services and ensure that dependent services are not affected during the decommissioning process. + +If you are using orchestration tools like [Nomad](/nomad) or Kubernetes, you can use their built-in capabilities to drain workloads before decommissioning nodes gracefully. Nomad provides node drain functionality through the `nomad node drain` command, which prevents new scheduling new allocations on a node while safely migrating existing jobs to other available nodes. The Kubernetes `kubectl drain` command safely removes pods from nodes while respecting Pod Disruption Budgets, which ensure that a minimum number of application replicas remain available throughout the process. -If you are using orchestration tools like Nomad or Kubernetes, you can use their built-in capabilities to drain workloads before decommissioning nodes gracefully. Nomad provides node drain functionality through the `nomad node drain` command, which prevents new scheduling new allocations on a node while safely migrating existing jobs to other available nodes. The Kubernetes `kubectl drain` command safely removes pods from nodes while respecting Pod Disruption Budgets, which ensure that a minimum number of application replicas remain available throughout the process. +HashiCorp resources: -Review the [Zero-downtime deployments](/well-architected-framework/define-and-automate-processes/deploy/zero-downtime-deployments) documentation for strategies on how to redirect traffic and disable functions gradually. +- Review the [Zero-downtime deployments](/well-architected-framework/define-and-automate-processes/deploy/zero-downtime-deployments) documentation for strategies on how to redirect traffic and disable functions gradually. +- Learn how to [manage resource lifecycles with Terraform](/terraform/tutorials/state/resource-lifecycle). ## Verify health of infrastructure and applications After the decommissioning process, verify that the remaining infrastructure and applications are functioning correctly. Monitor system performance and user feedback to ensure that there are no negative impacts. The following are steps that you should do after decommissioning: + - Validate APIs are functioning. - Check application performance. - Monitor system logs for errors. HashiCorp resources: + - [Learn to setup monitoring agents](/well-architected-framework/define-and-automate-processes/monitor/setup-monitoring-agents) and [dashboards and alerts](/well-architected-framework/define-and-automate-processes/monitor/dashboards-alerts). External resources: + - AWS [Implement a decommissioning process](https://docs.aws.amazon.com/wellarchitected/latest/framework/cost_decomissioning_resources_implement_process.html) ## Next steps @@ -116,4 +123,5 @@ External resources: In this section of Lifecycle management, you learned about decommissioning resources, including why you should plan decommissioning and how to safely execute the process. Decommission resources is part of the [Optimize systems](/well-architected-framework/optimize-systems) pillar. 
To learn more about infrastructure and resource management, see the following resource: + - [Data management](/well-architected-framework/optimize-systems/lifecycle-management/data-management) \ No newline at end of file From 678d2f5805e1e92e3d059c43027a6ed6749c97fb Mon Sep 17 00:00:00 2001 From: CJ Obermaier Date: Thu, 18 Sep 2025 10:29:34 -0500 Subject: [PATCH 11/18] added some resources --- .../lifecycle-management/decommission-infrastructure.mdx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index 7472ad99de..15b96554e1 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -91,14 +91,16 @@ lifecycle { } ``` -[Consul](/consul) can help you gradually remove resources by directing traffic away from services you are decommissioning. You can use Consul's service discovery and health checking features to monitor the status of services and ensure that dependent services are not affected during the decommissioning process. +Consul can help you gradually remove resources by directing traffic away from services you are decommissioning. You can use Consul's service discovery and health checking features to monitor the status of services and ensure that dependent services are not affected during the decommissioning process. -If you are using orchestration tools like [Nomad](/nomad) or Kubernetes, you can use their built-in capabilities to drain workloads before decommissioning nodes gracefully. Nomad provides node drain functionality through the `nomad node drain` command, which prevents new scheduling new allocations on a node while safely migrating existing jobs to other available nodes. The Kubernetes `kubectl drain` command safely removes pods from nodes while respecting Pod Disruption Budgets, which ensure that a minimum number of application replicas remain available throughout the process. +If you are using orchestration tools like Nomad or Kubernetes, you can use their built-in capabilities to drain workloads before decommissioning nodes gracefully. Nomad provides node drain functionality through the `nomad node drain` command, which prevents new scheduling new allocations on a node while safely migrating existing jobs to other available nodes. The Kubernetes `kubectl drain` command safely removes pods from nodes while respecting Pod Disruption Budgets, which ensure that a minimum number of application replicas remain available throughout the process. HashiCorp resources: - Review the [Zero-downtime deployments](/well-architected-framework/define-and-automate-processes/deploy/zero-downtime-deployments) documentation for strategies on how to redirect traffic and disable functions gradually. - Learn how to [manage resource lifecycles with Terraform](/terraform/tutorials/state/resource-lifecycle). +- [Get up and running with Nomad](/nomad/tutorials/get-started) by learning about scheduling, setting up a cluster, and deploying an example job. +- [Learn the [fundamentals of Consul](/consul/tutorials). 
## Verify health of infrastructure and applications From f1215bb0ea44f367ddf2e9ec96eadffb60b9a1ca Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Thu, 18 Sep 2025 11:31:19 -0500 Subject: [PATCH 12/18] Apply suggestions from code review Co-authored-by: Tu Nguyen --- .../lifecycle-management/data-management.mdx | 8 ++++---- .../decommission-infrastructure.mdx | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx index 639919bc1d..fdf9e99693 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/data-management.mdx @@ -21,7 +21,7 @@ When you implement data management policies, you gain the following benefits: You can use Terraform to define and manage lifecycle policies and implement those policies across your organization. You can create Terraform modules to create data management policies for different data types and compliance requirements. These modules can automatically apply appropriate lifecycle rules, storage class transitions, and deletion policies to new or existing storage resources. -The following is an example of using Terraform to [create a data lifecycle policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration#specifying-a-filter-based-on-object-size) to move AWS S3 data to Glacier Instant Retrieval after 365 days: +The following Terraform configuration defines a [data lifecycle policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration#specifying-a-filter-based-on-object-size) to move AWS S3 data to Glacier Instant Retrieval after 365 days: ```hcl resource "aws_s3_bucket_lifecycle_configuration" "example" { @@ -46,7 +46,7 @@ resource "aws_s3_bucket_lifecycle_configuration" "example" { Terraform can also tag resources with appropriate retention metadata. These tags can include creation dates, data classifications, and retention periods. -For example, you can use the [tags block](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration#specifying-a-filter-based-on-an-object-tag) with AWS S3 to automatically apply tags to all resources created by Terraform. The S3 lifecycle rule specifies a filter based on a tag key and value. The rule then applies only to a subset of objects with the specific tag. +For example, you can use the [`tag` block](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration#specifying-a-filter-based-on-an-object-tag) with AWS S3 to automatically apply tags to all resources created by Terraform. The S3 lifecycle rule specifies a filter based on a tag key and value. The rule then applies only to a subset of objects with the specific tag. 
```hcl resource "aws_s3_bucket_lifecycle_configuration" "example" { @@ -72,7 +72,7 @@ resource "aws_s3_bucket_lifecycle_configuration" "example" { } ``` -It is important to note that other cloud providers, such as [Google Cloud Platform](https://registry.terraform.io/providers/hashicorp/google/5.0.0/docs/resources/storage_bucket.html#example-usage---life-cycle-settings-for-storage-bucket-objects) and [Microsoft Azure](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_management_policy), offer similar lifecycle management features for their storage services. You can use Terraform to manage lifecycle policies across multiple cloud providers, ensuring consistent data management practices regardless of where your data resides. +Other cloud providers, such as [Google Cloud Platform](https://registry.terraform.io/providers/hashicorp/google/5.0.0/docs/resources/storage_bucket.html#example-usage---life-cycle-settings-for-storage-bucket-objects) and [Microsoft Azure](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_management_policy), offer similar lifecycle management features for their storage services. You can use Terraform to manage lifecycle policies across multiple cloud providers, ensuring consistent data management practices regardless of where your data resides. HashiCorp resources: @@ -90,5 +90,5 @@ External resources: In this section of Lifecycle management, you learned about implementing data management policies, including why you should use lifecycle policies and how to automate policy management with infrastructure as code. Implement data management policies is part of the [Optimize systems](/well-architected-framework/optimize-systems) pillar. -To learn more about infrastructure and resource management, see the following resources: +To learn more about infrastructure and resource management, refer to the following resources: - [Automate infrastructure provisioning](/well-architected-framework/define-and-automate-processes/process-automation/process-automation-workflow) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index 15b96554e1..709e05a896 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -35,10 +35,10 @@ Your plan should analyze which services, applications, or other resources rely o If you are using infrastructure as code tools like Terraform, you can use a dependency graph to understand resource relationships. This graph can help you visualize connections between resources and identify potential impacts of removing specific components. -You can create a dependency graph of your Terraform resources by running the following CLI command: +The following command creates a dependency graph of your Terraform resources: -```bash -terraform graph -type=plan | dot -Tpng > graph.png +```shell-session +$ terraform graph -type=plan | dot -Tpng > graph.png ``` @@ -55,7 +55,7 @@ HashiCorp resources: Before decommissioning, confirm that you have backups of any critical data or configurations associated with the resources you are removing. Backups provide a safety net in case you need to roll back changes. 
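+
+As a concrete sketch of this backup step, the following commands capture a machine image and a database snapshot before any resources are removed. The instance ID, database identifier, and names are placeholders, not values from this guide:
+
+```shell-session
+# Create a final AMI from the EC2 instance you plan to retire
+$ aws ec2 create-image --instance-id i-0123456789abcdef0 --name "app-final-image"
+
+# Take a final RDS snapshot before decommissioning the database
+$ aws rds create-db-snapshot --db-instance-identifier app-db --db-snapshot-identifier app-db-final
+```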
-The following are some resources you may want to back up:
+You may want to back up the following resources:
 - Servers in the form of machine images
 - Database snapshots
 - Configuration files
@@ -72,7 +72,7 @@ resource "aws_instance" "example" {
 }
 ```
 
-## Gradual removal of resources
+## Gradually remove resources
 
 Implement a phased approach to removing resources instead of doing it all at once. Start by redirecting traffic away from the resource, and monitor user traffic to ensure you don't negatively impact users.
@@ -106,7 +106,7 @@ HashiCorp resources:
 
 After the decommissioning process, verify that the remaining infrastructure and applications are functioning correctly. Monitor system performance and user feedback to ensure that there are no negative impacts.
 
-The following are steps that you should do after decommissioning:
+You should do the following steps after you decommission the resources:
 
 - Validate APIs are functioning.
 - Check application performance.
@@ -124,6 +124,6 @@ External resources:
 
 In this section of Lifecycle management, you learned about decommissioning resources, including why you should plan decommissioning and how to safely execute the process. Decommission resources is part of the [Optimize systems](/well-architected-framework/optimize-systems) pillar.
 
-To learn more about infrastructure and resource management, see the following resource:
+To learn more about infrastructure and resource management, refer to the following resource:
 
 - [Data management](/well-architected-framework/optimize-systems/lifecycle-management/data-management)
\ No newline at end of file
From 32fef176e401ad521ae4a591975be09c6d610dfa Mon Sep 17 00:00:00 2001
From: CJ Obermaier
Date: Thu, 18 Sep 2025 14:50:10 -0500
Subject: [PATCH 13/18] addressing comments

---
 .../automate/cicd.mdx                       |  6 +-
 .../decommission-infrastructure.mdx         | 56 +++++++++++++------
 2 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/content/well-architected-framework/docs/docs/define-and-automate-processes/automate/cicd.mdx b/content/well-architected-framework/docs/docs/define-and-automate-processes/automate/cicd.mdx
index c0cdef9ec7..ff2142f90e 100644
--- a/content/well-architected-framework/docs/docs/define-and-automate-processes/automate/cicd.mdx
+++ b/content/well-architected-framework/docs/docs/define-and-automate-processes/automate/cicd.mdx
@@ -31,6 +31,6 @@ In this section of Automate your workflows, you learned how to implement CI/CD p
 
 Visit the following documents to learn more about the automation workflow:
 
-- [Automate testing](/well-architected-framework/define-and-automate-processes/automate/testing) - Implement automated testing in your CI/CD pipeline
-- [Automate deployments](/well-architected-framework/define-and-automate-processes/automate/deployments) - Deploy applications through your CI/CD pipeline
-- [Running Terraform in automation](/terraform/tutorials/automation/automate-terraform)
+- [Automate testing](/well-architected-framework/define-and-automate-processes/automate/testing) in your CI/CD pipelines
+- [Automate application deployments](/well-architected-framework/define-and-automate-processes/automate/deployments) through your CI/CD pipeline
+- Learn how to orchestrate [Terraform runs](/terraform/tutorials/automation/automate-terraform) to ensure consistency between runs.
\ No newline at end of file diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index 709e05a896..69d8f14382 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -15,20 +15,6 @@ When you decommission unused resources, you gain the following benefits: To successfully decommission resources, you need to create a well-defined plan that includes dependency analysis, stakeholder communication, and a gradual removal process. Depending on how your infrastructure implementation is done, either manually or automatically, you may need to adjust your decommissioning approach. -## Find resources to decommission - -Before you begin decommissioning resources, you need to identify which resources exist in your environment and determine which ones are candidates for removal. This discovery phase helps you avoid accidentally removing resources that are still in use and ensures you target the right components for decommissioning. - -Start by creating an inventory of your infrastructure. Most cloud providers offer resource tagging and billing reports that help identify unused or underutilized resources. Pay particular attention to active resources created for temporary purposes, like testing or proof-of-concepts. - -Terraform tracks all infrastructure it manages with state files. You can use the `terraform state list` to see all managed resources and `terraform show` to examine their current configurations. This list of resources will help you identify which resources are still in use and which ones you can decommission. - -## Create a communication plan - -Your plan should outline how you will inform stakeholders about the decommissioning process, including timelines and potential impacts. Effective communication prevents surprises and ensures all affected teams can prepare for the changes. - -Start by identifying all stakeholders who might be affected by the decommissioning, including development teams, operations staff, end users, and business owners. Create a notification timeline that provides adequate warning. Your communications should explain what resources you are removing, when the decommissioning will occur, and what actions stakeholders need to take. - ## Create a dependency plan Your plan should analyze which services, applications, or other resources rely on the components you plan to remove. Your plan will lower the risk of unexpected outages by identifying and addressing dependencies before decommissioning. @@ -49,7 +35,24 @@ You need to install Graphviz on your system to use the `terraform graph` command HashiCorp resources: -- [Terraform Graph Command](/terraform/docs/cli/commands/graph) +- [Terraform graph command](/terraform/docs/cli/commands/graph) + +## Find resources to decommission + +Before you begin decommissioning resources, you need to identify which resources exist in your environment and determine which ones are candidates for removal. This discovery phase helps you avoid accidentally removing resources that are still in use and ensures you target the right components for decommissioning. + +Start by creating an inventory of your infrastructure. 
Most cloud providers offer resource tagging and billing reports that help identify unused or underutilized resources. Pay particular attention to active resources created for temporary purposes, like testing or proof-of-concepts. + +Terraform tracks all infrastructure it manages with state files. You can use the `terraform state list` to see all managed resources and `terraform show` to examine their current configurations. This list of resources will help you identify which resources are still in use and which ones you can decommission. + +If you're using HCP Terraform, you can use the [workspace explorer](/terraform/cloud-docs/workspaces/explorer) feature to gain visibility into the resources your organization manages with Terraform. The explorer provides a visual representation of your infrastructure, making it easier to identify resources that you no longer need. + + +## Create a communication plan + +Your plan should outline how you will inform stakeholders about the decommissioning process, including timelines and potential impacts. Effective communication prevents surprises and ensures all affected teams can prepare for the changes. + +Start by identifying all stakeholders who might be affected by the decommissioning, including development teams, operations staff, end users, and business owners. Create a notification timeline that provides adequate warning. Your communications should explain what resources you are removing, when the decommissioning will occur, and what actions stakeholders need to take. ## Create backups @@ -72,6 +75,27 @@ resource "aws_instance" "example" { } ``` +You can also use Terraform to create [AWS EBS snapshots](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ebs_snapshot) before decommissioning instances. The following example creates an EBS snapshot of the root volume of an EC2 instance: + +```hcl +resource "aws_ebs_volume" "example" { + availability_zone = "us-west-2a" + size = 40 + + tags = { + Name = "HelloWorld" + } +} + +resource "aws_ebs_snapshot" "example_snapshot" { + volume_id = aws_ebs_volume.example.id + + tags = { + Name = "HelloWorld_snap" + } +} +``` + ## Gradually remove resources Implement a phased approach to removing resources instead of doing it all at once. Start by redirecting traffic away from the resource, and monitor user traffic to ensure you don't negatively impact users. 
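+
+One way to stage this gradual removal with Terraform is to review the state and preview a targeted destroy before you change any configuration. This is a minimal sketch; the resource address below is an example only, and the plan command shown does not apply any changes:
+
+```shell-session
+# List everything Terraform currently manages
+$ terraform state list
+
+# Preview what removing a single resource would change, without applying it
+$ terraform plan -destroy -target=aws_instance.example
+```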
@@ -87,7 +111,7 @@ resource "aws_instance" "example" { } lifecycle { - prevent_destroy = true + prevent_destroy = true } ``` From 8dc39bfb116f94d4c50b78859e4d09f5900eccc2 Mon Sep 17 00:00:00 2001 From: CJ Obermaier Date: Thu, 18 Sep 2025 14:53:12 -0500 Subject: [PATCH 14/18] addressing comments --- .../lifecycle-management/decommission-infrastructure.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index 69d8f14382..895667566c 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -17,7 +17,7 @@ To successfully decommission resources, you need to create a well-defined plan t ## Create a dependency plan -Your plan should analyze which services, applications, or other resources rely on the components you plan to remove. Your plan will lower the risk of unexpected outages by identifying and addressing dependencies before decommissioning. +Your plan should analyze which services, applications, or other resources rely on the components you may plan to remove. Your plan will lower the risk of unexpected outages by identifying and addressing dependencies before decommissioning. If you are using infrastructure as code tools like Terraform, you can use a dependency graph to understand resource relationships. This graph can help you visualize connections between resources and identify potential impacts of removing specific components. From 603c0dc0e5177186e9a1908a6b026fc5824b182e Mon Sep 17 00:00:00 2001 From: CJ Obermaier Date: Thu, 18 Sep 2025 14:56:54 -0500 Subject: [PATCH 15/18] addressing comments --- .../decommission-infrastructure.mdx | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index 895667566c..1a9dde3a7e 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -15,9 +15,19 @@ When you decommission unused resources, you gain the following benefits: To successfully decommission resources, you need to create a well-defined plan that includes dependency analysis, stakeholder communication, and a gradual removal process. Depending on how your infrastructure implementation is done, either manually or automatically, you may need to adjust your decommissioning approach. +## Find resources to decommission + +Before you begin decommissioning resources, you need to identify which resources exist in your environment and determine which ones are candidates for removal. This discovery phase helps you avoid accidentally removing resources that are still in use and ensures you target the right components for decommissioning. + +Start by creating an inventory of your infrastructure. Most cloud providers offer resource tagging and billing reports that help identify unused or underutilized resources. 
Pay particular attention to active resources created for temporary purposes, like testing or proof-of-concepts. + +Terraform tracks all infrastructure it manages with state files. You can use the `terraform state list` to see all managed resources and `terraform show` to examine their current configurations. This list of resources will help you identify which resources are still in use and which ones you can decommission. + +If you're using HCP Terraform, you can use the [workspace explorer](/terraform/cloud-docs/workspaces/explorer) feature to gain visibility into the resThat ources your organization manages with Terraform. The explorer provides a visual representation of your infrastructure, making it easier to identify resources that you no longer need. + ## Create a dependency plan -Your plan should analyze which services, applications, or other resources rely on the components you may plan to remove. Your plan will lower the risk of unexpected outages by identifying and addressing dependencies before decommissioning. +Your plan should analyze which services, applications, or other resources rely on the components you plan to remove. Your plan will lower the risk of unexpected outages by identifying and addressing dependencies before decommissioning. If you are using infrastructure as code tools like Terraform, you can use a dependency graph to understand resource relationships. This graph can help you visualize connections between resources and identify potential impacts of removing specific components. @@ -37,17 +47,6 @@ HashiCorp resources: - [Terraform graph command](/terraform/docs/cli/commands/graph) -## Find resources to decommission - -Before you begin decommissioning resources, you need to identify which resources exist in your environment and determine which ones are candidates for removal. This discovery phase helps you avoid accidentally removing resources that are still in use and ensures you target the right components for decommissioning. - -Start by creating an inventory of your infrastructure. Most cloud providers offer resource tagging and billing reports that help identify unused or underutilized resources. Pay particular attention to active resources created for temporary purposes, like testing or proof-of-concepts. - -Terraform tracks all infrastructure it manages with state files. You can use the `terraform state list` to see all managed resources and `terraform show` to examine their current configurations. This list of resources will help you identify which resources are still in use and which ones you can decommission. - -If you're using HCP Terraform, you can use the [workspace explorer](/terraform/cloud-docs/workspaces/explorer) feature to gain visibility into the resources your organization manages with Terraform. The explorer provides a visual representation of your infrastructure, making it easier to identify resources that you no longer need. - - ## Create a communication plan Your plan should outline how you will inform stakeholders about the decommissioning process, including timelines and potential impacts. Effective communication prevents surprises and ensures all affected teams can prepare for the changes. 
From d4726972a92b05a3f2a7a645b50ab0b5a0819e95 Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:30:04 -0500 Subject: [PATCH 16/18] Fix title casing for 'Decommission infrastructure' --- content/well-architected-framework/data/docs-nav-data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/well-architected-framework/data/docs-nav-data.json b/content/well-architected-framework/data/docs-nav-data.json index 960d93c6d2..187dfb9587 100644 --- a/content/well-architected-framework/data/docs-nav-data.json +++ b/content/well-architected-framework/data/docs-nav-data.json @@ -423,7 +423,7 @@ "path": "optimize-systems/lifecycle-management/data-management" }, { - "title": "Decommission Infrastructure", + "title": "Decommission infrastructure", "path": "optimize-systems/lifecycle-management/decommission-infrastructure" } ] From 3337c79043d9f33b6fa5b1fd26ee358d1a1bf8c8 Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:30:47 -0500 Subject: [PATCH 17/18] Rename 'Decommission infrastructure' to 'Decommission resources' --- content/well-architected-framework/data/docs-nav-data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/well-architected-framework/data/docs-nav-data.json b/content/well-architected-framework/data/docs-nav-data.json index 187dfb9587..cd6f69e707 100644 --- a/content/well-architected-framework/data/docs-nav-data.json +++ b/content/well-architected-framework/data/docs-nav-data.json @@ -423,7 +423,7 @@ "path": "optimize-systems/lifecycle-management/data-management" }, { - "title": "Decommission infrastructure", + "title": "Decommission resources", "path": "optimize-systems/lifecycle-management/decommission-infrastructure" } ] From 0b60c06d5cd27857d7d59f2c3546d542be7c8bb8 Mon Sep 17 00:00:00 2001 From: CJ <105300705+cjobermaier@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:35:05 -0500 Subject: [PATCH 18/18] Apply suggestions from code review --- .../lifecycle-management/decommission-infrastructure.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx index 1a9dde3a7e..5150daba5a 100644 --- a/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx +++ b/content/well-architected-framework/docs/docs/optimize-systems/lifecycle-management/decommission-infrastructure.mdx @@ -45,7 +45,7 @@ You need to install Graphviz on your system to use the `terraform graph` command HashiCorp resources: -- [Terraform graph command](/terraform/docs/cli/commands/graph) +- [Terraform graph command](/terraform/cli/commands/graph) ## Create a communication plan @@ -101,7 +101,7 @@ Implement a phased approach to removing resources instead of doing it all at onc You can use `terraform plan` to preview the changes that will occur when you remove resources from your configuration. This command helps you understand the impact of your changes before applying them. -You can also set safeguards so you only decommission resources when you are ready. You can use Terraform's `lifecycle` block with `prevent_destroy = true` to prevent accidental deletion of critical resources. 
The [lifecycle](https://www.terraform.io/docs/language/meta-arguments#lifecycle) setting ensures that you won't destroy resources unless you explicitly remove the `prevent_destroy` attribute. +You can also set safeguards so you only decommission resources when you are ready. You can use Terraform's `lifecycle` block with `prevent_destroy = true` to prevent accidental deletion of critical resources. The [lifecycle](/terraform/language/meta-arguments#lifecycle) setting ensures that you won't destroy resources unless you explicitly remove the `prevent_destroy` attribute. ```hcl resource "aws_instance" "example" { @@ -123,7 +123,7 @@ HashiCorp resources: - Review the [Zero-downtime deployments](/well-architected-framework/define-and-automate-processes/deploy/zero-downtime-deployments) documentation for strategies on how to redirect traffic and disable functions gradually. - Learn how to [manage resource lifecycles with Terraform](/terraform/tutorials/state/resource-lifecycle). - [Get up and running with Nomad](/nomad/tutorials/get-started) by learning about scheduling, setting up a cluster, and deploying an example job. -- [Learn the [fundamentals of Consul](/consul/tutorials). +- Learn the [fundamentals of Consul](/consul/tutorials). ## Verify health of infrastructure and applications
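+
+As a minimal sketch of the verification steps listed earlier, you might probe a dependent API endpoint and scan recent logs after the removal completes. The URL and deployment name below are placeholders, not values from this guide:
+
+```shell-session
+# Confirm a dependent API still responds successfully
+$ curl --fail --silent --show-error https://app.example.com/health
+
+# Scan recent application logs for new errors
+$ kubectl logs deployment/web --since=1h | grep -i error
+```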