diff --git a/_includes/sidebar-data-v19.1.json b/_includes/sidebar-data-v19.1.json index f03cbfde883..73fe75a85a4 100644 --- a/_includes/sidebar-data-v19.1.json +++ b/_includes/sidebar-data-v19.1.json @@ -134,9 +134,56 @@ ] }, { - "title": "Cluster Topology Patterns", - "urls": [ - "/${VERSION}/cluster-topology-patterns.html" + "title": "Topology Patterns", + "items": [ + { + "title": "Overview", + "urls": [ + "/${VERSION}/topology-patterns.html" + ] + }, + { + "title": "Development", + "urls": [ + "/${VERSION}/topology-development.html" + ] + }, + { + "title": "Basic Production", + "urls": [ + "/${VERSION}/topology-basic-production.html" + ] + }, + { + "title": "Geo-Partitioned Replicas", + "urls": [ + "/${VERSION}/topology-geo-partitioned-replicas.html" + ] + }, + { + "title": "Geo-Partitioned Leaseholders", + "urls": [ + "/${VERSION}/topology-geo-partitioned-leaseholders.html" + ] + }, + { + "title": "Duplicate Indexes", + "urls": [ + "/${VERSION}/topology-duplicate-indexes.html" + ] + }, + { + "title": "Follower Reads", + "urls": [ + "/${VERSION}/topology-follower-reads.html" + ] + }, + { + "title": "Follow-the-Workload", + "urls": [ + "/${VERSION}/topology-follow-the-workload.html" + ] + } ] }, { diff --git a/_includes/sidebar-data-v19.2.json b/_includes/sidebar-data-v19.2.json index 28aafe3e7d7..6e5561f51d5 100644 --- a/_includes/sidebar-data-v19.2.json +++ b/_includes/sidebar-data-v19.2.json @@ -134,9 +134,56 @@ ] }, { - "title": "Cluster Topology Patterns", - "urls": [ - "/${VERSION}/cluster-topology-patterns.html" + "title": "Topology Patterns", + "items": [ + { + "title": "Overview", + "urls": [ + "/${VERSION}/topology-patterns.html" + ] + }, + { + "title": "Development", + "urls": [ + "/${VERSION}/topology-development.html" + ] + }, + { + "title": "Basic Production", + "urls": [ + "/${VERSION}/topology-basic-production.html" + ] + }, + { + "title": "Geo-Partitioned Replicas", + "urls": [ + "/${VERSION}/topology-geo-partitioned-replicas.html" + ] + }, + { + "title": "Geo-Partitioned Leaseholders", + "urls": [ + "/${VERSION}/topology-geo-partitioned-leaseholders.html" + ] + }, + { + "title": "Duplicate Indexes", + "urls": [ + "/${VERSION}/topology-duplicate-indexes.html" + ] + }, + { + "title": "Follower Reads", + "urls": [ + "/${VERSION}/topology-follower-reads.html" + ] + }, + { + "title": "Follow-the-Workload", + "urls": [ + "/${VERSION}/topology-follow-the-workload.html" + ] + } ] }, { diff --git a/_includes/v19.1/prod-deployment/insecure-recommendations.md b/_includes/v19.1/prod-deployment/insecure-recommendations.md index e6f7fc0b9fe..11bcbe83d83 100644 --- a/_includes/v19.1/prod-deployment/insecure-recommendations.md +++ b/_includes/v19.1/prod-deployment/insecure-recommendations.md @@ -1,5 +1,3 @@ -- If you plan to use CockroachDB in production, carefully review the [Production Checklist](recommended-production-settings.html). - - Consider using a [secure cluster](manual-deployment.html) instead. Using an insecure cluster comes with risks: - Your cluster is open to any client that can access any node's IP addresses. - Any user, even `root`, can log in without providing a password. 
diff --git a/_includes/v19.1/prod-deployment/insecure-requirements.md b/_includes/v19.1/prod-deployment/insecure-requirements.md index 52640254763..3b45a14b0d5 100644 --- a/_includes/v19.1/prod-deployment/insecure-requirements.md +++ b/_includes/v19.1/prod-deployment/insecure-requirements.md @@ -1,3 +1,5 @@ +- Carefully review the [Production Checklist](recommended-production-settings.html) and recommended [Topology Patterns](topology-patterns.html). + - You must have [SSH access]({{page.ssh-link}}) to each machine. This is necessary for distributing and starting CockroachDB binaries. - Your network configuration must allow TCP communication on the following ports: diff --git a/_includes/v19.1/prod-deployment/secure-recommendations.md b/_includes/v19.1/prod-deployment/secure-recommendations.md index 79d077ee84d..85b0b0b31d0 100644 --- a/_includes/v19.1/prod-deployment/secure-recommendations.md +++ b/_includes/v19.1/prod-deployment/secure-recommendations.md @@ -1,5 +1,3 @@ -- If you plan to use CockroachDB in production, carefully review the [Production Checklist](recommended-production-settings.html). - - Decide how you want to access your Admin UI: Access Level | Description diff --git a/_includes/v19.1/prod-deployment/secure-requirements.md b/_includes/v19.1/prod-deployment/secure-requirements.md index f4a9beb1209..d27643bf706 100644 --- a/_includes/v19.1/prod-deployment/secure-requirements.md +++ b/_includes/v19.1/prod-deployment/secure-requirements.md @@ -1,3 +1,5 @@ +- Carefully review the [Production Checklist](recommended-production-settings.html) and recommended [Topology Patterns](topology-patterns.html). + - You must have [CockroachDB installed](install-cockroachdb.html) locally. This is necessary for generating and managing your deployment's certificates. - You must have [SSH access]({{page.ssh-link}}) to each machine. This is necessary for distributing and starting CockroachDB binaries. diff --git a/_includes/v19.1/topology-patterns/fundamentals.md b/_includes/v19.1/topology-patterns/fundamentals.md new file mode 100644 index 00000000000..daf4364e609 --- /dev/null +++ b/_includes/v19.1/topology-patterns/fundamentals.md @@ -0,0 +1,7 @@ +Before choosing a topology pattern: + +- Review how data is replicated and distributed across a cluster, and how this affects performance. It is especially important to understand the concept of the "leaseholder". For a summary, see [Reads and Writes in CockroachDB](architecture/reads-and-writes-overview.html). For a deeper dive, see the [CockroachDB Architecture](architecture/overview.html) documentation. +- Review the concept of [locality](start-a-node.html#locality), which makes CockroachDB aware of the location of nodes and able to intelligently place and balance data based on how you define [replication controls](configure-replication-zones.html). +- Review the recommendations and requirements in our [Production Checklist](recommended-production-settings.html). +- This topology doesn't account for hardware specifications, so be sure to follow our [hardware recommendations](recommended-production-settings.html#hardware) and perform a POC to size hardware for your use case. +- Adopt relevant [SQL Best Practices](performance-best-practices-overview.html) to ensure optimal performance. 
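As a minimal sketch of how locality and replication controls work together (the `users` table name and the `us-west` region label are assumptions, and the nodes are assumed to have been started with `--locality=region=us-west`), a zone configuration can pin a table's replicas to a region and set its replication factor:

{% include copy-clipboard.html %}
~~~ sql
-- Sketch only: the table name and region label are assumptions.
-- Require all replicas of the table to live on nodes started with
-- --locality=region=us-west, and raise its replication factor from the default of 3 to 5.
ALTER TABLE users CONFIGURE ZONE USING num_replicas = 5, constraints = '[+region=us-west]';

-- Verify the zone configuration that now applies to the table.
SHOW ZONE CONFIGURATION FOR TABLE users;
~~~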
diff --git a/_includes/v19.1/topology-patterns/multi-region-cluster-setup.md b/_includes/v19.1/topology-patterns/multi-region-cluster-setup.md new file mode 100644 index 00000000000..02b13d917ef --- /dev/null +++ b/_includes/v19.1/topology-patterns/multi-region-cluster-setup.md @@ -0,0 +1,30 @@ +Each [multi-region topology pattern](topology-patterns.html#multi-region-patterns) assumes the following setup: + +Multi-region hardware setup + +#### Hardware + +- 3 regions + +- Per region, 3+ AZs with 3+ VMs evenly distributed across them + +- Region-specific app instances and load balancers + - Each load balancer redirects to CockroachDB nodes in its region. + - When CockroachDB nodes are unavailable in a region, the load balancer redirects to nodes in other regions. + +#### Cluster + +Each node is started with the [`--locality`](start-a-node.html#locality) flag specifying its region and AZ combination, e.g.: + +{% include copy-clipboard.html %} +~~~ shell +# Example start command for node in the west1 AZ of the us-west region: +$ cockroach start \ +--locality=region=us-west,zone=west1 \ +--certs-dir=certs \ +--advertise-addr=<node1 address> \ +--join=<node1 address>:26257,<node2 address>:26257,<node3 address>:26257 \ +--cache=.25 \ +--max-sql-memory=.25 \ +--background +~~~ diff --git a/_includes/v19.1/topology-patterns/see-also.md b/_includes/v19.1/topology-patterns/see-also.md new file mode 100644 index 00000000000..03844ca34fd --- /dev/null +++ b/_includes/v19.1/topology-patterns/see-also.md @@ -0,0 +1,11 @@ +- [Topology Patterns Overview](topology-patterns.html) + + - Single-region + - [Development](topology-development.html) + - [Basic Production](topology-basic-production.html) + + - Multi-region + - [Geo-Partitioned Replicas](topology-geo-partitioned-replicas.html) + - [Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) + - [Duplicate Indexes](topology-duplicate-indexes.html) + - [Follow-the-Workload](topology-follow-the-workload.html) diff --git a/_includes/v19.2/prod-deployment/insecure-recommendations.md b/_includes/v19.2/prod-deployment/insecure-recommendations.md index e6f7fc0b9fe..11bcbe83d83 100644 --- a/_includes/v19.2/prod-deployment/insecure-recommendations.md +++ b/_includes/v19.2/prod-deployment/insecure-recommendations.md @@ -1,5 +1,3 @@ -- If you plan to use CockroachDB in production, carefully review the [Production Checklist](recommended-production-settings.html). - - Consider using a [secure cluster](manual-deployment.html) instead. Using an insecure cluster comes with risks: - Your cluster is open to any client that can access any node's IP addresses. - Any user, even `root`, can log in without providing a password. diff --git a/_includes/v19.2/prod-deployment/insecure-requirements.md b/_includes/v19.2/prod-deployment/insecure-requirements.md index 52640254763..3b45a14b0d5 100644 --- a/_includes/v19.2/prod-deployment/insecure-requirements.md +++ b/_includes/v19.2/prod-deployment/insecure-requirements.md @@ -1,3 +1,5 @@ +- Carefully review the [Production Checklist](recommended-production-settings.html) and recommended [Topology Patterns](topology-patterns.html). + - You must have [SSH access]({{page.ssh-link}}) to each machine. This is necessary for distributing and starting CockroachDB binaries. 
- Your network configuration must allow TCP communication on the following ports: diff --git a/_includes/v19.2/prod-deployment/secure-recommendations.md b/_includes/v19.2/prod-deployment/secure-recommendations.md index 79d077ee84d..85b0b0b31d0 100644 --- a/_includes/v19.2/prod-deployment/secure-recommendations.md +++ b/_includes/v19.2/prod-deployment/secure-recommendations.md @@ -1,5 +1,3 @@ -- If you plan to use CockroachDB in production, carefully review the [Production Checklist](recommended-production-settings.html). - - Decide how you want to access your Admin UI: Access Level | Description diff --git a/_includes/v19.2/prod-deployment/secure-requirements.md b/_includes/v19.2/prod-deployment/secure-requirements.md index f4a9beb1209..d27643bf706 100644 --- a/_includes/v19.2/prod-deployment/secure-requirements.md +++ b/_includes/v19.2/prod-deployment/secure-requirements.md @@ -1,3 +1,5 @@ +- Carefully review the [Production Checklist](recommended-production-settings.html) and recommended [Topology Patterns](topology-patterns.html). + - You must have [CockroachDB installed](install-cockroachdb.html) locally. This is necessary for generating and managing your deployment's certificates. - You must have [SSH access]({{page.ssh-link}}) to each machine. This is necessary for distributing and starting CockroachDB binaries. diff --git a/_includes/v19.2/topology-patterns/fundamentals.md b/_includes/v19.2/topology-patterns/fundamentals.md new file mode 100644 index 00000000000..daf4364e609 --- /dev/null +++ b/_includes/v19.2/topology-patterns/fundamentals.md @@ -0,0 +1,7 @@ +Before choosing a topology pattern: + +- Review how data is replicated and distributed across a cluster, and how this affects performance. It is especially important to understand the concept of the "leaseholder". For a summary, see [Reads and Writes in CockroachDB](architecture/reads-and-writes-overview.html). For a deeper dive, see the [CockroachDB Architecture](architecture/overview.html) documentation. +- Review the concept of [locality](start-a-node.html#locality), which makes CockroachDB aware of the location of nodes and able to intelligently place and balance data based on how you define [replication controls](configure-replication-zones.html). +- Review the recommendations and requirements in our [Production Checklist](recommended-production-settings.html). +- This topology doesn't account for hardware specifications, so be sure to follow our [hardware recommendations](recommended-production-settings.html#hardware) and perform a POC to size hardware for your use case. +- Adopt relevant [SQL Best Practices](performance-best-practices-overview.html) to ensure optimal performance. diff --git a/_includes/v19.2/topology-patterns/multi-region-cluster-setup.md b/_includes/v19.2/topology-patterns/multi-region-cluster-setup.md new file mode 100644 index 00000000000..a1194c0fdd9 --- /dev/null +++ b/_includes/v19.2/topology-patterns/multi-region-cluster-setup.md @@ -0,0 +1,30 @@ +Each [multi-region topology pattern](topology-patterns.html#multi-region-patterns) assumes the following setup: + +Multi-region hardware setup + +#### Hardware + +- 3 regions + +- Per region, 3+ AZs with 3+ VMs evenly distributed across them + +- Region-specific app instances and load balancers + - Each load balancer redirects to CockroachDB nodes in its region. + - When CockroachDB nodes are unavailable in a region, the load balancer redirects to nodes in other regions. 
+ +#### Cluster + +Each node is started with the [`--locality`](start-a-node.html#locality) flag specifying its region and AZ combination, e.g.: + +{% include copy-clipboard.html %} +~~~ shell +# Example start command for node in the west1 AZ of the us-west region: +$ cockroach start \ +--locality=region=us-west,zone=west1 \ +--certs-dir=certs \ +--advertise-addr=<node1 address> \ +--join=<node1 address>:26257,<node2 address>:26257,<node3 address>:26257 \ +--cache=.25 \ +--max-sql-memory=.25 \ +--background +~~~ diff --git a/_includes/v19.2/topology-patterns/see-also.md b/_includes/v19.2/topology-patterns/see-also.md new file mode 100644 index 00000000000..03844ca34fd --- /dev/null +++ b/_includes/v19.2/topology-patterns/see-also.md @@ -0,0 +1,11 @@ +- [Topology Patterns Overview](topology-patterns.html) + + - Single-region + - [Development](topology-development.html) + - [Basic Production](topology-basic-production.html) + + - Multi-region + - [Geo-Partitioned Replicas](topology-geo-partitioned-replicas.html) + - [Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) + - [Duplicate Indexes](topology-duplicate-indexes.html) + - [Follow-the-Workload](topology-follow-the-workload.html) diff --git a/images/v19.1/topology-patterns/basic-local-deployment.png b/images/v19.1/topology-patterns/basic-local-deployment.png deleted file mode 100644 index cab46aa61b0..00000000000 Binary files a/images/v19.1/topology-patterns/basic-local-deployment.png and /dev/null differ diff --git a/images/v19.1/topology-patterns/basic-multi-region-layout.png b/images/v19.1/topology-patterns/basic-multi-region-layout.png deleted file mode 100644 index 514e2509a6e..00000000000 Binary files a/images/v19.1/topology-patterns/basic-multi-region-layout.png and /dev/null differ diff --git a/images/v19.1/topology-patterns/basic-multi-region.png b/images/v19.1/topology-patterns/basic-multi-region.png deleted file mode 100644 index f9b46d59960..00000000000 Binary files a/images/v19.1/topology-patterns/basic-multi-region.png and /dev/null differ diff --git a/images/v19.1/topology-patterns/local-scaling.png b/images/v19.1/topology-patterns/local-scaling.png deleted file mode 100644 index 178e257d551..00000000000 Binary files a/images/v19.1/topology-patterns/local-scaling.png and /dev/null differ diff --git a/images/v19.1/topology-patterns/multi-region-partition.png b/images/v19.1/topology-patterns/multi-region-partition.png deleted file mode 100644 index 0ab2ff69e8c..00000000000 Binary files a/images/v19.1/topology-patterns/multi-region-partition.png and /dev/null differ diff --git a/images/v19.1/topology-patterns/single-region-multi.png b/images/v19.1/topology-patterns/single-region-multi.png deleted file mode 100644 index d2cd33bdf7b..00000000000 Binary files a/images/v19.1/topology-patterns/single-region-multi.png and /dev/null differ diff --git a/images/v19.1/topology-patterns/topology_basic_production1.png b/images/v19.1/topology-patterns/topology_basic_production1.png new file mode 100644 index 00000000000..b96d185197b Binary files /dev/null and b/images/v19.1/topology-patterns/topology_basic_production1.png differ diff --git a/images/v19.1/topology-patterns/topology_basic_production2.png b/images/v19.1/topology-patterns/topology_basic_production2.png new file mode 100644 index 00000000000..22359506c75 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_basic_production2.png differ diff --git a/images/v19.1/topology-patterns/topology_basic_production_reads.png b/images/v19.1/topology-patterns/topology_basic_production_reads.png new file mode 
100644 index 00000000000..fd6b9a35e40 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_basic_production_reads.png differ diff --git a/images/v19.1/topology-patterns/topology_basic_production_resiliency1.png b/images/v19.1/topology-patterns/topology_basic_production_resiliency1.png new file mode 100644 index 00000000000..218e3443668 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_basic_production_resiliency1.png differ diff --git a/images/v19.1/topology-patterns/topology_basic_production_resiliency2.png b/images/v19.1/topology-patterns/topology_basic_production_resiliency2.png new file mode 100644 index 00000000000..a9efce59a67 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_basic_production_resiliency2.png differ diff --git a/images/v19.1/topology-patterns/topology_basic_production_resiliency3.png b/images/v19.1/topology-patterns/topology_basic_production_resiliency3.png new file mode 100644 index 00000000000..3c3fd57b457 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_basic_production_resiliency3.png differ diff --git a/images/v19.1/topology-patterns/topology_basic_production_writes.gif b/images/v19.1/topology-patterns/topology_basic_production_writes.gif new file mode 100644 index 00000000000..5f12f331e7f Binary files /dev/null and b/images/v19.1/topology-patterns/topology_basic_production_writes.gif differ diff --git a/images/v19.1/topology-patterns/topology_development1.png b/images/v19.1/topology-patterns/topology_development1.png new file mode 100644 index 00000000000..2882937e438 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_development1.png differ diff --git a/images/v19.1/topology-patterns/topology_development2.png b/images/v19.1/topology-patterns/topology_development2.png new file mode 100644 index 00000000000..1eed95fbaba Binary files /dev/null and b/images/v19.1/topology-patterns/topology_development2.png differ diff --git a/images/v19.1/topology-patterns/topology_development_latency.png b/images/v19.1/topology-patterns/topology_development_latency.png new file mode 100644 index 00000000000..3aa54c45c13 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_development_latency.png differ diff --git a/images/v19.1/topology-patterns/topology_duplicate_indexes1.png b/images/v19.1/topology-patterns/topology_duplicate_indexes1.png new file mode 100644 index 00000000000..c9ad5d97fa3 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_duplicate_indexes1.png differ diff --git a/images/v19.1/topology-patterns/topology_duplicate_indexes_reads.png b/images/v19.1/topology-patterns/topology_duplicate_indexes_reads.png new file mode 100644 index 00000000000..097927ea410 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_duplicate_indexes_reads.png differ diff --git a/images/v19.1/topology-patterns/topology_duplicate_indexes_resiliency.png b/images/v19.1/topology-patterns/topology_duplicate_indexes_resiliency.png new file mode 100644 index 00000000000..39056e22a48 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_duplicate_indexes_resiliency.png differ diff --git a/images/v19.1/topology-patterns/topology_duplicate_indexes_writes.gif b/images/v19.1/topology-patterns/topology_duplicate_indexes_writes.gif new file mode 100644 index 00000000000..16433549cb4 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_duplicate_indexes_writes.gif differ diff --git 
a/images/v19.1/topology-patterns/topology_follow_the_workload_reads.png b/images/v19.1/topology-patterns/topology_follow_the_workload_reads.png new file mode 100644 index 00000000000..67b01da4d37 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_follow_the_workload_reads.png differ diff --git a/images/v19.1/topology-patterns/topology_follow_the_workload_writes.gif b/images/v19.1/topology-patterns/topology_follow_the_workload_writes.gif new file mode 100644 index 00000000000..6cd6be01196 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_follow_the_workload_writes.gif differ diff --git a/images/v19.1/topology-patterns/topology_follower_reads1.png b/images/v19.1/topology-patterns/topology_follower_reads1.png new file mode 100644 index 00000000000..1eb07d53d6a Binary files /dev/null and b/images/v19.1/topology-patterns/topology_follower_reads1.png differ diff --git a/images/v19.1/topology-patterns/topology_follower_reads3.png b/images/v19.1/topology-patterns/topology_follower_reads3.png new file mode 100644 index 00000000000..d6a125c1079 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_follower_reads3.png differ diff --git a/images/v19.1/topology-patterns/topology_follower_reads_reads.png b/images/v19.1/topology-patterns/topology_follower_reads_reads.png new file mode 100644 index 00000000000..47657b885b3 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_follower_reads_reads.png differ diff --git a/images/v19.1/topology-patterns/topology_follower_reads_resiliency.png b/images/v19.1/topology-patterns/topology_follower_reads_resiliency.png new file mode 100644 index 00000000000..73868163a1e Binary files /dev/null and b/images/v19.1/topology-patterns/topology_follower_reads_resiliency.png differ diff --git a/images/v19.1/topology-patterns/topology_follower_reads_writes.gif b/images/v19.1/topology-patterns/topology_follower_reads_writes.gif new file mode 100644 index 00000000000..8fc4b2c55b7 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_follower_reads_writes.gif differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders1.png b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders1.png new file mode 100644 index 00000000000..66d03a7f113 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders1.png differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_reads.png b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_reads.png new file mode 100644 index 00000000000..0daa6665d05 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_reads.png differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_resiliency1.png b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_resiliency1.png new file mode 100644 index 00000000000..09aaa95ded9 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_resiliency1.png differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_resiliency2.png b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_resiliency2.png new file mode 100644 index 00000000000..f372f14552c Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_resiliency2.png differ diff --git 
a/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_writes.gif b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_writes.gif new file mode 100644 index 00000000000..f5c8d077818 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioned_leaseholders_writes.gif differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioning1.png b/images/v19.1/topology-patterns/topology_geo-partitioning1.png new file mode 100644 index 00000000000..a7bc25e6279 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioning1.png differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioning1_no-map.png b/images/v19.1/topology-patterns/topology_geo-partitioning1_no-map.png new file mode 100644 index 00000000000..3b348dd7430 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioning1_no-map.png differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioning_reads.png b/images/v19.1/topology-patterns/topology_geo-partitioning_reads.png new file mode 100644 index 00000000000..6dcdd7e418e Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioning_reads.png differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioning_resiliency1.png b/images/v19.1/topology-patterns/topology_geo-partitioning_resiliency1.png new file mode 100644 index 00000000000..d3353c2f8d0 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioning_resiliency1.png differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioning_resiliency2.png b/images/v19.1/topology-patterns/topology_geo-partitioning_resiliency2.png new file mode 100644 index 00000000000..04191e8ddef Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioning_resiliency2.png differ diff --git a/images/v19.1/topology-patterns/topology_geo-partitioning_writes.gif b/images/v19.1/topology-patterns/topology_geo-partitioning_writes.gif new file mode 100644 index 00000000000..11435a6bd51 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_geo-partitioning_writes.gif differ diff --git a/images/v19.1/topology-patterns/topology_multi-region_hardware.png b/images/v19.1/topology-patterns/topology_multi-region_hardware.png new file mode 100644 index 00000000000..dad856590d0 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_multi-region_hardware.png differ diff --git a/images/v19.1/topology-patterns/topology_pinned_index_leaseholders3.png b/images/v19.1/topology-patterns/topology_pinned_index_leaseholders3.png new file mode 100644 index 00000000000..7d792d3a5ed Binary files /dev/null and b/images/v19.1/topology-patterns/topology_pinned_index_leaseholders3.png differ diff --git a/images/v19.1/topology-patterns/topology_single-region_cluster_resiliency1.png b/images/v19.1/topology-patterns/topology_single-region_cluster_resiliency1.png new file mode 100644 index 00000000000..7fe13079fe0 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_single-region_cluster_resiliency1.png differ diff --git a/images/v19.1/topology-patterns/topology_single-region_cluster_resiliency2.png b/images/v19.1/topology-patterns/topology_single-region_cluster_resiliency2.png new file mode 100644 index 00000000000..f11c5989677 Binary files /dev/null and b/images/v19.1/topology-patterns/topology_single-region_cluster_resiliency2.png differ diff --git a/images/v19.2/topology-patterns/basic-local-deployment.png 
b/images/v19.2/topology-patterns/basic-local-deployment.png deleted file mode 100644 index cab46aa61b0..00000000000 Binary files a/images/v19.2/topology-patterns/basic-local-deployment.png and /dev/null differ diff --git a/images/v19.2/topology-patterns/basic-multi-region-layout.png b/images/v19.2/topology-patterns/basic-multi-region-layout.png deleted file mode 100644 index 514e2509a6e..00000000000 Binary files a/images/v19.2/topology-patterns/basic-multi-region-layout.png and /dev/null differ diff --git a/images/v19.2/topology-patterns/basic-multi-region.png b/images/v19.2/topology-patterns/basic-multi-region.png deleted file mode 100644 index f9b46d59960..00000000000 Binary files a/images/v19.2/topology-patterns/basic-multi-region.png and /dev/null differ diff --git a/images/v19.2/topology-patterns/local-scaling.png b/images/v19.2/topology-patterns/local-scaling.png deleted file mode 100644 index 178e257d551..00000000000 Binary files a/images/v19.2/topology-patterns/local-scaling.png and /dev/null differ diff --git a/images/v19.2/topology-patterns/multi-region-partition.png b/images/v19.2/topology-patterns/multi-region-partition.png deleted file mode 100644 index 0ab2ff69e8c..00000000000 Binary files a/images/v19.2/topology-patterns/multi-region-partition.png and /dev/null differ diff --git a/images/v19.2/topology-patterns/single-region-multi.png b/images/v19.2/topology-patterns/single-region-multi.png deleted file mode 100644 index d2cd33bdf7b..00000000000 Binary files a/images/v19.2/topology-patterns/single-region-multi.png and /dev/null differ diff --git a/images/v19.2/topology-patterns/topology_basic_production1.png b/images/v19.2/topology-patterns/topology_basic_production1.png new file mode 100644 index 00000000000..b96d185197b Binary files /dev/null and b/images/v19.2/topology-patterns/topology_basic_production1.png differ diff --git a/images/v19.2/topology-patterns/topology_basic_production2.png b/images/v19.2/topology-patterns/topology_basic_production2.png new file mode 100644 index 00000000000..22359506c75 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_basic_production2.png differ diff --git a/images/v19.2/topology-patterns/topology_basic_production_reads.png b/images/v19.2/topology-patterns/topology_basic_production_reads.png new file mode 100644 index 00000000000..fd6b9a35e40 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_basic_production_reads.png differ diff --git a/images/v19.2/topology-patterns/topology_basic_production_resiliency1.png b/images/v19.2/topology-patterns/topology_basic_production_resiliency1.png new file mode 100644 index 00000000000..218e3443668 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_basic_production_resiliency1.png differ diff --git a/images/v19.2/topology-patterns/topology_basic_production_resiliency2.png b/images/v19.2/topology-patterns/topology_basic_production_resiliency2.png new file mode 100644 index 00000000000..a9efce59a67 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_basic_production_resiliency2.png differ diff --git a/images/v19.2/topology-patterns/topology_basic_production_resiliency3.png b/images/v19.2/topology-patterns/topology_basic_production_resiliency3.png new file mode 100644 index 00000000000..3c3fd57b457 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_basic_production_resiliency3.png differ diff --git a/images/v19.2/topology-patterns/topology_basic_production_writes.gif 
b/images/v19.2/topology-patterns/topology_basic_production_writes.gif new file mode 100644 index 00000000000..5f12f331e7f Binary files /dev/null and b/images/v19.2/topology-patterns/topology_basic_production_writes.gif differ diff --git a/images/v19.2/topology-patterns/topology_development1.png b/images/v19.2/topology-patterns/topology_development1.png new file mode 100644 index 00000000000..2882937e438 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_development1.png differ diff --git a/images/v19.2/topology-patterns/topology_development2.png b/images/v19.2/topology-patterns/topology_development2.png new file mode 100644 index 00000000000..1eed95fbaba Binary files /dev/null and b/images/v19.2/topology-patterns/topology_development2.png differ diff --git a/images/v19.2/topology-patterns/topology_development_latency.png b/images/v19.2/topology-patterns/topology_development_latency.png new file mode 100644 index 00000000000..3aa54c45c13 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_development_latency.png differ diff --git a/images/v19.2/topology-patterns/topology_duplicate_indexes1.png b/images/v19.2/topology-patterns/topology_duplicate_indexes1.png new file mode 100644 index 00000000000..c9ad5d97fa3 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_duplicate_indexes1.png differ diff --git a/images/v19.2/topology-patterns/topology_duplicate_indexes_reads.png b/images/v19.2/topology-patterns/topology_duplicate_indexes_reads.png new file mode 100644 index 00000000000..097927ea410 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_duplicate_indexes_reads.png differ diff --git a/images/v19.2/topology-patterns/topology_duplicate_indexes_resiliency.png b/images/v19.2/topology-patterns/topology_duplicate_indexes_resiliency.png new file mode 100644 index 00000000000..39056e22a48 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_duplicate_indexes_resiliency.png differ diff --git a/images/v19.2/topology-patterns/topology_duplicate_indexes_writes.gif b/images/v19.2/topology-patterns/topology_duplicate_indexes_writes.gif new file mode 100644 index 00000000000..16433549cb4 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_duplicate_indexes_writes.gif differ diff --git a/images/v19.2/topology-patterns/topology_follow_the_workload_reads.png b/images/v19.2/topology-patterns/topology_follow_the_workload_reads.png new file mode 100644 index 00000000000..67b01da4d37 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_follow_the_workload_reads.png differ diff --git a/images/v19.2/topology-patterns/topology_follow_the_workload_writes.gif b/images/v19.2/topology-patterns/topology_follow_the_workload_writes.gif new file mode 100644 index 00000000000..6cd6be01196 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_follow_the_workload_writes.gif differ diff --git a/images/v19.2/topology-patterns/topology_follower_reads1.png b/images/v19.2/topology-patterns/topology_follower_reads1.png new file mode 100644 index 00000000000..1eb07d53d6a Binary files /dev/null and b/images/v19.2/topology-patterns/topology_follower_reads1.png differ diff --git a/images/v19.2/topology-patterns/topology_follower_reads3.png b/images/v19.2/topology-patterns/topology_follower_reads3.png new file mode 100644 index 00000000000..d6a125c1079 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_follower_reads3.png differ diff --git 
a/images/v19.2/topology-patterns/topology_follower_reads_reads.png b/images/v19.2/topology-patterns/topology_follower_reads_reads.png new file mode 100644 index 00000000000..47657b885b3 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_follower_reads_reads.png differ diff --git a/images/v19.2/topology-patterns/topology_follower_reads_resiliency.png b/images/v19.2/topology-patterns/topology_follower_reads_resiliency.png new file mode 100644 index 00000000000..73868163a1e Binary files /dev/null and b/images/v19.2/topology-patterns/topology_follower_reads_resiliency.png differ diff --git a/images/v19.2/topology-patterns/topology_follower_reads_writes.gif b/images/v19.2/topology-patterns/topology_follower_reads_writes.gif new file mode 100644 index 00000000000..8fc4b2c55b7 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_follower_reads_writes.gif differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders1.png b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders1.png new file mode 100644 index 00000000000..66d03a7f113 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders1.png differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_reads.png b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_reads.png new file mode 100644 index 00000000000..0daa6665d05 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_reads.png differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_resiliency1.png b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_resiliency1.png new file mode 100644 index 00000000000..09aaa95ded9 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_resiliency1.png differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_resiliency2.png b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_resiliency2.png new file mode 100644 index 00000000000..f372f14552c Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_resiliency2.png differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_writes.gif b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_writes.gif new file mode 100644 index 00000000000..f5c8d077818 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioned_leaseholders_writes.gif differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioning1.png b/images/v19.2/topology-patterns/topology_geo-partitioning1.png new file mode 100644 index 00000000000..a7bc25e6279 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioning1.png differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioning1_no-map.png b/images/v19.2/topology-patterns/topology_geo-partitioning1_no-map.png new file mode 100644 index 00000000000..3b348dd7430 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioning1_no-map.png differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioning_reads.png b/images/v19.2/topology-patterns/topology_geo-partitioning_reads.png new file mode 100644 index 00000000000..6dcdd7e418e Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioning_reads.png differ diff --git 
a/images/v19.2/topology-patterns/topology_geo-partitioning_resiliency1.png b/images/v19.2/topology-patterns/topology_geo-partitioning_resiliency1.png new file mode 100644 index 00000000000..d3353c2f8d0 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioning_resiliency1.png differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioning_resiliency2.png b/images/v19.2/topology-patterns/topology_geo-partitioning_resiliency2.png new file mode 100644 index 00000000000..04191e8ddef Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioning_resiliency2.png differ diff --git a/images/v19.2/topology-patterns/topology_geo-partitioning_writes.gif b/images/v19.2/topology-patterns/topology_geo-partitioning_writes.gif new file mode 100644 index 00000000000..11435a6bd51 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_geo-partitioning_writes.gif differ diff --git a/images/v19.2/topology-patterns/topology_multi-region_hardware.png b/images/v19.2/topology-patterns/topology_multi-region_hardware.png new file mode 100644 index 00000000000..dad856590d0 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_multi-region_hardware.png differ diff --git a/images/v19.2/topology-patterns/topology_pinned_index_leaseholders3.png b/images/v19.2/topology-patterns/topology_pinned_index_leaseholders3.png new file mode 100644 index 00000000000..7d792d3a5ed Binary files /dev/null and b/images/v19.2/topology-patterns/topology_pinned_index_leaseholders3.png differ diff --git a/images/v19.2/topology-patterns/topology_single-region_cluster_resiliency1.png b/images/v19.2/topology-patterns/topology_single-region_cluster_resiliency1.png new file mode 100644 index 00000000000..7fe13079fe0 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_single-region_cluster_resiliency1.png differ diff --git a/images/v19.2/topology-patterns/topology_single-region_cluster_resiliency2.png b/images/v19.2/topology-patterns/topology_single-region_cluster_resiliency2.png new file mode 100644 index 00000000000..f11c5989677 Binary files /dev/null and b/images/v19.2/topology-patterns/topology_single-region_cluster_resiliency2.png differ diff --git a/releases/v19.1.0-rc.1.md b/releases/v19.1.0-rc.1.md index 8f234c903cd..8044c908e12 100644 --- a/releases/v19.1.0-rc.1.md +++ b/releases/v19.1.0-rc.1.md @@ -103,7 +103,7 @@ $ docker pull cockroachdb/cockroach-unstable:v19.1.0-rc.1 ### Doc updates -- Added a library of common [Cluster Topology Patterns](../v19.1/cluster-topology-patterns.html). [#4235](https://github.com/cockroachdb/docs/pull/4235) +- Added a library of common [Cluster Topology Patterns](../v19.1/topology-patterns.html). [#4235](https://github.com/cockroachdb/docs/pull/4235) - Documented how [reads and writes](../v19.1/architecture/reads-and-writes-overview.html) are affected by the replicated and distributed nature of data in CockroachDB. [#4543](https://github.com/cockroachdb/docs/pull/4543) - Corrected the syntax for [per-replica replication zone constraints](../v19.1/configure-replication-zones.html#scope-of-constraints). [#4569](https://github.com/cockroachdb/docs/pull/4569) - Added more thorough documentation on [CockroachDB dependencies](../v19.1/recommended-production-settings.html#dependencies). 
[#4567](https://github.com/cockroachdb/docs/pull/4567) diff --git a/releases/v19.1.0.md b/releases/v19.1.0.md index 5da88cd48bc..5d1558082e3 100644 --- a/releases/v19.1.0.md +++ b/releases/v19.1.0.md @@ -124,7 +124,7 @@ Topic | Description **Security** | Added an [overview of CockroachDB security](../v19.1/security-overview.html), with a dedicated page on [authentication](../v19.1/authentication.html), [encryption](../v19.1/encryption.html), [authorization](../v19.1/authorization.html), and [SQL audit logging](../v19.1/sql-audit-logging.html). **Troubleshooting** | Added much more guidance on [troubleshooting cluster setup](../v19.1/cluster-setup-troubleshooting.html) and [troubleshooting SQL behavior](../v19.1/query-behavior-troubleshooting.html). **Architecture** | Added the [Life of a Distributed Transaction](../v19.1/architecture/life-of-a-distributed-transaction.html), which details the path that a query takes through CockroachDB's architecture, starting with a SQL client and progressing all the way to RocksDB (and then back out again). Also added [Reads and Writes in CockroachDB](../v19.1/architecture/reads-and-writes-overview.html), which explains how reads and writes are affected by the replicated and distributed nature of data in CockroachDB. -**Production Guidance** | Expanded the [Production Checklist](../v19.1/recommended-production-settings.html) with more current hardware recommendations and additional guidance on storage, file systems, and clock synchronization. Also added a library of common [Cluster Topology Patterns](../v19.1/cluster-topology-patterns.html). +**Production Guidance** | Expanded the [Production Checklist](../v19.1/recommended-production-settings.html) with more current hardware recommendations and additional guidance on storage, file systems, and clock synchronization. Also added a library of common [Cluster Topology Patterns](../v19.1/topology-patterns.html). **ORMs** | Expanded the [SQLAlchemy tutorial](../v19.1/build-a-python-app-with-cockroachdb-sqlalchemy.html) to provide code for transaction retries and best practices for using SQLAlchemy with CockroachDB. **Training** | Added [geo-partitioning](../v19.1/training/geo-partitioning.html), [Kubernetes](../v19.1/training/orchestration-with-kubernetes.html), and [TPC-C benchmarking](../v19.1/training/performance-benchmarking.html) modules to the intro to CockroachDB training. diff --git a/v19.1/cluster-topology-patterns.md b/v19.1/cluster-topology-patterns.md deleted file mode 100644 index 2df5e19024d..00000000000 --- a/v19.1/cluster-topology-patterns.md +++ /dev/null @@ -1,189 +0,0 @@ ---- -title: Cluster Topology Patterns -summary: Common cluster topology patterns with setup examples and performance considerations. -toc: true ---- - -This page covers common cluster topology patterns with setup examples, as well as the benefits and trade-off for each pattern. Before you select a candidate pattern for your cluster, use the following broad patterns as a starting point and consider trade-offs. - -## Considerations - -Before selecting a pattern: - -- Review the recommendations and requirements in our [Production Checklist](recommended-production-settings.html). -- Review the [CockroachDB architecture](architecture/overview.html). It's especially important to understand how data is stored in ranges, how ranges are replicated, and how one replica in each range serves as the "leaseholder" that coordinates all read and write requests for that range. 
For more details and some example scenarios, see [Reads and Writes in CockroachDB](architecture/reads-and-writes-overview.html). -- Learn about the concept of [locality](start-a-node.html#locality), which makes CockroachDB aware of the location of nodes and able to intelligently balance replicas across localities. Locality is also a prerequisite for the [follow-the-workload](demo-follow-the-workload.html) feature and for enterprise [partitioning](partitioning.html). -- Learn about [follower reads](follower-reads.html), an enterprise feature, which reduces latency for read queries by letting the closest replica serve the read request at the expense of only not guaranteeing that data is up to date. - -{{site.data.alerts.callout_info}} -This page does not factor in hardware differences. -{{site.data.alerts.end}} - -## Single-region clusters - -### Single datacenter, basic pattern - -This first example is of a single-datacenter cluster, with each node on a different machine as per our [basic topology recommendations](recommended-production-settings.html#basic-topology-recommendations). This pattern is common starting point for smaller organizations who may not have the resources (or need) to worry about a datacenter failure but still want to take advantage of CockroachDB's [high availability](high-availability.html). - -Local deployment - -For the diagram above: - -**Configuration** - -- `App` is an application that accesses CockroachDB. -- `Load Balancer` is a software-based load balancer. -- Leaseholders are denoted by a dashed line. -- The 3 nodes are all running in a single datacenter. -- The cluster is using the default replication factor of 3 (represented by 3 blocks of the same color). Each range (e.g., `r1`) has 3 replicas, with each replica on a different node. - -**Availability expectations** - -- With the default replication factor of 3, the cluster can tolerate 1 node failure. In such a case, all ranges still have 2 replicas on live nodes and, thus, a majority. - -**Performance expectations** - -- The network latency among the nodes is expected to be sub-millisecond. - -### Single datacenter, more performant and/or resilient - -While the [basic single-datacenter deployment](#single-datacenter-basic-pattern) takes advantage of CockroachDB's high availability, shares the load, and spreads capacity, scaling out the nodes has many benefits: - -- Performance: Adding nodes for more processing power and/or storage typically increases throughput. For example, with five nodes and a replication factor of 3, each range has 3 replicas, with each replica on a different node. In this case, there will only be 1-2 replicas on each nod, leaving additional storage and bandwidth available. -- Resiliency: There will be more room to increase the replication factor, which increases resiliency against the failure of more than one node. For example, with 5 nodes and a replication factor of 5, each range has 5 replicas, with each replica on a different node. In this case, even with 2 nodes down, each range retains a majority of its replicas (3/5). - -There are no constraints on node increments. - -Resilient local deployment - -## Multi-region clusters - -### Multiple regions, basic pattern - -Once an organization begins to grow, a datacenter outage isn't acceptable and a cluster needs to be available all of the time. This is where a multi-region cluster is useful. A multi-region cluster is comprised of multiple datacenters in different regions (e.g., `us-east`, `us-west`), each with multiple nodes. 
CockroachDB will automatically try to diversify replica placement across localities (i.e., place a replica in each region). This setup can be used when your application is not SLA-sensitive, or you do not care about write performance. With this cluster pattern, many organizations will consider transitioning to using a variety of cloud providers (one provider per region). - -In this example, the cluster has an asymmetrical setup where `us-central` is closer to the `us-west` than the `us-east`. This configuration will provide better write latency to the write workloads in `us-west` and `us-central` because there is a lower latency (versus writing in the `us-east`). - -Basic pattern for multi-region - -Each region has 3 nodes across 3 datacenters and does not use partitioning: - -Basic pattern for multi-region - -For this example: - -#### Configuration - -- `App` is an application that accesses CockroachDB. -- `Load Balancer`s are software-based load balancers that direct traffic to each of the regions' nodes at random. -- Leaseholders are denoted by a dashed line. -- 9 nodes are spread across 3 regions (`us-west`, `us-central`, `us-east`) within a country (`us`). -- Every region has 3 nodes, with each node in a different datacenter (e.g., `us-west-a`, `us-west-b`, `us-west-c`). Each node is started with the `--locality` flag to identify which region and datacenter it is in: - - ~~~ - --locality=region=us-west,datacenter=us-west-a - --locality=region=us-west,datacenter=us-west-b - --locality=region=us-west,datacenter=us-west-c - --locality=region=us-central,datacenter=us-central-a - --locality=region=us-central,datacenter=us-central-b - --locality=region=us-central,datacenter=us-central-c - --locality=region=us-east,datacenter=us-east-a - --locality=region=us-east,datacenter=us-east-b - --locality=region=us-east,datacenter=us-east-c - ~~~ - - - -- The cluster is using a replication factor of 3 (represented by 3 blocks of the same color). Each range (e.g., `r1`) has 3 replicas, with each replica on a different node. - -**Availability expectations** - -- If all of the nodes for a preferred locality are down, then the app will try datacenters in other localities. -- The cluster can withstand a datacenter failure without losing a region because there are 2 nodes in each region. -- The cluster can withstand a regional failure because, with `--locality` specified on each node as shown above, the cluster balances each range across all 3 regions; with one region down, each range still has a majority of its replicas (2/3). - -**Performance expectations** - -- The latency numbers (e.g., `60ms`) in the first diagram represent network round-trip from one region to another. -- For reads, if the gateway node (the node the app connects to) is in the region containing the leaseholder replica of the relevant range, latency should be around 2ms. If the gateway node is in a region that does not contain the leaseholder, the cluster will route the request to the node with the leaseholder in another region, that node will retrieve the data, and then the cluster will return the data to the gateway node. In this case, the network round-trips from one region to another will add latency. In some cases, [follow-the-workload](demo-follow-the-workload.html) will increase the speed for reads by moving the leaseholder closer to the application. -- For writes, because a majority of replicas are always required to agree before a write is committed, latencies will be as fast as the slowest quorum between 2 regions. 
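To observe the read and write behavior described above for a specific table, a quick sketch like the following can help (it uses the `customers` table defined in the partitioning example below; follower reads are the enterprise feature mentioned earlier on this page):

{% include copy-clipboard.html %}
~~~ sql
-- Sketch only: list each range of the customers table with its replicas and leaseholder.
SHOW EXPERIMENTAL_RANGES FROM TABLE customers;

-- Enterprise follower read: let the closest replica serve slightly historical data,
-- avoiding a cross-region hop to the leaseholder.
SELECT * FROM customers
  AS OF SYSTEM TIME experimental_follower_read_timestamp();
~~~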
- -### Multiple regions, more performant (with partitioning) - -While the [basic pattern for a multi-region cluster](#multiple-regions-basic-pattern) can help protect against datacenter and regional failures, there will be high latency due to cross-country roundtrips. This is not ideal for organizations who have users spread out across the country (or world). For any multi-region cluster, [partitioning](partitioning.html) should be used to keep data close to the users who access it. - -In this example, a table is partitioned by a column indicating the region where a customer is located (e.g., a table has a `city` column and the values `LA`, `SF`, and `SD` are partitioned to the `us-west` region). Then, [zone configurations](configure-replication-zones.html) are used to keep the replicas and leaseholders for each partition in the closest datacenter to those customer. - -This setup uses a modern [multi-tier architecture](https://en.wikipedia.org/wiki/Multitier_architecture), which is simplified to global server load balancer (`GSLB`), `App`, and `Load Balancer` layers in the below diagram: - -Partitioned multi-region - -**Configuration** - -A multi-region cluster with partitioning has a similar setup as the [basic multi-region pattern](#multiple-regions-basic-pattern): - -- 9 nodes are spread across 3 regions (`us-west`, `us-central`, `us-east`) within a country (`us`). -- A client connects to geographically close `app` server via `GSLB`. -- Inside each region, an `app` server connects to one of the CockroachDB nodes within the region through a software-based `load balancer`. -- Every region has 3 nodes across 3 datacenters (e.g., `us-west-a`, `us-west-b`, `us-west-c`). Each node is started with the `--locality` flag to identify which region it is in: - - ~~~ - --locality=region=us-west,datacenter=us-west-a - --locality=region=us-west,datacenter=us-west-b - --locality=region=us-west,datacenter=us-west-c - --locality=region=us-central,datacenter=us-central-a - --locality=region=us-central,datacenter=us-central-b - --locality=region=us-central,datacenter=us-central-c - --locality=region=us-east,datacenter=us-east-a - --locality=region=us-east,datacenter=us-east-b - --locality=region=us-east,datacenter=us-east-c - ~~~ - - - - -- The cluster is using a replication factor of 3 (represented by the 3 blocks of the same color). Each range (e.g., `r1`) has a prefix (`w-` for West, `c-` for Central, `e-` for East), which denotes the partition that is replicated. -- Leaseholders are denoted by a dashed line. -- Tables are [partitioned](partitioning.html) at the row level by locality, for example: - - ~~~ - > CREATE TABLE customers ( - id INT DEFAULT unique_rowid(), - name STRING, - email STRING, - state STRING, - expected_graduation_date DATE, - PRIMARY KEY (state, id)) - PARTITION BY LIST (state) ( - PARTITION west VALUES IN ('CA','OR','WA'[...]), - PARTITION central VALUES IN ('OH','IL','MI'[...]), - PARTITION east VALUES IN ('NY','MA','VA'[...]), - PARTITION DEFAULT VALUES IN (default) - ); - ~~~ - -- Using [replication zones](partitioning.html#define-table-partitions-by-list), partitions are pinned to the nodes in their locality, for example: - - ~~~ - > ALTER PARTITION west OF TABLE customers \ - CONFIGURE ZONE USING constraints='[+region=us-west]'; - ~~~ - -**Availability expectations** - -- The cluster as a whole can withstand a regional failure because system-level ranges have their replicas balanced across regions. 
However, because user data is partitioned and pinned to specific regions, region-specific data will be unavailable during a regional failure. -- Within a region, partitions pinned to the region will remain available as long as 2/3 datacenters are up. - -**Performance expectations** - -- Reads respond in 2-4 milliseconds. -- Writes respond in 2-4 milliseconds. -- Symmetrical latency between datacenters. - -## Anti-patterns - -Anti-patterns are commonly used patterns that are ineffective or risky. Consider the following when choosing a cluster pattern: - -- Do not deploy to 2 datacenters. A cluster across 2 datacenters is not protected against datacenter failure. In order to survive the failure of a datacenter, you need to deploy your cluster across 3 or more datacenters. -- Do not deploy to regions with high network latency (e.g., `us-west`, `asia`, and `europe`) without using [partitioning](partitioning.html). -- The cluster's replication factor does not need to be the same as the number of nodes in the cluster. In fact, as you scale your cluster, you should add nodes (but keep the replication factor at 5, for example) to improve performance. This is shown in the [Single datacenter, more resilient and/or performant](#single-datacenter-more-performant-and-or-resilient) section. diff --git a/v19.1/configure-replication-zones.md b/v19.1/configure-replication-zones.md index a81ed47c884..67134cb1064 100644 --- a/v19.1/configure-replication-zones.md +++ b/v19.1/configure-replication-zones.md @@ -125,7 +125,7 @@ Constraint Scope | Description | Syntax ### Node/replica recommendations -See [Cluster Topography](recommended-production-settings.html#cluster-topology) recommendations for production deployments. +See [Cluster Topography](recommended-production-settings.html#topology) recommendations for production deployments. ## View replication zones diff --git a/v19.1/deploy-cockroachdb-on-aws-insecure.md b/v19.1/deploy-cockroachdb-on-aws-insecure.md index 545c10185c0..e3d67e74ebb 100644 --- a/v19.1/deploy-cockroachdb-on-aws-insecure.md +++ b/v19.1/deploy-cockroachdb-on-aws-insecure.md @@ -65,13 +65,13 @@ You can create these rules using [Security Groups' Inbound Rules](http://docs.aw [Create an instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/launching-instance.html) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate instance for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use `m` (general purpose), `c` (compute-optimized), or `i` (storage-optimized) [instances](https://aws.amazon.com/ec2/instance-types/), with SSD-backed [EBS volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) or [Instance Store volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ssd-instance-store.html). For example, Cockroach Labs has used `m3.large` instances (2 vCPUs and 7.5 GiB of RAM per instance) for internal testing. - **Do not** use ["burstable" `t2` instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/t2-instances.html), which limit the load on a single core. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). 
+For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. Synchronize clocks diff --git a/v19.1/deploy-cockroachdb-on-aws.md b/v19.1/deploy-cockroachdb-on-aws.md index 4eb8d8b536f..b8d4a80eb9d 100644 --- a/v19.1/deploy-cockroachdb-on-aws.md +++ b/v19.1/deploy-cockroachdb-on-aws.md @@ -73,7 +73,7 @@ You can create these rules using [Security Groups' Inbound Rules](http://docs.aw - **Do not** use ["burstable" `t2` instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/t2-instances.html), which limit the load on a single core. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. Synchronize clocks diff --git a/v19.1/deploy-cockroachdb-on-digital-ocean-insecure.md b/v19.1/deploy-cockroachdb-on-digital-ocean-insecure.md index e4c7ee8d9ef..1de235383fb 100644 --- a/v19.1/deploy-cockroachdb-on-digital-ocean-insecure.md +++ b/v19.1/deploy-cockroachdb-on-digital-ocean-insecure.md @@ -29,11 +29,11 @@ This page shows you how to deploy an insecure multi-node CockroachDB cluster on [Create Droplets](https://www.digitalocean.com/community/tutorials/how-to-create-your-first-digitalocean-droplet) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate droplet for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use any [droplets](https://www.digitalocean.com/pricing/) except standard droplets with only 1 GB of RAM, which is below our minimum requirement. All Digital Ocean droplets use SSD storage. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 2. Synchronize clocks diff --git a/v19.1/deploy-cockroachdb-on-digital-ocean.md b/v19.1/deploy-cockroachdb-on-digital-ocean.md index e38d523f941..494f9b27d00 100644 --- a/v19.1/deploy-cockroachdb-on-digital-ocean.md +++ b/v19.1/deploy-cockroachdb-on-digital-ocean.md @@ -29,11 +29,11 @@ If you are only testing CockroachDB, or you are not concerned with protecting ne [Create Droplets](https://www.digitalocean.com/community/tutorials/how-to-create-your-first-digitalocean-droplet) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate Droplet for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use any [droplets](https://www.digitalocean.com/pricing/) except standard droplets with only 1 GB of RAM, which is below our minimum requirement. All Digital Ocean droplets use SSD storage. 
-For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 2. Synchronize clocks diff --git a/v19.1/deploy-cockroachdb-on-google-cloud-platform-insecure.md b/v19.1/deploy-cockroachdb-on-google-cloud-platform-insecure.md index fa954d3545b..566c91e3f09 100644 --- a/v19.1/deploy-cockroachdb-on-google-cloud-platform-insecure.md +++ b/v19.1/deploy-cockroachdb-on-google-cloud-platform-insecure.md @@ -56,7 +56,7 @@ Applications will not connect directly to your CockroachDB nodes. Instead, they' [Create an instance](https://cloud.google.com/compute/docs/instances/create-start-instance) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate instance for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use `n1-standard` or `n1-highcpu` [predefined VMs](https://cloud.google.com/compute/pricing#predefined_machine_types), or [custom VMs](https://cloud.google.com/compute/pricing#custommachinetypepricing), with [Local SSDs](https://cloud.google.com/compute/docs/disks/#localssds) or [SSD persistent disks](https://cloud.google.com/compute/docs/disks/#pdspecs). For example, Cockroach Labs has used custom VMs (8 vCPUs and 16 GiB of RAM per VM) for internal testing. @@ -64,7 +64,7 @@ Applications will not connect directly to your CockroachDB nodes. Instead, they' - If you used a tag for your firewall rules, when you create the instance, select **Management, disk, networking, SSH keys**. Then on the **Networking** tab, in the **Network tags** field, enter **cockroachdb**. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. Synchronize clocks diff --git a/v19.1/deploy-cockroachdb-on-google-cloud-platform.md b/v19.1/deploy-cockroachdb-on-google-cloud-platform.md index 67c97d3a696..6af58b8003f 100644 --- a/v19.1/deploy-cockroachdb-on-google-cloud-platform.md +++ b/v19.1/deploy-cockroachdb-on-google-cloud-platform.md @@ -56,7 +56,7 @@ Applications will not connect directly to your CockroachDB nodes. Instead, they' [Create an instance](https://cloud.google.com/compute/docs/instances/create-start-instance) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate instance for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). 
- Use `n1-standard` or `n1-highcpu` [predefined VMs](https://cloud.google.com/compute/pricing#predefined_machine_types), or [custom VMs](https://cloud.google.com/compute/pricing#custommachinetypepricing), with [Local SSDs](https://cloud.google.com/compute/docs/disks/#localssds) or [SSD persistent disks](https://cloud.google.com/compute/docs/disks/#pdspecs). For example, Cockroach Labs has used custom VMs (8 vCPUs and 16 GiB of RAM per VM) for internal testing. @@ -64,7 +64,7 @@ Applications will not connect directly to your CockroachDB nodes. Instead, they' - If you used a tag for your firewall rules, when you create the instance, select **Management, disk, networking, SSH keys**. Then on the **Networking** tab, in the **Network tags** field, enter **cockroachdb**. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. Synchronize clocks diff --git a/v19.1/deploy-cockroachdb-on-microsoft-azure-insecure.md b/v19.1/deploy-cockroachdb-on-microsoft-azure-insecure.md index 34b17dba091..4f003afa46a 100644 --- a/v19.1/deploy-cockroachdb-on-microsoft-azure-insecure.md +++ b/v19.1/deploy-cockroachdb-on-microsoft-azure-insecure.md @@ -71,7 +71,7 @@ To enable this in Azure, you must create a Resource Group, Virtual Network, and [Create Linux VMs](https://docs.microsoft.com/en-us/azure/virtual-machines/virtual-machines-linux-quick-create-portal) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate VM for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use storage-optimized [Ls-series](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-storage) VMs with [Premium Storage](https://docs.microsoft.com/en-us/azure/virtual-machines/windows/premium-storage) or local SSD storage with a Linux filesystem such as `ext4` (not the Windows `ntfs` filesystem). For example, Cockroach Labs has used `Standard_L4s` VMs (4 vCPUs and 32 GiB of RAM per VM) for internal testing. @@ -81,7 +81,7 @@ To enable this in Azure, you must create a Resource Group, Virtual Network, and - When creating the VMs, make sure to select the **Resource Group**, **Virtual Network**, and **Network Security Group** you created. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. 
Synchronize clocks diff --git a/v19.1/deploy-cockroachdb-on-microsoft-azure.md b/v19.1/deploy-cockroachdb-on-microsoft-azure.md index 349fcef85bd..138d855af5a 100644 --- a/v19.1/deploy-cockroachdb-on-microsoft-azure.md +++ b/v19.1/deploy-cockroachdb-on-microsoft-azure.md @@ -68,7 +68,7 @@ To enable this in Azure, you must create a Resource Group, Virtual Network, and [Create Linux VMs](https://docs.microsoft.com/en-us/azure/virtual-machines/virtual-machines-linux-quick-create-portal) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate VM for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use storage-optimized [Ls-series](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-storage) VMs with [Premium Storage](https://docs.microsoft.com/en-us/azure/virtual-machines/windows/premium-storage) or local SSD storage with a Linux filesystem such as `ext4` (not the Windows `ntfs` filesystem). For example, Cockroach Labs has used `Standard_L4s` VMs (4 vCPUs and 32 GiB of RAM per VM) for internal testing. @@ -78,7 +78,7 @@ To enable this in Azure, you must create a Resource Group, Virtual Network, and - When creating the VMs, make sure to select the **Resource Group**, **Virtual Network**, and **Network Security Group** you created. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. Synchronize clocks diff --git a/v19.1/index.md b/v19.1/index.md index 8ee57af28f2..75177798188 100644 --- a/v19.1/index.md +++ b/v19.1/index.md @@ -62,9 +62,9 @@ CockroachDB is the SQL database for building global, scalable cloud services tha

Deploy

diff --git a/v19.1/performance-best-practices-overview.md b/v19.1/performance-best-practices-overview.md index 9ae3b8170fc..bbc14810fef 100644 --- a/v19.1/performance-best-practices-overview.md +++ b/v19.1/performance-best-practices-overview.md @@ -8,7 +8,7 @@ build_for: [standard, managed] This page provides best practices for optimizing SQL performance in CockroachDB. {{site.data.alerts.callout_success}} -For a demonstration of some of these techniques, see [Performance Tuning](performance-tuning.html). +For a demonstration of some of these techniques, see [Performance Tuning](performance-tuning.html). For guidance on deployment and data location techniques to minimize network latency, see [Topology Patterns](topology-patterns.html). {{site.data.alerts.end}} ## Multi-row DML best practices diff --git a/v19.1/recommended-production-settings.md b/v19.1/recommended-production-settings.md index b8447f4af9e..25097e2b377 100644 --- a/v19.1/recommended-production-settings.md +++ b/v19.1/recommended-production-settings.md @@ -2,44 +2,30 @@ title: Production Checklist summary: Recommended settings for production deployments. toc: true +toc_not_nested: true --- This page provides important recommendations for production deployments of CockroachDB. -## Cluster topology +## Topology -### Terminology +When planning your deployment, it's important to carefully review and choose the [topology patterns](topology-patterns.html) that best meet your latency and resiliency requirements. This is especially crucial for multi-region deployments. -To properly plan your [cluster's topology](cluster-topology-patterns.html), it's important to review some basic CockroachDB-specific terminology: - -Term | Definition ------|------------ -**Cluster** | Your CockroachDB deployment, which acts as a single logical application that contains one or more databases. -**Node** | An individual machine running CockroachDB. Many nodes join to create your cluster. -**Range** | CockroachDB stores all user data and almost all system data in a giant sorted map of key-value pairs. This keyspace is divided into "ranges", contiguous chunks of the keyspace, so that every key can always be found in a single range. -**Replica** | CockroachDB replicates each range (3 times by default) and stores each replica on a different node. -**Range Lease** | For each range, one of the replicas holds the "range lease". This replica, referred to as the "leaseholder", is the one that receives and coordinates all read and write requests for the range. - -### Basic topology recommendations +Also keep in mind some basic topology recommendations: - Run each node on a separate machine. Since CockroachDB replicates across nodes, running more than one node per machine increases the risk of data loss if a machine fails. Likewise, if a machine has multiple disks or SSDs, run one node with multiple `--store` flags and not one node per disk. For more details about stores, see [Start a Node](start-a-node.html#store). -- When deploying in a single datacenter: +- When starting each node, use the [`--locality`](start-a-node.html#locality) flag to describe the node's location, for example, `--locality=region=west,zone=us-west-1`. The key-value pairs should be ordered from most to least inclusive, and the keys and order of key-value pairs must be the same on all nodes. 
+ +- When deploying in a single availability zone: - To be able to tolerate the failure of any 1 node, use at least 3 nodes with the [`.default` 3-way replication factor](configure-replication-zones.html#view-the-default-replication-zone). In this case, if 1 node fails, each range retains 2 of its 3 replicas, a majority. - To be able to tolerate 2 simultaneous node failures, use at least 5 nodes and [increase the `.default` replication factor for user data](configure-replication-zones.html#edit-the-default-replication-zone) to 5. The replication factor for [important internal data](configure-replication-zones.html#create-a-replication-zone-for-a-system-range) is 5 by default, so no adjustments are needed for internal data. In this case, if 2 nodes fail at the same time, each range retains 3 of its 5 replicas, a majority. -- When deploying across multiple datacenters in one or more regions: - - To be able to tolerate the failure of 1 entire datacenter, use at least 3 datacenters and set `--locality` on each node to spread data evenly across datacenters (see next bullet for more details). In this case, if 1 datacenter goes offline, the 2 remaining datacenters retain a majority of replicas. - - When starting each node, use the [`--locality`](start-a-node.html#locality) flag to describe the node's location, for example, `--locality=region=west,datacenter=us-west-1`. The key-value pairs should be ordered from most to least inclusive, and the keys and order of key-value pairs must be the same on all nodes. - - CockroachDB spreads the replicas of each piece of data across as diverse a set of localities as possible, with the order determining the priority. However, locality can also be used to influence the location of data replicas in various ways using [replication zones](configure-replication-zones.html#replication-constraints). - - When there is high latency between nodes, CockroachDB uses locality to move range leases closer to the current workload, reducing network round trips and improving read performance, also known as ["follow-the-workload"](demo-follow-the-workload.html). In a deployment across more than 3 datacenters, however, to ensure that all data benefits from "follow-the-workload", you must [increase the replication factor](configure-replication-zones.html#edit-the-default-replication-zone) to match the total number of datacenters. - - Locality is also a prerequisite for using the [table partitioning](partitioning.html) and [**Node Map**](enable-node-map.html) enterprise features. - -{{site.data.alerts.callout_success}} -For added context about CockroachDB's fault tolerance and automated repair capabilities, see [this training](training/fault-tolerance-and-automated-repair.html). -{{site.data.alerts.end}} +- When deploying across multiple availability zones: + - To be able to tolerate the failure of 1 entire AZ in a region, use at least 3 AZs per region and set `--locality` on each node to spread data evenly across regions and AZs. In this case, if 1 AZ goes offline, the 2 remaining AZs retain a majority of replicas. + - To be able to tolerate the failure of 1 entire region, use at least 3 regions. ## Hardware diff --git a/v19.1/topology-basic-production.md b/v19.1/topology-basic-production.md new file mode 100644 index 00000000000..39236963817 --- /dev/null +++ b/v19.1/topology-basic-production.md @@ -0,0 +1,90 @@ +--- +title: Basic Production Topology +summary: Guidance for a single-region production deployment. 
+toc: true +--- + +When you're ready to run CockroachDB in production in a single region, it's important to deploy at least 3 CockroachDB nodes to take advantage of CockroachDB's automatic replication, distribution, rebalancing, and resiliency capabilities. + +{{site.data.alerts.callout_success}} +If you haven't already, [review the full range of topology patterns](topology-patterns.html) to ensure you choose the right one for your use case. +{{site.data.alerts.end}} + +## Prerequisites + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +## Configuration + +Basic production topology + +1. Provision hardware as follows: + - 1 region with 3 AZs + - 3+ VMs evenly distributed across AZs; add more VMs to increase throughput + - App and load balancer in same region as VMs for CockroachDB + - The load balancer redirects to CockroachDB nodes in the region + +2. Start each node on a separate VM, setting the [`--locality`](start-a-node.html#locality) flag to the node's region and AZ combination, e.g.: + + {% include copy-clipboard.html %} + ~~~ shell + # Example start command for node in the east1 AZ of the us-east region: + $ cockroach start \ + --locality=region=us-east,zone=east1 \ + --certs-dir=certs \ + --advertise-addr= \ + --join=:26257,:26257,:26257 \ + --cache=.25 \ + --max-sql-memory=.25 \ + --background + ~~~ + +With the default 3-way replication factor and `--locality` set as described above, CockroachDB balances each range of table data across AZs, one replica per AZ. System data is replicated 5 times by default and also balanced across AZs, thus increasing the [resiliency of the cluster](configure-replication-zones.html#create-a-replication-zone-for-a-system-range) as a whole. + +## Characteristics + +### Latency + +#### Reads + +Since all ranges, including leaseholder replicas, are in a single region, read latency is very low. + +For example, in the animation below: + +1. The read request reaches the load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the relevant leaseholder. +4. The leaseholder retrieves the results and returns to the gateway node. +5. The gateway node returns the results to the client. + +Basic production topology + +#### Writes + +Since all ranges are in a single region, writes achieve consensus without leaving the region and, thus, write latency is very low as well. + +For example, in the animation below: + +1. The write request reaches the load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replicas for the relevant table and secondary index. +4. While each leaseholder appends the write to its Raft log, it notifies its follower replicas. +5. In each case, as soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas. +6. The leaseholders then return acknowledgement of the commit to the gateway node. +7. The gateway node returns the acknowledgement to the client. 
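As a quick sanity check of the configuration above, you can inspect where CockroachDB has placed a table's replicas and leaseholders. The following is a minimal sketch, not part of the original steps, and assumes a hypothetical table named `users` already exists in the cluster:

{% include copy-clipboard.html %}
~~~ sql
-- Hypothetical table name; substitute one of your own tables.
> SHOW EXPERIMENTAL_RANGES FROM TABLE users;
~~~

The `replicas` and `lease_holder` columns report node IDs; cross-referencing them with the `--locality` values you set at startup should show each range with one replica per AZ.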
+ +Leaseholder preferences topology + +### Resiliency + +Because each range is balanced across AZs, one AZ can fail without interrupting access to any data: + +Basic production topology + +However, if an additional AZ fails at the same time, the ranges that lose consensus become unavailable for reads and writes: + +Basic production topology + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.1/topology-development.md b/v19.1/topology-development.md new file mode 100644 index 00000000000..360eb5685ab --- /dev/null +++ b/v19.1/topology-development.md @@ -0,0 +1,39 @@ +--- +title: Development Topology +summary: Guidance for a single-node cluster for local development. +toc: true +--- + +While developing an application against CockroachDB, it's sufficient to deploy a single-node cluster close to your test application, whether that's on a single VM or on your laptop. + +{{site.data.alerts.callout_success}} +If you haven't already, [review the full range of topology patterns](topology-patterns.html) to ensure you choose the right one for your use case. +{{site.data.alerts.end}} + +## Prerequisites + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +## Configuration + +Development topology + +For this pattern, you can either [run CockroachDB locally](start-a-local-cluster.html) or [deploy a single-node cluster on a cloud VM](manual-deployment.html). + +## Characteristics + +### Latency + +With the CockroachDB node in the same region as your client, and without the overhead of replication, both read and write latency are very low: + +Development topology + +### Resiliency + +In a single-node cluster, CockroachDB does not replicate data and, therefore, is not resilient to failures. If the machine where the node is running fails, or if the region or availability zone containing the machine fails, the cluster becomes unavailable: + +Development topology + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.1/topology-duplicate-indexes.md b/v19.1/topology-duplicate-indexes.md new file mode 100644 index 00000000000..3b5318f11cd --- /dev/null +++ b/v19.1/topology-duplicate-indexes.md @@ -0,0 +1,142 @@ +--- +title: Duplicate Indexes Topology +summary: Guidance on using the duplicate indexes topology in a multi-region deployment. +toc: true +--- + +In a multi-region deployment, the duplicate indexes pattern is a good choice for tables with the following requirements: + +- Read latency must be low, but write latency can be much higher. +- Reads must be up-to-date for business reasons or because the table is referenced by [foreign keys](foreign-key.html). +- Rows in the table, and all latency-sensitive queries, **cannot** be tied to specific geographies. +- Table data must remain available during a region failure. + +In general, this pattern is well suited for immutable/reference tables that are rarely or never updated. + +{{site.data.alerts.callout_info}} +Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables. +{{site.data.alerts.end}} + +{{site.data.alerts.callout_success}} +If reads from a table can be historical (48 seconds or more in the past), consider the [Follower Reads](topology-follower-reads.html) pattern.
If rows in the table, and all latency-sensitive queries, can be tied to specific geographies, consider the [Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) pattern. Both patterns avoid extra secondary indexes, which increase data replication, and therefore offer higher throughput and require less storage. +{{site.data.alerts.end}} + +## Prerequisites + +### Fundamentals + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +### Cluster setup + +{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %} + +## Configuration + +{{site.data.alerts.callout_info}} +Pinning secondary indexes requires an [Enterprise license](https://www.cockroachlabs.com/get-cockroachdb). +{{site.data.alerts.end}} + +### Summary + +Using this pattern, you tell CockroachDB to put the leaseholder for the table itself (also called the primary index) in one region, create 2 secondary indexes on the table, and tell CockroachDB to put the leaseholder for each secondary index in one of the other regions. This means that reads will access the local leaseholder (either for the table itself or for one of the secondary indexes). Writes, however, will still leave the region to get consensus for the table and its secondary indexes. + +Duplicate Indexes topology + +### Steps + +Assuming you have a [cluster deployed across three regions](#cluster-setup) and a table like the following: + +{% include copy-clipboard.html %} +~~~ sql +> CREATE TABLE postal_codes ( + id INT PRIMARY KEY, + code STRING +); +~~~ + +1. If you do not already have one, [request a trial Enterprise license](https://www.cockroachlabs.com/get-cockroachdb). + +2. [Create a replication zone](configure-zone.html) for the table and set a leaseholder preference telling CockroachDB to put the leaseholder for the table in one of the regions, for example `us-west`: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER TABLE postal_codes + CONFIGURE ZONE USING lease_preferences = '[[+region=us-west]]'; + ~~~ + +3. [Create secondary indexes](create-index.html) on the table for each of your other regions, including all of the columns you wish to read, either entirely in the key or in the key plus a [`STORING`](create-index.html#store-columns) clause: + + {% include copy-clipboard.html %} + ~~~ sql + > CREATE INDEX idx_central ON postal_codes (id) + STORING (code); + ~~~ + + {% include copy-clipboard.html %} + ~~~ sql + > CREATE INDEX idx_east ON postal_codes (id) + STORING (code); + ~~~ + +4. [Create a replication zone](configure-zone.html) for each secondary index, in each case setting a leaseholder preference telling CockroachDB to put the leaseholder for the index in a distinct region: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER INDEX postal_codes@idx_central + CONFIGURE ZONE USING lease_preferences = '[[+region=us-central]]'; + ~~~ + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER INDEX postal_codes@idx_east + CONFIGURE ZONE USING lease_preferences = '[[+region=us-east]]'; + ~~~ + +## Characteristics + +### Latency + +#### Reads + +Reads access the local leaseholder and, therefore, never leave the region. This makes read latency very low. + +For example, in the animation below: + +1. The read request in `us-central` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the relevant leaseholder. In `us-west`, the leaseholder is for the table itself.
In the other regions, the leaseholder is for the relevant index, which the [cost-based optimizer](cost-based-optimizer.html) uses due to the leaseholder preferences. +4. The leaseholder retrieves the results and returns to the gateway node. +5. The gateway node returns the results to the client. + +Pinned secondary indexes topology + +#### Writes + +The replicas for the table and its secondary indexes are spread across all 3 regions, so writes involve multiple network hops across regions to achieve consensus. This increases write latency significantly. It's also important to understand that the replication of extra indexes can reduce throughput and increase storage cost. + +For example, in the animation below: + +1. The write request in `us-central` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replicas for the table and its secondary indexes. +4. While each leaseholder appends the write to its Raft log, it notifies its follower replicas. +5. In each case, as soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas. +6. The leaseholders then return acknowledgement of the commit to the gateway node. +7. The gateway node returns the acknowledgement to the client. + +Duplicate Indexes topology + +### Resiliency + +Because this pattern balances the replicas for the table and its secondary indexes across regions, one entire region can fail without interrupting access to the table: + +Pinned Secondary Indexes topology + + + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.1/topology-follow-the-workload.md b/v19.1/topology-follow-the-workload.md new file mode 100644 index 00000000000..333f8c63307 --- /dev/null +++ b/v19.1/topology-follow-the-workload.md @@ -0,0 +1,88 @@ +--- +title: Follow-the-Workload Topology +summary: Guidance on using the follow-the-workload topology in a multi-region deployment. +toc: true +--- + +In a multi-region deployment, follow-the-workload is the default pattern for tables that use no other pattern. In general, this default pattern is a good choice only for tables with the following requirements: + +- The table is active mostly in one region at a time, e.g., following the sun. +- In the active region, read latency must be low, but write latency can be higher. +- In non-active regions, both read and write latency can be higher. +- Table data must remain available during a region failure. + +{{site.data.alerts.callout_info}} +Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables. +{{site.data.alerts.end}} + +{{site.data.alerts.callout_success}} +If read performance is your main focus for a table, but you want low-latency reads everywhere instead of just in the most active region, consider the [Duplicate Indexes](topology-duplicate-indexes.html) or [Follower Reads](topology-follower-reads.html) pattern. 
+{{site.data.alerts.end}} + +## Prerequisites + +### Fundamentals + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +### Cluster setup + +{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %} + +## Configuration + +Aside from [deploying a cluster across three regions](#cluster-setup) properly, with each node started with the [`--locality`](start-a-node.html#locality) flag specifying its region and AZ combination, this pattern requires no extra configuration. CockroachDB will balance the replicas for a table across the three regions and will assign the range lease to the replica in the region with the greatest demand at any given time (the [follow-the-workload](demo-follow-the-workload.html) feature). This means that read latency in the active region will be low while read latency in other regions will be higher due to having to leave the region to reach the leaseholder. Write latency will be higher as well due to always involving replicas in multiple regions. + +Follower reads topology + +{{site.data.alerts.callout_info}} +This pattern is also used by [system ranges containing important internal data](configure-replication-zones.html#create-a-replication-zone-for-a-system-range). +{{site.data.alerts.end}} + +## Characteristics + +### Latency + +#### Reads + +Reads in the region with the most demand will access the local leaseholder and, therefore, never leave the region. This makes read latency very low in the currently most active region. Reads in other regions, however, will be routed to the leaseholder in a different region and, thus, read latency will be higher. + +For example, in the animation below, the most active region is `us-east` and, thus, the table's leaseholder is in that region: + +1. The read request in `us-east` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replica. +4. The leaseholder retrieves the results and returns to the gateway node. +5. The gateway node returns the results to the client. In this case, reads in the `us-east` remain in the region and are lower-latency than reads in other regions. + +Follow-the-workload topology + +#### Writes + +The replicas for the table are spread across all 3 regions, so writes involve multiple network hops across regions to achieve consensus. This increases write latency significantly. + +For example, in the animation below, assuming the most active region is still `us-east`: + +1. The write request in `us-east` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replica. +4. While the leaseholder appends the write to its Raft log, it notifies its follower replicas. +5. As soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas. +6. The leaseholders then return acknowledgement of the commit to the gateway node. +7. The gateway node returns the acknowledgement to the client. 
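To observe follow-the-workload in action, one option is to check which node holds a table's range lease, shift sustained client load to a different region, and then check again. This is a minimal sketch, not part of the original text, and assumes a hypothetical table named `quotes`:

{% include copy-clipboard.html %}
~~~ sql
-- Hypothetical table name; substitute a table your workload reads from.
> SHOW EXPERIMENTAL_RANGES FROM TABLE quotes;
~~~

If leases are following the workload as described above, the `lease_holder` node ID should move toward the region generating the most requests once the load there has been sustained for a while.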
+ +Follow-the-workload topology + +### Resiliency + +Because this pattern balances the replicas for the table across regions, one entire region can fail without interrupting access to the table: + +Follow-the-workload topology + + + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.1/topology-follower-reads.md b/v19.1/topology-follower-reads.md new file mode 100644 index 00000000000..6c381fe5994 --- /dev/null +++ b/v19.1/topology-follower-reads.md @@ -0,0 +1,132 @@ +--- +title: Follower Reads Topology +summary: Guidance on using the follower reads topology in a multi-region deployment. +toc: true +--- + +In a multi-region deployment, the follower reads pattern is a good choice for tables with the following requirements: + +- Read latency must be low, but write latency can be higher. +- Reads can be historical (48 seconds or more in the past). +- Rows in the table, and all latency-sensitive queries, **cannot** be tied to specific geographies (e.g., a reference table). +- Table data must remain available during a region failure. + +{{site.data.alerts.callout_info}} +Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables. +{{site.data.alerts.end}} + +{{site.data.alerts.callout_success}} +This pattern is compatible with all of the other multi-region patterns except [Geo-Partitioned Replicas](topology-geo-partitioned-replicas.html). However, if reads from a table must be exactly up-to-date, use the [Duplicate Indexes](topology-duplicate-indexes.html) or [Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) pattern instead. Up-to-date reads are required by tables referenced by [foreign keys](foreign-key.html), for example. +{{site.data.alerts.end}} + +## Prerequisites + +### Fundamentals + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +### Cluster setup + +{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %} + +## Configuration + +{{site.data.alerts.callout_info}} +Follower reads requires an [Enterprise license](https://www.cockroachlabs.com/get-cockroachdb). +{{site.data.alerts.end}} + +### Summary + +Using this pattern, you configure your application to use the [follower reads](follower-reads.html) feature by adding an `AS OF SYSTEM TIME` clause when reading from the table. This tells CockroachDB to read slightly historical data (at least 48 seconds in the past) from the closest replica so as to avoid being routed to the leaseholder, which may be in an entirely different region. Writes, however, will still leave the region to get consensus for the table. + +### Steps + +Follower reads topology + +Assuming you have a [cluster deployed across three regions](#cluster-setup) and a table like the following: + +{% include copy-clipboard.html %} +~~~ sql +> CREATE TABLE postal_codes ( + id INT PRIMARY KEY, + code STRING +); +~~~ + +1. If you do not already have one, [request a trial Enterprise license](https://www.cockroachlabs.com/get-cockroachdb). + +2.
Configure your app to use `AS OF SYSTEM TIME experimental_follower_read_timestamp()` whenever reading from the table: + + {{site.data.alerts.callout_info}} + The `experimental_follower_read_timestamp()` [function](functions-and-operators.html) will set the [`AS OF SYSTEM TIME`](as-of-system-time.html) value to the minimum required for follower reads. + {{site.data.alerts.end}} + + {% include copy-clipboard.html %} + ~~~ sql + > SELECT code FROM postal_codes + WHERE id = 5 + AS OF SYSTEM TIME experimental_follower_read_timestamp(); + ~~~ + + Alternately, instead of modifying individual read queries on the table, you can set the `AS OF SYSTEM TIME` value for all operations in a read-only transaction: + + {% include copy-clipboard.html %} + ~~~ sql + > BEGIN AS OF SYSTEM TIME experimental_follower_read_timestamp(); + + SELECT code FROM postal_codes + WHERE id = 5; + + SELECT code FROM postal_codes + WHERE id = 6; + + COMMIT; + ~~~ + +## Characteristics + +### Latency + +#### Reads + +Reads retrieve historical data from the closest replica and, therefore, never leave the region. This makes read latency very low but slightly stale. + +For example, in the animation below: + +1. The read request in `us-central` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the closest replica for the table. In this case, the replica is *not* the leaseholder. +4. The replica retrieves the results as of at least 48 seconds in the past and returns to the gateway node. +5. The gateway node returns the results to the client. + +Follower reads topology + +#### Writes + +The replicas for the table are spread across all 3 regions, so writes involve multiple network hops across regions to achieve consensus. This increases write latency significantly. + +For example, in the animation below: + +1. The write request in `us-central` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replica for the table in `us-east`. +4. Once the leaseholder has appended the write to its Raft log, it notifies its follower replicas. +5. As soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas. +6. The leaseholder then returns acknowledgement of the commit to the gateway node. +7. The gateway node returns the acknowledgement to the client. + +Follower reads topology + +### Resiliency + +Because this pattern balances the replicas for the table across regions, one entire region can fail without interrupting access to the table: + +Follower reads topology + + + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.1/topology-geo-partitioned-leaseholders.md b/v19.1/topology-geo-partitioned-leaseholders.md new file mode 100644 index 00000000000..472e5a88189 --- /dev/null +++ b/v19.1/topology-geo-partitioned-leaseholders.md @@ -0,0 +1,179 @@ +--- +title: Geo-Partitioned Leaseholders Topology +summary: Common cluster topology patterns with setup examples and performance considerations. +toc: true +--- + +In a multi-region deployment, the geo-partitioned leaseholders topology is a good choice for tables with the following requirements: + +- Read latency must be low, but write latency can be higher. 
+- Reads must be up-to-date for business reasons or because the table is reference by [foreign keys](foreign-key.html). +- Rows in the table, and all latency-sensitive queries, can be tied to specific geographies, e.g., city, state, region. +- Table data must remain available during a region failure. + +{{site.data.alerts.callout_info}} +Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables. +{{site.data.alerts.end}} + +{{site.data.alerts.callout_success}} +If reads from a table can be historical (48 seconds or more in the past), consider the [Follower Reads](topology-follower-reads.html) pattern. If rows in the table, and all latency-sensitive queries, **cannot** be tied to specific geographies, consider the [Duplicate Indexes](topology-duplicate-indexes.html) pattern. +{{site.data.alerts.end}} + +## Prerequisites + +### Fundamentals + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +### Cluster setup + +{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %} + +## Configuration + +{{site.data.alerts.callout_info}} +Geo-partitioning requires an [Enterprise license](https://www.cockroachlabs.com/get-cockroachdb). +{{site.data.alerts.end}} + +### Summary + +Using this pattern, you design your table schema to allow for [partitioning](partitioning.html#table-creation), with a column identifying geography as the first column in the table's compound primary key (e.g., region/id). You tell CockroachDB to partition the table and all of its secondary indexes by that geography column, each partition becoming its own range of 3 replicas. You then tell CockroachDB to put the leaseholder for each partition in the relevant region (e.g., LA partitions in `us-west`, NY partitions in `us-east`). The other replicas of a partition remain balanced across the other regions. This means that reads in each region will access local leaseholders and, therefore, will have low, intra-region latencies. Writes, however, will leave the region to get consensus and, therefore, will have higher, cross-region latencies. + +Geo-partitioned leaseholders topology + +### Steps + +Assuming you have a [cluster deployed across three regions](#cluster-setup) and a table and secondary index like the following: + +{% include copy-clipboard.html %} +~~~ sql +> CREATE TABLE users ( + id UUID NOT NULL DEFAULT gen_random_uuid(), + city STRING NOT NULL, + first_name STRING NOT NULL, + last_name STRING NOT NULL, + address STRING NOT NULL, + PRIMARY KEY (city ASC, id ASC) +); +~~~ + +{% include copy-clipboard.html %} +~~~ sql +> CREATE INDEX users_last_name_index ON users (city, last_name); +~~~ + +1. If you do not already have one, [request a trial Enterprise license](https://www.cockroachlabs.com/get-cockroachdb). + +2. Partition the table by `city`. For example, assuming there are three possible `city` values, `los angeles`, `chicago`, and `new york`: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER TABLE users PARTITION BY LIST (city) ( + PARTITION la VALUES IN ('los angeles'), + PARTITION chicago VALUES IN ('chicago'), + PARTITION ny VALUES IN ('new york') + ); + ~~~ + + This creates distinct ranges for each partition of the table. + +3. 
Partition the secondary index by `city` as well: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER INDEX users_last_name_index PARTITION BY LIST (city) ( + PARTITION la_idx VALUES IN ('los angeles'), + PARTITION chicago_idx VALUES IN ('chicago'), + PARTITION ny_idx VALUES IN ('new york') + ); + ~~~ + + This creates distinct ranges for each partition of the secondary index. + +4. For each partition of the table, [create a replication zone](configure-zone.html) that tells CockroachDB to put the partition's leaseholder in the relevant region: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER PARTITION la OF TABLE users + CONFIGURE ZONE USING + constraints = '{"+us-west":1}', + lease_preferences = '[[+region=us-west]]'; + ALTER PARTITION chicago OF TABLE users + CONFIGURE ZONE USING + constraints = '{"+us-central":1}', + lease_preferences = '[[+region=us-central]]'; + ALTER PARTITION ny OF TABLE users + CONFIGURE ZONE USING + constraints = '{"+us-east":1}', + lease_preferences = '[[+region=us-east]]'; + ~~~ + +5. For each partition of the secondary index, [create a replication zone](configure-zone.html) that tells CockroachDB to put the partition's leaseholder in the relevant region: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER PARTITION la_idx OF TABLE users + CONFIGURE ZONE USING + constraints = '{"+us-west":1}', + lease_preferences = '[[+region=us-west]]'; + ALTER PARTITION chicago_idx OF TABLE users + CONFIGURE ZONE USING + constraints = '{"+us-central":1}', + lease_preferences = '[[+region=us-central]]'; + ALTER PARTITION ny_idx OF TABLE users + CONFIGURE ZONE USING + constraints = '{"+us-east":1}', + lease_preferences = '[[+region=us-east]]'; + ~~~ + +{{site.data.alerts.callout_success}} +As you scale and add more cities, you can repeat steps 2 and 3 with the new complete list of cities to re-partition the table and its secondary indexes, and then repeat steps 4 and 5 to create replication zones for the new partitions. +{{site.data.alerts.end}} + +## Characteristics + +### Latency + +#### Reads + +Because each partition's leaseholder is constrained to the relevant region (e.g., the `la` and `la_idx` partitions' leaseholders are located in the `us-west` region), reads that specify the local region key access the relevant leaseholder locally. This makes read latency very low, with the exception of reads that do not specify a region key or that refer to a partition in another region. + +For example, in the animation below: + +1. The read request in `us-west` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder for the relevant partition. +4. The leaseholder retrieves the results and returns to the gateway node. +5. The gateway node returns the results to the client. + +Geo-partitoned leaseholders topology + +#### Writes + +Just like for reads, because each partition's leaseholder is constrained to the relevant region (e.g., the `la` and `la_idx` partitions' leaseholders are located in the `us-west` region), writes that specify the local region key access the relevant leaseholder replicas locally. However, a partition's other replicas are spread across the other regions, so writes involve multiple network hops across regions to achieve consensus. This increases write latency significantly. + +For example, in the animation below: + +1. The write request in `us-west` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. 
The gateway node routes the request to the leaseholder replicas for the relevant table and secondary index partitions. +4. While each leaseholder appends the write to its Raft log, it notifies its follower replicas, which are in the other regions. +5. In each case, as soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas. +6. The leaseholders then return acknowledgement of the commit to the gateway node. +7. The gateway node returns the acknowledgement to the client. + +Geo-partitioned leaseholders topology + +### Resiliency + +Because this pattern balances the replicas for each partition across regions, one entire region can fail without interrupting access to any partitions. In this case, if any range loses its leaseholder in the region-wide outage, CockroachDB makes one of the range's other replicas the leaseholder: + +Geo-partitioning topology + + + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.1/topology-geo-partitioned-replicas.md b/v19.1/topology-geo-partitioned-replicas.md new file mode 100644 index 00000000000..f98895bc242 --- /dev/null +++ b/v19.1/topology-geo-partitioned-replicas.md @@ -0,0 +1,170 @@ +--- +title: Geo-Partitioned Replicas Topology +summary: Guidance on using the geo-partitioned replicas topology in a multi-region deployment. +toc: true +--- + +In a multi-region deployment, the geo-partitioned replicas topology is a good choice for tables with the following requirements: + +- Read and write latency must be low. +- Rows in the table, and all latency-sensitive queries, can be tied to specific geographies, e.g., city, state, region. +- Regional data must remain available during an AZ failure, but it's OK for regional data to become unavailable during a region-wide failure. + +{{site.data.alerts.callout_info}} +Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables. +{{site.data.alerts.end}} + +## Prerequisites + +### Fundamentals + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +### Cluster setup + +{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %} + +## Configuration + +{{site.data.alerts.callout_info}} +Geo-partitioning requires an [Enterprise license](https://www.cockroachlabs.com/get-cockroachdb). +{{site.data.alerts.end}} + +### Summary + +Using this pattern, you design your table schema to allow for [partitioning](partitioning.html#table-creation), with a column identifying geography as the first column in the table's compound primary key (e.g., region/id). You tell CockroachDB to partition the table and all of its secondary indexes by that geography column, each partition becoming its own range of 3 replicas. You then tell CockroachDB to pin each partition (all of its replicas) to the relevant region (e.g., LA partitions in `us-west`, NY partitions in `us-east`). This means that reads and writes in each region will always have access to the relevant replicas and, therefore, will have low, intra-region latencies.
+ +Geo-partitioning topology + +### Steps + +Assuming you have a [cluster deployed across three regions](#cluster-setup) and a table and secondary index like the following: + +{% include copy-clipboard.html %} +~~~ sql +> CREATE TABLE users ( + id UUID NOT NULL DEFAULT gen_random_uuid(), + city STRING NOT NULL, + first_name STRING NOT NULL, + last_name STRING NOT NULL, + address STRING NOT NULL, + PRIMARY KEY (city ASC, id ASC) +); +~~~ + +{% include copy-clipboard.html %} +~~~ sql +> CREATE INDEX users_last_name_index ON users (city, last_name); +~~~ + +{{site.data.alerts.callout_info}} +A geo-partitioned table does not require a secondary index. However, if the table does have one or more secondary indexes, each index must be partitioned as well. This means that the indexes must start with the column identifying geography, like the table itself, which impacts the queries they'll be useful for. If you can't partition all secondary indexes on a table you want to geo-partition, consider the [Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) pattern instead. +{{site.data.alerts.end}} + +1. If you do not already have one, [request a trial Enterprise license](https://www.cockroachlabs.com/get-cockroachdb). + +2. Partition the table by `city`. For example, assuming there are three possible `city` values, `los angeles`, `chicago`, and `new york`: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER TABLE users PARTITION BY LIST (city) ( + PARTITION la VALUES IN ('los angeles'), + PARTITION chicago VALUES IN ('chicago'), + PARTITION ny VALUES IN ('new york') + ); + ~~~ + + This creates distinct ranges for each partition of the table. + +3. Partition the secondary index by `city` as well: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER INDEX users_last_name_index PARTITION BY LIST (city) ( + PARTITION la_idx VALUES IN ('los angeles'), + PARTITION chicago_idx VALUES IN ('chicago'), + PARTITION ny_idx VALUES IN ('new york') + ); + ~~~ + + This creates distinct ranges for each partition of the secondary index. + +4. For each partition of the table, [create a replication zone](configure-zone.html) that constrains the partition's replicas to nodes in the relevant region: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER PARTITION la OF TABLE users + CONFIGURE ZONE USING constraints = '[+region=us-west]'; + ALTER PARTITION chicago OF TABLE users + CONFIGURE ZONE USING constraints = '[+region=us-central]'; + ALTER PARTITION ny OF TABLE users + CONFIGURE ZONE USING constraints = '[+region=us-east]'; + ~~~ + +5. For each partition of the secondary index, [create a replication zone](configure-zone.html) that constrains the partition's replicas to nodes in the relevant region: + + {% include copy-clipboard.html %} + ~~~ sql + > ALTER PARTITION la_idx OF TABLE users + CONFIGURE ZONE USING constraints = '[+region=us-west]'; + ALTER PARTITION chicago_idx OF TABLE users + CONFIGURE ZONE USING constraints = '[+region=us-central]'; + ALTER PARTITION ny_idx OF TABLE users + CONFIGURE ZONE USING constraints = '[+region=us-east]'; + ~~~ + +{{site.data.alerts.callout_success}} +As you scale and add more cities, you can repeat steps 2 and 3 with the new complete list of cities to re-partition the table and its secondary indexes, and then repeat steps 4 and 5 to create replication zones for the new partitions.
+{{site.data.alerts.end}} + +## Characteristics + +### Latency + +#### Reads + +Because each partition is constrained to the relevant region (e.g., the `la` and `la_idx` partitions are located in the `us-west` region), reads that specify the local region key access the relevant leaseholder locally. This makes read latency very low, with the exception of reads that do not specify a region key or that refer to a partition in another region; such reads will be transactionally consistent but won't have local latencies. + +For example, in the animation below: + +1. The read request in `us-central` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder for the relevant partition. +4. The leaseholder retrieves the results and returns to the gateway node. +5. The gateway node returns the results to the client. + +Geo-partitoning topology + +#### Writes + +Just like for reads, because each partition is constrained to the relevant region (e.g., the `la` and `la_idx` partitions are located in the `us-west` region), writes that specify the local region key access the relevant replicas without leaving the region. This makes write latency very low, with the exception of writes that do not specify a region key or that refer to a partition in another region; such writes will be transactionally consistent but won't have local latencies. + +For example, in the animation below: + +1. The write request in `us-central` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replicas for the relevant table and secondary index partitions. +4. While each leaseholder appends the write to its Raft log, it notifies its follower replicas, which are in the same region. +5. In each case, as soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas. +6. The leaseholders then return acknowledgement of the commit to the gateway node. +7. The gateway node returns the acknowledgement to the client. + +Geo-partitioning topology + +### Resiliency + +Because each partition is constrained to the relevant region and balanced across the 3 AZs in the region, one AZ can fail per region without interrupting access to the partitions in that region: + +Geo-partitioning topology + +However, if an entire region fails, the partitions in that region become unavailable for reads and writes, even if your load balancer can redirect requests to different region: + +Geo-partitioning topology + +## Tutorial + +For a step-by-step demonstration of how this pattern gets you low-latency reads and writes in a broadly distributed cluster, see the [Geo-Partitioning tutorial](demo-geo-partitioning.html). + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.1/topology-patterns.md b/v19.1/topology-patterns.md new file mode 100644 index 00000000000..ec4665d9d9f --- /dev/null +++ b/v19.1/topology-patterns.md @@ -0,0 +1,42 @@ +--- +title: Topology Patterns +summary: Recommended topology patterns for running CockroachDB in a cloud environment. 
+toc: true +redirect_from: cluster-topology-patterns.html +key: cluster-topology-patterns.html +--- + +This section provides recommended topology patterns for running CockroachDB in a cloud environment, each with required configurations and latency and resiliency characteristics. + +## Single-region patterns + +When your clients are in a single geographic region, choosing a topology is straightforward. + +Pattern | Latency | Resiliency | Configuration +--------|---------|------------|-------------- +[Development](topology-development.html) |
  • Fast reads and writes
|
  • None
|
  • 1 node
  • No replication
+[Basic Production](topology-basic-production.html) |
  • Fast reads and writes
|
  • 1 AZ failure
|
  • 1 region
  • 3 AZs
  • 3+ nodes across AZs
+
+## Multi-region patterns
+
+When your clients are in multiple geographic regions, it is important to deploy your cluster across regions properly and then carefully choose the right topology for each of your tables. Not doing so can result in unexpectedly high latency and reduced resiliency.
+
+{{site.data.alerts.callout_info}}
+Multi-region patterns are almost always table-specific. For example, you might use the [Geo-Partitioning](topology-geo-partitioned-replicas.html) pattern for frequently updated tables that are geographically specific and the [Duplicate Indexes](topology-duplicate-indexes.html) pattern for infrequently updated tables (e.g., reference tables) that are not tied to geography.
+{{site.data.alerts.end}}
+
+Pattern | Latency | Resiliency | Configuration
+--------|---------|------------|--------------
+[Geo-Partitioned Replicas](topology-geo-partitioned-replicas.html) |
  • Fast regional reads and writes
|
  • 1 AZ failure per partition
|
  • Geo-partitioned table
  • Partition replicas pinned to regions
+[Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) |
  • Fast regional reads
  • Slower cross-region writes
|
  • 1 region failure
|
  • Geo-partitioned table
  • Partition replicas spread across regions
  • Partition leaseholders pinned to regions
+[Duplicate Indexes](topology-duplicate-indexes.html) |
  • Fast regional reads (current)
  • Much slower cross-region writes
|
  • 1 region failure
|
  • Multiple identical indexes
  • Index replicas spread across regions
  • Index leaseholders pinned to regions
+[Follower Reads](topology-follower-reads.html) |
  • Fast regional reads (historical)
  • Slower cross-region writes
|
  • 1 region failure
|
  • App configured to use follower reads
+[Follow-the-Workload](topology-follow-the-workload.html) |
  • Fast regional reads (active region)
  • Slower cross-region reads (elsewhere)
  • Slower cross-region writes
|
  • 1 region failure
|
  • None
    + +## Anti-patterns + +The following anti-patterns are ineffective or risky: + +- Single-region deployments using 2 AZs, or multi-region deployments using 2 regions. In these cases, the cluster would be unable to survive the loss of a single AZ or a single region, respectively. +- Broadly distributed multi-region deployments (e.g., `us-west`, `asia`, and `europe`) using only the default [Follow-the-Workload](topology-follow-the-workload.html) pattern. In this case, latency will likely be unacceptably high. +- [Geo-partitioned tables](topology-geo-partitioned-replicas.html) with non-partitioned secondary indexes. In this case, writes will incur cross-region latency to achieve consensus on the non-partitioned indexes. diff --git a/v19.2/cluster-topology-patterns.md b/v19.2/cluster-topology-patterns.md deleted file mode 100644 index 8fc99c639b9..00000000000 --- a/v19.2/cluster-topology-patterns.md +++ /dev/null @@ -1,189 +0,0 @@ ---- -title: Cluster Topology Patterns -summary: Common cluster topology patterns with setup examples and performance considerations. -toc: true ---- - -This page covers common cluster topology patterns with setup examples, as well as the benefits and trade-off for each pattern. Before you select a candidate pattern for your cluster, use the following broad patterns as a starting point and consider trade-offs. - -## Considerations - -Before selecting a pattern: - -- Review the recommendations and requirements in our [Production Checklist](recommended-production-settings.html). -- Review the [CockroachDB architecture](architecture/overview.html). It's especially important to understand how data is stored in ranges, how ranges are replicated, and how one replica in each range serves as the "leaseholder" that coordinates all read and write requests for that range. For more details and some example scenarios, see [Reads and Writes in CockroachDB](architecture/reads-and-writes-overview.html). -- Learn about the concept of [locality](start-a-node.html#locality), which makes CockroachDB aware of the location of nodes and able to intelligently balance replicas across localities. Locality is also a prerequisite for the [follow-the-workload](demo-follow-the-workload.html) feature and for enterprise [partitioning](partitioning.html). -- Learn about [follower reads](follower-reads.html), an enterprise feature, which reduces latency for read queries by letting the closest replica serve the read request at the expense of only not guaranteeing that data is up to date. - -{{site.data.alerts.callout_info}} -This page does not factor in hardware differences. -{{site.data.alerts.end}} - -## Single-region clusters - -### Single datacenter, basic pattern - -This first example is of a single-datacenter cluster, with each node on a different machine as per our [basic topology recommendations](recommended-production-settings.html#basic-topology-recommendations). This pattern is common starting point for smaller organizations who may not have the resources (or need) to worry about a datacenter failure but still want to take advantage of CockroachDB's [high availability](high-availability.html). - -Local deployment - -For the diagram above: - -**Configuration** - -- `App` is an application that accesses CockroachDB. -- `Load Balancer` is a software-based load balancer. -- Leaseholders are denoted by a dashed line. -- The 3 nodes are all running in a single datacenter. -- The cluster is using the default replication factor of 3 (represented by 3 blocks of the same color). 
Each range (e.g., `r1`) has 3 replicas, with each replica on a different node. - -**Availability expectations** - -- With the default replication factor of 3, the cluster can tolerate 1 node failure. In such a case, all ranges still have 2 replicas on live nodes and, thus, a majority. - -**Performance expectations** - -- The network latency among the nodes is expected to be sub-millisecond. - -### Single datacenter, more performant and/or resilient - -While the [basic single-datacenter deployment](#single-datacenter-basic-pattern) takes advantage of CockroachDB's high availability, shares the load, and spreads capacity, scaling out the nodes has many benefits: - -- Performance: Adding nodes for more processing power and/or storage typically increases throughput. For example, with five nodes and a replication factor of 3, each range has 3 replicas, with each replica on a different node. In this case, there will only be 1-2 replicas on each nod, leaving additional storage and bandwidth available. -- Resiliency: There will be more room to increase the replication factor, which increases resiliency against the failure of more than one node. For example, with 5 nodes and a replication factor of 5, each range has 5 replicas, with each replica on a different node. In this case, even with 2 nodes down, each range retains a majority of its replicas (3/5). - -There are no constraints on node increments. - -Resilient local deployment - -## Multi-region clusters - -### Multiple regions, basic pattern - -Once an organization begins to grow, a datacenter outage isn't acceptable and a cluster needs to be available all of the time. This is where a multi-region cluster is useful. A multi-region cluster is comprised of multiple datacenters in different regions (e.g., `us-east`, `us-west`), each with multiple nodes. CockroachDB will automatically try to diversify replica placement across localities (i.e., place a replica in each region). This setup can be used when your application is not SLA-sensitive, or you do not care about write performance. With this cluster pattern, many organizations will consider transitioning to using a variety of cloud providers (one provider per region). - -In this example, the cluster has an asymmetrical setup where `us-central` is closer to the `us-west` than the `us-east`. This configuration will provide better write latency to the write workloads in `us-west` and `us-central` because there is a lower latency (versus writing in the `us-east`). - -Basic pattern for multi-region - -Each region has 3 nodes across 3 datacenters and does not use partitioning: - -Basic pattern for multi-region - -For this example: - -#### Configuration - -- `App` is an application that accesses CockroachDB. -- `Load Balancer`s are software-based load balancers that direct traffic to each of the regions' nodes at random. -- Leaseholders are denoted by a dashed line. -- 9 nodes are spread across 3 regions (`us-west`, `us-central`, `us-east`) within a country (`us`). -- Every region has 3 nodes, with each node in a different datacenter (e.g., `us-west-a`, `us-west-b`, `us-west-c`). 
Each node is started with the `--locality` flag to identify which region and datacenter it is in: - - ~~~ - --locality=region=us-west,datacenter=us-west-a - --locality=region=us-west,datacenter=us-west-b - --locality=region=us-west,datacenter=us-west-c - --locality=region=us-central,datacenter=us-central-a - --locality=region=us-central,datacenter=us-central-b - --locality=region=us-central,datacenter=us-central-c - --locality=region=us-east,datacenter=us-east-a - --locality=region=us-east,datacenter=us-east-b - --locality=region=us-east,datacenter=us-east-c - ~~~ - - - -- The cluster is using a replication factor of 3 (represented by 3 blocks of the same color). Each range (e.g., `r1`) has 3 replicas, with each replica on a different node. - -**Availability expectations** - -- If all of the nodes for a preferred locality are down, then the app will try datacenters in other localities. -- The cluster can withstand a datacenter failure without losing a region because there are 2 nodes in each region. -- The cluster can withstand a regional failure because, with `--locality` specified on each node as shown above, the cluster balances each range across all 3 regions; with one region down, each range still has a majority of its replicas (2/3). - -**Performance expectations** - -- The latency numbers (e.g., `60ms`) in the first diagram represent network round-trip from one region to another. -- For reads, if the gateway node (the node the app connects to) is in the region containing the leaseholder replica of the relevant range, latency should be around 2ms. If the gateway node is in a region that does not contain the leaseholder, the cluster will route the request to the node with the leaseholder in another region, that node will retrieve the data, and then the cluster will return the data to the gateway node. In this case, the network round-trips from one region to another will add latency. In some cases, [follow-the-workload](demo-follow-the-workload.html) will increase the speed for reads by moving the leaseholder closer to the application. -- For writes, because a majority of replicas are always required to agree before a write is committed, latencies will be as fast as the slowest quorum between 2 regions. - -### Multiple regions, more performant (with partitioning) - -While the [basic pattern for a multi-region cluster](#multiple-regions-basic-pattern) can help protect against datacenter and regional failures, there will be high latency due to cross-country roundtrips. This is not ideal for organizations who have users spread out across the country (or world). For any multi-region cluster, [partitioning](partitioning.html) should be used to keep data close to the users who access it. - -In this example, a table is partitioned by a column indicating the region where a customer is located (e.g., a table has a `city` column and the values `LA`, `SF`, and `SD` are partitioned to the `us-west` region). Then, [zone configurations](configure-replication-zones.html) are used to keep the replicas and leaseholders for each partition in the closest datacenter to those customer. 
- -This setup uses a modern [multi-tier architecture](https://en.wikipedia.org/wiki/Multitier_architecture), which is simplified to global server load balancer (`GSLB`), `App`, and `Load Balancer` layers in the below diagram: - -Partitioned multi-region - -**Configuration** - -A multi-region cluster with partitioning has a similar setup as the [basic multi-region pattern](#multiple-regions-basic-pattern): - -- 9 nodes are spread across 3 regions (`us-west`, `us-central`, `us-east`) within a country (`us`). -- A client connects to geographically close `app` server via `GSLB`. -- Inside each region, an `app` server connects to one of the CockroachDB nodes within the region through a software-based `load balancer`. -- Every region has 3 nodes across 3 datacenters (e.g., `us-west-a`, `us-west-b`, `us-west-c`). Each node is started with the `--locality` flag to identify which region it is in: - - ~~~ - --locality=region=us-west,datacenter=us-west-a - --locality=region=us-west,datacenter=us-west-b - --locality=region=us-west,datacenter=us-west-c - --locality=region=us-central,datacenter=us-central-a - --locality=region=us-central,datacenter=us-central-b - --locality=region=us-central,datacenter=us-central-c - --locality=region=us-east,datacenter=us-east-a - --locality=region=us-east,datacenter=us-east-b - --locality=region=us-east,datacenter=us-east-c - ~~~ - - - - -- The cluster is using a replication factor of 3 (represented by the 3 blocks of the same color). Each range (e.g., `r1`) has a prefix (`w-` for West, `c-` for Central, `e-` for East), which denotes the partition that is replicated. -- Leaseholders are denoted by a dashed line. -- Tables are [partitioned](partitioning.html) at the row level by locality, for example: - - ~~~ - > CREATE TABLE customers ( - id INT DEFAULT unique_rowid(), - name STRING, - email STRING, - state STRING, - expected_graduation_date DATE, - PRIMARY KEY (state, id)) - PARTITION BY LIST (state) ( - PARTITION west VALUES IN ('CA','OR','WA'[...]), - PARTITION central VALUES IN ('OH','IL','MI'[...]), - PARTITION east VALUES IN ('NY','MA','VA'[...]), - PARTITION DEFAULT VALUES IN (default) - ); - ~~~ - -- Using [replication zones](partitioning.html#define-table-partitions-by-list), partitions are pinned to the nodes in their locality, for example: - - ~~~ - > ALTER PARTITION west OF TABLE customers \ - CONFIGURE ZONE USING constraints='[+region=us-west]'; - ~~~ - -**Availability expectations** - -- The cluster as a whole can withstand a regional failure because system-level ranges have their replicas balanced across regions. However, because user data is partitioned and pinned to specific regions, region-specific data will be unavailable during a regional failure. -- Within a region, partitions pinned to the region will remain available as long as 2/3 datacenters are up. - -**Performance expectations** - -- Reads respond in 2-4 milliseconds. -- Writes respond in 2-4 milliseconds. -- Symmetrical latency between datacenters. - -## Anti-patterns - -Anti-patterns are commonly used patterns that are ineffective or risky. Consider the following when choosing a cluster pattern: - -- Do not deploy to 2 datacenters. A cluster across 2 datacenters is not protected against datacenter failure. In order to survive the failure of a datacenter, you need to deploy your cluster across 3 or more datacenters. -- Do not deploy to regions with high network latency (e.g., `us-west`, `asia`, and `europe`) without using [partitioning](partitioning.html). 
-- The cluster's replication factor does not need to be the same as the number of nodes in the cluster. In fact, as you scale your cluster, you should add nodes (but keep the replication factor at 5, for example) to improve performance. This is shown in the [Single datacenter, more resilient and/or performant](#single-datacenter-more-performant-and-or-resilient) section. diff --git a/v19.2/configure-replication-zones.md b/v19.2/configure-replication-zones.md index 02db0fab414..7ff8e76e1e2 100644 --- a/v19.2/configure-replication-zones.md +++ b/v19.2/configure-replication-zones.md @@ -125,7 +125,7 @@ Constraint Scope | Description | Syntax ### Node/replica recommendations -See [Cluster Topography](recommended-production-settings.html#cluster-topology) recommendations for production deployments. +See [Cluster Topography](recommended-production-settings.html#topology) recommendations for production deployments. ## View replication zones diff --git a/v19.2/deploy-cockroachdb-on-aws-insecure.md b/v19.2/deploy-cockroachdb-on-aws-insecure.md index 545c10185c0..e3d67e74ebb 100644 --- a/v19.2/deploy-cockroachdb-on-aws-insecure.md +++ b/v19.2/deploy-cockroachdb-on-aws-insecure.md @@ -65,13 +65,13 @@ You can create these rules using [Security Groups' Inbound Rules](http://docs.aw [Create an instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/launching-instance.html) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate instance for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use `m` (general purpose), `c` (compute-optimized), or `i` (storage-optimized) [instances](https://aws.amazon.com/ec2/instance-types/), with SSD-backed [EBS volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) or [Instance Store volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ssd-instance-store.html). For example, Cockroach Labs has used `m3.large` instances (2 vCPUs and 7.5 GiB of RAM per instance) for internal testing. - **Do not** use ["burstable" `t2` instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/t2-instances.html), which limit the load on a single core. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. Synchronize clocks diff --git a/v19.2/deploy-cockroachdb-on-aws.md b/v19.2/deploy-cockroachdb-on-aws.md index 748467af5a0..4c033dd0e06 100644 --- a/v19.2/deploy-cockroachdb-on-aws.md +++ b/v19.2/deploy-cockroachdb-on-aws.md @@ -72,7 +72,7 @@ You can create these rules using [Security Groups' Inbound Rules](http://docs.aw - **Do not** use ["burstable" `t2` instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/t2-instances.html), which limit the load on a single core. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). 
## Step 3. Synchronize clocks diff --git a/v19.2/deploy-cockroachdb-on-digital-ocean-insecure.md b/v19.2/deploy-cockroachdb-on-digital-ocean-insecure.md index e4c7ee8d9ef..1de235383fb 100644 --- a/v19.2/deploy-cockroachdb-on-digital-ocean-insecure.md +++ b/v19.2/deploy-cockroachdb-on-digital-ocean-insecure.md @@ -29,11 +29,11 @@ This page shows you how to deploy an insecure multi-node CockroachDB cluster on [Create Droplets](https://www.digitalocean.com/community/tutorials/how-to-create-your-first-digitalocean-droplet) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate droplet for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use any [droplets](https://www.digitalocean.com/pricing/) except standard droplets with only 1 GB of RAM, which is below our minimum requirement. All Digital Ocean droplets use SSD storage. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 2. Synchronize clocks diff --git a/v19.2/deploy-cockroachdb-on-digital-ocean.md b/v19.2/deploy-cockroachdb-on-digital-ocean.md index e38d523f941..494f9b27d00 100644 --- a/v19.2/deploy-cockroachdb-on-digital-ocean.md +++ b/v19.2/deploy-cockroachdb-on-digital-ocean.md @@ -29,11 +29,11 @@ If you are only testing CockroachDB, or you are not concerned with protecting ne [Create Droplets](https://www.digitalocean.com/community/tutorials/how-to-create-your-first-digitalocean-droplet) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate Droplet for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use any [droplets](https://www.digitalocean.com/pricing/) except standard droplets with only 1 GB of RAM, which is below our minimum requirement. All Digital Ocean droplets use SSD storage. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 2. Synchronize clocks diff --git a/v19.2/deploy-cockroachdb-on-google-cloud-platform-insecure.md b/v19.2/deploy-cockroachdb-on-google-cloud-platform-insecure.md index fa954d3545b..566c91e3f09 100644 --- a/v19.2/deploy-cockroachdb-on-google-cloud-platform-insecure.md +++ b/v19.2/deploy-cockroachdb-on-google-cloud-platform-insecure.md @@ -56,7 +56,7 @@ Applications will not connect directly to your CockroachDB nodes. Instead, they' [Create an instance](https://cloud.google.com/compute/docs/instances/create-start-instance) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate instance for that workload. 
-- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use `n1-standard` or `n1-highcpu` [predefined VMs](https://cloud.google.com/compute/pricing#predefined_machine_types), or [custom VMs](https://cloud.google.com/compute/pricing#custommachinetypepricing), with [Local SSDs](https://cloud.google.com/compute/docs/disks/#localssds) or [SSD persistent disks](https://cloud.google.com/compute/docs/disks/#pdspecs). For example, Cockroach Labs has used custom VMs (8 vCPUs and 16 GiB of RAM per VM) for internal testing. @@ -64,7 +64,7 @@ Applications will not connect directly to your CockroachDB nodes. Instead, they' - If you used a tag for your firewall rules, when you create the instance, select **Management, disk, networking, SSH keys**. Then on the **Networking** tab, in the **Network tags** field, enter **cockroachdb**. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. Synchronize clocks diff --git a/v19.2/deploy-cockroachdb-on-google-cloud-platform.md b/v19.2/deploy-cockroachdb-on-google-cloud-platform.md index 67c97d3a696..6af58b8003f 100644 --- a/v19.2/deploy-cockroachdb-on-google-cloud-platform.md +++ b/v19.2/deploy-cockroachdb-on-google-cloud-platform.md @@ -56,7 +56,7 @@ Applications will not connect directly to your CockroachDB nodes. Instead, they' [Create an instance](https://cloud.google.com/compute/docs/instances/create-start-instance) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate instance for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use `n1-standard` or `n1-highcpu` [predefined VMs](https://cloud.google.com/compute/pricing#predefined_machine_types), or [custom VMs](https://cloud.google.com/compute/pricing#custommachinetypepricing), with [Local SSDs](https://cloud.google.com/compute/docs/disks/#localssds) or [SSD persistent disks](https://cloud.google.com/compute/docs/disks/#pdspecs). For example, Cockroach Labs has used custom VMs (8 vCPUs and 16 GiB of RAM per VM) for internal testing. @@ -64,7 +64,7 @@ Applications will not connect directly to your CockroachDB nodes. Instead, they' - If you used a tag for your firewall rules, when you create the instance, select **Management, disk, networking, SSH keys**. Then on the **Networking** tab, in the **Network tags** field, enter **cockroachdb**. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. 
Synchronize clocks diff --git a/v19.2/deploy-cockroachdb-on-microsoft-azure-insecure.md b/v19.2/deploy-cockroachdb-on-microsoft-azure-insecure.md index 34b17dba091..4f003afa46a 100644 --- a/v19.2/deploy-cockroachdb-on-microsoft-azure-insecure.md +++ b/v19.2/deploy-cockroachdb-on-microsoft-azure-insecure.md @@ -71,7 +71,7 @@ To enable this in Azure, you must create a Resource Group, Virtual Network, and [Create Linux VMs](https://docs.microsoft.com/en-us/azure/virtual-machines/virtual-machines-linux-quick-create-portal) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate VM for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use storage-optimized [Ls-series](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-storage) VMs with [Premium Storage](https://docs.microsoft.com/en-us/azure/virtual-machines/windows/premium-storage) or local SSD storage with a Linux filesystem such as `ext4` (not the Windows `ntfs` filesystem). For example, Cockroach Labs has used `Standard_L4s` VMs (4 vCPUs and 32 GiB of RAM per VM) for internal testing. @@ -81,7 +81,7 @@ To enable this in Azure, you must create a Resource Group, Virtual Network, and - When creating the VMs, make sure to select the **Resource Group**, **Virtual Network**, and **Network Security Group** you created. -For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. Synchronize clocks diff --git a/v19.2/deploy-cockroachdb-on-microsoft-azure.md b/v19.2/deploy-cockroachdb-on-microsoft-azure.md index 349fcef85bd..138d855af5a 100644 --- a/v19.2/deploy-cockroachdb-on-microsoft-azure.md +++ b/v19.2/deploy-cockroachdb-on-microsoft-azure.md @@ -68,7 +68,7 @@ To enable this in Azure, you must create a Resource Group, Virtual Network, and [Create Linux VMs](https://docs.microsoft.com/en-us/azure/virtual-machines/virtual-machines-linux-quick-create-portal) for each node you plan to have in your cluster. If you plan to run a sample workload against the cluster, create a separate VM for that workload. -- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#cluster-topology). +- Run at least 3 nodes to [ensure survivability](recommended-production-settings.html#topology). - Use storage-optimized [Ls-series](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-storage) VMs with [Premium Storage](https://docs.microsoft.com/en-us/azure/virtual-machines/windows/premium-storage) or local SSD storage with a Linux filesystem such as `ext4` (not the Windows `ntfs` filesystem). For example, Cockroach Labs has used `Standard_L4s` VMs (4 vCPUs and 32 GiB of RAM per VM) for internal testing. @@ -78,7 +78,7 @@ To enable this in Azure, you must create a Resource Group, Virtual Network, and - When creating the VMs, make sure to select the **Resource Group**, **Virtual Network**, and **Network Security Group** you created. 
-For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#cluster-topology). +For more details, see [Hardware Recommendations](recommended-production-settings.html#hardware) and [Cluster Topology](recommended-production-settings.html#topology). ## Step 3. Synchronize clocks diff --git a/v19.2/index.md b/v19.2/index.md index 8ee57af28f2..75177798188 100644 --- a/v19.2/index.md +++ b/v19.2/index.md @@ -62,9 +62,9 @@ CockroachDB is the SQL database for building global, scalable cloud services tha

    Deploy

    diff --git a/v19.2/performance-best-practices-overview.md b/v19.2/performance-best-practices-overview.md index 9ae3b8170fc..bbc14810fef 100644 --- a/v19.2/performance-best-practices-overview.md +++ b/v19.2/performance-best-practices-overview.md @@ -8,7 +8,7 @@ build_for: [standard, managed] This page provides best practices for optimizing SQL performance in CockroachDB. {{site.data.alerts.callout_success}} -For a demonstration of some of these techniques, see [Performance Tuning](performance-tuning.html). +For a demonstration of some of these techniques, see [Performance Tuning](performance-tuning.html). For guidance on deployment and data location techniques to minimize network latency, see [Topology Patterns](topology-patterns.html). {{site.data.alerts.end}} ## Multi-row DML best practices diff --git a/v19.2/recommended-production-settings.md b/v19.2/recommended-production-settings.md index b8447f4af9e..d3102adfd86 100644 --- a/v19.2/recommended-production-settings.md +++ b/v19.2/recommended-production-settings.md @@ -6,40 +6,25 @@ toc: true This page provides important recommendations for production deployments of CockroachDB. -## Cluster topology +## Topology -### Terminology +When planning your deployment, it's important to carefully review and choose the [topology patterns](topology-patterns.html) that best meet your latency and resiliency requirements. This is especially crucial for multi-region deployments. -To properly plan your [cluster's topology](cluster-topology-patterns.html), it's important to review some basic CockroachDB-specific terminology: - -Term | Definition ------|------------ -**Cluster** | Your CockroachDB deployment, which acts as a single logical application that contains one or more databases. -**Node** | An individual machine running CockroachDB. Many nodes join to create your cluster. -**Range** | CockroachDB stores all user data and almost all system data in a giant sorted map of key-value pairs. This keyspace is divided into "ranges", contiguous chunks of the keyspace, so that every key can always be found in a single range. -**Replica** | CockroachDB replicates each range (3 times by default) and stores each replica on a different node. -**Range Lease** | For each range, one of the replicas holds the "range lease". This replica, referred to as the "leaseholder", is the one that receives and coordinates all read and write requests for the range. - -### Basic topology recommendations +Also keep in mind some basic topology recommendations: - Run each node on a separate machine. Since CockroachDB replicates across nodes, running more than one node per machine increases the risk of data loss if a machine fails. Likewise, if a machine has multiple disks or SSDs, run one node with multiple `--store` flags and not one node per disk. For more details about stores, see [Start a Node](start-a-node.html#store). -- When deploying in a single datacenter: +- When starting each node, use the [`--locality`](start-a-node.html#locality) flag to describe the node's location, for example, `--locality=region=west,zone=us-west-1`. The key-value pairs should be ordered from most to least inclusive, and the keys and order of key-value pairs must be the same on all nodes. + +- When deploying in a single availability zone: - To be able to tolerate the failure of any 1 node, use at least 3 nodes with the [`.default` 3-way replication factor](configure-replication-zones.html#view-the-default-replication-zone). 
In this case, if 1 node fails, each range retains 2 of its 3 replicas, a majority. - To be able to tolerate 2 simultaneous node failures, use at least 5 nodes and [increase the `.default` replication factor for user data](configure-replication-zones.html#edit-the-default-replication-zone) to 5. The replication factor for [important internal data](configure-replication-zones.html#create-a-replication-zone-for-a-system-range) is 5 by default, so no adjustments are needed for internal data. In this case, if 2 nodes fail at the same time, each range retains 3 of its 5 replicas, a majority. -- When deploying across multiple datacenters in one or more regions: - - To be able to tolerate the failure of 1 entire datacenter, use at least 3 datacenters and set `--locality` on each node to spread data evenly across datacenters (see next bullet for more details). In this case, if 1 datacenter goes offline, the 2 remaining datacenters retain a majority of replicas. - - When starting each node, use the [`--locality`](start-a-node.html#locality) flag to describe the node's location, for example, `--locality=region=west,datacenter=us-west-1`. The key-value pairs should be ordered from most to least inclusive, and the keys and order of key-value pairs must be the same on all nodes. - - CockroachDB spreads the replicas of each piece of data across as diverse a set of localities as possible, with the order determining the priority. However, locality can also be used to influence the location of data replicas in various ways using [replication zones](configure-replication-zones.html#replication-constraints). - - When there is high latency between nodes, CockroachDB uses locality to move range leases closer to the current workload, reducing network round trips and improving read performance, also known as ["follow-the-workload"](demo-follow-the-workload.html). In a deployment across more than 3 datacenters, however, to ensure that all data benefits from "follow-the-workload", you must [increase the replication factor](configure-replication-zones.html#edit-the-default-replication-zone) to match the total number of datacenters. - - Locality is also a prerequisite for using the [table partitioning](partitioning.html) and [**Node Map**](enable-node-map.html) enterprise features. - -{{site.data.alerts.callout_success}} -For added context about CockroachDB's fault tolerance and automated repair capabilities, see [this training](training/fault-tolerance-and-automated-repair.html). -{{site.data.alerts.end}} +- When deploying across multiple availability zones: + - To be able to tolerate the failure of 1 entire AZ in a region, use at least 3 AZs per region and set `--locality` on each node to spread data evenly across regions and AZs. In this case, if 1 AZ goes offline, the 2 remaining AZs retain a majority of replicas. + - To be able to tolerate the failure of 1 entire region, use at least 3 regions. ## Hardware diff --git a/v19.2/topology-basic-production.md b/v19.2/topology-basic-production.md new file mode 100644 index 00000000000..957a976f363 --- /dev/null +++ b/v19.2/topology-basic-production.md @@ -0,0 +1,90 @@ +--- +title: Basic Production Topology +summary: Guidance for a single-region production deployment. +toc: true +--- + +When you're ready to run CockroachDB in production in a single region, it's important to deploy at least 3 CockroachDB nodes to take advantage of CockroachDB's automatic replication, distribution, rebalancing, and resiliency capabilities. 
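+
+As a quick sanity check (a suggested verification only, not a required step), you can confirm that the cluster's default replication factor is 3 by inspecting the default replication zone; the output should show `num_replicas = 3`:
+
+{% include copy-clipboard.html %}
+~~~ sql
+> SHOW ZONE CONFIGURATION FOR RANGE default;
+~~~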
+ +{{site.data.alerts.callout_success}} +If you haven't already, [review the full range of topology patterns](topology-patterns.html) to ensure you choose the right one for your use case. +{{site.data.alerts.end}} + +## Prerequisites + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +## Configuration + +Basic production topology + +1. Provision hardware as follows: + - 1 region with 3 AZs + - 3+ VMs evenly distributed across AZs; add more VMs to increase throughput + - App and load balancer in same region as VMs for CockroachDB + - The load balancer redirects to CockroachDB nodes in the region + +2. Start each node on a separate VM, setting the [`--locality`](start-a-node.html#locality) flag to the node's region and AZ combination, e.g.: + + {% include copy-clipboard.html %} + ~~~ shell + # Example start command for node in the east1 AZ of the us-east region: + $ cockroach start \ + --locality=region=us-east,zone=east1 \ + --certs-dir=certs \ + --advertise-addr= \ + --join=:26257,:26257,:26257 \ + --cache=.25 \ + --max-sql-memory=.25 \ + --background + ~~~ + +With the default 3-way replication factor and `--locality` set as described above, CockroachDB balances each range of table data across AZs, one replica per AZ. System data is replicated 5 times by default and also balanced across AZs, thus increasing the [resiliency of the cluster](configure-replication-zones.html#create-a-replication-zone-for-a-system-range) as a whole. + +## Characteristics + +### Latency + +#### Reads + +Since all ranges, including leaseholder replicas, are in a single region, read latency is very low. + +For example, in the animation below: + +1. The read request reaches the load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the relevant leaseholder. +4. The leaseholder retrieves the results and returns to the gateway node. +5. The gateway node returns the results to the client. + +Basic production topology + +#### Writes + +Since all ranges are in a single region, writes achieve consensus without leaving the region and, thus, write latency is very low as well. + +For example, in the animation below: + +1. The write request reaches the load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replicas for the relevant table and secondary index. +4. While each leaseholder appends the write to its Raft log, it notifies its follower replicas. +5. In each case, as soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas. +6. The leaseholders then return acknowledgement of the commit to the gateway node. +7. The gateway node returns the acknowledgement to the client. 
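+
+To see how a table's data is actually spread across the AZs, one option (a sketch only; `users` is a hypothetical table name, so substitute one of your own tables) is to list the table's ranges, which shows each range's replicas and current leaseholder:
+
+{% include copy-clipboard.html %}
+~~~ sql
+> SHOW RANGES FROM TABLE users;
+~~~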
+ +Leaseholder preferences topology + +### Resiliency + +Because each range is balanced across AZs, one AZ can fail without interrupting access to any data: + +Basic production topology + +However, if an additional AZ fails at the same time, the ranges that lose consensus become unavailable for reads and writes: + +Basic production topology + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.2/topology-development.md b/v19.2/topology-development.md new file mode 100644 index 00000000000..75dd05c1ee9 --- /dev/null +++ b/v19.2/topology-development.md @@ -0,0 +1,39 @@ +--- +title: Development Topology +summary: Guidance for a single-node cluster for local development. +toc: true +--- + +While developing an application against CockroachDB, it's sufficient to deploy a single-node cluster close to your test application, whether that's on a single VM or on your laptop. + +{{site.data.alerts.callout_success}} +If you haven't already, [review the full range of topology patterns](topology-patterns.html) to ensure you choose the right one for your use case. +{{site.data.alerts.end}} + +## Prerequisites + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +## Configuration + +Development topology + +For this pattern, you can either [run CockroachDB locally](start-a-local-cluster.html) or [deploy a single-node cluster on a cloud VM](manual-deployment.html). + +## Characteristics + +### Latency + +With the CockroachDB node in the same region as your client, and without the overhead of replication, both read and write latency are very low: + +Development topology + +### Resiliency + +In a single-node cluster, CockroachDB does not replicate data and, therefore, is not resilient to failures. If the machine where the node is running fails, or if the region or availability zone containing the machine fails, the cluster becomes unavailable: + +Development topology + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.2/topology-duplicate-indexes.md b/v19.2/topology-duplicate-indexes.md new file mode 100644 index 00000000000..c88fe365b34 --- /dev/null +++ b/v19.2/topology-duplicate-indexes.md @@ -0,0 +1,142 @@ +--- +title: Duplicate Indexes Topology +summary: Guidance on using the duplicate indexes topology in a multi-region deployment. +toc: true +--- + +In a multi-region deployment, the duplicate indexes pattern is a good choice for tables with the following requirements: + +- Read latency must be low, but write latency can be much higher. +- Reads must be up-to-date for business reasons or because the table is reference by [foreign keys](foreign-key.html). +- Rows in the table, and all latency-sensitive queries, **cannot** be tied to specific geographies. +- Table data must remain available during a region failure. + +In general, this pattern is suited well for immutable/reference tables that are rarely or never updated. + +{{site.data.alerts.callout_info}} +Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables. +{{site.data.alerts.end}} + +{{site.data.alerts.callout_success}} +If reads from a table can be historical (48 seconds or more in the past), consider the [Follower Reads](topology-follower-reads.html) pattern. 
If rows in the table, and all latency-sensitive queries, can be tied to specific geographies, consider the [Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) pattern instead. Both patterns avoid extra secondary indexes, which would increase data replication and, therefore, reduce write throughput and increase storage requirements.
+{{site.data.alerts.end}}
+
+## Prerequisites
+
+### Fundamentals
+
+{% include {{ page.version.version }}/topology-patterns/fundamentals.md %}
+
+### Cluster setup
+
+{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %}
+
+## Configuration
+
+{{site.data.alerts.callout_info}}
+Pinning secondary indexes requires an [Enterprise license](https://www.cockroachlabs.com/get-cockroachdb).
+{{site.data.alerts.end}}
+
+### Summary
+
+Using this pattern, you tell CockroachDB to put the leaseholder for the table itself (also called the primary index) in one region, create 2 secondary indexes on the table, and tell CockroachDB to put the leaseholder for each secondary index in one of the other regions. This means that reads will access the local leaseholder (either for the table itself or for one of the secondary indexes). Writes, however, will still leave the region to get consensus for the table and its secondary indexes.
+
+Duplicate Indexes topology
+
+### Steps
+
+Assuming you have a [cluster deployed across three regions](#cluster-setup) and a table like the following:
+
+{% include copy-clipboard.html %}
+~~~ sql
+> CREATE TABLE postal_codes (
+    id INT PRIMARY KEY,
+    code STRING
+);
+~~~
+
+1. If you do not already have one, [request a trial Enterprise license](https://www.cockroachlabs.com/get-cockroachdb).
+
+2. [Create a replication zone](configure-zone.html) for the table and set a leaseholder preference telling CockroachDB to put the leaseholder for the table in one of the regions, for example `us-west`:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER TABLE postal_codes
+        CONFIGURE ZONE USING lease_preferences = '[[+region=us-west]]';
+    ~~~
+
+3. [Create secondary indexes](create-index.html) on the table for each of your other regions, including all of the columns you wish to read either in the key or in the key and a [`STORING`](create-index.html#store-columns) clause:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > CREATE INDEX idx_central ON postal_codes (id)
+        STORING (code);
+    ~~~
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > CREATE INDEX idx_east ON postal_codes (id)
+        STORING (code);
+    ~~~
+
+4. [Create a replication zone](configure-zone.html) for each secondary index, in each case setting a leaseholder preference telling CockroachDB to put the leaseholder for the index in a distinct region:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER INDEX postal_codes@idx_central
+        CONFIGURE ZONE USING lease_preferences = '[[+region=us-central]]';
+    ~~~
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER INDEX postal_codes@idx_east
+        CONFIGURE ZONE USING lease_preferences = '[[+region=us-east]]';
+    ~~~
+
+## Characteristics
+
+### Latency
+
+#### Reads
+
+Reads access the local leaseholder and, therefore, never leave the region. This makes read latency very low.
+
+For example, in the animation below:
+
+1. The read request in `us-central` reaches the regional load balancer.
+2. The load balancer routes the request to a gateway node.
+3. The gateway node routes the request to the relevant leaseholder. In `us-west`, the leaseholder is for the table itself.
In the other regions, the leaseholder is for the relevant index, which the [cost-based optimizer](cost-based-optimizer.html) uses due to the leaseholder preferences. +4. The leaseholder retrieves the results and returns to the gateway node. +5. The gateway node returns the results to the client. + +Pinned secondary indexes topology + +#### Writes + +The replicas for the table and its secondary indexes are spread across all 3 regions, so writes involve multiple network hops across regions to achieve consensus. This increases write latency significantly. It's also important to understand that the replication of extra indexes can reduce throughput and increase storage cost. + +For example, in the animation below: + +1. The write request in `us-central` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replicas for the table and its secondary indexes. +4. While each leaseholder appends the write to its Raft log, it notifies its follower replicas. +5. In each case, as soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas. +6. The leaseholders then return acknowledgement of the commit to the gateway node. +7. The gateway node returns the acknowledgement to the client. + +Duplicate Indexes topology + +### Resiliency + +Because this pattern balances the replicas for the table and its secondary indexes across regions, one entire region can fail without interrupting access to the table: + +Pinned Secondary Indexes topology + + + +## See also + +{% include {{ page.version.version }}/topology-patterns/see-also.md %} diff --git a/v19.2/topology-follow-the-workload.md b/v19.2/topology-follow-the-workload.md new file mode 100644 index 00000000000..e1087c3cf4c --- /dev/null +++ b/v19.2/topology-follow-the-workload.md @@ -0,0 +1,88 @@ +--- +title: Follow-the-Workload Topology +summary: Guidance on using the follow-the-workload topology in a multi-region deployment. +toc: true +--- + +In a multi-region deployment, follow-the-workload is the default pattern for tables that use no other pattern. In general, this default pattern is a good choice only for tables with the following requirements: + +- The table is active mostly in one region at a time, e.g., following the sun. +- In the active region, read latency must be low, but write latency can be higher. +- In non-active regions, both read and write latency can be higher. +- Table data must remain available during a region failure. + +{{site.data.alerts.callout_info}} +Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables. +{{site.data.alerts.end}} + +{{site.data.alerts.callout_success}} +If read performance is your main focus for a table, but you want low-latency reads everywhere instead of just in the most active region, consider the [Duplicate Indexes](topology-duplicate-indexes.html) or [Follower Reads](topology-follower-reads.html) pattern. 
+{{site.data.alerts.end}} + +## Prerequisites + +### Fundamentals + +{% include {{ page.version.version }}/topology-patterns/fundamentals.md %} + +### Cluster setup + +{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %} + +## Configuration + +Aside from [deploying a cluster across three regions](#cluster-setup) properly, with each node started with the [`--locality`](start-a-node.html#locality) flag specifying its region and AZ combination, this pattern requires no extra configuration. CockroachDB will balance the replicas for a table across the three regions and will assign the range lease to the replica in the region with the greatest demand at any given time (the [follow-the-workload](demo-follow-the-workload.html) feature). This means that read latency in the active region will be low while read latency in other regions will be higher due to having to leave the region to reach the leaseholder. Write latency will be higher as well due to always involving replicas in multiple regions. + +Follower reads topology + +{{site.data.alerts.callout_info}} +This pattern is also used by [system ranges containing important internal data](configure-replication-zones.html#create-a-replication-zone-for-a-system-range). +{{site.data.alerts.end}} + +## Characteristics + +### Latency + +#### Reads + +Reads in the region with the most demand will access the local leaseholder and, therefore, never leave the region. This makes read latency very low in the currently most active region. Reads in other regions, however, will be routed to the leaseholder in a different region and, thus, read latency will be higher. + +For example, in the animation below, the most active region is `us-east` and, thus, the table's leaseholder is in that region: + +1. The read request in `us-east` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replica. +4. The leaseholder retrieves the results and returns to the gateway node. +5. The gateway node returns the results to the client. In this case, reads in the `us-east` remain in the region and are lower-latency than reads in other regions. + +Follow-the-workload topology + +#### Writes + +The replicas for the table are spread across all 3 regions, so writes involve multiple network hops across regions to achieve consensus. This increases write latency significantly. + +For example, in the animation below, assuming the most active region is still `us-east`: + +1. The write request in `us-east` reaches the regional load balancer. +2. The load balancer routes the request to a gateway node. +3. The gateway node routes the request to the leaseholder replica. +4. While the leaseholder appends the write to its Raft log, it notifies its follower replicas. +5. As soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas. +6. The leaseholders then return acknowledgement of the commit to the gateway node. +7. The gateway node returns the acknowledgement to the client. 
+
+Follow-the-workload topology
+
+### Resiliency
+
+Because this pattern balances the replicas for the table across regions, one entire region can fail without interrupting access to the table:
+
+Follow-the-workload topology
+
+
+
+## See also
+
+{% include {{ page.version.version }}/topology-patterns/see-also.md %}
diff --git a/v19.2/topology-follower-reads.md b/v19.2/topology-follower-reads.md
new file mode 100644
index 00000000000..8c725c63590
--- /dev/null
+++ b/v19.2/topology-follower-reads.md
@@ -0,0 +1,132 @@
+---
+title: Follower Reads Topology
+summary: Guidance on using the follower reads topology in a multi-region deployment.
+toc: true
+---
+
+In a multi-region deployment, the follower reads pattern is a good choice for tables with the following requirements:
+
+- Read latency must be low, but write latency can be higher.
+- Reads can be historical (48 seconds or more in the past).
+- Rows in the table, and all latency-sensitive queries, **cannot** be tied to specific geographies (e.g., a reference table).
+- Table data must remain available during a region failure.
+
+{{site.data.alerts.callout_info}}
+Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables.
+{{site.data.alerts.end}}
+
+{{site.data.alerts.callout_success}}
+This pattern is compatible with all of the other multi-region patterns except [Geo-Partitioned Replicas](topology-geo-partitioned-replicas.html). However, if reads from a table must be exactly up-to-date, use the [Duplicate Indexes](topology-duplicate-indexes.html) or [Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) pattern instead. Up-to-date reads are required by tables referenced by [foreign keys](foreign-key.html), for example.
+{{site.data.alerts.end}}
+
+## Prerequisites
+
+### Fundamentals
+
+{% include {{ page.version.version }}/topology-patterns/fundamentals.md %}
+
+### Cluster setup
+
+{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %}
+
+## Configuration
+
+{{site.data.alerts.callout_info}}
+Follower reads requires an [Enterprise license](https://www.cockroachlabs.com/get-cockroachdb).
+{{site.data.alerts.end}}
+
+### Summary
+
+Using this pattern, you configure your application to use the [follower reads](follower-reads.html) feature by adding an `AS OF SYSTEM TIME` clause when reading from the table. This tells CockroachDB to read slightly historical data (at least 48 seconds in the past) from the closest replica so as to avoid being routed to the leaseholder, which may be in an entirely different region. Writes, however, will still leave the region to get consensus for the table.
+
+### Steps
+
+Follower reads topology
+
+Assuming you have a [cluster deployed across three regions](#cluster-setup) and a table like the following:
+
+{% include copy-clipboard.html %}
+~~~ sql
+> CREATE TABLE postal_codes (
+    id INT PRIMARY KEY,
+    code STRING
+);
+~~~
+
+1. If you do not already have one, [request a trial Enterprise license](https://www.cockroachlabs.com/get-cockroachdb).
+
+2. 
+
+    {{site.data.alerts.callout_info}}
+    The `experimental_follower_read_timestamp()` [function](functions-and-operators.html) will set the [`AS OF SYSTEM TIME`](as-of-system-time.html) value to the minimum required for follower reads.
+    {{site.data.alerts.end}}
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > SELECT code FROM postal_codes
+        WHERE id = 5
+        AS OF SYSTEM TIME experimental_follower_read_timestamp();
+    ~~~
+
+    Alternatively, instead of modifying individual read queries on the table, you can set the `AS OF SYSTEM TIME` value for all operations in a read-only transaction:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > BEGIN AS OF SYSTEM TIME experimental_follower_read_timestamp();
+
+    SELECT code FROM postal_codes
+        WHERE id = 5;
+
+    SELECT code FROM postal_codes
+        WHERE id = 6;
+
+    COMMIT;
+    ~~~
+
+## Characteristics
+
+### Latency
+
+#### Reads
+
+Reads retrieve historical data from the closest replica and, therefore, never leave the region. This makes read latency very low, though the data retrieved is slightly stale (at least 48 seconds old).
+
+For example, in the animation below:
+
+1. The read request in `us-central` reaches the regional load balancer.
+2. The load balancer routes the request to a gateway node.
+3. The gateway node routes the request to the closest replica for the table. In this case, the replica is *not* the leaseholder.
+4. The replica retrieves the results as of at least 48 seconds in the past and returns them to the gateway node.
+5. The gateway node returns the results to the client.
+
+Follower reads topology
+
+#### Writes
+
+The replicas for the table are spread across all 3 regions, so writes involve multiple network hops across regions to achieve consensus. This increases write latency significantly.
+
+For example, in the animation below:
+
+1. The write request in `us-central` reaches the regional load balancer.
+2. The load balancer routes the request to a gateway node.
+3. The gateway node routes the request to the leaseholder replica for the table in `us-east`.
+4. Once the leaseholder has appended the write to its Raft log, it notifies its follower replicas.
+5. As soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas.
+6. The leaseholder then returns acknowledgement of the commit to the gateway node.
+7. The gateway node returns the acknowledgement to the client.
+
+Follower reads topology
+
+### Resiliency
+
+Because this pattern balances the replicas for the table across regions, one entire region can fail without interrupting access to the table:
+
+Follower reads topology
+
+## See also
+
+{% include {{ page.version.version }}/topology-patterns/see-also.md %}
diff --git a/v19.2/topology-geo-partitioned-leaseholders.md b/v19.2/topology-geo-partitioned-leaseholders.md
new file mode 100644
index 00000000000..3de00983e8f
--- /dev/null
+++ b/v19.2/topology-geo-partitioned-leaseholders.md
@@ -0,0 +1,179 @@
+---
+title: Geo-Partitioned Leaseholders Topology
+summary: Guidance on using the geo-partitioned leaseholders topology in a multi-region deployment.
+toc: true
+---
+
+In a multi-region deployment, the geo-partitioned leaseholders topology is a good choice for tables with the following requirements:
+
+- Read latency must be low, but write latency can be higher.
+- Reads must be up-to-date for business reasons or because the table is referenced by [foreign keys](foreign-key.html).
+- Rows in the table, and all latency-sensitive queries, can be tied to specific geographies, e.g., city, state, region.
+- Table data must remain available during a region failure.
+
+{{site.data.alerts.callout_info}}
+Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables.
+{{site.data.alerts.end}}
+
+{{site.data.alerts.callout_success}}
+If reads from a table can be historical (48 seconds or more in the past), consider the [Follower Reads](topology-follower-reads.html) pattern. If rows in the table, and all latency-sensitive queries, **cannot** be tied to specific geographies, consider the [Duplicate Indexes](topology-duplicate-indexes.html) pattern.
+{{site.data.alerts.end}}
+
+## Prerequisites
+
+### Fundamentals
+
+{% include {{ page.version.version }}/topology-patterns/fundamentals.md %}
+
+### Cluster setup
+
+{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %}
+
+## Configuration
+
+{{site.data.alerts.callout_info}}
+Geo-partitioning requires an [Enterprise license](https://www.cockroachlabs.com/get-cockroachdb).
+{{site.data.alerts.end}}
+
+### Summary
+
+Using this pattern, you design your table schema to allow for [partitioning](partitioning.html#table-creation), with a column identifying geography as the first column in the table's compound primary key (e.g., region/id). You tell CockroachDB to partition the table and all of its secondary indexes by that geography column, each partition becoming its own range of 3 replicas. You then tell CockroachDB to put the leaseholder for each partition in the relevant region (e.g., LA partitions in `us-west`, NY partitions in `us-east`). The other replicas of a partition remain balanced across the other regions. This means that reads in each region will access local leaseholders and, therefore, will have low, intra-region latencies. Writes, however, will leave the region to get consensus and, therefore, will have higher, cross-region latencies.
+
+Geo-partitioned leaseholders topology
+
+### Steps
+
+Assuming you have a [cluster deployed across three regions](#cluster-setup) and a table and secondary index like the following:
+
+{% include copy-clipboard.html %}
+~~~ sql
+> CREATE TABLE users (
+    id UUID NOT NULL DEFAULT gen_random_uuid(),
+    city STRING NOT NULL,
+    first_name STRING NOT NULL,
+    last_name STRING NOT NULL,
+    address STRING NOT NULL,
+    PRIMARY KEY (city ASC, id ASC)
+);
+~~~
+
+{% include copy-clipboard.html %}
+~~~ sql
+> CREATE INDEX users_last_name_index ON users (city, last_name);
+~~~
+
+1. If you do not already have one, [request a trial Enterprise license](https://www.cockroachlabs.com/get-cockroachdb).
+
+2. Partition the table by `city`. For example, assuming there are three possible `city` values, `los angeles`, `chicago`, and `new york`:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER TABLE users PARTITION BY LIST (city) (
+        PARTITION la VALUES IN ('los angeles'),
+        PARTITION chicago VALUES IN ('chicago'),
+        PARTITION ny VALUES IN ('new york')
+    );
+    ~~~
+
+    This creates distinct ranges for each partition of the table.
+
+3. Partition the secondary index by `city` as well:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER INDEX users_last_name_index PARTITION BY LIST (city) (
+        PARTITION la_idx VALUES IN ('los angeles'),
+        PARTITION chicago_idx VALUES IN ('chicago'),
+        PARTITION ny_idx VALUES IN ('new york')
+    );
+    ~~~
+
+    This creates distinct ranges for each partition of the secondary index.
+
+4. For each partition of the table, [create a replication zone](configure-zone.html) that tells CockroachDB to put the partition's leaseholder in the relevant region:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER PARTITION la OF TABLE users
+        CONFIGURE ZONE USING
+          constraints = '{"+region=us-west":1}',
+          lease_preferences = '[[+region=us-west]]';
+      ALTER PARTITION chicago OF TABLE users
+        CONFIGURE ZONE USING
+          constraints = '{"+region=us-central":1}',
+          lease_preferences = '[[+region=us-central]]';
+      ALTER PARTITION ny OF TABLE users
+        CONFIGURE ZONE USING
+          constraints = '{"+region=us-east":1}',
+          lease_preferences = '[[+region=us-east]]';
+    ~~~
+
+5. For each partition of the secondary index, [create a replication zone](configure-zone.html) that tells CockroachDB to put the partition's leaseholder in the relevant region:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER PARTITION la_idx OF TABLE users
+        CONFIGURE ZONE USING
+          constraints = '{"+region=us-west":1}',
+          lease_preferences = '[[+region=us-west]]';
+      ALTER PARTITION chicago_idx OF TABLE users
+        CONFIGURE ZONE USING
+          constraints = '{"+region=us-central":1}',
+          lease_preferences = '[[+region=us-central]]';
+      ALTER PARTITION ny_idx OF TABLE users
+        CONFIGURE ZONE USING
+          constraints = '{"+region=us-east":1}',
+          lease_preferences = '[[+region=us-east]]';
+    ~~~
+
+{{site.data.alerts.callout_success}}
+As you scale and add more cities, you can repeat steps 2 and 3 with the new complete list of cities to re-partition the table and its secondary indexes, and then repeat steps 4 and 5 to create replication zones for the new partitions.
+{{site.data.alerts.end}}
+
+## Characteristics
+
+### Latency
+
+#### Reads
+
+Because each partition's leaseholder is constrained to the relevant region (e.g., the `la` and `la_idx` partitions' leaseholders are located in the `us-west` region), reads that specify the local region key access the relevant leaseholder locally. This makes read latency very low, with the exception of reads that do not specify a region key or that refer to a partition in another region.
+
+For example, in the animation below:
+
+1. The read request in `us-west` reaches the regional load balancer.
+2. The load balancer routes the request to a gateway node.
+3. The gateway node routes the request to the leaseholder for the relevant partition.
+4. The leaseholder retrieves the results and returns them to the gateway node.
+5. The gateway node returns the results to the client.
+
+Geo-partitioned leaseholders topology
+
+#### Writes
+
+Just like for reads, because each partition's leaseholder is constrained to the relevant region (e.g., the `la` and `la_idx` partitions' leaseholders are located in the `us-west` region), writes that specify the local region key access the relevant leaseholder replicas locally. However, a partition's other replicas are spread across the other regions, so writes involve multiple network hops across regions to achieve consensus. This increases write latency significantly.
+
+For example, in the animation below:
+
+1. The write request in `us-west` reaches the regional load balancer.
+2. The load balancer routes the request to a gateway node.
+3. The gateway node routes the request to the leaseholder replicas for the relevant table and secondary index partitions.
+4. While each leaseholder appends the write to its Raft log, it notifies its follower replicas, which are in the other regions.
+5. In each case, as soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas.
+6. The leaseholders then return acknowledgement of the commit to the gateway node.
+7. The gateway node returns the acknowledgement to the client.
+
+Geo-partitioned leaseholders topology
+
+### Resiliency
+
+Because this pattern balances the replicas for each partition across regions, one entire region can fail without interrupting access to any partitions. In this case, if any range loses its leaseholder in the region-wide outage, CockroachDB makes one of the range's other replicas the leaseholder:
+
+Geo-partitioning topology
+
+## See also
+
+{% include {{ page.version.version }}/topology-patterns/see-also.md %}
diff --git a/v19.2/topology-geo-partitioned-replicas.md b/v19.2/topology-geo-partitioned-replicas.md
new file mode 100644
index 00000000000..0c43653060d
--- /dev/null
+++ b/v19.2/topology-geo-partitioned-replicas.md
@@ -0,0 +1,170 @@
+---
+title: Geo-Partitioned Replicas Topology
+summary: Guidance on using the geo-partitioned replicas topology in a multi-region deployment.
+toc: true
+---
+
+In a multi-region deployment, the geo-partitioned replicas topology is a good choice for tables with the following requirements:
+
+- Read and write latency must be low.
+- Rows in the table, and all latency-sensitive queries, can be tied to specific geographies, e.g., city, state, region.
+- Regional data must remain available during an AZ failure, but it's OK for regional data to become unavailable during a region-wide failure.
+
+{{site.data.alerts.callout_info}}
+Multi-region topology patterns are almost always table-specific. If you haven't already, [review the full range of patterns](topology-patterns.html#multi-region-patterns) to ensure you choose the right one for each of your tables.
+{{site.data.alerts.end}}
+
+## Prerequisites
+
+### Fundamentals
+
+{% include {{ page.version.version }}/topology-patterns/fundamentals.md %}
+
+### Cluster setup
+
+{% include {{ page.version.version }}/topology-patterns/multi-region-cluster-setup.md %}
+
+## Configuration
+
+{{site.data.alerts.callout_info}}
+Geo-partitioning requires an [Enterprise license](https://www.cockroachlabs.com/get-cockroachdb).
+{{site.data.alerts.end}}
+
+### Summary
+
+Using this pattern, you design your table schema to allow for [partitioning](partitioning.html#table-creation), with a column identifying geography as the first column in the table's compound primary key (e.g., region/id). You tell CockroachDB to partition the table and all of its secondary indexes by that geography column, each partition becoming its own range of 3 replicas. You then tell CockroachDB to pin each partition (all of its replicas) to the relevant region (e.g., LA partitions in `us-west`, NY partitions in `us-east`). This means that reads and writes in each region will always have access to the relevant replicas and, therefore, will have low, intra-region latencies.
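+
+Once you have completed the steps below, you can confirm that the table and its secondary index were partitioned and that each partition picked up the intended replication zone. A minimal check, assuming the `users` table defined in the steps below (`SHOW PARTITIONS` should list each partition along with its zone configuration):
+
+{% include copy-clipboard.html %}
+~~~ sql
+> SHOW PARTITIONS FROM TABLE users;
+~~~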
+
+Geo-partitioning topology
+
+### Steps
+
+Assuming you have a [cluster deployed across three regions](#cluster-setup) and a table and secondary index like the following:
+
+{% include copy-clipboard.html %}
+~~~ sql
+> CREATE TABLE users (
+    id UUID NOT NULL DEFAULT gen_random_uuid(),
+    city STRING NOT NULL,
+    first_name STRING NOT NULL,
+    last_name STRING NOT NULL,
+    address STRING NOT NULL,
+    PRIMARY KEY (city ASC, id ASC)
+);
+~~~
+
+{% include copy-clipboard.html %}
+~~~ sql
+> CREATE INDEX users_last_name_index ON users (city, last_name);
+~~~
+
+{{site.data.alerts.callout_info}}
+A geo-partitioned table does not require a secondary index. However, if the table does have one or more secondary indexes, each index must be partitioned as well. This means that the indexes must start with the column identifying geography, like the table itself, which impacts the queries they'll be useful for. If you can't partition all secondary indexes on a table you want to geo-partition, consider the [Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) pattern instead.
+{{site.data.alerts.end}}
+
+1. If you do not already have one, [request a trial Enterprise license](https://www.cockroachlabs.com/get-cockroachdb).
+
+2. Partition the table by `city`. For example, assuming there are three possible `city` values, `los angeles`, `chicago`, and `new york`:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER TABLE users PARTITION BY LIST (city) (
+        PARTITION la VALUES IN ('los angeles'),
+        PARTITION chicago VALUES IN ('chicago'),
+        PARTITION ny VALUES IN ('new york')
+    );
+    ~~~
+
+    This creates distinct ranges for each partition of the table.
+
+3. Partition the secondary index by `city` as well:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER INDEX users_last_name_index PARTITION BY LIST (city) (
+        PARTITION la_idx VALUES IN ('los angeles'),
+        PARTITION chicago_idx VALUES IN ('chicago'),
+        PARTITION ny_idx VALUES IN ('new york')
+    );
+    ~~~
+
+    This creates distinct ranges for each partition of the secondary index.
+
+4. For each partition of the table, [create a replication zone](configure-zone.html) that constrains the partition's replicas to nodes in the relevant region:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER PARTITION la OF TABLE users
+        CONFIGURE ZONE USING constraints = '[+region=us-west]';
+      ALTER PARTITION chicago OF TABLE users
+        CONFIGURE ZONE USING constraints = '[+region=us-central]';
+      ALTER PARTITION ny OF TABLE users
+        CONFIGURE ZONE USING constraints = '[+region=us-east]';
+    ~~~
+
+5. For each partition of the secondary index, [create a replication zone](configure-zone.html) that constrains the partition's replicas to nodes in the relevant region:
+
+    {% include copy-clipboard.html %}
+    ~~~ sql
+    > ALTER PARTITION la_idx OF TABLE users
+        CONFIGURE ZONE USING constraints = '[+region=us-west]';
+      ALTER PARTITION chicago_idx OF TABLE users
+        CONFIGURE ZONE USING constraints = '[+region=us-central]';
+      ALTER PARTITION ny_idx OF TABLE users
+        CONFIGURE ZONE USING constraints = '[+region=us-east]';
+    ~~~
+
+{{site.data.alerts.callout_success}}
+As you scale and add more cities, you can repeat steps 2 and 3 with the new complete list of cities to re-partition the table and its secondary indexes, and then repeat steps 4 and 5 to create replication zones for the new partitions.
+{{site.data.alerts.end}}
+
+## Characteristics
+
+### Latency
+
+#### Reads
+
+Because each partition is constrained to the relevant region (e.g., the `la` and `la_idx` partitions are located in the `us-west` region), reads that specify the local region key access the relevant leaseholder locally. This makes read latency very low, with the exception of reads that do not specify a region key or that refer to a partition in another region; such reads will be transactionally consistent but won't have local latencies.
+
+For example, in the animation below:
+
+1. The read request in `us-central` reaches the regional load balancer.
+2. The load balancer routes the request to a gateway node.
+3. The gateway node routes the request to the leaseholder for the relevant partition.
+4. The leaseholder retrieves the results and returns them to the gateway node.
+5. The gateway node returns the results to the client.
+
+Geo-partitioning topology
+
+#### Writes
+
+Just like for reads, because each partition is constrained to the relevant region (e.g., the `la` and `la_idx` partitions are located in the `us-west` region), writes that specify the local region key access the relevant replicas without leaving the region. This makes write latency very low, with the exception of writes that do not specify a region key or that refer to a partition in another region; such writes will be transactionally consistent but won't have local latencies.
+
+For example, in the animation below:
+
+1. The write request in `us-central` reaches the regional load balancer.
+2. The load balancer routes the request to a gateway node.
+3. The gateway node routes the request to the leaseholder replicas for the relevant table and secondary index partitions.
+4. While each leaseholder appends the write to its Raft log, it notifies its follower replicas, which are in the same region.
+5. In each case, as soon as one follower has appended the write to its Raft log (and thus a majority of replicas agree based on identical Raft logs), it notifies the leaseholder and the write is committed on the agreeing replicas.
+6. The leaseholders then return acknowledgement of the commit to the gateway node.
+7. The gateway node returns the acknowledgement to the client.
+
+Geo-partitioning topology
+
+### Resiliency
+
+Because each partition is constrained to the relevant region and balanced across the 3 AZs in the region, one AZ can fail per region without interrupting access to the partitions in that region:
+
+Geo-partitioning topology
+
+However, if an entire region fails, the partitions in that region become unavailable for reads and writes, even if your load balancer can redirect requests to a different region:
+
+Geo-partitioning topology
+
+## Tutorial
+
+For a step-by-step demonstration of how this pattern gets you low-latency reads and writes in a broadly distributed cluster, see the [Geo-Partitioning tutorial](demo-geo-partitioning.html).
+
+## See also
+
+{% include {{ page.version.version }}/topology-patterns/see-also.md %}
diff --git a/v19.2/topology-patterns.md b/v19.2/topology-patterns.md
new file mode 100644
index 00000000000..ec4665d9d9f
--- /dev/null
+++ b/v19.2/topology-patterns.md
@@ -0,0 +1,42 @@
+---
+title: Topology Patterns
+summary: Recommended topology patterns for running CockroachDB in a cloud environment.
+toc: true
+redirect_from: cluster-topology-patterns.html
+key: cluster-topology-patterns.html
+---
+
+This section provides recommended topology patterns for running CockroachDB in a cloud environment, each with required configurations and latency and resiliency characteristics.
+
+## Single-region patterns
+
+When your clients are in a single geographic region, choosing a topology is straightforward.
+
+Pattern | Latency | Resiliency | Configuration
+--------|---------|------------|--------------
+[Development](topology-development.html) | • Fast reads and writes | • None | • 1 node<br>• No replication
+[Basic Production](topology-basic-production.html) | • Fast reads and writes | • 1 AZ failure | • 1 region<br>• 3 AZs<br>• 3+ nodes across AZs
+
+## Multi-region patterns
+
+When your clients are in multiple geographic regions, it is important to deploy your cluster across regions properly and then carefully choose the right topology for each of your tables. Not doing so can result in higher-than-expected latency and reduced resiliency.
+
+{{site.data.alerts.callout_info}}
+Multi-region patterns are almost always table-specific. For example, you might use the [Geo-Partitioned Replicas](topology-geo-partitioned-replicas.html) pattern for frequently updated tables that are geographically specific and the [Duplicate Indexes](topology-duplicate-indexes.html) pattern for infrequently updated tables (e.g., reference tables) that are not tied to geography.
+{{site.data.alerts.end}}
+
+Pattern | Latency | Resiliency | Configuration
+--------|---------|------------|--------------
+[Geo-Partitioned Replicas](topology-geo-partitioned-replicas.html) | • Fast regional reads and writes | • 1 AZ failure per partition | • Geo-partitioned table<br>• Partition replicas pinned to regions
+[Geo-Partitioned Leaseholders](topology-geo-partitioned-leaseholders.html) | • Fast regional reads<br>• Slower cross-region writes | • 1 region failure | • Geo-partitioned table<br>• Partition replicas spread across regions<br>• Partition leaseholders pinned to regions
+[Duplicate Indexes](topology-duplicate-indexes.html) | • Fast regional reads (current)<br>• Much slower cross-region writes | • 1 region failure | • Multiple identical indexes<br>• Index replicas spread across regions<br>• Index leaseholders pinned to regions
+[Follower Reads](topology-follower-reads.html) | • Fast regional reads (historical)<br>• Slower cross-region writes | • 1 region failure | • App configured to use follower reads
+[Follow-the-Workload](topology-follow-the-workload.html) | • Fast regional reads (active region)<br>• Slower cross-region reads (elsewhere)<br>• Slower cross-region writes | • 1 region failure | • None
+
+## Anti-patterns
+
+The following anti-patterns are ineffective or risky:
+
+- Single-region deployments using 2 AZs, or multi-region deployments using 2 regions. In these cases, the cluster would be unable to survive the loss of a single AZ or a single region, respectively.
+- Broadly distributed multi-region deployments (e.g., `us-west`, `asia`, and `europe`) using only the default [Follow-the-Workload](topology-follow-the-workload.html) pattern. In this case, latency will likely be unacceptably high.
+- [Geo-partitioned tables](topology-geo-partitioned-replicas.html) with non-partitioned secondary indexes. In this case, writes will incur cross-region latency to achieve consensus on the non-partitioned indexes (see the sketch below).
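+
+To avoid the last anti-pattern, partition any secondary index on a geo-partitioned table by the same geography column as the table itself. A minimal sketch, reusing the `users` table and `users_last_name_index` index from the [Geo-Partitioned Replicas](topology-geo-partitioned-replicas.html) steps:
+
+{% include copy-clipboard.html %}
+~~~ sql
+> ALTER INDEX users_last_name_index PARTITION BY LIST (city) (
+    PARTITION la_idx VALUES IN ('los angeles'),
+    PARTITION chicago_idx VALUES IN ('chicago'),
+    PARTITION ny_idx VALUES IN ('new york')
+);
+~~~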