Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 1 addition & 9 deletions .envrc.vars
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,7 @@ export CLOUDSDK_COMPUTE_REGION="us-central1"
export DB_CLOUDSDK_COMPUTE_ZONE="${CLOUDSDK_COMPUTE_REGION}-a"
# Default to the scratch environment
export CLOUDSDK_CORE_PROJECT="da-cn-scratchnet"
# Default cluster sizing
export GCP_CLUSTER_NODE_TYPE=e2-standard-16
export GCP_CLUSTER_MIN_NODES=0
# A high max-nodes by default to support large deployments and hard migrations
# Should be set to a lower number (currently 8) on CI clusters that do neither of those.
export GCP_CLUSTER_MAX_NODES=20
# The DEFAULT logging variant is what Google recommends for log throughput of up to 100 KB/s (https://cloud.google.com/kubernetes-engine/docs/how-to/adjust-log-throughput)
# The MAX_THROUGHPUT variant supports multiple tens of MB/s of logs, but its agents require 2 CPUs, so we lose 2 CPUs per node
export GCP_CLUSTER_LOGGING_VARIANT="DEFAULT"

export GCP_DNS_PROJECT="da-gcp-canton-domain"
export GCP_DNS_SA_SECRET="clouddns-dns01-solver-svc-acct"
# DNS Service Account information
Expand Down
8 changes: 3 additions & 5 deletions build-tools/cncluster
Original file line number Diff line number Diff line change
Expand Up @@ -666,15 +666,14 @@ function subcmd_create() {
location_arg=(--region "${CLOUDSDK_COMPUTE_REGION}" --node-locations "${CLOUDSDK_COMPUTE_REGION}-a")
fi

#### Cluster can be resized from deployment directories with the following command:
#### gcloud container clusters resize "cn-${GCP_CLUSTER_BASENAME}net" --num-nodes 4
### The default cluster pool should be removed after running the pulumi cluster project that creates the node pools we actually use
gcloud container clusters create "${GCP_CLUSTER_NAME}" \
--create-subnetwork name="${GCP_CLUSTER_NAME}-subnet" \
--enable-dataplane-v2 \
--enable-master-authorized-networks \
--enable-ip-alias \
--enable-private-nodes \
--machine-type "${GCP_CLUSTER_NODE_TYPE}" \
--machine-type "e2-standard-8" \
--num-nodes 1 \
--master-ipv4-cidr "${GCP_MASTER_IPV4_CIDR}" \
--node-labels digitalasset.com/cluster-name="${GCP_CLUSTER_NAME}" \
Expand All @@ -684,8 +683,7 @@ function subcmd_create() {
--cluster-dns-scope=cluster \
--enable-autoscaling \
--min-nodes 0 \
--max-nodes "${GCP_CLUSTER_MAX_NODES}" \
--logging-variant="${GCP_CLUSTER_LOGGING_VARIANT}" \
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we control logging now

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. We don't on prod clusters so imho this should be tied to the flag we have for that until we remove it.
  2. gcloud logging is still enabled, it's just disabled for workloads. I guess system logging is small enough that we don't need to bother with the variant though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would honestly just remove it. We seem to not set it for cilr so if that one can work without it yolo

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah fair enough let's nuke it

--max-nodes 2 \
--logging="$(_get_logging_components)" \
"${location_arg[@]}"

Expand Down
12 changes: 12 additions & 0 deletions cluster/deployment/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ svs:
appsLogLevel: DEBUG
cantonLogLevel: DEBUG
cometbftLogLevel: debug
cluster:
nodePools:
apps:
minNodes: 0
# A high max-nodes by default to support large deployments and hard migrations
# Should be set to a lower number (currently 8) on CI clusters that do neither of those.
maxNodes: 20
nodeType: e2-standard-16
infra:
minNodes: 1
maxNodes: 3
nodeType: e2-standard-8
infra:
prometheus:
retentionDuration: "1y"
Expand Down
2 changes: 0 additions & 2 deletions cluster/deployment/mock/.envrc.vars
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ export CLOUDSDK_COMPUTE_REGION="europe-west6"
export DB_CLOUDSDK_COMPUTE_ZONE="${CLOUDSDK_COMPUTE_REGION}-a"

export GCP_CLUSTER_BASENAME="mock"
export GCP_CLUSTER_NODE_TYPE=n2-standard-16
export GCP_MASTER_IPV4_CIDR="1.2.3.4/28"
export GCP_CLUSTER_PROD_LIKE=true
export GCP_CLUSTER_LOGGING_VARIANT="MAX_THROUGHPUT"
export GCP_CLUSTER_HOSTNAME="mock.global.canton.network.digitalasset.com"
export IS_DEVNET=false

Expand Down
4 changes: 4 additions & 0 deletions cluster/deployment/mock/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -156,5 +156,9 @@ infra:
spec:
key: value
anotherKey: anotherValue
cluster:
nodePools:
apps:
nodeType: n2-standard-16
networkWide:
maxVettingDelay: 1m
3 changes: 0 additions & 3 deletions cluster/expected/cluster/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
"labels": {
"cn_apps": "true"
},
"loggingVariant": "MAX_THROUGHPUT",
"machineType": "n2-standard-16",
"taints": [
{
Expand Down Expand Up @@ -44,7 +43,6 @@
"labels": {
"cn_infra": "true"
},
"loggingVariant": "MAX_THROUGHPUT",
"machineType": "e2-standard-8",
"taints": [
{
Expand All @@ -71,7 +69,6 @@
"initialNodeCount": 1,
"name": "gke-pool",
"nodeConfig": {
"loggingVariant": "MAX_THROUGHPUT",
"machineType": "e2-standard-4",
"taints": [
{
Expand Down
22 changes: 22 additions & 0 deletions cluster/pulumi/cluster/src/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright (c) 2024 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
import { clusterSubConfig } from 'splice-pulumi-common/src/config/configLoader';
import { z } from 'zod';

/**
 * Schema for the sizing and machine type of a single GKE node pool.
 *
 * - `minNodes` / `maxNodes`: node-count bounds for the pool. They are counts,
 *   so they are validated as non-negative integers; a fractional or negative
 *   value is rejected here, at config-load time, instead of surfacing later
 *   as a cloud-provider error.
 * - `nodeType`: machine type name for the pool's nodes (e.g. `e2-standard-16`).
 */
const GkeNodePoolConfigSchema = z.object({
  minNodes: z.number().int().nonnegative(),
  maxNodes: z.number().int().nonnegative(),
  nodeType: z.string(),
});
/**
 * Schema for the `cluster` section of the deployment config.
 *
 * Both node pools (`infra` and `apps`) are required and share the same
 * per-pool shape.
 */
const GkeClusterConfigSchema = z.object({
  nodePools: z.object({
    infra: GkeNodePoolConfigSchema,
    apps: GkeNodePoolConfigSchema,
  }),
});

/** Static type of the validated cluster config, derived from the schema. */
export type GkeClusterConfig = z.infer<typeof GkeClusterConfigSchema>;

// Load the raw `cluster` sub-config once at module load.
// NOTE(review): presumably `clusterSubConfig` returns the merged `cluster`
// section of the deployment config.yaml — confirm against configLoader.
const rawGkeClusterConfig = clusterSubConfig('cluster');

/**
 * Validated cluster configuration. `parse` throws if the raw config does not
 * match the schema, so importing this module fails fast on a bad config.
 */
export const gkeClusterConfig: GkeClusterConfig =
  GkeClusterConfigSchema.parse(rawGkeClusterConfig);
17 changes: 8 additions & 9 deletions cluster/pulumi/cluster/src/nodePools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import * as gcp from '@pulumi/gcp';
import { GCP_PROJECT, config } from 'splice-pulumi-common';

import { gkeClusterConfig } from './config';

export function installNodePools(): void {
const clusterName = `cn-${config.requireEnv('GCP_CLUSTER_BASENAME')}net`;
const cluster = config.optionalEnv('CLOUDSDK_COMPUTE_ZONE')
Expand All @@ -13,7 +15,7 @@ export function installNodePools(): void {
name: 'cn-apps-pool',
cluster,
nodeConfig: {
machineType: config.requireEnv('GCP_CLUSTER_NODE_TYPE'),
machineType: gkeClusterConfig.nodePools.apps.nodeType,
taints: [
{
effect: 'NO_SCHEDULE',
Expand All @@ -24,20 +26,19 @@ export function installNodePools(): void {
labels: {
cn_apps: 'true',
},
loggingVariant: config.requireEnv('GCP_CLUSTER_LOGGING_VARIANT'),
},
initialNodeCount: 0,
autoscaling: {
minNodeCount: parseInt(config.requireEnv('GCP_CLUSTER_MIN_NODES')),
maxNodeCount: parseInt(config.requireEnv('GCP_CLUSTER_MAX_NODES')),
minNodeCount: gkeClusterConfig.nodePools.apps.minNodes,
maxNodeCount: gkeClusterConfig.nodePools.apps.maxNodes,
},
});

new gcp.container.NodePool('cn-infra-node-pool', {
name: 'cn-infra-pool',
cluster,
nodeConfig: {
machineType: config.optionalEnv('INFRA_NODE_POOL_MACHINE_TYPE') || 'e2-standard-8',
machineType: gkeClusterConfig.nodePools.infra.nodeType,
taints: [
{
effect: 'NO_SCHEDULE',
Expand All @@ -48,12 +49,11 @@ export function installNodePools(): void {
labels: {
cn_infra: 'true',
},
loggingVariant: config.requireEnv('GCP_CLUSTER_LOGGING_VARIANT'),
},
initialNodeCount: 1,
autoscaling: {
minNodeCount: 1,
maxNodeCount: 3,
minNodeCount: gkeClusterConfig.nodePools.infra.minNodes,
maxNodeCount: gkeClusterConfig.nodePools.infra.maxNodes,
},
});

Expand All @@ -69,7 +69,6 @@ export function installNodePools(): void {
value: 'true',
},
],
loggingVariant: config.requireEnv('GCP_CLUSTER_LOGGING_VARIANT'),
},
initialNodeCount: 1,
autoscaling: {
Expand Down