From 44093428700d77cc4a873885084e8e0568523796 Mon Sep 17 00:00:00 2001 From: Yanjun Zhou Date: Fri, 8 Apr 2022 16:15:34 -0700 Subject: [PATCH] Support ClickHouse deployment with Persistent Volume Signed-off-by: Yanjun Zhou --- build/yamls/flow-visibility.yml | 45 ++++--- .../yamls/flow-visibility/base/clickhouse.yml | 25 ---- .../provisioning/datasources/create_table.sh | 6 +- .../patches/chmonitor/chMonitor.yml | 24 ++++ .../patches/e2e/imagePullPolicyClickhouse.yml | 3 - .../patches/pv/createLocalPv.yml | 28 ++++ .../patches/pv/createNfsPv.yml | 23 ++++ .../flow-visibility/patches/pv/mountPv.yml | 14 ++ .../flow-visibility/patches/ram/mountRam.yml | 12 ++ docs/network-flow-visibility.md | 123 ++++++++++++++++- hack/generate-manifest-flow-visibility.sh | 127 ++++++++++++++++-- .../flow-visibility/clickhouse-monitor/go.mod | 2 + .../flow-visibility/clickhouse-monitor/go.sum | 4 + .../clickhouse-monitor/main.go | 74 ++++++++-- 14 files changed, 433 insertions(+), 77 deletions(-) create mode 100644 build/yamls/flow-visibility/patches/chmonitor/chMonitor.yml create mode 100644 build/yamls/flow-visibility/patches/pv/createLocalPv.yml create mode 100644 build/yamls/flow-visibility/patches/pv/createNfsPv.yml create mode 100644 build/yamls/flow-visibility/patches/pv/mountPv.yml create mode 100644 build/yamls/flow-visibility/patches/ram/mountRam.yml diff --git a/build/yamls/flow-visibility.yml b/build/yamls/flow-visibility.yml index 7217afb84f7..3f8ba45984d 100644 --- a/build/yamls/flow-visibility.yml +++ b/build/yamls/flow-visibility.yml @@ -86,26 +86,27 @@ data: UInt64,\n reverseThroughputFromDestinationNode UInt64,\n trusted UInt8 DEFAULT 0\n ) engine=MergeTree\n ORDER BY (timeInserted, flowEndSeconds)\n \ TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout = - 3600;\n\n CREATE MATERIALIZED VIEW flows_pod_view\n ENGINE = SummingMergeTree\n - \ ORDER BY (\n timeInserted,\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n - \ 
flowEndSecondsFromDestinationNode,\n sourcePodName,\n destinationPodName,\n - \ destinationIP,\n destinationServicePortName,\n flowType,\n - \ sourcePodNamespace,\n destinationPodNamespace)\n TTL timeInserted - + INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout = 3600\n POPULATE\n - \ AS SELECT\n timeInserted,\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n - \ flowEndSecondsFromDestinationNode,\n sourcePodName,\n destinationPodName,\n - \ destinationIP,\n destinationServicePortName,\n flowType,\n - \ sourcePodNamespace,\n destinationPodNamespace,\n sum(octetDeltaCount) - AS octetDeltaCount,\n sum(reverseOctetDeltaCount) AS reverseOctetDeltaCount,\n - \ sum(throughput) AS throughput,\n sum(reverseThroughput) AS reverseThroughput,\n - \ sum(throughputFromSourceNode) AS throughputFromSourceNode,\n sum(throughputFromDestinationNode) - AS throughputFromDestinationNode\n FROM flows\n GROUP BY\n timeInserted,\n + 3600;\n\n CREATE MATERIALIZED VIEW IF NOT EXISTS flows_pod_view\n ENGINE + = SummingMergeTree\n ORDER BY (\n timeInserted,\n flowEndSeconds,\n + \ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n + \ sourcePodName,\n destinationPodName,\n destinationIP,\n + \ destinationServicePortName,\n flowType,\n sourcePodNamespace,\n + \ destinationPodNamespace)\n TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS + merge_with_ttl_timeout = 3600\n POPULATE\n AS SELECT\n timeInserted,\n \ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ sourcePodName,\n destinationPodName,\n destinationIP,\n \ destinationServicePortName,\n flowType,\n sourcePodNamespace,\n - \ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW flows_node_view\n - \ ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n flowEndSeconds,\n + \ destinationPodNamespace,\n sum(octetDeltaCount) AS octetDeltaCount,\n + \ sum(reverseOctetDeltaCount) AS reverseOctetDeltaCount,\n sum(throughput) + AS throughput,\n sum(reverseThroughput) AS 
reverseThroughput,\n sum(throughputFromSourceNode) + AS throughputFromSourceNode,\n sum(throughputFromDestinationNode) AS throughputFromDestinationNode\n + \ FROM flows\n GROUP BY\n timeInserted,\n flowEndSeconds,\n \ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n + \ sourcePodName,\n destinationPodName,\n destinationIP,\n + \ destinationServicePortName,\n flowType,\n sourcePodNamespace,\n + \ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW IF NOT EXISTS + flows_node_view\n ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n + \ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ sourceNodeName,\n destinationNodeName,\n sourcePodNamespace,\n \ destinationPodNamespace)\n TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout = 3600\n POPULATE\n AS SELECT\n timeInserted,\n @@ -120,9 +121,9 @@ data: AS reverseThroughputFromDestinationNode\n FROM flows\n GROUP BY\n timeInserted,\n \ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ sourceNodeName,\n destinationNodeName,\n sourcePodNamespace,\n - \ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW flows_policy_view\n - \ ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n flowEndSeconds,\n - \ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n + \ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW IF NOT EXISTS + flows_policy_view\n ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n + \ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n \ egressNetworkPolicyName,\n egressNetworkPolicyRuleAction,\n ingressNetworkPolicyName,\n \ ingressNetworkPolicyRuleAction,\n sourcePodNamespace,\n destinationPodNamespace)\n \ TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout = @@ -145,7 +146,7 @@ data: \ ORDER BY (timeCreated);\n \nEOSQL\n" kind: ConfigMap metadata: - name: clickhouse-mounted-configmap-dkbmg82ctg + name: 
clickhouse-mounted-configmap-58fkkt9b56 namespace: flow-visibility --- apiVersion: v1 @@ -4934,12 +4935,14 @@ spec: value: default.flows - name: MV_NAMES value: default.flows_pod_view default.flows_node_view default.flows_policy_view + - name: STORAGE_SIZE + value: 8Gi image: projects.registry.vmware.com/antrea/flow-visibility-clickhouse-monitor:latest imagePullPolicy: IfNotPresent name: clickhouse-monitor volumes: - configMap: - name: clickhouse-mounted-configmap-dkbmg82ctg + name: clickhouse-mounted-configmap-58fkkt9b56 name: clickhouse-configmap-volume - emptyDir: medium: Memory diff --git a/build/yamls/flow-visibility/base/clickhouse.yml b/build/yamls/flow-visibility/base/clickhouse.yml index 18fe72ee747..cfe35293604 100644 --- a/build/yamls/flow-visibility/base/clickhouse.yml +++ b/build/yamls/flow-visibility/base/clickhouse.yml @@ -45,32 +45,7 @@ spec: volumeMounts: - name: clickhouse-configmap-volume mountPath: /docker-entrypoint-initdb.d - - name: clickhouse-storage-volume - mountPath: /var/lib/clickhouse - - name: clickhouse-monitor - image: flow-visibility-clickhouse-monitor - env: - - name: CLICKHOUSE_USERNAME - valueFrom: - secretKeyRef: - name: clickhouse-secret - key: username - - name: CLICKHOUSE_PASSWORD - valueFrom: - secretKeyRef: - name: clickhouse-secret - key: password - - name: DB_URL - value: "tcp://localhost:9000" - - name: TABLE_NAME - value: "default.flows" - - name: MV_NAMES - value: "default.flows_pod_view default.flows_node_view default.flows_policy_view" volumes: - name: clickhouse-configmap-volume configMap: name: $(CLICKHOUSE_CONFIG_MAP_NAME) - - name: clickhouse-storage-volume - emptyDir: - medium: Memory - sizeLimit: 8Gi diff --git a/build/yamls/flow-visibility/base/provisioning/datasources/create_table.sh b/build/yamls/flow-visibility/base/provisioning/datasources/create_table.sh index 9f135579460..423fa45d00d 100644 --- a/build/yamls/flow-visibility/base/provisioning/datasources/create_table.sh +++ 
b/build/yamls/flow-visibility/base/provisioning/datasources/create_table.sh @@ -72,7 +72,7 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL TTL timeInserted + INTERVAL 1 HOUR SETTINGS merge_with_ttl_timeout = 3600; - CREATE MATERIALIZED VIEW flows_pod_view + CREATE MATERIALIZED VIEW IF NOT EXISTS flows_pod_view ENGINE = SummingMergeTree ORDER BY ( timeInserted, @@ -121,7 +121,7 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL sourcePodNamespace, destinationPodNamespace; - CREATE MATERIALIZED VIEW flows_node_view + CREATE MATERIALIZED VIEW IF NOT EXISTS flows_node_view ENGINE = SummingMergeTree ORDER BY ( timeInserted, @@ -163,7 +163,7 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL sourcePodNamespace, destinationPodNamespace; - CREATE MATERIALIZED VIEW flows_policy_view + CREATE MATERIALIZED VIEW IF NOT EXISTS flows_policy_view ENGINE = SummingMergeTree ORDER BY ( timeInserted, diff --git a/build/yamls/flow-visibility/patches/chmonitor/chMonitor.yml b/build/yamls/flow-visibility/patches/chmonitor/chMonitor.yml new file mode 100644 index 00000000000..68abcc0bb8a --- /dev/null +++ b/build/yamls/flow-visibility/patches/chmonitor/chMonitor.yml @@ -0,0 +1,24 @@ +- op: add + path: /spec/templates/podTemplates/0/spec/containers/- + value: + name: clickhouse-monitor + image: flow-visibility-clickhouse-monitor + env: + - name: CLICKHOUSE_USERNAME + valueFrom: + secretKeyRef: + name: clickhouse-secret + key: username + - name: CLICKHOUSE_PASSWORD + valueFrom: + secretKeyRef: + name: clickhouse-secret + key: password + - name: DB_URL + value: "tcp://localhost:9000" + - name: TABLE_NAME + value: "default.flows" + - name: MV_NAMES + value: "default.flows_pod_view default.flows_node_view default.flows_policy_view" + - name: STORAGE_SIZE + value: STORAGE_SIZE_VALUE diff --git a/build/yamls/flow-visibility/patches/e2e/imagePullPolicyClickhouse.yml b/build/yamls/flow-visibility/patches/e2e/imagePullPolicyClickhouse.yml index 484649ec385..b49e451783f 100644 --- 
a/build/yamls/flow-visibility/patches/e2e/imagePullPolicyClickhouse.yml +++ b/build/yamls/flow-visibility/patches/e2e/imagePullPolicyClickhouse.yml @@ -1,6 +1,3 @@ - op: add path: /spec/templates/podTemplates/0/spec/containers/0/imagePullPolicy value: IfNotPresent -- op: add - path: /spec/templates/podTemplates/0/spec/containers/1/imagePullPolicy - value: IfNotPresent diff --git a/build/yamls/flow-visibility/patches/pv/createLocalPv.yml b/build/yamls/flow-visibility/patches/pv/createLocalPv.yml new file mode 100644 index 00000000000..bda1ac4e3e2 --- /dev/null +++ b/build/yamls/flow-visibility/patches/pv/createLocalPv.yml @@ -0,0 +1,28 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: clickhouse-storage +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Retain +allowVolumeExpansion: True +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: clickhouse-pv +spec: + storageClassName: clickhouse-storage + capacity: + storage: STORAGE_SIZE + accessModes: + - ReadWriteOnce + volumeMode: Filesystem + local: + path: LOCAL_PATH + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: antrea.io/clickhouse-data-node + operator: Exists diff --git a/build/yamls/flow-visibility/patches/pv/createNfsPv.yml b/build/yamls/flow-visibility/patches/pv/createNfsPv.yml new file mode 100644 index 00000000000..2cef3949aae --- /dev/null +++ b/build/yamls/flow-visibility/patches/pv/createNfsPv.yml @@ -0,0 +1,23 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: clickhouse-storage +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Retain +allowVolumeExpansion: True +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: clickhouse-pv +spec: + storageClassName: clickhouse-storage + capacity: + storage: STORAGE_SIZE + accessModes: + - ReadWriteOnce + volumeMode: Filesystem + nfs: + path: NFS_SERVER_PATH + server: 
NFS_SERVER_ADDRESS diff --git a/build/yamls/flow-visibility/patches/pv/mountPv.yml b/build/yamls/flow-visibility/patches/pv/mountPv.yml new file mode 100644 index 00000000000..c3d4d898bb4 --- /dev/null +++ b/build/yamls/flow-visibility/patches/pv/mountPv.yml @@ -0,0 +1,14 @@ +- op: add + path: /spec/defaults/templates/dataVolumeClaimTemplate + value: clickhouse-storage-template +- op: add + path: /spec/templates/volumeClaimTemplates + value: + - name: clickhouse-storage-template + spec: + storageClassName: STORAGECLASS_NAME + accessModes: + - ReadWriteOnce + resources: + requests: + storage: STORAGE_SIZE diff --git a/build/yamls/flow-visibility/patches/ram/mountRam.yml b/build/yamls/flow-visibility/patches/ram/mountRam.yml new file mode 100644 index 00000000000..541136de993 --- /dev/null +++ b/build/yamls/flow-visibility/patches/ram/mountRam.yml @@ -0,0 +1,12 @@ +- op: add + path: /spec/templates/podTemplates/0/spec/volumes/- + value: + name: clickhouse-storage-volume + emptyDir: + medium: Memory + sizeLimit: STORAGE_SIZE +- op: add + path: /spec/templates/podTemplates/0/spec/containers/0/volumeMounts/- + value: + name: clickhouse-storage-volume + mountPath: /var/lib/clickhouse diff --git a/docs/network-flow-visibility.md b/docs/network-flow-visibility.md index f8ce96954a6..d4ff6ef16c9 100644 --- a/docs/network-flow-visibility.md +++ b/docs/network-flow-visibility.md @@ -35,7 +35,10 @@ - [About Grafana and ClickHouse](#about-grafana-and-clickhouse) - [Deployment Steps](#deployment-steps-2) - [Credentials Configuration](#credentials-configuration) - - [ClickHouse Configuration](#clickhouse-configuration) + - [ClickHouse Configuration](#clickhouse-configuration) + - [Service Customization](#service-customization) + - [Performance Configuration](#performance-configuration) + - [Persistent Volumes](#persistent-volumes) - [Pre-built Dashboards](#pre-built-dashboards) - [Flow Records Dashboard](#flow-records-dashboard) - [Pod-to-Pod Flows 
Dashboard](#pod-to-pod-flows-dashboard) @@ -778,7 +781,9 @@ type: Opaque We recommend changing all the credentials above if you are going to run the Flow Collector in production. -##### ClickHouse Configuration +#### ClickHouse Configuration + +##### Service Customization The ClickHouse database can be accessed through the Service `clickhouse-clickhouse`. The Pod exposes HTTP port at 8123 and TCP port at 9000 by default. The ports are @@ -828,6 +833,8 @@ metadata: namespace: flow-visibility ``` +##### Performance Configuration + The ClickHouse throughput depends on two factors - the storage size of the ClickHouse and the time interval between the batch commits to the ClickHouse. Larger storage size and longer commit interval provide higher throughput. @@ -847,11 +854,121 @@ storage size, please modify the `sizeLimit` in the following section. name: clickhouse-storage-volume ``` +To deploy ClickHouse with Persistent Volumes and limited storage size, please refer +to [Persistent Volumes](#persistent-volumes). + The time interval between the batch commits to the ClickHouse is specified in the [Flow Aggregator Configuration](#configuration-1) as `commitInterval`. The ClickHouse throughput grows sightly when the commit interval grows from 1s to 8s. A commit interval larger than 8s provides little improvement on the throughput. +##### Persistent Volumes + +By default, ClickHouse is deployed in memory. From Antrea v1.7, we support deploying +ClickHouse with Persistent Volumes. + +[PersistentVolume](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) +(PV) is a piece of storage in the K8s cluster, which must be manually +provisioned by an administrator or dynamically provisioned using Storage Classes. +A PersistentVolumeClaim (PVC) is a request for storage which consumes PV. As +ClickHouse is deployed as a StatefulSet, the volume can be claimed using +`volumeClaimTemplate`. 
+ +To generate the manifest automatically with default settings, you can clone the +repository and run one of the following commands: + +```bash +# To generate a manifest with Local PV for the ClickHouse +./hack/generate-manifest-flow-visibility.sh --volume pv --local <local_path> > flow-visibility.yml + +# To generate a manifest with NFS PV for the ClickHouse +./hack/generate-manifest-flow-visibility.sh --volume pv --nfs <nfs_server_address>:/<nfs_server_path> > flow-visibility.yml + +# To generate a manifest with a customized StorageClass for the ClickHouse +./hack/generate-manifest-flow-visibility.sh --volume pv --storageclass <storageclass_name> > flow-visibility.yml +``` + +If you prefer not to clone the repository and prefer to create a customized +manifest manually, please follow the steps below to deploy the ClickHouse with +Persistent Volumes: + +1. Provision the PersistentVolume. K8s supports a great number of +[PersistentVolume types](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#types-of-persistent-volumes). +You can provision your own PersistentVolume per your requirements. Here are +two simple examples for your reference. + + - Local PV allows you to store the ClickHouse data at a pre-defined path on + a specific Node. Refer to [createLocalPv.yml][local_pv_yaml] to create the + PV. Please replace `LOCAL_PATH` with the path to store the ClickHouse data + and label the Node used to store the ClickHouse data with + `antrea.io/clickhouse-data-node=`. + + - NFS PV allows you to store the ClickHouse data on an existing NFS server. + Refer to [createNfsPv.yml][nfs_pv_yaml] to create the PV. Please replace + `NFS_SERVER_ADDRESS` with the host name of the NFS server and `NFS_SERVER_PATH` + with the exported path on the NFS server. + + In both examples, you can set `.spec.capacity.storage` in PersistentVolume + to your storage size. This value is for informational purposes only as K8s does not + enforce the capacity of PVs. 
If you want to limit the storage usage, you need + to rely on your storage system to enforce it. For example, you can create + a Local PV on a partition with a limited size. We recommend using a dedicated + storage space for the ClickHouse if you are going to run the Flow Collector in + production. + + As these examples do not use any dynamic provisioner, the reclaim policy + for the PVs is `Retain` by default. After stopping the Grafana Flow Collector, + if you no longer need the data for future use, you may need to manually clean + up the data on the local disk or NFS server. + +1. Request the PV for ClickHouse. Please add a `volumeClaimTemplates` section +under `.spec.templates` for the resource `ClickHouseInstallation` in +`flow-visibility.yml` as shown in the example below. `storageClassName` should +be set to your own `StorageClass` name, and `.resources.requests.storage` +should be set to your storage size. + + ```yaml + volumeClaimTemplates: + - name: clickhouse-storage-template + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 8Gi + storageClassName: clickhouse-storage + ``` + + Then add this template as `dataVolumeClaimTemplate` to the section below. + + ```yaml + defaults: + templates: + dataVolumeClaimTemplate: clickhouse-storage-template + podTemplate: pod-template + serviceTemplate: service-template + ``` + +1. Remove the in-memory related deployment options, by removing the appropriate +`volume` and `volumeMount` for the `ClickHouseInstallation` resource in +`flow-visibility.yml`. 
+ + The `volumeMounts` entry to be removed is the following one: + + ```yaml + - mountPath: /var/lib/clickhouse + name: clickhouse-storage-volume + ``` + + The `volumes` entry to be removed is the following one: + + ```yaml + - emptyDir: + medium: Memory + sizeLimit: 8Gi + name: clickhouse-storage-volume + ``` + #### Pre-built Dashboards The following dashboards are pre-built and are recommended for Antrea flow @@ -1145,3 +1262,5 @@ With filters applied: Visualization Network Policy Dashboard"> [flow_visibility_kustomization_yaml]: ../build/yamls/flow-visibility/base/kustomization.yml +[local_pv_yaml]: ../build/yamls/flow-visibility/patches/pv/createLocalPv.yml +[nfs_pv_yaml]: ../build/yamls/flow-visibility/patches/pv/createNfsPv.yml diff --git a/hack/generate-manifest-flow-visibility.sh b/hack/generate-manifest-flow-visibility.sh index ddd312a89fa..5dd22fa60ac 100755 --- a/hack/generate-manifest-flow-visibility.sh +++ b/hack/generate-manifest-flow-visibility.sh @@ -21,17 +21,28 @@ function echoerr { } _usage="Usage: $0 [--mode (dev|release|e2e)] [--keep] [--help|-h] -Generate a YAML manifest for the Clickhouse-Grafana Flow-visibility Solution, using Kustomize, and +Generate a YAML manifest for the ClickHouse-Grafana Flow-visibility Solution, using Kustomize, and print it to stdout. - --mode (dev|release|e2e) Choose the configuration variant that you need (default is 'dev') - e2e mode generates YAML manifest for e2e test, which includes - clickhouse operator and server with default credentials, - but not Grafana-related functionality - --keep Debug flag which will preserve the generated kustomization.yml - --help, -h Print this message and exit - + --mode (dev|release|e2e) Choose the configuration variant that you need (default is 'dev') + e2e mode generates YAML manifest for e2e test, which includes + ClickHouse operator and server with default credentials, + but not Grafana-related functionality and ClickHouse monitor. 
+ --keep Debug flag which will preserve the generated kustomization.yml + --volume (ram|pv) Choose the volume provider that you need (default is 'ram'). + --storageclass -sc Provide the StorageClass used to dynamically provision the + PersistentVolume for ClickHouse storage. + --local Create the PersistentVolume for ClickHouse with a provided + local path. + --nfs Create the PersistentVolume for ClickHouse with a provided + NFS server hostname or IP address and the path exported in the + form of hostname:path. + --size Deploy the ClickHouse with a specific storage size. Can be a + plain integer or as a fixed-point number using one of these quantity + suffixes: E, P, T, G, M, K. Or the power-of-two equivalents: + Ei, Pi, Ti, Gi, Mi, Ki. The default is 8Gi. + --help, -h Print this message and exit This tool uses kustomize (https://github.com/kubernetes-sigs/kustomize) to generate manifests for -Clickhouse-Grafana Flow-visibility Solution. You can set the KUSTOMIZE environment variable to the +ClickHouse-Grafana Flow-visibility Solution. You can set the KUSTOMIZE environment variable to the path of the kustomize binary you want us to use. Otherwise we will look for kustomize in your PATH and your GOPATH. If we cannot find kustomize there, we will try to install it." 
@@ -45,11 +56,15 @@ function print_help { MODE="dev" KEEP=false +VOLUME="ram" +STORAGECLASS="" +LOCALPATH="" +NFSPATH="" +SIZE="8Gi" while [[ $# -gt 0 ]] do key="$1" - case $key in --mode) MODE="$2" @@ -59,6 +74,26 @@ case $key in KEEP=true shift ;; + --volume) + VOLUME="$2" + shift 2 + ;; + -sc|--storageclass) + STORAGECLASS="$2" + shift 2 + ;; + --local) + LOCALPATH="$2" + shift 2 + ;; + --nfs) + NFSPATH="$2" + shift 2 + ;; + --size) + SIZE="$2" + shift 2 + ;; -h|--help) print_usage exit 0 @@ -71,7 +106,7 @@ esac done if [ "$MODE" != "dev" ] && [ "$MODE" != "release" ] && [ "$MODE" != "e2e" ]; then - echoerr "--mode must be one of 'dev' or 'release'" + echoerr "--mode must be one of 'dev', 'release' or 'e2e'" print_help exit 1 fi @@ -88,6 +123,33 @@ if [ "$MODE" == "release" ] && [ -z "$IMG_TAG" ]; then exit 1 fi +if [ "$VOLUME" != "ram" ] && [ "$VOLUME" != "pv" ]; then + echoerr "--volume must be one of 'ram' or 'pv'" + print_help + exit 1 +fi + +if [ "$VOLUME" == "pv" ] && [ "$LOCALPATH" == "" ] && [ "$NFSPATH" == "" ] && [ "$STORAGECLASS" == "" ]; then + echoerr "When deploying with 'pv', one of '--local', '--nfs', '--storageclass' should be set" + print_help + exit 1 +fi + +if ([ "$LOCALPATH" != "" ] && [ "$NFSPATH" != "" ]) || ([ "$LOCALPATH" != "" ] && [ "$STORAGECLASS" != "" ]) || ([ "$STORAGECLASS" != "" ] && [ "$NFSPATH" != "" ]); then + echoerr "Cannot set '--local', '--nfs' or '--storageclass' at the same time" + print_help + exit 1 +fi + +if [ "$NFSPATH" != "" ]; then + pathPair=(${NFSPATH//:/ }) + if [ ${#pathPair[@]} != 2 ]; then + echoerr "--nfs must be in the form of hostname:path" + print_help + exit 1 + fi +fi + THIS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" source $THIS_DIR/verify-kustomize.sh @@ -120,12 +182,14 @@ if [ "$MODE" == "e2e" ]; then cp $KUSTOMIZATION_DIR/base/kustomize-config.yml base/kustomize-config.yml cp $KUSTOMIZATION_DIR/base/provisioning/datasources/create_table.sh 
base/provisioning/datasources/create_table.sh cp $KUSTOMIZATION_DIR/../clickhouse-operator-install-bundle.yml clickhouse-operator-install-bundle.yml - $KUSTOMIZE edit add base base - $KUSTOMIZE edit set image flow-visibility-clickhouse-monitor=projects.registry.vmware.com/antrea/flow-visibility-clickhouse-monitor:latest $KUSTOMIZE edit add patch --path imagePullPolicyClickhouse.yml --group clickhouse.altinity.com --version v1 --kind ClickHouseInstallation --name clickhouse else + # patch the clickhouse monitor with desired storage size + cp $KUSTOMIZATION_DIR/patches/chmonitor/*.yml . $KUSTOMIZE edit add base $BASE + sed -i.bak -E "s/STORAGE_SIZE_VALUE/$SIZE/" chMonitor.yml + $KUSTOMIZE edit add patch --path chMonitor.yml --group clickhouse.altinity.com --version v1 --kind ClickHouseInstallation --name clickhouse fi if [ "$MODE" == "dev" ]; then @@ -136,6 +200,43 @@ fi if [ "$MODE" == "release" ]; then $KUSTOMIZE edit set image flow-visibility-clickhouse-monitor=$IMG_NAME:$IMG_TAG fi +BASE=../$MODE +cd .. + +if [ "$VOLUME" == "ram" ]; then + mkdir ram && cd ram + cp $KUSTOMIZATION_DIR/patches/ram/*.yml . + touch kustomization.yml + $KUSTOMIZE edit add base $BASE + sed -i.bak -E "s/STORAGE_SIZE/$SIZE/" mountRam.yml + $KUSTOMIZE edit add patch --path mountRam.yml --group clickhouse.altinity.com --version v1 --kind ClickHouseInstallation --name clickhouse +fi + +if [ "$VOLUME" == "pv" ]; then + mkdir pv && cd pv + cp $KUSTOMIZATION_DIR/patches/pv/*.yml . 
+ touch kustomization.yml + $KUSTOMIZE edit add base $BASE + + if [[ $STORAGECLASS != "" ]]; then + sed -i.bak -E "s/STORAGECLASS_NAME/$STORAGECLASS/" mountPv.yml + else + sed -i.bak -E "s/STORAGECLASS_NAME/clickhouse-storage/" mountPv.yml + fi + if [[ $LOCALPATH != "" ]]; then + sed -i.bak -E "s~LOCAL_PATH~$LOCALPATH~" createLocalPv.yml + sed -i.bak -E "s/STORAGE_SIZE/$SIZE/" createLocalPv.yml + $KUSTOMIZE edit add base createLocalPv.yml + fi + if [[ $NFSPATH != "" ]]; then + sed -i.bak -E "s~NFS_SERVER_ADDRESS~${pathPair[0]}~" createNfsPv.yml + sed -i.bak -E "s~NFS_SERVER_PATH~${pathPair[1]}~" createNfsPv.yml + sed -i.bak -E "s/STORAGE_SIZE/$SIZE/" createNfsPv.yml + $KUSTOMIZE edit add base createNfsPv.yml + fi + sed -i.bak -E "s/STORAGE_SIZE/$SIZE/" mountPv.yml + $KUSTOMIZE edit add patch --path mountPv.yml --group clickhouse.altinity.com --version v1 --kind ClickHouseInstallation --name clickhouse +fi $KUSTOMIZE build diff --git a/plugins/flow-visibility/clickhouse-monitor/go.mod b/plugins/flow-visibility/clickhouse-monitor/go.mod index 0204c59c963..f3dd5d76ad2 100644 --- a/plugins/flow-visibility/clickhouse-monitor/go.mod +++ b/plugins/flow-visibility/clickhouse-monitor/go.mod @@ -11,5 +11,7 @@ require ( require ( github.com/cloudflare/golz4 v0.0.0-20150217214814-ef862a3cdc58 // indirect github.com/go-logr/logr v1.2.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect k8s.io/utils v0.0.0-20211116205334-6203023598ed // indirect ) diff --git a/plugins/flow-visibility/clickhouse-monitor/go.sum b/plugins/flow-visibility/clickhouse-monitor/go.sum index 60371aec430..d187dd50ae0 100644 --- a/plugins/flow-visibility/clickhouse-monitor/go.sum +++ b/plugins/flow-visibility/clickhouse-monitor/go.sum @@ -35,6 +35,7 @@ github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34 github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= github.com/go-openapi/swag 
v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= @@ -55,6 +56,7 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g= github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2cUuW7uA/OeU= @@ -100,6 +102,7 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -202,6 +205,7 @@ 
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/plugins/flow-visibility/clickhouse-monitor/main.go b/plugins/flow-visibility/clickhouse-monitor/main.go index cc1f6c61e46..4e779c3b9ec 100644 --- a/plugins/flow-visibility/clickhouse-monitor/main.go +++ b/plugins/flow-visibility/clickhouse-monitor/main.go @@ -22,6 +22,7 @@ import ( "time" "github.com/ClickHouse/clickhouse-go" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" ) @@ -48,6 +49,8 @@ const ( ) var ( + // Storage size allocated for the ClickHouse in number of bytes + allocatedSpace uint64 // The name of the table to store the flow records tableName = os.Getenv("TABLE_NAME") // The names of the materialized views @@ -58,16 +61,26 @@ var ( func main() { // Check environment variables - if len(tableName) == 0 || len(mvNames) == 0 { - klog.ErrorS(nil, "Unable to load environment variables, TABLE_NAME and MV_NAMES must be defined") + allocatedSpaceStr := os.Getenv("STORAGE_SIZE") + + if len(tableName) == 0 || len(mvNames) == 0 || len(allocatedSpaceStr) == 0 { + klog.ErrorS(nil, "Unable to load environment variables, TABLE_NAME, MV_NAMES and STORAGE_SIZE must be defined") + return + } + var err error + quantity, err := resource.ParseQuantity(allocatedSpaceStr) + if err != nil { + klog.ErrorS(err, "Error 
when parsing STORAGE_SIZE") return } + allocatedSpace = uint64(quantity.Value()) connect, err := connectLoop() if err != nil { klog.ErrorS(err, "Error when connecting to ClickHouse") os.Exit(1) } + checkStorageCondition(connect) wait.Forever(func() { // The monitor stops working for several rounds after a deletion // as the release of memory space by the ClickHouse MergeTree engine requires time @@ -118,28 +131,69 @@ func connectLoop() (*sql.DB, error) { return connect, nil } -// Checks the memory usage in the ClickHouse, and deletes records when it exceeds the threshold. -func monitorMemory(connect *sql.DB) { +// Check if ClickHouse shares storage space with other software +func checkStorageCondition(connect *sql.DB) { var ( freeSpace uint64 + usedSpace uint64 totalSpace uint64 ) - // Get memory usage from ClickHouse system table + getDiskUsage(connect, &freeSpace, &totalSpace) + getClickHouseUsage(connect, &usedSpace) + availablePercentage := float64(freeSpace+usedSpace) / float64(totalSpace) + klog.InfoS("Low available percentage implies ClickHouse does not save data on a dedicated disk", "availablePercentage", availablePercentage) +} + +func getDiskUsage(connect *sql.DB, freeSpace *uint64, totalSpace *uint64) { + // Get free space from ClickHouse system table + if err := wait.PollImmediate(queryRetryInterval, queryTimeout, func() (bool, error) { + if err := connect.QueryRow("SELECT free_space, total_space FROM system.disks").Scan(freeSpace, totalSpace); err != nil { + klog.ErrorS(err, "Failed to get the disk usage") + return false, nil + } else { + return true, nil + } + }); err != nil { + klog.ErrorS(err, "Failed to get the disk usage", "timeout", queryTimeout) + return + } +} + +func getClickHouseUsage(connect *sql.DB, usedSpace *uint64) { + // Get space usage from ClickHouse system table if err := wait.PollImmediate(queryRetryInterval, queryTimeout, func() (bool, error) { - if err := connect.QueryRow("SELECT free_space, total_space FROM 
system.disks").Scan(&freeSpace, &totalSpace); err != nil { - klog.ErrorS(err, "Failed to get memory usage for ClickHouse") + if err := connect.QueryRow("SELECT SUM(bytes) FROM system.parts").Scan(usedSpace); err != nil { + klog.ErrorS(err, "Failed to get the used space size by the ClickHouse") return false, nil } else { return true, nil } }); err != nil { - klog.ErrorS(err, "Failed to get memory usage for ClickHouse", "timeout", queryTimeout) + klog.ErrorS(err, "Failed to get the used space size by the ClickHouse", "timeout", queryTimeout) return } +} + +// Checks the memory usage in the ClickHouse, and deletes records when it exceeds the threshold. +func monitorMemory(connect *sql.DB) { + var ( + freeSpace uint64 + usedSpace uint64 + totalSpace uint64 + ) + getDiskUsage(connect, &freeSpace, &totalSpace) + getClickHouseUsage(connect, &usedSpace) + + // Total space for ClickHouse is the smaller one of the user allocated space size and the actual space size on the disk + if (freeSpace + usedSpace) < allocatedSpace { + totalSpace = freeSpace + usedSpace + } else { + totalSpace = allocatedSpace + } // Calculate the memory usage - usagePercentage := float64(totalSpace-freeSpace) / float64(totalSpace) - klog.InfoS("Memory usage", "total", totalSpace, "used", totalSpace-freeSpace, "percentage", usagePercentage) + usagePercentage := float64(usedSpace) / float64(totalSpace) + klog.InfoS("Memory usage", "total", totalSpace, "used", usedSpace, "percentage", usagePercentage) // Delete records when memory usage is larger than threshold if usagePercentage > threshold { timeBoundary, err := getTimeBoundary(connect)