Skip to content

Commit

Permalink
Add data retention methods for in-memory Clickhouse deployment (#3244)
Browse files Browse the repository at this point in the history
This PR updates the ClickHouse in-memory deployment with restricted memory storage. It adds a TTL mechanism and an independent monitor as strategies to ensure data retention.
The TTL mechanism is provided by the ClickHouse MergeTree engine, which deletes expired data periodically. The monitor is designed to deal with bursts in data insertion. It is run periodically by a Kubernetes CronJob and deletes records when the ClickHouse server memory usage is larger than the threshold.

Signed-off-by: Yanjun Zhou <zhouya@vmware.com>
  • Loading branch information
yanjunz97 committed Mar 21, 2022
1 parent f5d2cdf commit 6c4e5a3
Show file tree
Hide file tree
Showing 16 changed files with 1,411 additions and 41 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/build.yml
Expand Up @@ -182,3 +182,34 @@ jobs:
run: |
echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
docker push antrea/flow-aggregator:latest
# Gate job: publishes a `has_changes` output that the image build job below
# reads to decide whether it needs to run.
check-clickhouse-monitor-changes:
  name: Check whether flow-visibility-clickhouse-monitor image needs to be built based on diff
  runs-on:
    - ubuntu-latest
  outputs:
    has_changes: ${{ steps.check_diff.outputs.has_changes }}
  steps:
    # fetch-depth: 0 fetches the full history instead of a shallow clone.
    - uses: actions/checkout@v3
      with:
        fetch-depth: 0
    # NOTE(review): the args look like the set of paths whose changes should
    # NOT trigger a rebuild of this image - confirm against the has-changes
    # action documentation.
    - uses: antrea-io/has-changes@v1
      id: check_diff
      with:
        args: >-
          build/yamls/* ci/* cmd/* docs/* hack/* multicluster/* pkg/* test/*
          third_party/* plugins/octant/* plugins/grafana-sankey-plugin/* *.md

# Builds the ClickHouse monitor image; pushes it only for pushes to main in
# the upstream repository.
build-flow-visibility-clickhouse-monitor:
  needs: check-clickhouse-monitor-changes
  # Runs when the gate job reported changes, and unconditionally on push events.
  if: ${{ needs.check-clickhouse-monitor-changes.outputs.has_changes == 'yes' || github.event_name == 'push' }}
  runs-on:
    - ubuntu-latest
  steps:
    - uses: actions/checkout@v2
    - name: Build flow-visibility-clickhouse-monitor Docker image
      run: make flow-visibility-clickhouse-monitor
    - name: Push flow-visibility-clickhouse-monitor Docker image to registry
      # Registry credentials are only used on pushes to refs/heads/main of
      # antrea-io/antrea.
      if: ${{ github.repository == 'antrea-io/antrea' && github.event_name == 'push' && github.ref == 'refs/heads/main' }}
      env:
        DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
        DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
      run: |
        echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
        docker push antrea/flow-visibility-clickhouse-monitor:latest
15 changes: 15 additions & 0 deletions .github/workflows/build_tag.yml
Expand Up @@ -118,3 +118,18 @@ jobs:
make flow-aggregator-image
echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
docker push antrea/flow-aggregator:"${VERSION}"
# Tag build: builds the ClickHouse monitor image and pushes it under the
# version produced by the get-version job.
build-flow-visibility-clickhouse-monitor:
  needs: get-version
  runs-on:
    - ubuntu-latest
  steps:
    - uses: actions/checkout@v2
    - name: Build flow-visibility-clickhouse-monitor Docker image and push to registry
      env:
        DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
        DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        VERSION: ${{ needs.get-version.outputs.version }}
      # Build, log in with the password supplied on stdin, then push the
      # version-tagged image.
      run: |
        make flow-visibility-clickhouse-monitor
        echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
        docker push antrea/flow-visibility-clickhouse-monitor:"${VERSION}"
10 changes: 9 additions & 1 deletion Makefile
Expand Up @@ -354,7 +354,7 @@ manifest:
$(CURDIR)/hack/generate-manifest-octant.sh --mode dev > build/yamls/antrea-octant.yml
$(CURDIR)/hack/generate-manifest-windows.sh --mode dev > build/yamls/antrea-windows.yml
$(CURDIR)/hack/generate-manifest-flow-aggregator.sh --mode dev > build/yamls/flow-aggregator.yml
$(CURDIR)/hack/generate-manifest-flow-visibility.sh > build/yamls/flow-visibility.yml
$(CURDIR)/hack/generate-manifest-flow-visibility.sh --mode dev > build/yamls/flow-visibility.yml

.PHONY: manifest-scale
manifest-scale:
Expand Down Expand Up @@ -387,6 +387,14 @@ endif
docker tag antrea/antrea-mc-controller:$(DOCKER_IMG_VERSION) projects.registry.vmware.com/antrea/antrea-mc-controller
docker tag antrea/antrea-mc-controller:$(DOCKER_IMG_VERSION) projects.registry.vmware.com/antrea/antrea-mc-controller:$(DOCKER_IMG_VERSION)

# Builds the ClickHouse monitor image from
# build/images/flow-visibility/Dockerfile.clickhouse-monitor.ubuntu, tagged
# with $(DOCKER_IMG_VERSION), using the repository root as the build context.
.PHONY: flow-visibility-clickhouse-monitor
flow-visibility-clickhouse-monitor:
@echo "===> Building antrea/flow-visibility-clickhouse-monitor Docker image <==="
docker build --pull -t antrea/flow-visibility-clickhouse-monitor:$(DOCKER_IMG_VERSION) -f build/images/flow-visibility/Dockerfile.clickhouse-monitor.ubuntu $(DOCKER_BUILD_ARGS) .
# Re-tag the versioned image: once without an explicit tag, then under the
# projects.registry.vmware.com prefix both without a tag and with the version.
docker tag antrea/flow-visibility-clickhouse-monitor:$(DOCKER_IMG_VERSION) antrea/flow-visibility-clickhouse-monitor
docker tag antrea/flow-visibility-clickhouse-monitor:$(DOCKER_IMG_VERSION) projects.registry.vmware.com/antrea/flow-visibility-clickhouse-monitor
docker tag antrea/flow-visibility-clickhouse-monitor:$(DOCKER_IMG_VERSION) projects.registry.vmware.com/antrea/flow-visibility-clickhouse-monitor:$(DOCKER_IMG_VERSION)

.PHONY: flow-aggregator-image
flow-aggregator-image:
@echo "===> Building antrea/flow-aggregator Docker image <==="
Expand Down
19 changes: 19 additions & 0 deletions build/images/flow-visibility/Dockerfile.clickhouse-monitor.ubuntu
@@ -0,0 +1,19 @@
# Two-stage build for the ClickHouse monitor: compile in a golang image, then
# ship the result in an empty (scratch) image.

# GO_VERSION has no default here, so it must be passed with --build-arg.
ARG GO_VERSION
FROM golang:${GO_VERSION} AS clickhouse-monitor-build

# The whole build context is copied in; the build runs from the plugin's
# directory.
COPY . /antrea
WORKDIR /antrea/plugins/flow-visibility/clickhouse-monitor

# Statically links clickhouse-monitor-plugin binary.
RUN CGO_ENABLED=0 make clickhouse-monitor-plugin

FROM scratch

LABEL maintainer="Antrea <projectantrea-dev@googlegroups.com>"
LABEL description="A docker image to deploy the ClickHouse monitor plugin."

ENV USER=root

# NOTE(review): the wildcard copies everything in the plugin directory into
# the final image, not only the built binary - confirm whether that is
# intended before narrowing it.
COPY --from=clickhouse-monitor-build /antrea/plugins/flow-visibility/clickhouse-monitor/* /

ENTRYPOINT ["/clickhouse-monitor"]
177 changes: 141 additions & 36 deletions build/yamls/flow-visibility.yml
Expand Up @@ -14,12 +14,41 @@ volumeBindingMode: WaitForFirstConsumer
---
# ServiceAccount named clickhouse-monitor in the flow-visibility namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: clickhouse-monitor
  namespace: flow-visibility
  labels:
    app: flow-visibility
---
# ServiceAccount named grafana in the flow-visibility namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: flow-visibility
  name: grafana
---
# Namespaced Role for the ClickHouse monitor: list Pods and read Pod logs in
# the core API group, nothing else.
# NOTE(review): presumably the monitor inspects Pods selected by its
# MONITOR_LABEL and reads their logs - confirm against the monitor source.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: clickhouse-monitor-role
  namespace: flow-visibility
  labels:
    app: flow-visibility
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["list"]
  - apiGroups: [""]
    resources: ["pods/log"]
    verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
labels:
app: flow-visibility
Expand All @@ -37,6 +66,22 @@ rules:
---
# Binds clickhouse-monitor-role to the clickhouse-monitor ServiceAccount
# within the flow-visibility namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: clickhouse-monitor-role-binding
  namespace: flow-visibility
  labels:
    app: flow-visibility
subjects:
  - kind: ServiceAccount
    name: clickhouse-monitor
    namespace: flow-visibility
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: clickhouse-monitor-role
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
app: flow-visibility
Expand Down Expand Up @@ -84,53 +129,59 @@ data:
\ throughput UInt64,\n reverseThroughput UInt64,\n throughputFromSourceNode
UInt64,\n throughputFromDestinationNode UInt64,\n reverseThroughputFromSourceNode
UInt64,\n reverseThroughputFromDestinationNode UInt64,\n trusted
UInt8 DEFAULT 0\n ) engine=MergeTree\n ORDER BY (timeInserted, flowEndSeconds);\n\n
\ CREATE MATERIALIZED VIEW flows_pod_view\n ENGINE = SummingMergeTree\n ORDER
BY (\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ sourcePodName,\n destinationPodName,\n destinationIP,\n
\ destinationServicePortName,\n flowType,\n sourcePodNamespace,\n
\ destinationPodNamespace)\n POPULATE\n AS SELECT\n flowEndSeconds,\n
\ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ sourcePodName,\n destinationPodName,\n destinationIP,\n
\ destinationServicePortName,\n flowType,\n sourcePodNamespace,\n
\ destinationPodNamespace,\n sum(octetDeltaCount) AS octetDeltaCount,\n
\ sum(reverseOctetDeltaCount) AS reverseOctetDeltaCount,\n sum(throughput)
AS throughput,\n sum(reverseThroughput) AS reverseThroughput,\n sum(throughputFromSourceNode)
AS throughputFromSourceNode,\n sum(throughputFromDestinationNode) AS throughputFromDestinationNode\n
\ FROM flows\n GROUP BY\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n
UInt8 DEFAULT 0\n ) engine=MergeTree\n ORDER BY (timeInserted, flowEndSeconds)\n
\ TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout =
3600;\n\n CREATE MATERIALIZED VIEW flows_pod_view\n ENGINE = SummingMergeTree\n
\ ORDER BY (\n timeInserted,\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n
\ flowEndSecondsFromDestinationNode,\n sourcePodName,\n destinationPodName,\n
\ destinationIP,\n destinationServicePortName,\n flowType,\n
\ sourcePodNamespace,\n destinationPodNamespace)\n TTL timeInserted
+ INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout = 3600\n POPULATE\n
\ AS SELECT\n timeInserted,\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n
\ flowEndSecondsFromDestinationNode,\n sourcePodName,\n destinationPodName,\n
\ destinationIP,\n destinationServicePortName,\n flowType,\n
\ sourcePodNamespace,\n destinationPodNamespace;\n\n CREATE MATERIALIZED
VIEW flows_node_view\n ENGINE = SummingMergeTree\n ORDER BY (\n flowEndSeconds,\n
\ sourcePodNamespace,\n destinationPodNamespace,\n sum(octetDeltaCount)
AS octetDeltaCount,\n sum(reverseOctetDeltaCount) AS reverseOctetDeltaCount,\n
\ sum(throughput) AS throughput,\n sum(reverseThroughput) AS reverseThroughput,\n
\ sum(throughputFromSourceNode) AS throughputFromSourceNode,\n sum(throughputFromDestinationNode)
AS throughputFromDestinationNode\n FROM flows\n GROUP BY\n timeInserted,\n
\ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ sourcePodName,\n destinationPodName,\n destinationIP,\n
\ destinationServicePortName,\n flowType,\n sourcePodNamespace,\n
\ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW flows_node_view\n
\ ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n flowEndSeconds,\n
\ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ sourceNodeName,\n destinationNodeName,\n sourcePodNamespace,\n
\ destinationPodNamespace)\n POPULATE\n AS SELECT\n flowEndSeconds,\n
\ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ destinationPodNamespace)\n TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS
merge_with_ttl_timeout = 3600\n POPULATE\n AS SELECT\n timeInserted,\n
\ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ sourceNodeName,\n destinationNodeName,\n sourcePodNamespace,\n
\ destinationPodNamespace,\n sum(octetDeltaCount) AS octetDeltaCount,\n
\ sum(reverseOctetDeltaCount) AS reverseOctetDeltaCount,\n sum(throughput)
AS throughput,\n sum(reverseThroughput) AS reverseThroughput,\n sum(throughputFromSourceNode)
AS throughputFromSourceNode,\n sum(reverseThroughputFromSourceNode) AS
reverseThroughputFromSourceNode,\n sum(throughputFromDestinationNode) AS
throughputFromDestinationNode,\n sum(reverseThroughputFromDestinationNode)
AS reverseThroughputFromDestinationNode\n FROM flows\n GROUP BY\n flowEndSeconds,\n
\ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
AS reverseThroughputFromDestinationNode\n FROM flows\n GROUP BY\n timeInserted,\n
\ flowEndSeconds,\n flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ sourceNodeName,\n destinationNodeName,\n sourcePodNamespace,\n
\ destinationPodNamespace;\n\n CREATE MATERIALIZED VIEW flows_policy_view\n
\ ENGINE = SummingMergeTree\n ORDER BY (\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n
\ flowEndSecondsFromDestinationNode,\n egressNetworkPolicyName,\n
\ egressNetworkPolicyRuleAction,\n ingressNetworkPolicyName,\n ingressNetworkPolicyRuleAction,\n
\ sourcePodNamespace,\n destinationPodNamespace)\n POPULATE\n
\ AS SELECT\n flowEndSeconds,\n flowEndSecondsFromSourceNode,\n
\ flowEndSecondsFromDestinationNode,\n egressNetworkPolicyName,\n
\ egressNetworkPolicyRuleAction,\n ingressNetworkPolicyName,\n ingressNetworkPolicyRuleAction,\n
\ sourcePodNamespace,\n destinationPodNamespace,\n sum(octetDeltaCount)
AS octetDeltaCount,\n sum(reverseOctetDeltaCount) AS reverseOctetDeltaCount,\n
\ sum(throughput) AS throughput,\n sum(reverseThroughput) AS reverseThroughput,\n
\ sum(throughputFromSourceNode) AS throughputFromSourceNode,\n sum(reverseThroughputFromSourceNode)
AS reverseThroughputFromSourceNode,\n sum(throughputFromDestinationNode)
AS throughputFromDestinationNode,\n sum(reverseThroughputFromDestinationNode)
AS reverseThroughputFromDestinationNode\n FROM flows\n GROUP BY\n flowEndSeconds,\n
\ ENGINE = SummingMergeTree\n ORDER BY (\n timeInserted,\n flowEndSeconds,\n
\ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ egressNetworkPolicyName,\n egressNetworkPolicyRuleAction,\n ingressNetworkPolicyName,\n
\ ingressNetworkPolicyRuleAction,\n sourcePodNamespace,\n destinationPodNamespace)\n
\ TTL timeInserted + INTERVAL 1 HOUR\n SETTINGS merge_with_ttl_timeout =
3600\n POPULATE\n AS SELECT\n timeInserted,\n flowEndSeconds,\n
\ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ egressNetworkPolicyName,\n egressNetworkPolicyRuleAction,\n ingressNetworkPolicyName,\n
\ ingressNetworkPolicyRuleAction,\n sourcePodNamespace,\n destinationPodNamespace,\n
\ sum(octetDeltaCount) AS octetDeltaCount,\n sum(reverseOctetDeltaCount)
AS reverseOctetDeltaCount,\n sum(throughput) AS throughput,\n sum(reverseThroughput)
AS reverseThroughput,\n sum(throughputFromSourceNode) AS throughputFromSourceNode,\n
\ sum(reverseThroughputFromSourceNode) AS reverseThroughputFromSourceNode,\n
\ sum(throughputFromDestinationNode) AS throughputFromDestinationNode,\n
\ sum(reverseThroughputFromDestinationNode) AS reverseThroughputFromDestinationNode\n
\ FROM flows\n GROUP BY\n timeInserted,\n flowEndSeconds,\n
\ flowEndSecondsFromSourceNode,\n flowEndSecondsFromDestinationNode,\n
\ egressNetworkPolicyName,\n egressNetworkPolicyRuleAction,\n ingressNetworkPolicyName,\n
\ ingressNetworkPolicyRuleAction,\n sourcePodNamespace,\n destinationPodNamespace;\n\n
Expand All @@ -139,7 +190,7 @@ data:
\ ORDER BY (timeCreated);\n \nEOSQL\n"
kind: ConfigMap
metadata:
name: clickhouse-mounted-configmap-44mdg7d2b2
name: clickhouse-mounted-configmap-dkbmg82ctg
namespace: flow-visibility
---
apiVersion: v1
Expand Down Expand Up @@ -4878,6 +4929,54 @@ spec:
name: grafana-dashboard-config-gkkgc9d727
name: grafana-dashboard-config
---
# CronJob that launches the ClickHouse monitor container every minute and
# keeps one finished Job of each outcome (succeeded / failed).
apiVersion: batch/v1
kind: CronJob
metadata:
  name: clickhouse-monitor
  namespace: flow-visibility
  labels:
    app: clickhouse-monitor
spec:
  # Five wildcards: the job is scheduled once per minute.
  schedule: "* * * * *"
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 1
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: clickhouse-monitor
        spec:
          serviceAccountName: clickhouse-monitor
          restartPolicy: OnFailure
          containers:
            - name: clickhouse-monitor
              image: projects.registry.vmware.com/antrea/flow-visibility-clickhouse-monitor:latest
              imagePullPolicy: IfNotPresent
              env:
                # Database credentials are read from the clickhouse-secret
                # Secret.
                - name: CLICKHOUSE_USERNAME
                  valueFrom:
                    secretKeyRef:
                      name: clickhouse-secret
                      key: username
                - name: CLICKHOUSE_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: clickhouse-secret
                      key: password
                - name: DB_URL
                  value: tcp://clickhouse-clickhouse.flow-visibility.svc:9000
                # Table and materialized views handed to the monitor.
                - name: TABLE_NAME
                  value: default.flows
                - name: MV_NAMES
                  value: default.flows_pod_view default.flows_node_view default.flows_policy_view
                # Namespace of the running Pod, injected via the downward API.
                - name: NAMESPACE
                  valueFrom:
                    fieldRef:
                      fieldPath: metadata.namespace
                # Same key=value as the label set on this Pod template.
                - name: MONITOR_LABEL
                  value: app=clickhouse-monitor
---
apiVersion: clickhouse.altinity.com/v1
kind: ClickHouseInstallation
metadata:
Expand Down Expand Up @@ -4908,7 +5007,13 @@ spec:
volumeMounts:
- mountPath: /docker-entrypoint-initdb.d
name: clickhouse-configmap-volume
- mountPath: /var/lib/clickhouse
name: clickhouse-storage-volume
volumes:
- configMap:
name: clickhouse-mounted-configmap-44mdg7d2b2
name: clickhouse-mounted-configmap-dkbmg82ctg
name: clickhouse-configmap-volume
- emptyDir:
medium: Memory
sizeLimit: 8Gi
name: clickhouse-storage-volume

0 comments on commit 6c4e5a3

Please sign in to comment.