Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -559,8 +559,11 @@ def validate_telemetry_config(
# =========================================================================
# Vector-LDMS bridge can only be enabled when LDMS source is enabled
vector_ldms_enabled = vector_ldms.get("metrics_enabled", False)
vector_ome_metrics_enabled = vector_ome.get("metrics_enabled", False)
vector_ome_logs_enabled = vector_ome.get("logs_enabled", False)
ldms_source_enabled = ldms_source.get("metrics_enabled", False)

# Validation 1: Vector-LDMS requires LDMS source to be enabled
if vector_ldms_enabled and not ldms_source_enabled:
errors.append(create_error_msg(
"telemetry_bridges.vector_ldms.metrics_enabled",
Expand All @@ -579,6 +582,44 @@ def validate_telemetry_config(
f"ldms_source.metrics_enabled={ldms_source_enabled}"
)

# Validation 2: an enabled LDMS source requires the Vector-LDMS bridge.
# LDMS only supports Kafka collection, so without the bridge its metrics
# never reach VictoriaMetrics.
if ldms_source_enabled and not vector_ldms_enabled:
    errors.append(create_error_msg(
        "telemetry_sources.ldms.metrics_enabled",
        "true",
        # Adjacent literals are joined verbatim, so every fragment except the
        # last must end with a space to keep the sentence readable.
        "LDMS source is enabled but Vector-LDMS bridge is disabled. "
        "LDMS metrics can only reach VictoriaMetrics via the Vector-LDMS bridge. "
        "If you want to check LDMS metrics on VictoriaMetrics, "
        "set telemetry_bridges.vector_ldms.metrics_enabled to true in telemetry_config.yml"
    ))
    # Record both flag values so the failing combination is visible in the log.
    logger.error(
        "LDMS source enabled without Vector-LDMS bridge: "
        f"ldms_source.metrics_enabled={ldms_source_enabled}, "
        f"vector_ldms.metrics_enabled={vector_ldms_enabled}"
    )

# # Validation 3: Verify Kafka collection target for LDMS
# ldms_collection_targets = ldms_source.get("collection_targets", [])
# if ldms_source_enabled and 'kafka' not in ldms_collection_targets:
# errors.append(create_error_msg(
# "telemetry_sources.ldms.collection_targets",
# str(ldms_collection_targets),
# "LDMS source requires 'kafka' in collection_targets. "
# "LDMS only supports Kafka-based collection."
# ))
# logger.error(
# f"LDMS collection_targets missing 'kafka': {ldms_collection_targets}"
# )

# Validation 3: Log Vector-OME bridge status
if vector_ome_metrics_enabled or vector_ome_logs_enabled:
logger.info(
"Vector-OME bridge validation: "
f"metrics_enabled={vector_ome_metrics_enabled}, "
f"logs_enabled={vector_ome_logs_enabled}"
)

# =========================================================================
# Validate PowerScale telemetry configuration
# =========================================================================
Expand Down
2 changes: 1 addition & 1 deletion input/config/x86_64/rhel/10.0/service_k8s.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
{ "package": "docker.io/victoriametrics/operator", "tag": "v0.68.3", "type": "image" },
{ "package": "docker.io/victoriametrics/operator", "tag": "config-reloader-v0.68.3", "type": "image" },
{ "package": "victoria-metrics-operator-0.59.3", "type": "tarball", "url": "https://github.com/VictoriaMetrics/helm-charts/releases/download/victoria-metrics-operator-0.59.3/victoria-metrics-operator-0.59.3.tgz" },
{ "package": "docker.io/timberio/vector", "tag": "0.54.0-alpine", "type": "image" },
{ "package": "docker.io/timberio/vector", "tag": "0.54.0-debian", "type": "image" },
{ "package": "apptainer", "type": "rpm", "repo_name": "epel" },
{ "package": "doca-ofed", "type": "rpm_repo", "repo_name": "doca" }
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,14 +332,6 @@
ipAddressPools:
- first-pool

{% if idrac_telemetry_support or ldms_support %}
- path: /root/telemetry.sh
owner: root:root
permissions: '0755'
content: |
{{ lookup('template', 'templates/telemetry/telemetry.sh.j2') | indent(12) }}
{% endif %}

runcmd:
- /usr/local/bin/set-ssh.sh
- "systemctl enable chronyd"
Expand Down Expand Up @@ -1048,7 +1040,7 @@

{% if idrac_telemetry_support or ldms_support %}
echo "Applying Telemetry Kubernetes deployments"
/root/telemetry.sh
{{ k8s_client_mount_path }}/telemetry/telemetry.sh
{% endif %}

{% if powerscale_log_enabled | default(false) | bool %}
Expand Down
52 changes: 52 additions & 0 deletions provision/roles/telemetry/tasks/deploy_vector_ldms.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
---
# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Deploy Vector-LDMS Pipeline
# Purpose: Kafka-to-VictoriaMetrics ingestion pipeline for LDMS metrics
# Spec Reference: Vector HLD Engineering Spec (ESPEC-VECTOR-2026-001) §4.1.3.6
# Deployment Flow: U1 (Fresh deployment) and U7 (Configuration change)
# Note: Pre-flight validation removed - Kafka cluster, topics, and secrets are deployed
# via kustomization.yaml AFTER this task generates the Vector manifests.
# Validation happens during Vector pod startup when it connects to Kafka.

# ----------------------------------------------------------------------------
# Vector-LDMS manifests
# Each task renders one template into telemetry/deployments/vector under the
# localhost share path and registers its result.
# ----------------------------------------------------------------------------

# ConfigMap (TOML configuration)
- name: Render Vector-LDMS ConfigMap
  register: vector_ldms_configmap_rendered
  ansible.builtin.template:
    mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
    dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/vector/vector-ldms-configmap.yaml"
    src: "telemetry/vector/vector-ldms-configmap.yaml.j2"

# Deployment
- name: Render Vector-LDMS Deployment
  register: vector_ldms_deployment_rendered
  ansible.builtin.template:
    mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
    dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/vector/vector-ldms-deployment.yaml"
    src: "telemetry/vector/vector-ldms-deployment.yaml.j2"

# Service
- name: Render Vector-LDMS Service
  register: vector_ldms_service_rendered
  ansible.builtin.template:
    mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
    dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/vector/vector-ldms-service.yaml"
    src: "telemetry/vector/vector-ldms-service.yaml.j2"
39 changes: 39 additions & 0 deletions provision/roles/telemetry/tasks/derive_sink_support_flags.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,20 @@
victoria_metrics_support: false
victoria_logs_support: false
kafka_support: false
cacheable: true

- name: Set ldms_support based on telemetry_config.yml
ansible.builtin.set_fact:
ldms_support: "{{ telemetry_config.telemetry_sources.ldms.metrics_enabled | default(false) | bool }}"
cacheable: true

- name: Map telemetry_sources to legacy feature flags
ansible.builtin.set_fact:
idrac_telemetry_support: "{{ telemetry_config.telemetry_sources.idrac.metrics_enabled | default(false) | bool }}"
powerscale_metrics_enabled: "{{ telemetry_config.telemetry_sources.powerscale.metrics_enabled | default(false) | bool }}"
powerscale_log_enabled: "{{ telemetry_config.telemetry_sources.powerscale.logs_enabled | default(false) | bool }}"
dcgm_support: "{{ telemetry_config.telemetry_sources.dcgm.metrics_enabled | default(true) | bool }}"
cacheable: true

- name: Map powerscale source + configurations to legacy powerscale_configurations
ansible.builtin.set_fact:
Expand All @@ -62,24 +65,60 @@
- name: Check if any source targets victoria_metrics
ansible.builtin.set_fact:
victoria_metrics_support: true
cacheable: true
when: >-
'victoria_metrics' in (telemetry_config.telemetry_sources.idrac.collection_targets | default([])) or
'victoria_metrics' in (telemetry_config.telemetry_sources.powerscale.collection_targets | default([]))

- name: Check if any source targets victoria_logs
ansible.builtin.set_fact:
victoria_logs_support: true
cacheable: true
when: >-
'victoria_logs' in (telemetry_config.telemetry_sources.powerscale.collection_targets | default([])) or
'victoria_logs' in (telemetry_config.telemetry_sources.idrac.collection_targets | default([]))

- name: Check if any source targets Kafka
ansible.builtin.set_fact:
kafka_support: true
cacheable: true
when: >-
'kafka' in (telemetry_config.telemetry_sources.idrac.collection_targets | default([])) or
'kafka' in (telemetry_config.telemetry_sources.ldms.collection_targets | default([]))

# =============================================================================
# VECTOR BRIDGE LOGIC - Determine sink requirements based on Vector bridges
# =============================================================================
# Vector-LDMS bridge: If enabled, requires Kafka + VictoriaMetrics
# Vector-OME bridge: If metrics enabled, requires Kafka + VictoriaMetrics
# If logs enabled, requires Kafka + VictoriaLogs
# =============================================================================

- name: Enable Kafka and VictoriaMetrics if Vector-LDMS bridge is enabled (requires LDMS source)
ansible.builtin.set_fact:
kafka_support: true
victoria_metrics_support: true
cacheable: true
when:
- telemetry_config.telemetry_bridges.vector_ldms.metrics_enabled | default(false) | bool
- ldms_support | default(false) | bool

- name: Enable Kafka and Victoria Metrics if Vector-OME metrics bridge is enabled
ansible.builtin.set_fact:
kafka_support: true
victoria_metrics_support: true
cacheable: true
when:
- telemetry_config.telemetry_bridges.vector_ome.metrics_enabled | default(false) | bool

- name: Enable Kafka and VictoriaLogs if Vector-OME logs bridge is enabled
ansible.builtin.set_fact:
kafka_support: true
victoria_logs_support: true
cacheable: true
when:
- telemetry_config.telemetry_bridges.vector_ome.logs_enabled | default(false) | bool

- name: Log derived sink support flags
ansible.builtin.debug:
msg: >
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@
- ldms_support
- "kafka.topics.ldms.name in kafka_topic_partitions"


- name: Generate Kafka topic files dynamically
ansible.builtin.template:
src: 'telemetry/kafka/kafka.topic.yaml.j2'
Expand Down Expand Up @@ -146,6 +145,50 @@
when: telemetry_config.telemetry_sources.idrac.metrics_enabled | default(false) | bool
tags: telemetry_deployment

# Render the vmagent/vlagent write buffers that sit in front of the Vector bridges.
# The block is entered when any Vector bridge flag is set; each task then applies
# its own bridge + sink condition.
- name: Vector vmagent configuration
  when:
    # Every flag is normalised with default(false) | bool so a missing key is
    # treated as "disabled" instead of raising an undefined-variable error.
    # vector_ome.logs_enabled is part of the guard because the vlagent-vector
    # tasks below depend on it; without it they could never run when only the
    # OME logs bridge is enabled.
    - >-
      (telemetry_config.telemetry_bridges.vector_ldms.metrics_enabled | default(false) | bool) or
      (telemetry_config.telemetry_bridges.vector_ome.metrics_enabled | default(false) | bool) or
      (telemetry_config.telemetry_bridges.vector_ome.logs_enabled | default(false) | bool)
  block:
    # NOTE(review): vmagent-vector is rendered only for the Vector-LDMS bridge;
    # confirm whether vector_ome.metrics_enabled should also trigger it.
    - name: Render vmagent-vector Deployment (write buffer for Vector bridges)
      ansible.builtin.template:
        src: 'telemetry/vector/vmagent-vector-deployment.yaml.j2'
        dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/vector/vmagent-vector-deployment.yaml"
        mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
      when:
        - telemetry_config.telemetry_bridges.vector_ldms.metrics_enabled | default(false) | bool
        - victoria_metrics_support | default(false) | bool
      tags: telemetry_deployment

    - name: Render vmagent-vector Service (write buffer for Vector bridges)
      ansible.builtin.template:
        src: 'telemetry/vector/vmagent-vector-service.yaml.j2'
        dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/vector/vmagent-vector-service.yaml"
        mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
      when:
        - telemetry_config.telemetry_bridges.vector_ldms.metrics_enabled | default(false) | bool
        - victoria_metrics_support | default(false) | bool
      tags: telemetry_deployment

    # Log write buffer: rendered only when the Vector-OME logs bridge is on
    # and a VictoriaLogs sink has been derived.
    - name: Render vlagent-vector Deployment (log write buffer for Vector-OME)
      ansible.builtin.template:
        src: 'telemetry/vector/vlagent-vector-deployment.yaml.j2'
        dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/vector/vlagent-vector-deployment.yaml"
        mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
      when:
        - telemetry_config.telemetry_bridges.vector_ome.logs_enabled | default(false) | bool
        - victoria_logs_support | default(false) | bool
      tags: telemetry_deployment

    - name: Render vlagent-vector Service (log write buffer for Vector-OME)
      ansible.builtin.template:
        src: 'telemetry/vector/vlagent-vector-service.yaml.j2'
        dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/vector/vlagent-vector-service.yaml"
        mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
      when:
        - telemetry_config.telemetry_bridges.vector_ome.logs_enabled | default(false) | bool
        - victoria_logs_support | default(false) | bool
      tags: telemetry_deployment

- name: Deploy telemetry cleanup script
ansible.builtin.template:
src: 'telemetry/cleanup_telemetry.sh.j2'
Expand Down
34 changes: 34 additions & 0 deletions provision/roles/telemetry/tasks/generate_telemetry_script.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Generate Telemetry Deployment Script
# Purpose: Create telemetry.sh script in shared directory for cloud-init execution
# This replaces the inline script generation in cloud-init templates
#
# The script is created at: {{ k8s_client_share_path }}/telemetry/telemetry.sh
# Cloud-init will execute this script during node provisioning to deploy telemetry stack

# Render telemetry.sh from the role's template into the telemetry directory
# under the localhost share path. The template receives k8s_client_mount_path
# set to that same share path; the file mode comes from file_permissions_755.
- name: Create telemetry deployment script
  vars:
    k8s_client_mount_path: "{{ hostvars['localhost']['k8s_client_share_path'] }}"
  ansible.builtin.template:
    mode: "{{ hostvars['localhost']['file_permissions_755'] }}"
    dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/telemetry.sh"
    src: "{{ role_path }}/templates/telemetry/telemetry.sh.j2"

# Report where the script was written; printed only at verbosity 1 or higher.
- name: Display telemetry deployment script location
  ansible.builtin.debug:
    verbosity: 1
    msg: "Telemetry deployment script created at {{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/telemetry.sh"
30 changes: 30 additions & 0 deletions provision/roles/telemetry/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,25 @@
ldms_support | default(false) | bool
ansible.builtin.include_tasks: generate_telemetry_deployments.yml

- name: Deploy Vector-LDMS bridge (Kafka-to-VictoriaMetrics pipeline)
ansible.builtin.include_tasks: deploy_vector_ldms.yml
when:
- telemetry_config.telemetry_bridges.vector_ldms.metrics_enabled | default(false) | bool
- victoria_metrics_support | default(false) | bool
tags:
- telemetry_deployment
- vector_ldms

# - name: Deploy Vector-OME bridge (Kafka-to-Victoria pipeline for OME data)
# ansible.builtin.include_tasks: deploy_vector_ome.yml
# when:
# - telemetry_config.telemetry_bridges.vector_ome.metrics_enabled | default(false) | bool or
# telemetry_config.telemetry_bridges.vector_ome.logs_enabled | default(false) | bool
# - kafka_support | default(false) | bool
# tags:
# - telemetry_deployment
# - vector_ome

- name: Configure of k8s telemetry service
when:
- telemetry_config.telemetry_sources.idrac.metrics_enabled | default(false) | bool
Expand Down Expand Up @@ -96,6 +115,17 @@
- ldms_support
- pxe_changed | default(false) | bool

- name: Generate telemetry deployment script
ansible.builtin.include_tasks: generate_telemetry_script.yml
when:
- >-
(telemetry_config.telemetry_sources.idrac.metrics_enabled | default(false) | bool) or
(telemetry_config.telemetry_sources.ldms.metrics_enabled | default(false) | bool) or
(telemetry_config.telemetry_sources.powerscale.metrics_enabled | default(false) | bool) or
ldms_support | default(false) | bool
tags:
- telemetry_deployment

- name: Apply telemetry configurations on upgrade
ansible.builtin.include_tasks: apply_telemetry_on_upgrade.yml
when:
Expand Down
12 changes: 12 additions & 0 deletions provision/roles/telemetry/tasks/telemetry_prereq.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,15 @@
src: telemetry/victoria/victoria-tls-secret.yaml.j2
dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/victoria-tls-secret.yaml"
mode: "{{ hostvars['localhost']['file_permissions_644'] }}"

# Create Vector deployment subdirectory
Comment thread
Kratika-P marked this conversation as resolved.
- name: Create Vector deployment subdirectory
ansible.builtin.file:
path: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/vector"
state: directory
mode: "{{ hostvars['localhost']['dir_permissions_755'] }}"
when:
- telemetry_config.telemetry_bridges.vector_ldms.metrics_enabled | default(false) | bool or
telemetry_config.telemetry_bridges.vector_ome.metrics_enabled | default(false) | bool or
telemetry_config.telemetry_bridges.vector_ome.logs_enabled | default(false) | bool
tags: telemetry_deployment
Loading
Loading