From ae2011aab1c3cf3ceb63195e23e59801e14f7a45 Mon Sep 17 00:00:00 2001 From: Valentin Volkl Date: Thu, 14 Aug 2025 16:08:59 +0200 Subject: [PATCH 1/3] new metric names --- README.md | 97 ++-- cvmfs-client-prometheus.sh | 203 +++++++- grafana-dashboard.json | 950 +++++++++++++++++++++++++++++++++++++ 3 files changed, 1190 insertions(+), 60 deletions(-) create mode 100644 grafana-dashboard.json diff --git a/README.md b/README.md index 17765dc..f221f3e 100644 --- a/README.md +++ b/README.md @@ -88,45 +88,68 @@ curl http://localhost:9868 ## Metrics Summary -The exporter collects the following categories of metrics: - -### Cache Metrics -- `cvmfs_cached_bytes` - Currently cached data size -- `cvmfs_pinned_bytes` - Pinned cache data size -- `cvmfs_total_cache_size_bytes` - Configured cache limit -- `cvmfs_physical_cache_size_bytes` - Physical cache volume size -- `cvmfs_physical_cache_avail_bytes` - Available cache space -- `cvmfs_hitrate` - Cache hit rate percentage -- `cvmfs_ncleanup24` - Cache cleanups in last 24 hours - -### Network & Download Metrics -- `cvmfs_rx_total` - Total bytes downloaded since mount -- `cvmfs_ndownload_total` - Total files downloaded since mount -- `cvmfs_speed` - Average download speed -- `cvmfs_proxy` - Available proxy servers -- `cvmfs_active_proxy` - Currently active proxy -- `cvmfs_timeout` - Proxy connection timeout -- `cvmfs_timeout_direct` - Direct connection timeout - -### Repository Status Metrics +The exporter collects the following categories of metrics with consistent naming: + +### Cache Metrics (`cvmfs_cache_*`) +- `cvmfs_cache_cached_bytes` - Currently cached data size +- `cvmfs_cache_pinned_bytes` - Pinned cache data size +- `cvmfs_cache_total_size_bytes` - Configured cache limit +- `cvmfs_cache_physical_size_bytes` - Physical cache volume size +- `cvmfs_cache_physical_avail_bytes` - Available cache space +- `cvmfs_cache_hitrate` - Cache hit rate percentage +- `cvmfs_cache_ncleanup24` - Cache cleanups in last 24 hours +- 
`cvmfs_cache_mode` - Cache mode (0=unknown, 1=read-write, 2=read-only) + +### Network & Download Metrics (`cvmfs_net_*`) +- `cvmfs_net_rx_total` - Total bytes downloaded since mount +- `cvmfs_net_ndownload_total` - Total files downloaded since mount +- `cvmfs_net_speed` - Average download speed +- `cvmfs_net_proxy` - Available proxy servers +- `cvmfs_net_active_proxy` - Currently active proxy +- `cvmfs_net_timeout` - Proxy connection timeout +- `cvmfs_net_timeout_direct` - Direct connection timeout + +### Repository Status Metrics (`cvmfs_repo_*`) - `cvmfs_repo` - Repository version and revision information -- `cvmfs_uptime_seconds` - Time since repository mount -- `cvmfs_mount_epoch_timestamp` - Repository mount timestamp +- `cvmfs_repo_uptime_seconds` - Time since repository mount +- `cvmfs_repo_mount_epoch_timestamp` - Repository mount timestamp - `cvmfs_repo_expires_seconds` - Root catalog expiration time - -### System Resource Metrics -- `cvmfs_cpu_user_total` - CPU time in userspace -- `cvmfs_cpu_system_total` - CPU time in kernel space -- `cvmfs_maxfd` - Maximum file descriptors available -- `cvmfs_usedfd` - Currently used file descriptors -- `cvmfs_ndiropen` - Number of open directories -- `cvmfs_pid` - CVMFS process ID - -### Error & Monitoring Metrics -- `cvmfs_nioerr_total` - Total I/O errors encountered -- `cvmfs_timestamp_last_ioerr` - Timestamp of last I/O error -- `cvmfs_nclg` - Number of loaded nested catalogs -- `cvmfs_inode_max` - Highest possible inode number +- `cvmfs_repo_version` - Numeric repository version for easier querying +- `cvmfs_repo_revision` - Repository revision number +- `cvmfs_repo_nclg` - Number of loaded nested catalogs + +### System Resource Metrics (`cvmfs_sys_*`) +- `cvmfs_sys_cpu_user_total` - CPU time in userspace +- `cvmfs_sys_cpu_system_total` - CPU time in kernel space +- `cvmfs_sys_maxfd` - Maximum file descriptors available +- `cvmfs_sys_usedfd` - Currently used file descriptors +- `cvmfs_sys_useddirp` - File 
descriptors issued to clients +- `cvmfs_sys_ndiropen` - Number of open directories +- `cvmfs_sys_pid` - CVMFS process ID +- `cvmfs_sys_inode_max` - Highest possible inode number +- `cvmfs_sys_memory_usage_bytes` - CVMFS process memory usage +- `cvmfs_sys_nioerr_total` - Total I/O errors encountered +- `cvmfs_sys_timestamp_last_ioerr` - Timestamp of last I/O error +- `cvmfs_sys_drainout_mode` - Drainout mode status +- `cvmfs_sys_maintenance_mode` - Maintenance mode status +- `cvmfs_sys_nfs_mode` - NFS mode enabled status + +### Internal Affairs Metrics (`cvmfs_internal_*`) +- `cvmfs_internal_pathstring_*` - PathString statistics +- `cvmfs_internal_namestring_*` - NameString statistics +- `cvmfs_internal_linkstring_*` - LinkString statistics +- `cvmfs_internal_inode_tracker_*` - Inode tracker statistics +- `cvmfs_internal_dentry_tracker_*` - Dentry tracker statistics +- `cvmfs_internal_page_cache_tracker_*` - Page cache tracker statistics +- `cvmfs_internal_sqlite_*` - SQLite internal statistics + +## Version Compatibility + +The script automatically detects the CVMFS version and adapts accordingly: + +- **CVMFS newer than 2.13.2**: Uses the native `cvmfs_talk metrics prometheus` command with consistent metric naming +- **CVMFS 2.13.2 (exact)**: Uses the same native command, then applies postprocessing to rename metrics for consistency +- **Older versions**: Uses legacy extended attribute collection with consistent naming ## Configuration diff --git a/cvmfs-client-prometheus.sh b/cvmfs-client-prometheus.sh index 2429167..5744cfd 100755 --- a/cvmfs-client-prometheus.sh +++ b/cvmfs-client-prometheus.sh @@ -26,6 +26,20 @@ declare -A CVMFS_EXTENDED_ATTRIBUTE_GAUGES=( ['usedfd']='Shows the number of open directories currently used by file system clients.' 
) +# Mapping of extended attributes to new metric names +declare -A CVMFS_EXTENDED_ATTRIBUTE_NAMES=( + ['hitrate']='cvmfs_cache_hitrate' + ['inode_max']='cvmfs_sys_inode_max' + ['maxfd']='cvmfs_sys_maxfd' + ['ncleanup24']='cvmfs_cache_ncleanup24' + ['nclg']='cvmfs_repo_nclg' + ['ndiropen']='cvmfs_sys_ndiropen' + ['pid']='cvmfs_sys_pid' + ['speed']='cvmfs_net_speed' + ['useddirp']='cvmfs_sys_useddirp' + ['usedfd']='cvmfs_sys_usedfd' +) + ############################################################# usage() { echo "Usage: $0 [-h|--help] [--http] [--non-standard-mountpoints]" >&2 @@ -119,7 +133,8 @@ get_cvmfs_repo_extended_attribute_gauge_metrics() { for attribute in "${!CVMFS_EXTENDED_ATTRIBUTE_GAUGES[@]}"; do local result result=$(attr -g "${attribute}" "${repomountpoint}" | tail -n +2) - generate_metric "cvmfs_${attribute}" 'gauge' "${CVMFS_EXTENDED_ATTRIBUTE_GAUGES[${attribute}]}" "repo=\"${fqrn}\"" "${result}" + local metric_name="${CVMFS_EXTENDED_ATTRIBUTE_NAMES[${attribute}]}" + generate_metric "${metric_name}" 'gauge' "${CVMFS_EXTENDED_ATTRIBUTE_GAUGES[${attribute}]}" "repo=\"${fqrn}\"" "${result}" done } @@ -151,7 +166,7 @@ get_cvmfs_repo_proxy_metrics() { break fi done - generate_metric "cvmfs_proxy" "gauge" "Shows all registered proxies for this repository." "repo=\"${fqrn}\",group=\"${my_proxy_group}\",url=\"${proxy}\"" 1 + generate_metric "cvmfs_net_proxy" "gauge" "Shows all registered proxies for this repository." "repo=\"${fqrn}\",group=\"${my_proxy_group}\",url=\"${proxy}\"" 1 done } @@ -173,25 +188,25 @@ get_cvmfs_repo_metrics() { local cached_bytes cached_bytes=$(cvmfs_talk -i "${reponame}" cache size | tr -d ')(' | tr -s '[:space:]' | cut -d ' ' -f 6) - generate_metric 'cvmfs_cached_bytes' 'gauge' 'CVMFS currently cached bytes.' "repo=\"${fqrn}\"" "${cached_bytes}" + generate_metric 'cvmfs_cache_cached_bytes' 'gauge' 'CVMFS currently cached bytes.' 
"repo=\"${fqrn}\"" "${cached_bytes}" local pinned_bytes pinned_bytes=$(cvmfs_talk -i "${reponame}" cache size | tr -d ')(' | tr -s '[:space:]' | cut -d ' ' -f 10) - generate_metric 'cvmfs_pinned_bytes' 'gauge' 'CVMFS currently pinned bytes.' "repo=\"${fqrn}\"" "${pinned_bytes}" + generate_metric 'cvmfs_cache_pinned_bytes' 'gauge' 'CVMFS currently pinned bytes.' "repo=\"${fqrn}\"" "${pinned_bytes}" local total_cache_size_mb total_cache_size_mb=$(cvmfs_talk -i "${reponame}" parameters | grep CVMFS_QUOTA_LIMIT | tr '=' ' ' | tr -s '[:space:]' | cut -d ' ' -f 2) local total_cache_size total_cache_size=$((total_cache_size_mb * 1024 * 1024)) - generate_metric 'cvmfs_total_cache_size_bytes' 'gauge' 'CVMFS configured cache size via CVMFS_QUOTA_LIMIT.' "repo=\"${fqrn}\"" "${total_cache_size}" + generate_metric 'cvmfs_cache_total_size_bytes' 'gauge' 'CVMFS configured cache size via CVMFS_QUOTA_LIMIT.' "repo=\"${fqrn}\"" "${total_cache_size}" local cache_volume_max cache_volume_max=$(df -B1 "${cache_volume}" | tail -n 1 | tr -s '[:space:]' | cut -d ' ' -f 2) - generate_metric 'cvmfs_physical_cache_size_bytes' 'gauge' 'CVMFS cache volume physical size.' "repo=\"${fqrn}\"" "${cache_volume_max}" + generate_metric 'cvmfs_cache_physical_size_bytes' 'gauge' 'CVMFS cache volume physical size.' "repo=\"${fqrn}\"" "${cache_volume_max}" local cache_volume_free cache_volume_free=$(df -B1 "${cache_volume}" | tail -n 1 | tr -s '[:space:]' | cut -d ' ' -f 4) - generate_metric 'cvmfs_physical_cache_avail_bytes' 'gauge' 'CVMFS cache volume physical free space available.' "repo=\"${fqrn}\"" "${cache_volume_free}" + generate_metric 'cvmfs_cache_physical_avail_bytes' 'gauge' 'CVMFS cache volume physical free space available.' 
"repo=\"${fqrn}\"" "${cache_volume_free}" local cvmfs_mount_version cvmfs_mount_version=$(attr -g version "${repomountpoint}" | tail -n +2) @@ -210,7 +225,7 @@ get_cvmfs_repo_metrics() { cvmfs_mount_rx_kb=$(attr -g rx "${repomountpoint}" | tail -n +2) local cvmfs_mount_rx cvmfs_mount_rx=$((cvmfs_mount_rx_kb * 1024)) - generate_metric 'cvmfs_rx_total' 'counter' 'Shows the overall amount of downloaded bytes since mounting.' "repo=\"${fqrn}\"" "${cvmfs_mount_rx}" + generate_metric 'cvmfs_net_rx_total' 'counter' 'Shows the overall amount of downloaded bytes since mounting.' "repo=\"${fqrn}\"" "${cvmfs_mount_rx}" local cvmfs_mount_uptime_minutes cvmfs_mount_uptime_minutes=$(attr -g uptime "${repomountpoint}" | tail -n +2) @@ -222,8 +237,8 @@ get_cvmfs_repo_metrics() { rounded_now_to_minute=$((now - (now % 60))) cvmfs_mount_uptime=$((cvmfs_mount_uptime_minutes * 60)) cvmfs_mount_epoch_time=$((rounded_now_to_minute - cvmfs_mount_uptime)) - generate_metric 'cvmfs_uptime_seconds' 'counter' 'Shows the time since the repo was mounted.' "repo=\"${fqrn}\"" "${cvmfs_mount_uptime}" - generate_metric 'cvmfs_mount_epoch_timestamp' 'counter' 'Shows the epoch time the repo was mounted.' "repo=\"${fqrn}\"" "${cvmfs_mount_epoch_time}" + generate_metric 'cvmfs_repo_uptime_seconds' 'counter' 'Shows the time since the repo was mounted.' "repo=\"${fqrn}\"" "${cvmfs_mount_uptime}" + generate_metric 'cvmfs_repo_mount_epoch_timestamp' 'counter' 'Shows the epoch time the repo was mounted.' "repo=\"${fqrn}\"" "${cvmfs_mount_epoch_time}" local cvmfs_repo_expires_min cvmfs_repo_expires_min=$(attr -g expires "${repomountpoint}" | tail -n +2) @@ -237,23 +252,23 @@ get_cvmfs_repo_metrics() { local cvmfs_mount_ndownload cvmfs_mount_ndownload=$(attr -g ndownload "${repomountpoint}" | tail -n +2) - generate_metric 'cvmfs_ndownload_total' 'counter' 'Shows the overall number of downloaded files since mounting.' 
"repo=\"${fqrn}\"" "${cvmfs_mount_ndownload}" + generate_metric 'cvmfs_net_ndownload_total' 'counter' 'Shows the overall number of downloaded files since mounting.' "repo=\"${fqrn}\"" "${cvmfs_mount_ndownload}" local cvmfs_mount_nioerr cvmfs_mount_nioerr=$(attr -g nioerr "${repomountpoint}" | tail -n +2) - generate_metric 'cvmfs_nioerr_total' 'counter' 'Shows the total number of I/O errors encountered since mounting.' "repo=\"${fqrn}\"" "${cvmfs_mount_nioerr}" + generate_metric 'cvmfs_sys_nioerr_total' 'counter' 'Shows the total number of I/O errors encountered since mounting.' "repo=\"${fqrn}\"" "${cvmfs_mount_nioerr}" local cvmfs_mount_timeout cvmfs_mount_timeout=$(attr -g timeout "${repomountpoint}" | tail -n +2) - generate_metric 'cvmfs_timeout' 'gauge' 'Shows the timeout for proxied connections in seconds.' "repo=\"${fqrn}\"" "${cvmfs_mount_timeout}" + generate_metric 'cvmfs_net_timeout' 'gauge' 'Shows the timeout for proxied connections in seconds.' "repo=\"${fqrn}\"" "${cvmfs_mount_timeout}" local cvmfs_mount_timeout_direct cvmfs_mount_timeout_direct=$(attr -g timeout_direct "${repomountpoint}" | tail -n +2) - generate_metric 'cvmfs_timeout_direct' 'gauge' 'Shows the timeout for direct connections in seconds.' "repo=\"${fqrn}\"" "${cvmfs_mount_timeout_direct}" + generate_metric 'cvmfs_net_timeout_direct' 'gauge' 'Shows the timeout for direct connections in seconds.' "repo=\"${fqrn}\"" "${cvmfs_mount_timeout_direct}" local cvmfs_mount_timestamp_last_ioerr cvmfs_mount_timestamp_last_ioerr=$(attr -g timestamp_last_ioerr "${repomountpoint}" | tail -n +2) - generate_metric 'cvmfs_timestamp_last_ioerr' 'counter' 'Shows the timestamp of the last ioerror.' "repo=\"${fqrn}\"" "${cvmfs_mount_timestamp_last_ioerr}" + generate_metric 'cvmfs_sys_timestamp_last_ioerr' 'counter' 'Shows the timestamp of the last ioerror.' "repo=\"${fqrn}\"" "${cvmfs_mount_timestamp_last_ioerr}" local cvmfs_repo_pid_statline cvmfs_repo_pid_statline=$(/dev/null | head -n 1) + if [ $? 
-ne 0 ]; then + # cvmfs2 command not found + return 1 + fi + + local version + version=$(echo "$version_output" | grep -o '[0-9]\+\.[0-9]\+\.[0-9]\+' | head -n 1) + if [ -z "$version" ]; then + # Could not parse version + return 1 + fi + + [ "$version" = "2.13.2" ] +} + +postprocess_metrics_for_2132() { + # Postprocess metrics for CVMFS version 2.13.2 to rename them for consistency + # This function only runs if cvmfs2 --version equals 2.13.2 + + local tmpfile_new + tmpfile_new=$(mktemp) + + # Check if TMPFILE exists and is readable + if [[ ! -f "${TMPFILE}" ]]; then + return 0 + fi + + # Process the TMPFILE line by line to rename metrics + while IFS= read -r line; do + # Skip empty lines and comments + if [[ -z "$line" || "$line" =~ ^# ]]; then + echo "$line" >> "$tmpfile_new" + continue + fi + + # Cache metrics - rename to cvmfs_cache_* + if [[ "$line" =~ ^cvmfs_cached_bytes ]]; then + echo "${line/cvmfs_cached_bytes/cvmfs_cache_cached_bytes}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_pinned_bytes ]]; then + echo "${line/cvmfs_pinned_bytes/cvmfs_cache_pinned_bytes}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_total_cache_size_bytes ]]; then + echo "${line/cvmfs_total_cache_size_bytes/cvmfs_cache_total_size_bytes}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_physical_cache_size_bytes ]]; then + echo "${line/cvmfs_physical_cache_size_bytes/cvmfs_cache_physical_size_bytes}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_physical_cache_avail_bytes ]]; then + echo "${line/cvmfs_physical_cache_avail_bytes/cvmfs_cache_physical_avail_bytes}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_hitrate ]]; then + echo "${line/cvmfs_hitrate/cvmfs_cache_hitrate}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_ncleanup24 ]]; then + echo "${line/cvmfs_ncleanup24/cvmfs_cache_ncleanup24}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_cache_mode ]]; then + echo "${line/cvmfs_cache_mode/cvmfs_cache_mode}" >> "$tmpfile_new" + + # Network metrics - rename to cvmfs_net_* + elif [[ 
"$line" =~ ^cvmfs_rx_total ]]; then + echo "${line/cvmfs_rx_total/cvmfs_net_rx_total}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_ndownload_total ]]; then + echo "${line/cvmfs_ndownload_total/cvmfs_net_ndownload_total}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_speed ]]; then + echo "${line/cvmfs_speed/cvmfs_net_speed}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_proxy ]]; then + echo "${line/cvmfs_proxy/cvmfs_net_proxy}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_active_proxy ]]; then + echo "${line/cvmfs_active_proxy/cvmfs_net_active_proxy}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_timeout ]]; then + echo "${line/cvmfs_timeout/cvmfs_net_timeout}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_timeout_direct ]]; then + echo "${line/cvmfs_timeout_direct/cvmfs_net_timeout_direct}" >> "$tmpfile_new" + + # System resource metrics - rename to cvmfs_sys_* + elif [[ "$line" =~ ^cvmfs_cpu_user_total ]]; then + echo "${line/cvmfs_cpu_user_total/cvmfs_sys_cpu_user_total}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_cpu_system_total ]]; then + echo "${line/cvmfs_cpu_system_total/cvmfs_sys_cpu_system_total}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_usedfd ]]; then + echo "${line/cvmfs_usedfd/cvmfs_sys_usedfd}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_useddirp ]]; then + echo "${line/cvmfs_useddirp/cvmfs_sys_useddirp}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_ndiropen ]]; then + echo "${line/cvmfs_ndiropen/cvmfs_sys_ndiropen}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_pid ]]; then + echo "${line/cvmfs_pid/cvmfs_sys_pid}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_nclg ]]; then + echo "${line/cvmfs_nclg/cvmfs_repo_nclg}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_inode_max ]]; then + echo "${line/cvmfs_inode_max/cvmfs_sys_inode_max}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_drainout_mode ]]; then + echo "${line/cvmfs_drainout_mode/cvmfs_sys_drainout_mode}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_maintenance_mode ]]; then + 
echo "${line/cvmfs_maintenance_mode/cvmfs_sys_maintenance_mode}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_nfs_mode ]]; then + echo "${line/cvmfs_nfs_mode/cvmfs_sys_nfs_mode}" >> "$tmpfile_new" + + # Repository metrics - keep cvmfs_repo_* as is + elif [[ "$line" =~ ^cvmfs_repo ]]; then + echo "$line" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_uptime_seconds ]]; then + echo "${line/cvmfs_uptime_seconds/cvmfs_repo_uptime_seconds}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_mount_epoch_timestamp ]]; then + echo "${line/cvmfs_mount_epoch_timestamp/cvmfs_repo_mount_epoch_timestamp}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_repo_expires_seconds ]]; then + echo "${line/cvmfs_repo_expires_seconds/cvmfs_repo_expires_seconds}" >> "$tmpfile_new" + + # Error metrics + elif [[ "$line" =~ ^cvmfs_nioerr_total ]]; then + echo "${line/cvmfs_nioerr_total/cvmfs_sys_nioerr_total}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_timestamp_last_ioerr ]]; then + echo "${line/cvmfs_timestamp_last_ioerr/cvmfs_sys_timestamp_last_ioerr}" >> "$tmpfile_new" + + # Internal affairs metrics - rename to cvmfs_internal_* + elif [[ "$line" =~ ^cvmfs_pathstring ]]; then + echo "${line/cvmfs_pathstring/cvmfs_internal_pathstring}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_namestring ]]; then + echo "${line/cvmfs_namestring/cvmfs_internal_namestring}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_linkstring ]]; then + echo "${line/cvmfs_linkstring/cvmfs_internal_linkstring}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_inode_tracker ]]; then + echo "${line/cvmfs_inode_tracker/cvmfs_internal_inode_tracker}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_dentry_tracker ]]; then + echo "${line/cvmfs_dentry_tracker/cvmfs_internal_dentry_tracker}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_page_cache_tracker ]]; then + echo "${line/cvmfs_page_cache_tracker/cvmfs_internal_page_cache_tracker}" >> "$tmpfile_new" + elif [[ "$line" =~ ^cvmfs_sqlite ]]; then + echo 
"${line/cvmfs_sqlite/cvmfs_internal_sqlite}" >> "$tmpfile_new" + + # Default: keep the line as is + else + echo "$line" >> "$tmpfile_new" + fi + done < "${TMPFILE}" + + # Replace the original TMPFILE with the processed one + mv "$tmpfile_new" "${TMPFILE}" +} + ############################################################# # List "uncommon" commands we expect for cmd in attr bc cvmfs_config cvmfs_talk grep; do @@ -393,8 +547,6 @@ for cmd in attr bc cvmfs_config cvmfs_talk grep; do done ############################################################# -# setup args in the right order for making getopt evaluation -# nice and easy. You'll need to read the manpages for more info args=$(getopt --options 'h' --longoptions 'help,http,non-standard-mountpoints' -- "$@") eval set -- "$args" @@ -447,6 +599,11 @@ for REPO in $REPO_LIST; do $METRICS_FUNCTION "${REPO}" done +# Apply postprocessing for version 2.13.2 to rename metrics for consistency +if check_cvmfs_version_exact; then + postprocess_metrics_for_2132 +fi + if [[ "${HTTP_HEADER}" == 'TRUE' ]]; then content_length=$(stat --printf="%s" "${TMPFILE}") echo -ne "HTTP/1.1 200 OK\r\n" diff --git a/grafana-dashboard.json b/grafana-dashboard.json new file mode 100644 index 0000000..b37e61b --- /dev/null +++ b/grafana-dashboard.json @@ -0,0 +1,950 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [], + "title": "Cache Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cvmfs_cache_cached_bytes{repo=~\"$repository\"}", + "interval": "", + "legendFormat": "Cached - {{repo}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cvmfs_cache_pinned_bytes{repo=~\"$repository\"}", + "interval": "", + "legendFormat": "Pinned - {{repo}}", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cvmfs_cache_total_size_bytes{repo=~\"$repository\"}", + "interval": "", + "legendFormat": "Total Limit - {{repo}}", + "refId": "C" + } + ], + "title": "Cache Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + 
"color": "yellow", + "value": 70 + }, + { + "color": "green", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cvmfs_cache_hitrate{repo=~\"$repository\"}", + "interval": "", + "legendFormat": "{{repo}}", + "refId": "A" + } + ], + "title": "Cache Hit Rate", + "type": "gauge" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 4, + "panels": [], + "title": "Network & Download Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ 
+ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "rate(cvmfs_net_rx_total{repo=~\"$repository\"}[5m])", + "interval": "", + "legendFormat": "Download Rate - {{repo}}", + "refId": "A" + } + ], + "title": "Download Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "rate(cvmfs_net_ndownload_total{repo=~\"$repository\"}[5m])", + "interval": "", + "legendFormat": "File Download Rate - {{repo}}", + "refId": "A" + } + ], + "title": "File Download Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 7, + "panels": [], + "title": "Repository Status", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cvmfs_repo_uptime_seconds{repo=~\"$repository\"}", + "interval": "", + "legendFormat": "Uptime - {{repo}}", + "refId": "A" + } + ], + "title": "Repository Uptime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cvmfs_repo_nclg{repo=~\"$repository\"}", + "interval": "", + "legendFormat": "Loaded Catalogs - {{repo}}", + "refId": "A" + } + ], + "title": "Loaded Nested Catalogs", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 10, + "panels": [], + "title": "System Resources", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS}" + }, + "expr": "rate(cvmfs_sys_cpu_user_total{repo=~\"$repository\"}[5m])", + "interval": "", + "legendFormat": "User CPU - {{repo}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "rate(cvmfs_sys_cpu_system_total{repo=~\"$repository\"}[5m])", + "interval": "", + "legendFormat": "System CPU - {{repo}}", + "refId": "B" + } + ], + "title": "CPU Usage Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cvmfs_sys_memory_usage_bytes{repo=~\"$repository\"}", + "interval": "", + "legendFormat": "Memory Usage - {{repo}}", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 13, + "panels": [], + "title": "Error 
Monitoring", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "rate(cvmfs_sys_nioerr_total{repo=~\"$repository\"}[5m])", + "interval": "", + "legendFormat": "I/O Error Rate - {{repo}}", + "refId": "A" + } + ], + "title": "I/O Error Rate", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 36, + "style": "dark", + "tags": [ + "cvmfs", + "monitoring" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(cvmfs_repo, repo)", + "hide": 0, + "includeAll": true, + "label": "Repository", + "multi": true, + "name": "repository", + "options": [], + "query": { + "query": "label_values(cvmfs_repo, repo)", + "refId": "StandardVariableQuery" + 
}, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "CVMFS Monitoring Dashboard", + "uid": "cvmfs-monitoring", + "version": 1, + "weekStart": "" +} From 644abbd65679c844b6099b075459c2a8c0c0fa23 Mon Sep 17 00:00:00 2001 From: Valentin Volkl Date: Thu, 14 Aug 2025 16:41:53 +0200 Subject: [PATCH 2/3] up --- cvmfs-client-prometheus.sh | 65 ++++++++++++++++++++++++++++++++++++-- grafana-dashboard.json | 52 +++++++++++++++--------------- 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/cvmfs-client-prometheus.sh b/cvmfs-client-prometheus.sh index 5744cfd..5af1403 100755 --- a/cvmfs-client-prometheus.sh +++ b/cvmfs-client-prometheus.sh @@ -431,8 +431,69 @@ postprocess_metrics_for_2132() { # Process the TMPFILE line by line to rename metrics while IFS= read -r line; do - # Skip empty lines and comments - if [[ -z "$line" || "$line" =~ ^# ]]; then + # Skip empty lines + if [[ -z "$line" ]]; then + echo "$line" >> "$tmpfile_new" + continue + fi + + # Process HELP and TYPE comments to rename metric names within them + if [[ "$line" =~ ^#\ (HELP|TYPE) ]]; then + # Apply the same renaming logic to metric names in HELP and TYPE comments + processed_line="$line" + + # Cache metrics - rename to cvmfs_cache_* + processed_line="${processed_line//cvmfs_cached_bytes/cvmfs_cache_cached_bytes}" + processed_line="${processed_line//cvmfs_pinned_bytes/cvmfs_cache_pinned_bytes}" + processed_line="${processed_line//cvmfs_total_cache_size_bytes/cvmfs_cache_total_size_bytes}" + processed_line="${processed_line//cvmfs_physical_cache_size_bytes/cvmfs_cache_physical_size_bytes}" + processed_line="${processed_line//cvmfs_physical_cache_avail_bytes/cvmfs_cache_physical_avail_bytes}" + processed_line="${processed_line//cvmfs_hitrate/cvmfs_cache_hitrate}" + processed_line="${processed_line//cvmfs_ncleanup24/cvmfs_cache_ncleanup24}" + + # 
Network metrics - rename to cvmfs_net_* + processed_line="${processed_line//cvmfs_rx_total/cvmfs_net_rx_total}" + processed_line="${processed_line//cvmfs_ndownload_total/cvmfs_net_ndownload_total}" + processed_line="${processed_line//cvmfs_speed/cvmfs_net_speed}" + processed_line="${processed_line//cvmfs_proxy/cvmfs_net_proxy}" + processed_line="${processed_line//cvmfs_active_proxy/cvmfs_net_active_proxy}" + processed_line="${processed_line//cvmfs_timeout_direct/cvmfs_net_timeout_direct}" + processed_line="${processed_line//cvmfs_timeout/cvmfs_net_timeout}" + + # System resource metrics - rename to cvmfs_sys_* + processed_line="${processed_line//cvmfs_cpu_user_total/cvmfs_sys_cpu_user_total}" + processed_line="${processed_line//cvmfs_cpu_system_total/cvmfs_sys_cpu_system_total}" + processed_line="${processed_line//cvmfs_usedfd/cvmfs_sys_usedfd}" + processed_line="${processed_line//cvmfs_useddirp/cvmfs_sys_useddirp}" + processed_line="${processed_line//cvmfs_ndiropen/cvmfs_sys_ndiropen}" + processed_line="${processed_line//cvmfs_pid/cvmfs_sys_pid}" + processed_line="${processed_line//cvmfs_inode_max/cvmfs_sys_inode_max}" + processed_line="${processed_line//cvmfs_drainout_mode/cvmfs_sys_drainout_mode}" + processed_line="${processed_line//cvmfs_maintenance_mode/cvmfs_sys_maintenance_mode}" + processed_line="${processed_line//cvmfs_nfs_mode/cvmfs_sys_nfs_mode}" + processed_line="${processed_line//cvmfs_nioerr_total/cvmfs_sys_nioerr_total}" + processed_line="${processed_line//cvmfs_timestamp_last_ioerr/cvmfs_sys_timestamp_last_ioerr}" + + # Repository metrics + processed_line="${processed_line//cvmfs_nclg/cvmfs_repo_nclg}" + processed_line="${processed_line//cvmfs_uptime_seconds/cvmfs_repo_uptime_seconds}" + processed_line="${processed_line//cvmfs_mount_epoch_timestamp/cvmfs_repo_mount_epoch_timestamp}" + + # Internal affairs metrics - rename to cvmfs_internal_* + processed_line="${processed_line//cvmfs_pathstring/cvmfs_internal_pathstring}" + 
processed_line="${processed_line//cvmfs_namestring/cvmfs_internal_namestring}" + processed_line="${processed_line//cvmfs_linkstring/cvmfs_internal_linkstring}" + processed_line="${processed_line//cvmfs_inode_tracker/cvmfs_internal_inode_tracker}" + processed_line="${processed_line//cvmfs_dentry_tracker/cvmfs_internal_dentry_tracker}" + processed_line="${processed_line//cvmfs_page_cache_tracker/cvmfs_internal_page_cache_tracker}" + processed_line="${processed_line//cvmfs_sqlite/cvmfs_internal_sqlite}" + + echo "$processed_line" >> "$tmpfile_new" + continue + fi + + # Skip other comments + if [[ "$line" =~ ^# ]]; then echo "$line" >> "$tmpfile_new" continue fi diff --git a/grafana-dashboard.json b/grafana-dashboard.json index b37e61b..5e7b51c 100644 --- a/grafana-dashboard.json +++ b/grafana-dashboard.json @@ -38,7 +38,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -113,7 +113,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "cvmfs_cache_cached_bytes{repo=~\"$repository\"}", "interval": "", @@ -123,7 +123,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "cvmfs_cache_pinned_bytes{repo=~\"$repository\"}", "interval": "", @@ -133,7 +133,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "cvmfs_cache_total_size_bytes{repo=~\"$repository\"}", "interval": "", @@ -147,7 +147,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -202,7 +202,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "cvmfs_cache_hitrate{repo=~\"$repository\"}", "interval": "", @@ -229,7 +229,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ 
-304,7 +304,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "rate(cvmfs_net_rx_total{repo=~\"$repository\"}[5m])", "interval": "", @@ -318,7 +318,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -393,7 +393,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "rate(cvmfs_net_ndownload_total{repo=~\"$repository\"}[5m])", "interval": "", @@ -420,7 +420,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -495,7 +495,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "cvmfs_repo_uptime_seconds{repo=~\"$repository\"}", "interval": "", @@ -509,7 +509,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -584,7 +584,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "cvmfs_repo_nclg{repo=~\"$repository\"}", "interval": "", @@ -611,7 +611,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -686,7 +686,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "rate(cvmfs_sys_cpu_user_total{repo=~\"$repository\"}[5m])", "interval": "", @@ -696,7 +696,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "rate(cvmfs_sys_cpu_system_total{repo=~\"$repository\"}[5m])", "interval": "", @@ -710,7 +710,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -785,7 +785,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": 
"cvmfs_sys_memory_usage_bytes{repo=~\"$repository\"}", "interval": "", @@ -812,7 +812,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -887,7 +887,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "expr": "rate(cvmfs_sys_nioerr_total{repo=~\"$repository\"}[5m])", "interval": "", @@ -916,9 +916,8 @@ }, "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, - "definition": "label_values(cvmfs_repo, repo)", "hide": 0, "includeAll": true, "label": "Repository", @@ -926,13 +925,14 @@ "name": "repository", "options": [], "query": { - "query": "label_values(cvmfs_repo, repo)", - "refId": "StandardVariableQuery" + "qryType": "", + "query": "label_values(cvmfs_cache_cached_bytes, repo)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, - "sort": 0, + "sort": 1, "type": "query" } ] From 8f51ff9a7d44d35d7793e6af3e5800741ff515dd Mon Sep 17 00:00:00 2001 From: Valentin Volkl Date: Thu, 14 Aug 2025 16:48:59 +0200 Subject: [PATCH 3/3] up --- grafana-dashboard.json | 950 ----------------------------------------- 1 file changed, 950 deletions(-) delete mode 100644 grafana-dashboard.json diff --git a/grafana-dashboard.json b/grafana-dashboard.json deleted file mode 100644 index 5e7b51c..0000000 --- a/grafana-dashboard.json +++ /dev/null @@ -1,950 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": null, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 1, - "panels": [], - 
"title": "Cache Metrics", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 1 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "cvmfs_cache_cached_bytes{repo=~\"$repository\"}", - "interval": "", - "legendFormat": "Cached - {{repo}}", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "cvmfs_cache_pinned_bytes{repo=~\"$repository\"}", - "interval": "", - "legendFormat": "Pinned - {{repo}}", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "cvmfs_cache_total_size_bytes{repo=~\"$repository\"}", - "interval": "", - "legendFormat": "Total Limit - {{repo}}", - "refId": "C" - } - ], - "title": "Cache Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, 
- "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 70 - }, - { - "color": "green", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 1 - }, - "id": 3, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "9.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "cvmfs_cache_hitrate{repo=~\"$repository\"}", - "interval": "", - "legendFormat": "{{repo}}", - "refId": "A" - } - ], - "title": "Cache Hit Rate", - "type": "gauge" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 4, - "panels": [], - "title": "Network & Download Metrics", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 10 - }, - "id": 5, - "options": { - "legend": { - "calcs": 
[], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(cvmfs_net_rx_total{repo=~\"$repository\"}[5m])", - "interval": "", - "legendFormat": "Download Rate - {{repo}}", - "refId": "A" - } - ], - "title": "Download Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 10 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(cvmfs_net_ndownload_total{repo=~\"$repository\"}[5m])", - "interval": "", - "legendFormat": "File Download Rate - {{repo}}", - "refId": "A" - } - ], - "title": "File Download Rate", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 7, - "panels": [], - "title": "Repository Status", - "type": "row" - }, - { - 
"datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 19 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "cvmfs_repo_uptime_seconds{repo=~\"$repository\"}", - "interval": "", - "legendFormat": "Uptime - {{repo}}", - "refId": "A" - } - ], - "title": "Repository Uptime", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - 
"mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 19 - }, - "id": 9, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "cvmfs_repo_nclg{repo=~\"$repository\"}", - "interval": "", - "legendFormat": "Loaded Catalogs - {{repo}}", - "refId": "A" - } - ], - "title": "Loaded Nested Catalogs", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 10, - "panels": [], - "title": "System Resources", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 28 - }, - "id": 11, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": 
"none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(cvmfs_sys_cpu_user_total{repo=~\"$repository\"}[5m])", - "interval": "", - "legendFormat": "User CPU - {{repo}}", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(cvmfs_sys_cpu_system_total{repo=~\"$repository\"}[5m])", - "interval": "", - "legendFormat": "System CPU - {{repo}}", - "refId": "B" - } - ], - "title": "CPU Usage Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 28 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "cvmfs_sys_memory_usage_bytes{repo=~\"$repository\"}", - "interval": "", - "legendFormat": "Memory Usage - {{repo}}", - "refId": "A" - } - ], - "title": "Memory Usage", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, 
- "y": 36 - }, - "id": 13, - "panels": [], - "title": "Error Monitoring", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "rate(cvmfs_sys_nioerr_total{repo=~\"$repository\"}[5m])", - "interval": "", - "legendFormat": "I/O Error Rate - {{repo}}", - "refId": "A" - } - ], - "title": "I/O Error Rate", - "type": "timeseries" - } - ], - "refresh": "30s", - "schemaVersion": 36, - "style": "dark", - "tags": [ - "cvmfs", - "monitoring" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "hide": 0, - "includeAll": true, - "label": "Repository", - "multi": true, - "name": "repository", - "options": [], - "query": { - "qryType": "", - "query": "label_values(cvmfs_cache_cached_bytes, repo)", - "refId": 
"PrometheusVariableQueryEditor-VariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "CVMFS Monitoring Dashboard", - "uid": "cvmfs-monitoring", - "version": 1, - "weekStart": "" -}