diff --git a/README.md b/README.md index ec76ccc..9a772e5 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Nagios-style checks against Kubernetes API. Designed for usage with Nagios, Icin Check apiserver health using tokenfile: ./check_kubernetes.sh -m apiserver -H https://<...>:6443 -t /path/to/tokenfile - OK. Kuberenetes apiserver health is OK + OK. Kubernetes apiserver health is OK Check whether all deployments are available using token: @@ -106,6 +106,12 @@ Checked failed jobs named 'good': ./check_kubernetes.sh -m jobs -n good OK: 0 failed jobs is below threshold +Check utilization of pvc (if it consumes more than the warning/critical threshold, in %): + + ./check_kubernetes.sh -m pvc + CRITICAL. Very high storage utilization on pvc prometheus-data: 93% (86106636288/157459890176 Bytes) + + ## Brief mode All modes support the -b brief option. In this mode, a single numerical output is returned. The number is positive on success and zero or negative on error. diff --git a/check_kubernetes.sh b/check_kubernetes.sh index 6bb85c0..e39a3c8 100755 --- a/check_kubernetes.sh +++ b/check_kubernetes.sh @@ -266,6 +266,8 @@ mode_pvc() { CRIT=${CRIT:-90} WARN_ERROR=0 CRIT_ERROR=0 + PVC_COUNT=0 + data="$(getJSON "get nodes" "api/v1/nodes")" [ $? -gt 0 ] && die "$data" nodes=($(echo "$data" | jq -r ".items[].metadata.name")) @@ -274,7 +276,6 @@ mode_pvc() { data="$(getJSON "get nodes" "api/v1/nodes/$node/proxy/stats/summary")" [ $? -gt 0 ] && die "$data" pods=($(echo "$data" | jq -r ".pods[].podRef.name")) - for pod in "${pods[@]}"; do pod_volumes="$(echo "$data" | jq -r ".pods[] | select(.podRef.name==\"$pod\") | .volume" 2>/dev/null)" [ "$pod_volumes" == "null" ] && continue @@ -292,6 +293,9 @@ mode_pvc() { volume_inodes_capacity=$(echo "$pvc_volumes" | jq -r ". 
| select(.name==\"$volume_name\") | .inodes") volume_bytes_utilization=$(echo "100 * $volume_bytes_used / $volume_bytes_capacity" | bc) volume_inodes_utilization=$(echo "100 * $volume_inodes_used / $volume_inodes_capacity" | bc) + + ((PVC_COUNT++)) + if [ "$volume_bytes_utilization" -gt "$WARN" ] && [ "$volume_bytes_utilization" -lt "$CRIT" ]; then echo "WARNING. High storage utilization on pvc $volume_name (namespace:$volumes_namespace): \ $volume_bytes_utilization% ($volume_bytes_used/$volume_bytes_capacity Bytes)" @@ -319,7 +323,7 @@ mode_pvc() { done if [ "$WARN_ERROR" -eq "0" ] && [ "$CRIT_ERROR" -eq "0" ]; then - echo "OK. No problem on pvc storage" + echo "OK. No problem on $PVC_COUNT pvc storage" elif [ "$WARN_ERROR" -ne "0" ] && [ "$CRIT_ERROR" -eq "0" ]; then exit 1 elif [ "$CRIT_ERROR" -ne "0" ]; then