From 9eafb8dcbcc993253b490e478afc8c850bbb7da8 Mon Sep 17 00:00:00 2001 From: Neil McKee Date: Tue, 25 Sep 2012 12:05:34 -0700 Subject: [PATCH] adjusted nvml metrics --- gmond/sflow.c | 10 +++++----- gmond/sflow_gmetric.h | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/gmond/sflow.c b/gmond/sflow.c index c2807f724..245c849b6 100644 --- a/gmond/sflow.c +++ b/gmond/sflow.c @@ -321,7 +321,7 @@ submit_sflow_string(Ganglia_host *hostdata, char *metric_prefix, EnumSFLOWGMetri #define SFLOW_CTR_MS_PC(ds, field, mS) ((mS) ? ((float)(SFLOW_CTR_DELTA(ds, field)) * (float)100.0 / (float)mS) : 0) #define SFLOW_CTR_DIVIDE(ds, num, denom) (SFLOW_CTR_DELTA(ds, denom) ? (float)(SFLOW_CTR_DELTA(ds, num)) / (float)(SFLOW_CTR_DELTA(ds, denom)) : 0) -#define SFLOW_GAUGE_DIVIDE(num, denom) ((denom) ? ((double)(num) / (double)(denom)) : (double)0) +/* #define SFLOW_GAUGE_DIVIDE(num, denom) ((denom) ? ((double)(num) / (double)(denom)) : (double)0) */ /* metrics may be marked as "unsupported" by the sender, so check for those reserved values */ #define SFLOW_OK_FLOAT(field) (field != (float)-1) @@ -1087,10 +1087,10 @@ process_struct_NVML_GPU(SFlowXDR *x, SFlowDataSource *dataSource, Ganglia_host * nvml_gpu_fan_speed = SFLOWXDR_next(x); if(x->counterDeltas) { - submit_sflow_float(hostdata, metric_prefix, SFLOW_M_nvml_gpu_time, SFLOW_CTR_RATE(dataSource, nvml_gpu_time, ctr_ival_mS), SFLOW_OK_COUNTER32(nvml_gpu_time)); - submit_sflow_float(hostdata, metric_prefix, SFLOW_M_nvml_gpu_rw_time, SFLOW_CTR_RATE(dataSource, nvml_gpu_rw_time, ctr_ival_mS), SFLOW_OK_COUNTER32(nvml_gpu_rw_time)); + submit_sflow_float(hostdata, metric_prefix, SFLOW_M_nvml_gpu_time, SFLOW_CTR_MS_PC(dataSource, nvml_gpu_time, ctr_ival_mS), SFLOW_OK_COUNTER32(nvml_gpu_time)); + submit_sflow_float(hostdata, metric_prefix, SFLOW_M_nvml_gpu_rw_time, SFLOW_CTR_MS_PC(dataSource, nvml_gpu_rw_time, ctr_ival_mS), SFLOW_OK_COUNTER32(nvml_gpu_rw_time)); submit_sflow_float(hostdata, metric_prefix, SFLOW_M_nvml_gpu_ecc_errors, SFLOW_CTR_RATE(dataSource, nvml_gpu_ecc_errors, ctr_ival_mS), SFLOW_OK_COUNTER32(nvml_gpu_ecc_errors)); - submit_sflow_float(hostdata, metric_prefix, SFLOW_M_nvml_gpu_energy, SFLOW_CTR_RATE(dataSource, nvml_gpu_energy, ctr_ival_mS), SFLOW_OK_COUNTER32(nvml_gpu_energy)); + submit_sflow_float(hostdata, metric_prefix, SFLOW_M_nvml_gpu_energy, SFLOW_CTR_RATE(dataSource, nvml_gpu_energy, ctr_ival_mS) / (float)1000.0, SFLOW_OK_COUNTER32(nvml_gpu_energy)); } SFLOW_CTR_LATCH(dataSource, nvml_gpu_time); @@ -1101,7 +1101,7 @@ process_struct_NVML_GPU(SFlowXDR *x, SFlowDataSource *dataSource, Ganglia_host * submit_sflow_uint32(hostdata, metric_prefix, SFLOW_M_nvml_gpu_count, nvml_gpu_count, SFLOW_OK_GAUGE32(nvml_gpu_count)); submit_sflow_uint32(hostdata, metric_prefix, SFLOW_M_nvml_gpu_processes, nvml_gpu_processes, SFLOW_OK_GAUGE32(nvml_gpu_processes)); submit_sflow_float(hostdata, metric_prefix, SFLOW_M_nvml_gpu_mem_total, SFLOW_MEM_KB(nvml_gpu_mem_total), SFLOW_OK_GAUGE64(nvml_gpu_mem_total)); - submit_sflow_double(hostdata, metric_prefix, SFLOW_M_nvml_gpu_mem_free, SFLOW_GAUGE_DIVIDE(nvml_gpu_mem_free, nvml_gpu_mem_total), SFLOW_OK_GAUGE64(nvml_gpu_mem_free)); + submit_sflow_float(hostdata, metric_prefix, SFLOW_M_nvml_gpu_mem_free, SFLOW_MEM_KB(nvml_gpu_mem_free), SFLOW_OK_GAUGE64(nvml_gpu_mem_free)); submit_sflow_uint32(hostdata, metric_prefix, SFLOW_M_nvml_gpu_temperature, nvml_gpu_temperature, SFLOW_OK_GAUGE32(nvml_gpu_temperature)); submit_sflow_uint32(hostdata, metric_prefix, SFLOW_M_nvml_gpu_fan_speed, nvml_gpu_fan_speed, SFLOW_OK_GAUGE32(nvml_gpu_fan_speed)); } diff --git a/gmond/sflow_gmetric.h b/gmond/sflow_gmetric.h index d2a82b199..8f0725ffd 100644 --- a/gmond/sflow_gmetric.h +++ b/gmond/sflow_gmetric.h @@ -155,11 +155,11 @@ SFLOW_GMETRIC(SFLOW_M_jvm_vmem_util, "jvm_vmem_util", "%", GANGLIA_SLOPE_ZERO, " SFLOW_GMETRIC(SFLOW_M_jvm_release, "jvm_release", "", GANGLIA_SLOPE_ZERO, "%s", "jvm", NULL, "JVM Release" ) SFLOW_GMETRIC(SFLOW_M_nvml_gpu_count, "nvml_gpu_count", "", GANGLIA_SLOPE_ZERO, "%u", "nvml", NULL, "NVML GPU Count" ) SFLOW_GMETRIC(SFLOW_M_nvml_gpu_processes, "nvml_gpu_processes", "", GANGLIA_SLOPE_BOTH, "%u", "nvml", NULL, "NVML GPU Processes" ) -SFLOW_GMETRIC(SFLOW_M_nvml_gpu_time, "nvml_gpu_time", "mS", GANGLIA_SLOPE_BOTH, "%.2f", "nvml", NULL, "NVML GPU Time" ) -SFLOW_GMETRIC(SFLOW_M_nvml_gpu_rw_time, "nvml_gpu_rw_time", "mS", GANGLIA_SLOPE_BOTH, "%.2f", "nvml", NULL, "NVML GPU Mem R/W Time" ) -SFLOW_GMETRIC(SFLOW_M_nvml_gpu_mem_total, "nvml_gpu_mem_total", "kB", GANGLIA_SLOPE_ZERO, "%.0f", "nvml", NULL, "NVML GPU Mem Bytes" ) -SFLOW_GMETRIC(SFLOW_M_nvml_gpu_mem_free, "nvml_gpu_mem_free", "%", GANGLIA_SLOPE_BOTH, "%.2f", "nvml", NULL, "NVML GPU % Mem Free" ) +SFLOW_GMETRIC(SFLOW_M_nvml_gpu_time, "nvml_gpu_util", "%", GANGLIA_SLOPE_BOTH, "%.2f", "nvml", NULL, "NVML GPU Utilization" ) +SFLOW_GMETRIC(SFLOW_M_nvml_gpu_rw_time, "nvml_gpu_rw_util", "%", GANGLIA_SLOPE_BOTH, "%.2f", "nvml", NULL, "NVML GPU Mem R/W Utilization" ) +SFLOW_GMETRIC(SFLOW_M_nvml_gpu_mem_total, "nvml_gpu_mem_total", "KB", GANGLIA_SLOPE_ZERO, "%.0f", "nvml", NULL, "NVML GPU Mem Total" ) +SFLOW_GMETRIC(SFLOW_M_nvml_gpu_mem_free, "nvml_gpu_mem_free", "KB", GANGLIA_SLOPE_BOTH, "%.0f", "nvml", NULL, "NVML GPU Mem Free" ) SFLOW_GMETRIC(SFLOW_M_nvml_gpu_ecc_errors, "nvml_gpu_ecc_errors", "", GANGLIA_SLOPE_BOTH, "%.2f", "nvml", NULL, "NVML GPU ECC Errors" ) -SFLOW_GMETRIC(SFLOW_M_nvml_gpu_energy, "nvml_gpu_energy", "mJ", GANGLIA_SLOPE_BOTH, "%.2f", "nvml", NULL, "NVML GPU Energy" ) +SFLOW_GMETRIC(SFLOW_M_nvml_gpu_energy, "nvml_gpu_power", "W", GANGLIA_SLOPE_BOTH, "%.2f", "nvml", NULL, "NVML GPU Power" ) SFLOW_GMETRIC(SFLOW_M_nvml_gpu_temperature, "nvml_gpu_temperature", "C", GANGLIA_SLOPE_BOTH, "%u", "nvml", NULL, "NVML GPU Temperature" ) -SFLOW_GMETRIC(SFLOW_M_nvml_gpu_fan_speed, "nvml_gpu_fan_speed", "C", GANGLIA_SLOPE_BOTH, "%u", "nvml", NULL, "NVML GPU Fan Speed" ) +SFLOW_GMETRIC(SFLOW_M_nvml_gpu_fan_speed, "nvml_gpu_fan_speed", "%", GANGLIA_SLOPE_BOTH, "%u", "nvml", NULL, "NVML GPU Fan Speed" )