diff --git a/cmd/telemetry/telemetry.go b/cmd/telemetry/telemetry.go index 74dfe7ce..39955a04 100644 --- a/cmd/telemetry/telemetry.go +++ b/cmd/telemetry/telemetry.go @@ -7,7 +7,7 @@ package telemetry import ( "fmt" "log/slog" - "regexp" + "os" "slices" "strconv" "strings" @@ -61,12 +61,10 @@ var ( flagPower bool flagTemperature bool flagInstrMix bool - flagGaudi bool flagNoSystemSummary bool flagInstrMixPid int - flagInstrMixFilter []string flagInstrMixFrequency int ) @@ -87,12 +85,10 @@ const ( flagPowerName = "power" flagTemperatureName = "temperature" flagInstrMixName = "instrmix" - flagGaudiName = "gaudi" flagNoSystemSummaryName = "no-summary" flagInstrMixPidName = "instrmix-pid" - flagInstrMixFilterName = "instrmix-filter" flagInstrMixFrequencyName = "instrmix-frequency" ) @@ -110,9 +106,13 @@ var categories = []common.Category{ {FlagName: flagStorageName, FlagVar: &flagStorage, DefaultValue: false, Help: "monitor storage", TableNames: []string{report.DriveTelemetryTableName}}, {FlagName: flagIRQRateName, FlagVar: &flagIRQRate, DefaultValue: false, Help: "monitor IRQ rate", TableNames: []string{report.IRQRateTelemetryTableName}}, {FlagName: flagInstrMixName, FlagVar: &flagInstrMix, DefaultValue: false, Help: "monitor instruction mix", TableNames: []string{report.InstructionTelemetryTableName}}, - {FlagName: flagGaudiName, FlagVar: &flagGaudi, DefaultValue: false, Help: "monitor gaudi", TableNames: []string{report.GaudiTelemetryTableName}}, } +const ( + instrmixFrequencyDefaultSystemWide = 10000000 + instrmixFrequencyDefaultPerPID = 100000 +) + func init() { // set up config category flags for _, cat := range categories { @@ -124,8 +124,7 @@ func init() { Cmd.Flags().IntVar(&flagDuration, flagDurationName, 30, "") Cmd.Flags().IntVar(&flagInterval, flagIntervalName, 2, "") Cmd.Flags().IntVar(&flagInstrMixPid, flagInstrMixPidName, 0, "") - Cmd.Flags().StringSliceVar(&flagInstrMixFilter, flagInstrMixFilterName, []string{"SSE", "AVX", "AVX2", "AVX512", 
"AMX_TILE"}, "") - Cmd.Flags().IntVar(&flagInstrMixFrequency, flagInstrMixFrequencyName, 10000000, "") // 10 million + Cmd.Flags().IntVar(&flagInstrMixFrequency, flagInstrMixFrequencyName, instrmixFrequencyDefaultSystemWide, "") Cmd.Flags().BoolVar(&flagNoSystemSummary, flagNoSystemSummaryName, false, "") common.AddTargetFlags(Cmd) @@ -193,13 +192,9 @@ func getFlagGroups() []common.FlagGroup { Name: flagInstrMixPidName, Help: "PID to monitor for instruction mix, no PID means all processes", }, - { - Name: flagInstrMixFilterName, - Help: "filter to apply to instruction mix", - }, { Name: flagInstrMixFrequencyName, - Help: "number of instructions between samples when no PID specified", + Help: "number of instructions between samples, default is 10,000,000 when collecting system wide and 100,000 when collecting for a specific PID", }, { Name: flagNoSystemSummaryName, @@ -259,16 +254,12 @@ func validateFlags(cmd *cobra.Command, args []string) error { if flagDuration == 0 && (target != "" || targets != "") { return common.FlagValidationError(cmd, "duration must be greater than 0 when collecting from a remote target") } - if cmd.Flags().Lookup(flagInstrMixFilterName).Changed { - re := regexp.MustCompile("^[A-Z0-9_]+$") - for _, filter := range flagInstrMixFilter { - if !re.MatchString(filter) { - return common.FlagValidationError(cmd, fmt.Sprintf("invalid filter: %s, must be uppercase letters, numbers, and underscores", filter)) - } - } - } if flagInstrMixFrequency < 100000 { // 100,000 instructions is the minimum frequency - return common.FlagValidationError(cmd, "instruction mix frequency must be 100,000 or greater") + return common.FlagValidationError(cmd, "instruction mix frequency must be 100,000 or greater to limit overhead") + } + // warn if instruction mix frequency is low when collecting system wide + if flagInstrMix && flagInstrMixPid == 0 && flagInstrMixFrequency < instrmixFrequencyDefaultSystemWide { + slog.Warn("instruction mix frequency is set to a value 
lower than default for system wide collection, consider using a higher frequency to limit collection overhead", slog.Int("frequency", flagInstrMixFrequency)) } // common target flags if err := common.ValidateTargetFlags(cmd); err != nil { @@ -289,6 +280,28 @@ func runCmd(cmd *cobra.Command, args []string) error { tableNames = append(tableNames, cat.TableNames...) } } + // confirm proper default for instrmix frequency + if flagInstrMix { + if flagInstrMixPid != 0 && !cmd.Flags().Changed(flagInstrMixFrequencyName) { + // per-PID collection and frequency not changed, set to per-PID default + flagInstrMixFrequency = instrmixFrequencyDefaultPerPID + } + } + // hidden feature - Gaudi telemetry, only enabled when PERFSPECT_GAUDI_HLSMI_PATH is set + gaudiHlsmiPath := os.Getenv("PERFSPECT_GAUDI_HLSMI_PATH") // must be full path to hlsmi binary + if gaudiHlsmiPath != "" { + slog.Info("Gaudi telemetry enabled", slog.String("hlsmi_path", gaudiHlsmiPath)) + tableNames = append(tableNames, report.GaudiTelemetryTableName) + } + // hidden feature - PDU telemetry, only enabled when four environment variables are set + pduHost := os.Getenv("PERFSPECT_PDU_HOST") + pduUser := os.Getenv("PERFSPECT_PDU_USER") + pduPassword := os.Getenv("PERFSPECT_PDU_PASSWORD") + pduOutlet := os.Getenv("PERFSPECT_PDU_OUTLET") + if pduHost != "" && pduUser != "" && pduPassword != "" && pduOutlet != "" { + slog.Info("PDU telemetry enabled", slog.String("host", pduHost), slog.String("outlet", pduOutlet)) + tableNames = append(tableNames, report.PDUTelemetryTableName) + } // include telemetry summary table if all telemetry options are selected var summaryFunc common.SummaryFunc if flagAll { @@ -306,8 +319,12 @@ func runCmd(cmd *cobra.Command, args []string) error { "Interval": strconv.Itoa(flagInterval), "Duration": strconv.Itoa(flagDuration), "InstrMixPID": strconv.Itoa(flagInstrMixPid), - "InstrMixFilter": strings.Join(flagInstrMixFilter, " "), "InstrMixFrequency": strconv.Itoa(flagInstrMixFrequency), + 
"GaudiHlsmiPath": gaudiHlsmiPath, + "PDUHost": pduHost, + "PDUUser": pduUser, + "PDUPassword": pduPassword, + "PDUOutlet": pduOutlet, }, TableNames: tableNames, SummaryFunc: summaryFunc, diff --git a/internal/report/render_html.go b/internal/report/render_html.go index 30ccee7b..8ab356e7 100644 --- a/internal/report/render_html.go +++ b/internal/report/render_html.go @@ -377,7 +377,8 @@ const datasetTemplate = ` backgroundColor: '{{.Color}}', borderColor: '{{.Color}}', borderWidth: 1, - showLine: true + showLine: true, + hidden: {{.Hidden}} } ` const lineChartTemplate = `
@@ -715,20 +716,39 @@ func dimmTableHTMLRenderer(tableValues TableValues, targetName string) string { return renderHTMLTable(socketTableHeaders, socketTableValues, "pure-table pure-table-bordered", [][]string{}) } -func renderChart(chartType string, allFormattedPoints []string, datasetNames []string, xAxisLabels []string, config chartTemplateStruct) string { +// renderChart generates an HTML/JavaScript representation of a chart using the provided data and configuration. +// It supports different chart types (e.g., "line", "scatter") and uses Go templates to format the datasets and chart. +// Parameters: +// - chartType: the type of chart to render ("line", "scatter"). +// - allFormattedPoints: a slice of strings, each representing formatted data points for a dataset. +// - datasetNames: a slice of dataset names corresponding to each dataset. +// - xAxisLabels: a slice of labels for the x-axis (used for line charts). +// - config: a chartTemplateStruct containing chart configuration and template variables. +// - datasetHiddenFlags: a slice of booleans indicating whether each dataset should be hidden initially. +// +// Returns: +// - A string containing the rendered chart HTML/JavaScript, or an error message if rendering fails. 
+func renderChart(chartType string, allFormattedPoints []string, datasetNames []string, xAxisLabels []string, config chartTemplateStruct, datasetHiddenFlags []bool) string { datasets := []string{} for dataIdx, formattedPoints := range allFormattedPoints { specValues := formattedPoints dst := texttemplate.Must(texttemplate.New("datasetTemplate").Parse(datasetTemplate)) buf := new(bytes.Buffer) + // determine hidden flag for this dataset + hidden := "false" + if datasetHiddenFlags != nil && dataIdx < len(datasetHiddenFlags) && datasetHiddenFlags[dataIdx] { + hidden = "true" + } err := dst.Execute(buf, struct { - Label string - Data string - Color string + Label string + Data string + Color string + Hidden string }{ - Label: datasetNames[dataIdx], - Data: specValues, - Color: getColor(dataIdx), + Label: datasetNames[dataIdx], + Data: specValues, + Color: getColor(dataIdx), + Hidden: hidden, }) if err != nil { slog.Error("error executing template", slog.String("error", err.Error())) @@ -772,6 +792,17 @@ type scatterPoint struct { y float64 } +// renderScatterChart generates an HTML string for a scatter chart using the provided data and configuration. +// +// Parameters: +// +// data - 2D slice of scatterPoint values, where each inner slice represents a dataset's data points. +// datasetNames - Slice of strings representing the names of each dataset. +// config - chartTemplateStruct containing chart configuration options. +// +// Returns: +// +// A string containing the rendered HTML for the scatter chart. 
func renderScatterChart(data [][]scatterPoint, datasetNames []string, config chartTemplateStruct) string { allFormattedPoints := []string{} for dataIdx := range data { @@ -781,10 +812,23 @@ func renderScatterChart(data [][]scatterPoint, datasetNames []string, config cha } allFormattedPoints = append(allFormattedPoints, strings.Join(formattedPoints, ",")) } - return renderChart("scatter", allFormattedPoints, datasetNames, nil, config) + return renderChart("scatter", allFormattedPoints, datasetNames, nil, config, nil) } -func renderLineChart(xAxisLabels []string, data [][]float64, datasetNames []string, config chartTemplateStruct) string { +// renderLineChart generates an HTML string for a line chart using the provided data and configuration. +// +// Parameters: +// +// xAxisLabels - Slice of strings representing the labels for the X axis. +// data - 2D slice of float64 values, where each inner slice represents a dataset's data points. +// datasetNames - Slice of strings representing the names of each dataset. +// config - chartTemplateStruct containing chart configuration options. +// datasetHiddenFlags - Slice of booleans indicating whether each dataset should be hidden initially. +// +// Returns: +// +// A string containing the rendered HTML for the line chart. 
+func renderLineChart(xAxisLabels []string, data [][]float64, datasetNames []string, config chartTemplateStruct, datasetHiddenFlags []bool) string { allFormattedPoints := []string{} for dataIdx := range data { formattedPoints := []string{} @@ -793,7 +837,7 @@ func renderLineChart(xAxisLabels []string, data [][]float64, datasetNames []stri } allFormattedPoints = append(allFormattedPoints, strings.Join(formattedPoints, ",")) } - return renderChart("line", allFormattedPoints, datasetNames, xAxisLabels, config) + return renderChart("line", allFormattedPoints, datasetNames, xAxisLabels, config, datasetHiddenFlags) } func renderFrequencyTable(tableValues TableValues) (out string) { @@ -899,7 +943,7 @@ func getColor(idx int) string { return colors[idx%len(colors)] } -func telemetryTableHTMLRenderer(tableValues TableValues, data [][]float64, datasetNames []string, chartConfig chartTemplateStruct) string { +func telemetryTableHTMLRenderer(tableValues TableValues, data [][]float64, datasetNames []string, chartConfig chartTemplateStruct, datasetHiddenFlags []bool) string { tsFieldIdx := 0 var timestamps []string for i := range tableValues.Fields[0].Values { @@ -908,7 +952,7 @@ func telemetryTableHTMLRenderer(tableValues TableValues, data [][]float64, datas timestamps = append(timestamps, timestamp) } } - return renderLineChart(timestamps, data, datasetNames, chartConfig) + return renderLineChart(timestamps, data, datasetNames, chartConfig, datasetHiddenFlags) } func cpuUtilizationTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string { @@ -957,7 +1001,7 @@ func cpuUtilizationTelemetryTableHTMLRenderer(tableValues TableValues, targetNam SuggestedMin: "0", SuggestedMax: "100", } - return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } func utilizationCategoriesTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string { @@ 
-992,7 +1036,7 @@ func utilizationCategoriesTelemetryTableHTMLRenderer(tableValues TableValues, ta SuggestedMin: "0", SuggestedMax: "100", } - return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } func irqRateTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string { @@ -1032,7 +1076,7 @@ func irqRateTelemetryTableHTMLRenderer(tableValues TableValues, targetName strin SuggestedMin: "0", SuggestedMax: "0", } - return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } // driveTelemetryTableHTMLRenderer renders charts of drive statistics @@ -1088,7 +1132,7 @@ func driveTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) SuggestedMin: "0", SuggestedMax: "0", } - out += telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + out += telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } return out } @@ -1146,7 +1190,7 @@ func networkTelemetryTableHTMLRenderer(tableValues TableValues, targetName strin SuggestedMin: "0", SuggestedMax: "0", } - out += telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + out += telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } return out } @@ -1183,7 +1227,7 @@ func memoryTelemetryTableHTMLRenderer(tableValues TableValues, targetName string SuggestedMin: "0", SuggestedMax: "0", } - return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } func averageFrequencyTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string { @@ -1218,7 +1262,7 @@ func averageFrequencyTelemetryTableHTMLRenderer(tableValues TableValues, targetN SuggestedMin: "0", SuggestedMax: "0", } 
- return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } func powerTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string { @@ -1253,7 +1297,7 @@ func powerTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) SuggestedMin: "0", SuggestedMax: "0", } - return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } func temperatureTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string { @@ -1288,7 +1332,7 @@ func temperatureTelemetryTableHTMLRenderer(tableValues TableValues, targetName s SuggestedMin: "0", SuggestedMax: "0", } - return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } func ipcTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string { @@ -1323,7 +1367,7 @@ func ipcTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) s SuggestedMin: "0", SuggestedMax: "0", } - return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } func c6TelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string { @@ -1358,16 +1402,27 @@ func c6TelemetryTableHTMLRenderer(tableValues TableValues, targetName string) st SuggestedMin: "0", SuggestedMax: "0", } - return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig) + return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil) } +// instructionTelemetryTableHTMLRenderer renders instruction set usage statistics. +// Each category is a separate dataset within the chart. +// Categories with zero total usage are hidden by default. 
+// Categories are sorted in two tiers: first, all non-zero categories are sorted alphabetically; +// then, all zero-sum categories are sorted alphabetically and placed after the non-zero categories. func instructionTelemetryTableHTMLRenderer(tableValues TableValues, targetname string) string { - data := [][]float64{} - datasetNames := []string{} - for _, field := range tableValues.Fields[1:] { + // Collect entries with their sums so we can sort per requirements + type instrEntry struct { + name string + points []float64 + sum float64 + } + entries := []instrEntry{} + for _, field := range tableValues.Fields[1:] { // skip timestamp field points := []float64{} + sum := 0.0 for _, val := range field.Values { - if val == "" { + if val == "" { // end of data for this category break } stat, err := strconv.ParseFloat(val, 64) @@ -1376,12 +1431,34 @@ func instructionTelemetryTableHTMLRenderer(tableValues TableValues, targetname s return "" } points = append(points, stat) + sum += stat } - if len(points) > 0 { - data = append(data, points) - datasetNames = append(datasetNames, field.Name) + if len(points) > 0 { // only include categories with at least one point + entries = append(entries, instrEntry{name: field.Name, points: points, sum: sum}) + } + } + // Partition into non-zero and zero-sum groups + nonZero := []instrEntry{} + zero := []instrEntry{} + for _, e := range entries { + if e.sum > 0 { + nonZero = append(nonZero, e) + } else { + zero = append(zero, e) } } + sort.Slice(nonZero, func(i, j int) bool { return nonZero[i].name < nonZero[j].name }) + sort.Slice(zero, func(i, j int) bool { return zero[i].name < zero[j].name }) + ordered := append(nonZero, zero...) 
+	data := make([][]float64, 0, len(ordered))
+	datasetNames := make([]string, 0, len(ordered))
+	hiddenFlags := make([]bool, 0, len(ordered))
+	for _, e := range ordered {
+		data = append(data, e.points)
+		datasetNames = append(datasetNames, e.name)
+		// hide zero-sum categories by default
+		hiddenFlags = append(hiddenFlags, e.sum == 0)
+	}
 	chartConfig := chartTemplateStruct{
 		ID:            fmt.Sprintf("%s%d", tableValues.Name, util.RandUint(10000)),
 		XaxisText:     "Time",
@@ -1389,11 +1466,11 @@ func instructionTelemetryTableHTMLRenderer(tableValues TableValues, targetname s
 		TitleText:     "",
 		DisplayTitle:  "false",
 		DisplayLegend: "true",
-		AspectRatio:   "2",
+		AspectRatio:   "1", // extra tall due to large number of data sets
 		SuggestedMin:  "0",
 		SuggestedMax:  "0",
 	}
-	return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig)
+	return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, hiddenFlags)
 }
 
 func renderGaudiStatsChart(tableValues TableValues, chartStatFieldName string, titleText string, yAxisText string, suggestedMax string) string {
@@ -1450,7 +1527,7 @@ func renderGaudiStatsChart(tableValues TableValues, chartStatFieldName string, t
 		SuggestedMin:  "0",
 		SuggestedMax:  suggestedMax,
 	}
-	return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig)
+	return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil)
 }
 
 func gaudiTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string {
@@ -1463,6 +1540,43 @@ func gaudiTelemetryTableHTMLRenderer(tableValues TableValues, targetName string)
 	return out
 }
 
+// pduTelemetryTableHTMLRenderer charts each non-timestamp field as its own line-chart dataset; a field
+// with no values is dropped together with its name so legend labels stay aligned with the plotted data.
+func pduTelemetryTableHTMLRenderer(tableValues TableValues, targetName string) string {
+	data := [][]float64{}
+	datasetNames := []string{}
+	for _, field := range tableValues.Fields[1:] {
+		points := []float64{}
+		for _, val := range field.Values {
+			if val == "" {
+				break
+			}
+			stat, err := strconv.ParseFloat(val, 64)
+			if err != nil {
+				slog.Error("error parsing stat", slog.String("error", err.Error()))
+				return ""
+			}
+			points = append(points, stat)
+		}
+		if len(points) > 0 {
+			data = append(data, points)
+			datasetNames = append(datasetNames, field.Name)
+		}
+	}
+	chartConfig := chartTemplateStruct{
+		ID:            fmt.Sprintf("%s%d", tableValues.Name, util.RandUint(10000)),
+		XaxisText:     "Time",
+		YaxisText:     "Watts",
+		TitleText:     "",
+		DisplayTitle:  "false",
+		DisplayLegend: "true",
+		AspectRatio:   "2",
+		SuggestedMin:  "0",
+		SuggestedMax:  "0",
+	}
+	return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil)
+}
+
 func callStackFrequencyTableHTMLRenderer(tableValues TableValues, targetName string) string {
 	out := `