Skip to content

Commit

Permalink
fix(inputs.intel_powerstat): Add MSR read timeout (influxdata#14088)
Browse files Browse the repository at this point in the history
Signed-off-by: Alyson Deives Pereira <alyson.deivespereira@windriver.com>
  • Loading branch information
alysondeives committed Oct 10, 2023
1 parent 0c1e213 commit 8f1ea19
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 19 deletions.
4 changes: 4 additions & 0 deletions plugins/inputs/intel_powerstat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## "cpu_busy_frequency"
## ATTENTION: cpu_busy_cycles is DEPRECATED - use cpu_c0_state_residency
# cpu_metrics = []

# The user can set the timeout duration in milliseconds for MSR reading
# The Default value is 100 ms.
# msr_read_timeout_ms = 100
```

## Example: Configuration with no per-CPU telemetry
Expand Down
27 changes: 21 additions & 6 deletions plugins/inputs/intel_powerstat/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type fileService interface {
getStringsMatchingPatternOnPath(path string) ([]string, error)
readFile(path string) ([]byte, error)
readFileToFloat64(reader io.Reader) (float64, int64, error)
readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error)
readFileAtOffsetToUint64(reader io.ReaderAt, offset int64, timeout time.Duration) (uint64, error)
}

type fileServiceImpl struct {
Expand Down Expand Up @@ -133,19 +133,34 @@ func (fs *fileServiceImpl) readFileToFloat64(reader io.Reader) (float64, int64,
}

// readFileAtOffsetToUint64 reads 8 bytes from passed file at given offset.
func (fs *fileServiceImpl) readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error) {
func (fs *fileServiceImpl) readFileAtOffsetToUint64(reader io.ReaderAt, offset int64, timeout time.Duration) (uint64, error) {
buffer := make([]byte, 8)

if offset == 0 {
return 0, fmt.Errorf("file offset %d should not be 0", offset)
}

_, err := reader.ReadAt(buffer, offset)
if err != nil {
value := make(chan uint64)
errs := make(chan error)

go func() {
if n, readErr := reader.ReadAt(buffer, offset); n >= 0 {
value <- binary.LittleEndian.Uint64(buffer)
} else {
errs <- readErr
}
close(value)
close(errs)
}()

select {
case val := <-value:
return val, nil
case err := <-errs:
return 0, fmt.Errorf("error on reading file at offset %d: %w", offset, err)
case <-time.After(timeout):
return 0, fmt.Errorf("timeout reading file at offset %d", offset)
}

return binary.LittleEndian.Uint64(buffer), nil
}

func newFileService() *fileServiceImpl {
Expand Down
13 changes: 7 additions & 6 deletions plugins/inputs/intel_powerstat/file_mock_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion plugins/inputs/intel_powerstat/intel_powerstat.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ const (
type PowerStat struct {
CPUMetrics []string `toml:"cpu_metrics"`
PackageMetrics []string `toml:"package_metrics"`
ReadTimeout int64 `toml:"msr_read_timeout"`
Log telegraf.Logger `toml:"-"`

fs fileService
Expand Down Expand Up @@ -96,7 +97,7 @@ func (p *PowerStat) initMSR() {
// Initialize MSR service only when there is at least one metric enabled
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency ||
p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit || p.packageUncoreFrequency || p.packageCPUBaseFrequency {
p.msr = newMsrServiceWithFs(p.Log, p.fs)
p.msr = newMsrServiceWithFs(p.Log, p.fs, p.ReadTimeout)
}
}

Expand Down Expand Up @@ -903,6 +904,7 @@ func newPowerStat(fs fileService) *PowerStat {
skipFirstIteration: true,
fs: fs,
logOnce: make(map[string]error),
ReadTimeout: 100,
}

return p
Expand Down
13 changes: 8 additions & 5 deletions plugins/inputs/intel_powerstat/msr.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"path/filepath"
"strings"
"time"

"golang.org/x/sync/errgroup"

Expand Down Expand Up @@ -62,6 +63,7 @@ type msrServiceImpl struct {
msrOffsets []int64
fs fileService
log telegraf.Logger
readTimeout time.Duration
}

func (m *msrServiceImpl) getCPUCoresData() map[string]*msrData {
Expand Down Expand Up @@ -187,7 +189,7 @@ func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error)
return 0, fmt.Errorf("incorect name of MSR %s", msr)
}

value, err := m.fs.readFileAtOffsetToUint64(msrFile, msrAddress)
value, err := m.fs.readFileAtOffsetToUint64(msrFile, msrAddress, m.readTimeout)
if err != nil {
return 0, err
}
Expand Down Expand Up @@ -256,7 +258,7 @@ func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error
}

func (m *msrServiceImpl) readValueFromFileAtOffset(ctx context.Context, ch chan uint64, reader io.ReaderAt, offset int64) error {
value, err := m.fs.readFileAtOffsetToUint64(reader, offset)
value, err := m.fs.readFileAtOffsetToUint64(reader, offset, m.readTimeout)
if err != nil {
return err
}
Expand Down Expand Up @@ -309,10 +311,11 @@ func (m *msrServiceImpl) setCPUCores() error {
return nil
}

func newMsrServiceWithFs(logger telegraf.Logger, fs fileService) *msrServiceImpl {
func newMsrServiceWithFs(logger telegraf.Logger, fs fileService, readTimeout int64) *msrServiceImpl {
msrService := &msrServiceImpl{
fs: fs,
log: logger,
fs: fs,
log: logger,
readTimeout: time.Duration(readTimeout) * time.Millisecond,
}
err := msrService.setCPUCores()
if err != nil {
Expand Down
3 changes: 2 additions & 1 deletion plugins/inputs/intel_powerstat/msr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ func getMsrServiceWithMockedFs() (*msrServiceImpl, *mockFileService) {
fsMock := &mockFileService{}
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(cores, nil).Once()
msr := newMsrServiceWithFs(logger, fsMock)
timeout := int64(100)
msr := newMsrServiceWithFs(logger, fsMock, timeout)

return msr, fsMock
}
4 changes: 4 additions & 0 deletions plugins/inputs/intel_powerstat/sample.conf
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,7 @@
## "cpu_busy_frequency"
## ATTENTION: cpu_busy_cycles is DEPRECATED - use cpu_c0_state_residency
# cpu_metrics = []

# The user can set the timeout duration in milliseconds for MSR reading
# The Default value is 100 ms.
# msr_read_timeout_ms = 100

0 comments on commit 8f1ea19

Please sign in to comment.