Skip to content

Commit

Permalink
feat: add slab metrics input plugin (#11075)
Browse files Browse the repository at this point in the history
Co-authored-by: reimda <reimda@users.noreply.github.com>
Co-authored-by: Joshua Powers <powersj@fastmail.com>
  • Loading branch information
3 people authored and MyaLongmire committed Jul 6, 2022
1 parent 7569744 commit 6ef9d63
Show file tree
Hide file tree
Showing 7 changed files with 281 additions and 0 deletions.
3 changes: 3 additions & 0 deletions etc/telegraf.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6918,6 +6918,9 @@
# # key = "device_name"
# # value = 'one_of("sda", "sdb")'

# # Get slab statistics from procfs
# [[inputs.slab]]
# # no configuration

# # Get synproxy counter statistics from procfs
# [[inputs.synproxy]]
Expand Down
1 change: 1 addition & 0 deletions plugins/inputs/all/all.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/salesforce"
_ "github.com/influxdata/telegraf/plugins/inputs/sensors"
_ "github.com/influxdata/telegraf/plugins/inputs/sflow"
_ "github.com/influxdata/telegraf/plugins/inputs/slab"
_ "github.com/influxdata/telegraf/plugins/inputs/smart"
_ "github.com/influxdata/telegraf/plugins/inputs/snmp"
_ "github.com/influxdata/telegraf/plugins/inputs/snmp_legacy"
Expand Down
54 changes: 54 additions & 0 deletions plugins/inputs/slab/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Slab Input Plugin

This plugin collects details on how much memory each entry in the slab cache
is consuming. For example, it collects the consumption of `kmalloc-1024` and
`xfs_inode`. Since this information is obtained by parsing the
`/proc/slabinfo` file, only Linux is supported. The specification of
`/proc/slabinfo` has
not changed since [Linux v2.6.12 (April 2005)](https://github.com/torvalds/linux/blob/1da177e4/mm/slab.c#L2848-L2861),
so it can be regarded as sufficiently stable. The memory usage is
equivalent to the `CACHE_SIZE` column of the `slabtop` command.
If the `HOST_PROC` environment variable is set, Telegraf will use its value instead of `/proc`.

**Note: `/proc/slabinfo` is usually readable only by the root user. Make sure telegraf can execute `sudo` without a password.**

## Configuration

```toml
# Get slab statistics from procfs
[[inputs.slab]]
# no configuration - please see the plugin's README for steps to configure
# sudo properly
```

## Sudo configuration

Since the slabinfo file is usually only readable by root, the plugin runs `sudo -n /bin/cat` to read the file when telegraf is not already running as root.

Sudo can be configured to allow telegraf to run just the command needed to read the slabinfo file. For example, if telegraf is running as the user 'telegraf' and HOST_PROC is not used, add this to the sudoers file:
`telegraf ALL = (root) NOPASSWD: /bin/cat /proc/slabinfo`

## Metrics

Metrics include generic ones such as `kmalloc_*` as well as those of kernel
subsystems and drivers used by the system such as `xfs_inode`.
Each field with `_size` suffix indicates memory consumption in bytes.

- slab
- fields:
- kmalloc_8_size (integer)
- kmalloc_16_size (integer)
- kmalloc_32_size (integer)
- kmalloc_64_size (integer)
- kmalloc_96_size (integer)
- kmalloc_128_size (integer)
- kmalloc_256_size (integer)
- kmalloc_512_size (integer)
- xfs_ili_size (integer)
- xfs_inode_size (integer)

## Example Output

```shell
slab kmalloc_1024_size=239927296i,kmalloc_512_size=5582848i 1651049129000000000
```
119 changes: 119 additions & 0 deletions plugins/inputs/slab/slab.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
//go:build linux
// +build linux

package slab

import (
"bufio"
"bytes"
"errors"
"fmt"
"os"
"os/exec"
"path"
"strconv"
"strings"
"time"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/inputs"
)

// init registers the plugin under the name "slab". Each created instance
// reads the slabinfo file located under the directory returned by getHostProc.
func init() {
	inputs.Add("slab", func() telegraf.Input {
		statFile := path.Join(getHostProc(), "/slabinfo")
		return &SlabStats{statFile: statFile}
	})
}

// SlabStats is the slab input plugin. It reports, for every cache listed in
// the slabinfo file, the product of its object count and object size.
type SlabStats struct {
	// Log is excluded from the TOML configuration via the "-" tag.
	Log telegraf.Logger `toml:"-"`

	// statFile is the path of the slabinfo file that is read on each gather.
	statFile string
}

// Init performs no setup for this plugin and always returns nil.
func (ss *SlabStats) Init() error {
	return nil
}

// Gather collects the current slab statistics and adds them to acc as a
// single untagged gauge metric named "slab". If the statistics cannot be
// collected, nothing is added and the error is returned.
func (ss *SlabStats) Gather(acc telegraf.Accumulator) error {
	stats, err := ss.getSlabStats()
	if err == nil {
		acc.AddGauge("slab", stats, nil)
	}
	return err
}

// getSlabStats reads the slabinfo file through runCmd and returns one field
// per slab cache. Each key is the cache name passed through normalizeName and
// each value is <num_objs> * <objsize> for that cache, as an int.
//
// An error is returned when the command fails, when a data row has fewer than
// four columns, when either numeric column cannot be parsed, or when scanning
// the command output fails.
func (ss *SlabStats) getSlabStats() (map[string]interface{}, error) {
	out, err := ss.runCmd("/bin/cat", []string{ss.statFile})
	if err != nil {
		return nil, err
	}

	scanner := bufio.NewScanner(bytes.NewReader(out))

	// Skip the two header rows.
	scanner.Scan() // for "slabinfo - version: 2.1"
	scanner.Scan() // for "# name <active_objs> <num_objs> <objsize> ..."

	fields := map[string]interface{}{}

	// Every remaining row describes one slab cache.
	for scanner.Scan() {
		cols := strings.Fields(scanner.Text())
		if len(cols) < 4 {
			return nil, errors.New("the content of /proc/slabinfo is invalid")
		}

		var numObj, sizObj int

		// cols[2] is <num_objs>, cols[3] is <objsize>.
		numObj, err = strconv.Atoi(cols[2])
		if err != nil {
			return nil, fmt.Errorf("parsing <num_objs> of slab %q: %w", cols[0], err)
		}

		sizObj, err = strconv.Atoi(cols[3])
		if err != nil {
			return nil, fmt.Errorf("parsing <objsize> of slab %q: %w", cols[0], err)
		}

		fields[normalizeName(cols[0])] = numObj * sizObj
	}

	// Scan returning false can mean a scan failure rather than end of input;
	// without this check a failure would silently yield truncated metrics.
	if err = scanner.Err(); err != nil {
		return nil, fmt.Errorf("scanning %s: %w", ss.statFile, err)
	}

	return fields, nil
}

// runCmd executes cmd with args and returns what internal.StdOutputTimeout
// yields for it, using a 5-second timeout argument. When the effective user
// is not root, the command is run as "sudo -n cmd args...".
//
// On failure the returned error wraps the underlying error (so errors.Is and
// errors.As keep working) and includes the full command line and any output
// that was captured.
func (ss *SlabStats) runCmd(cmd string, args []string) ([]byte, error) {
	execCmd := exec.Command(cmd, args...)
	if os.Geteuid() != 0 {
		// Not root: go through sudo. The -n flag keeps sudo non-interactive,
		// so a missing sudoers rule fails instead of prompting for a password.
		execCmd = exec.Command("sudo", append([]string{"-n", cmd}, args...)...)
	}

	out, err := internal.StdOutputTimeout(execCmd, 5*time.Second)
	if err != nil {
		return nil, fmt.Errorf(
			"failed to run command %s: %w - %s",
			strings.Join(execCmd.Args, " "), err, string(out),
		)
	}

	return out, nil
}

// getHostProc returns the value of the HOST_PROC environment variable when it
// is set to a non-empty string, and "/proc" otherwise.
func getHostProc() string {
	if hostProc := os.Getenv("HOST_PROC"); hostProc != "" {
		return hostProc
	}
	return "/proc"
}

// normalizeName converts a slab cache name into a field key: the name is
// lower-cased, every "-" is replaced with "_", and "_size" is appended.
func normalizeName(name string) string {
	lowered := strings.ToLower(name)
	underscored := strings.ReplaceAll(lowered, "-", "_")
	return underscored + "_size"
}
4 changes: 4 additions & 0 deletions plugins/inputs/slab/slab_notlinux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
//go:build !linux
// +build !linux

package slab
11 changes: 11 additions & 0 deletions plugins/inputs/slab/slab_sample_config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//go:build linux
// +build linux

//go:generate go run ../../../tools/generate_plugindata/main.go
//go:generate go run ../../../tools/generate_plugindata/main.go --clean
// DON'T EDIT; This file is used as a template by tools/generate_plugindata
package slab

// SampleConfig returns the plugin's sample configuration. The literal below
// is a placeholder: per this file's header, the file is used as a template by
// tools/generate_plugindata.
func (ss *SlabStats) SampleConfig() string {
	return `{{ .SampleConfig }}`
}
89 changes: 89 additions & 0 deletions plugins/inputs/slab/slab_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
//go:build linux
// +build linux

package slab

import (
"os"
"testing"

"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)

// makeFakeStatFile writes content to a newly created temporary file whose
// name starts with "slab_test" and returns the file's path. It panics if the
// file cannot be created, written, or closed. The file is not removed here.
func makeFakeStatFile(content []byte) string {
	// Any failure while preparing the fixture aborts the test run.
	check := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	f, err := os.CreateTemp("", "slab_test")
	check(err)

	_, err = f.Write(content)
	check(err)

	check(f.Close())

	return f.Name()
}

func TestSlab(t *testing.T) {
slabStats := SlabStats{
statFile: makeFakeStatFile([]byte(procSlabInfo)),
}

var acc testutil.Accumulator
err := acc.GatherError(slabStats.Gather)
require.NoError(t, err)

fields := map[string]interface{}{
"ext4_allocation_context_size": int(16384),
"ext4_extent_status_size": int(8160),
"ext4_free_data_size": int(0),
"ext4_inode_cache_size": int(491520),
"ext4_io_end_size": int(4032),
"ext4_xattr_size": int(0),
"kmalloc_1024_size": int(239927296),
"kmalloc_128_size": int(5586944),
"kmalloc_16_size": int(17002496),
"kmalloc_192_size": int(4015872),
"kmalloc_2048_size": int(3309568),
"kmalloc_256_size": int(5423104),
"kmalloc_32_size": int(3657728),
"kmalloc_4096_size": int(2359296),
"kmalloc_512_size": int(41435136),
"kmalloc_64_size": int(8536064),
"kmalloc_8_size": int(229376),
"kmalloc_8192_size": int(1048576),
"kmalloc_96_size": int(12378240),
"kmem_cache_size": int(81920),
"kmem_cache_node_size": int(36864),
}

acc.AssertContainsFields(t, "slab", fields)
}

// procSlabInfo is a canned slabinfo (version 2.1) snapshot used as the test
// fixture: two header rows followed by one row per slab cache.
var procSlabInfo = `slabinfo - version: 2.1
# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail>
ext4_inode_cache 480 480 1024 32 8 : tunables 0 0 0 : slabdata 15 15 0
ext4_xattr 0 0 88 46 1 : tunables 0 0 0 : slabdata 0 0 0
ext4_free_data 0 0 64 64 1 : tunables 0 0 0 : slabdata 0 0 0
ext4_allocation_context 128 128 128 32 1 : tunables 0 0 0 : slabdata 4 4 0
ext4_io_end 56 56 72 56 1 : tunables 0 0 0 : slabdata 1 1 0
ext4_extent_status 204 204 40 102 1 : tunables 0 0 0 : slabdata 2 2 0
kmalloc-8192 106 128 8192 4 8 : tunables 0 0 0 : slabdata 32 32 0
kmalloc-4096 486 576 4096 8 8 : tunables 0 0 0 : slabdata 72 72 0
kmalloc-2048 1338 1616 2048 16 8 : tunables 0 0 0 : slabdata 101 101 0
kmalloc-1024 155845 234304 1024 32 8 : tunables 0 0 0 : slabdata 7329 7329 0
kmalloc-512 18995 80928 512 32 4 : tunables 0 0 0 : slabdata 2529 2529 0
kmalloc-256 16366 21184 256 32 2 : tunables 0 0 0 : slabdata 662 662 0
kmalloc-192 18835 20916 192 21 1 : tunables 0 0 0 : slabdata 996 996 0
kmalloc-128 23600 43648 128 32 1 : tunables 0 0 0 : slabdata 1364 1364 0
kmalloc-96 95106 128940 96 42 1 : tunables 0 0 0 : slabdata 3070 3070 0
kmalloc-64 82432 133376 64 64 1 : tunables 0 0 0 : slabdata 2084 2084 0
kmalloc-32 78477 114304 32 128 1 : tunables 0 0 0 : slabdata 893 893 0
kmalloc-16 885605 1062656 16 256 1 : tunables 0 0 0 : slabdata 4151 4151 0
kmalloc-8 28672 28672 8 512 1 : tunables 0 0 0 : slabdata 56 56 0
kmem_cache_node 576 576 64 64 1 : tunables 0 0 0 : slabdata 9 9 0
kmem_cache 320 320 256 32 2 : tunables 0 0 0 : slabdata 10 10 0
`

0 comments on commit 6ef9d63

Please sign in to comment.