Skip to content

Commit

Permalink
x-pack/filebeat/input/entityanalytics/azuread: allow fine-grain contr…
Browse files Browse the repository at this point in the history
…ol of API requests

This adds support for specifying which of users/devices to collect from
the AzureAD API endpoints in order to reduce network costs for users who
do not need a full set of entities.

The current change does not change the behaviour of device collection of
registered owners and registered users; when the "devices" dataset is
selected there user entities will still be collected as they are
considered here as an attribute of the device, rather than a component
of the users dataset.
  • Loading branch information
efd6 committed Aug 29, 2023
1 parent 9f6fb2e commit 72b9f37
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 38 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- [Azure] Add input metrics to the azure-eventhub input. {pull}35739[35739]
- Reduce HTTPJSON metrics allocations. {pull}36282[36282]
- Add support for a simplified input configuraton when running under Elastic-Agent {pull}36390[36390]
- Allow fine-grained control of entity analytics API requests for AzureAD provider. {issue}36440[36440] {pull}36441[36441]

*Auditbeat*

Expand Down
9 changes: 9 additions & 0 deletions x-pack/filebeat/docs/inputs/input-entity-analytics.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ Example configuration:
enabled: true
id: azure-1
provider: azure-ad
dataset: "all"
sync_interval: "12h"
update_interval: "30m"
client_id: "CLIENT_ID"
Expand All @@ -279,6 +280,14 @@ The client/application ID. Used for authentication. Field is required.

The secret value, used for authentication. Field is required.

[float]
===== `dataset`

The datasets to collect from the API. This can be one of "all", "users" or "devices",
or may be left empty for the default behavior which is to collect all entities.
When the `dataset` is set to "devices", some user entity data is collected in order
to populate the registered users and registered owner fields for each device.

[float]
===== `sync_interval`

Expand Down
42 changes: 31 additions & 11 deletions x-pack/filebeat/input/entityanalytics/provider/azuread/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"context"
"errors"
"fmt"
"strings"
"time"

"github.com/google/uuid"
Expand Down Expand Up @@ -267,19 +268,38 @@ func (p *azure) doFetch(ctx context.Context, state *stateStore, fullSync bool) (
groupsDeltaLink = state.groupsLink
}

changedUsers, userLink, err := p.fetcher.Users(ctx, usersDeltaLink)
if err != nil {
return updatedUsers, updatedDevices, err
}
p.logger.Debugf("Received %d users from API", len(changedUsers))

changedDevices, deviceLink, err := p.fetcher.Devices(ctx, devicesDeltaLink)
if err != nil {
return updatedUsers, updatedDevices, err
var (
changedUsers []*fetcher.User
userLink string
)
switch strings.ToLower(p.conf.Dataset) {
case "", "all", "users":
changedUsers, userLink, err = p.fetcher.Users(ctx, usersDeltaLink)
if err != nil {
return updatedUsers, updatedDevices, err
}
p.logger.Debugf("Received %d users from API", len(changedUsers))
default:
p.logger.Debugf("Skipping user collection from API: dataset=%s", p.conf.Dataset)
}

var (
changedDevices []*fetcher.Device
deviceLink string
)
switch strings.ToLower(p.conf.Dataset) {
case "", "all", "devices":
changedDevices, deviceLink, err = p.fetcher.Devices(ctx, devicesDeltaLink)
if err != nil {
return updatedUsers, updatedDevices, err
}
p.logger.Debugf("Received %d devices from API", len(changedUsers))
default:
p.logger.Debugf("Skipping device collection from API: dataset=%s", p.conf.Dataset)
}
p.logger.Debugf("Received %d devices from API", len(changedUsers))

// Get group changes.
// Get group changes. Groups are required for both users and devices.
// So always collect these.
changedGroups, groupLink, err := p.fetcher.Groups(ctx, groupsDeltaLink)
if err != nil {
return updatedUsers, updatedDevices, err
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package azuread

import (
"context"
"fmt"
"testing"
"time"

Expand All @@ -18,36 +19,64 @@ import (
)

func TestAzure_DoFetch(t *testing.T) {
dbFilename := "TestAzure_DoFetch.db"
store := testSetupStore(t, dbFilename)
t.Cleanup(func() {
testCleanupStore(store, dbFilename)
})

a := azure{
logger: logp.L(),
auth: mockauth.New(""),
fetcher: mockfetcher.New(),
tests := []struct {
dataset string
wantUsers bool
wantDevices bool
}{
{dataset: "", wantUsers: true, wantDevices: true},
{dataset: "all", wantUsers: true, wantDevices: true},
{dataset: "users", wantUsers: true, wantDevices: false},
{dataset: "devices", wantUsers: false, wantDevices: true},
}

ss, err := newStateStore(store)
require.NoError(t, err)
defer ss.close(false)
for _, test := range tests {
t.Run(test.dataset, func(t *testing.T) {
suffix := test.dataset
if suffix != "" {
suffix = "_" + suffix
}
dbFilename := fmt.Sprintf("TestAzure_DoFetch%s.db", suffix)
store := testSetupStore(t, dbFilename)
t.Cleanup(func() {
testCleanupStore(store, dbFilename)
})

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
gotUsers, gotDevices, err := a.doFetch(ctx, ss, false)
require.NoError(t, err)
a := azure{
conf: conf{Dataset: test.dataset},
logger: logp.L(),
auth: mockauth.New(""),
fetcher: mockfetcher.New(),
}

var wantModifiedUsers collections.UUIDSet
for _, v := range mockfetcher.UserResponse {
wantModifiedUsers.Add(v.ID)
}
var wantModifiedDevices collections.UUIDSet
for _, v := range mockfetcher.DeviceResponse {
wantModifiedDevices.Add(v.ID)
}
ss, err := newStateStore(store)
require.NoError(t, err)
defer ss.close(false)

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
gotUsers, gotDevices, err := a.doFetch(ctx, ss, false)
require.NoError(t, err)

require.Equal(t, wantModifiedUsers.Values(), gotUsers.Values())
require.Equal(t, wantModifiedDevices.Values(), gotDevices.Values())
var wantModifiedUsers collections.UUIDSet
for _, v := range mockfetcher.UserResponse {
wantModifiedUsers.Add(v.ID)
}
var wantModifiedDevices collections.UUIDSet
for _, v := range mockfetcher.DeviceResponse {
wantModifiedDevices.Add(v.ID)
}

if test.wantUsers {
require.Equal(t, wantModifiedUsers.Values(), gotUsers.Values())
} else {
require.Equal(t, 0, gotUsers.Len())
}
if test.wantDevices {
require.Equal(t, wantModifiedDevices.Values(), gotDevices.Values())
} else {
require.Equal(t, 0, gotDevices.Len())
}
})
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package azuread

import (
"errors"
"strings"
"time"
)

Expand All @@ -21,6 +22,7 @@ type conf struct {
TenantID string `config:"tenant_id" validate:"required"`
SyncInterval time.Duration `config:"sync_interval"`
UpdateInterval time.Duration `config:"update_interval"`
Dataset string `config:"dataset"`
}

// Validate runs validation against the config.
Expand All @@ -34,6 +36,11 @@ func (c *conf) Validate() error {
if c.UpdateInterval == 0 {
return errors.New("update_interval must not be zero")
}
switch strings.ToLower(c.Dataset) {
case "", "all", "users", "devices":
default:
return errors.New("dataset must be 'all', 'users', 'devices' or empty")
}

return nil
}
Expand Down

0 comments on commit 72b9f37

Please sign in to comment.