Skip to content

Commit

Permalink
[Security Solution][Detection Engine] Bubbles up errors when it canno…
Browse files Browse the repository at this point in the history
…t create signal documents (#77687)

## Summary

Fixes: #77255, #63712

This bubbles up errors when we cannot correctly create signal documents. Before this PR, we sometimes would mark documents as being in the error state for the end user but ask them to look in their Kibana logs for the specific error. In some cases we did not bubble up any error states and the signal would look like it had 0 signals when that wasn't true. It had valid signals but could not write the signals to its index because the source index had incompatibilities with ECS and cannot write the document to the signals index.

This fixes those issues to correctly bubble up the errors. If you're interested in manual testing there are two ways. The first way is to take advantage of an existing "threshold bug" by making a "threshold rule" which has a CIDR in it like so below:

<img width="1046" alt="Screen Shot 2020-09-10 at 4 08 18 PM" src="https://user-images.githubusercontent.com/1151048/93532721-cba21480-f8fe-11ea-90e7-27c39fdb870b.png">

On output you should see that the threshold is in an error state for the rule and also additional details:
<img width="1852" alt="Screen Shot 2020-09-16 at 1 26 37 PM" src="https://user-images.githubusercontent.com/1151048/93532789-eaa0a680-f8fe-11ea-9327-81cf128a344e.png">

The second way to trigger this is to create a mock invalid ECS index with an invalid mapping in dev tools:

```ts
# This is invalid because it has an odd "original" inside of it
PUT mock-bad-ecs-index
{
  "mappings": {
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "message": {
        "properties": {
          "original": {
            "type": "text",
            "index": false,
            "doc_values": false
          }
        }
      }
    }
  }
}

# You might have to change your timestamp to be 5 minutes from now to catch it as a signal or use a really long look back time
PUT mock-bad-ecs-index/_doc/1
{
  "@timestamp": "2020-09-17T21:50:54.240Z",
  "message": {
    "original": "invalid subobject"
  }
}
```

Then create a rule against this index:
<img width="1045" alt="Screen Shot 2020-09-17 at 3 52 40 PM" src="https://user-images.githubusercontent.com/1151048/93532922-30f60580-f8ff-11ea-8131-172aab7b7c68.png">

And you should see an error banner and error state where before it would not show the error message:
<img width="1866" alt="Screen Shot 2020-09-17 at 3 53 20 PM" src="https://user-images.githubusercontent.com/1151048/93532972-4408d580-f8ff-11ea-8105-44b0f767cc70.png">

### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios
  • Loading branch information
FrankHassanabad committed Sep 20, 2020
1 parent 8408e26 commit cd51289
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import uuid from 'uuid';
import { getListItemResponseMock } from '../../../../../lists/common/schemas/response/list_item_schema.mock';
import { listMock } from '../../../../../lists/server/mocks';
import { getExceptionListItemSchemaMock } from '../../../../../lists/common/schemas/response/exception_list_item_schema.mock';
import { BulkResponse } from './types';

const buildRuleMessage = buildRuleMessageFactory({
id: 'fake id',
Expand Down Expand Up @@ -759,7 +760,7 @@ describe('searchAfterAndBulkCreate', () => {
],
})
.mockImplementation(() => {
throw Error('Fake Error');
throw Error('Fake Error'); // throws the exception we are testing
});
listClient.getListItemByValues = jest.fn(({ value }) =>
Promise.resolve(
Expand Down Expand Up @@ -811,4 +812,114 @@ describe('searchAfterAndBulkCreate', () => {
expect(createdSignalsCount).toEqual(0); // should not create signals if search threw error
expect(lastLookBackDate).toEqual(null);
});

test('it returns error array when singleSearchAfter returns errors', async () => {
const sampleParams = sampleRuleAlertParams(30);
const bulkItem: BulkResponse = {
took: 100,
errors: true,
items: [
{
create: {
_version: 1,
_index: 'index-123',
_id: 'id-123',
status: 201,
error: {
type: 'network',
reason: 'error on creation',
shard: 'shard-123',
index: 'index-123',
},
},
},
],
};
mockService.callCluster
.mockResolvedValueOnce(repeatedSearchResultsWithSortId(4, 1, someGuids.slice(0, 3)))
.mockResolvedValueOnce(bulkItem) // adds the response with errors we are testing
.mockResolvedValueOnce(repeatedSearchResultsWithSortId(4, 1, someGuids.slice(3, 6)))
.mockResolvedValueOnce({
took: 100,
errors: false,
items: [
{
fakeItemValue: 'fakeItemKey',
},
{
create: {
status: 201,
},
},
],
})
.mockResolvedValueOnce(repeatedSearchResultsWithSortId(4, 1, someGuids.slice(6, 9)))
.mockResolvedValueOnce({
took: 100,
errors: false,
items: [
{
fakeItemValue: 'fakeItemKey',
},
{
create: {
status: 201,
},
},
],
})
.mockResolvedValueOnce(repeatedSearchResultsWithSortId(4, 1, someGuids.slice(9, 12)))
.mockResolvedValueOnce({
took: 100,
errors: false,
items: [
{
fakeItemValue: 'fakeItemKey',
},
{
create: {
status: 201,
},
},
],
})
.mockResolvedValueOnce(sampleDocSearchResultsNoSortIdNoHits());

const {
success,
createdSignalsCount,
lastLookBackDate,
errors,
} = await searchAfterAndBulkCreate({
ruleParams: sampleParams,
gap: null,
previousStartedAt: new Date(),
listClient,
exceptionsList: [],
services: mockService,
logger: mockLogger,
id: sampleRuleGuid,
inputIndexPattern,
signalsIndex: DEFAULT_SIGNALS_INDEX,
name: 'rule-name',
actions: [],
createdAt: '2020-01-28T15:58:34.810Z',
updatedAt: '2020-01-28T15:59:14.004Z',
createdBy: 'elastic',
updatedBy: 'elastic',
interval: '5m',
enabled: true,
pageSize: 1,
filter: undefined,
refresh: false,
tags: ['some fake tag 1', 'some fake tag 2'],
throttle: 'no_actions',
buildRuleMessage,
});
expect(success).toEqual(false);
expect(errors).toEqual(['error on creation']);
expect(mockService.callCluster).toHaveBeenCalledTimes(9);
expect(createdSignalsCount).toEqual(4);
expect(lastLookBackDate).toEqual(new Date('2020-04-20T21:27:45+0000'));
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ export interface SearchAfterAndBulkCreateReturnType {
bulkCreateTimes: string[];
lastLookBackDate: Date | null | undefined;
createdSignalsCount: number;
errors: string[];
}

// search_after through documents and re-index using bulk endpoint.
Expand Down Expand Up @@ -81,11 +82,12 @@ export const searchAfterAndBulkCreate = async ({
buildRuleMessage,
}: SearchAfterAndBulkCreateParams): Promise<SearchAfterAndBulkCreateReturnType> => {
const toReturn: SearchAfterAndBulkCreateReturnType = {
success: false,
success: true,
searchAfterTimes: [],
bulkCreateTimes: [],
lastLookBackDate: null,
createdSignalsCount: 0,
errors: [],
};

// sortId tells us where to start our next consecutive search_after query
Expand All @@ -111,6 +113,7 @@ export const searchAfterAndBulkCreate = async ({
if (tuple == null || tuple.to == null || tuple.from == null) {
logger.error(buildRuleMessage(`[-] malformed date tuple`));
toReturn.success = false;
toReturn.errors = [...new Set([...toReturn.errors, 'malformed date tuple'])];
return toReturn;
}
signalsCreatedCount = 0;
Expand Down Expand Up @@ -163,7 +166,6 @@ export const searchAfterAndBulkCreate = async ({
} was 0, exiting and moving on to next tuple`
)
);
toReturn.success = true;
break;
}
toReturn.lastLookBackDate =
Expand Down Expand Up @@ -199,6 +201,8 @@ export const searchAfterAndBulkCreate = async ({
const {
bulkCreateDuration: bulkDuration,
createdItemsCount: createdCount,
success: bulkSuccess,
errors: bulkErrors,
} = await singleBulkCreate({
filteredEvents,
ruleParams,
Expand Down Expand Up @@ -229,6 +233,8 @@ export const searchAfterAndBulkCreate = async ({
logger.debug(
buildRuleMessage(`filteredEvents.hits.hits: ${filteredEvents.hits.hits.length}`)
);
toReturn.success = toReturn.success && bulkSuccess;
toReturn.errors = [...new Set([...toReturn.errors, ...bulkErrors])];
}

// we are guaranteed to have searchResult hits at this point
Expand All @@ -239,17 +245,16 @@ export const searchAfterAndBulkCreate = async ({
sortId = lastSortId[0];
} else {
logger.debug(buildRuleMessage('sortIds was empty on searchResult'));
toReturn.success = true;
break;
}
} catch (exc) {
} catch (exc: unknown) {
logger.error(buildRuleMessage(`[-] search_after and bulk threw an error ${exc}`));
toReturn.success = false;
toReturn.errors = [...new Set([...toReturn.errors, `${exc}`])];
return toReturn;
}
}
}
logger.debug(buildRuleMessage(`[+] completed bulk index of ${toReturn.createdSignalsCount}`));
toReturn.success = true;
return toReturn;
};
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ import {
} from './utils';
import { parseScheduleDates } from '../../../../common/detection_engine/parse_schedule_dates';
import { RuleExecutorOptions } from './types';
import { searchAfterAndBulkCreate } from './search_after_bulk_create';
import {
searchAfterAndBulkCreate,
SearchAfterAndBulkCreateReturnType,
} from './search_after_bulk_create';
import { scheduleNotificationActions } from '../notifications/schedule_notification_actions';
import { RuleAlertType } from '../rules/types';
import { findMlSignals } from './find_ml_signals';
Expand Down Expand Up @@ -481,17 +484,19 @@ describe('rules_notification_alert_type', () => {

describe('should catch error', () => {
it('when bulk indexing failed', async () => {
(searchAfterAndBulkCreate as jest.Mock).mockResolvedValue({
const result: SearchAfterAndBulkCreateReturnType = {
success: false,
searchAfterTimes: [],
bulkCreateTimes: [],
lastLookBackDate: null,
createdSignalsCount: 0,
});
errors: ['Error that bubbled up.'],
};
(searchAfterAndBulkCreate as jest.Mock).mockResolvedValue(result);
await alert.executor(payload);
expect(logger.error).toHaveBeenCalled();
expect(logger.error.mock.calls[0][0]).toContain(
'Bulk Indexing of signals failed. Check logs for further details.'
'Bulk Indexing of signals failed: Error that bubbled up. name: "Detect Root/Admin Users" id: "04128c15-0d1b-4716-a4c5-46997ac7f3bd" rule id: "rule-1" signals index: ".siem-signals"'
);
expect(ruleStatusService.error).toHaveBeenCalled();
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ export const signalRulesAlertType = ({
searchAfterTimes: [],
lastLookBackDate: null,
createdSignalsCount: 0,
errors: [],
};
const ruleStatusClient = ruleStatusSavedObjectsClientFactory(services.savedObjectsClient);
const ruleStatusService = await ruleStatusServiceFactory({
Expand Down Expand Up @@ -221,7 +222,12 @@ export const signalRulesAlertType = ({
logger.info(buildRuleMessage(`Found ${anomalyCount} signals from ML anomalies.`));
}

const { success, bulkCreateDuration, createdItemsCount } = await bulkCreateMlSignals({
const {
success,
errors,
bulkCreateDuration,
createdItemsCount,
} = await bulkCreateMlSignals({
actions,
throttle,
someResult: anomalyResults,
Expand All @@ -241,6 +247,7 @@ export const signalRulesAlertType = ({
tags,
});
result.success = success;
result.errors = errors;
result.createdSignalsCount = createdItemsCount;
if (bulkCreateDuration) {
result.bulkCreateTimes.push(bulkCreateDuration);
Expand Down Expand Up @@ -274,6 +281,7 @@ export const signalRulesAlertType = ({
success,
bulkCreateDuration,
createdItemsCount,
errors,
} = await bulkCreateThresholdSignals({
actions,
throttle,
Expand All @@ -297,6 +305,7 @@ export const signalRulesAlertType = ({
tags,
});
result.success = success;
result.errors = errors;
result.createdSignalsCount = createdItemsCount;
if (bulkCreateDuration) {
result.bulkCreateTimes.push(bulkCreateDuration);
Expand Down Expand Up @@ -391,7 +400,8 @@ export const signalRulesAlertType = ({
}
} else {
const errorMessage = buildRuleMessage(
'Bulk Indexing of signals failed. Check logs for further details.'
'Bulk Indexing of signals failed:',
result.errors.join()
);
logger.error(errorMessage);
await ruleStatusService.error(errorMessage, {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,11 @@ describe('singleBulkCreate', () => {
expect(createdItemsCount).toEqual(1);
});

test('create successful bulk create when bulk create has multiple error statuses', async () => {
test('create failed bulk create when bulk create has multiple error statuses', async () => {
const sampleParams = sampleRuleAlertParams();
const sampleSearchResult = sampleDocSearchResultsNoSortId;
mockService.callCluster.mockResolvedValue(sampleBulkCreateErrorResult);
const { success, createdItemsCount } = await singleBulkCreate({
const { success, createdItemsCount, errors } = await singleBulkCreate({
filteredEvents: sampleSearchResult(),
ruleParams: sampleParams,
services: mockService,
Expand All @@ -275,9 +275,9 @@ describe('singleBulkCreate', () => {
tags: ['some fake tag 1', 'some fake tag 2'],
throttle: 'no_actions',
});

expect(mockLogger.error).toHaveBeenCalled();
expect(success).toEqual(true);
expect(errors).toEqual(['[4]: internal server error']);
expect(success).toEqual(false);
expect(createdItemsCount).toEqual(1);
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ export interface SingleBulkCreateResponse {
success: boolean;
bulkCreateDuration?: string;
createdItemsCount: number;
errors: string[];
}

// Bulk Index documents.
Expand All @@ -89,7 +90,7 @@ export const singleBulkCreate = async ({
logger.debug(`about to bulk create ${filteredEvents.hits.hits.length} events`);
if (filteredEvents.hits.hits.length === 0) {
logger.debug(`all events were duplicates`);
return { success: true, createdItemsCount: 0 };
return { success: true, createdItemsCount: 0, errors: [] };
}
// index documents after creating an ID based on the
// source documents' originating index, and the original
Expand Down Expand Up @@ -138,18 +139,31 @@ export const singleBulkCreate = async ({
logger.debug(`individual bulk process time took: ${makeFloatString(end - start)} milliseconds`);
logger.debug(`took property says bulk took: ${response.took} milliseconds`);

if (response.errors) {
const duplicateSignalsCount = countBy(response.items, 'create.status')['409'];
const createdItemsCount = countBy(response.items, 'create.status')['201'] ?? 0;
const duplicateSignalsCount = countBy(response.items, 'create.status')['409'];
const errorCountByMessage = errorAggregator(response, [409]);

logger.debug(`bulk created ${createdItemsCount} signals`);
if (duplicateSignalsCount > 0) {
logger.debug(`ignored ${duplicateSignalsCount} duplicate signals`);
const errorCountByMessage = errorAggregator(response, [409]);
if (!isEmpty(errorCountByMessage)) {
logger.error(
`[-] bulkResponse had errors with responses of: ${JSON.stringify(errorCountByMessage)}`
);
}
}

const createdItemsCount = countBy(response.items, 'create.status')['201'] ?? 0;
logger.debug(`bulk created ${createdItemsCount} signals`);
return { success: true, bulkCreateDuration: makeFloatString(end - start), createdItemsCount };
if (!isEmpty(errorCountByMessage)) {
logger.error(
`[-] bulkResponse had errors with responses of: ${JSON.stringify(errorCountByMessage)}`
);
return {
errors: Object.keys(errorCountByMessage),
success: false,
bulkCreateDuration: makeFloatString(end - start),
createdItemsCount,
};
} else {
return {
errors: [],
success: true,
bulkCreateDuration: makeFloatString(end - start),
createdItemsCount,
};
}
};
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ export interface SignalSource {
}

export interface BulkItem {
create: {
create?: {
_index: string;
_type?: string;
_id: string;
Expand Down
Loading

0 comments on commit cd51289

Please sign in to comment.