Skip to content

Commit

Permalink
[ResponseOps] Investigate auto-healing when no write index is set for…
Browse files Browse the repository at this point in the history
… alerts as data alias (#184161)

Resolves #179829

## Summary

We've run into multiple SDHs where concrete indices exist for an
alerts-as-data resource but none of them is set as the write index for
the alias. This PR adds code that picks a concrete index (the last one
when sorted by index name) and sets it as the write index to avoid these
types of failures.

### Checklist

- [ ] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios


### To verify

1. Go to [dev tools](http://localhost:5601/app/dev_tools#/console)
2. Create an ES Query rule
```
POST kbn:/api/alerting/rule
{
  "params": {
    "searchType": "esQuery",
    "timeWindowSize": 5,
    "timeWindowUnit": "m",
    "threshold": [
      -1
    ],
    "thresholdComparator": ">",
    "size": 100,
    "esQuery": "{\n    \"query\":{\n      \"match_all\" : {}\n    }\n  }",
    "aggType": "count",
    "groupBy": "all",
    "termSize": 5,
    "excludeHitsFromPreviousRun": false,
    "sourceFields": [],
    "index": [
      ".kibana"
    ],
    "timeField": "created_at"
  },
  "consumer": "stackAlerts",
  "schedule": {
    "interval": "1m"
  },
  "tags": [],
  "name": "test",
  "rule_type_id": ".es-query",
  "actions": []
}
```

3. Run the following commands to set `"is_write_index": false`
```
POST /_aliases
{
  "actions": [
    {
      "remove": {
        "index": ".internal.alerts-stack.alerts-default-000001",
        "alias": ".alerts-stack.alerts-default"
      }
    }, {
      "add": {
        "index": ".internal.alerts-stack.alerts-default-000001",
        "alias": ".alerts-stack.alerts-default",
        "is_write_index": false
      }
    }
  ]
}

GET .internal.alerts-stack.alerts-default-000001/_alias/*
```
4. Stop Kibana, but keep ES running
5. Start Kibana and verify that the rule runs successfully
6. Run the GET alias command to verify `"is_write_index": true`
```
GET .internal.alerts-stack.alerts-default-000001/_alias/*
```
  • Loading branch information
doakalexi committed May 29, 2024
1 parent aa7ffc4 commit 5a5f9bd
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1253,16 +1253,10 @@ describe('Alerts Service', () => {
TestRegistrationContext.context,
DEFAULT_NAMESPACE_STRING
)
).toEqual({
error:
'Failure during installation. Indices matching pattern .internal.alerts-test.alerts-default-* exist but none are set as the write index for alias .alerts-test.alerts-default',
result: false,
});
).toEqual({ result: true });

expect(logger.error).toHaveBeenCalledWith(
new Error(
`Indices matching pattern .internal.alerts-test.alerts-default-* exist but none are set as the write index for alias .alerts-test.alerts-default`
)
expect(logger.debug).toHaveBeenCalledWith(
`Indices matching pattern .internal.alerts-test.alerts-default-* exist but none are set as the write index for alias .alerts-test.alerts-default`
);

expect(clusterClient.ilm.putLifecycle).toHaveBeenCalled();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
import { errors as EsErrors } from '@elastic/elasticsearch';
import { IndicesGetDataStreamResponse } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { createConcreteWriteIndex } from './create_concrete_write_index';
import { createConcreteWriteIndex, setConcreteWriteIndex } from './create_concrete_write_index';
import { getDataStreamAdapter } from './data_stream_adapter';

const randomDelayMultiplier = 0.01;
Expand Down Expand Up @@ -680,7 +680,7 @@ describe('createConcreteWriteIndex', () => {
}
});

it(`should throw error when there are concrete indices but none of them are the write index`, async () => {
it(`should log an error and try to set write index when there are concrete indices but none of them are the write index`, async () => {
if (useDataStream) return;

clusterClient.indices.getAlias.mockImplementationOnce(async () => ({
Expand All @@ -701,18 +701,101 @@ describe('createConcreteWriteIndex', () => {
async () => SimulateTemplateResponse
);

await expect(() =>
createConcreteWriteIndex({
logger,
esClient: clusterClient,
indexPatterns: IndexPatterns,
totalFieldsLimit: 2500,
dataStreamAdapter,
})
).rejects.toThrowErrorMatchingInlineSnapshot(
`"Indices matching pattern .internal.alerts-test.alerts-default-* exist but none are set as the write index for alias .alerts-test.alerts-default"`
await createConcreteWriteIndex({
logger,
esClient: clusterClient,
indexPatterns: IndexPatterns,
totalFieldsLimit: 2500,
dataStreamAdapter,
});

expect(logger.debug).toHaveBeenCalledWith(
'Indices matching pattern .internal.alerts-test.alerts-default-* exist but none are set as the write index for alias .alerts-test.alerts-default'
);
expect(clusterClient.indices.updateAliases).toHaveBeenCalled();
});
});
}
});

// Unit tests for setConcreteWriteIndex: one covering the successful alias update
// and one covering a failure of the updateAliases call.
describe('setConcreteWriteIndex', () => {
beforeEach(() => {
// Reset every jest mock before each test so call history and queued
// implementations from other tests do not carry over.
jest.resetAllMocks();
});

it(`should call updateAliases to set the concrete write index`, async () => {
// The candidate indices are deliberately passed out of order (000003, 000004,
// 000001, 000002); the assertions below expect 000004 to be the one selected.
await setConcreteWriteIndex({
logger,
esClient: clusterClient,
concreteIndices: [
{
index: '.internal.alerts-test.alerts-default-000003',
alias: '.alerts-test.alerts-default',
isWriteIndex: false,
},
{
index: '.internal.alerts-test.alerts-default-000004',
alias: '.alerts-test.alerts-default',
isWriteIndex: false,
},
{
index: '.internal.alerts-test.alerts-default-000001',
alias: '.alerts-test.alerts-default',
isWriteIndex: false,
},
{
index: '.internal.alerts-test.alerts-default-000002',
alias: '.alerts-test.alerts-default',
isWriteIndex: false,
},
],
});

// A debug message announces the attempt before the alias update is issued.
expect(logger.debug).toHaveBeenCalledWith(
'Attempting to set index: .internal.alerts-test.alerts-default-000004 as the write index for alias: .alerts-test.alerts-default.'
);
// The alias is removed from and re-added to the same index in a single
// updateAliases request, this time with is_write_index set to true.
expect(clusterClient.indices.updateAliases).toHaveBeenCalledWith({
body: {
actions: [
{
remove: {
alias: '.alerts-test.alerts-default',
index: '.internal.alerts-test.alerts-default-000004',
},
},
{
add: {
alias: '.alerts-test.alerts-default',
index: '.internal.alerts-test.alerts-default-000004',
is_write_index: true,
},
},
],
},
});
// An info message confirms the update once it has succeeded.
expect(logger.info).toHaveBeenCalledWith(
'Successfully set index: .internal.alerts-test.alerts-default-000004 as the write index for alias: .alerts-test.alerts-default.'
);
});

it(`should throw an error if there is a failure setting the concrete write index`, async () => {
// Make the next updateAliases call reject so the failure path is exercised.
const error = new Error(`fail`) as EsError;
clusterClient.indices.updateAliases.mockRejectedValueOnce(error);

// The rejection is expected to carry the function's own message naming the
// index and alias, not the underlying `fail` message.
await expect(() =>
setConcreteWriteIndex({
logger,
esClient: clusterClient,
concreteIndices: [
{
index: '.internal.alerts-test.alerts-default-000001',
alias: '.alerts-test.alerts-default',
isWriteIndex: false,
},
],
})
).rejects.toThrowErrorMatchingInlineSnapshot(
`"Failed to set index: .internal.alerts-test.alerts-default-000001 as the write index for alias: .alerts-test.alerts-default."`
);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import { IndicesSimulateIndexTemplateResponse } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { Logger, ElasticsearchClient } from '@kbn/core/server';
import { get } from 'lodash';
import { get, sortBy } from 'lodash';
import { IIndexPatternString } from '../resource_installer_utils';
import { retryTransientEsErrors } from './retry_transient_es_errors';
import { DataStreamAdapter } from './data_stream_adapter';
Expand Down Expand Up @@ -165,3 +165,45 @@ export interface CreateConcreteWriteIndexOpts {
/**
 * Delegates creation to the data stream adapter carried in the options:
 * calls `dataStreamAdapter.createStream` with the full, unmodified options
 * object and resolves once that call has completed.
 */
export const createConcreteWriteIndex = async (opts: CreateConcreteWriteIndexOpts) => {
  const { dataStreamAdapter } = opts;
  await dataStreamAdapter.createStream(opts);
};

/** Options accepted by `setConcreteWriteIndex`. */
interface SetConcreteWriteIndexOpts {
/** Receives the debug/info progress messages and is passed on to `retryTransientEsErrors`. */
logger: Logger;
/** Elasticsearch client whose `indices.updateAliases` is called to change the alias. */
esClient: ElasticsearchClient;
/** Candidate concrete indices; the last one when sorted by `index` is made the write index. */
concreteIndices: ConcreteIndexInfo[];
}

/**
 * Promotes one of the supplied concrete indices to be the write index of its alias.
 *
 * The candidates are sorted by their `index` name and the last entry is chosen.
 * The alias is then removed from and re-added to that index, with
 * `is_write_index: true`, in a single `updateAliases` request wrapped in
 * `retryTransientEsErrors`.
 *
 * @param opts - Logger, Elasticsearch client and the candidate concrete indices.
 * @returns A promise that resolves once the alias update has succeeded.
 * @throws Error if `concreteIndices` is empty, or if the alias update fails. In the
 *   failure case the underlying error is logged and attached as `cause`.
 */
export async function setConcreteWriteIndex(opts: SetConcreteWriteIndexOpts): Promise<void> {
  const { logger, esClient, concreteIndices } = opts;

  // sortBy returns a new array, so the caller's list is left untouched.
  // NOTE(review): ordering is a plain string comparison of index names; this
  // presumably relies on the zero-padded numeric suffix (e.g. -000004) - confirm.
  const sortedIndices = sortBy(concreteIndices, ['index']);
  const concreteIndex = sortedIndices[sortedIndices.length - 1];

  // Fail with a descriptive message instead of a TypeError when there is
  // nothing to pick from.
  if (!concreteIndex) {
    throw new Error('Unable to set a write index because no concrete indices were provided.');
  }

  const { index, alias } = concreteIndex;
  logger.debug(`Attempting to set index: ${index} as the write index for alias: ${alias}.`);

  try {
    await retryTransientEsErrors(
      () =>
        esClient.indices.updateAliases({
          body: {
            actions: [
              { remove: { index, alias } },
              { add: { index, alias, is_write_index: true } },
            ],
          },
        }),
      { logger }
    );
    logger.info(`Successfully set index: ${index} as the write index for alias: ${alias}.`);
  } catch (error) {
    // Surface the root cause: the thrown message is kept stable for callers
    // and snapshots, so the underlying reason is logged and attached instead.
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(
      `Error setting index: ${index} as the write index for alias: ${alias} - ${reason}`
    );
    throw Object.assign(
      new Error(`Failed to set index: ${index} as the write index for alias: ${alias}.`),
      { cause: error }
    );
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
CreateConcreteWriteIndexOpts,
ConcreteIndexInfo,
updateIndexMappings,
setConcreteWriteIndex,
} from './create_concrete_write_index';
import { retryTransientEsErrors } from './retry_transient_es_errors';

Expand Down Expand Up @@ -186,9 +187,11 @@ async function createAliasStream(opts: CreateConcreteWriteIndexOpts): Promise<vo
// If there are some concrete indices but none of them are the write index, try to recover by
// setting one of the existing indices as the write index instead of failing the installation.
if (concreteIndicesExist && !concreteWriteIndicesExist) {
throw new Error(
logger.debug(
`Indices matching pattern ${indexPatterns.pattern} exist but none are set as the write index for alias ${indexPatterns.alias}`
);
await setConcreteWriteIndex({ logger, esClient, concreteIndices });
concreteWriteIndicesExist = true;
}
}

Expand Down

0 comments on commit 5a5f9bd

Please sign in to comment.