Skip to content
14 changes: 14 additions & 0 deletions packages/elastic_agent/changelog.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# newer versions go on top
- version: "2.6.8"
changes:
- description: Adds processor for health_status field to status change logs data stream
type: enhancement
link: https://github.com/elastic/integrations/pull/15852
- description: Add new alerting rules for agent health status changes
type: enhancement
link: https://github.com/elastic/integrations/pull/15852
- description: Use a more specific index pattern and remove RLIKE usage for system metrics alerting rules
type: enhancement
link: https://github.com/elastic/integrations/pull/15852
- description: Use system.process.cpu.total.norm.pct for CPU usage alerting rule
type: bugfix
link: https://github.com/elastic/integrations/pull/15852
- version: "2.6.7"
changes:
- description: Add mapping for error fields for beats logs.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
"events": [
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "online",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "offline",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "error",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "degraded",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "updating",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "enrolling",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "unenrolling",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
{
"expected": [
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "online",
"health_status": "healthy",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "offline",
"health_status": "offline",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "error",
"health_status": "unhealthy",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "degraded",
"health_status": "unhealthy",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "updating",
"health_status": "updating",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "enrolling",
"health_status": "updating",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
},
{
"@timestamp": "2024-01-15T10:30:00.000Z",
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "unenrolling",
"health_status": "updating",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
description: Pipeline for Elastic Agent status change logs.
processors:
  # Collapse the fine-grained agent `status` into a coarser `health_status`:
  #   online                            -> healthy
  #   error, degraded                   -> unhealthy
  #   updating, enrolling, unenrolling  -> updating
  #   anything else (e.g. offline)      -> copied through unchanged
  - script:
      description: Derive health_status from status field
      # Only run for string statuses. A missing or non-string `status` is
      # skipped explicitly instead of relying on a swallowed cast error.
      if: ctx.status instanceof String
      lang: painless
      params:
        # Lookup table: raw status -> health bucket.
        health_by_status:
          online: healthy
          error: unhealthy
          degraded: unhealthy
          updating: updating
          enrolling: updating
          unenrolling: updating
      source: |
        String status = ctx.status;
        // Statuses without an entry in the table keep their original value.
        ctx.health_status = params.health_by_status.getOrDefault(status, status);
      # Best-effort enrichment: an unexpected script error must not fail the
      # document, it only leaves health_status unset.
      ignore_failure: true
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
- name: status
type: keyword
- name: health_status
type: keyword
- name: policy_id
type: keyword
- name: agentless
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
{
"@timestamp": 1576280412771,
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "HEALTHY",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
}
"@timestamp": 1576280412771,
"data_stream": {
"type": "logs",
"dataset": "elastic_agent.status_change",
"namespace": "default"
},
"agent": {
"id": "f2b3c4d5-e6f7-8a9b-b0c1-d2e3f4g5h6i7"
},
"status": "online",
"health_status": "healthy",
"policy_id": "test-policy",
"agentless": false,
"space_id": "default",
"hostname": "test-host"
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-*, *:metrics-*\n| WHERE process.executable RLIKE \".*[Ee]lastic.*[Aa]gent.*\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS cpu_process_pct = MAX(system.process.cpu.total.pct) * 100\n BY elastic_agent.id, process.name,\n time_bucket = BUCKET(@timestamp, 1 minute)\n// Count the 1 minute timebuckets that are above 80% by process and agent\n| WHERE cpu_process_pct >= 80\n| STATS count_above_threshold = COUNT(*)\n BY elastic_agent.id, process.name\n// Alert if there are 5 or more occurences\n| WHERE count_above_threshold >= 5"
"esql": "FROM metrics-system*, *:metrics-system*\n| WHERE TO_LOWER(process.executable) LIKE \"*elastic*agent*\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS cpu_process_pct = MAX(system.process.cpu.total.norm.pct) * 100\n BY elastic_agent.id, process.name,\n time_bucket = BUCKET(@timestamp, 1 minute)\n// Count the 1 minute timebuckets that are above 80% by process and agent\n| WHERE cpu_process_pct >= 80\n| STATS count_above_threshold = COUNT(*)\n BY elastic_agent.id, process.name\n// Alert if there are 5 or more occurences\n| WHERE count_above_threshold >= 5"
},
"aggType": "count",
"groupBy": "row",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-*, *:metrics-*\n| WHERE process.executable RLIKE \".*[Ee]lastic.*[Aa]gent.*\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS max_memory_per_process = MAX(system.process.memory.rss.pct * 100) BY agent.id, process.name\n| STATS total_memory_usage = SUM(max_memory_per_process) BY agent.id\n| WHERE total_memory_usage > 50"
"esql": "FROM metrics-system*, *:metrics-system*\n| WHERE TO_LOWER(process.executable) LIKE \"*elastic*agent*\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS max_memory_per_process = MAX(system.process.memory.rss.pct * 100) BY agent.id, process.name\n| STATS total_memory_usage = SUM(max_memory_per_process) BY agent.id\n| WHERE total_memory_usage > 50"
},
"aggType": "count",
"groupBy": "row",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-*, *:metrics-*\n| WHERE process.executable RLIKE \".*[Ee]lastic.*[Aa]gent.*\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS restart_count = COUNT_DISTINCT(process.cpu.start_time) BY host.name, process.name, bucket(@timestamp,5 minute) \n| WHERE restart_count > 10\n| STATS MAX(restart_count) BY host.name, process.name"
"esql": "FROM metrics-system*, *:metrics-system*\n| WHERE TO_LOWER(process.executable) LIKE \"*elastic*agent*\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS restart_count = COUNT_DISTINCT(process.cpu.start_time) BY host.name, process.name, bucket(@timestamp,5 minute) \n| WHERE restart_count > 10\n| STATS MAX(restart_count) BY host.name, process.name"
},
"aggType": "count",
"groupBy": "row",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-offline-status",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Offline status",
"tags": ["Elastic Agent"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 5,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM logs-elastic_agent.status_change-default, *:logs-elastic_agent.status_change-default\n| WHERE data_stream.dataset == \"elastic_agent.status_change\" and agentless == false and health_status == \"offline\""
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Loading