From c4eced5f5446522f21ac28f75a614240223c02fe Mon Sep 17 00:00:00 2001 From: jdv Date: Thu, 20 Nov 2025 11:28:32 +0100 Subject: [PATCH 01/13] iteration 0 : Claude generated --- .../getting_started/installation/whm.mdx | 2 +- .../acquisition_troubleshoot.mdx | 2 +- .../post_installation/troubleshoot.mdx | 4 +- .../troubleshooting/console_issues.md | 97 +++++ .../troubleshooting/engine_no_alerts.md | 136 +++++++ .../troubleshooting/engine_too_many_alerts.md | 183 +++++++++ .../troubleshooting/fw_integration_offline.md | 306 +++++++++++++++ .../unversioned/troubleshooting/intro.md | 27 +- .../troubleshooting/lp_no_alerts.md | 169 ++++++++ .../troubleshooting/lp_no_logs_parsed.md | 270 +++++++++++++ .../troubleshooting/lp_no_logs_read.md | 295 ++++++++++++++ .../troubleshooting/rc_integration_offline.md | 371 ++++++++++++++++++ 12 files changed, 1854 insertions(+), 8 deletions(-) create mode 100644 crowdsec-docs/unversioned/troubleshooting/console_issues.md create mode 100644 crowdsec-docs/unversioned/troubleshooting/engine_no_alerts.md create mode 100644 crowdsec-docs/unversioned/troubleshooting/engine_too_many_alerts.md create mode 100644 crowdsec-docs/unversioned/troubleshooting/fw_integration_offline.md create mode 100644 crowdsec-docs/unversioned/troubleshooting/lp_no_alerts.md create mode 100644 crowdsec-docs/unversioned/troubleshooting/lp_no_logs_parsed.md create mode 100644 crowdsec-docs/unversioned/troubleshooting/lp_no_logs_read.md create mode 100644 crowdsec-docs/unversioned/troubleshooting/rc_integration_offline.md diff --git a/crowdsec-docs/unversioned/getting_started/installation/whm.mdx b/crowdsec-docs/unversioned/getting_started/installation/whm.mdx index 11fd5f429..cbe991473 100644 --- a/crowdsec-docs/unversioned/getting_started/installation/whm.mdx +++ b/crowdsec-docs/unversioned/getting_started/installation/whm.mdx @@ -148,7 +148,7 @@ Most of the time it will be a port conflict or config file error - Check the logs for error - In CrowdSec's logs sudo 
less /var/log/crowdsec.log: Note that it might be very verbose. - You can also check: sudo journalctl -u crowdsec -- Ultimately, you can check the [Security Engine Troubleshooting section](/troubleshooting/security_engine.mdx) +- Ultimately, you can check the [Security Engine Troubleshooting section](/u/troubleshooting/security_engine.mdx) ### Changing port configuration diff --git a/crowdsec-docs/unversioned/getting_started/post_installation/acquisition_troubleshoot.mdx b/crowdsec-docs/unversioned/getting_started/post_installation/acquisition_troubleshoot.mdx index fd175fbd0..de42414c6 100644 --- a/crowdsec-docs/unversioned/getting_started/post_installation/acquisition_troubleshoot.mdx +++ b/crowdsec-docs/unversioned/getting_started/post_installation/acquisition_troubleshoot.mdx @@ -21,7 +21,7 @@ The first thing to check is that the log file is found and readable by the Crowd Within the CrowdSec log file it will log if the file was found or not. -Log file locations change by distribution, you can find the default log location [outlined here](/troubleshooting/security_engine.mdx#where-are-the-logs-stored). +Log file locations change by distribution, you can find the default log location [outlined here](/u/troubleshooting/security_engine.mdx#where-are-the-logs-stored). " --type ` to test parsing + +### If scenarios are in simulation mode + +Check if scenarios are in simulation: + +```bash +sudo cscli simulation status +``` + +If scenarios are in simulation mode, they will be listed. To disable simulation for all scenarios: + +```bash +sudo cscli simulation disable --all +sudo systemctl reload crowdsec +``` + +Or for specific scenarios: + +```bash +sudo cscli simulation disable crowdsecurity/ssh-bf +sudo systemctl reload crowdsec +``` + +### If this is a low-activity environment + +In genuinely clean environments, you can: + +1. **Test with dummy scenarios** using the [Health Check guide](/u/getting_started/health_check) to verify detection works +2. 
**Subscribe to Community Blocklist** decisions in the Console to add proactive blocking +3. **Monitor metrics regularly** to ensure the pipeline stays healthy + +## Verify Resolution + +After making changes: + +1. Restart CrowdSec: `sudo systemctl restart crowdsec` +2. Wait a few minutes for log processing +3. Check metrics again: `sudo cscli metrics show scenarios` +4. Trigger a test alert using the [Health Check detection tests](/u/getting_started/health_check#-detection-checks) + +## Related Issues + +- [LP No Logs Read](/u/troubleshooting/lp_no_logs_read) - If acquisition is not working +- [LP No Logs Parsed](/u/troubleshooting/lp_no_logs_parsed) - If parsing is failing +- [Security Engine Troubleshooting](/u/troubleshooting/security_engine) - General Security Engine issues + +## Getting Help + +If you've verified logs are being read and parsed correctly but still see no alerts: + +- Check [Discourse](https://discourse.crowdsec.net/) for similar cases +- Ask on [Discord](https://discord.gg/crowdsec) with your `cscli metrics` output +- Review your scenarios and log samples using [CrowdSec Playground](https://playground.crowdsec.net/) \ No newline at end of file diff --git a/crowdsec-docs/unversioned/troubleshooting/engine_too_many_alerts.md b/crowdsec-docs/unversioned/troubleshooting/engine_too_many_alerts.md new file mode 100644 index 000000000..d66e61011 --- /dev/null +++ b/crowdsec-docs/unversioned/troubleshooting/engine_too_many_alerts.md @@ -0,0 +1,183 @@ +--- +title: Engine Too Many Alerts +id: engine_too_many_alerts +--- + +The **Engine Too Many Alerts** issue appears when your Security Engine generates an abnormally high volume of alerts—more than 250,000 in a 6-hour period. This usually indicates a misconfigured scenario, false positives, or an ongoing large-scale attack. 
+ +## What Triggers This Issue + +- **Trigger condition**: More than 250,000 alerts in 6 hours +- **Criticality**: High +- **Impact**: May indicate false positives, performance issues, or a real attack + +## Common Root Causes + +### Misconfigured or overly sensitive scenario +A scenario with thresholds set too low or matching too broadly can trigger excessive alerts. + +### Log duplication +The same log file is being read multiple times due to acquisition misconfiguration. + +### Actual large-scale attack +A genuine distributed attack (DDoS, brute force campaign) targeting your infrastructure. + +### Parser creating duplicate events +A parser issue causing the same log line to generate multiple events. + +## How to Diagnose + +### Check alert volume by scenario + +Identify which scenarios are generating the most alerts: + +```bash +# On host +sudo cscli alerts list -l 100 + +# Docker +docker exec crowdsec cscli alerts list -l 100 + +# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli alerts list -l 100 +``` + +Look for patterns: +- Is one scenario dominating the alert count? +- Are the same IPs repeatedly triggering alerts? +- Are alerts legitimate threats or false positives? + +### Check metrics for scenario overflow + +```bash +# On host +sudo cscli metrics show scenarios + +# Docker +docker exec crowdsec cscli metrics show scenarios + +# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli metrics show scenarios +``` + +Look for scenarios with extremely high "Overflow" counts or "Current count" numbers. 
+ +### Check for log duplication + +Review acquisition configuration to ensure log files aren't listed multiple times: + +```bash +# On host +sudo cat /etc/crowdsec/acquis.yaml +sudo ls -la /etc/crowdsec/acquis.d/ + +# Docker +docker exec crowdsec cat /etc/crowdsec/acquis.yaml + +# Kubernetes +kubectl get configmap -n crowdsec crowdsec-config -o yaml | grep -A 20 acquis +``` + +Also check metrics for duplicate acquisition sources: + +```bash +sudo cscli metrics show acquisition +``` + +## How to Resolve + +### For misconfigured scenarios + +#### Put the problematic scenario in simulation mode + +This allows you to investigate without generating alerts: + +```bash +# On host +sudo cscli simulation enable crowdsecurity/scenario-name + +# Docker +docker exec crowdsec cscli simulation enable crowdsecurity/scenario-name + +# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli simulation enable crowdsecurity/scenario-name +``` + +Then reload: +```bash +sudo systemctl reload crowdsec +``` + +#### Tune the scenario threshold + +If the scenario is triggering too easily, you can create a custom version with adjusted thresholds. See the [scenario documentation](/docs/scenarios/intro) for details on customizing scenarios. + +#### Use whitelists + +If specific IPs or patterns are causing false positives, create a whitelist. See [Parser Whitelists](/docs/log_processor/whitelist/intro) or [Profiles](/docs/local_api/profiles/intro). + +### For log duplication + +Remove duplicate entries from your acquisition configuration: + +1. Edit acquisition files: `/etc/crowdsec/acquis.yaml` or files in `/etc/crowdsec/acquis.d/` +2. Ensure each log source appears only once +3. Restart CrowdSec: `sudo systemctl restart crowdsec` + +### For legitimate large-scale attacks + +If you're experiencing a real attack: + +1. **Verify your remediation components are working** to block attackers +2. 
**Check that decisions are being applied**: `cscli decisions list` +3. **Consider increasing timeout durations** in profiles if attackers are returning +4. **Subscribe to Community Blocklist** for proactive blocking of known malicious IPs +5. **Monitor your infrastructure** for the attack's impact + +### For parser issues + +If a parser is creating duplicate events: + +1. Use `cscli explain` to test parsing: + ```bash + sudo cscli explain --log "" --type + ``` +2. Check if the log line generates multiple events incorrectly +3. Review parser configuration or report the issue to the [CrowdSec Hub](https://github.com/crowdsecurity/hub/issues) + +## Verify Resolution + +After making changes: + +1. Restart or reload CrowdSec: `sudo systemctl restart crowdsec` +2. Monitor alert generation for 30 minutes: + ```bash + watch -n 30 'cscli alerts list | head -20' + ``` +3. Check metrics: `sudo cscli metrics show scenarios` +4. Verify alert volume has returned to normal levels + +## Performance Impact + +Excessive alerts can impact performance: + +- **High memory usage**: Each active scenario bucket consumes memory +- **Database growth**: Large numbers of alerts increase database size +- **API latency**: Bouncers may experience slower decision pulls + +If performance is degraded, consider: +- Cleaning old alerts: `cscli alerts delete --all` (after investigation) +- Reviewing database maintenance: [Database documentation](/docs/local_api/database) + +## Related Issues + +- [Security Engine Troubleshooting](/u/troubleshooting/security_engine) - General Security Engine issues +- [LP No Logs Parsed](/u/troubleshooting/lp_no_logs_parsed) - If parsing is creating unusual events + +## Getting Help + +If you need assistance analyzing alert patterns: + +- Share anonymized alert samples on [Discourse](https://discourse.crowdsec.net/) +- Ask on [Discord](https://discord.gg/crowdsec) with your `cscli metrics show scenarios` output +- Use the [CrowdSec 
Playground](https://playground.crowdsec.net/) to test scenario behavior diff --git a/crowdsec-docs/unversioned/troubleshooting/fw_integration_offline.md b/crowdsec-docs/unversioned/troubleshooting/fw_integration_offline.md new file mode 100644 index 000000000..466247f32 --- /dev/null +++ b/crowdsec-docs/unversioned/troubleshooting/fw_integration_offline.md @@ -0,0 +1,306 @@ +--- +title: Firewall Integration Offline +id: fw_integration_offline +--- + +The **Firewall Integration Offline** issue appears when a firewall-based remediation component (bouncer) has not pulled decisions from the Local API for more than 24 hours. This means blocked IPs are not being enforced at the firewall level. + +## What Triggers This Issue + +- **Trigger condition**: No decision pulls for 24 hours +- **Criticality**: Critical +- **Impact**: Firewall-based blocking is not working - detected threats are not being blocked + +## Common Root Causes + +### Bouncer service stopped +The firewall bouncer systemd service or process is not running. + +### Authentication failure +API key is invalid, expired, or the bouncer was removed from the Security Engine. + +### Network connectivity issues +The bouncer cannot reach the Local API endpoint (different host, port closed, etc.). + +### Configuration errors +Incorrect API URL, missing configuration file, or malformed settings. + +### Bouncer installation issue +The bouncer may not be properly installed or registered. + +## How to Diagnose + +### Check bouncer status in Security Engine + +From the Security Engine (or LAPI host): + +```bash +# On host +sudo cscli bouncers list + +# Docker +docker exec crowdsec cscli bouncers list + +# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli bouncers list +``` + +**What to look for:** +- Is your firewall bouncer listed? +- Check the "Last API Pull" timestamp - is it older than 24 hours? +- Is the bouncer marked as "✓" (valid)? 
+ +### Check bouncer service status + +On the host where the firewall bouncer is installed: + +```bash +# For systemd-based bouncers +sudo systemctl status crowdsec-firewall-bouncer + +# Or for other firewall bouncers +sudo systemctl status cs-firewall-bouncer +``` + +**Common firewall bouncers:** +- `crowdsec-firewall-bouncer` - iptables/nftables bouncer +- `cs-firewall-bouncer` - (legacy name) +- Platform-specific: check your installation method + +### Check bouncer logs + +```bash +# Linux +sudo tail -50 /var/log/crowdsec-firewall-bouncer.log + +# Or check journald +sudo journalctl -u crowdsec-firewall-bouncer -n 50 + +# FreeBSD (OPNsense/pfSense) +sudo tail -50 /var/log/crowdsec/crowdsec-firewall-bouncer.log +``` + +**Look for errors like:** +- `connection refused` - API is unreachable +- `401 Unauthorized` or `403 Forbidden` - Authentication failed +- `invalid configuration` - Config file issues +- `cannot bind` or `permission denied` - Firewall permission issues + +### Test connectivity to Local API + +From the bouncer host: + +```bash +# Test network connectivity +curl -I http://<lapi-host>:8080/ + +# Test with API key +curl -H "X-Api-Key: <api-key>" http://<lapi-host>:8080/v1/decisions +``` + +## How to Resolve + +### Restart the bouncer service + +```bash +# Restart the service +sudo systemctl restart crowdsec-firewall-bouncer + +# Enable it to start on boot +sudo systemctl enable crowdsec-firewall-bouncer + +# Check status +sudo systemctl status crowdsec-firewall-bouncer +``` + +### Re-register the bouncer + +If the API key is invalid or missing: + +#### Generate a new API key on the Security Engine + +```bash +# On Security Engine / LAPI host +sudo cscli bouncers add firewall-bouncer-01 + +# Copy the generated API key +``` + +#### Update bouncer configuration + +Edit the bouncer configuration file (usually `/etc/crowdsec/bouncers/crowdsec-firewall-bouncer.yaml`): + +```yaml +api_url: http://<lapi-host>:8080/ +api_key: <api-key> +``` + +#### Restart the bouncer + +```bash +sudo systemctl restart 
crowdsec-firewall-bouncer +``` + +### Fix connectivity issues + +If the bouncer is on a different host than the Security Engine: + +#### Check firewall rules allow access + +```bash +# Test from bouncer host +nc -zv <lapi-host> 8080 +``` + +If connection fails: +- Open port 8080 on the Security Engine host firewall +- Check network security groups / iptables rules +- Verify no proxy is blocking the connection + +#### Verify API URL in bouncer config + +Edit `/etc/crowdsec/bouncers/crowdsec-firewall-bouncer.yaml`: + +```yaml +# For local LAPI +api_url: http://127.0.0.1:8080/ + +# For remote LAPI +api_url: http://<lapi-host>:8080/ + +# For HTTPS +api_url: https://<lapi-host>:8080/ +``` + +**Important:** Don't forget the trailing `/` + +### Fix configuration errors + +If bouncer logs show configuration errors: + +```bash +# Validate YAML syntax +sudo cat /etc/crowdsec/bouncers/crowdsec-firewall-bouncer.yaml + +# Check for common issues: +# - Incorrect indentation (YAML is whitespace-sensitive) +# - Missing api_key or api_url +# - Incorrect mode (iptables vs nftables) +``` + +**Example minimal configuration:** +```yaml +mode: iptables # or nftables +pid_dir: /var/run/ +update_frequency: 10s +daemonize: true +log_mode: file +log_dir: /var/log/ +log_level: info +api_url: http://127.0.0.1:8080/ +api_key: <api-key> +deny_action: DROP +deny_log: false +``` + +### Fix firewall permission issues + +Some firewall bouncers need specific permissions: + +```bash +# For iptables +sudo setcap cap_net_admin+ep /usr/bin/crowdsec-firewall-bouncer + +# Verify iptables rules are being applied +sudo iptables -L crowdsec-chain -n -v + +# For nftables +sudo nft list ruleset | grep crowdsec +``` + +### Reinstall the bouncer (if needed) + +If the bouncer is corrupted or not properly installed: + +```bash +# Remove old installation +sudo apt remove crowdsec-firewall-bouncer # Debian/Ubuntu +sudo yum remove crowdsec-firewall-bouncer # RHEL/CentOS + +# Reinstall +sudo apt install crowdsec-firewall-bouncer +# Or follow installation 
instructions for your platform + +# Re-register with new API key +sudo cscli bouncers add firewall-bouncer-new +# Update config with the new key +# Restart service +``` + +## Verify Resolution + +After making changes: + +1. **Check bouncer status:** + ```bash + sudo systemctl status crowdsec-firewall-bouncer + ``` + Should show "active (running)" + +2. **Verify API pulls on Security Engine:** + ```bash + sudo cscli bouncers list + ``` + "Last API Pull" should update to a recent timestamp (within seconds) + +3. **Check firewall rules are applied:** + ```bash + # iptables + sudo iptables -L crowdsec-chain -n -v + + # nftables + sudo nft list table inet crowdsec + ``` + +4. **Test blocking:** + Add a test decision and verify it appears in firewall rules: + ```bash + sudo cscli decisions add --ip 192.0.2.1 --duration 5m --reason "test" + + # Wait 10-15 seconds for bouncer to pull + sudo iptables -L crowdsec-chain -n -v | grep 192.0.2.1 + ``` + +## Platform-Specific Notes + +### OPNsense / pfSense +- Bouncer name: `crowdsec-firewall-bouncer` or `os-crowdsec` +- Config: `/usr/local/etc/crowdsec/bouncers/` +- Logs: `/var/log/crowdsec/` +- Service: Check via OPNsense/pfSense GUI or `service crowdsec-firewall-bouncer status` + +### Docker +If running the bouncer in Docker, ensure: +- Container is running: `docker ps | grep bouncer` +- Network connectivity to LAPI container/host +- Proper capabilities: `--cap-add=NET_ADMIN --cap-add=NET_RAW` + +### Kubernetes +For Kubernetes network policies or firewall controllers: +- Check pod status: `kubectl get pods -n ` +- Check logs: `kubectl logs -n ` +- Verify service connectivity to LAPI + +## Related Issues + +- [RC Integration Offline](/u/troubleshooting/rc_integration_offline) - Similar issue for non-firewall bouncers +- [Remediation Components Troubleshooting](/u/troubleshooting/remediation_components) - General bouncer issues + +## Getting Help + +If your firewall bouncer still doesn't work: + +- Share bouncer logs on 
[Discourse](https://discourse.crowdsec.net/) +- Ask on [Discord](https://discord.gg/crowdsec) with `cscli bouncers list` output +- Check firewall bouncer documentation: [Firewall Bouncer Docs](/u/bouncers/firewall) +- Report bugs: [GitHub Issues](https://github.com/crowdsecurity/cs-firewall-bouncer/issues) diff --git a/crowdsec-docs/unversioned/troubleshooting/intro.md b/crowdsec-docs/unversioned/troubleshooting/intro.md index 3072d0979..32913e84f 100644 --- a/crowdsec-docs/unversioned/troubleshooting/intro.md +++ b/crowdsec-docs/unversioned/troubleshooting/intro.md @@ -15,10 +15,29 @@ If you have any suggestions for this please open an [issue here](https://github. Also, checkout our 🩺 [**Stack Health-Check page**](/u/getting_started/health_check) to make sure your **Detection**, **Community Sharing** and **Remediation** are working properly -Here you'll also find Troubleshooting by topic: -* [Security Engine Troubleshooting](/troubleshooting/security_engine.mdx) -* [Remediation Components Troubleshooting](/troubleshooting/remediation_components.mdx) -* [CTI Troubleshooting](/troubleshooting/cti.mdx) +## Console Health Check Issues + +If you received a health check alert from the CrowdSec Console, check out the [**Console Health Check Issues**](/u/troubleshooting/console_issues) page for a complete list of issues, their trigger conditions, and dedicated troubleshooting guides. 
+ +## Troubleshooting by Topic + +* [Security Engine Troubleshooting](/u/troubleshooting/security_engine.mdx) +* [Remediation Components Troubleshooting](/u/troubleshooting/remediation_components.mdx) +* [CTI Troubleshooting](/u/troubleshooting/cti.mdx) + +## Troubleshooting by Issue + +Individual troubleshooting guides for specific Console alerts: + +* [Security Engine Offline](/u/troubleshooting/security_engine_offline) - Security Engine not reporting to Console +* [Engine No Alerts](/u/troubleshooting/engine_no_alerts) - No alerts generated in 48 hours +* [Engine Too Many Alerts](/u/troubleshooting/engine_too_many_alerts) - Abnormally high alert volume +* [Log Processor Offline](/u/troubleshooting/log_processor_offline) - Log Processor not checking in +* [LP No Alerts](/u/troubleshooting/lp_no_alerts) - Log Processor not generating alerts +* [LP No Logs Read](/u/troubleshooting/lp_no_logs_read) - No logs being acquired +* [LP No Logs Parsed](/u/troubleshooting/lp_no_logs_parsed) - Logs read but not parsed +* [Firewall Integration Offline](/u/troubleshooting/fw_integration_offline) - Firewall bouncer not pulling decisions +* [RC Integration Offline](/u/troubleshooting/rc_integration_offline) - Remediation component not pulling decisions ## Community support diff --git a/crowdsec-docs/unversioned/troubleshooting/lp_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/lp_no_alerts.md new file mode 100644 index 000000000..dc9b61d21 --- /dev/null +++ b/crowdsec-docs/unversioned/troubleshooting/lp_no_alerts.md @@ -0,0 +1,169 @@ +--- +title: LP No Alerts +id: lp_no_alerts +--- + +The **LP No Alerts** issue appears when a specific Log Processor (agent) is running and communicating with the Local API but hasn't generated any alerts in the last 48 hours. This is similar to [Engine No Alerts](/u/troubleshooting/engine_no_alerts) but applies to individual Log Processor instances in distributed setups. 
+ +## What Triggers This Issue + +- **Trigger condition**: Log Processor online but no alerts for 48 hours +- **Criticality**: High +- **Impact**: Detection may not be working on this specific agent + +## Common Root Causes + +### No logs being read by this agent +The acquisition configuration on this specific Log Processor may be missing, disabled, or pointing to empty sources. + +### No logs being parsed successfully +Logs are being read but parsers can't process them due to format mismatches or missing collections. + +### Scenarios in simulation mode +Detection scenarios are installed but running in simulation mode on this agent. + +### Low-activity monitored service +The service monitored by this Log Processor may genuinely have no malicious activity. + +## How to Diagnose + +### Identify the affected Log Processor + +Check which machine is not generating alerts: + +```bash +# On LAPI host +sudo cscli machines list +``` + +Look for the Last Update timestamp and verify which machine corresponds to the alert. + +### Check metrics on the affected agent + +Connect to the specific Log Processor host and check its metrics: + +```bash +# On the Log Processor host +sudo cscli metrics show acquisition parsers scenarios + +# Docker +docker exec crowdsec-agent cscli metrics show acquisition parsers scenarios + +# Kubernetes - for specific agent pod +kubectl exec -n crowdsec -it -- cscli metrics show acquisition parsers scenarios +``` + +Look for: +- **Acquisition Metrics**: Are log lines being read? (non-zero "Lines read") +- **Parser Metrics**: Are logs being parsed? (non-zero "Lines parsed") +- **Scenario Metrics**: Are scenarios evaluating events? 
+ +### Check recent alerts from this agent + +```bash +# On the Log Processor host +sudo cscli alerts list + +# Or filter by origin on LAPI +sudo cscli alerts list --origin +``` + +## How to Resolve + +### If no logs are being read + +Follow the [LP No Logs Read troubleshooting guide](/u/troubleshooting/lp_no_logs_read) for detailed steps. + +**Quick checks on the affected agent:** + +```bash +# Verify acquisition configuration +sudo cat /etc/crowdsec/acquis.yaml +sudo ls -la /etc/crowdsec/acquis.d/ + +# Check log file existence and permissions +ls -la /var/log/nginx/ # or your specific log path + +# Verify CrowdSec can access logs +sudo -u crowdsec cat /var/log/nginx/access.log | head -5 +``` + +### If logs are read but not parsed + +Follow the [LP No Logs Parsed troubleshooting guide](/u/troubleshooting/lp_no_logs_parsed) for detailed steps. + +**Quick checks on the affected agent:** + +```bash +# Check installed collections +sudo cscli collections list + +# Test parsing with a sample log line +sudo cscli explain --log "" --type + +# Example for nginx +sudo cscli explain --log '192.168.1.1 - - [01/Jan/2024:12:00:00 +0000] "GET / HTTP/1.1" 200 1234' --type nginx +``` + +### If scenarios are in simulation mode + +Check and disable simulation mode on the affected agent: + +```bash +# Check simulation status +sudo cscli simulation status + +# Disable for all scenarios +sudo cscli simulation disable --all +sudo systemctl reload crowdsec + +# Or for specific scenarios +sudo cscli simulation disable crowdsecurity/ssh-bf +sudo systemctl reload crowdsec +``` + +### If this is a low-activity service + +For legitimately clean services: + +1. **Test with dummy scenarios** using the [Health Check guide](/u/getting_started/health_check) to verify the detection pipeline works +2. **Verify the agent is processing logs** with `cscli metrics show acquisition` +3. 
**Accept the low alert rate** if the service truly has no malicious traffic + +## Verify Resolution + +After making changes on the affected Log Processor: + +1. Restart the agent: `sudo systemctl restart crowdsec` +2. Wait a few minutes for processing +3. Check metrics: `sudo cscli metrics show scenarios` +4. Trigger a test alert: [Health Check detection tests](/u/getting_started/health_check#-detection-checks) +5. Verify alert appears: `sudo cscli alerts list` + +## Distributed Setup Considerations + +In multi-agent deployments: + +- **Each agent processes its own logs independently** +- **Agents forward alerts to the Local API** +- **One agent having no alerts doesn't affect others** + +If multiple agents show no alerts, review: +- Common configuration issues (e.g., centralized config management problems) +- Network connectivity between agents and LAPI +- Synchronized collection installations across all agents + +## Related Issues + +- [Engine No Alerts](/u/troubleshooting/engine_no_alerts) - Similar issue at the Security Engine level +- [LP No Logs Read](/u/troubleshooting/lp_no_logs_read) - If acquisition is not working +- [LP No Logs Parsed](/u/troubleshooting/lp_no_logs_parsed) - If parsing is failing +- [Log Processor Offline](/u/troubleshooting/log_processor_offline) - If the agent is not communicating at all + +## Getting Help + +If you've verified logs are being read and parsed but still see no alerts: + +- Share your setup details on [Discourse](https://discourse.crowdsec.net/) +- Ask on [Discord](https://discord.gg/crowdsec) with `cscli metrics` output +- Test your log samples with [CrowdSec Playground](https://playground.crowdsec.net/) diff --git a/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_parsed.md b/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_parsed.md new file mode 100644 index 000000000..1c7c51ec0 --- /dev/null +++ b/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_parsed.md @@ -0,0 +1,270 @@ +--- +title: LP No Logs Parsed 
+id: lp_no_logs_parsed +--- + +The **LP No Logs Parsed** issue appears when logs are being successfully read by the Log Processor but none are being parsed correctly in the last 48 hours. This means the acquisition is working, but parsers can't interpret the log format. + +## What Triggers This Issue + +- **Trigger condition**: Logs read but no successful parsing for 48 hours +- **Criticality**: Critical +- **Impact**: No events generated means no detection or alerts possible + +## Common Root Causes + +### Missing collection or parsers +The required parser collection for your log format isn't installed. + +### Acquisition type mismatch +The `type:` or `program:` label in acquisition doesn't match any installed parser's FILTER. + +### Custom or unexpected log format +Logs don't match the format expected by the parser (custom format, version mismatch, etc.). + +### Parser FILTER not matching +Parser exists but its FILTER clause doesn't match the acquisition label. + +### Grok pattern mismatch +Log structure has changed and the parser's grok patterns no longer match. 
+ +## How to Diagnose + +### Check parsing metrics + +```bash +# On host +sudo cscli metrics show acquisition parsers + +# Docker +docker exec crowdsec cscli metrics show acquisition parsers + +# Kubernetes +kubectl exec -n crowdsec -it <agent-pod-name> -- cscli metrics show acquisition parsers +``` + +**What to look for:** +- **Acquisition**: "Lines read" should be > 0 (confirms logs are being read) +- **Parsers**: "Lines parsed" should be > 0 (currently 0 means parsing is failing) +- **Unparsed lines**: Check if there's a high "unparsed" count + +### Use cscli explain to test parsing + +Take a sample log line and test it: + +```bash +# Test with your actual log line +sudo cscli explain --log "192.168.1.1 - - [01/Jan/2024:12:00:00 +0000] \"GET / HTTP/1.1\" 200 1234" --type nginx + +# Or test from a file +sudo cscli explain --file /var/log/nginx/access.log --type nginx +``` + +**What to look for:** +- 🔴 (red) next to parser names means the parser didn't match +- 🟢 (green) means the parser succeeded +- If all parsers show 🔴, the log format isn't being recognized + +### Check installed collections and parsers + +```bash +# List installed collections +sudo cscli collections list + +# List installed parsers +sudo cscli parsers list + +# Check specific parser details +sudo cscli parsers inspect crowdsecurity/nginx-logs +``` + +### Verify acquisition type/program label + +```bash +# Check your acquisition configuration +sudo cat /etc/crowdsec/acquis.yaml +sudo cat /etc/crowdsec/acquis.d/*.yaml +``` + +Compare the `type:` (or `program:` in Kubernetes) with installed parser names. 
+ +## How to Resolve + +### Install missing collection + +Most services have a collection that includes parsers and scenarios: + +```bash +# Search for collections +sudo cscli collections search nginx + +# Install the collection +sudo cscli collections install crowdsecurity/nginx + +# Restart CrowdSec +sudo systemctl restart crowdsec +``` + +**Docker:** +```yaml +environment: + COLLECTIONS: "crowdsecurity/nginx crowdsecurity/linux" +``` +Then restart the container. + +**Kubernetes:** +```yaml +agent: + env: + - name: COLLECTIONS + value: "crowdsecurity/nginx crowdsecurity/traefik" +``` +Then: `helm upgrade crowdsec crowdsec/crowdsec -n crowdsec -f values.yaml` + +### Fix acquisition type/program mismatch + +The acquisition label must match a parser's FILTER: + +#### On Host or Docker + +Check your `acquis.yaml`: +```yaml +filenames: + - /var/log/nginx/access.log +labels: + type: nginx # This must match a parser FILTER +``` + +Common types: +- `nginx` - for NGINX logs +- `apache2` - for Apache logs +- `syslog` - for syslog-formatted logs (SSH, etc.) +- `mysql` - for MySQL logs +- `postgres` - for PostgreSQL logs + +#### Kubernetes + +In Kubernetes, use `program:` instead of `type:`: +```yaml +agent: + acquisition: + - namespace: production + podName: nginx-* + program: nginx # This must match parser FILTER +``` + +**After changing configuration:** +```bash +sudo systemctl restart crowdsec +# or docker restart crowdsec +# or helm upgrade (for Kubernetes) +``` + +### Handle custom log formats + +If you use a custom log format that doesn't match standard parsers: + +#### Option 1: Adjust log format to match parser +**NGINX example:** +```nginx +# In nginx.conf, use the combined format +log_format combined '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"'; +access_log /var/log/nginx/access.log combined; +``` + +#### Option 2: Create a custom parser +1. 
Use the [CrowdSec Playground](https://playground.crowdsec.net/) to develop and test your parser +2. Create a custom parser in `/etc/crowdsec/parsers/s01-parse/custom-parser.yaml` +3. Use grok patterns to match your format +4. See [Parser Documentation](/docs/log_processor/parsers/format) for details + +**Simple custom parser example:** +```yaml +onsuccess: next_stage +debug: false +filter: "evt.Parsed.program == 'my-custom-app'" +name: my-org/my-custom-app-logs +description: "Custom parser for my application" +grok: + pattern: '%{IPORHOST:source_ip} - %{DATA:message}' + apply_on: message +statics: + - meta: log_type + value: my_custom_app + - meta: service + value: http +``` + +#### Option 3: Use a different parser +Some services have multiple parser options. Check the [Hub](https://app.crowdsec.net/hub/parsers) for alternatives. + +### Debug parser FILTER issues + +If a parser is installed but not matching, check its FILTER: + +```bash +# View parser details +sudo cscli parsers inspect crowdsecurity/nginx-logs + +# Look for the "filter" field +# Example: filter: "evt.Parsed.program == 'nginx'" +``` + +The FILTER must match your acquisition label. If your label is `type: nginx`, the parser FILTER should check `evt.Line.Labels.type == "nginx"` or `evt.Parsed.program == "nginx"`. + +## Verify Resolution + +After making changes: + +1. **Restart CrowdSec:** + ```bash + sudo systemctl restart crowdsec + ``` + +2. **Wait 1-2 minutes for log processing** + +3. **Check metrics again:** + ```bash + sudo cscli metrics show parsers + ``` + + **"Lines parsed" should now be > 0** + +4. **Test with cscli explain:** + ```bash + sudo cscli explain --log "<your-log-line>" --type <type> + ``` + + **Parsers should show 🟢 (green) indicators** + +5. 
**Verify events are reaching scenarios:** + ```bash + sudo cscli metrics show scenarios + ``` + +## Common Parser FILTER Values + +| Service | Acquisition Label | Parser FILTER | +|---------|------------------|---------------| +| NGINX | `type: nginx` | `evt.Line.Labels.type == "nginx"` | +| Apache | `type: apache2` | `evt.Line.Labels.type == "apache2"` | +| SSH (syslog) | `type: syslog` | `evt.Line.Labels.type == "syslog"` | +| Traefik | `program: traefik` | `evt.Parsed.program == "traefik"` | +| MySQL | `type: mysql` | `evt.Line.Labels.type == "mysql"` | + +## Related Issues + +- [LP No Logs Read](/u/troubleshooting/lp_no_logs_read) - If logs aren't being read at all +- [LP No Alerts](/u/troubleshooting/lp_no_alerts) - If logs are parsed but scenarios don't trigger +- [Engine No Alerts](/u/troubleshooting/engine_no_alerts) - Similar issue at the Security Engine level + +## Getting Help + +If parsing still fails: + +- Test your logs in [CrowdSec Playground](https://playground.crowdsec.net/) +- Share your log samples and acquisition config on [Discourse](https://discourse.crowdsec.net/) +- Ask on [Discord](https://discord.gg/crowdsec) with `cscli explain` output +- Check parser documentation on the [Hub](https://app.crowdsec.net/hub/parsers) diff --git a/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_read.md b/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_read.md new file mode 100644 index 000000000..1d7b9caa4 --- /dev/null +++ b/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_read.md @@ -0,0 +1,295 @@ +--- +title: LP No Logs Read +id: lp_no_logs_read +--- + +The **LP No Logs Read** issue appears when a Log Processor is running but hasn't acquired any log lines in the last 24 hours. This is the first step in the detection pipeline and must work for CrowdSec to function. 
+ +## What Triggers This Issue + +- **Trigger condition**: No logs acquired for 24 hours +- **Criticality**: Critical +- **Impact**: Complete detection failure - no logs means no alerts + +## Common Root Causes + +### Missing acquisition configuration +No acquisition files exist, or they're empty. + +### Incorrect log file paths +Acquisition configuration points to paths that don't exist or have moved. + +### File permission issues +CrowdSec doesn't have read access to the log files. + +### Log files are empty or not being written +The services being monitored aren't generating logs. + +### Acquisition type mismatch +Wrong datasource type configured (e.g., using `file` instead of `journald`). + +### Container/Kubernetes volume issues +In containerized deployments, logs aren't mounted or accessible to the CrowdSec container. + +## How to Diagnose + +### Check acquisition metrics + +```bash +# On host +sudo cscli metrics show acquisition + +# Docker +docker exec crowdsec cscli metrics show acquisition + +# Kubernetes +kubectl exec -n crowdsec -it -- cscli metrics show acquisition +``` + +**What to look for:** +- If the output is empty or shows 0 "Lines read", acquisition is not working +- If sources are listed but "Lines read" is 0, the source exists but isn't reading data + +### Verify acquisition configuration exists + +```bash +# On host +sudo cat /etc/crowdsec/acquis.yaml +sudo ls -la /etc/crowdsec/acquis.d/ + +# Docker +docker exec crowdsec cat /etc/crowdsec/acquis.yaml +docker exec crowdsec ls -la /etc/crowdsec/acquis.d/ + +# Kubernetes - check ConfigMap +kubectl get configmap -n crowdsec -o yaml +``` + +If these files are empty or missing, you need to create acquisition configuration. 
+ +### Check log files exist and have content + +```bash +# Verify log file exists +ls -la /var/log/nginx/access.log + +# Check if it has recent content +tail -10 /var/log/nginx/access.log + +# Check last modification time +stat /var/log/nginx/access.log +``` + +### Check file permissions + +```bash +# Check if CrowdSec user can read the log file +sudo -u crowdsec cat /var/log/nginx/access.log | head -5 + +# Check directory permissions +ls -la /var/log/nginx/ +``` + +## How to Resolve + +### Create or fix acquisition configuration + +The acquisition configuration tells CrowdSec which logs to read. Configuration varies by deployment: + +#### On Host + +Create or edit `/etc/crowdsec/acquis.yaml` or add files to `/etc/crowdsec/acquis.d/`: + +**Example for NGINX:** +```yaml +filenames: + - /var/log/nginx/access.log + - /var/log/nginx/error.log +labels: + type: nginx +--- +``` + +**Example for SSH (via syslog):** +```yaml +filenames: + - /var/log/auth.log +labels: + type: syslog +--- +``` + +**Example for journald:** +```yaml +source: journalctl +journalctl_filter: + - "_SYSTEMD_UNIT=ssh.service" +labels: + type: syslog +--- +``` + +After creating the configuration: +```bash +sudo systemctl restart crowdsec +``` + +#### Docker + +Ensure log volumes are mounted and acquisition is configured: + +**docker-compose.yml example:** +```yaml +services: + crowdsec: + image: crowdsecurity/crowdsec:latest + volumes: + - /var/log:/var/log:ro # Mount host logs as read-only + - ./acquis.yaml:/etc/crowdsec/acquis.yaml:ro + - crowdsec-config:/etc/crowdsec + - crowdsec-data:/var/lib/crowdsec/data +``` + +**acquis.yaml for Docker:** +```yaml +filenames: + - /var/log/nginx/access.log +labels: + type: nginx +``` + +Restart the container: +```bash +docker-compose restart crowdsec +``` + +#### Kubernetes + +Configure acquisition in your Helm values: + +**values.yaml:** +```yaml +agent: + acquisition: + - namespace: production + podName: nginx-* + program: nginx + - namespace: production + 
podName: webapp-* + program: nginx +``` + +**Note:** In Kubernetes, use `program:` (not `type:`). The `program` field must match the FILTER in your parsers. + +Apply changes: +```bash +helm upgrade crowdsec crowdsec/crowdsec -n crowdsec -f values.yaml +``` + +### Fix file permissions + +If CrowdSec can't read log files: + +```bash +# Add CrowdSec user to the log group (e.g., adm) +sudo usermod -aG adm crowdsec + +# Or adjust log file permissions (less secure) +sudo chmod 644 /var/log/nginx/access.log + +# Restart CrowdSec to pick up group membership +sudo systemctl restart crowdsec +``` + +### Verify log files are being written + +If log files are empty: + +1. **Check the monitored service is running:** + ```bash + sudo systemctl status nginx + ``` + +2. **Generate some log activity:** + ```bash + curl http://localhost/ + tail /var/log/nginx/access.log + ``` + +3. **Check service logging configuration:** + - For NGINX: verify `access_log` directives in nginx.conf + - For Apache: verify `CustomLog` directives + - For systemd services: verify they're logging to journald or files + +### Fix container/Kubernetes volume issues + +#### Docker +Ensure volumes are correctly mounted: +```bash +# Check mounts inside container +docker exec crowdsec ls -la /var/log/nginx/ + +# If empty, verify docker-compose.yml volumes section +``` + +#### Kubernetes +Kubernetes agents read from `/var/log/containers` by default (mounted by helm chart). If logs aren't there: + +```bash +# Verify pods are writing to expected locations +kubectl logs -n production nginx-pod-name + +# Check if logs are in /var/log/containers on the node +kubectl debug node/your-node -it --image=busybox -- ls -la /var/log/containers/ +``` + +## Verify Resolution + +After making changes: + +1. **Restart CrowdSec:** + ```bash + sudo systemctl restart crowdsec + # or docker restart crowdsec + # or kubectl rollout restart deployment/crowdsec-agent -n crowdsec + ``` + +2. 
**Wait 1-2 minutes for acquisition to start** + +3. **Check metrics again:** + ```bash + sudo cscli metrics show acquisition + ``` + +4. **Verify "Lines read" is increasing:** + - Run metrics command twice with a delay + - Numbers should increase if logs are being actively generated + +5. **Check CrowdSec logs for errors:** + ```bash + sudo tail -50 /var/log/crowdsec.log + # or docker logs crowdsec + # or kubectl logs -n crowdsec + ``` + +## Detailed Acquisition Documentation + +For more information on acquisition configuration: +- [Datasources Documentation](/docs/log_processor/data_sources/intro) +- [File datasource](/docs/log_processor/data_sources/file) +- [Journald datasource](/docs/log_processor/data_sources/journald) +- [Hub collection pages](https://app.crowdsec.net/hub/collections) - each collection shows example acquisition config + +## Related Issues + +- [LP No Logs Parsed](/u/troubleshooting/lp_no_logs_parsed) - Next step if logs are read but not parsed +- [LP No Alerts](/u/troubleshooting/lp_no_alerts) - If logs are read and parsed but scenarios don't trigger +- [Engine No Alerts](/u/troubleshooting/engine_no_alerts) - Similar issue at the Security Engine level + +## Getting Help + +If acquisition still doesn't work: + +- Share your acquisition config on [Discourse](https://discourse.crowdsec.net/) +- Ask on [Discord](https://discord.gg/crowdsec) with your `cscli metrics` output and acquisition files +- Check for similar issues in the [GitHub repository](https://github.com/crowdsecurity/crowdsec/issues) diff --git a/crowdsec-docs/unversioned/troubleshooting/rc_integration_offline.md b/crowdsec-docs/unversioned/troubleshooting/rc_integration_offline.md new file mode 100644 index 000000000..504ec4a05 --- /dev/null +++ b/crowdsec-docs/unversioned/troubleshooting/rc_integration_offline.md @@ -0,0 +1,371 @@ +--- +title: RC Integration Offline +id: rc_integration_offline +--- + +The **RC Integration Offline** (Remediation Component Integration Offline) 
issue appears when a non-firewall remediation component (bouncer) has not pulled decisions from the Local API for more than 24 hours. This means your web server, reverse proxy, CDN, or other integration is not receiving block/captcha decisions. + +## What Triggers This Issue + +- **Trigger condition**: No decision pulls for 24 hours +- **Criticality**: Critical +- **Impact**: Application-level remediation is not working - threats are not being blocked or challenged + +## Common Remediation Components + +This issue applies to bouncers such as: +- **Web servers**: NGINX, Apache, IIS +- **Reverse proxies**: Traefik, HAProxy, Caddy +- **Application frameworks**: PHP, WordPress plugins +- **Cloud services**: Cloudflare, Akamai connectors +- **Custom integrations**: Using CrowdSec API + +## Common Root Causes + +### Bouncer service or process stopped +The bouncer daemon, module, or plugin is not running. + +### Authentication failure +API key is invalid, expired, or the bouncer was removed from the Security Engine. + +### Network connectivity issues +The bouncer cannot reach the Local API endpoint. + +### Configuration errors +Incorrect API URL, missing configuration file, or malformed settings. + +### Integration not loaded +Module/plugin is installed but not enabled in the web server or application. + +### Log rotation or restart issues +Bouncer lost connection after service restart and didn't reconnect. + +## How to Diagnose + +### Check bouncer status in Security Engine + +From the Security Engine (or LAPI host): + +```bash +# On host +sudo cscli bouncers list + +# Docker +docker exec crowdsec cscli bouncers list + +# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli bouncers list +``` + +**What to look for:** +- Is your bouncer listed? +- Check "Last API Pull" timestamp - is it older than 24 hours? +- Is the bouncer marked as "✓" (valid)? 
+ +### Check bouncer service status + +Depending on your bouncer type: + +#### Web server module bouncers + +```bash +# NGINX +sudo systemctl status nginx +sudo nginx -t # Test configuration + +# Apache +sudo systemctl status apache2 +sudo apache2ctl -t # Test configuration + +# Check if module is loaded +# NGINX: check nginx.conf for crowdsec module +# Apache: check mods-enabled/crowdsec.conf +``` + +#### Standalone bouncer daemons + +```bash +# Traefik bouncer +sudo systemctl status crowdsec-traefik-bouncer + +# HAProxy bouncer +sudo systemctl status crowdsec-haproxy-bouncer + +# Cloudflare bouncer +sudo systemctl status crowdsec-cloudflare-bouncer +``` + +### Check bouncer logs + +Log locations vary by bouncer type: + +```bash +# Web server logs +sudo tail -50 /var/log/nginx/error.log +sudo tail -50 /var/log/apache2/error.log + +# Standalone bouncer logs +sudo tail -50 /var/log/crowdsec-.log +sudo journalctl -u crowdsec- -n 50 + +# Docker/Kubernetes +docker logs +kubectl logs -n +``` + +**Look for errors like:** +- `connection refused` - API unreachable +- `401 Unauthorized` or `403 Forbidden` - Authentication failed +- `module not loaded` - Integration not enabled +- `invalid configuration` - Config file issues + +### Test connectivity to Local API + +From the bouncer host: + +```bash +# Test network connectivity +curl -I http://:8080/ + +# Test with API key +curl -H "X-Api-Key: " http://:8080/v1/decisions +``` + +## How to Resolve + +### Restart the bouncer + +#### For web server modules + +```bash +# NGINX +sudo systemctl restart nginx + +# Apache +sudo systemctl restart apache2 + +# IIS (Windows) +iisreset +``` + +#### For standalone daemons + +```bash +sudo systemctl restart crowdsec- +sudo systemctl enable crowdsec- +``` + +### Re-register the bouncer + +If the API key is invalid: + +#### Generate new API key on Security Engine + +```bash +# On LAPI host +sudo cscli bouncers add my-nginx-bouncer + +# Copy the generated API key +``` + +#### Update bouncer 
configuration + +Configuration file locations vary: + +**NGINX bouncer:** +```bash +# Edit config +sudo nano /etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf + +# Update api_key line +API_KEY= +``` + +**Traefik bouncer:** +```bash +# Edit config +sudo nano /etc/crowdsec/bouncers/crowdsec-traefik-bouncer.yaml + +# Update api_key field +crowdsec_lapi_key: +``` + +**Cloudflare bouncer:** +```bash +# Edit config +sudo nano /etc/crowdsec/bouncers/crowdsec-cloudflare-bouncer.yaml + +# Update api_key +crowdsec_lapi_key: +``` + +#### Restart after updating config + +```bash +sudo systemctl restart +``` + +### Fix connectivity issues + +If bouncer is on a different host: + +```bash +# Test connectivity +nc -zv 8080 + +# Check API URL in bouncer config +# Should be: http://:8080/ + +# Update bouncer config with correct URL +``` + +### Enable the module/plugin + +Some bouncers require explicit enabling: + +#### NGINX + +Check `/etc/nginx/nginx.conf` includes the CrowdSec module: + +```nginx +load_module modules/ngx_http_crowdsec_module.so; + +http { + # CrowdSec configuration + crowdsec_enabled on; + crowdsec_api_url http://127.0.0.1:8080; + # ... 
+} +``` + +Test and reload: +```bash +sudo nginx -t +sudo systemctl reload nginx +``` + +#### Apache + +Enable the module: +```bash +sudo a2enmod crowdsec +sudo systemctl restart apache2 +``` + +#### WordPress + +Activate the plugin via WordPress admin panel or: +```bash +wp plugin activate crowdsec # if using WP-CLI +``` + +### Fix configuration errors + +Validate configuration syntax: + +```bash +# Web servers +sudo nginx -t +sudo apache2ctl -t + +# YAML-based bouncers +sudo cat /etc/crowdsec/bouncers/.yaml +# Check for YAML syntax errors +``` + +**Common config issues:** +- Missing or incorrect `api_url` / `api_key` +- Wrong file permissions (must be readable by web server user) +- Incorrect YAML indentation +- Missing trailing `/` in API URL + +### Check file permissions + +Bouncer config files must be readable: + +```bash +# Check permissions +ls -la /etc/crowdsec/bouncers/ + +# Fix if needed +sudo chmod 640 /etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf +sudo chown root:www-data /etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf +``` + +## Verify Resolution + +After making changes: + +1. **Check bouncer service:** + ```bash + sudo systemctl status + # or for web servers + sudo systemctl status nginx + ``` + +2. **Verify API pulls are resuming:** + ```bash + sudo cscli bouncers list + ``` + "Last API Pull" should update within seconds/minutes + +3. **Check bouncer logs for success:** + ```bash + sudo tail -20 /var/log/.log + ``` + Should see successful API connection messages + +4. 
**Test remediation:** + Add a test decision: + ```bash + sudo cscli decisions add --ip 192.0.2.1 --duration 5m --reason "test" + ``` + + Try accessing your service from that IP (or simulate): + ```bash + curl -H "X-Forwarded-For: 192.0.2.1" http://your-service/ + ``` + Should receive 403 Forbidden or a captcha challenge + +## Bouncer-Specific Documentation + +- [NGINX Bouncer](/u/bouncers/nginx) +- [Traefik Bouncer](/u/bouncers/traefik) +- [HAProxy Bouncer](/u/bouncers/haproxy) +- [Cloudflare Bouncer](/u/bouncers/cloudflare) +- [WordPress Plugin](/u/bouncers/wordpress) +- [All Bouncers](/u/bouncers/intro) + +## Kubernetes-Specific Notes + +For Kubernetes ingress controllers: + +```bash +# Check ingress controller is running +kubectl get pods -n ingress-nginx + +# Check CrowdSec integration in ingress +kubectl describe ingress -n + +# Check controller logs +kubectl logs -n ingress-nginx --tail=50 +``` + +Ensure the bouncer is registered and pulling decisions: +```bash +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli bouncers list +``` + +## Related Issues + +- [Firewall Integration Offline](/u/troubleshooting/fw_integration_offline) - Similar issue for firewall bouncers +- [Remediation Components Troubleshooting](/u/troubleshooting/remediation_components) - General bouncer issues + +## Getting Help + +If your bouncer still doesn't work: + +- Check bouncer-specific documentation (links above) +- Share config and logs on [Discourse](https://discourse.crowdsec.net/) +- Ask on [Discord](https://discord.gg/crowdsec) with `cscli bouncers list` output +- Report bouncer bugs on GitHub (check bouncer's repository) From 8f0245ffe2afcdde5edf6cf5533620808a5095c0 Mon Sep 17 00:00:00 2001 From: jdv Date: Thu, 20 Nov 2025 14:40:58 +0100 Subject: [PATCH 02/13] future issues page (for test) --- .../troubleshooting/console_issues.md | 10 + .../troubleshooting/future_console_issues.md | 240 ++++++++++++++++++ 2 files changed, 250 
insertions(+) create mode 100644 crowdsec-docs/unversioned/troubleshooting/future_console_issues.md diff --git a/crowdsec-docs/unversioned/troubleshooting/console_issues.md b/crowdsec-docs/unversioned/troubleshooting/console_issues.md index 047dca22c..c9c20b87c 100644 --- a/crowdsec-docs/unversioned/troubleshooting/console_issues.md +++ b/crowdsec-docs/unversioned/troubleshooting/console_issues.md @@ -88,6 +88,16 @@ Some issues are related and share common root causes: Understanding these dependencies helps you troubleshoot more efficiently by addressing root causes first. +## Future Enhancements + +The CrowdSec Console will continue to evolve with additional health checks and recommendations. See the [Future Console Health Check Issues](/u/troubleshooting/future_console_issues) page for planned features including: + +- Enhanced configuration validation +- Blocklist optimization recommendations +- Collection update notifications +- False positive prevention checks +- Premium feature upsell opportunities based on detected benefit + ## Getting Help If you've followed the troubleshooting guides and still need assistance: diff --git a/crowdsec-docs/unversioned/troubleshooting/future_console_issues.md b/crowdsec-docs/unversioned/troubleshooting/future_console_issues.md new file mode 100644 index 000000000..0ea3c907c --- /dev/null +++ b/crowdsec-docs/unversioned/troubleshooting/future_console_issues.md @@ -0,0 +1,240 @@ +--- +title: Future Console Health Check Issues +id: future_console_issues +--- + +This page lists potential health check issues and recommendations that may be added to the CrowdSec Console in future versions. These are categorized by type and priority to help guide feature development. + +:::info +These features are planned or under consideration and are not yet available in the Console. This documentation is maintained for planning purposes. 
+::: + +## Overview + +This page documents **17 future issues** across four main categories: + +- **Configuration Issues** (4 issues) - Initial setup and component configuration +- **Maintenance & Updates** (4 issues) - Version updates and collection management +- **Configuration Validation** (3 issues) - Detecting misconfigurations and optimization opportunities +- **Premium Features & Enhancements** (6 issues) - Value-added features and intelligent upgrade recommendations + +## Configuration Issues + +### No Security Engine or Blocklist Integration Configured + +- **Criticality**: Recommended +- **Trigger**: Organization has neither Security Engines (LAPI) nor Blocklist-as-a-Service (BLaaS) integrations configured +- **Description**: Account is set up but has no active detection or blocklist infrastructure +- **Impact**: No threat detection or proactive blocking capabilities +- **Category**: Initial Configuration + +### No Scenarios Installed + +- **Criticality**: Critical +- **Trigger**: Security Engine has zero scenarios installed +- **Description**: No detection rules configured to identify threats +- **Impact**: Even if logs are parsed, no alerts can be generated +- **Category**: Configuration + +### No Notification Channels Configured + +- **Criticality**: Recommended (bonus for Premium users) +- **Trigger**: No notification integrations configured for Console alerts +- **Description**: User won't receive proactive notifications about stack health issues +- **Impact**: Delayed response to critical problems +- **Notes**: Recommended as a Premium feature benefit +- **Category**: Configuration + +### Alert Context Not Activated + +- **Criticality**: Recommended +- **Trigger**: Alert context enrichment is disabled in Console settings +- **Description**: Missing valuable CTI context data for alert analysis +- **Impact**: Reduced threat intelligence and harder troubleshooting +- **Category**: Configuration Enhancement + +## Maintenance & Updates + +### Security 
Engine Version Outdated + +- **Criticality**: Recommended +- **Trigger**: Security Engine running an older version when a new stable release is available +- **Description**: Missing bug fixes, performance improvements, security patches, and new features +- **Impact**: Potential vulnerabilities, reduced performance, or missing functionality +- **Requirements**: Version reporting from Security Engine, release tracking system +- **Notes**: Could highlight major version upgrades separately (e.g., 1.6.x → 1.7.x with significant new features) +- **Category**: Maintenance + +### Remediation Component Version Outdated + +- **Criticality**: Recommended +- **Trigger**: Active remediation components (bouncers) running outdated versions +- **Description**: Remediation components missing features, bug fixes, or security patches from newer releases +- **Impact**: Reduced remediation effectiveness, potential vulnerabilities, or missing compatibility +- **Requirements**: Bouncer version reporting from FOSS/backend, release tracking for all bouncer types +- **Category**: Maintenance + +### Collection Version Outdated + +- **Criticality**: Recommended +- **Trigger**: Installed collections have newer versions available on the Hub +- **Description**: Using outdated detection rules and parsers, potentially missing scenarios from updated collections +- **Impact**: Missing newer attack patterns, parser improvements, and additional scenarios added to collection +- **Requirements**: Hub version comparison, backend processing +- **Notes**: Includes detecting when collection on Hub has new scenarios not present in installed version +- **Category**: Maintenance + +### Incomplete Scenario Installation from Collection + +- **Criticality**: High +- **Trigger**: Scenarios installed but not representing the complete collection (missing scenarios compared to Hub collection definition) +- **Description**: Partial collection installation leaves detection gaps +- **Impact**: Reduced detection 
coverage for specific attack types within the collection scope +- **Requirements**: Collection definition comparison between installed and Hub versions +- **Category**: Configuration Validation + +## Configuration Validation & Optimization + +### Acquisition and Collection Mismatch + +- **Criticality**: Recommended +- **Trigger**: Collection installed (e.g., nginx) but no corresponding acquisition configuration for that log type +- **Description**: Detection rules installed but no logs being collected to trigger them +- **Impact**: Wasted resources, collection cannot function as intended +- **Example**: NGINX collection installed but no nginx access logs configured in acquisition +- **Category**: Configuration Validation + +### Long-Duration Decisions + +- **Criticality**: Bonus (informational) +- **Trigger**: Active decisions with TTL exceeding threshold (e.g., 30+ days) +- **Description**: Very long bans may indicate manual decisions that should be reviewed +- **Impact**: No direct functional impact but may need periodic review +- **Notes**: Informational alert for housekeeping +- **Category**: Maintenance + +### Decisions Against Legitimate IPs + +- **Criticality**: High +- **Trigger**: Active decisions against known legitimate IP ranges (Let's Encrypt, CDN providers, cloud services, etc.) 
+- **Description**: Potentially blocking legitimate service traffic +- **Impact**: Service disruption (e.g., SSL certificate renewal failures, CDN issues, API connectivity problems) +- **Requirements**: Maintained database of known legitimate IP ranges and services +- **Category**: False Positive Prevention + +## Premium Features & Intelligent Recommendations + +### Alert Volume Over Free Quota + +- **Criticality**: Bonus (informational/upsell opportunity) +- **Trigger**: Alert volume approaching or exceeding free tier limits +- **Description**: High alert activity may benefit from Premium tier features +- **Impact**: Opportunity to upgrade for enhanced capabilities +- **Notes**: Informational nudge toward Premium upgrade for heavy users +- **Category**: Upsell Opportunity + +### Notification Overload - Premium Recommended + +- **Criticality**: Recommended +- **Trigger**: Community user with multiple Security Engines OR high alert/activity volume +- **Description**: Complex setup would benefit from notification channels to track issues across infrastructure +- **Impact**: Missing visibility across distributed deployment or high-activity environment +- **Notes**: Highlight Premium notification features for managing complex deployments +- **Category**: Enhancement - Upsell Opportunity + +### AIUA Not Activated (Premium User) + +- **Criticality**: Recommended +- **Trigger**: Premium tier user without "Am I Under Attack" (AIUA) feature enabled +- **Description**: Premium feature not utilized despite availability +- **Impact**: Not leveraging paid feature for automated attack detection and response +- **Notes**: Premium feature - ensure paid users activate available capabilities +- **Category**: Premium Feature Activation + +### AIUA Not Activated (Community User) + +- **Criticality**: Bonus (informational) +- **Trigger**: Community tier user without AIUA enabled +- **Description**: Missing automated attack detection available in Premium tiers +- **Impact**: Manual 
attack detection vs automated Premium feature +- **Notes**: Gentle upsell to Premium for automated attack detection +- **Category**: Enhancement - Upsell Opportunity + +### High-Value Blocklist Available (Same Tier - >30%) + +- **Criticality**: Recommended +- **Trigger**: Blocklist with >30% protection prediction (Alakazam score) available for user's current tier but not subscribed +- **Description**: High-impact blocklist available at current subscription level could significantly improve protection +- **Impact**: Missing substantial proactive threat blocking opportunity +- **Requirements**: Alakazam efficiency prediction calculation based on user's threat profile +- **Example**: Community user not subscribed to high-efficiency free blocklist, or Premium user not using available Premium blocklist +- **Category**: Enhancement - Optimization + +### High-Value Blocklist Available (Upper Tier - >30%) + +- **Criticality**: Bonus (informational/upsell opportunity) +- **Trigger**: Premium/Platinum blocklist with >30% protection prediction available in higher tier +- **Description**: Significant protection improvement available through tier upgrade +- **Impact**: Major reduction in attack surface through proactive blocking +- **Requirements**: Alakazam efficiency prediction showing concrete benefit of upgrade +- **Example**: Community user could block 35% of threats with Premium BL, or Premium user could block 40% with Platinum BL +- **Notes**: Data-driven upsell showing measurable security benefit of upgrading +- **Category**: Enhancement - Upsell Opportunity + +## Criticality Levels Explained + +### Critical + +Issues that represent complete failure of core functionality. Immediate attention required. + +### High + +Important issues that should be addressed soon. May significantly impact protection effectiveness. + +### Recommended + +Improvements that would enhance security posture or operational efficiency. Should be addressed when possible. 
+ +### Bonus + +Informational, optimization opportunities, or value-demonstration items. Low priority but helpful for optimization, housekeeping, or demonstrating ROI/upgrade value. + +## Key Features + +### Alakazam Protection Prediction + +The **Alakazam scoring system** analyzes your specific threat profile (alerts, attack patterns, geographic sources) and calculates the **predicted effectiveness** of each blocklist: + +- **>30% threshold**: Significant protection improvement recommended +- **Personalized**: Based on your actual threat landscape, not generic statistics +- **Tier-aware**: Shows both same-tier optimizations and upgrade opportunities +- **Data-driven upsell**: Concrete, measurable benefit (e.g., "Block 35% of your threats preemptively") + +### Smart Collection Management + +- **Version tracking**: Detect when Hub collections gain new scenarios +- **Acquisition alignment**: Ensure installed collections match your log sources +- **Completeness validation**: Identify partial installations missing key scenarios + +## Implementation Requirements + +These future issues require: + +- **Version Tracking**: Security Engine, bouncer, and Hub collection version reporting +- **Alakazam Prediction Engine**: Personalized blocklist efficiency scoring based on user's threat profile +- **Legitimate IP Database**: Curated list of known good IPs (CDNs, certificate authorities, cloud providers) +- **Collection Definition Comparison**: Track scenario additions/changes in Hub collections +- **Activity Metrics**: Alert volume, Security Engine count, notification usage patterns + +## Related Pages + +- [Current Console Health Check Issues](/u/troubleshooting/console_issues) - Issues currently available in the Console +- [Troubleshooting Overview](/u/troubleshooting/intro) - General troubleshooting resources + +## Feedback + +These future issues are based on user feedback and operational insights. 
If you have suggestions for additional health checks or recommendations, please: + +- Share on [Discourse](https://discourse.crowdsec.net/) +- Join the discussion on [Discord](https://discord.gg/crowdsec) +- Open an issue on [GitHub](https://github.com/crowdsecurity/crowdsec-docs/issues) From 362a2541d447c9884ab6a489937dd6a5106e4233 Mon Sep 17 00:00:00 2001 From: jdv Date: Thu, 20 Nov 2025 16:30:40 +0100 Subject: [PATCH 03/13] nano aesthetic changes emoticon++ --- .../troubleshooting/console_issues.md | 22 ++++----- .../troubleshooting/engine_no_alerts.md | 4 +- .../troubleshooting/future_console_issues.md | 48 +++++++++---------- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/crowdsec-docs/unversioned/troubleshooting/console_issues.md b/crowdsec-docs/unversioned/troubleshooting/console_issues.md index c9c20b87c..7813acb2a 100644 --- a/crowdsec-docs/unversioned/troubleshooting/console_issues.md +++ b/crowdsec-docs/unversioned/troubleshooting/console_issues.md @@ -15,19 +15,19 @@ The CrowdSec Console monitors your infrastructure health and raises alerts when ### Security Engine Issues #### Security Engine Offline -- **Criticality**: Critical +- **Criticality**: 🔥 Critical - **Trigger**: Security Engine has not reported to the Console for more than 24 hours - **Description**: The core CrowdSec service (Log Processor + Local API) has stopped communicating with the Console infrastructure - **Resolution**: [Security Engine Offline Troubleshooting](/u/troubleshooting/security_engine_offline) #### Engine No Alerts -- **Criticality**: High +- **Criticality**: ⚠️ High - **Trigger**: No alerts generated in the last 48 hours - **Description**: The Security Engine is running but hasn't detected any threats, which may indicate logs aren't being processed or scenarios aren't triggering - **Resolution**: [Engine No Alerts Troubleshooting](/u/troubleshooting/engine_no_alerts) #### Engine Too Many Alerts -- **Criticality**: High +- **Criticality**: ⚠️ High -
**Trigger**: More than 250,000 alerts generated in 6 hours - **Description**: Abnormally high alert volume may indicate a misconfigured scenario, false positives, or an ongoing large-scale attack - **Resolution**: [Engine Too Many Alerts Troubleshooting](/u/troubleshooting/engine_too_many_alerts) @@ -35,25 +35,25 @@ The CrowdSec Console monitors your infrastructure health and raises alerts when ### Log Processor Issues #### Log Processor Offline -- **Criticality**: Critical +- **Criticality**: 🔥 Critical - **Trigger**: Log Processor has not checked in with Local API for more than 24 hours - **Description**: The local agent component has stopped communicating with the Local API - **Resolution**: [Log Processor Offline Troubleshooting](/u/troubleshooting/log_processor_offline) #### LP No Alerts -- **Criticality**: High +- **Criticality**: ⚠️ High - **Trigger**: No alerts generated by this Log Processor in the last 48 hours - **Description**: Logs may not be read, parsed correctly, or no scenarios are matching the parsed events - **Resolution**: [LP No Alerts Troubleshooting](/u/troubleshooting/lp_no_alerts) #### LP No Logs Read -- **Criticality**: Critical +- **Criticality**: 🔥 Critical - **Trigger**: No logs acquired in the last 24 hours - **Description**: The acquisition configuration is missing, incorrect, or log sources are not producing data - **Resolution**: [LP No Logs Read Troubleshooting](/u/troubleshooting/lp_no_logs_read) #### LP No Logs Parsed -- **Criticality**: Critical +- **Criticality**: 🔥 Critical - **Trigger**: Logs are being read but none are successfully parsed in the last 48 hours - **Description**: Parsers may be missing, log format may have changed, or there's a mismatch between acquisition type and parser - **Resolution**: [LP No Logs Parsed Troubleshooting](/u/troubleshooting/lp_no_logs_parsed) @@ -61,13 +61,13 @@ The CrowdSec Console monitors your infrastructure health and raises alerts when ### Remediation Component Issues ####
Firewall Integration Offline -- **Criticality**: Critical +- **Criticality**: 🔥 Critical - **Trigger**: Firewall bouncer has not pulled decisions for more than 24 hours - **Description**: Firewall-based remediation components have stopped communicating with the Local API - **Resolution**: [Firewall Integration Offline Troubleshooting](/u/troubleshooting/fw_integration_offline) #### RC Integration Offline -- **Criticality**: Critical +- **Criticality**: 🔥 Critical - **Trigger**: Remediation Component has not pulled decisions for more than 24 hours - **Description**: Non-firewall remediation components (web servers, reverse proxies, etc.) have stopped communicating with the Local API - **Resolution**: [RC Integration Offline Troubleshooting](/u/troubleshooting/rc_integration_offline) @@ -93,10 +93,10 @@ Understanding these dependencies helps you troubleshoot more efficiently by addr The CrowdSec Console will continue to evolve with additional health checks and recommendations. See the [Future Console Health Check Issues](/u/troubleshooting/future_console_issues) page for planned features including: - Enhanced configuration validation -- Blocklist optimization recommendations +- Blocklists optimization recommendations - Collection update notifications - False positive prevention checks -- Premium feature upsell opportunities based on detected benefit +- Premium feature recommendation based on detected benefit ## Getting Help diff --git a/crowdsec-docs/unversioned/troubleshooting/engine_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/engine_no_alerts.md index 0b6782f07..554d1ff8a 100644 --- a/crowdsec-docs/unversioned/troubleshooting/engine_no_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/engine_no_alerts.md @@ -3,12 +3,12 @@ title: Engine No Alerts id: engine_no_alerts --- -The **Engine No Alerts** issue appears when your Security Engine has been running but hasn't generated any alerts in the last 48 hours.
This usually indicates that logs aren't being processed properly or scenarios aren't matching any threats. +The **Engine No Alerts** issue appears when your Security Engine has been running but hasn't generated any alerts in the last **48 hours**. This usually indicates that logs aren't being processed properly or scenarios aren't matching any threats. ## What Triggers This Issue - **Trigger condition**: No alerts generated for 48 hours -- **Criticality**: High +- **Criticality**: ⚠️ High - **Impact**: Your detection system may not be working as expected ## Common Root Causes diff --git a/crowdsec-docs/unversioned/troubleshooting/future_console_issues.md b/crowdsec-docs/unversioned/troubleshooting/future_console_issues.md index 0ea3c907c..5b9b7e7a2 100644 --- a/crowdsec-docs/unversioned/troubleshooting/future_console_issues.md +++ b/crowdsec-docs/unversioned/troubleshooting/future_console_issues.md @@ -22,7 +22,7 @@ This page documents **17 future issues** across four main categories: ### No Security Engine or Blocklist Integration Configured -- **Criticality**: Recommended +- **Criticality**: 💡 Recommended - **Trigger**: Organization has neither Security Engines (LAPI) nor Blocklist-as-a-Service (BLaaS) integrations configured - **Description**: Account is set up but has no active detection or blocklist infrastructure - **Impact**: No threat detection or proactive blocking capabilities @@ -30,7 +30,7 @@ This page documents **17 future issues** across four main categories: ### No Scenarios Installed -- **Criticality**: Critical +- **Criticality**: 🔥 Critical - **Trigger**: Security Engine has zero scenarios installed - **Description**: No detection rules configured to identify threats - **Impact**: Even if logs are parsed, no alerts can be generated @@ -38,7 +38,7 @@ This page documents **17 future issues** across four main categories: ### No Notification Channels Configured -- **Criticality**: Recommended (bonus for Premium users) +- **Criticality**: 💡
Recommended (bonus for Premium users) - **Trigger**: No notification integrations configured for Console alerts - **Description**: User won't receive proactive notifications about stack health issues - **Impact**: Delayed response to critical problems @@ -47,7 +47,7 @@ This page documents **17 future issues** across four main categories: ### Alert Context Not Activated -- **Criticality**: Recommended +- **Criticality**: 💡 Recommended - **Trigger**: Alert context enrichment is disabled in Console settings - **Description**: Missing valuable CTI context data for alert analysis - **Impact**: Reduced threat intelligence and harder troubleshooting @@ -57,7 +57,7 @@ This page documents **17 future issues** across four main categories: ### Security Engine Version Outdated -- **Criticality**: Recommended +- **Criticality**: 💡 Recommended - **Trigger**: Security Engine running an older version when a new stable release is available - **Description**: Missing bug fixes, performance improvements, security patches, and new features - **Impact**: Potential vulnerabilities, reduced performance, or missing functionality @@ -67,7 +67,7 @@ This page documents **17 future issues** across four main categories: ### Remediation Component Version Outdated -- **Criticality**: Recommended +- **Criticality**: 💡 Recommended - **Trigger**: Active remediation components (bouncers) running outdated versions - **Description**: Remediation components missing features, bug fixes, or security patches from newer releases - **Impact**: Reduced remediation effectiveness, potential vulnerabilities, or missing compatibility @@ -76,7 +76,7 @@ This page documents **17 future issues** across four main categories: ### Collection Version Outdated -- **Criticality**: Recommended +- **Criticality**: 💡 Recommended - **Trigger**: Installed collections have newer versions available on the Hub - **Description**: Using outdated detection rules and parsers, potentially missing scenarios from updated
collections - **Impact**: Missing newer attack patterns, parser improvements, and additional scenarios added to collection @@ -86,7 +86,7 @@ This page documents **17 future issues** across four main categories: ### Incomplete Scenario Installation from Collection -- **Criticality**: High +- **Criticality**: ⚠️ High - **Trigger**: Scenarios installed but not representing the complete collection (missing scenarios compared to Hub collection definition) - **Description**: Partial collection installation leaves detection gaps - **Impact**: Reduced detection coverage for specific attack types within the collection scope @@ -97,7 +97,7 @@ This page documents **17 future issues** across four main categories: ### Acquisition and Collection Mismatch -- **Criticality**: Recommended +- **Criticality**: 💡 Recommended - **Trigger**: Collection installed (e.g., nginx) but no corresponding acquisition configuration for that log type - **Description**: Detection rules installed but no logs being collected to trigger them - **Impact**: Wasted resources, collection cannot function as intended @@ -106,7 +106,7 @@ This page documents **17 future issues** across four main categories: ### Long-Duration Decisions -- **Criticality**: Bonus (informational) +- **Criticality**: 🌟 Bonus (informational) - **Trigger**: Active decisions with TTL exceeding threshold (e.g., 30+ days) - **Description**: Very long bans may indicate manual decisions that should be reviewed - **Impact**: No direct functional impact but may need periodic review @@ -115,7 +115,7 @@ This page documents **17 future issues** across four main categories: ### Decisions Against Legitimate IPs -- **Criticality**: High +- **Criticality**: ⚠️ High - **Trigger**: Active decisions against known legitimate IP ranges (Let's Encrypt, CDN providers, cloud services, etc.)
- **Description**: Potentially blocking legitimate service traffic - **Impact**: Service disruption (e.g., SSL certificate renewal failures, CDN issues, API connectivity problems) @@ -126,25 +126,25 @@ This page documents **17 future issues** across four main categories: ### Alert Volume Over Free Quota -- **Criticality**: Bonus (informational/upsell opportunity) +- **Criticality**: 🌟 Bonus (informational/upgrade opportunity) - **Trigger**: Alert volume approaching or exceeding free tier limits - **Description**: High alert activity may benefit from Premium tier features - **Impact**: Opportunity to upgrade for enhanced capabilities - **Notes**: Informational nudge toward Premium upgrade for heavy users -- **Category**: Upsell Opportunity +- **Category**: Upgrade Opportunity ### Notification Overload - Premium Recommended -- **Criticality**: Recommended +- **Criticality**: 💡 Recommended - **Trigger**: Community user with multiple Security Engines OR high alert/activity volume - **Description**: Complex setup would benefit from notification channels to track issues across infrastructure - **Impact**: Missing visibility across distributed deployment or high-activity environment - **Notes**: Highlight Premium notification features for managing complex deployments -- **Category**: Enhancement - Upsell Opportunity +- **Category**: Enhancement - Upgrade Opportunity ### AIUA Not Activated (Premium User) -- **Criticality**: Recommended +- **Criticality**: 💡 Recommended - **Trigger**: Premium tier user without "Am I Under Attack" (AIUA) feature enabled - **Description**: Premium feature not utilized despite availability - **Impact**: Not leveraging paid feature for automated attack detection and response @@ -153,16 +153,16 @@ This page documents **17 future issues** across four main categories: ### AIUA Not Activated (Community User) -- **Criticality**: Bonus (informational) +- **Criticality**: 🌟 Bonus (informational) - **Trigger**: Community tier user without AIUA
enabled - **Description**: Missing automated attack detection available in Premium tiers - **Impact**: Manual attack detection vs automated Premium feature -- **Notes**: Gentle upsell to Premium for automated attack detection -- **Category**: Enhancement - Upsell Opportunity +- **Notes**: Possible upgrade to Premium for automated attack detection +- **Category**: Enhancement - Upgrade Opportunity ### High-Value Blocklist Available (Same Tier - >30%) -- **Criticality**: Recommended +- **Criticality**: 💡 Recommended - **Trigger**: Blocklist with >30% protection prediction (Alakazam score) available for user's current tier but not subscribed - **Description**: High-impact blocklist available at current subscription level could significantly improve protection - **Impact**: Missing substantial proactive threat blocking opportunity @@ -172,14 +172,14 @@ This page documents **17 future issues** across four main categories: ### High-Value Blocklist Available (Upper Tier - >30%) -- **Criticality**: Bonus (informational/upsell opportunity) +- **Criticality**: 🌟 Bonus (informational/upgrade opportunity) - **Trigger**: Premium/Platinum blocklist with >30% protection prediction available in higher tier - **Description**: Significant protection improvement available through tier upgrade - **Impact**: Major reduction in attack surface through proactive blocking - **Requirements**: Alakazam efficiency prediction showing concrete benefit of upgrade - **Example**: Community user could block 35% of threats with Premium BL, or Premium user could block 40% with Platinum BL -- **Notes**: Data-driven upsell showing measurable security benefit of upgrading -- **Category**: Enhancement - Upsell Opportunity +- **Notes**: Data-driven upgrade showing measurable security benefit of upgrading +- **Category**: Enhancement - Upgrade Opportunity ## Criticality Levels Explained @@ -208,7 +208,7 @@ The **Alakazam scoring system** analyzes your specific threat profile (alerts, a - **>30%
threshold**: Significant protection improvement recommended - **Personalized**: Based on your actual threat landscape, not generic statistics - **Tier-aware**: Shows both same-tier optimizations and upgrade opportunities -- **Data-driven upsell**: Concrete, measurable benefit (e.g., "Block 35% of your threats preemptively") +- **Data-driven upgrade**: Concrete, measurable benefit (e.g., "Block 35% of your threats preemptively") ### Smart Collection Management From 5d9b2252ab5171725cdbb5bbd1391f43749fcd02 Mon Sep 17 00:00:00 2001 From: jdv Date: Fri, 21 Nov 2025 10:30:12 +0100 Subject: [PATCH 04/13] mini formatting update on common issues --- .../troubleshooting/console_issues.md | 18 ++++++------- ...no_alerts.md => issue_engine_no_alerts.md} | 17 ++++--------- ...rts.md => issue_engine_too_many_alerts.md} | 17 ++++--------- ...ine.md => issue_fw_integration_offline.md} | 21 +++++----------- ...line.md => issue_log_processor_offline.md} | 2 +- ...{lp_no_alerts.md => issue_lp_no_alerts.md} | 17 ++++--------- ...s_parsed.md => issue_lp_no_logs_parsed.md} | 21 +++++----------- ..._logs_read.md => issue_lp_no_logs_read.md} | 25 ++++++------------- ...ine.md => issue_rc_integration_offline.md} | 25 ++++++------------- ...ne.md => issue_security_engine_offline.md} | 2 +- 10 files changed, 52 insertions(+), 113 deletions(-) rename crowdsec-docs/unversioned/troubleshooting/{engine_no_alerts.md => issue_engine_no_alerts.md} (88%) rename crowdsec-docs/unversioned/troubleshooting/{engine_too_many_alerts.md => issue_engine_too_many_alerts.md} (91%) rename crowdsec-docs/unversioned/troubleshooting/{fw_integration_offline.md => issue_fw_integration_offline.md} (92%) rename crowdsec-docs/unversioned/troubleshooting/{log_processor_offline.md => issue_log_processor_offline.md} (99%) rename crowdsec-docs/unversioned/troubleshooting/{lp_no_alerts.md => issue_lp_no_alerts.md} (89%) rename crowdsec-docs/unversioned/troubleshooting/{lp_no_logs_parsed.md => issue_lp_no_logs_parsed.md}
(92%) rename crowdsec-docs/unversioned/troubleshooting/{lp_no_logs_read.md => issue_lp_no_logs_read.md} (91%) rename crowdsec-docs/unversioned/troubleshooting/{rc_integration_offline.md => issue_rc_integration_offline.md} (92%) rename crowdsec-docs/unversioned/troubleshooting/{security_engine_offline.md => issue_security_engine_offline.md} (99%) diff --git a/crowdsec-docs/unversioned/troubleshooting/console_issues.md b/crowdsec-docs/unversioned/troubleshooting/console_issues.md index 7813acb2a..d19521e02 100644 --- a/crowdsec-docs/unversioned/troubleshooting/console_issues.md +++ b/crowdsec-docs/unversioned/troubleshooting/console_issues.md @@ -18,19 +18,19 @@ The CrowdSec Console monitors your infrastructure health and raises alerts when - **Criticality**: 🔥 Critical - **Trigger**: Security Engine has not reported to the Console for more than 24 hours - **Description**: The core CrowdSec service (Log Processor + Local API) has stopped communicating with the Console infrastructure -- **Resolution**: [Security Engine Offline Troubleshooting](/u/troubleshooting/security_engine_offline) +- **Resolution**: [Security Engine Offline Troubleshooting](/u/troubleshooting/issue_security_engine_offline) #### Engine No Alerts - **Criticality**: ⚠️ High - **Trigger**: No alerts generated in the last 48 hours - **Description**: The Security Engine is running but hasn't detected any threats, which may indicate logs aren't being processed or scenarios aren't triggering -- **Resolution**: [Engine No Alerts Troubleshooting](/u/troubleshooting/engine_no_alerts) +- **Resolution**: [Engine No Alerts Troubleshooting](/u/troubleshooting/issue_engine_no_alerts) #### Engine Too Many Alerts - **Criticality**: ⚠️ High - **Trigger**: More than 250,000 alerts generated in 6 hours - **Description**: Abnormally high alert volume may indicate a misconfigured scenario, false positives, or an ongoing large-scale attack -- **Resolution**: [Engine Too Many Alerts
Troubleshooting](/u/troubleshooting/engine_too_many_alerts) +- **Resolution**: [Engine Too Many Alerts Troubleshooting](/u/troubleshooting/issue_engine_too_many_alerts) ### Log Processor Issues @@ -38,25 +38,25 @@ The CrowdSec Console monitors your infrastructure health and raises alerts when - **Criticality**: 🔥 Critical - **Trigger**: Log Processor has not checked in with Local API for more than 24 hours - **Description**: The local agent component has stopped communicating with the Local API -- **Resolution**: [Log Processor Offline Troubleshooting](/u/troubleshooting/log_processor_offline) +- **Resolution**: [Log Processor Offline Troubleshooting](/u/troubleshooting/issue_log_processor_offline) #### LP No Alerts - **Criticality**: ⚠️ High - **Trigger**: No alerts generated by this Log Processor in the last 48 hours - **Description**: Logs may not be read, parsed correctly, or no scenarios are matching the parsed events -- **Resolution**: [LP No Alerts Troubleshooting](/u/troubleshooting/lp_no_alerts) +- **Resolution**: [LP No Alerts Troubleshooting](/u/troubleshooting/issue_lp_no_alerts) #### LP No Logs Read - **Criticality**: 🔥 Critical - **Trigger**: No logs acquired in the last 24 hours - **Description**: The acquisition configuration is missing, incorrect, or log sources are not producing data -- **Resolution**: [LP No Logs Read Troubleshooting](/u/troubleshooting/lp_no_logs_read) +- **Resolution**: [LP No Logs Read Troubleshooting](/u/troubleshooting/issue_lp_no_logs_read) #### LP No Logs Parsed - **Criticality**: 🔥 Critical - **Trigger**: Logs are being read but none are successfully parsed in the last 48 hours - **Description**: Parsers may be missing, log format may have changed, or there's a mismatch between acquisition type and parser -- **Resolution**: [LP No Logs Parsed Troubleshooting](/u/troubleshooting/lp_no_logs_parsed) +- **Resolution**: [LP No Logs Parsed Troubleshooting](/u/troubleshooting/issue_lp_no_logs_parsed) ### Remediation
Component Issues @@ -64,13 +64,13 @@ The CrowdSec Console monitors your infrastructure health and raises alerts when - **Criticality**: 🔥 Critical - **Trigger**: Firewall bouncer has not pulled decisions for more than 24 hours - **Description**: Firewall-based remediation components have stopped communicating with the Local API -- **Resolution**: [Firewall Integration Offline Troubleshooting](/u/troubleshooting/fw_integration_offline) +- **Resolution**: [Firewall Integration Offline Troubleshooting](/u/troubleshooting/issue_fw_integration_offline) #### RC Integration Offline - **Criticality**: 🔥 Critical - **Trigger**: Remediation Component has not pulled decisions for more than 24 hours - **Description**: Non-firewall remediation components (web servers, reverse proxies, etc.) have stopped communicating with the Local API -- **Resolution**: [RC Integration Offline Troubleshooting](/u/troubleshooting/rc_integration_offline) +- **Resolution**: [RC Integration Offline Troubleshooting](/u/troubleshooting/issue_rc_integration_offline) ## Issue Dependencies diff --git a/crowdsec-docs/unversioned/troubleshooting/engine_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md similarity index 88% rename from crowdsec-docs/unversioned/troubleshooting/engine_no_alerts.md rename to crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md index 554d1ff8a..0cd0928b4 100644 --- a/crowdsec-docs/unversioned/troubleshooting/engine_no_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md @@ -1,6 +1,6 @@ --- title: Engine No Alerts -id: engine_no_alerts +id: issue_engine_no_alerts --- The **Engine No Alerts** issue appears when your Security Engine has been running but hasn't generated any alerts in the last **48 hours**. This usually indicates that logs aren't being processed properly or scenarios aren't matching any threats.
@@ -13,17 +13,10 @@ The **Engine No Alerts** issue appears when your Security Engine has been runnin ## Common Root Causes -### No logs being read -The acquisition configuration may be missing, disabled, or pointing to empty log sources. - -### No logs being parsed -Logs are being read but parsers can't process them due to format mismatches or missing collections. - -### Scenarios in simulation mode -Detection scenarios are installed but set to simulation mode, preventing actual alerts. - -### Legitimate low-activity environment -In some cases, truly clean environments with no malicious activity may not trigger alerts. +- **No logs being read**: The acquisition configuration may be missing, disabled, or pointing to empty log sources. +- **No logs being parsed**: Logs are being read but parsers can't process them due to format mismatches or missing collections. +- **Scenarios in simulation mode**: Detection scenarios are installed but set to simulation mode, preventing actual alerts. +- **Legitimate low-activity environment**: In some cases, truly clean environments with no malicious activity may not trigger alerts. ## How to Diagnose diff --git a/crowdsec-docs/unversioned/troubleshooting/engine_too_many_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md similarity index 91% rename from crowdsec-docs/unversioned/troubleshooting/engine_too_many_alerts.md rename to crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md index d66e61011..a7ee9b65e 100644 --- a/crowdsec-docs/unversioned/troubleshooting/engine_too_many_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md @@ -1,6 +1,6 @@ --- title: Engine Too Many Alerts -id: engine_too_many_alerts +id: issue_engine_too_many_alerts --- The **Engine Too Many Alerts** issue appears when your Security Engine generates an abnormally high volume of alerts—more than 250,000 in a 6-hour period.
This usually indicates a misconfigured scenario, false positives, or an ongoing large-scale attack. @@ -13,17 +13,10 @@ The **Engine Too Many Alerts** issue appears when your Security Engine generates ## Common Root Causes -### Misconfigured or overly sensitive scenario -A scenario with thresholds set too low or matching too broadly can trigger excessive alerts. - -### Log duplication -The same log file is being read multiple times due to acquisition misconfiguration. - -### Actual large-scale attack -A genuine distributed attack (DDoS, brute force campaign) targeting your infrastructure. - -### Parser creating duplicate events -A parser issue causing the same log line to generate multiple events. +- **Misconfigured or overly sensitive scenario**: A scenario with thresholds set too low or matching too broadly can trigger excessive alerts. +- **Log duplication**: The same log file is being read multiple times due to acquisition misconfiguration. +- **Actual large-scale attack**: A genuine distributed attack (DDoS, brute force campaign) targeting your infrastructure. +- **Parser creating duplicate events**: A parser issue causing the same log line to generate multiple events. 
## How to Diagnose diff --git a/crowdsec-docs/unversioned/troubleshooting/fw_integration_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_fw_integration_offline.md similarity index 92% rename from crowdsec-docs/unversioned/troubleshooting/fw_integration_offline.md rename to crowdsec-docs/unversioned/troubleshooting/issue_fw_integration_offline.md index 466247f32..d2b065516 100644 --- a/crowdsec-docs/unversioned/troubleshooting/fw_integration_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_fw_integration_offline.md @@ -1,6 +1,6 @@ --- title: Firewall Integration Offline -id: fw_integration_offline +id: issue_fw_integration_offline --- The **Firewall Integration Offline** issue appears when a firewall-based remediation component (bouncer) has not pulled decisions from the Local API for more than 24 hours. This means blocked IPs are not being enforced at the firewall level. @@ -13,20 +13,11 @@ The **Firewall Integration Offline** issue appears when a firewall-based remedia ## Common Root Causes -### Bouncer service stopped -The firewall bouncer systemd service or process is not running. - -### Authentication failure -API key is invalid, expired, or the bouncer was removed from the Security Engine. - -### Network connectivity issues -The bouncer cannot reach the Local API endpoint (different host, port closed, etc.). - -### Configuration errors -Incorrect API URL, missing configuration file, or malformed settings. - -### Bouncer installation issue -The bouncer may not be properly installed or registered. +- **Bouncer service stopped**: The firewall bouncer systemd service or process is not running. +- **Authentication failure**: API key is invalid, expired, or the bouncer was removed from the Security Engine. +- **Network connectivity issues**: The bouncer cannot reach the Local API endpoint (different host, port closed, etc.). +- **Configuration errors**: Incorrect API URL, missing configuration file, or malformed settings. 
+- **Bouncer installation issue**: The bouncer may not be properly installed or registered. ## How to Diagnose diff --git a/crowdsec-docs/unversioned/troubleshooting/log_processor_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md similarity index 99% rename from crowdsec-docs/unversioned/troubleshooting/log_processor_offline.md rename to crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md index 2699719de..7deea1fba 100644 --- a/crowdsec-docs/unversioned/troubleshooting/log_processor_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md @@ -1,6 +1,6 @@ --- title: Log Processor Offline -id: log_processor_offline +id: issue_log_processor_offline --- When the Console or a notification rule reports **Log Processor Offline**, the local agent has not checked in with the Local API (LAPI) for more than 24 hours. The alert is different from **Log Processor No Alert**, which only means logs were parsed but no scenarios fired. Use the sections below to identify why the heartbeat stopped and how to bring the agent back online. diff --git a/crowdsec-docs/unversioned/troubleshooting/lp_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md similarity index 89% rename from crowdsec-docs/unversioned/troubleshooting/lp_no_alerts.md rename to crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md index dc9b61d21..d74dbbd05 100644 --- a/crowdsec-docs/unversioned/troubleshooting/lp_no_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md @@ -1,6 +1,6 @@ --- title: LP No Alerts -id: lp_no_alerts +id: issue_lp_no_alerts --- The **LP No Alerts** issue appears when a specific Log Processor (agent) is running and communicating with the Local API but hasn't generated any alerts in the last 48 hours. 
This is similar to [Engine No Alerts](/u/troubleshooting/engine_no_alerts) but applies to individual Log Processor instances in distributed setups. @@ -13,17 +13,10 @@ The **LP No Alerts** issue appears when a specific Log Processor (agent) is runn ## Common Root Causes -### No logs being read by this agent -The acquisition configuration on this specific Log Processor may be missing, disabled, or pointing to empty sources. - -### No logs being parsed successfully -Logs are being read but parsers can't process them due to format mismatches or missing collections. - -### Scenarios in simulation mode -Detection scenarios are installed but running in simulation mode on this agent. - -### Low-activity monitored service -The service monitored by this Log Processor may genuinely have no malicious activity. +- **No logs being read by this agent**: The acquisition configuration on this specific Log Processor may be missing, disabled, or pointing to empty sources. +- **No logs being parsed successfully**: Logs are being read but parsers can't process them due to format mismatches or missing collections. +- **Scenarios in simulation mode**: Detection scenarios are installed but running in simulation mode on this agent. +- **Low-activity monitored service**: The service monitored by this Log Processor may genuinely have no malicious activity. 
## How to Diagnose diff --git a/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_parsed.md b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_parsed.md similarity index 92% rename from crowdsec-docs/unversioned/troubleshooting/lp_no_logs_parsed.md rename to crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_parsed.md index 1c7c51ec0..7fe493f73 100644 --- a/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_parsed.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_parsed.md @@ -1,6 +1,6 @@ --- title: LP No Logs Parsed -id: lp_no_logs_parsed +id: issue_lp_no_logs_parsed --- The **LP No Logs Parsed** issue appears when logs are being successfully read by the Log Processor but none are being parsed correctly in the last 48 hours. This means the acquisition is working, but parsers can't interpret the log format. @@ -13,20 +13,11 @@ The **LP No Logs Parsed** issue appears when logs are being successfully read by ## Common Root Causes -### Missing collection or parsers -The required parser collection for your log format isn't installed. - -### Acquisition type mismatch -The `type:` or `program:` label in acquisition doesn't match any installed parser's FILTER. - -### Custom or unexpected log format -Logs don't match the format expected by the parser (custom format, version mismatch, etc.). - -### Parser FILTER not matching -Parser exists but its FILTER clause doesn't match the acquisition label. - -### Grok pattern mismatch -Log structure has changed and the parser's grok patterns no longer match. +- **Missing collection or parsers**: The required parser collection for your log format isn't installed. +- **Acquisition type mismatch**: The `type:` or `program:` label in acquisition doesn't match any installed parser's FILTER. +- **Custom or unexpected log format**: Logs don't match the format expected by the parser (custom format, version mismatch, etc.). 
+- **Parser FILTER not matching**: Parser exists but its FILTER clause doesn't match the acquisition label. +- **Grok pattern mismatch**: Log structure has changed and the parser's grok patterns no longer match. ## How to Diagnose diff --git a/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_read.md b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_read.md similarity index 91% rename from crowdsec-docs/unversioned/troubleshooting/lp_no_logs_read.md rename to crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_read.md index 1d7b9caa4..e8dc387c1 100644 --- a/crowdsec-docs/unversioned/troubleshooting/lp_no_logs_read.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_read.md @@ -1,6 +1,6 @@ --- title: LP No Logs Read -id: lp_no_logs_read +id: issue_lp_no_logs_read --- The **LP No Logs Read** issue appears when a Log Processor is running but hasn't acquired any log lines in the last 24 hours. This is the first step in the detection pipeline and must work for CrowdSec to function. @@ -13,23 +13,12 @@ The **LP No Logs Read** issue appears when a Log Processor is running but hasn't ## Common Root Causes -### Missing acquisition configuration -No acquisition files exist, or they're empty. - -### Incorrect log file paths -Acquisition configuration points to paths that don't exist or have moved. - -### File permission issues -CrowdSec doesn't have read access to the log files. - -### Log files are empty or not being written -The services being monitored aren't generating logs. - -### Acquisition type mismatch -Wrong datasource type configured (e.g., using `file` instead of `journald`). - -### Container/Kubernetes volume issues -In containerized deployments, logs aren't mounted or accessible to the CrowdSec container. +- **Missing acquisition configuration**: No acquisition files exist, or they're empty. +- **Incorrect log file paths**: Acquisition configuration points to paths that don't exist or have moved. 
+- **File permission issues**: CrowdSec doesn't have read access to the log files. +- **Log files are empty or not being written**: The services being monitored aren't generating logs. +- **Acquisition type mismatch**: Wrong datasource type configured (e.g., using `file` instead of `journald`). +- **Container/Kubernetes volume issues**: In containerized deployments, logs aren't mounted or accessible to the CrowdSec container. ## How to Diagnose diff --git a/crowdsec-docs/unversioned/troubleshooting/rc_integration_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md similarity index 92% rename from crowdsec-docs/unversioned/troubleshooting/rc_integration_offline.md rename to crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md index 504ec4a05..ee3ac76b7 100644 --- a/crowdsec-docs/unversioned/troubleshooting/rc_integration_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md @@ -1,6 +1,6 @@ --- title: RC Integration Offline -id: rc_integration_offline +id: issue_rc_integration_offline --- The **RC Integration Offline** (Remediation Component Integration Offline) issue appears when a non-firewall remediation component (bouncer) has not pulled decisions from the Local API for more than 24 hours. This means your web server, reverse proxy, CDN, or other integration is not receiving block/captcha decisions. @@ -22,23 +22,12 @@ This issue applies to bouncers such as: ## Common Root Causes -### Bouncer service or process stopped -The bouncer daemon, module, or plugin is not running. - -### Authentication failure -API key is invalid, expired, or the bouncer was removed from the Security Engine. - -### Network connectivity issues -The bouncer cannot reach the Local API endpoint. - -### Configuration errors -Incorrect API URL, missing configuration file, or malformed settings. - -### Integration not loaded -Module/plugin is installed but not enabled in the web server or application. 
- -### Log rotation or restart issues -Bouncer lost connection after service restart and didn't reconnect. +- **Bouncer service or process stopped**: The bouncer daemon, module, or plugin is not running. +- **Authentication failure**: API key is invalid, expired, or the bouncer was removed from the Security Engine. +- **Network connectivity issues**: The bouncer cannot reach the Local API endpoint. +- **Configuration errors**: Incorrect API URL, missing configuration file, or malformed settings. +- **Integration not loaded**: Module/plugin is installed but not enabled in the web server or application. +- **Log rotation or restart issues**: Bouncer lost connection after service restart and didn't reconnect. ## How to Diagnose diff --git a/crowdsec-docs/unversioned/troubleshooting/security_engine_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md similarity index 99% rename from crowdsec-docs/unversioned/troubleshooting/security_engine_offline.md rename to crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md index 91dc11d12..1e896775e 100644 --- a/crowdsec-docs/unversioned/troubleshooting/security_engine_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md @@ -1,6 +1,6 @@ --- title: Security Engine Offline -id: security_engine_offline +id: issue_security_engine_offline --- The **Security Engine Offline** alert appears in the Console and notification integrations when an enrolled engine has not reported or logged in to CrowdSec for more than 48 hours. This usually means the core `crowdsec` service (Log Processor + Local API) has stopped working or communicating with our infrastructure. 
From a68bde93f2b41555c81efdc9ff2b654544a89ff1 Mon Sep 17 00:00:00 2001 From: jdv Date: Fri, 21 Nov 2025 16:25:24 +0100 Subject: [PATCH 05/13] clean up and precisions of a few pages still wip --- .../troubleshooting/issue_engine_no_alerts.md | 147 +++++++---- .../issue_log_processor_offline.md | 234 +++++++++++++++--- .../troubleshooting/issue_lp_no_alerts.md | 10 +- .../issue_security_engine_offline.md | 9 +- 4 files changed, 309 insertions(+), 91 deletions(-) diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md index 0cd0928b4..a13b428b7 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md @@ -13,106 +13,159 @@ The **Engine No Alerts** issue appears when your Security Engine has been runnin ## Common Root Causes -- **No logs being read**: The acquisition configuration may be missing, disabled, or pointing to empty log sources. -- **No logs being parsed**: Logs are being read but parsers can't process them due to format mismatches or missing collections. - **Scenarios in simulation mode**: Detection scenarios are installed but set to simulation mode, preventing actual alerts. -- **Legitimate low-activity environment**: In some cases, truly clean environments with no malicious activity may not trigger alerts. +- **Are appropriate collections installed**: make sure you have the detection scenarios and/or appsec rules covering your services' needs +- **Low/no-traffic environment**: If your service handles very few requests or is not open to the internet, it's usual to observe little or no malicious activity.
+- **Legitimate low-activity environment**: Your defenses preceding your service might be good enough that you don't detect additional malicious behaviors (CrowdSec blocklists or other protections may already deflect most malicious activity). + + + +**Other Issues** +- 🔗 **[No logs being read](/u/troubleshooting/issue_lp_no_logs_read)**: The acquisition configuration may be missing, disabled, or pointing to empty log sources. +- 🔗 **[No logs being parsed](/u/troubleshooting/issue_lp_no_logs_parsed)**: Logs are being read but parsers can't process them due to format mismatches or missing collections. ## How to Diagnose -### Check metrics to identify the issue +If it's not due to [other issues](#otherIssues), here are the diagnostics and resolutions for the other root causes. + +### Check if scenarios are in simulation mode -Run the metrics command to see the full pipeline: +Verify whether your scenarios are set to simulation mode, which prevents them from generating alerts: ```bash # On host -sudo cscli metrics show acquisition parsers scenarios +sudo cscli simulation status # Docker -docker exec crowdsec cscli metrics show acquisition parsers scenarios +docker exec crowdsec cscli simulation status # Kubernetes -kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli metrics show acquisition parsers scenarios +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli simulation status ``` -Look for: -- **Acquisition Metrics**: Are log lines being read? (non-zero "Lines read") -- **Parser Metrics**: Are logs being parsed successfully? (non-zero "Lines parsed") -- **Scenario Metrics**: Are scenarios evaluating events? (check "Current count" or "Overflow") +If scenarios are listed, they're in simulation mode and their alerts won't be sent to the CrowdSec Console (they should however still appear in `cscli alerts list`).
+ +### Check if appropriate collections are installed -### Check recent alerts +Verify you have collections matching your protected services: ```bash # On host -sudo cscli alerts list +sudo cscli collections list # Docker -docker exec crowdsec cscli alerts list +docker exec crowdsec cscli collections list # Kubernetes -kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli alerts list +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli collections list ``` -If the list is empty, proceed with the resolution steps below. +Compare your installed collections against your actual services (nginx, apache, ssh, etc.). Missing collections means no detection rules for those services. -## How to Resolve +### Evaluate your service activity level -### If no logs are being read +Check how much traffic your service is processing: -This is the most common cause. Follow the [LP No Logs Read troubleshooting guide](/u/troubleshooting/lp_no_logs_read) for detailed steps. - -**Quick checks:** -- Verify acquisition configuration exists (`/etc/crowdsec/acquis.yaml` or `acquis.d/`) -- Ensure log files exist and are accessible -- Check file permissions allow CrowdSec to read logs +```bash +# On host +sudo cscli metrics show acquisition parsers -### If logs are read but not parsed +# Docker +docker exec crowdsec cscli metrics show acquisition parsers -Follow the [LP No Logs Parsed troubleshooting guide](/u/troubleshooting/lp_no_logs_parsed) for detailed steps. 
+# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli metrics show acquisition parsers +``` -**Quick checks:** -- Verify collections are installed: `cscli collections list` -- Check log format matches parser expectations -- Use `cscli explain --log "" --type ` to test parsing +Look at "Lines parsed" - if this number is very low (dozens or hundreds per day), you may simply have insufficient traffic volume for malicious activity to appear. -### If scenarios are in simulation mode +### Check if proactive defenses are blocking threats upstream -Check if scenarios are in simulation: +If you have CrowdSec blocklists or other protection layers active, they may be blocking malicious traffic before it reaches your scenarios: ```bash -sudo cscli simulation status +# On host +sudo cscli decisions list +sudo cscli metrics show bouncers + +# Docker +docker exec crowdsec cscli decisions list +docker exec crowdsec cscli metrics show bouncers + +# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli decisions list +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli metrics show bouncers ``` -If scenarios are in simulation mode, they will be listed. To disable simulation for all scenarios: +High numbers of active decisions or bouncer blocks indicate your proactive defenses are working - malicious actors never reach your log-based detection. 
+ +## How to Resolve + +### If scenarios are in simulation mode + +Disable simulation mode to allow alerts to be generated: ```bash +# On host sudo cscli simulation disable --all sudo systemctl reload crowdsec + +# Docker +docker exec crowdsec cscli simulation disable --all +docker restart crowdsec + +# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli simulation disable --all +kubectl rollout restart deployment/crowdsec -n crowdsec ``` -Or for specific scenarios: +You can also disable simulation for specific scenarios only: ```bash sudo cscli simulation disable crowdsecurity/ssh-bf sudo systemctl reload crowdsec ``` -### If this is a low-activity environment +### If appropriate collections are missing + +Install collections matching your protected services. Visit the [CrowdSec Hub](https://hub.crowdsec.net/) to find collections for your stack: + +- **Web servers**: `crowdsecurity/nginx`, `crowdsecurity/apache2`, `crowdsecurity/caddy` +- **SSH**: `crowdsecurity/sshd` +- **Linux base**: `crowdsecurity/linux` +- **AppSec/WAF**: `crowdsecurity/appsec-*` collections for application-level protection + +Install collections using: + +```bash +# On host +sudo cscli collections install crowdsecurity/nginx +sudo systemctl reload crowdsec + +# Docker +docker exec crowdsec cscli collections install crowdsecurity/nginx +docker restart crowdsec + +# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli collections install crowdsecurity/nginx +kubectl rollout restart deployment/crowdsec -n crowdsec +``` + +### If this is a low-traffic environment -In genuinely clean environments, you can: +For services with minimal traffic or limited internet exposure: -1. **Test with dummy scenarios** using the [Health Check guide](/u/getting_started/health_check) to verify detection works -2. **Subscribe to Community Blocklist** decisions in the Console to add proactive blocking -3. 
**Monitor metrics regularly** to ensure the pipeline stays healthy +1. **Verify detection is working** by triggering test scenarios as described in the [Health Check guide](/u/getting_started/health_check/#trigger-crowdsecs-test-scenarios) +2. **Consider this normal** - If your detection is working properly, low traffic may simply mean fewer threats to detect and you can ignore the issue for now. -## Verify Resolution +### If proactive defenses are already handling threats -After making changes: +This is actually a **positive outcome** - your blocklists and bouncers are preventing malicious traffic from reaching your services: -1. Restart CrowdSec: `sudo systemctl restart crowdsec` -2. Wait a few minutes for log processing -3. Check metrics again: `sudo cscli metrics show scenarios` -4. Trigger a test alert using the [Health Check detection tests](/u/getting_started/health_check#-detection-checks) +1. **Verify your setup is working** by running the [Health Check detection tests](/u/getting_started/health_check#-detection-checks) to confirm scenarios can still trigger when needed +2. **Monitor bouncer metrics** to see how many threats are being blocked: `sudo cscli metrics show bouncers` +3. **Review active decisions** to understand what threats are being prevented: `sudo cscli decisions list` +4.
**Keep the Console enrolled** to maintain visibility into your protection posture even if local alerts are minimal ## Related Issues diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md index 7deea1fba..47d5d51de 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md @@ -5,9 +5,22 @@ id: issue_log_processor_offline When the Console or a notification rule reports **Log Processor Offline**, the local agent has not checked in with the Local API (LAPI) for more than 24 hours. The alert is different from **Log Processor No Alert**, which only means logs were parsed but no scenarios fired. Use the sections below to identify why the heartbeat stopped and how to bring the agent back online. -## Common Root Causes & Diagnostics +## What Triggers This Issue -### Service stopped or stuck +- **Trigger condition**: Log Processor has not checked in with Local API for more than 24 hours +- **Criticality**: 🔥 Critical +- **Impact**: The agent is not communicating with the Local API - no alerts from this agent will reach the Console + +## Common Root Causes + +- **Service stopped or stuck**: The crowdsec service has crashed, hung, or was manually stopped on the agent host. +- **Machine not validated or credentials revoked**: The agent's credentials are pending validation, were removed from the LAPI, or the credentials file is missing/corrupt. +- **Local API unreachable from agent**: Network issues, firewall rules, or configuration errors prevent the agent from connecting to the LAPI endpoint. +- **Local API service unavailable**: The LAPI service itself is down or not responding, affecting all agents trying to connect.
+ +## How to Diagnose + +### Check if the service is stopped or stuck - Confirm the service state on the host: @@ -25,90 +38,132 @@ kubectl get pods -n crowdsec - On the LAPI node, run `sudo cscli machines list` and check whether the `Last Update` column is older than 24 hours for the affected machine. -### Machine not validated or credentials revoked +### Check if machine credentials are valid + +From the LAPI host: + +```bash +# On host +sudo cscli machines list + +# Docker +docker exec crowdsec cscli machines list + +# Kubernetes +kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli machines list +``` -- `sudo cscli machines list` on the LAPI shows the machine in `PENDING` state or missing entirely. -- On the agent host, ensure `/etc/crowdsec/local_api_credentials.yaml` exists and contains the expected login and password. -- If you recently reinstalled or renamed the machine, it must be re-validated. See [Machines management](/u/user_guides/machines_mgmt) for details. +- If the machine shows in `PENDING` state or is missing entirely, credentials need validation +- On the agent host, ensure `/etc/crowdsec/local_api_credentials.yaml` exists and contains valid login and password +- If you recently reinstalled or renamed the machine, it must be re-validated -### Local API unreachable +### Check if the Local API is reachable from the agent -- From the agent, run: +From the agent host, test connectivity to the LAPI: ```bash +# On host sudo cscli lapi status + +# Docker +docker exec crowdsec-agent cscli lapi status + +# Kubernetes +kubectl exec -n crowdsec -it -- cscli lapi status ``` - Errors such as `401 Unauthorized`, TLS failures, or connection timeouts indicate an authentication or network issue. 
+Look for errors: +- `401 Unauthorized` - credentials issue +- TLS failures - certificate problems +- Connection timeouts - network/firewall blocking -- Verify the API endpoint declared in `/etc/crowdsec/config.yaml` (`api.client.credentials_path`, `url`, `ca_cert`, `insecure_skip_verify`) matches your LAPI setup. Refer to [Local API configuration](/docs/local_api/configuration) and [TLS authentication](/docs/local_api/tls_auth) if certificates changed. -- Confirm the network path between the agent and the LAPI host is open (default port `8080/TCP`). Firewalls or reverse proxies introduced after installation commonly block the heartbeat. +Also verify the API endpoint in `/etc/crowdsec/config.yaml`: +- Check `api.client.credentials_path` points to correct credentials file +- Verify `url` matches your LAPI endpoint (default: `http://localhost:8080`) +- Review `ca_cert` and `insecure_skip_verify` if using TLS -### Local API unavailable +Test network connectivity: -- If several agents show as offline simultaneously, the LAPI service might be down. Check its status on the LAPI machine: +```bash +nc -zv 8080 +``` + +### Check if the Local API service is available + +If several agents show as offline simultaneously, the LAPI service itself might be down. + +On the LAPI machine: ```bash +# On host sudo systemctl status crowdsec sudo journalctl -u crowdsec -n 50 + +# Docker +docker ps --filter name=crowdsec-lapi +docker logs crowdsec-lapi --tail 50 + +# Kubernetes +kubectl get pods -n crowdsec -l type=lapi +kubectl logs -n crowdsec -l type=lapi --tail 50 ``` -- Inspect `/var/log/crowdsec/` (or container logs) for database or authentication errors that prevent the LAPI from responding. -- Use `sudo cscli metrics show engine` on the LAPI to confirm it is still ingesting events from other agents. See the [Health Check guide](/u/getting_started/health_check) for additional diagnostics. 
+Check `sudo cscli metrics show engine` on the LAPI to confirm it is processing events from other agents. -## Recovery Actions +## How to Resolve -### Restart the Log Processor service +### If the service is stopped or stuck -- Systemd: +Restart the Log Processor service: ```bash +# On host (systemd) sudo systemctl restart crowdsec -``` -- Docker: - -```bash +# Docker docker restart crowdsec + +# Kubernetes +kubectl rollout restart deployment/crowdsec -n crowdsec ``` -- Kubernetes: +After the restart, verify the agent is checking in: ```bash -kubectl rollout restart deployment/crowdsec -n crowdsec +# On LAPI host +sudo cscli machines list ``` -After the restart, re-run `sudo cscli machines list` on the LAPI to confirm the `Last Update` timestamp is refreshed. +Check that the `Last Update` timestamp is recent (within last few minutes). -### Validate or re-register the machine +### If machine credentials need validation -#### Using credentials +#### Using credentials (single machine setups) :::info More suitable for single machine setups. ::: -- To regenerate credentials directly on the LAPI host when the agent runs locally, run: +To regenerate credentials directly on the LAPI host when the agent runs locally: ```bash sudo cscli machines add -a ``` -#### Using registration system +#### Using registration system (distributed setups) :::info Registration system is more suitable for distributed setups. ::: - - -- Approve pending machines on the LAPI: +Approve pending machines on the LAPI: ```bash +sudo cscli machines list sudo cscli machines validate ``` -- If credentials were removed or the agent was rebuilt, re-register it against the LAPI: +If credentials were removed or the agent was rebuilt, re-register it against the LAPI: ```bash sudo cscli lapi register --url http://:8080 --machine @@ -117,26 +172,123 @@ sudo systemctl restart crowdsec Update the `--url` to match your deployment. 
Auto-registration tokens are covered in [Machines management](/u/user_guides/machines_mgmt#machine-auto-validation). -### Restore connectivity to the Local API +#### Kubernetes pod rotation (stale machines) + +In Kubernetes environments, pod restarts and scaling events create new pod identities. Old Log Processor entries may remain in the LAPI's machine list even after pods are deleted, causing the Console to show offline agents that no longer exist. + +To identify and clean up stale machines: + +1. List all registered machines and note their last update times: + ```bash + # On LAPI host + sudo cscli machines list + + # In Kubernetes + kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli machines list + ``` + +2. Identify machines that haven't checked in for 24+ hours and verify they correspond to deleted pods: + ```bash + # Check current running pods + kubectl get pods -n crowdsec -l app=crowdsec-agent -o wide + ``` + +3. Prune stale machines: + ```bash + # Delete specific stale machine + sudo cscli machines delete + + # Or prune all machines not seen in 24+ hours (use with caution) + sudo cscli machines prune + ``` + +4. After pruning, you may need to restart the agent deployment to regenerate credentials for current pods: + ```bash + kubectl rollout restart deployment/crowdsec-agent -n crowdsec + ``` + +5. Verify new pods register successfully: + ```bash + # Wait 1-2 minutes then check + kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli machines list + ``` + +:::tip +To prevent accumulation of stale machines in Kubernetes, consider using [auto-registration tokens](/u/user_guides/machines_mgmt#machine-auto-validation) which handle pod lifecycle automatically. 
+::: + +### If the Local API is unreachable from the agent -- Open the required port on firewalls or security groups and verify with: +Open the required port on firewalls or security groups: ```bash +# Test connectivity nc -zv 8080 + +# If using firewall, ensure port is open +sudo ufw allow 8080/tcp +# or +sudo firewall-cmd --add-port=8080/tcp --permanent +sudo firewall-cmd --reload ``` -- If TLS certificates were renewed, update the agent trust store (`ca_cert`) or temporarily enable `insecure_skip_verify: true` for testing. Follow the hardening recommendations in [TLS authentication](/docs/local_api/tls_auth). -- When using proxies or load balancers, ensure they forward HTTP headers and TLS material expected by the LAPI. +If using TLS: +- Update the agent trust store (`ca_cert` in `/etc/crowdsec/config.yaml`) if certificates were renewed +- Temporarily enable `insecure_skip_verify: true` for testing (then fix certificates properly) +- Follow [TLS authentication](/docs/local_api/tls_auth) for proper setup + +If using proxies or load balancers: +- Ensure they forward HTTP headers correctly +- Verify TLS passthrough or termination is configured properly +- Check that the LAPI endpoint is accessible through the proxy -### Stabilise the Local API +### If the Local API service is unavailable -- Restart the LAPI service or pod if it was unresponsive: +Restart the LAPI service: ```bash +# On host (systemd) sudo systemctl restart crowdsec + +# Kubernetes kubectl rollout restart deployment/crowdsec-lapi -n crowdsec ``` -- Run `sudo cscli support dump` to collect diagnostics if the LAPI repeatedly crashes or loses database access. Review the resulting archive for database connectivity errors and consult the [Security Engine troubleshooting guide](/u/troubleshooting/security_engine) when escalation is required. +If the LAPI repeatedly crashes or loses database access: + +1. Collect diagnostics: + ```bash + sudo cscli support dump + ``` + +2. 
Review `/var/log/crowdsec/` (or container logs) for errors +3. Check database connectivity and credentials +4. Consult the [Security Engine troubleshooting guide](/u/troubleshooting/security_engine) if issues persist + +## Verify Resolution + +After making changes: + +1. Wait 1-2 minutes for the agent to check in +2. Verify on the LAPI host: + ```bash + sudo cscli machines list + ``` +3. Check that `Last Update` timestamp is recent (within last few minutes) +4. The Console alert will clear automatically during the next polling cycle + +## Related Issues + +- [Engine No Alerts](/u/troubleshooting/issue_engine_no_alerts) - If the agent is online but not generating alerts +- [LP No Logs Read](/u/troubleshooting/issue_lp_no_logs_read) - If acquisition is not working +- [Security Engine Troubleshooting](/u/troubleshooting/security_engine) - General Security Engine issues + +## Getting Help + +If the agent still shows as offline after following these steps: + +- Check [Discourse](https://discourse.crowdsec.net/) for similar issues +- Ask on [Discord](https://discord.gg/crowdsec) with your `cscli machines list` and `cscli lapi status` output +- Share the output of `sudo cscli support dump` if the issue persists -Once the heartbeat is restored, the Console alert clears automatically during the next polling cycle. Consider adding a [notification rule](/u/console/notification_integrations/rule) for **Log Processor Offline** so you are alerted promptly when it happens again. +Consider adding a [notification rule](/u/console/notification_integrations/rule) for **Log Processor Offline** to be alerted promptly when this happens again. 
diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md index d74dbbd05..61371bc24 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md @@ -13,13 +13,19 @@ The **LP No Alerts** issue appears when a specific Log Processor (agent) is runn ## Common Root Causes -- **No logs being read by this agent**: The acquisition configuration on this specific Log Processor may be missing, disabled, or pointing to empty sources. -- **No logs being parsed successfully**: Logs are being read but parsers can't process them due to format mismatches or missing collections. - **Scenarios in simulation mode**: Detection scenarios are installed but running in simulation mode on this agent. - **Low-activity monitored service**: The service monitored by this Log Processor may genuinely have no malicious activity. + + +**Other Issues** +- 🔗 **[No logs being read](/u/troubleshooting/issue_lp_no_logs_read)**: The acquisition configuration on this specific Log Processor may be missing, disabled, or pointing to empty sources. +- 🔗 **[No logs being parsed](/u/troubleshooting/issue_lp_no_logs_parsed)**: Logs are being read but parsers can't process them due to format mismatches or missing collections. + ## How to Diagnose +If it's not due to [other issues](#otherIssues), here are the diagnostics and resolutions for the other root causes.
+ ### Identify the affected Log Processor Check which machine is not generating alerts: diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md index 1e896775e..66fe8594b 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md @@ -5,7 +5,14 @@ id: issue_security_engine_offline The **Security Engine Offline** alert appears in the Console and notification integrations when an enrolled engine has not reported or logged in to CrowdSec for more than 48 hours. This usually means the core `crowdsec` service (Log Processor + Local API) has stopped working or communicating with our infrastructure. -## Common Root Causes & Diagnostics +## Common Root Causes + +- **Host or service down**: The crowdsec service has stopped or the host itself is unreachable. +- **Enrollment revoked or pending**: Engine enrollment was removed from the Console or is awaiting approval. +- **Console connectivity issues**: Network, firewall, or proxy blocking HTTPS calls to Console endpoints, or TLS validation failures. +- **Local API unavailable**: The Local API component has stopped and cannot gather or forward alerts to the Console. 
+ +## Diagnostics ### Host or service down From 0860064967c0d2f5a062edefc47cb53874ac2183 Mon Sep 17 00:00:00 2001 From: jdv Date: Fri, 21 Nov 2025 18:38:28 +0100 Subject: [PATCH 06/13] some more updates --- .../troubleshooting/issue_engine_no_alerts.md | 3 +- .../issue_engine_too_many_alerts.md | 1 - .../troubleshooting/issue_lp_no_alerts.md | 1 - .../issue_rc_integration_offline.md | 301 +++++++----------- .../issue_security_engine_offline.md | 7 + 5 files changed, 126 insertions(+), 187 deletions(-) diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md index a13b428b7..00f7997df 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md @@ -178,5 +178,4 @@ This is actually a **positive outcome** - your blocklists and bouncers are preve If you've verified logs are being read and parsed correctly but still see no alerts: - Check [Discourse](https://discourse.crowdsec.net/) for similar cases -- Ask on [Discord](https://discord.gg/crowdsec) with your `cscli metrics` output -- Review your scenarios and log samples using [CrowdSec Playground](https://playground.crowdsec.net/) \ No newline at end of file +- Ask on [Discord](https://discord.gg/crowdsec) with your `cscli metrics` output \ No newline at end of file diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md index a7ee9b65e..0b05d2c2f 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md @@ -173,4 +173,3 @@ If you need assistance analyzing alert patterns: - Share anonymized alert samples on [Discourse](https://discourse.crowdsec.net/) - Ask on [Discord](https://discord.gg/crowdsec) with your `cscli metrics 
show scenarios` output -- Use the [CrowdSec Playground](https://playground.crowdsec.net/) to test scenario behavior diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md index 61371bc24..cfd073d78 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md @@ -165,4 +165,3 @@ If you've verified logs are being read and parsed but still see no alerts: - Share your setup details on [Discourse](https://discourse.crowdsec.net/) - Ask on [Discord](https://discord.gg/crowdsec) with `cscli metrics` output -- Test your log samples with [CrowdSec Playground](https://playground.crowdsec.net/) diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md index ee3ac76b7..0d3c44fb6 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md @@ -3,56 +3,66 @@ title: RC Integration Offline id: issue_rc_integration_offline --- -The **RC Integration Offline** (Remediation Component Integration Offline) issue appears when a non-firewall remediation component (bouncer) has not pulled decisions from the Local API for more than 24 hours. This means your web server, reverse proxy, CDN, or other integration is not receiving block/captcha decisions. +The **RC Integration Offline** (Remediation Component Integration Offline) issue appears when a Blocklist integration of type Remediation Component has not pulled from its endpoint for more than 24 hours. + +This issue applies to Remediation Components (aka bouncers) directly connected to a Blocklist integration endpoint (aka Blocklist as a Service). 
## What Triggers This Issue -- **Trigger condition**: No decision pulls for 24 hours +- **Trigger condition**: No pull for 24 hours - **Criticality**: Critical -- **Impact**: Application-level remediation is not working - threats are not being blocked or challenged - -## Common Remediation Components - -This issue applies to bouncers such as: -- **Web servers**: NGINX, Apache, IIS -- **Reverse proxies**: Traefik, HAProxy, Caddy -- **Application frameworks**: PHP, Wordpress plugins -- **Cloud services**: Cloudflare, Akamai connectors -- **Custom integrations**: Using CrowdSec API +- **Impact**: blocklist update not retrieved and potential malfunction of the remediation component. ## Common Root Causes - **Bouncer service or process stopped**: The bouncer daemon, module, or plugin is not running. -- **Authentication failure**: API key is invalid, expired, or the bouncer was removed from the Security Engine. -- **Network connectivity issues**: The bouncer cannot reach the Local API endpoint. -- **Configuration errors**: Incorrect API URL, missing configuration file, or malformed settings. -- **Integration not loaded**: Module/plugin is installed but not enabled in the web server or application. -- **Log rotation or restart issues**: Bouncer lost connection after service restart and didn't reconnect. +- **Configuration errors**: Incorrect or missing API URL or API Key in bouncer's configuration file, or malformed settings. +- **Network connectivity issues**: The bouncer cannot reach the endpoint. +- **Bouncer not loaded**: Bouncer Module/plugin is installed but not enabled or started. ## How to Diagnose -### Check bouncer status in Security Engine +Depending on the type of bouncer, you'll need to check its installation status, configuration, and running status. 
-From the Security Engine (or LAPI host): +**Types of remediation components:** +- **Web server modules**: NGINX, Apache plugins +- **Reverse proxy integrations**: Traefik, HAProxy, Caddy middlewares +- **Application frameworks**: PHP libraries, WordPress plugins +- **Cloud service workers**: Cloudflare Workers, Fastly Compute, autonomous update daemons +- **Custom integrations**: Using the Bouncer SDK -```bash -# On host -sudo cscli bouncers list +### Check bouncer configuration has proper parameters -# Docker -docker exec crowdsec cscli bouncers list +For Blocklist-as-a-Service (BLaaS) connectivity, verify the bouncer configuration contains: -# Kubernetes -kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli bouncers list -``` +1. **api_url**: Must point to your BLaaS endpoint (e.g., `https://admin.api.crowdsec.net/v1/decisions/stream`) +2. **api_key**: Your BLaaS API key (found in the Console under your Blocklist integration) -**What to look for:** -- Is your bouncer listed? -- Check "Last API Pull" timestamp - is it older than 24 hours? -- Is the bouncer marked as "βœ“" (valid)? +**Common configuration file locations:** +- **NGINX**: `/etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf` +- **Traefik**: `/etc/crowdsec/bouncers/crowdsec-traefik-bouncer.yaml` +- **HAProxy**: `/etc/crowdsec/bouncers/crowdsec-haproxy-bouncer.conf` +- **Cloudflare**: `/etc/crowdsec/bouncers/crowdsec-cloudflare-bouncer.yaml` +- **WordPress**: Admin panel β†’ CrowdSec Settings + +Check the configuration file: +```bash +# Example for NGINX bouncer +sudo cat /etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf + +# Look for: +# API_URL=https://admin.api.crowdsec.net/v1/decisions/stream +# API_KEY= +``` ### Check bouncer service status +Verify the bouncer is running and hasn't encountered errors. 
+ +#### For host-based processes + +Check if the bouncer process or service is running: + Depending on your bouncer type: #### Web server module bouncers @@ -86,38 +96,41 @@ sudo systemctl status crowdsec-cloudflare-bouncer ### Check bouncer logs -Log locations vary by bouncer type: +Bouncer logs locations vary by type: -```bash -# Web server logs -sudo tail -50 /var/log/nginx/error.log -sudo tail -50 /var/log/apache2/error.log +**Standalone daemon bouncers:** +- **Systemd services**: `sudo journalctl -u crowdsec- -n 50` +- **Traefik/HAProxy/Cloudflare**: `/var/log/crowdsec-.log` -# Standalone bouncer logs -sudo tail -50 /var/log/crowdsec-.log -sudo journalctl -u crowdsec- -n 50 +**Web server module bouncers:** +- **NGINX**: Check main NGINX error log (`/var/log/nginx/error.log`) +- **Apache**: Check Apache error log (`/var/log/apache2/error.log`) -# Docker/Kubernetes -docker logs -kubectl logs -n -``` +**Application framework bouncers:** +- **WordPress**: WordPress debug log or plugin settings page +- **PHP**: Application logs or web server error logs + +**Cloud service workers:** +- **Cloudflare Workers**: Cloudflare dashboard β†’ Workers β†’ Logs +- **Fastly Compute**: Fastly dashboard β†’ Real-time logs **Look for errors like:** -- `connection refused` - API unreachable -- `401 Unauthorized` or `403 Forbidden` - Authentication failed -- `module not loaded` - Integration not enabled -- `invalid configuration` - Config file issues +- `connection refused` or `timeout` - API endpoint unreachable +- `401 Unauthorized` or `403 Forbidden` - API key invalid or missing +- `module not loaded` - Integration not enabled in web server +- `invalid configuration` - Config file syntax or parameter errors +- `rate limit exceeded` - Cloud service plan limits reached -### Test connectivity to Local API +### Test connectivity to the endpoint From the bouncer host: ```bash # Test network connectivity -curl -I http://:8080/ +curl -I https:/// # Test with API key -curl -H "X-Api-Key: 
" http://:8080/v1/decisions +curl -H "X-Api-Key: " https:// ``` ## How to Resolve @@ -132,9 +145,6 @@ sudo systemctl restart nginx # Apache sudo systemctl restart apache2 - -# IIS (Windows) -iisreset ``` #### For standalone daemons @@ -144,69 +154,63 @@ sudo systemctl restart crowdsec- sudo systemctl enable crowdsec- ``` -### Re-register the bouncer - -If the API key is invalid: +### Update bouncer configuration -#### Generate new API key on Security Engine +If the API URL or API key is incorrect, update the bouncer's configuration file: +**NGINX bouncer** (`/etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf`): ```bash -# On LAPI host -sudo cscli bouncers add my-nginx-bouncer - -# Copy the generated API key +API_URL=https://admin.api.crowdsec.net/v1/decisions/stream +API_KEY= +UPDATE_FREQUENCY=10s ``` -#### Update bouncer configuration - -Configuration file locations vary: - -**NGINX bouncer:** -```bash -# Edit config -sudo nano /etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf - -# Update api_key line -API_KEY= +**Traefik bouncer** (`/etc/crowdsec/bouncers/crowdsec-traefik-bouncer.yaml`): +```yaml +crowdsec_url: https://admin.api.crowdsec.net/v1/decisions/stream +crowdsec_api_key: +update_frequency: 10s ``` -**Traefik bouncer:** +**HAProxy bouncer** (`/etc/crowdsec/bouncers/crowdsec-haproxy-bouncer.conf`): ```bash -# Edit config -sudo nano /etc/crowdsec/bouncers/crowdsec-traefik-bouncer.yaml - -# Update api_key field -crowdsec_lapi_key: +CROWDSEC_URL=https://admin.api.crowdsec.net/v1/decisions/stream +CROWDSEC_API_KEY= ``` -**Cloudflare bouncer:** -```bash -# Edit config -sudo nano /etc/crowdsec/bouncers/crowdsec-cloudflare-bouncer.yaml +After updating, restart the bouncer service. -# Update api_key -crowdsec_lapi_key: -``` +### Fix connectivity issues -#### Restart after updating config +If the bouncer cannot reach the BLaaS endpoint: -```bash -sudo systemctl restart -``` +1. 
**Test network connectivity:** + ```bash + curl -I https://admin.api.crowdsec.net/ + ``` -### Fix connectivity issues +2. **Check firewall rules:** + ```bash + # Ensure outbound HTTPS (443) is allowed + sudo ufw status + # or + sudo firewall-cmd --list-all + ``` -If bouncer is on a different host: +3. **Test with API key:** + ```bash + curl -H "X-Api-Key: " \ + https://admin.api.crowdsec.net/v1/decisions/stream + ``` -```bash -# Test connectivity -nc -zv 8080 + Should return `{"new":null,"deleted":null}` or similar if authenticated. -# Check API URL in bouncer config -# Should be: http://:8080/ +4. **Check proxy settings** if using a corporate proxy - configure in bouncer's environment or config file. -# Update bouncer config with correct URL -``` +5. **For cloud workers (Cloudflare/Fastly):** + - Verify the worker is deployed and running + - Check if you've hit rate limits on your plan + - Review worker logs for errors ### Enable the module/plugin @@ -222,7 +226,7 @@ load_module modules/ngx_http_crowdsec_module.so; http { # CrowdSec configuration crowdsec_enabled on; - crowdsec_api_url http://127.0.0.1:8080; + crowdsec_api_url https://; # ... 
} ``` @@ -241,80 +245,31 @@ sudo a2enmod crowdsec sudo systemctl restart apache2 ``` -#### WordPress - -Activate the plugin via WordPress admin panel or: -```bash -wp plugin activate crowdsec # if using WP-CLI -``` - -### Fix configuration errors - -Validate configuration syntax: - -```bash -# Web servers -sudo nginx -t -sudo apache2ctl -t - -# YAML-based bouncers -sudo cat /etc/crowdsec/bouncers/.yaml -# Check for YAML syntax errors -``` - -**Common config issues:** -- Missing or incorrect `api_url` / `api_key` -- Wrong file permissions (must be readable by web server user) -- Incorrect YAML indentation -- Missing trailing `/` in API URL - -### Check file permissions - -Bouncer config files must be readable: - -```bash -# Check permissions -ls -la /etc/crowdsec/bouncers/ - -# Fix if needed -sudo chmod 640 /etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf -sudo chown root:www-data /etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf -``` - ## Verify Resolution After making changes: -1. **Check bouncer service:** - ```bash - sudo systemctl status - # or for web servers - sudo systemctl status nginx - ``` +1. **Wait 1-2 minutes** for the bouncer to attempt its next pull from the endpoint -2. **Verify API pulls are resuming:** - ```bash - sudo cscli bouncers list - ``` - "Last API Pull" should update within seconds/minutes +2. **Check in the Console:** + - Navigate to your Blocklist integration + - Look at the integration tile + - Verify the "Last Pull" timestamp has updated to a recent time (within last few minutes) + - The offline alert should clear automatically -3. **Check bouncer logs for success:** +3. **Verify bouncer is pulling decisions:** ```bash - sudo tail -20 /var/log/.log - ``` - Should see successful API connection messages + # For standalone daemons, check logs + sudo journalctl -u crowdsec- -n 20 -4. 
**Test remediation:** - Add a test decision: - ```bash - sudo cscli decisions add --ip 192.0.2.1 --duration 5m --reason "test" + # Look for successful pull messages like: + # "Successfully pulled X decisions" + # "Decisions updated" ``` - Try accessing your service from that IP (or simulate): - ```bash - curl -H "X-Forwarded-For: 192.0.2.1" http://your-service/ - ``` - Should receive 403 Forbidden or a captcha challenge +4. **Test that blocking is working** (optional but recommended): + - Check bouncer-specific documentation for test procedures + - For web servers, you can test by temporarily adding a test decision ## Bouncer-Specific Documentation @@ -325,26 +280,6 @@ After making changes: - [WordPress Plugin](/u/bouncers/wordpress) - [All Bouncers](/u/bouncers/intro) -## Kubernetes-Specific Notes - -For Kubernetes ingress controllers: - -```bash -# Check ingress controller is running -kubectl get pods -n ingress-nginx - -# Check CrowdSec integration in ingress -kubectl describe ingress -n - -# Check controller logs -kubectl logs -n ingress-nginx --tail=50 -``` - -Ensure the bouncer is registered and pulling decisions: -```bash -kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli bouncers list -``` - ## Related Issues - [Firewall Integration Offline](/u/troubleshooting/fw_integration_offline) - Similar issue for firewall bouncers diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md index 66fe8594b..0a9bcc959 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_security_engine_offline.md @@ -104,3 +104,10 @@ After restarting, re-run `sudo cscli console status` to ensure the heartbeat is - Investigate persistent database or authentication errors using `sudo cscli support dump`, then consult the [Security Engine troubleshooting 
guide](/u/troubleshooting/security_engine) if issues remain. Once the engine resumes contact, the Console clears the **Security Engine Offline** alert during the next poll. Consider enabling the **Security Engine Offline** notification in your preferred integration so future outages are caught quickly. + +## Getting Help + +If you still cannot restore your Security Engine's heartbeat to the CrowdSec Console: + +- Check [Discourse](https://discourse.crowdsec.net/) for similar cases +- Ask on [Discord](https://discord.gg/crowdsec) with your `sudo cscli support dump` output \ No newline at end of file From 26d5d5a472d6ea7b32f4c2f1c8401b3c2f942b90 Mon Sep 17 00:00:00 2001 From: jdv Date: Mon, 24 Nov 2025 11:57:41 +0100 Subject: [PATCH 07/13] last to first review 30% --- .../issue_fw_integration_offline.md | 363 +++++++----------- .../issue_lp_no_logs_parsed.md | 13 +- .../issue_rc_integration_offline.md | 20 +- 3 files changed, 154 insertions(+), 242 deletions(-) diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_fw_integration_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_fw_integration_offline.md index d2b065516..63b674868 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_fw_integration_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_fw_integration_offline.md @@ -3,295 +3,206 @@ title: Firewall Integration Offline id: issue_fw_integration_offline --- -The **Firewall Integration Offline** issue appears when a firewall-based remediation component (bouncer) has not pulled decisions from the Local API for more than 24 hours. This means blocked IPs are not being enforced at the firewall level. +The **Firewall Integration Offline** issue appears when a firewall that is configured to pull blocklists directly from CrowdSec's Blocklist-as-a-Service (BLaaS) endpoint has not pulled the list for more than 24 hours. This means your firewall is no longer receiving the latest threat intelligence and blocked IPs. 
## What Triggers This Issue -- **Trigger condition**: No decision pulls for 24 hours -- **Criticality**: Critical -- **Impact**: Firewall-based blocking is not working - detected threats are not being blocked +- **Trigger condition**: No pull from BLaaS endpoint for 24 hours +- **Criticality**: 🔥 Critical +- **Impact**: The firewall blocklist is not being updated, so new threats are not being blocked; the firewall may also be malfunctioning. ## Common Root Causes -- **Bouncer service stopped**: The firewall bouncer systemd service or process is not running. -- **Authentication failure**: API key is invalid, expired, or the bouncer was removed from the Security Engine. -- **Network connectivity issues**: The bouncer cannot reach the Local API endpoint (different host, port closed, etc.). -- **Configuration errors**: Incorrect API URL, missing configuration file, or malformed settings. -- **Bouncer installation issue**: The bouncer may not be properly installed or registered. +- **Firewall rule disabled or removed**: The firewall rule that pulls from external blocklists no longer exists or has been disabled. +- **BLaaS credentials invalid**: The basic auth credentials configured in the firewall for accessing the BLaaS endpoint are incorrect or expired, or have been regenerated. +- **Network connectivity issues**: The firewall cannot reach the BLaaS endpoint due to network problems, DNS issues, or routing failures. +- **Firewall offline**: The firewall itself is powered off, unreachable, or not processing rules. 
## How to Diagnose -### Check bouncer status in Security Engine +### Check if the firewall is running and has access to BLaaS endpoint -From the Security Engine (or LAPI host): +Confirm that the firewall is powered on and processing rules, and that it can reach `https://admin.api.crowdsec.net` (for example, with a ping or HTTPS test from its CLI or diagnostics page). -```bash -# On host -sudo cscli bouncers list - -# Docker -docker exec crowdsec cscli bouncers list +### Check if the firewall rule for external blocklist still exists -# Kubernetes -kubectl exec -n crowdsec -it $(kubectl get pods -n crowdsec -l type=lapi -o name) -- cscli bouncers list -``` +Access your firewall's management interface and verify: -**What to look for:** -- Is your firewall bouncer listed? -- Check the "Last API Pull" timestamp - is it older than 24 hours? -- Is the bouncer marked as "✓" (valid)? +1. **Navigate to the external blocklist configuration section** (varies by vendor): + - FortiGate: Security Fabric → External Connectors → Threat Feeds + - Palo Alto: Objects → External Dynamic Lists + - ... -### Check bouncer service status +2. **Verify the rule exists and is valid:** + - Is the CrowdSec blocklist rule present? + - Is it enabled/active? 
+ - Check the URL configured - should point to `https://admin.api.crowdsec.net/...` + - Some firewalls have a "test" function for external feeds access -On the host where the firewall bouncer is installed: +### Check BLaaS endpoint credentials -```bash -# For systemd-based bouncers -sudo systemctl status crowdsec-firewall-bouncer +Verify that the basic auth credentials configured in your firewall match the ones from the Console: -# Or for other firewall bouncers -sudo systemctl status cs-firewall-bouncer -``` +**Get the correct basic auth credentials from CrowdSec Console:** +If you lost the credentials, you can regenerate them: + - Navigate to **Blocklists** → **Integrations**: select your firewall integration + - Click **Configuration** → **Refresh Credentials** if you suspect the key is wrong (this will generate a new one) + - Copy the displayed API key or authentication header +**Check authentication method:** + - Some firewalls use HTTP headers (`X-Api-Key: <API_KEY>`) + - Others may use URL parameters (`?api_key=<API_KEY>`) + - Some may offer basic auth forms that are not functional *(Check Point, among others)*; in that case, you can put the credentials directly into the URL: `https://<username>:<password>@admin.api.crowdsec.net/...` -**Common firewall bouncers:** -- `crowdsec-firewall-bouncer` - iptables/nftables bouncer -- `cs-firewall-bouncer` - (legacy name) -- Platform-specific: check your installation method +### Test connectivity to BLaaS endpoint -### Check bouncer logs - -```bash -# Linux -sudo tail -50 /var/log/crowdsec-firewall-bouncer.log - -# Or check journald -sudo journalctl -u crowdsec-firewall-bouncer -n 50 - -# FreeBSD (OPNsense/pfSense) -sudo tail -50 /var/log/crowdsec/crowdsec-firewall-bouncer.log -``` - -**Look for errors like:** -- `connection refused` - API is unreachable -- `401 Unauthorized` or `403 Forbidden` - Authentication failed -- `invalid configuration` - Config file issues -- `cannot bind` or `permission denied` - Firewall permission issues - -### Test connectivity to 
Local API - -From the bouncer host: +From a host on the same network as your firewall (or from the firewall's CLI if available): ```bash # Test network connectivity -curl -I http://:8080/ - -# Test with API key -curl -H "X-Api-Key: " http://:8080/v1/decisions -``` +curl -I https://admin.api.crowdsec.net/ -## How to Resolve - -### Restart the bouncer service - -```bash -# Restart the service -sudo systemctl restart crowdsec-firewall-bouncer +# Test with credentials +curl -I https://<username>:<password>@admin.api.crowdsec.net/v1/integrations/<integration_id>/content -# Enable it to start on boot -sudo systemctl enable crowdsec-firewall-bouncer - -# Check status -sudo systemctl status crowdsec-firewall-bouncer +# Expected response: JSON with decisions or empty list +# Should NOT return 401 Unauthorized or 403 Forbidden ``` -### Re-register the bouncer - -If the API key is invalid or missing: - -#### Generate a new API key on the Security Engine - -```bash -# On Security Engine / LAPI host -sudo cscli bouncers add firewall-bouncer-01 - -# Copy the generated API key -``` - -#### Update bouncer configuration - -Edit the bouncer configuration file (usually `/etc/crowdsec/bouncers/crowdsec-firewall-bouncer.yaml`): - -```yaml -api_url: http://:8080/ -api_key: -``` +If you get connection errors: +- DNS resolution failures - check DNS configuration +- Connection timeouts - firewall outbound rules may be blocking +- SSL/TLS errors - firewall may need updated root certificates -#### Restart the bouncer +### Check firewall logs -```bash -sudo systemctl restart crowdsec-firewall-bouncer -``` +Review your firewall's logs for errors related to external blocklist updates: -### Fix connectivity issues +**Common log locations by vendor:** +*Path to logs may vary depending on your firewall version, check your documentation.* +- **FortiGate**: Log & Report → System Events → filter for "Threat Feed" +- **Palo Alto**: Monitor → System Logs → filter for "External Dynamic List" +- **pfSense**: Status → System Logs 
β†’ Firewall +- **OPNsense**: System β†’ Log Files β†’ Firewall -If the bouncer is on a different host than the Security Engine: +**Look for error messages like:** +- `failed to download` - connectivity issue +- `authentication failed` or `401` - API key invalid +- `SSL certificate verification failed` - certificate trust issue +- `timeout` - network connectivity or endpoint unreachable +- `invalid format` - blocklist format mismatch -#### Check firewall rules allow access +## How to Resolve -```bash -# Test from bouncer host -nc -zv 8080 -``` +### If the firewall rule is disabled or missing -If connection fails: -- Open port 8080 on the Security Engine host firewall -- Check network security groups / iptables rules -- Verify no proxy is blocking the connection +Re-enable or recreate the external blocklist rule: -#### Verify API URL in bouncer config +### If BLaaS credentials are invalid -Edit `/etc/crowdsec/bouncers/crowdsec-firewall-bouncer.yaml`: +Update the API key in your firewall configuration: -```yaml -# For local LAPI -api_url: http://127.0.0.1:8080/ +1. **Regenerate API key in Console** (if needed): + - Navigate to **Integrations** β†’ **Blocklists** β†’ select firewall integration + - Click **Refresh Credentials** + - Copy the new API key -# For remote LAPI -api_url: http://:8080/ +2. **Update firewall configuration** with the new API key: + - Edit the external blocklist rule + - Update the authentication header or API key field + - Save and apply changes -# For HTTPS -api_url: https://:8080/ -``` +3. **Trigger manual update** to test: + - Most firewalls have a "Refresh Now" or "Update" button + - Click it to force an immediate pull from BLaaS + - Check logs for success or errors -**Important:** Don't forget the trailing `/` +### If network connectivity is failing -### Fix configuration errors +Fix network issues preventing firewall from reaching BLaaS: -If bouncer logs show configuration errors: +1. 
**Check firewall outbound rules:** + - Ensure firewall allows outbound HTTPS (port 443) to `admin.api.crowdsec.net` + - Verify no egress filtering is blocking the connection + - Check if firewall's management interface has internet access -```bash -# Validate YAML syntax -sudo cat /etc/crowdsec/bouncers/crowdsec-firewall-bouncer.yaml +2. **Verify DNS resolution:** + ```bash + # From firewall CLI or nearby host + nslookup admin.api.crowdsec.net + dig admin.api.crowdsec.net + ``` -# Check for common issues: -# - Incorrect indentation (YAML is whitespace-sensitive) -# - Missing api_key or api_url -# - Incorrect mode (iptables vs nftables) -``` + If DNS fails, configure firewall to use public DNS (8.8.8.8, 1.1.1.1) temporarily -**Example minimal configuration:** -```yaml -mode: iptables # or nftables -pid_dir: /var/run/ -update_frequency: 10s -daemonize: true -log_mode: file -log_dir: /var/log/ -log_level: info -api_url: http://127.0.0.1:8080/ -api_key: -deny_action: DROP -deny_log: false -``` +3. **Check proxy settings:** + - If firewall uses a proxy for outbound connections, verify proxy configuration + - Ensure proxy allows HTTPS connections to CrowdSec endpoints + - Test proxy with: `curl -x : https://admin.api.crowdsec.net/` -### Fix firewall permission issues +4. **Test from firewall CLI:** + - If firewall has CLI access, test connectivity directly: + ```bash + # Example for pfSense/OPNsense + curl -I https://admin.api.crowdsec.net/ -Some firewall bouncers need specific permissions: + # Example for FortiGate + execute ping admin.api.crowdsec.net + execute telnet admin.api.crowdsec.net 443 + ``` -```bash -# For iptables -sudo setcap cap_net_admin+ep /usr/bin/crowdsec-firewall-bouncer +5. 
**Check SSL/TLS certificate trust:** + - Ensure firewall trusts public CA certificates + - Update firewall's certificate store if needed + - Temporarily disable certificate verification for testing (then fix properly) -# Verify iptables rules are being applied -sudo iptables -L crowdsec-chain -n -v +### If the firewall is offline -# For nftables -sudo nft list ruleset | grep crowdsec -``` +Restore firewall connectivity: -### Reinstall the bouncer (if needed) +1. **Physical/Virtual access:** + - Check if firewall hardware is powered on + - For virtual firewalls, verify VM is running + - Check network cables and interfaces -If the bouncer is corrupted or not properly installed: +2. **Management access:** + - Connect via console/KVM if network management is down + - Verify management interface IP configuration + - Check firewall's default gateway -```bash -# Remove old installation -sudo apt remove crowdsec-firewall-bouncer # Debian/Ubuntu -sudo yum remove crowdsec-firewall-bouncer # RHEL/CentOS - -# Reinstall -sudo apt install crowdsec-firewall-bouncer -# Or follow installation instructions for your platform - -# Re-register with new API key -sudo cscli bouncers add firewall-bouncer-new -# Update config with the new key -# Restart service -``` +3. **After restoring connectivity:** + - Trigger manual blocklist update + - Verify last pull timestamp updates in Console + - Monitor firewall logs for successful updates ## Verify Resolution After making changes: -1. **Check bouncer status:** - ```bash - sudo systemctl status crowdsec-firewall-bouncer - ``` - Should show "active (running)" - -2. **Verify API pulls on Security Engine:** - ```bash - sudo cscli bouncers list - ``` - "Last API Pull" should update to a recent timestamp (within seconds) - -3. **Check firewall rules are applied:** - ```bash - # iptables - sudo iptables -L crowdsec-chain -n -v - - # nftables - sudo nft list table inet crowdsec - ``` - -4. 
**Test blocking:** - Add a test decision and verify it appears in firewall rules: - ```bash - sudo cscli decisions add --ip 192.0.2.1 --duration 5m --reason "test" - - # Wait 10-15 seconds for bouncer to pull - sudo iptables -L crowdsec-chain -n -v | grep 192.0.2.1 - ``` - -## Platform-Specific Notes - -### OPNsense / pfSense -- Bouncer name: `crowdsec-firewall-bouncer` or `os-crowdsec` -- Config: `/usr/local/etc/crowdsec/bouncers/` -- Logs: `/var/log/crowdsec/` -- Service: Check via OPNsense/pfSense GUI or `service crowdsec-firewall-bouncer status` +1. **Trigger manual update on firewall:** + - Use the firewall's "Refresh" or "Update Now" function + - Wait 30-60 seconds for the pull to complete -### Docker -If running the bouncer in Docker, ensure: -- Container is running: `docker ps | grep bouncer` -- Network connectivity to LAPI container/host -- Proper capabilities: `--cap-add=NET_ADMIN --cap-add=NET_RAW` +2. **Check in CrowdSec Console:** + - Navigate to **Integrations** β†’ **Blocklists** + - Verify the "Last Pull" timestamp has updated to a recent time (within last few minutes) + - The offline alert should clear automatically during next polling cycle -### Kubernetes -For Kubernetes network policies or firewall controllers: -- Check pod status: `kubectl get pods -n ` -- Check logs: `kubectl logs -n ` -- Verify service connectivity to LAPI +3. 
**Verify blocklist is populated:** + - Check your firewall shows IP addresses in the blocklist + - Number of entries should match your subscription tier and decisions + - Example: FortiGate β†’ System β†’ External Resources β†’ view entries ## Related Issues -- [RC Integration Offline](/u/troubleshooting/rc_integration_offline) - Similar issue for non-firewall bouncers -- [Remediation Components Troubleshooting](/u/troubleshooting/remediation_components) - General bouncer issues +- [RC Integration Offline](/u/troubleshooting/issue_rc_integration_offline) - Similar issue for remediation components (bouncers) +- [Security Engine Offline](/u/troubleshooting/issue_security_engine_offline) - If using agent-based deployment +- [Blocklist Integration Setup](/u/integrations/blocklists/intro) - Initial setup guide ## Getting Help -If your firewall bouncer still doesn't work: +If your firewall integration still shows as offline: -- Share bouncer logs on [Discourse](https://discourse.crowdsec.net/) -- Ask on [Discord](https://discord.gg/crowdsec) with `cscli bouncers list` output -- Check firewall bouncer documentation: [Firewall Bouncer Docs](/u/bouncers/firewall) -- Report bugs: [GitHub Issues](https://github.com/crowdsecurity/cs-firewall-bouncer/issues) +- Check firewall vendor's documentation for external blocklist configuration +- Share firewall logs on [Discourse](https://discourse.crowdsec.net/) +- Ask on [Discord](https://discord.gg/crowdsec) with firewall model and error messages +- Contact CrowdSec support via Console if BLaaS endpoint issues persist diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_parsed.md b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_parsed.md index 7fe493f73..553d7d575 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_parsed.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_parsed.md @@ -14,10 +14,11 @@ The **LP No Logs Parsed** issue appears when logs are being 
successfully read by ## Common Root Causes - **Missing collection or parsers**: The required parser collection for your log format isn't installed. -- **Acquisition type mismatch**: The `type:` or `program:` label in acquisition doesn't match any installed parser's FILTER. - **Custom or unexpected log format**: Logs don't match the format expected by the parser (custom format, version mismatch, etc.). + +For more advanced cases (often for custom-made parsers): +- **Acquisition type mismatch**: The `type:` or `program:` label in acquisition doesn't match any installed parser's FILTER. - **Parser FILTER not matching**: Parser exists but its FILTER clause doesn't match the acquisition label. -- **Grok pattern mismatch**: Log structure has changed and the parser's grok patterns no longer match. ## How to Diagnose @@ -153,7 +154,7 @@ sudo systemctl restart crowdsec ### Handle custom log formats -If you use a custom log format that doesn't match standard parsers: +If your services use non-default log formats, or if their logs are relayed by a third-party service that may alter them, they may not match the standard parsers. #### Option 1: Adjust log format to match parser **NGINX example:** @@ -166,10 +167,8 @@ access_log /var/log/nginx/access.log combined; ``` #### Option 2: Create a custom parser -1. Use the [CrowdSec Playground](https://playground.crowdsec.net/) to develop and test your parser -2. Create a custom parser in `/etc/crowdsec/parsers/s01-parse/custom-parser.yaml` -3. Use grok patterns to match your format -4. See [Parser Documentation](/docs/log_processor/parsers/format) for details +1. Follow the [Create parsers doc](/log_processor/parsers/create) to develop and test your parser +2. Get help from our [Discord](https://discord.gg/crowdsec) community if you hit roadblocks.
**Simple custom parser example:** ```yaml diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md index 0d3c44fb6..7ff4365fc 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md @@ -5,13 +5,13 @@ id: issue_rc_integration_offline The **RC Integration Offline** (Remediation Component Integration Offline) refers to a Blocklist-Integration of type Remediation Component has not pulled from its endpoint for more than 24 hours. -This issue applies to Remediation Component (aka bouncers) directly connected to a Blocklist integration endpoint (aka Blocklist as a Service). +This issue applies to Remediation Components (aka bouncers) directly connected to a Blocklist integration endpoint (aka Blocklist as a Service / BLaaS). ## What Triggers This Issue - **Trigger condition**: No pull for 24 hours - **Criticality**: Critical -- **Impact**: blocklist update not retrieved and potential malfunction of the remediation component. +- **Impact**: Latest blocklist updates not retrieved and potential malfunction of the remediation component. ## Common Root Causes @@ -33,17 +33,19 @@ Depending on the type of bouncer, you'll need to check its installation status, ### Check bouncer configuration has proper parameters -For Blocklist-as-a-Service (BLaaS) connectivity, verify the bouncer configuration contains: +For Blocklist-as-a-Service (BLaaS) connectivity, verify the bouncer configuration has the proper API URL and key. +:::info +Property names may vary (*api_url*, *api_key*, or *lapi_url*, *lapi_key*, ...). Check your [bouncer's doc](/u/bouncers/intro) +::: + 1. **api_url**: Must point to your BLaaS endpoint (e.g., `https://admin.api.crowdsec.net/v1/decisions/stream`) -2. **api_key**: Your BLaaS API key (found in the Console under your Blocklist integration) +2.
**api_key**: Your BLaaS API key *(Found in the Console in your Blocklist integration section, on creation or on "Refresh Credentials")* **Common configuration file locations:** -- **NGINX**: `/etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf` -- **Traefik**: `/etc/crowdsec/bouncers/crowdsec-traefik-bouncer.yaml` -- **HAProxy**: `/etc/crowdsec/bouncers/crowdsec-haproxy-bouncer.conf` -- **Cloudflare**: `/etc/crowdsec/bouncers/crowdsec-cloudflare-bouncer.yaml` -- **WordPress**: Admin panel β†’ CrowdSec Settings +- **On host**: `/etc/crowdsec/bouncers/crowdsec--bouncer.conf` +- ie: **NGINX**: `/etc/crowdsec/bouncers/crowdsec-nginx-bouncer.conf` +- **WordPress**: Admin panel β†’ CrowdSec β†’ **Connection details** Section Check the configuration file: ```bash From 1363c083da15cc1b03d26923bfd6904b1a8627a9 Mon Sep 17 00:00:00 2001 From: jdv Date: Mon, 24 Nov 2025 14:35:18 +0100 Subject: [PATCH 08/13] ready for deeper review with foss team --- .../troubleshooting/issue_engine_no_alerts.md | 2 +- .../troubleshooting/issue_engine_too_many_alerts.md | 6 +++--- .../troubleshooting/issue_log_processor_offline.md | 12 ++++++------ .../troubleshooting/issue_lp_no_alerts.md | 6 ++---- .../troubleshooting/issue_lp_no_logs_read.md | 3 ++- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md index 00f7997df..2fba19054 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_engine_no_alerts.md @@ -3,7 +3,7 @@ title: Engine No Alerts id: issue_engine_no_alerts --- -The **Engine No Alerts** issue appears when your Security Engine has been running but hasn't generated any alerts in the last **48 hours**. This usually indicates that logs aren't being processed properly or scenarios aren't matching any threats. 
+The **Engine No Alerts** issue appears when your Security Engine has been running but hasn't generated any alerts in the last **48 hours**. ## What Triggers This Issue diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md index 0b05d2c2f..e6ac43a6d 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_engine_too_many_alerts.md @@ -3,20 +3,20 @@ title: Engine Too Many Alerts id: issue_engine_too_many_alerts --- -The **Engine Too Many Alerts** issue appears when your Security Engine generates an abnormally high volume of alertsβ€”more than 250,000 in a 6-hour period. This usually indicates a misconfigured scenario, false positives, or an ongoing large-scale attack. +The **Engine Too Many Alerts** issue appears when your Security Engine generates an abnormally high volume of alerts (more than 250,000 in a 6-hour period). This usually indicates a misconfigured scenario, false positives, or an ongoing large-scale attack. ## What Triggers This Issue - **Trigger condition**: More than 250,000 alerts in 6 hours - **Criticality**: High -- **Impact**: May indicate false positives, performance issues, or a real attack +- **Impact**: May indicate misconfiguration, performance issues, or a real large scale attack. ## Common Root Causes - **Misconfigured or overly sensitive scenario**: A scenario with thresholds set too low or matching too broadly can trigger excessive alerts. - **Log duplication**: The same log file is being read multiple times due to acquisition misconfiguration. -- **Actual large-scale attack**: A genuine distributed attack (DDoS, brute force campaign) targeting your infrastructure. - **Parser creating duplicate events**: A parser issue causing the same log line to generate multiple events. 
+- **Actual large-scale attack**: A genuine distributed attack (DDoS, brute force campaign) targeting your infrastructure. ## How to Diagnose diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md index 47d5d51de..1b05efe74 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md @@ -3,20 +3,20 @@ title: Log Processor Offline id: issue_log_processor_offline --- -When the Console or a notification rule reports **Log Processor Offline**, the local agent has not checked in with the Local API (LAPI) for more than 24 hours. The alert is different from **Log Processor No Alert**, which only means logs were parsed but no scenarios fired. Use the sections below to identify why the heartbeat stopped and how to bring the agent back online. +When a Log Processor (Security Engine used to read log in a distributed setup) has not checked in with the Local API (LAPI) of the central Security Engine for more than 24 hours. ## What Triggers This Issue - **Trigger condition**: Log Processor has not checked in with Local API for more than 24 hours - **Criticality**: πŸ”₯ Critical -- **Impact**: The agent is not communicating with the Local API - no alerts from this agent will reach the Console +- **Impact**: Services supposed to be watched by this LP are not anymore - potential threats undetected ## Common Root Causes -- **Service stopped or stuck**: The crowdsec service has crashed, hung, or was manually stopped on the agent host. -- **Machine not validated or credentials revoked**: The agent's credentials are pending validation, were removed from the LAPI, or the credentials file is missing/corrupt. +- **Service stopped or stuck**: The crowdsec service of this LP has crashed, hung, or was manually stopped on the agent host. 
+- **Machine not validated or credentials revoked**: The agent's credentials are pending validation, were removed from the central LAPI, or the credentials file is missing/corrupt. - **Local API unreachable from agent**: Network issues, firewall rules, or configuration errors prevent the agent from connecting to the LAPI endpoint. -- **Local API service unavailable**: The LAPI service itself is down or not responding, affecting all agents trying to connect. +- **Local API service unavailable**: The central LAPI service itself is down or not responding, affecting all agents trying to connect *(would have triggered an other issue)*. ## How to Diagnose @@ -217,7 +217,7 @@ To identify and clean up stale machines: To prevent accumulation of stale machines in Kubernetes, consider using [auto-registration tokens](/u/user_guides/machines_mgmt#machine-auto-validation) which handle pod lifecycle automatically. ::: -### If the Local API is unreachable from the agent +### If the central LAPI is unreachable from the agent Open the required port on firewalls or security groups: diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md index cfd073d78..a1ce71e6d 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_alerts.md @@ -16,15 +16,13 @@ The **LP No Alerts** issue appears when a specific Log Processor (agent) is runn - **Scenarios in simulation mode**: Detection scenarios are installed but running in simulation mode on this agent. - **Low-activity monitored service**: The service monitored by this Log Processor may genuinely have no malicious activity. - - -**Other Issues** +#### Other Issues - πŸ”— **[No logs being read](/u/troubleshooting/issue_lp_no_logs_read)**: The acquisition configuration on this specific Log Processor may be missing, disabled, or pointing to empty sources. 
- πŸ”— **[No logs being parsed](/u/troubleshooting/issue_lp_no_logs_parsed)**: Logs are being read but parsers can't process them due to format mismatches or missing collections. ## How to Diagnose -If it's not due to [other issues](#otherIssues), here are the diagnosis and resolutions for other root causes. +If it's not due to [other issues](#other-issues), here are the diagnosis and resolutions for other root causes. ### Identify the affected Log Processor diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_read.md b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_read.md index e8dc387c1..63c9838b0 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_read.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_lp_no_logs_read.md @@ -14,9 +14,10 @@ The **LP No Logs Read** issue appears when a Log Processor is running but hasn't ## Common Root Causes - **Missing acquisition configuration**: No acquisition files exist, or they're empty. -- **Incorrect log file paths**: Acquisition configuration points to paths that don't exist or have moved. +- **Incorrect Acquisition file configuration**: Acquisition configuration points to paths that don't exist or have moved. - **File permission issues**: CrowdSec doesn't have read access to the log files. - **Log files are empty or not being written**: The services being monitored aren't generating logs. +- **Incorrect Acquisition endpoint configuration**: Error in endpoint config, for acquisition types listening for incoming data (httpLogs, syslog,...) - **Acquisition type mismatch**: Wrong datasource type configured (e.g., using `file` instead of `journald`). - **Container/Kubernetes volume issues**: In containerized deployments, logs aren't mounted or accessible to the CrowdSec container. 
From b41ec174d6272e7bf6cbba47901e5820fa266bdb Mon Sep 17 00:00:00 2001 From: jdv Date: Mon, 24 Nov 2025 14:58:30 +0100 Subject: [PATCH 09/13] nano info about issue fixing --- .../unversioned/troubleshooting/issue_log_processor_offline.md | 2 ++ .../unversioned/troubleshooting/issue_rc_integration_offline.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md index 1b05efe74..53dc6b89b 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_log_processor_offline.md @@ -217,6 +217,8 @@ To identify and clean up stale machines: To prevent accumulation of stale machines in Kubernetes, consider using [auto-registration tokens](/u/user_guides/machines_mgmt#machine-auto-validation) which handle pod lifecycle automatically. ::: +Once pruned, the issues concerning those LPs will disappear on the next SE info update *(within 30 minutes)*. + ### If the central LAPI is unreachable from the agent Open the required port on firewalls or security groups: diff --git a/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md b/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md index 7ff4365fc..a3f08df6f 100644 --- a/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md +++ b/crowdsec-docs/unversioned/troubleshooting/issue_rc_integration_offline.md @@ -273,6 +273,8 @@ After making changes: - Check bouncer-specific documentation for test procedures - For web servers, you can test by temporarily adding a test decision +Once fixed, the issues concerning those RCs will disappear on the next SE info update *(within 30 minutes)*.
+ ## Bouncer-Specific Documentation - [NGINX Bouncer](/u/bouncers/nginx) From 4709c4dd357f44d8b3939c3766ed366ee9d71e75 Mon Sep 17 00:00:00 2001 From: jdv Date: Mon, 24 Nov 2025 18:17:02 +0100 Subject: [PATCH 10/13] simplification of main page --- .../troubleshooting/console_issues.md | 81 +++++-------------- 1 file changed, 18 insertions(+), 63 deletions(-) diff --git a/crowdsec-docs/unversioned/troubleshooting/console_issues.md b/crowdsec-docs/unversioned/troubleshooting/console_issues.md index d19521e02..df9331414 100644 --- a/crowdsec-docs/unversioned/troubleshooting/console_issues.md +++ b/crowdsec-docs/unversioned/troubleshooting/console_issues.md @@ -3,74 +3,29 @@ title: Console Health Check Issues id: console_issues --- -The CrowdSec Console monitors your infrastructure health and raises alerts when issues are detected. This page lists all possible health check issues, their trigger conditions, and links to detailed troubleshooting guides. +The CrowdSec Console monitors the health of your CrowdSec stack *(Security Engines, Log Processors, remediation components and blocklist integrations)* and raises alerts when issues are detected. +This page lists all possible health check issues, their trigger conditions, and links to detailed troubleshooting guides. 
## Understanding Issue Criticality -- **Critical**: Immediate attention required - core functionality is impaired -- **High**: Important issue that should be addressed soon - may impact protection effectiveness +- πŸ”₯ **Critical**: Immediate attention required - core functionality is impaired +- ⚠️ **High**: Important issue that should be addressed soon - may impact protection effectiveness +- πŸ’‘ **Recomended**: Additionnal actions that will continue improving your security posture *(comming in next iterations of Stack Health)* +- 🌟 **Bonus** : Optimization advises and upper tier recommendation with great return on value *(comming in next iterations of Stack Health)* ## Health Check Issues Overview -### Security Engine Issues - -#### Security Engine Offline -- **Criticality**: πŸ”₯ Critical -- **Trigger**: Security Engine has not reported to the Console for more than 24 hours -- **Description**: The core CrowdSec service (Log Processor + Local API) has stopped communicating with the Console infrastructure -- **Resolution**: [Security Engine Offline Troubleshooting](/u/troubleshooting/issue_security_engine_offline) - -#### Engine No Alerts -- **Criticality**: ⚠️ High -- **Trigger**: No alerts generated in the last 48 hours -- **Description**: The Security Engine is running but hasn't detected any threats, which may indicate logs aren't being processed or scenarios aren't triggering -- **Resolution**: [Engine No Alerts Troubleshooting](/u/troubleshooting/issue_engine_no_alerts) - -#### Engine Too Many Alerts -- **Criticality**: ⚠️ High -- **Trigger**: More than 250,000 alerts generated in 6 hours -- **Description**: Abnormally high alert volume may indicate a misconfigured scenario, false positives, or an ongoing large-scale attack -- **Resolution**: [Engine Too Many Alerts Troubleshooting](/u/troubleshooting/issue_engine_too_many_alerts) - -### Log Processor Issues - -#### Log Processor Offline -- **Criticality**: πŸ”₯ Critical -- **Trigger**: Log Processor has 
not checked in with Local API for more than 24 hours -- **Description**: The local agent component has stopped communicating with the Local API -- **Resolution**: [Log Processor Offline Troubleshooting](/u/troubleshooting/issue_log_processor_offline) - -#### LP No Alerts -- **Criticality**: ⚠️ High -- **Trigger**: No alerts generated by this Log Processor in the last 48 hours -- **Description**: Logs may not be read, parsed correctly, or no scenarios are matching the parsed events -- **Resolution**: [LP No Alerts Troubleshooting](/u/troubleshooting/issue_lp_no_alerts) - -#### LP No Logs Read -- **Criticality**: πŸ”₯ Critical -- **Trigger**: No logs acquired in the last 24 hours -- **Description**: The acquisition configuration is missing, incorrect, or log sources are not producing data -- **Resolution**: [LP No Logs Read Troubleshooting](/u/troubleshooting/issue_lp_no_logs_read) - -#### LP No Logs Parsed -- **Criticality**: πŸ”₯ Critical -- **Trigger**: Logs are being read but none are successfully parsed in the last 48 hours -- **Description**: Parsers may be missing, log format may have changed, or there's a mismatch between acquisition type and parser -- **Resolution**: [LP No Logs Parsed Troubleshooting](/u/troubleshooting/issue_lp_no_logs_parsed) - -### Remediation Component Issues - -#### Firewall Integration Offline -- **Criticality**: πŸ”₯ Critical -- **Trigger**: Firewall bouncer has not pulled decisions for more than 24 hours -- **Description**: Firewall-based remediation components have stopped communicating with the Local API -- **Resolution**: [Firewall Integration Offline Troubleshooting](/u/troubleshooting/issue_fw_integration_offline) - -#### RC Integration Offline -- **Criticality**: πŸ”₯ Critical -- **Trigger**: Remediation Component has not pulled decisions for more than 24 hours -- **Description**: Non-firewall remediation components (web servers, reverse proxies, etc.) 
have stopped communicating with the Local API -- **Resolution**: [RC Integration Offline Troubleshooting](/u/troubleshooting/issue_rc_integration_offline) +| Issue | Criticality | Summary | Resolution | +|-------|-------------|---------|------------| +| **Security Engine Offline** | πŸ”₯ Critical | Security Engine has not reported to Console for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_security_engine_offline) | +| **Engine No Alerts** | ⚠️ High | No alerts generated in the last 48 hours | [Troubleshooting](/u/troubleshooting/issue_engine_no_alerts) | +| **Engine Too Many Alerts** | ⚠️ High | More than 250,000 alerts in 6 hours | [Troubleshooting](/u/troubleshooting/issue_engine_too_many_alerts) | +| **Log Processor Offline** | πŸ”₯ Critical | Log Processor has not checked in with LAPI for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_log_processor_offline) | +| **LP No Alerts** | ⚠️ High | Log Processor has not generated alerts in 48 hours | [Troubleshooting](/u/troubleshooting/issue_lp_no_alerts) | +| **LP No Logs Read** | πŸ”₯ Critical | No logs acquired in the last 24 hours | [Troubleshooting](/u/troubleshooting/issue_lp_no_logs_read) | +| **LP No Logs Parsed** | πŸ”₯ Critical | Logs read but none parsed in the last 48 hours | [Troubleshooting](/u/troubleshooting/issue_lp_no_logs_parsed) | +| **Firewall Integration Offline** | πŸ”₯ Critical | Firewall has not pulled from BLaaS endpoint for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_fw_integration_offline) | +| **RC Integration Offline** | πŸ”₯ Critical | Remediation Component has not pulled from endpoint for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_rc_integration_offline) | ## Issue Dependencies @@ -90,7 +45,7 @@ Understanding these dependencies helps you troubleshoot more efficiently by addr ## Future Enhancements -The CrowdSec Console will continue to evolve with additional health checks and recommendations. 
See the [Future Console Health Check Issues](/u/troubleshooting/future_console_issues) page for planned features including: +For planned and experimental health checks, see [Future Console Health Check Issues](/u/troubleshooting/future_console_issues) page for planned features including: - Enhanced configuration validation - Blocklists optimization recommendations From e453e5db6b435c32e0955a94523f959ceb5f852f Mon Sep 17 00:00:00 2001 From: jdv Date: Mon, 24 Nov 2025 20:13:27 +0100 Subject: [PATCH 11/13] extract pluging base --- crowdsec-docs/docusaurus.config.ts | 7 +- .../plugins/remark-extract/extract-plugin.js | 133 ++++++++++++++++++ .../post_installation/troubleshoot.mdx | 4 +- .../troubleshooting/console_issues.md | 4 +- .../unversioned/troubleshooting/intro.md | 28 ++-- 5 files changed, 159 insertions(+), 17 deletions(-) create mode 100644 crowdsec-docs/plugins/remark-extract/extract-plugin.js diff --git a/crowdsec-docs/docusaurus.config.ts b/crowdsec-docs/docusaurus.config.ts index 5a6133218..0491957ff 100644 --- a/crowdsec-docs/docusaurus.config.ts +++ b/crowdsec-docs/docusaurus.config.ts @@ -6,6 +6,8 @@ import { themes } from "prism-react-renderer"; import tailwindPlugin from "./plugins/tailwind-config"; import { ctiApiSidebar, guidesSideBar, remediationSideBar } from "./sidebarsUnversioned"; +const extractPlugin = require('./plugins/remark-extract/extract-plugin'); + const generateCurrentAndNextRedirects = (s) => [ { from: `/docs/${s}`, @@ -291,6 +293,9 @@ const config: Config = { path: "/next", }, }, + remarkPlugins: [ + [extractPlugin, {paths: ['./unversioned']}], + ] }, blog: { showReadingTime: true, @@ -317,7 +322,7 @@ const config: Config = { ["./plugins/gtag/index.ts", { trackingID: "G-0TFBMNTDFQ" }], ["@docusaurus/plugin-client-redirects", { redirects }], tailwindPlugin, - ], + ] }; export default config; diff --git a/crowdsec-docs/plugins/remark-extract/extract-plugin.js b/crowdsec-docs/plugins/remark-extract/extract-plugin.js new file mode 
100644 index 000000000..77c913628 --- /dev/null +++ b/crowdsec-docs/plugins/remark-extract/extract-plugin.js @@ -0,0 +1,133 @@ +const fs = require('fs'); +const path = require('path'); + +// 1. Helper: Recursive directory walker +const getAllFiles = (dirPath, arrayOfFiles) => { + if (!fs.existsSync(dirPath)) return arrayOfFiles || []; + + const files = fs.readdirSync(dirPath); + arrayOfFiles = arrayOfFiles || []; + + files.forEach((file) => { + const fullPath = path.join(dirPath, file); + if (fs.statSync(fullPath).isDirectory()) { + getAllFiles(fullPath, arrayOfFiles); + } else { + if (file.endsWith('.md') || file.endsWith('.mdx')) { + arrayOfFiles.push(fullPath); + } + } + }); + + return arrayOfFiles; +}; + +// 2. Helper: Frontmatter Parser +const getDocId = (content, filename) => { + const idMatch = content.match(/^---\s+[\s\S]*?\nid:\s*(.*?)\s*[\n\r]/m); + if (idMatch && idMatch[1]) { + return idMatch[1].replace(/['"]/g, '').trim(); + } + return filename; +}; + +// 3. Helper: Attribute getter +const getAttribute = (node, name) => { + const attr = node.attributes?.find((a) => a.name === name); + return attr ? attr.value : null; +}; + +// Registry to store snippets +const extractRegistry = new Map(); + +module.exports = function plugin(options = {}) { + let sourceDirs = options.paths || (options.path ? [options.path] : ['./docs']); + if (!Array.isArray(sourceDirs)) sourceDirs = [sourceDirs]; + + return async (root) => { + // Import Docusaurus internal dependencies + const { visit } = await import('unist-util-visit'); + const { unified } = await import('unified'); + const remarkParse = (await import('remark-parse')).default; + const remarkMdx = (await import('remark-mdx')).default; + // We add GFM (GitHub Flavored Markdown) to support Tables, Strikethrough, etc. 
+ const remarkGfm = (await import('remark-gfm')).default; + + // --- PHASE A: INDEXING --- + if (extractRegistry.size === 0) { + console.log(`[ExtractPlugin] Indexing snippets from: ${sourceDirs.join(', ')}`); + + let allFiles = []; + for (const dir of sourceDirs) { + const absDir = path.resolve(process.cwd(), dir); + getAllFiles(absDir, allFiles); + } + + // Configure processor with GFM support + const processor = unified() + .use(remarkParse) + .use(remarkGfm) + .use(remarkMdx); + + for (const filePath of allFiles) { + try { + let content = fs.readFileSync(filePath, 'utf8'); + + // --- CLEANING CONTENT --- + // 1. Remove HTML comments to prevent MDX crash + content = content.replace(/<!--[\s\S]*?-->/g, ''); + + // 2. Remove Frontmatter --- ... --- to focus on body + content = content.replace(/^---\s+[\s\S]*?---/, ''); + + // --- END CLEANING --- + + // Get Doc ID from original file (or filename) + const rawContent = fs.readFileSync(filePath, 'utf8'); + const filename = path.basename(filePath, path.extname(filePath)); + const docId = getDocId(rawContent, filename); + + const tree = processor.parse(content); + + visit(tree, 'mdxJsxFlowElement', (node) => { + if (node.name === 'extract') { + const extractId = getAttribute(node, 'id'); + if (extractId) { + const key = `${docId}:${extractId}`; + // We store the AST children (the content inside the tag) + extractRegistry.set(key, node.children); + } + } + }); + } catch (e) { + const filename = path.basename(filePath); + console.warn(`[ExtractPlugin] Skipped ${filename}: ${e.message}`); + } + } + console.log(`[ExtractPlugin] Indexed ${extractRegistry.size} snippets.`); + } + + // --- PHASE B: TRANSFORMING --- + visit(root, 'mdxJsxFlowElement', (node, index, parent) => { + // 1. Unwrap (Display content normally in source page) + if (node.name === 'extract') { + parent.children.splice(index, 1, ...node.children); + return index; + } + + // 2.
Inject (Copy content from registry) + if (node.name === 'extractCopy') { + const targetId = getAttribute(node, 'id'); + + if (targetId && extractRegistry.has(targetId)) { + // Clone the AST nodes to avoid reference issues + const cloned = JSON.parse(JSON.stringify(extractRegistry.get(targetId))); + parent.children.splice(index, 1, ...cloned); + return index + cloned.length; + } else { + console.warn(`[ExtractPlugin] Warning: Extract ID "${targetId}" not found in registry.`); + } + } + }); + }; +}; \ No newline at end of file diff --git a/crowdsec-docs/unversioned/getting_started/post_installation/troubleshoot.mdx b/crowdsec-docs/unversioned/getting_started/post_installation/troubleshoot.mdx index 106f58898..64b993cd9 100644 --- a/crowdsec-docs/unversioned/getting_started/post_installation/troubleshoot.mdx +++ b/crowdsec-docs/unversioned/getting_started/post_installation/troubleshoot.mdx @@ -9,7 +9,7 @@ import FormattedTabs from '@site/src/components/formatted-tabs'; # Troubleshoot -This troubleshoot section is intended to help you resolve common issues that may arise during the installation process. You can find extensive [troubleshooting documentation](/u/troubleshooting/intro.md) if this document does not resolve your issues. +This troubleshoot section is intended to help you resolve common issues that may arise during the installation process. You can find extensive [troubleshooting documentation](/u/troubleshooting/intro) if this document does not resolve your issues. # Logs and Errors @@ -89,6 +89,6 @@ After you have made the changes you will need to restart the CrowdSec service. ## Next Steps? -If the above hasn't resolved the issue you are facing, you can find more detailed troubleshooting documentation [here](/u/troubleshooting/intro.md). +If the above hasn't resolved the issue you are facing, you can find more detailed troubleshooting documentation [here](/u/troubleshooting/intro). 
If you have resolved the issue you can continue with the [post installation steps](/getting_started/next_steps.mdx#1-crowdsec-console-). diff --git a/crowdsec-docs/unversioned/troubleshooting/console_issues.md b/crowdsec-docs/unversioned/troubleshooting/console_issues.md index df9331414..d4f98931a 100644 --- a/crowdsec-docs/unversioned/troubleshooting/console_issues.md +++ b/crowdsec-docs/unversioned/troubleshooting/console_issues.md @@ -14,7 +14,7 @@ This page lists all possible health check issues, their trigger conditions, and - 🌟 **Bonus** : Optimization advises and upper tier recommendation with great return on value *(comming in next iterations of Stack Health)* ## Health Check Issues Overview - + | Issue | Criticality | Summary | Resolution | |-------|-------------|---------|------------| | **Security Engine Offline** | πŸ”₯ Critical | Security Engine has not reported to Console for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_security_engine_offline) | @@ -26,7 +26,7 @@ This page lists all possible health check issues, their trigger conditions, and | **LP No Logs Parsed** | πŸ”₯ Critical | Logs read but none parsed in the last 48 hours | [Troubleshooting](/u/troubleshooting/issue_lp_no_logs_parsed) | | **Firewall Integration Offline** | πŸ”₯ Critical | Firewall has not pulled from BLaaS endpoint for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_fw_integration_offline) | | **RC Integration Offline** | πŸ”₯ Critical | Remediation Component has not pulled from endpoint for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_rc_integration_offline) | - + ## Issue Dependencies Some issues are related and share common root causes: diff --git a/crowdsec-docs/unversioned/troubleshooting/intro.md b/crowdsec-docs/unversioned/troubleshooting/intro.md index 32913e84f..f7f1fa85c 100644 --- a/crowdsec-docs/unversioned/troubleshooting/intro.md +++ b/crowdsec-docs/unversioned/troubleshooting/intro.md @@ -21,23 +21,23 @@ If you received a health check 
alert from the CrowdSec Console, check out the [* ## Troubleshooting by Topic -* [Security Engine Troubleshooting](/u/troubleshooting/security_engine.mdx) -* [Remediation Components Troubleshooting](/u/troubleshooting/remediation_components.mdx) -* [CTI Troubleshooting](/u/troubleshooting/cti.mdx) +* [Security Engine Troubleshooting](/u/troubleshooting/security_engine) +* [Remediation Components Troubleshooting](/u/troubleshooting/remediation_components) +* [CTI Troubleshooting](/u/troubleshooting/cti) ## Troubleshooting by Issue Individual troubleshooting guides for specific Console alerts: -* [Security Engine Offline](/u/troubleshooting/security_engine_offline) - Security Engine not reporting to Console -* [Engine No Alerts](/u/troubleshooting/engine_no_alerts) - No alerts generated in 48 hours -* [Engine Too Many Alerts](/u/troubleshooting/engine_too_many_alerts) - Abnormally high alert volume -* [Log Processor Offline](/u/troubleshooting/log_processor_offline) - Log Processor not checking in -* [LP No Alerts](/u/troubleshooting/lp_no_alerts) - Log Processor not generating alerts -* [LP No Logs Read](/u/troubleshooting/lp_no_logs_read) - No logs being acquired -* [LP No Logs Parsed](/u/troubleshooting/lp_no_logs_parsed) - Logs read but not parsed -* [Firewall Integration Offline](/u/troubleshooting/fw_integration_offline) - Firewall bouncer not pulling decisions -* [RC Integration Offline](/u/troubleshooting/rc_integration_offline) - Remediation component not pulling decisions +* [Security Engine Offline](/u/troubleshooting/issue_security_engine_offline) - Security Engine not reporting to Console +* [Engine No Alerts](/u/troubleshooting/issue_engine_no_alerts) - No alerts generated in 48 hours +* [Engine Too Many Alerts](/u/troubleshooting/issue_engine_too_many_alerts) - Abnormally high alert volume +* [Log Processor Offline](/u/troubleshooting/issue_log_processor_offline) - Log Processor not checking in +* [LP No Alerts](/u/troubleshooting/issue_lp_no_alerts) - 
Log Processor not generating alerts +* [LP No Logs Read](/u/troubleshooting/issue_lp_no_logs_read) - No logs being acquired +* [LP No Logs Parsed](/u/troubleshooting/issue_lp_no_logs_parsed) - Logs read but not parsed +* [Firewall Integration Offline](/u/troubleshooting/issue_fw_integration_offline) - Firewall bouncer not pulling decisions +* [RC Integration Offline](/u/troubleshooting/issue_rc_integration_offline) - Remediation component not pulling decisions ## Community support @@ -91,3 +91,7 @@ When using `cscli` to list your parsers, scenarios and collections, some might a ### Which information is sent to your services ? See [CAPI documentation](/docs/next/central_api/intro). + +### stack Health issues list + + \ No newline at end of file From 957c071e1f2c500a1ba570311c81ffafeb65bcbe Mon Sep 17 00:00:00 2001 From: jdv Date: Mon, 24 Nov 2025 21:43:24 +0100 Subject: [PATCH 12/13] almost there but still not --- .../plugins/remark-extract/extract-plugin.js | 95 +++++++++---------- .../troubleshooting/console_issues.md | 8 +- .../unversioned/troubleshooting/intro.md | 3 +- 3 files changed, 51 insertions(+), 55 deletions(-) diff --git a/crowdsec-docs/plugins/remark-extract/extract-plugin.js b/crowdsec-docs/plugins/remark-extract/extract-plugin.js index 77c913628..862b96065 100644 --- a/crowdsec-docs/plugins/remark-extract/extract-plugin.js +++ b/crowdsec-docs/plugins/remark-extract/extract-plugin.js @@ -1,13 +1,11 @@ const fs = require('fs'); const path = require('path'); -// 1. Helper: Recursive directory walker +// Recursive file walker const getAllFiles = (dirPath, arrayOfFiles) => { if (!fs.existsSync(dirPath)) return arrayOfFiles || []; - const files = fs.readdirSync(dirPath); arrayOfFiles = arrayOfFiles || []; - files.forEach((file) => { const fullPath = path.join(dirPath, file); if (fs.statSync(fullPath).isDirectory()) { @@ -18,11 +16,9 @@ const getAllFiles = (dirPath, arrayOfFiles) => { } } }); - return arrayOfFiles; }; -// 2. 
Helper: Frontmatter Parser const getDocId = (content, filename) => { const idMatch = content.match(/^---\s+[\s\S]*?\nid:\s*(.*?)\s*[\n\r]/m); if (idMatch && idMatch[1]) { @@ -31,31 +27,29 @@ const getDocId = (content, filename) => { return filename; }; -// 3. Helper: Attribute getter const getAttribute = (node, name) => { const attr = node.attributes?.find((a) => a.name === name); return attr ? attr.value : null; }; -// Registry to store snippets +// Global Registry const extractRegistry = new Map(); +let isIndexed = false; module.exports = function plugin(options = {}) { let sourceDirs = options.paths || (options.path ? [options.path] : ['./docs']); if (!Array.isArray(sourceDirs)) sourceDirs = [sourceDirs]; - return async (root) => { - // Import Docusaurus internal dependencies + return async (root, vfile) => { const { visit } = await import('unist-util-visit'); const { unified } = await import('unified'); const remarkParse = (await import('remark-parse')).default; const remarkMdx = (await import('remark-mdx')).default; - // We add GFM (GitHub Flavored Markdown) to support Tables, Strikethrough, etc. const remarkGfm = (await import('remark-gfm')).default; // --- PHASE A: INDEXING --- - if (extractRegistry.size === 0) { - console.log(`[ExtractPlugin] Indexing snippets from: ${sourceDirs.join(', ')}`); + if (!isIndexed) { + console.log(`\n[ExtractPlugin] ========== INDEXING ==========`); let allFiles = []; for (const dir of sourceDirs) { @@ -63,69 +57,68 @@ module.exports = function plugin(options = {}) { getAllFiles(absDir, allFiles); } - // Configure processor with GFM support - const processor = unified() - .use(remarkParse) - .use(remarkGfm) - .use(remarkMdx); + const processor = unified().use(remarkParse).use(remarkGfm).use(remarkMdx); for (const filePath of allFiles) { try { let content = fs.readFileSync(filePath, 'utf8'); - - // --- CLEANING CONTENT --- - // 1. Remove HTML comments to prevent MDX crash content = content.replace(//g, ''); - - // 2. 
Remove Frontmatter --- ... --- to focus on body content = content.replace(/^---\s+[\s\S]*?---/, ''); - // --- END CLEANING --- - - // Get Doc ID from original file (or filename) const rawContent = fs.readFileSync(filePath, 'utf8'); const filename = path.basename(filePath, path.extname(filePath)); const docId = getDocId(rawContent, filename); const tree = processor.parse(content); - visit(tree, 'mdxJsxFlowElement', (node) => { - if (node.name === 'extract') { - const extractId = getAttribute(node, 'id'); + visit(tree, ['mdxJsxFlowElement', 'mdxJsxTextElement'], (node) => { + // STRATEGY CHANGE: Look for
+ if (node.name === 'div') { + const extractId = getAttribute(node, 'data-extract'); if (extractId) { - const key = `${docId}:${extractId}`; - // We store the AST children (the content inside the tag) + // If ID does not contain a colon, prepend the DocID automatically + const key = extractId.includes(':') ? extractId : `${docId}:${extractId}`; extractRegistry.set(key, node.children); } } }); - } catch (e) { - const filename = path.basename(filePath); - console.warn(`[ExtractPlugin] Skipped ${filename}: ${e.message}`); - } + } catch (e) { /* ignore */ } } + isIndexed = true; + console.log(`[ExtractPlugin] Indexed ${extractRegistry.size} snippets.`); + extractRegistry.forEach((val, key) => console.log(` - ${key}`)); + console.log(`[ExtractPlugin] ========== INDEXING DONE==========\n`); } // --- PHASE B: TRANSFORMING --- - visit(root, 'mdxJsxFlowElement', (node, index, parent) => { - // 1. Unwrap (Display content normally in source page) - if (node.name === 'extract') { - parent.children.splice(index, 1, ...node.children); - return index; - } - - // 2. Inject (Copy content from registry) - if (node.name === 'extractCopy') { - const targetId = getAttribute(node, 'id'); + visit(root, ['mdxJsxFlowElement', 'mdxJsxTextElement'], (node, index, parent) => { + + // We only care about DIVs + if (node.name === 'div') { - if (targetId && extractRegistry.has(targetId)) { - // Clone the AST nodes to avoid reference issues - const cloned = JSON.parse(JSON.stringify(extractRegistry.get(targetId))); - parent.children.splice(index, 1, ...cloned); - return index + cloned.length; - } else { - console.warn(`[ExtractPlugin] Warning: Extract ID "${targetId}" not found in registry.`); + // 1. Handle Definitions:
+ const extractId = getAttribute(node, 'data-extract'); + if (extractId) { + // Unwrap: Remove the div wrapper, leave the content + if (parent && Array.isArray(parent.children)) { + parent.children.splice(index, 1, ...node.children); + return index; + } + } + + // 2. Handle Copies:
+ const copyId = getAttribute(node, 'data-extract-copy'); + if (copyId) { + if (extractRegistry.has(copyId)) { + const clonedNodes = JSON.parse(JSON.stringify(extractRegistry.get(copyId))); + if (parent && Array.isArray(parent.children)) { + parent.children.splice(index, 1, ...clonedNodes); + return index + clonedNodes.length; + } + } else { + console.error(`[ExtractPlugin] ❌ Missing Snippet: "${copyId}"`); + } } } }); diff --git a/crowdsec-docs/unversioned/troubleshooting/console_issues.md b/crowdsec-docs/unversioned/troubleshooting/console_issues.md index d4f98931a..049a75d7a 100644 --- a/crowdsec-docs/unversioned/troubleshooting/console_issues.md +++ b/crowdsec-docs/unversioned/troubleshooting/console_issues.md @@ -14,7 +14,8 @@ This page lists all possible health check issues, their trigger conditions, and - 🌟 **Bonus** : Optimization advises and upper tier recommendation with great return on value *(comming in next iterations of Stack Health)* ## Health Check Issues Overview - +
+ | Issue | Criticality | Summary | Resolution | |-------|-------------|---------|------------| | **Security Engine Offline** | πŸ”₯ Critical | Security Engine has not reported to Console for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_security_engine_offline) | @@ -26,10 +27,11 @@ This page lists all possible health check issues, their trigger conditions, and | **LP No Logs Parsed** | πŸ”₯ Critical | Logs read but none parsed in the last 48 hours | [Troubleshooting](/u/troubleshooting/issue_lp_no_logs_parsed) | | **Firewall Integration Offline** | πŸ”₯ Critical | Firewall has not pulled from BLaaS endpoint for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_fw_integration_offline) | | **RC Integration Offline** | πŸ”₯ Critical | Remediation Component has not pulled from endpoint for 24+ hours | [Troubleshooting](/u/troubleshooting/issue_rc_integration_offline) | - + +
## Issue Dependencies -Some issues are related and share common root causes: +Some issues are related and share common root causes: - **Engine No Alerts** may be caused by: - LP No Logs Read diff --git a/crowdsec-docs/unversioned/troubleshooting/intro.md b/crowdsec-docs/unversioned/troubleshooting/intro.md index f7f1fa85c..d9eecfff7 100644 --- a/crowdsec-docs/unversioned/troubleshooting/intro.md +++ b/crowdsec-docs/unversioned/troubleshooting/intro.md @@ -93,5 +93,6 @@ When using `cscli` to list your parsers, scenarios and collections, some might a See [CAPI documentation](/docs/next/central_api/intro). ### stack Health issues list +YEP: - \ No newline at end of file +
From b6bd5ce11a8fd7a72973477196c5891e221e1540 Mon Sep 17 00:00:00 2001 From: jdv Date: Mon, 24 Nov 2025 21:53:34 +0100 Subject: [PATCH 13/13] preprocessor success --- crowdsec-docs/docusaurus.config.ts | 8 +- crowdsec-docs/plugins/extract-preprocessor.js | 104 +++++++++++++++ .../plugins/remark-extract/extract-plugin.js | 126 ------------------ .../unversioned/troubleshooting/intro.md | 3 +- 4 files changed, 108 insertions(+), 133 deletions(-) create mode 100644 crowdsec-docs/plugins/extract-preprocessor.js delete mode 100644 crowdsec-docs/plugins/remark-extract/extract-plugin.js diff --git a/crowdsec-docs/docusaurus.config.ts b/crowdsec-docs/docusaurus.config.ts index 0491957ff..c876f3b1b 100644 --- a/crowdsec-docs/docusaurus.config.ts +++ b/crowdsec-docs/docusaurus.config.ts @@ -6,7 +6,7 @@ import { themes } from "prism-react-renderer"; import tailwindPlugin from "./plugins/tailwind-config"; import { ctiApiSidebar, guidesSideBar, remediationSideBar } from "./sidebarsUnversioned"; -const extractPlugin = require('./plugins/remark-extract/extract-plugin'); +const extractPreprocessor = require("./plugins/extract-preprocessor"); const generateCurrentAndNextRedirects = (s) => [ { @@ -222,6 +222,7 @@ const config: Config = { admonitions: true, headingIds: true, }, + preprocessor:extractPreprocessor }, stylesheets: [ { @@ -292,10 +293,7 @@ const config: Config = { current: { path: "/next", }, - }, - remarkPlugins: [ - [extractPlugin, {paths: ['./unversioned']}], - ] + } }, blog: { showReadingTime: true, diff --git a/crowdsec-docs/plugins/extract-preprocessor.js b/crowdsec-docs/plugins/extract-preprocessor.js new file mode 100644 index 000000000..aaa07affc --- /dev/null +++ b/crowdsec-docs/plugins/extract-preprocessor.js @@ -0,0 +1,104 @@ +const fs = require('fs'); +const path = require('path'); + +// --- CONFIGURATION --- +// The directories to scan for snippets +const DOCS_DIRS = ['./docs', './unversioned']; +// --------------------- + +const snippetRegistry = new 
Map(); +let isIndexed = false; + +// Helper: Recursively find all .md/.mdx files +const getAllFiles = (dirPath, arrayOfFiles = []) => { + if (!fs.existsSync(dirPath)) return arrayOfFiles; + + const files = fs.readdirSync(dirPath); + files.forEach((file) => { + const fullPath = path.join(dirPath, file); + if (fs.statSync(fullPath).isDirectory()) { + getAllFiles(fullPath, arrayOfFiles); + } else if (file.endsWith('.md') || file.endsWith('.mdx')) { + arrayOfFiles.push(fullPath); + } + }); + return arrayOfFiles; +}; + +// Helper: Extract Doc ID from Frontmatter +const getDocId = (content, filename) => { + const idMatch = content.match(/^---\s+[\s\S]*?\nid:\s*(.*?)\s*[\n\r]/m); + if (idMatch && idMatch[1]) { + return idMatch[1].replace(/['"]/g, '').trim(); + } + return filename; +}; + +// --- CORE LOGIC --- +const buildIndex = () => { + if (isIndexed) return; + console.log('[ExtractPreprocessor] ⚑ Indexing snippets via Regex...'); + + const allFiles = []; + DOCS_DIRS.forEach(dir => getAllFiles(path.resolve(process.cwd(), dir), allFiles)); + + let count = 0; + + // Regex to find:
<div data-extract="ID">CONTENT</div>
+ // We use [\s\S]*? to match content across multiple lines (lazy match) + const extractRegex = /]*>([\s\S]*?)<\/div>/g; + + allFiles.forEach(filePath => { + try { + const content = fs.readFileSync(filePath, 'utf8'); + const filename = path.basename(filePath, path.extname(filePath)); + const docId = getDocId(content, filename); + + let match; + // Loop through all matches in the file + while ((match = extractRegex.exec(content)) !== null) { + let [fullTag, extractId, snippetContent] = match; + + // Clean up the content (optional: trim leading/trailing newlines) + snippetContent = snippetContent.replace(/^\n+|\n+$/g, ''); + + // Generate Key: "docId:snippetId" + // If the ID already has a colon, assume user provided full ID + const key = extractId.includes(':') ? extractId : `${docId}:${extractId}`; + + snippetRegistry.set(key, snippetContent); + console.log(`[ExtractPreprocessor] ⚑ Indexed snippet: ${key}`); + count++; + } + } catch (e) { + console.warn(`[ExtractPreprocessor] Failed to read ${filePath}`); + } + }); + + isIndexed = true; + console.log(`[ExtractPreprocessor] ⚑ Indexed ${count} snippets.`); +}; + +// This function is called by Docusaurus for EVERY markdown file +const preprocessor = ({ filePath, fileContent }) => { + // 1. Ensure Index exists (runs once) + buildIndex(); + + // 2. Regex to find:
+ // Matches <div data-extract-copy="ID" /> OR <div data-extract-copy="ID"></div>
+ const copyRegex = /\s*(?:<\/div>)?/g; + + // 3. Replace with content + return fileContent.replace(copyRegex, (match, requestedId) => { + if (snippetRegistry.has(requestedId)) { + // Return the stored snippet content + return snippetRegistry.get(requestedId); + } else { + console.error(`[ExtractPreprocessor] ❌ Snippet not found: "${requestedId}" in ${path.basename(filePath)}`); + // Return an error message in the UI so you see it + return `> **Error: Snippet "${requestedId}" not found.**`; + } + }); +}; + +module.exports = preprocessor; \ No newline at end of file diff --git a/crowdsec-docs/plugins/remark-extract/extract-plugin.js b/crowdsec-docs/plugins/remark-extract/extract-plugin.js deleted file mode 100644 index 862b96065..000000000 --- a/crowdsec-docs/plugins/remark-extract/extract-plugin.js +++ /dev/null @@ -1,126 +0,0 @@ -const fs = require('fs'); -const path = require('path'); - -// Recursive file walker -const getAllFiles = (dirPath, arrayOfFiles) => { - if (!fs.existsSync(dirPath)) return arrayOfFiles || []; - const files = fs.readdirSync(dirPath); - arrayOfFiles = arrayOfFiles || []; - files.forEach((file) => { - const fullPath = path.join(dirPath, file); - if (fs.statSync(fullPath).isDirectory()) { - getAllFiles(fullPath, arrayOfFiles); - } else { - if (file.endsWith('.md') || file.endsWith('.mdx')) { - arrayOfFiles.push(fullPath); - } - } - }); - return arrayOfFiles; -}; - -const getDocId = (content, filename) => { - const idMatch = content.match(/^---\s+[\s\S]*?\nid:\s*(.*?)\s*[\n\r]/m); - if (idMatch && idMatch[1]) { - return idMatch[1].replace(/['"]/g, '').trim(); - } - return filename; -}; - -const getAttribute = (node, name) => { - const attr = node.attributes?.find((a) => a.name === name); - return attr ? attr.value : null; -}; - -// Global Registry -const extractRegistry = new Map(); -let isIndexed = false; - -module.exports = function plugin(options = {}) { - let sourceDirs = options.paths || (options.path ? 
[options.path] : ['./docs']); - if (!Array.isArray(sourceDirs)) sourceDirs = [sourceDirs]; - - return async (root, vfile) => { - const { visit } = await import('unist-util-visit'); - const { unified } = await import('unified'); - const remarkParse = (await import('remark-parse')).default; - const remarkMdx = (await import('remark-mdx')).default; - const remarkGfm = (await import('remark-gfm')).default; - - // --- PHASE A: INDEXING --- - if (!isIndexed) { - console.log(`\n[ExtractPlugin] ========== INDEXING ==========`); - - let allFiles = []; - for (const dir of sourceDirs) { - const absDir = path.resolve(process.cwd(), dir); - getAllFiles(absDir, allFiles); - } - - const processor = unified().use(remarkParse).use(remarkGfm).use(remarkMdx); - - for (const filePath of allFiles) { - try { - let content = fs.readFileSync(filePath, 'utf8'); - content = content.replace(//g, ''); - content = content.replace(/^---\s+[\s\S]*?---/, ''); - - const rawContent = fs.readFileSync(filePath, 'utf8'); - const filename = path.basename(filePath, path.extname(filePath)); - const docId = getDocId(rawContent, filename); - - const tree = processor.parse(content); - - visit(tree, ['mdxJsxFlowElement', 'mdxJsxTextElement'], (node) => { - // STRATEGY CHANGE: Look for
- if (node.name === 'div') { - const extractId = getAttribute(node, 'data-extract'); - if (extractId) { - // If ID does not contain a colon, prepend the DocID automatically - const key = extractId.includes(':') ? extractId : `${docId}:${extractId}`; - extractRegistry.set(key, node.children); - } - } - }); - } catch (e) { /* ignore */ } - } - isIndexed = true; - - console.log(`[ExtractPlugin] Indexed ${extractRegistry.size} snippets.`); - extractRegistry.forEach((val, key) => console.log(` - ${key}`)); - console.log(`[ExtractPlugin] ========== INDEXING DONE==========\n`); - } - - // --- PHASE B: TRANSFORMING --- - visit(root, ['mdxJsxFlowElement', 'mdxJsxTextElement'], (node, index, parent) => { - - // We only care about DIVs - if (node.name === 'div') { - - // 1. Handle Definitions:
- const extractId = getAttribute(node, 'data-extract'); - if (extractId) { - // Unwrap: Remove the div wrapper, leave the content - if (parent && Array.isArray(parent.children)) { - parent.children.splice(index, 1, ...node.children); - return index; - } - } - - // 2. Handle Copies:
- const copyId = getAttribute(node, 'data-extract-copy'); - if (copyId) { - if (extractRegistry.has(copyId)) { - const clonedNodes = JSON.parse(JSON.stringify(extractRegistry.get(copyId))); - if (parent && Array.isArray(parent.children)) { - parent.children.splice(index, 1, ...clonedNodes); - return index + clonedNodes.length; - } - } else { - console.error(`[ExtractPlugin] ❌ Missing Snippet: "${copyId}"`); - } - } - } - }); - }; -}; \ No newline at end of file diff --git a/crowdsec-docs/unversioned/troubleshooting/intro.md b/crowdsec-docs/unversioned/troubleshooting/intro.md index d9eecfff7..d3fd0a108 100644 --- a/crowdsec-docs/unversioned/troubleshooting/intro.md +++ b/crowdsec-docs/unversioned/troubleshooting/intro.md @@ -93,6 +93,5 @@ When using `cscli` to list your parsers, scenarios and collections, some might a See [CAPI documentation](/docs/next/central_api/intro). ### stack Health issues list -YEP: -
+