# EDA Rule Explorer

Parse the **Event-Driven Ansible (EDA) rulebook** YAML and visualize event routing logic.

## What EDA Does

EDA consumes security events from Kafka and triggers Ansible playbooks to revoke certificates. The rulebook defines 87 rules that route events to the correct PKI hierarchy and CA level.

```
Kafka (security-events) ──▶ EDA Rulebook (87 rules) ──▶ Ansible Playbook ──▶ Dogtag Revocation
```

## Rule Structure (87 Rules)

| Component | Count | Description |
|-----------|-------|-------------|
| Dogtag rules | 81 | 27 event patterns × 3 PKI types (RSA/ECC/PQC) |
| FreeIPA rules | 4 | Identity events also trigger FreeIPA revocation |
| Logging rules | 2 | Event logging/debugging |

### PKI Type Routing

- **RSA** (default): Matches when `pki_type` is `"rsa"`, `null`, or not defined
- **ECC**: Matches when `pki_type == "ecc"`
- **PQC**: Matches when `pki_type == "pqc"`

No catch-all fallback — every event type has explicit rules for all three PKI types.

## Configuration

The rulebook YAML is mounted read-only at `/home/jovyan/rulebooks/` inside the Jupyter container; this notebook reads `security-events.yml` from that directory.

In [None]:
import re
import json
from pathlib import Path
from collections import Counter

import yaml
import pandas as pd
from IPython.display import display

# Location of the rulebook inside the container: the mounted directory and
# the single rulebook file this notebook analyses.
RULEBOOK_DIR = Path("/home/jovyan/rulebooks")
RULEBOOK_FILE = RULEBOOK_DIR.joinpath("security-events.yml")

# Report the resolved path and whether the volume mount actually provides it.
print(
    f"Rulebook: {RULEBOOK_FILE}",
    f"Exists: {RULEBOOK_FILE.exists()}",
    sep="\n",
)

## Parse Rulebook

Load the YAML and, for each rule, extract its name, the event type, PKI type, and severity matched by its condition, and the playbook and CA level from its action.

In [None]:
# Parse the rulebook into ``rules``: one flat dict per rule with the keys
# name, event_type, pki_type, severity, ca_level, playbook and condition.
# Later cells build their DataFrames from this list.

# A comparison literal may be written with either quote style.
_QUOTED = r"""(?:"([^"]+)"|'([^']+)')"""
_EVENT_TYPE_RE = re.compile(r"event_type\s*==\s*" + _QUOTED)
_PKI_TYPE_RE = re.compile(r"pki_type\s*==\s*" + _QUOTED)
_SEVERITY_RE = re.compile(r"severity\s*==\s*" + _QUOTED)
# Conditions that fall through to the default (RSA) hierarchy.
_PKI_DEFAULT_RE = re.compile(r"pki_type\s+is\s+not\s+defined|pki_type\s*==\s*null")


def _quoted_value(pattern, text):
    """Return the quoted literal *pattern* finds in *text*, or "" if none."""
    found = pattern.search(text)
    if not found:
        return ""
    return found.group(1) or found.group(2)


def _run_playbook_spec(rule):
    """Return the first ``run_playbook`` mapping of *rule*, or {} if absent.

    Looks at the singular ``action`` key first, then at each entry of an
    ``actions`` list.
    """
    candidates = [rule.get("action")]
    listed = rule.get("actions")
    if isinstance(listed, list):
        candidates.extend(listed)
    for candidate in candidates:
        if isinstance(candidate, dict):
            spec = candidate.get("run_playbook")
            if isinstance(spec, dict):
                return spec
    return {}


def _parse_rule(rule):
    """Flatten one rule mapping into the record used by the later cells."""
    # The condition may be a string or a nested all/any structure; its text
    # form is enough for the pattern matching below.
    condition_text = str(rule.get("condition", ""))

    # An explicit pki_type comparison wins; otherwise a null/undefined check
    # marks the rule as the RSA default.
    pki_type = _quoted_value(_PKI_TYPE_RE, condition_text)
    if not pki_type and _PKI_DEFAULT_RE.search(condition_text):
        pki_type = "rsa (default)"

    spec = _run_playbook_spec(rule)
    extra_vars = spec.get("extra_vars")
    ca_level = extra_vars.get("ca_level", "") if isinstance(extra_vars, dict) else ""

    return {
        "name": rule.get("name", ""),
        "event_type": _quoted_value(_EVENT_TYPE_RE, condition_text),
        "pki_type": pki_type,
        "severity": _quoted_value(_SEVERITY_RE, condition_text),
        "ca_level": ca_level,
        "playbook": spec.get("name", ""),
        # Truncated so the column stays readable in a table.
        "condition": condition_text[:120],
    }


rules = []

if RULEBOOK_FILE.exists():
    with open(RULEBOOK_FILE, encoding="utf-8") as f:
        rulebook = yaml.safe_load(f)

    # Rulebook is a list of rulesets; anything else yields no rules.
    rulesets = rulebook if isinstance(rulebook, list) else []
    for ruleset in rulesets:
        if not isinstance(ruleset, dict):
            continue
        # ``rules:`` may be present but null, hence ``or []``.
        for rule in ruleset.get("rules") or []:
            if isinstance(rule, dict):
                rules.append(_parse_rule(rule))

    print(f"Parsed {len(rules)} rules from rulebook.")
else:
    print("Rulebook file not found. Make sure the volume mount is configured.")

## Rule Summary

Table of all rules with event type, PKI type, severity, target playbook, and CA level.

In [None]:
# Show every parsed rule as one table row, limited to the routing-relevant
# columns (the truncated condition text is left out).
if not rules:
    print("No rules parsed.")
else:
    df = pd.DataFrame(rules)
    summary_columns = ["name", "event_type", "pki_type", "severity", "ca_level", "playbook"]
    display(df.loc[:, summary_columns])

## Coverage Matrix

Which `event_type` × `pki_type` combinations have rules? A complete matrix shows `YES` in every cell: all 26 event types across all 3 PKI types (RSA, ECC, PQC). A `-` marks a combination with no rule.

In [None]:
# Build an event_type x pki_type matrix marking which combinations have a
# rule ("YES") and which do not ("-"), then report the covered fraction.

# PKI hierarchies the rulebook is expected to cover, in display order.
EXPECTED_PKI_TYPES = ["rsa", "ecc", "pqc"]

if rules:
    df = pd.DataFrame(rules)
    # Filter to rules with event_type (skip logging/meta rules)
    df_typed = df[df["event_type"] != ""].copy()

    if not df_typed.empty:
        # Fold "rsa (default)" into plain "rsa" so both land in one column.
        df_typed["pki_normalized"] = df_typed["pki_type"].apply(
            lambda x: "rsa" if "rsa" in str(x).lower() else x
        )
        # Create coverage matrix
        df_typed["has_rule"] = "YES"
        pivot = df_typed.pivot_table(
            index="event_type", columns="pki_normalized",
            values="has_rule", aggfunc="first", fill_value="-"
        )

        # Always show all expected PKI columns in a fixed order. A PKI type
        # with no rules at all becomes a column of "-" instead of vanishing,
        # so it counts against coverage rather than being hidden.
        missing = [c for c in EXPECTED_PKI_TYPES if c not in pivot.columns]
        unexpected = [c for c in pivot.columns if c not in EXPECTED_PKI_TYPES]
        pivot = pivot.reindex(columns=EXPECTED_PKI_TYPES, fill_value="-")

        covered = (pivot == "YES").sum().sum()
        total = pivot.shape[0] * pivot.shape[1]
        print(f"Coverage: {covered}/{total} cells ({covered/total*100:.0f}%)")
        print(f"Event types: {pivot.shape[0]}, PKI types: {pivot.shape[1]}")
        if missing:
            print(f"PKI types with no rules at all: {missing}")
        if unexpected:
            # Rules whose pki_type is empty or unrecognized are not part of
            # the matrix; surface them instead of dropping them silently.
            print(f"pki_type values left out of the matrix: {unexpected}")
        display(pivot)
    else:
        print("No typed rules found.")
else:
    print("No rules parsed.")

## Rules by Category

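Group rules by event category and show, per category, the number of rules and the number of distinct event types. Rules whose event type is not listed in any category are grouped under "Other".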

In [None]:
# Event types grouped by the category they are reported under.
CATEGORIES = {
    "Original": [
        "malware_detection",
        "credential_theft",
        "ransomware",
        "c2_communication",
        "lateral_movement",
        "privilege_escalation",
        "suspicious_script",
    ],
    "PKI/Cert": [
        "key_compromise",
        "geo_anomaly",
        "compliance_violation",
        "mitm_detected",
        "rogue_ca",
    ],
    "IoT": [
        "firmware_integrity",
        "device_cloning",
        "iot_anomaly",
        "protocol_attack",
    ],
    "Identity": [
        "impossible_travel",
        "service_account_abuse",
        "mfa_bypass",
        "kerberoasting",
    ],
    "Network": ["tls_downgrade", "ct_log_mismatch", "ocsp_bypass"],
    "SIEM": ["data_exfiltration", "unauthorized_access", "certificate_misuse"],
}

# Reverse lookup: event type -> category name.
event_to_cat = {
    event_name: category
    for category, event_names in CATEGORIES.items()
    for event_name in event_names
}

# Count rules and distinct event types per category. Event types missing
# from the reverse lookup are labelled "Other".
if not rules:
    print("No rules parsed.")
else:
    df = pd.DataFrame(rules)
    df["category"] = df["event_type"].map(event_to_cat).fillna("Other")

    per_category = df.groupby("category").agg(
        rules=("name", "count"),
        event_types=("event_type", "nunique"),
    )
    # Largest categories first.
    cat_counts = per_category.sort_values("rules", ascending=False)

    print("Rules by Category:")
    display(cat_counts)

## Event Trace

Given an event type and PKI type, show which rule matches and what playbook runs. **Change `TRACE_EVENT` and `TRACE_PKI` below.**

In [None]:
# Trace one event: list the rules whose event type and PKI type match the
# two settings below, with the severity, CA level and playbook of each.
TRACE_EVENT = "key_compromise"   # any of the 26 event types
TRACE_PKI = "rsa"                # rsa, ecc, pqc

if rules:
    df = pd.DataFrame(rules)
    # Match event type
    matches = df[df["event_type"] == TRACE_EVENT]

    # Match PKI type by exact, case-insensitive comparison. Substring or
    # regex matching would let a value such as "c" or "" hit several PKI
    # types at once. "rsa (default)" is folded into "rsa" so the default
    # rules are found when tracing "rsa".
    wanted_pki = TRACE_PKI.strip().lower()
    rule_pki = matches["pki_type"].astype(str).str.lower().map(
        lambda value: "rsa" if "rsa" in value else value
    )
    pki_matches = matches[rule_pki == wanted_pki]

    print(f"Event trace: event_type='{TRACE_EVENT}', pki_type='{TRACE_PKI}'")
    print(f"Matching rules: {len(pki_matches)}")

    if not pki_matches.empty:
        for _, row in pki_matches.iterrows():
            print(f"\n  Rule: {row['name']}")
            print(f"  Severity: {row['severity'] or 'any'}")
            print(f"  CA Level: {row['ca_level'] or 'default'}")
            print(f"  Playbook: {row['playbook']}")
    else:
        print("No matching rules found.")
        if not matches.empty:
            # Help the user pick a PKI type that does have rules.
            print(f"\nRules exist for event_type='{TRACE_EVENT}' with these PKI types:")
            print(f"  {matches['pki_type'].unique().tolist()}")
else:
    print("No rules parsed.")

## CA Level Routing

Which event types route to which CA levels? Shows the default CA level assigned by the rulebook `extra_vars`.

In [None]:
# For each event type, list the CA level(s) its rules pass to the playbook,
# grouped by event category.
if not rules:
    print("No rules parsed.")
else:
    df = pd.DataFrame(rules)
    # Keep only rules that carry both an event type and a CA level.
    has_event = df["event_type"] != ""
    has_level = df["ca_level"] != ""
    df_routed = df[has_event & has_level].copy()

    if df_routed.empty:
        print("No CA level routing data found.")
    else:
        def _join_levels(levels):
            """Collapse one group's CA levels into a sorted, comma-separated string."""
            return ", ".join(sorted(set(levels)))

        # One row per event type with its distinct CA levels.
        routing = (
            df_routed.groupby("event_type")["ca_level"]
            .apply(_join_levels)
            .reset_index()
        )
        routing.columns = ["event_type", "ca_levels"]

        # Attach the category and order by it, then by event type.
        routing["category"] = routing["event_type"].map(event_to_cat).fillna("Other")
        routing = routing.sort_values(["category", "event_type"])

        print("CA Level Routing:")
        display(routing.set_index("event_type")[["category", "ca_levels"]])