diff --git a/cleancloud.yaml b/cleancloud.yaml
index bd6331e..c7b71c6 100644
--- a/cleancloud.yaml
+++ b/cleancloud.yaml
@@ -61,9 +61,8 @@ rules:
aws.rds.instance.idle:
enabled: true
- min_cost: 100 # suppress RDS findings below $100/month estimated cost
params:
- idle_days: 21 # require 21 days idle before flagging (default: 14)
+ idle_days_threshold: 21 # require 21 days idle before flagging (default: 14)
gcp.sql.instance.idle:
enabled: true
diff --git a/cleancloud/providers/aws/rules/ai/ec2_gpu_idle.py b/cleancloud/providers/aws/rules/ai/ec2_gpu_idle.py
index 8a1ecf2..818a152 100644
--- a/cleancloud/providers/aws/rules/ai/ec2_gpu_idle.py
+++ b/cleancloud/providers/aws/rules/ai/ec2_gpu_idle.py
@@ -397,7 +397,7 @@ def _list_gpu_metrics(cloudwatch, instance_id: str) -> list:
Dimensions=[{"Name": "InstanceId", "Value": instance_id}],
)
return resp.get("Metrics", [])
- except ClientError:
+ except Exception:
return []
@@ -434,7 +434,7 @@ def _get_max_gpu_utilisation(
gpu_max = max(dp["Maximum"] for dp in datapoints)
if max_util is None or gpu_max > max_util:
max_util = gpu_max
- except ClientError:
+ except Exception:
continue
return max_util
@@ -467,5 +467,5 @@ def _get_avg_cpu_utilisation(
if not datapoints:
return None
return max(dp["Maximum"] for dp in datapoints)
- except ClientError:
+ except Exception:
return None
diff --git a/cleancloud/providers/aws/rules/ai/sagemaker_endpoint_idle.py b/cleancloud/providers/aws/rules/ai/sagemaker_endpoint_idle.py
index 91d5b56..d69fc75 100644
--- a/cleancloud/providers/aws/rules/ai/sagemaker_endpoint_idle.py
+++ b/cleancloud/providers/aws/rules/ai/sagemaker_endpoint_idle.py
@@ -434,7 +434,7 @@ def _check_invocations(
queried_with_variants=False,
fetch_failed=False,
)
- except ClientError:
+ except Exception:
return InvocationCheckResult(
has_traffic=True,
active_variants=[],
@@ -471,7 +471,7 @@ def _check_invocations(
else:
idle_variants.append(variant_name)
- except ClientError:
+ except Exception:
# CloudWatch API failure — treat this variant as active and surface the failure.
return InvocationCheckResult(
has_traffic=True,
@@ -561,7 +561,7 @@ def _describe_endpoint(
slcfg = cv.get("ServerlessConfig")
if slcfg:
serverless_cfg_by_variant[cv["VariantName"]] = slcfg
- except ClientError:
+ except Exception:
pass # config inaccessible — costs/GPU will use defaults
accumulated_cost = 0.0
@@ -631,7 +631,7 @@ def _describe_endpoint(
total_provisioned_concurrency,
)
- except ClientError:
+ except Exception:
# Unknown state — return zero instances so the endpoint is skipped rather
# than flagged with assumed cost and instance count.
return None, False, 0, 0, None, [], 0
diff --git a/cleancloud/providers/aws/rules/ami_old.py b/cleancloud/providers/aws/rules/ami_old.py
index 06d065f..b9a34d5 100644
--- a/cleancloud/providers/aws/rules/ami_old.py
+++ b/cleancloud/providers/aws/rules/ami_old.py
@@ -507,7 +507,7 @@ def _get_last_launched_time(ec2, ami_id: str) -> Tuple[Optional[datetime], bool]
if not isinstance(value, str) or not value:
return None, False
return datetime.fromisoformat(value.replace("Z", "+00:00")), False
- except ClientError:
+ except Exception:
return None, True
@@ -527,7 +527,7 @@ def _check_active_instances(ec2, ami_id: str) -> Tuple[bool, bool]:
)
found = any(r.get("Instances") for r in resp.get("Reservations", []))
return found, False
- except ClientError:
+ except Exception:
return False, True
@@ -572,11 +572,11 @@ def _build_lt_index(ec2) -> Tuple[Dict[str, List[str]], bool]:
v_lt_id = v.get("LaunchTemplateId")
if image_id and v_lt_id:
index.setdefault(image_id, set()).add(v_lt_id)
- except ClientError:
+ except Exception:
continue # best-effort per LT
return {k: sorted(v) for k, v in index.items()}, lt_truncated
- except ClientError:
+ except Exception:
return {}, True
@@ -610,5 +610,5 @@ def _build_lc_index(autoscaling) -> Tuple[Dict[str, List[str]], bool]:
break
kwargs["NextToken"] = nxt
return {k: sorted(v) for k, v in index.items()}, lc_truncated
- except ClientError:
+ except Exception:
return {}, True
diff --git a/cleancloud/providers/aws/rules/ebs_snapshot_old.py b/cleancloud/providers/aws/rules/ebs_snapshot_old.py
index bca6187..4a2f41a 100644
--- a/cleancloud/providers/aws/rules/ebs_snapshot_old.py
+++ b/cleancloud/providers/aws/rules/ebs_snapshot_old.py
@@ -37,7 +37,6 @@
from typing import List, Optional, Set, Tuple
import boto3
-from botocore.exceptions import BotoCoreError, ClientError
from cleancloud.core.confidence import ConfidenceLevel
from cleancloud.core.evidence import Evidence
@@ -68,7 +67,7 @@ def _build_ami_snapshot_index(ec2) -> Tuple[Set[str], bool]:
snap_id = bdm.get("Ebs", {}).get("SnapshotId")
if snap_id:
referenced.add(snap_id)
- except (ClientError, BotoCoreError):
+ except Exception:
return referenced, True
return referenced, False
@@ -91,7 +90,7 @@ def _check_external_sharing(ec2, snap_id: str) -> Tuple[bool, bool]:
if perm.get("UserId"): # explicit cross-account
return True, False
return False, False
- except (ClientError, BotoCoreError):
+ except Exception:
return False, True
diff --git a/cleancloud/providers/aws/rules/ec2_sg_unused.py b/cleancloud/providers/aws/rules/ec2_sg_unused.py
index 8a45e87..1452cd4 100644
--- a/cleancloud/providers/aws/rules/ec2_sg_unused.py
+++ b/cleancloud/providers/aws/rules/ec2_sg_unused.py
@@ -262,7 +262,7 @@ def find_unused_security_groups(
)
if name:
vpc_names[vpc["VpcId"]] = name
- except (ClientError, BotoCoreError):
+ except Exception:
pass # VPC names are display-only; don't fail the rule
# --- Step 7: Apply exclusion rules and emit findings ---
diff --git a/cleancloud/providers/aws/rules/ec2_stopped.py b/cleancloud/providers/aws/rules/ec2_stopped.py
index 0b4bbb3..d8f5e39 100644
--- a/cleancloud/providers/aws/rules/ec2_stopped.py
+++ b/cleancloud/providers/aws/rules/ec2_stopped.py
@@ -331,7 +331,7 @@ def _get_volume_sizes(ec2, volume_ids: List[str]) -> Dict[str, int]:
size = vol.get("Size")
if vid and size is not None:
sizes[vid] = size
- except (ClientError, BotoCoreError):
+ except Exception:
pass
return sizes
diff --git a/cleancloud/providers/aws/rules/elastic_ip_unattached.py b/cleancloud/providers/aws/rules/elastic_ip_unattached.py
index 465e22b..66fde96 100644
--- a/cleancloud/providers/aws/rules/elastic_ip_unattached.py
+++ b/cleancloud/providers/aws/rules/elastic_ip_unattached.py
@@ -1,8 +1,44 @@
+"""
+Rule: aws.ec2.elastic_ip.unattached
+
+ (spec — docs/specs/aws/elastic_ip_unattached.md)
+
+Intent:
+ Detect Elastic IP address records that are currently allocated to the account
+ in the scanned Region and are not currently associated with an instance or
+ network interface.
+
+Exclusions:
+ - resource_id absent (malformed identity)
+ - any canonical association field present (currently associated)
+
+Detection:
+ - resource_id present
+ - association_id, instance_id, network_interface_id, private_ip_address all absent
+
+Key rules:
+ - This is a review-candidate rule, not a delete-safe rule.
+ - No temporal threshold — current unattached state is the sole eligibility signal.
+ - Do not use AllocationTime (undocumented field).
+ - All four canonical association fields must be checked, not only AssociationId.
+ - Missing/non-iterable Addresses response fails the rule.
+ - Do not hardcode a fixed monthly cost estimate.
+
+Blind spots:
+ - future planned attachment or operational reserve intent not known
+ - DNS / allowlist / manual failover dependencies
+ - application-level use of the reserved public IP
+ - service-managed lifecycle expectations outside current association state
+
+APIs:
+ - ec2:DescribeAddresses
+"""
+
from datetime import datetime, timezone
-from typing import List
+from typing import List, Optional
import boto3
-from botocore.exceptions import ClientError
+from botocore.exceptions import BotoCoreError, ClientError
from cleancloud.core.confidence import ConfidenceLevel
from cleancloud.core.evidence import Evidence
@@ -10,136 +46,191 @@
from cleancloud.core.risk import RiskLevel
-def find_unattached_elastic_ips(
- session: boto3.Session,
- region: str,
- days_unattached: int = 30,
-) -> List[Finding]:
- """
- Find Elastic IPs allocated 30+ days ago and currently unattached.
-
- Unattached Elastic IPs incur small hourly charges when not associated.
+def _str(value) -> Optional[str]:
+ """Return value if it is a non-empty string, else None."""
+ return value if isinstance(value, str) and value else None
- IMPORTANT: AWS does not expose "unattached since" timestamp, so we use
- allocation age as a proxy. An EIP allocated 30+ days ago and currently
- unattached is worth reviewing.
- SAFE RULE (review-only):
- - EIP does not have AssociationId (not attached)
- - EIP allocation age >= days_unattached threshold (NOT unattached duration)
- - Classic EIPs without AllocationTime are flagged immediately (conservative)
+def _normalize_address(address: dict) -> Optional[dict]:
+ """Normalize a raw SDK address dict to the canonical field shape.
- IAM permissions:
- - ec2:DescribeAddresses
+ Returns None when the item must be skipped (non-dict, absent stable identity).
+ All rule logic must operate only on the returned dict.
"""
- ec2 = session.client("ec2", region_name=region)
+ if not isinstance(address, dict):
+ return None
+
+ # Identity — resource_id: AllocationId → PublicIp → CarrierIp → absent (skip)
+ allocation_id = _str(address.get("AllocationId"))
+ public_ip = _str(address.get("PublicIp"))
+ carrier_ip = _str(address.get("CarrierIp"))
+
+ resource_id = allocation_id or public_ip or carrier_ip
+ if not resource_id:
+ return None
+
+ # Association fields — any present means currently associated
+ association_id = _str(address.get("AssociationId"))
+ instance_id = _str(address.get("InstanceId"))
+ network_interface_id = _str(address.get("NetworkInterfaceId"))
+ private_ip_address = _str(address.get("PrivateIpAddress"))
+
+ # Context fields — absent → null; never block evaluation
+ domain = _str(address.get("Domain"))
+ network_interface_owner_id = _str(address.get("NetworkInterfaceOwnerId"))
+ network_border_group = _str(address.get("NetworkBorderGroup"))
+ public_ipv4_pool = _str(address.get("PublicIpv4Pool"))
+ customer_owned_ip = _str(address.get("CustomerOwnedIp"))
+ customer_owned_ipv4_pool = _str(address.get("CustomerOwnedIpv4Pool"))
+ subnet_id = _str(address.get("SubnetId"))
+
+ # ServiceManaged — string enum ("alb", "nlb", "rnat", "rds", …); normalize as string
+ service_managed: Optional[str] = _str(address.get("ServiceManaged"))
+
+ # Tags — prefer list; degrade to empty if absent or wrong type
+ tags_raw = address.get("Tags")
+ tags: list = tags_raw if isinstance(tags_raw, list) else []
+
+ return {
+ "resource_id": resource_id,
+ "allocation_id": allocation_id,
+ "public_ip": public_ip,
+ "carrier_ip": carrier_ip,
+ "association_id": association_id,
+ "instance_id": instance_id,
+ "network_interface_id": network_interface_id,
+ "private_ip_address": private_ip_address,
+ "domain": domain,
+ "network_interface_owner_id": network_interface_owner_id,
+ "network_border_group": network_border_group,
+ "public_ipv4_pool": public_ipv4_pool,
+ "customer_owned_ip": customer_owned_ip,
+ "customer_owned_ipv4_pool": customer_owned_ipv4_pool,
+ "subnet_id": subnet_id,
+ "service_managed": service_managed,
+ "tags": tags,
+ }
+
+def find_unattached_elastic_ips(
+ session: boto3.Session,
+ region: str,
+) -> List[Finding]:
+ ec2 = session.client("ec2", region_name=region)
now = datetime.now(timezone.utc)
findings: List[Finding] = []
+ # --- Step 1: Retrieve all Elastic IP records ---
try:
- # DescribeAddresses is non-paginated by AWS (no paginator exists).
- # Returns all Elastic IPs in a single call.
response = ec2.describe_addresses()
- for eip in response.get("Addresses", []):
- # Skip if attached to an instance or network interface
- if "AssociationId" in eip:
- continue
-
- # Calculate age since allocation
- allocation_time = eip.get("AllocationTime")
- domain = eip.get("Domain", "vpc")
- is_classic = domain == "standard"
-
- if not allocation_time:
- if is_classic:
- # Genuine EC2-Classic EIP without AllocationTime — flag conservatively
- age_days = None
- else:
- # VPC EIP without AllocationTime — cannot determine age, skip
- continue
- else:
- age_days = (now - allocation_time).days
-
- # Apply age threshold (skip if too young)
- if age_days is not None and age_days < days_unattached:
- continue
-
- # Build evidence
- signals_used = ["Elastic IP is not associated with any instance or network interface"]
- if age_days is not None:
- signals_used.append(
- f"Elastic IP was allocated {age_days} days ago and is currently unattached"
- )
- if is_classic:
- signals_used.append(
- "Classic EIP without AllocationTime (age unknown, flagged conservatively)"
- )
- signals_used.append(
- "EC2-Classic is deprecated; unattached Classic EIPs are almost always legacy leftovers"
- )
-
- evidence = Evidence(
- signals_used=signals_used,
- signals_not_checked=[
- "Unattached duration (AWS does not expose detach timestamp)",
- "Previous attachment history",
- "Application-level usage",
- "Manual operational workflows",
- "Future planned attachments",
- "Disaster recovery intent",
- ],
- time_window=(
- f"{days_unattached} days since allocation"
- if age_days is not None
- else "Unknown (Classic EIP, no AllocationTime)"
- ),
- )
+ except ClientError as exc:
+ code = exc.response["Error"]["Code"]
+ if code in ("UnauthorizedOperation", "AccessDenied"):
+ raise PermissionError("Missing required IAM permission: ec2:DescribeAddresses") from exc
+ raise
+ except BotoCoreError:
+ raise
- # Build details
- details = {
- "public_ip": eip.get("PublicIp"),
- "domain": eip.get("Domain", "vpc"),
- "is_classic": is_classic,
+ # --- Step 2: Validate top-level response integrity ---
+ raw_addresses = response.get("Addresses")
+    if not isinstance(raw_addresses, list):
+ raise RuntimeError(
+ "DescribeAddresses response is missing a usable top-level Addresses field — "
+ "cannot reliably determine EIP association state"
+ )
+
+ # --- Steps 3–5: Normalize, apply exclusions, emit ---
+ for raw_address in raw_addresses:
+ a = _normalize_address(raw_address)
+ if a is None:
+ continue # SKIP: absent stable identity
+
+ # EXCLUSION: currently associated
+ if (
+ a["association_id"] is not None
+ or a["instance_id"] is not None
+ or a["network_interface_id"] is not None
+ or a["private_ip_address"] is not None
+ ):
+ continue
+
+ # --- Detection path: unattached-eip-review-candidate ---
+
+ evidence = Evidence(
+ signals_used=[
+ f"Address {a['resource_id']} is currently not associated per DescribeAddresses",
+ "Address remains allocated to the account until explicitly released",
+ "AWS recommends release only when the address is no longer needed "
+ "and is not currently associated",
+ ],
+ signals_not_checked=[
+ "Future planned attachment or operational reserve intent not known",
+ "DNS / allowlist / manual failover dependencies",
+ "Application-level use of the reserved public IP",
+ "Exact monthly pricing from the current pricing page",
+ "Service-managed lifecycle expectations outside current association state",
+ ],
+ time_window=None,
+ )
+
+ details: dict = {
+ "evaluation_path": "unattached-eip-review-candidate",
+ "resource_id": a["resource_id"],
+ "allocation_id": a["allocation_id"],
+ "public_ip": a["public_ip"],
+ "carrier_ip": a["carrier_ip"],
+ "domain": a["domain"],
+ "currently_associated": False,
+ "association_id": None,
+ "instance_id": None,
+ "network_interface_id": None,
+ "private_ip_address": None,
+ }
+ if a["network_interface_owner_id"] is not None:
+ details["network_interface_owner_id"] = a["network_interface_owner_id"]
+ if a["network_border_group"] is not None:
+ details["network_border_group"] = a["network_border_group"]
+ if a["public_ipv4_pool"] is not None:
+ details["public_ipv4_pool"] = a["public_ipv4_pool"]
+ if a["customer_owned_ip"] is not None:
+ details["customer_owned_ip"] = a["customer_owned_ip"]
+ if a["customer_owned_ipv4_pool"] is not None:
+ details["customer_owned_ipv4_pool"] = a["customer_owned_ipv4_pool"]
+ if a["subnet_id"] is not None:
+ details["subnet_id"] = a["subnet_id"]
+ if a["service_managed"] is not None:
+ details["service_managed"] = a["service_managed"]
+ if a["tags"]:
+ details["tags"] = {
+ t.get("Key"): t.get("Value") for t in a["tags"] if isinstance(t, dict)
}
- if age_days is not None:
- details["age_days"] = age_days
- details["allocation_time"] = allocation_time.isoformat()
-
- if "Tags" in eip:
- details["tags"] = eip["Tags"]
-
- findings.append(
- Finding(
- provider="aws",
- rule_id="aws.ec2.elastic_ip.unattached",
- resource_type="aws.ec2.elastic_ip",
- resource_id=eip.get("AllocationId") or eip.get("PublicIp"),
- region=region,
- estimated_monthly_cost_usd=3.75,
- title="Unattached Elastic IP (Review Recommended)",
- summary=(
- f"Elastic IP allocated {age_days} days ago and currently unattached (incurs hourly charges)"
- if age_days is not None
- else "Classic Elastic IP currently unattached (incurs hourly charges, allocation age unknown)"
- ),
- reason=(
- f"Elastic IP is {age_days} days old and currently unattached, incurring charges"
- if age_days is not None
- else "Classic Elastic IP currently unattached, incurring charges (allocation age unknown)"
- ),
- risk=RiskLevel.LOW,
- confidence=ConfidenceLevel.HIGH, # Deterministic state: no AssociationId
- detected_at=now,
- evidence=evidence,
- details=details,
- )
+ findings.append(
+ Finding(
+ provider="aws",
+ rule_id="aws.ec2.elastic_ip.unattached",
+ resource_type="aws.ec2.elastic_ip",
+ resource_id=a["resource_id"],
+ region=region,
+ title="Unattached Elastic IP review candidate",
+ summary=(
+ f"Elastic IP {a['resource_id']}"
+ + (
+ f" ({a['public_ip']})"
+ if a["public_ip"] and a["public_ip"] != a["resource_id"]
+ else ""
+ )
+ + " is currently not associated with any instance or network interface; "
+ "review for possible release"
+ ),
+ reason="Address has no current association per DescribeAddresses",
+ risk=RiskLevel.LOW,
+ confidence=ConfidenceLevel.HIGH,
+ detected_at=now,
+ evidence=evidence,
+ details=details,
+ estimated_monthly_cost_usd=None,
)
-
- except ClientError as e:
- if e.response["Error"]["Code"] == "UnauthorizedOperation":
- raise PermissionError("Missing required IAM permission: ec2:DescribeAddresses") from e
- raise
+ )
return findings
diff --git a/cleancloud/providers/aws/rules/elb_idle.py b/cleancloud/providers/aws/rules/elb_idle.py
index 1d216f3..4d290be 100644
--- a/cleancloud/providers/aws/rules/elb_idle.py
+++ b/cleancloud/providers/aws/rules/elb_idle.py
@@ -1,525 +1,726 @@
+"""
+Rule: aws.elbv2.alb.idle
+Rule: aws.elbv2.nlb.idle
+Rule: aws.elb.clb.idle
+
+ (spec — docs/specs/aws/elb_idle.md)
+
+Intent:
+ Detect ALB, NLB, and CLB load balancers that are at least
+ idle_days_threshold days old and show no trusted CloudWatch evidence of
+ client traffic during the full lookback window, so they can be reviewed
+ as potential cleanup candidates.
+
+Exclusions:
+ - resource_id absent (malformed identity)
+ - lb_family == "unsupported" (gateway LB or unknown type)
+ - created_time absent or not safely comparable
+ - age_days < idle_days_threshold (too new to evaluate)
+ - ELBv2 state_code not "active" or "active_impaired"
+ - trusted traffic present (any CloudWatch signal > 0)
+ - ELBv2 ARN dimension unparsable
+
+Detection:
+ - resource_id present, lb_family in {"alb","nlb","clb"}
+ - age_days >= idle_days_threshold
+ - ELBv2: state_code "active" or "active_impaired"
+ - all traffic signals absent during full lookback window
+
+Key rules:
+ - ALB: RequestCount Sum>0, ProcessedBytes Sum>0, or ActiveConnectionCount Sum>0
+ - NLB: NewFlowCount Sum>0, ProcessedBytes Sum>0, or ActiveFlowCount Maximum>0
+ - NLB: missing datapoints over full window = FAIL RULE (not zero)
+ - CLB: RequestCount Sum>0 or EstimatedProcessedBytes Sum>0
+ - Any metric read failure = FAIL RULE; no LOW-confidence path
+ - ELBv2 dimension strictly from ARN suffix after loadbalancer/; unparsable = SKIP ITEM
+ - Backend registration is contextual only
+ - estimated_monthly_cost_usd = None
+
+Blind spots:
+ - planned future usage or blue/green staging
+ - seasonal traffic patterns outside the current lookback window
+ - DNS / allowlist / manual failover dependencies
+ - NLB traffic rejected by security groups (not in CloudWatch)
+
+APIs:
+ - elasticloadbalancing:DescribeLoadBalancers (elbv2 API)
+ - elasticloadbalancing:DescribeLoadBalancers (elb API)
+ - cloudwatch:GetMetricStatistics
+ - elasticloadbalancing:DescribeTargetGroups (contextual)
+ - elasticloadbalancing:DescribeTargetHealth (contextual)
+"""
+
from datetime import datetime, timedelta, timezone
-from typing import List
+from typing import List, Optional
import boto3
-from botocore.exceptions import ClientError
+from botocore.exceptions import BotoCoreError, ClientError
from cleancloud.core.confidence import ConfidenceLevel
from cleancloud.core.evidence import Evidence
from cleancloud.core.finding import Finding
from cleancloud.core.risk import RiskLevel
+_DEFAULT_IDLE_DAYS_THRESHOLD = 14
-def find_idle_load_balancers(
- session: boto3.Session,
- region: str,
- idle_days: int = 14,
-) -> List[Finding]:
+
+# ---------------------------------------------------------------------------
+# Normalization helpers
+# ---------------------------------------------------------------------------
+
+
+def _str(value) -> Optional[str]:
+ """Return value if it is a non-empty string, else None."""
+ return value if isinstance(value, str) and value else None
+
+
+def _normalize_elbv2(lb: dict, idle_days_threshold: int, now_utc: datetime) -> Optional[dict]:
+ """Normalize a raw ELBv2 SDK dict to canonical fields.
+
+ Returns None when the item must be skipped (non-dict, absent identity).
+ All rule logic must operate only on the returned dict.
"""
- Find idle Elastic Load Balancers (ALB, NLB, CLB) with no traffic.
+ if not isinstance(lb, dict):
+ return None
- ELBs have a base hourly charge regardless of usage (~$16-22/month).
- Idle load balancers with no traffic are a clear cost optimization signal.
+ arn = _str(lb.get("LoadBalancerArn"))
+ if not arn:
+ return None # SKIP: no stable identity
- Detection logic:
- - LB is older than `idle_days` days
- - Zero traffic over the `idle_days` period (CloudWatch metrics)
- - No registered targets (ALB/NLB) or no registered instances (CLB)
+ lb_type = _str(lb.get("Type"))
+ if lb_type == "application":
+ lb_family = "alb"
+ elif lb_type == "network":
+ lb_family = "nlb"
+ else:
+ lb_family = "unsupported" # gateway or unknown
+
+ name = _str(lb.get("LoadBalancerName"))
+
+ # created_time — must be timezone-aware for age calculation.
+ # Naive datetimes are not safely comparable and must not be coerced; leave absent.
+ created_time_raw = lb.get("CreatedTime")
+ created_time: Optional[datetime] = None
+ if isinstance(created_time_raw, datetime) and created_time_raw.tzinfo is not None:
+ created_time = created_time_raw.astimezone(timezone.utc)
+
+ age_days: Optional[int] = None
+ if created_time is not None:
+ age_days = int((now_utc - created_time).total_seconds() // 86400)
+
+ # state_code from nested State dict
+ state_raw = lb.get("State")
+ state_code: Optional[str] = None
+ if isinstance(state_raw, dict):
+ state_code = _str(state_raw.get("Code"))
+
+ scheme = _str(lb.get("Scheme"))
+ dns_name = _str(lb.get("DNSName"))
+ vpc_id = _str(lb.get("VpcId"))
+
+ return {
+ "resource_id": arn,
+ "lb_family": lb_family,
+ "load_balancer_name": name,
+ "load_balancer_arn": arn,
+ "created_time": created_time,
+ "age_days": age_days,
+ "scheme": scheme,
+ "dns_name": dns_name,
+ "vpc_id": vpc_id,
+ "state_code": state_code,
+ "idle_days_threshold": idle_days_threshold,
+ }
+
+
+def _normalize_clb(lb: dict, idle_days_threshold: int, now_utc: datetime) -> Optional[dict]:
+ """Normalize a raw CLB SDK dict to canonical fields.
+
+ Returns None when the item must be skipped (non-dict, absent identity).
+ """
+ if not isinstance(lb, dict):
+ return None
+
+ name = _str(lb.get("LoadBalancerName"))
+ if not name:
+ return None # SKIP: no stable identity
+
+ # Naive datetimes are not safely comparable and must not be coerced; leave absent.
+ created_time_raw = lb.get("CreatedTime")
+ created_time: Optional[datetime] = None
+ if isinstance(created_time_raw, datetime) and created_time_raw.tzinfo is not None:
+ created_time = created_time_raw.astimezone(timezone.utc)
+
+ age_days: Optional[int] = None
+ if created_time is not None:
+ age_days = int((now_utc - created_time).total_seconds() // 86400)
+
+ scheme = _str(lb.get("Scheme"))
+ dns_name = _str(lb.get("DNSName"))
+ # CLB uses VPCId (capital VPC), not VpcId
+ vpc_id = _str(lb.get("VPCId"))
+
+ instances_raw = lb.get("Instances")
+ instances: list = instances_raw if isinstance(instances_raw, list) else []
+
+ return {
+ "resource_id": name,
+ "lb_family": "clb",
+ "load_balancer_name": name,
+ "load_balancer_arn": None,
+ "created_time": created_time,
+ "age_days": age_days,
+ "scheme": scheme,
+ "dns_name": dns_name,
+ "vpc_id": vpc_id,
+ "state_code": None,
+ "idle_days_threshold": idle_days_threshold,
+ "instances": instances,
+ }
+
+
+# ---------------------------------------------------------------------------
+# CloudWatch dimension extraction
+# ---------------------------------------------------------------------------
+
+
+def _extract_elbv2_dimension(lb_arn: str) -> Optional[str]:
+ """Extract the CloudWatch LoadBalancer dimension value from an ELBv2 ARN.
+
+ Strictly uses the suffix after 'loadbalancer/'. Returns None if
+ the suffix cannot be reliably extracted — caller must SKIP the item.
- Confidence:
- - HIGH: Zero traffic AND no targets/instances
- - MEDIUM: Zero traffic only
+ ARN format: arn:aws:elasticloadbalancing:region:account:loadbalancer/app/name/id
+ Dimension: app/name/id
+ """
+ parts = lb_arn.split("loadbalancer/", 1)
+ if len(parts) == 2 and parts[1]:
+ return parts[1]
+ return None
- IAM permissions:
- - elasticloadbalancing:DescribeLoadBalancers
- - elasticloadbalancing:DescribeTargetGroups
- - elasticloadbalancing:DescribeTargetHealth
- - cloudwatch:GetMetricStatistics
+
+# ---------------------------------------------------------------------------
+# CloudWatch metric fetching
+# ---------------------------------------------------------------------------
+
+
+def _get_metric_datapoints(
+ cloudwatch,
+ namespace: str,
+ metric_name: str,
+ statistic: str,
+ dimension_name: str,
+ dimension_value: str,
+ start_time: datetime,
+ end_time: datetime,
+) -> List[dict]:
+ """Fetch CloudWatch metric datapoints.
+
+ Returns the raw list of datapoints (may be empty for ALB/CLB; see NLB caller).
+ Raises PermissionError on permission errors, re-raises ClientError/BotoCoreError
+ for all other failures — caller treats these as FAIL RULE.
"""
- cloudwatch = session.client("cloudwatch", region_name=region)
- now = datetime.now(timezone.utc)
- findings: List[Finding] = []
+ try:
+ response = cloudwatch.get_metric_statistics(
+ Namespace=namespace,
+ MetricName=metric_name,
+ Dimensions=[{"Name": dimension_name, "Value": dimension_value}],
+ StartTime=start_time,
+ EndTime=end_time,
+ Period=86400,
+ Statistics=[statistic],
+ )
+ return response.get("Datapoints", [])
+ except ClientError as exc:
+ code = exc.response["Error"]["Code"]
+ if code in ("AccessDenied", "UnauthorizedOperation"):
+ raise PermissionError(
+ "Missing required IAM permission: cloudwatch:GetMetricStatistics"
+ ) from exc
+ raise
+ except BotoCoreError:
+ raise
- # Scan ALB/NLB via elbv2
- findings.extend(_scan_elbv2(session, region, cloudwatch, now, idle_days))
- # Scan CLB via elb
- findings.extend(_scan_clb(session, region, cloudwatch, now, idle_days))
+def _check_alb_traffic(
+ cloudwatch,
+ dimension_value: str,
+ start_time: datetime,
+ end_time: datetime,
+) -> bool:
+ """Return True if ALB has trusted traffic over the window, False if confirmed zero.
- return findings
+ Checks: RequestCount Sum, ProcessedBytes Sum, ActiveConnectionCount Sum.
+ Missing datapoints treated as zero (ALB only reports when traffic is present).
+ Raises on metric read failure → FAIL RULE.
+ """
+ namespace = "AWS/ApplicationELB"
+ dim = "LoadBalancer"
+
+ for metric_name in ("RequestCount", "ProcessedBytes", "ActiveConnectionCount"):
+ dps = _get_metric_datapoints(
+ cloudwatch, namespace, metric_name, "Sum", dim, dimension_value, start_time, end_time
+ )
+ if any(dp.get("Sum", 0) > 0 for dp in dps):
+ return True
+
+ return False
+
+
+def _check_nlb_traffic(
+ cloudwatch,
+ dimension_value: str,
+ start_time: datetime,
+ end_time: datetime,
+ expected_days: int,
+) -> bool:
+ """Return True if NLB has trusted traffic over the window, False if confirmed zero.
+
+ Checks: NewFlowCount Sum, ProcessedBytes Sum, ActiveFlowCount Maximum.
+ NLB metrics are documented as always reported; incomplete coverage (fewer
+ datapoints than the full window warrants) means the zero-traffic claim is
+ not trustworthy → raise RuntimeError (FAIL RULE).
+ Raises on metric read failure → FAIL RULE.
+ """
+ namespace = "AWS/NetworkELB"
+ dim = "LoadBalancer"
+ # Spec requires full-window coverage with no gaps; no tolerance applied.
+ min_datapoints = expected_days
+
+ for metric_name in ("NewFlowCount", "ProcessedBytes"):
+ dps = _get_metric_datapoints(
+ cloudwatch, namespace, metric_name, "Sum", dim, dimension_value, start_time, end_time
+ )
+ if len(dps) < min_datapoints:
+ raise RuntimeError(
+ f"NLB {metric_name} metric returned {len(dps)} datapoint(s) for a "
+ f"{expected_days}-day window — coverage is incomplete, "
+ "cannot confirm zero traffic"
+ )
+ if any(dp.get("Sum", 0) > 0 for dp in dps):
+ return True
+
+ dps = _get_metric_datapoints(
+ cloudwatch,
+ namespace,
+ "ActiveFlowCount",
+ "Maximum",
+ dim,
+ dimension_value,
+ start_time,
+ end_time,
+ )
+ if len(dps) < min_datapoints:
+ raise RuntimeError(
+ f"NLB ActiveFlowCount metric returned {len(dps)} datapoint(s) for a "
+ f"{expected_days}-day window — coverage is incomplete, "
+ "cannot confirm zero traffic"
+ )
+ if any(dp.get("Maximum", 0) > 0 for dp in dps):
+ return True
+
+ return False
+
+
+def _check_clb_traffic(
+ cloudwatch,
+ lb_name: str,
+ start_time: datetime,
+ end_time: datetime,
+) -> bool:
+ """Return True if CLB has trusted traffic over the window, False if confirmed zero.
+
+ Checks: RequestCount Sum, EstimatedProcessedBytes Sum.
+ Missing datapoints treated as zero (CLB only reports when traffic is present).
+ Raises on metric read failure → FAIL RULE.
+ """
+ namespace = "AWS/ELB"
+ dim = "LoadBalancerName"
+
+ for metric_name in ("RequestCount", "EstimatedProcessedBytes"):
+ dps = _get_metric_datapoints(
+ cloudwatch, namespace, metric_name, "Sum", dim, lb_name, start_time, end_time
+ )
+ if any(dp.get("Sum", 0) > 0 for dp in dps):
+ return True
+
+ return False
+
+
+# ---------------------------------------------------------------------------
+# Backend registration context (best-effort; failure degrades context not rule)
+# ---------------------------------------------------------------------------
+
+
+def _get_elbv2_backend_context(elbv2, lb_arn: str) -> tuple:
+ """Return (registered_target_count, target_group_count, enrichment_succeeded).
+
+ On any error returns (0, 0, False) — caller sets has_registered_targets = None.
+ Pagination of target groups is exhausted; target health is retrieved per group.
+ """
+ try:
+ paginator = elbv2.get_paginator("describe_target_groups")
+ target_groups = []
+ for page in paginator.paginate(LoadBalancerArn=lb_arn):
+ target_groups.extend(page.get("TargetGroups", []))
+
+ tg_count = len(target_groups)
+ total_targets = 0
+ for tg in target_groups:
+ tg_arn = _str(tg.get("TargetGroupArn"))
+ if not tg_arn:
+ continue
+ health_resp = elbv2.describe_target_health(TargetGroupArn=tg_arn)
+ total_targets += len(health_resp.get("TargetHealthDescriptions", []))
+ return total_targets, tg_count, True
+    except Exception:  # broad by design: ClientError/BotoCoreError and any other failure degrade context only
+ return 0, 0, False
+
+
+# ---------------------------------------------------------------------------
+# ELBv2 (ALB + NLB) scanner
+# ---------------------------------------------------------------------------
def _scan_elbv2(
session: boto3.Session,
region: str,
cloudwatch,
- now: datetime,
- idle_days: int,
+ now_utc: datetime,
+ idle_days_threshold: int,
) -> List[Finding]:
- """Scan ALB and NLB load balancers for idle resources."""
elbv2 = session.client("elbv2", region_name=region)
findings: List[Finding] = []
+ start_time = now_utc - timedelta(days=max(idle_days_threshold, 1))
try:
paginator = elbv2.get_paginator("describe_load_balancers")
+ pages = list(paginator.paginate())
+ except ClientError as exc:
+ code = exc.response["Error"]["Code"]
+ if code in ("AccessDenied", "UnauthorizedOperation"):
+ raise PermissionError(
+ "Missing required IAM permission: elasticloadbalancing:DescribeLoadBalancers"
+ ) from exc
+ raise
+ except BotoCoreError:
+ raise
- for page in paginator.paginate():
- for lb in page.get("LoadBalancers", []):
- lb_arn = lb["LoadBalancerArn"]
- lb_name = lb.get("LoadBalancerName", lb_arn)
- lb_type = lb.get("Type", "application") # application or network
-
- # Calculate age
- create_time = lb.get("CreatedTime")
- age_days = 0
- if create_time:
- try:
- age_days = (now - create_time).days
- except TypeError:
- pass
-
- # Skip if younger than threshold
- if age_days < idle_days:
- continue
-
- # Check traffic via CloudWatch
- has_traffic, traffic_fetch_failed = _check_elbv2_traffic(
- cloudwatch, lb_arn, lb_type, idle_days
- )
- # has_traffic=True with fetch_failed=False → confirmed traffic, skip.
- # has_traffic=True with fetch_failed=True → metric unreadable; create LOW-confidence
- # finding so the operator knows to verify manually rather than silently suppress.
- if has_traffic and not traffic_fetch_failed:
- continue
-
- # Check registered targets
- has_targets = _check_elbv2_targets(elbv2, lb_arn)
-
- # Determine confidence
- if traffic_fetch_failed:
- # Metric read failed — traffic status unknown; operator must verify
- confidence = ConfidenceLevel.LOW
- elif not has_targets:
- confidence = ConfidenceLevel.HIGH
- else:
- confidence = ConfidenceLevel.MEDIUM
-
- type_label = "ALB" if lb_type == "application" else "NLB"
- rule_id = "aws.elbv2.alb.idle" if lb_type == "application" else "aws.elbv2.nlb.idle"
- primary_metric = "RequestCount" if lb_type == "application" else "NewFlowCount"
- scheme = lb.get("Scheme", "unknown")
-
- signals = [
- f"Load balancer type: {type_label}",
- f"Scheme: {scheme}",
- f"State: {lb.get('State', {}).get('Code', 'unknown')}",
+ for page in pages:
+ for raw_lb in page.get("LoadBalancers", []):
+ lb = _normalize_elbv2(raw_lb, idle_days_threshold, now_utc)
+ if lb is None:
+ continue # SKIP: non-dict or absent identity
+
+ # EXCLUSION: unsupported family (gateway or unknown)
+ if lb["lb_family"] == "unsupported":
+ continue
+
+ # EXCLUSION: unusable created_time
+ if lb["created_time"] is None or lb["age_days"] is None:
+ continue
+
+ # EXCLUSION: too new
+ if lb["age_days"] < idle_days_threshold:
+ continue
+
+ # EXCLUSION: unsupported ELBv2 state
+ if lb["state_code"] not in ("active", "active_impaired"):
+ continue
+
+ # Derive CloudWatch dimension — SKIP ITEM if unparsable
+ dimension_value = _extract_elbv2_dimension(lb["load_balancer_arn"])
+ if dimension_value is None:
+ continue # SKIP: ARN dimension unparsable
+
+ # --- Traffic check (raises → FAIL RULE) ---
+ if lb["lb_family"] == "alb":
+ has_traffic = _check_alb_traffic(cloudwatch, dimension_value, start_time, now_utc)
+ traffic_signals_checked = [
+ "RequestCount:Sum",
+ "ProcessedBytes:Sum",
+ "ActiveConnectionCount:Sum",
]
- if not traffic_fetch_failed:
- signals.insert(
- 0,
- f"Zero {primary_metric} and ProcessedBytes for {idle_days} days (CloudWatch)",
- )
-
- if not has_targets:
- signals.append("No registered targets")
- if age_days > 0:
- signals.append(f"Load balancer is {age_days} days old")
-
- signals_not_checked = [
- "Planned future usage",
- "Blue/green deployment scenarios",
- "Seasonal traffic patterns",
- "Internal health-check-only usage",
+ rule_id = "aws.elbv2.alb.idle"
+ label = "ALB"
+ resource_type = "aws.elbv2.load_balancer"
+ else:
+ has_traffic = _check_nlb_traffic(
+ cloudwatch, dimension_value, start_time, now_utc, idle_days_threshold
+ )
+ traffic_signals_checked = [
+ "NewFlowCount:Sum",
+ "ProcessedBytes:Sum",
+ "ActiveFlowCount:Maximum",
]
- if traffic_fetch_failed:
- signals_not_checked.insert(
- 0,
- f"Traffic metrics ({primary_metric}, ProcessedBytes) — CloudWatch fetch "
- "failed (transient/throttle error); traffic status unverified",
- )
-
- evidence = Evidence(
- signals_used=signals,
- signals_not_checked=signals_not_checked,
- time_window=f"{idle_days} days",
+ rule_id = "aws.elbv2.nlb.idle"
+ label = "NLB"
+ resource_type = "aws.elbv2.load_balancer"
+
+ if has_traffic:
+ continue # SKIP: trusted traffic present
+
+ # --- Backend context (best-effort) ---
+ target_count, tg_count, enrichment_ok = _get_elbv2_backend_context(
+ elbv2, lb["load_balancer_arn"]
+ )
+ if enrichment_ok:
+ has_registered_targets: Optional[bool] = target_count > 0
+ details_target_count: Optional[int] = target_count
+ details_tg_count: Optional[int] = tg_count
+ else:
+ # Enrichment failed — context unknown; do not fabricate zero counts
+ has_registered_targets = None
+ details_target_count = None
+ details_tg_count = None
+
+ # --- Confidence ---
+ if has_registered_targets is False:
+ confidence = ConfidenceLevel.HIGH
+ else:
+ # has targets OR unknown → MEDIUM
+ confidence = ConfidenceLevel.MEDIUM
+
+ created_time_str = lb["created_time"].isoformat() if lb["created_time"] else None
+
+ evidence = Evidence(
+ signals_used=[
+ f"Load balancer has been running for {lb['age_days']} days, "
+ f"exceeding the {idle_days_threshold}-day idle evaluation threshold",
+ f"No trusted CloudWatch traffic signal observed over the "
+ f"{idle_days_threshold}-day lookback window",
+ *(
+ ["No registered targets found"]
+ if has_registered_targets is False
+ else (
+ [f"{target_count} registered target(s) still present"]
+ if has_registered_targets
+ else []
+ )
+ ),
+ ],
+ signals_not_checked=[
+ "Planned future usage or blue/green staging",
+ "Seasonal traffic patterns outside the current lookback window",
+ "DNS / allowlist / manual failover dependencies still pointing at the load balancer",
+ "NLB traffic rejected by security groups, which is not captured in CloudWatch",
+ ],
+ time_window=f"{idle_days_threshold} days",
+ )
+
+ details = {
+ "evaluation_path": "idle-load-balancer-review-candidate",
+ "lb_family": lb["lb_family"],
+ "resource_id": lb["resource_id"],
+ "load_balancer_name": lb["load_balancer_name"],
+ "load_balancer_arn": lb["load_balancer_arn"],
+ "scheme": lb["scheme"],
+ "dns_name": lb["dns_name"],
+ "vpc_id": lb["vpc_id"],
+ "created_time": created_time_str,
+ "age_days": lb["age_days"],
+ "idle_days_threshold": idle_days_threshold,
+ "traffic_window_days": idle_days_threshold,
+ "traffic_signals_checked": traffic_signals_checked,
+ "traffic_detected": False,
+ "state_code": lb["state_code"],
+ "has_registered_targets": has_registered_targets,
+ "registered_target_count": details_target_count,
+ "target_group_count": details_tg_count,
+ }
+
+ findings.append(
+ Finding(
+ provider="aws",
+ rule_id=rule_id,
+ resource_type=resource_type,
+ resource_id=lb["resource_id"],
+ region=region,
+ title=f"Idle {label} review candidate",
+ summary=(
+ f"{label} '{lb['load_balancer_name']}' has had no trusted CloudWatch "
+ f"traffic signal over the last {idle_days_threshold} days; "
+ "review for possible cleanup"
+ ),
+ reason=(
+ f"{label} has no trusted CloudWatch traffic signal in the last "
+ f"{idle_days_threshold} days"
+ ),
+ risk=RiskLevel.MEDIUM,
+ confidence=confidence,
+ detected_at=now_utc,
+ evidence=evidence,
+ details=details,
+ estimated_monthly_cost_usd=None,
)
+ )
- if traffic_fetch_failed:
- title = f"{type_label} Requires Traffic Verification"
- summary = (
- f"{type_label} '{lb_name}' could not be verified as idle — "
- f"CloudWatch traffic metrics were unreadable (transient/throttle error)."
- )
- reason = f"{type_label} traffic metrics could not be fetched; idle status is unconfirmed"
- else:
- title = f"Idle {type_label} (No Traffic for {idle_days}+ Days)"
- summary = (
- f"{type_label} '{lb_name}' has had zero traffic for "
- f"{idle_days}+ days and is incurring base charges."
- )
- reason = f"{type_label} has zero traffic for {idle_days}+ days"
-
- findings.append(
- Finding(
- provider="aws",
- rule_id=rule_id,
- resource_type="aws.elbv2.load_balancer",
- resource_id=lb_arn,
- region=region,
- estimated_monthly_cost_usd=18.0,
- title=title,
- summary=summary,
- reason=reason,
- risk=RiskLevel.MEDIUM,
- confidence=confidence,
- detected_at=now,
- evidence=evidence,
- details={
- "name": lb_name,
- "type": lb_type,
- "scheme": scheme,
- "state": lb.get("State", {}).get("Code", "unknown"),
- "dns_name": lb.get("DNSName"),
- "vpc_id": lb.get("VpcId"),
- "age_days": age_days,
- "has_targets": has_targets,
- "idle_days_threshold": idle_days,
- "estimated_monthly_cost": (
- "~$16-22/month base cost (us-east-1 on-demand; "
- "region-dependent; excludes LCU/NLCU usage charges)"
- ),
- },
- )
- )
+ return findings
- except ClientError as e:
- code = e.response["Error"]["Code"]
- if code in ("UnauthorizedOperation", "AccessDenied"):
- raise PermissionError(
- "Missing required IAM permissions: "
- "elasticloadbalancing:DescribeLoadBalancers, "
- "elasticloadbalancing:DescribeTargetGroups, "
- "elasticloadbalancing:DescribeTargetHealth, "
- "cloudwatch:GetMetricStatistics"
- ) from e
- raise
- return findings
+# ---------------------------------------------------------------------------
+# CLB scanner
+# ---------------------------------------------------------------------------
def _scan_clb(
session: boto3.Session,
region: str,
cloudwatch,
- now: datetime,
- idle_days: int,
+ now_utc: datetime,
+ idle_days_threshold: int,
) -> List[Finding]:
- """Scan Classic Load Balancers for idle resources."""
elb = session.client("elb", region_name=region)
findings: List[Finding] = []
+ start_time = now_utc - timedelta(days=max(idle_days_threshold, 1))
try:
paginator = elb.get_paginator("describe_load_balancers")
-
- for page in paginator.paginate():
- for lb in page.get("LoadBalancerDescriptions", []):
- lb_name = lb["LoadBalancerName"]
-
- # Calculate age
- create_time = lb.get("CreatedTime")
- age_days = 0
- if create_time:
- try:
- age_days = (now - create_time).days
- except TypeError:
- pass
-
- # Skip if younger than threshold
- if age_days < idle_days:
- continue
-
- # Check traffic via CloudWatch
- has_traffic, traffic_fetch_failed = _check_clb_traffic(
- cloudwatch, lb_name, idle_days
- )
- # has_traffic=True with fetch_failed=False → confirmed traffic, skip.
- # has_traffic=True with fetch_failed=True → metric unreadable; create LOW-confidence
- # finding so the operator knows to verify manually rather than silently suppress.
- if has_traffic and not traffic_fetch_failed:
- continue
-
- # Check registered instances
- instances = lb.get("Instances", [])
- has_instances = len(instances) > 0
- scheme = lb.get("Scheme", "unknown")
-
- # Determine confidence
- if traffic_fetch_failed:
- confidence = ConfidenceLevel.LOW
- elif not has_instances:
- confidence = ConfidenceLevel.HIGH
- else:
- confidence = ConfidenceLevel.MEDIUM
-
- signals = [
- "Load balancer type: CLB",
- f"Scheme: {scheme}",
- ]
- if not traffic_fetch_failed:
- signals.insert(
- 0,
- f"Zero RequestCount and EstimatedProcessedBytes for {idle_days} days (CloudWatch)",
- )
-
- if not has_instances:
- signals.append("No registered instances")
- else:
- signals.append(f"{len(instances)} registered instance(s)")
- if age_days > 0:
- signals.append(f"Load balancer is {age_days} days old")
-
- signals_not_checked = [
- "Planned future usage",
- "Blue/green deployment scenarios",
- "Seasonal traffic patterns",
- "Internal health-check-only usage",
- ]
- if traffic_fetch_failed:
- signals_not_checked.insert(
- 0,
- "Traffic metrics (RequestCount, EstimatedProcessedBytes) — CloudWatch fetch "
- "failed (transient/throttle error); traffic status unverified",
- )
-
- evidence = Evidence(
- signals_used=signals,
- signals_not_checked=signals_not_checked,
- time_window=f"{idle_days} days",
- )
-
- if traffic_fetch_failed:
- title = "CLB Requires Traffic Verification"
- summary = (
- f"CLB '{lb_name}' could not be verified as idle — "
- "CloudWatch traffic metrics were unreadable (transient/throttle error)."
- )
- reason = "CLB traffic metrics could not be fetched; idle status is unconfirmed"
- else:
- title = f"Idle CLB (No Traffic for {idle_days}+ Days)"
- summary = (
- f"CLB '{lb_name}' has had zero traffic for "
- f"{idle_days}+ days and is incurring base charges."
- )
- reason = f"CLB has zero traffic for {idle_days}+ days"
-
- findings.append(
- Finding(
- provider="aws",
- rule_id="aws.elb.clb.idle",
- resource_type="aws.elb.load_balancer",
- resource_id=lb_name,
- region=region,
- estimated_monthly_cost_usd=18.0,
- title=title,
- summary=summary,
- reason=reason,
- risk=RiskLevel.MEDIUM,
- confidence=confidence,
- detected_at=now,
- evidence=evidence,
- details={
- "name": lb_name,
- "type": "classic",
- "scheme": scheme,
- "dns_name": lb.get("DNSName"),
- "vpc_id": lb.get("VPCId"),
- "age_days": age_days,
- "has_instances": has_instances,
- "instance_count": len(instances),
- "idle_days_threshold": idle_days,
- "estimated_monthly_cost": (
- "~$16-22/month base cost (us-east-1 on-demand; "
- "region-dependent; excludes LCU usage charges)"
- ),
- },
- )
- )
-
- except ClientError as e:
- code = e.response["Error"]["Code"]
- if code in ("UnauthorizedOperation", "AccessDenied"):
+ pages = list(paginator.paginate())
+ except ClientError as exc:
+ code = exc.response["Error"]["Code"]
+ if code in ("AccessDenied", "UnauthorizedOperation"):
raise PermissionError(
- "Missing required IAM permissions: "
- "elasticloadbalancing:DescribeLoadBalancers, "
- "cloudwatch:GetMetricStatistics"
- ) from e
+ "Missing required IAM permission: elasticloadbalancing:DescribeLoadBalancers"
+ ) from exc
+ raise
+ except BotoCoreError:
raise
- return findings
-
-
-def _check_elbv2_traffic(cloudwatch, lb_arn: str, lb_type: str, days: int) -> tuple:
- """Check if an ALB/NLB has had any traffic in the past `days` days.
-
- ALB: checks both RequestCount and ProcessedBytes.
- - RequestCount only increments when a target is chosen — fixed-response, redirect,
- and pre-routing-rejection actions leave it at zero even with real traffic.
- - ProcessedBytes captures all bytes processed by the ALB regardless of routing outcome.
-
- NLB: checks both NewFlowCount and ProcessedBytes.
- - NewFlowCount only counts flows successfully established to targets — traffic that
- hits the NLB listener but doesn't reach a target (e.g. health check gaps) is missed.
- - ProcessedBytes always reflects total bytes received/sent by the NLB.
-
- Either metric > 0 is treated as traffic (OR logic, conservative for false-positive avoidance).
-
- Returns (has_traffic: bool, fetch_failed: bool).
- fetch_failed is True when a transient/throttle error prevented a clean metric read.
- """
- now = datetime.now(timezone.utc)
- start_time = now - timedelta(days=max(days, 1))
- dimension_value = _extract_elbv2_dimension(lb_arn)
-
- if lb_type == "application":
- namespace = "AWS/ApplicationELB"
- primary_metric = "RequestCount"
- else:
- namespace = "AWS/NetworkELB"
- primary_metric = "NewFlowCount"
-
- def _fetch(metric_name: str) -> tuple:
- return _get_metric_sum(
- cloudwatch, namespace, metric_name, "LoadBalancer", dimension_value, start_time, now
- )
-
- primary_val, primary_err = _fetch(primary_metric)
- if primary_val > 0:
- return True, primary_err
+ for page in pages:
+ for raw_lb in page.get("LoadBalancerDescriptions", []):
+ lb = _normalize_clb(raw_lb, idle_days_threshold, now_utc)
+ if lb is None:
+ continue # SKIP: non-dict or absent identity
+
+ # EXCLUSION: unusable created_time
+ if lb["created_time"] is None or lb["age_days"] is None:
+ continue
+
+ # EXCLUSION: too new
+ if lb["age_days"] < idle_days_threshold:
+ continue
+
+ # --- Traffic check (raises → FAIL RULE) ---
+ has_traffic = _check_clb_traffic(
+ cloudwatch, lb["load_balancer_name"], start_time, now_utc
+ )
+ if has_traffic:
+ continue # SKIP: trusted traffic present
+
+ # --- Backend context from normalized item ---
+ instances = lb["instances"]
+ registered_instance_count = len(instances)
+ has_registered_instances = registered_instance_count > 0
+
+ # --- Confidence ---
+ confidence = (
+ ConfidenceLevel.HIGH if not has_registered_instances else ConfidenceLevel.MEDIUM
+ )
+
+ created_time_str = lb["created_time"].isoformat() if lb["created_time"] else None
+
+ evidence = Evidence(
+ signals_used=[
+ f"Load balancer has been running for {lb['age_days']} days, "
+ f"exceeding the {idle_days_threshold}-day idle evaluation threshold",
+ f"No trusted CloudWatch traffic signal observed over the "
+ f"{idle_days_threshold}-day lookback window",
+ *(
+ ["No registered instances found"]
+ if not has_registered_instances
+ else [f"{registered_instance_count} registered instance(s) still present"]
+ ),
+ ],
+ signals_not_checked=[
+ "Planned future usage or blue/green staging",
+ "Seasonal traffic patterns outside the current lookback window",
+ "DNS / allowlist / manual failover dependencies still pointing at the load balancer",
+ ],
+ time_window=f"{idle_days_threshold} days",
+ )
+
+ details = {
+ "evaluation_path": "idle-load-balancer-review-candidate",
+ "lb_family": "clb",
+ "resource_id": lb["resource_id"],
+ "load_balancer_name": lb["load_balancer_name"],
+ "load_balancer_arn": None,
+ "scheme": lb["scheme"],
+ "dns_name": lb["dns_name"],
+ "vpc_id": lb["vpc_id"],
+ "created_time": created_time_str,
+ "age_days": lb["age_days"],
+ "idle_days_threshold": idle_days_threshold,
+ "traffic_window_days": idle_days_threshold,
+ "traffic_signals_checked": ["RequestCount:Sum", "EstimatedProcessedBytes:Sum"],
+ "traffic_detected": False,
+ "has_registered_instances": has_registered_instances,
+ "registered_instance_count": registered_instance_count,
+ }
+
+ findings.append(
+ Finding(
+ provider="aws",
+ rule_id="aws.elb.clb.idle",
+ resource_type="aws.elb.load_balancer",
+ resource_id=lb["resource_id"],
+ region=region,
+ title="Idle CLB review candidate",
+ summary=(
+ f"CLB '{lb['load_balancer_name']}' has had no trusted CloudWatch "
+ f"traffic signal over the last {idle_days_threshold} days; "
+ "review for possible cleanup"
+ ),
+ reason=(
+ f"CLB has no trusted CloudWatch traffic signal in the last "
+ f"{idle_days_threshold} days"
+ ),
+ risk=RiskLevel.MEDIUM,
+ confidence=confidence,
+ detected_at=now_utc,
+ evidence=evidence,
+ details=details,
+ estimated_monthly_cost_usd=None,
+ )
+ )
- processed_val, processed_err = _fetch("ProcessedBytes")
- if processed_val > 0:
- return True, processed_err
+ return findings
- return False, (primary_err or processed_err)
+# ---------------------------------------------------------------------------
+# Public entry point
+# ---------------------------------------------------------------------------
-def _check_clb_traffic(cloudwatch, lb_name: str, days: int) -> tuple:
- """Check if a CLB has had any traffic in the past `days` days.
- Checks both RequestCount (HTTP/HTTPS listeners) and EstimatedProcessedBytes
- (all protocols including TCP/SSL). A CLB with only TCP/SSL listeners will
- always report zero RequestCount, so checking only that metric would produce
- false positives for any active TCP CLB.
+def find_idle_load_balancers(
+ session: boto3.Session,
+ region: str,
+ idle_days_threshold: int = _DEFAULT_IDLE_DAYS_THRESHOLD,
+) -> List[Finding]:
+ """Find idle ALB, NLB, and CLB load balancers with no trusted CloudWatch traffic.
- Returns (has_traffic: bool, fetch_failed: bool).
- fetch_failed is True when a transient/throttle error prevented a clean metric read.
+ Each branch (ELBv2 and CLB) is evaluated independently. A failure in one
+ branch does not prevent the other from running. If either branch fails the
+ exception is re-raised after both have been attempted.
"""
- now = datetime.now(timezone.utc)
- start_time = now - timedelta(days=max(days, 1))
-
- # HTTP/HTTPS traffic
- request_count, req_err = _get_metric_sum(
- cloudwatch,
- "AWS/ELB",
- "RequestCount",
- "LoadBalancerName",
- lb_name,
- start_time,
- now,
- )
- if request_count > 0:
- return True, req_err
-
- # TCP/SSL traffic (covers all protocols including HTTP/HTTPS)
- processed_bytes, proc_err = _get_metric_sum(
- cloudwatch,
- "AWS/ELB",
- "EstimatedProcessedBytes",
- "LoadBalancerName",
- lb_name,
- start_time,
- now,
- )
- return processed_bytes > 0, (req_err or proc_err)
-
-
-def _check_elbv2_targets(elbv2, lb_arn: str) -> bool:
- """Check if an ALB/NLB has any registered targets.
+ cloudwatch = session.client("cloudwatch", region_name=region)
+ now_utc = datetime.now(timezone.utc)
+ findings: List[Finding] = []
+ first_exc: Optional[BaseException] = None
- describe_target_health only returns targets that ARE registered in the target
- group — unregistered targets are simply absent from the response. Therefore
- any non-empty TargetHealthDescriptions list means there are registered targets,
- regardless of their health state (healthy/unhealthy/draining/unused all count).
- """
try:
- tg_resp = elbv2.describe_target_groups(LoadBalancerArn=lb_arn)
- for tg in tg_resp.get("TargetGroups", []):
- tg_arn = tg["TargetGroupArn"]
- health_resp = elbv2.describe_target_health(TargetGroupArn=tg_arn)
- if health_resp.get("TargetHealthDescriptions"):
- return True
- except ClientError:
- # If we can't check targets, assume they exist to avoid false positives
- return True
- return False
-
-
-def _extract_elbv2_dimension(lb_arn: str) -> str:
- """
- Extract the CloudWatch dimension value from an ELBv2 ARN.
-
- ARN format: arn:aws:elasticloadbalancing:region:account:loadbalancer/app/name/id
- Dimension value: app/name/id (or net/name/id for NLB)
- """
- parts = lb_arn.split("loadbalancer/", 1)
- if len(parts) == 2:
- return parts[1]
- return lb_arn
-
+ findings.extend(_scan_elbv2(session, region, cloudwatch, now_utc, idle_days_threshold))
+ except Exception as exc:
+ first_exc = exc
-def _get_metric_sum(
- cloudwatch,
- namespace: str,
- metric_name: str,
- dimension_name: str,
- dimension_value: str,
- start_time: datetime,
- end_time: datetime,
-) -> tuple:
- """Get sum of a CloudWatch metric over the time period.
-
- Returns (has_traffic: int, fetch_error: bool).
- - has_traffic: 1 if any datapoint had Sum > 0, else 0.
- - fetch_error: True if a non-permission error occurred (throttle, transient, etc.).
- When fetch_error is True, has_traffic is 1 (conservative — avoids false positives),
- but the caller should surface this to the operator via signals_not_checked.
- """
try:
- response = cloudwatch.get_metric_statistics(
- Namespace=namespace,
- MetricName=metric_name,
- Dimensions=[
- {
- "Name": dimension_name,
- "Value": dimension_value,
- }
- ],
- StartTime=start_time,
- EndTime=end_time,
- Period=86400, # 1 day in seconds
- Statistics=["Sum"],
- )
+ findings.extend(_scan_clb(session, region, cloudwatch, now_utc, idle_days_threshold))
+ except Exception as exc:
+ if first_exc is None:
+ first_exc = exc
- datapoints = response.get("Datapoints", [])
- if any(dp.get("Sum", 0) > 0 for dp in datapoints):
- return 1, False
- return 0, False
+ if first_exc is not None:
+ raise first_exc
- except ClientError as e:
- if e.response["Error"]["Code"] in ("AccessDenied", "UnauthorizedOperation"):
- raise PermissionError(
- "Missing required IAM permissions: cloudwatch:GetMetricStatistics"
- ) from e
- # Other errors (throttle, transient): assume traffic to avoid false positives,
- # but flag the error so the caller can surface it.
- return 1, True
+ return findings
diff --git a/cleancloud/providers/aws/rules/eni_detached.py b/cleancloud/providers/aws/rules/eni_detached.py
index a93409d..8615c3e 100644
--- a/cleancloud/providers/aws/rules/eni_detached.py
+++ b/cleancloud/providers/aws/rules/eni_detached.py
@@ -1,162 +1,265 @@
+"""
+Rule: aws.ec2.eni.detached
+
+ (spec — docs/specs/aws/eni_detached.md)
+
+Intent:
+ Detect network interfaces that are currently not attached according to the
+ EC2 DescribeNetworkInterfaces contract, so they can be reviewed as possible
+ cleanup candidates if no longer needed.
+
+Exclusions:
+ - network_interface_id absent (malformed identity)
+ - normalized_status absent (missing current-state signal)
+ - normalized_status != "available" (attached or other non-eligible state)
+ - attachment_status has a value other than null/absent or "detached" (any other value,
+ including unknown/malformed strings, is treated as inconsistent — SKIP ITEM)
+
+Detection:
+ - network_interface_id present
+ - normalized_status == "available"
+ - attachment_status absent, null, or "detached"
+
+Key rules:
+ - Top-level Status is the sole state authority; attachment_status is validation only.
+ - No temporal threshold — current not-attached state is the sole eligibility signal.
+ - No exclusion for interface_type, requester_managed, or operator_managed.
+ - Do not use CreateTime or any age/duration field for eligibility.
+ - estimated_monthly_cost_usd = None.
+ - Confidence: HIGH.
+ - Risk: LOW.
+
+Blind spots:
+ - how long the ENI has been in a not-currently-attached state
+ - previous attachment history
+ - whether an AWS service expects to recycle or clean up this ENI
+ - application, failover, or operational intent
+ - exact pricing impact
+
+APIs:
+ - ec2:DescribeNetworkInterfaces
+"""
+
from datetime import datetime, timezone
-from typing import List
+from typing import List, Optional
import boto3
-from botocore.exceptions import ClientError
+from botocore.exceptions import BotoCoreError, ClientError
from cleancloud.core.confidence import ConfidenceLevel
from cleancloud.core.evidence import Evidence
from cleancloud.core.finding import Finding
from cleancloud.core.risk import RiskLevel
+# --- Module-level constants ---
-def find_detached_enis(
- session: boto3.Session,
- region: str,
- max_age_days: int = 60,
-) -> List[Finding]:
- """
- Find Elastic Network Interfaces (ENIs) currently detached and 60+ days old.
+# Sole eligible top-level ENI status per EC2 documented contract.
+_ELIGIBLE_STATUS = "available"
- Detached ENIs incur small hourly charges and are often forgotten
- after failed deployments or when infrastructure is torn down incompletely.
+# Attachment states that are consistent with an available (not-currently-attached) ENI.
+# Any attachment_status outside this set is treated as inconsistent → SKIP ITEM.
+_ALLOWED_ATTACHMENT_STATUSES: frozenset = frozenset({None, "detached"})
- IMPORTANT: AWS does not expose "detached since" timestamp, so we use ENI
- creation age as a proxy. This is conservative - an ENI created 60 days ago
- and currently detached is worth reviewing even if it was recently detached.
+_FINDING_TITLE = "ENI not currently attached review candidate"
- SAFE RULE (review-only):
- - ENI Status == 'available' (not attached)
- - ENI creation age >= max_age_days threshold (NOT detached duration)
- - Excludes AWS infrastructure ENIs (NAT Gateway, Load Balancers, VPC Endpoints)
- - INCLUDES requester-managed ENIs (Lambda, ECS, RDS) - these are user resources!
+_SIGNAL_NOT_CURRENTLY_ATTACHED = (
+ "ENI top-level Status is 'available' (not currently attached per EC2 documented contract)"
+)
+_SIGNAL_REQUESTER_MANAGED = "ENI is requester-managed (created by an AWS service on your behalf)"
- IAM permissions:
- - ec2:DescribeNetworkInterfaces
+_SIGNALS_NOT_CHECKED = (
+ "How long the ENI has been in a not-currently-attached state",
+ "Previous attachment history",
+ "Whether an AWS service expects to recycle or clean up this ENI",
+ "Application, failover, or operational intent",
+ "Exact pricing impact",
+)
+
+
+def _str(value: object) -> Optional[str]:
+ """Return value as str only when it is a non-empty string; else None."""
+ return value if isinstance(value, str) and value else None
+
+
+def _bool_only(value: object) -> Optional[bool]:
+ """Return value only when it is an actual bool; else None."""
+ return value if isinstance(value, bool) else None
+
+
+def _normalize_eni(eni: object) -> Optional[dict]:
+ """Normalize a raw DescribeNetworkInterfaces item to the canonical field shape.
+
+ Returns None when the item is not a dict or required identity/state fields
+ are absent — the caller must skip the item. All rule logic must operate
+ only on the returned normalized dict.
"""
- ec2 = session.client("ec2", region_name=region)
+ if not isinstance(eni, dict):
+ return None
- now = datetime.now(timezone.utc)
- findings: List[Finding] = []
+ # --- Identity fields (required; absent → skip item) ---
+ network_interface_id = _str(eni.get("NetworkInterfaceId"))
+ if network_interface_id is None:
+ return None
- try:
- # Note: describe_network_interfaces supports pagination
- paginator = ec2.get_paginator("describe_network_interfaces")
+ # --- State fields (required; absent → skip item) ---
+ normalized_status = _str(eni.get("Status"))
+ if normalized_status is None:
+ return None
- for page in paginator.paginate():
- for eni in page.get("NetworkInterfaces", []):
- # Only consider detached ENIs
- if eni.get("Status") != "available":
- continue
-
- # Exclude AWS infrastructure ENIs using InterfaceType
- # These are ENIs for AWS infrastructure that users don't manage
- interface_type = eni.get("InterfaceType", "interface")
- if interface_type in [
- "nat_gateway", # NAT Gateway ENI (AWS infrastructure)
- "load_balancer", # ELB/ALB/NLB ENI (AWS infrastructure)
- "gateway_load_balancer", # Gateway Load Balancer
- "gateway_load_balancer_endpoint", # GWLB endpoint
- "vpc_endpoint", # VPC endpoint interface (AWS infrastructure)
- ]:
- continue
-
- # Note: We DO want to flag RequesterManaged ENIs with InterfaceType="interface"
- # These are user resources created by Lambda, ECS, RDS, etc. - common waste!
-
- # Calculate age since creation
- create_time = eni.get("CreateTime")
- if create_time is None:
- age_days = 0
- else:
- try:
- age_days = (now - create_time).days
- except TypeError:
- age_days = 0
-
- # Apply age threshold (skip if too young)
- if age_days < max_age_days:
- continue
-
- # Build evidence (be honest about what we're measuring)
- signals_used = [
- "ENI status is 'available' (currently detached)",
- f"ENI was created {age_days} days ago and is currently detached",
- ]
-
- # Note: We cannot measure "detached duration" because AWS doesn't expose DetachTime
- # We use creation age as a conservative proxy
-
- if eni.get("RequesterManaged"):
- signals_used.append(
- "ENI is requester-managed (created by AWS service such as Lambda/ECS)"
- )
-
- # Check if ENI has any tags
- tags = eni.get("TagSet", [])
- if not tags:
- signals_used.append("ENI has no tags (ownership unclear)")
-
- evidence = Evidence(
- signals_used=signals_used,
- signals_not_checked=[
- "Detached duration (AWS does not expose DetachTime)",
- "Previous attachment history",
- "AWS Hyperplane ENI reuse behavior (undocumented retention)",
- "Future planned attachments",
- "Application-level usage",
- "Manual operational workflows",
- ],
- time_window=f"{max_age_days} days since creation",
- )
+ # --- Attachment fields (all optional → null) ---
+ raw_attachment = eni.get("Attachment")
+ if isinstance(raw_attachment, dict):
+ attachment_status = _str(raw_attachment.get("Status"))
+ attachment_id = _str(raw_attachment.get("AttachmentId"))
+ attachment_instance_id = _str(raw_attachment.get("InstanceId"))
+ attachment_instance_owner_id = _str(raw_attachment.get("InstanceOwnerId"))
+ else:
+ attachment_status = None
+ attachment_id = None
+ attachment_instance_id = None
+ attachment_instance_owner_id = None
+
+ # --- Ownership / service-context fields (optional → null) ---
+ interface_type = _str(eni.get("InterfaceType"))
+ requester_managed = _bool_only(eni.get("RequesterManaged"))
+
+ raw_operator = eni.get("Operator")
+ if isinstance(raw_operator, dict):
+ operator_managed = _bool_only(raw_operator.get("Managed"))
+ operator_principal = _str(raw_operator.get("Principal"))
+ else:
+ operator_managed = None
+ operator_principal = None
+
+ # --- Network / resource-metadata fields (optional → null / []) ---
+ description = _str(eni.get("Description"))
+ availability_zone = _str(eni.get("AvailabilityZone"))
+ subnet_id = _str(eni.get("SubnetId"))
+ vpc_id = _str(eni.get("VpcId"))
+ private_ip_address = _str(eni.get("PrivateIpAddress"))
+
+ raw_association = eni.get("Association")
+ public_ip = _str(raw_association.get("PublicIp")) if isinstance(raw_association, dict) else None
+
+ raw_tag_set = eni.get("TagSet")
+ tag_set: list = raw_tag_set if isinstance(raw_tag_set, list) else []
+
+ return {
+ "resource_id": network_interface_id,
+ "network_interface_id": network_interface_id,
+ "normalized_status": normalized_status,
+ "attachment_status": attachment_status,
+ "attachment_id": attachment_id,
+ "attachment_instance_id": attachment_instance_id,
+ "attachment_instance_owner_id": attachment_instance_owner_id,
+ "interface_type": interface_type,
+ "requester_managed": requester_managed,
+ "operator_managed": operator_managed,
+ "operator_principal": operator_principal,
+ "description": description,
+ "availability_zone": availability_zone,
+ "subnet_id": subnet_id,
+ "vpc_id": vpc_id,
+ "private_ip_address": private_ip_address,
+ "public_ip": public_ip,
+ "tag_set": tag_set,
+ }
- # Build details
- details = {
- "status": eni.get("Status"),
- "age_days": age_days,
- "create_time": create_time.isoformat() if create_time else None,
- "interface_type": interface_type,
- "requester_managed": eni.get("RequesterManaged", False),
- "vpc_id": eni.get("VpcId"),
- "subnet_id": eni.get("SubnetId"),
- "availability_zone": eni.get("AvailabilityZone"),
- }
-
- description = eni.get("Description", "")
- if description:
- details["description"] = description
-
- if tags:
- details["tags"] = tags
-
- # Include private IP if present
- private_ips = eni.get("PrivateIpAddresses", [])
- if private_ips:
- details["private_ip"] = private_ips[0].get("PrivateIpAddress")
-
- findings.append(
- Finding(
- provider="aws",
- rule_id="aws.ec2.eni.detached",
- resource_type="aws.ec2.network_interface",
- resource_id=eni["NetworkInterfaceId"],
- region=region,
- title="Detached Network Interface (Review Recommended)",
- summary=f"ENI created {age_days} days ago and currently detached (incurs small hourly charges)",
- reason=f"ENI is {age_days} days old and currently in detached state, incurring charges",
- risk=RiskLevel.LOW,
- confidence=ConfidenceLevel.MEDIUM, # Medium because we can't measure detached duration
- detected_at=now,
- evidence=evidence,
- details=details,
- )
- )
- except ClientError as e:
- if e.response["Error"]["Code"] == "UnauthorizedOperation":
+def find_detached_enis(
+ session: boto3.Session,
+ region: str,
+) -> List[Finding]:
+ ec2 = session.client("ec2", region_name=region)
+
+ try:
+ paginator = ec2.get_paginator("describe_network_interfaces")
+ pages = list(paginator.paginate())
+ except ClientError as exc:
+ if exc.response["Error"]["Code"] == "UnauthorizedOperation":
raise PermissionError(
"Missing required IAM permission: ec2:DescribeNetworkInterfaces"
- ) from e
+ ) from exc
+ raise
+ except BotoCoreError:
raise
+ now = datetime.now(timezone.utc)
+ findings: List[Finding] = []
+
+ for page in pages:
+ for raw_eni in page.get("NetworkInterfaces", []):
+ # --- Step 1: Normalize ---
+ n = _normalize_eni(raw_eni)
+ if n is None:
+ continue
+
+ # --- Step 2: EXCLUSION RULES ---
+
+ # EXCLUSION: top-level status must be the sole eligible state
+ if n["normalized_status"] != _ELIGIBLE_STATUS:
+ continue
+
+ # EXCLUSION: attachment_status must be in the allowed set (None or "detached").
+ # Any other value — known conflict statuses or unknown/malformed strings —
+ # is inconsistent with the available state → SKIP ITEM.
+ if n["attachment_status"] not in _ALLOWED_ATTACHMENT_STATUSES:
+ continue
+
+ # --- Detection path: detached-eni-review-candidate ---
+
+ signals_used = [_SIGNAL_NOT_CURRENTLY_ATTACHED]
+ if n["requester_managed"] is True:
+ signals_used.append(_SIGNAL_REQUESTER_MANAGED)
+ if n["operator_managed"] is True:
+ principal = n["operator_principal"] or "unknown"
+ signals_used.append(f"ENI is operator-managed (operator principal: {principal})")
+
+ findings.append(
+ Finding(
+ provider="aws",
+ rule_id="aws.ec2.eni.detached",
+ resource_type="aws.ec2.network_interface",
+ resource_id=n["network_interface_id"],
+ region=region,
+ estimated_monthly_cost_usd=None,
+ title=_FINDING_TITLE,
+ summary=(
+ f"ENI {n['network_interface_id']} Status is 'available' — "
+ "not currently attached per DescribeNetworkInterfaces"
+ ),
+ reason=(
+ "ENI Status is 'available' — not currently attached "
+ "per DescribeNetworkInterfaces"
+ ),
+ risk=RiskLevel.LOW,
+ confidence=ConfidenceLevel.HIGH,
+ detected_at=now,
+ evidence=Evidence(
+ signals_used=signals_used,
+ signals_not_checked=list(_SIGNALS_NOT_CHECKED),
+ ),
+ details={
+ "evaluation_path": "detached-eni-review-candidate",
+ "network_interface_id": n["network_interface_id"],
+ "normalized_status": n["normalized_status"],
+ "attachment_status": n["attachment_status"],
+ "attachment_id": n["attachment_id"],
+ "attachment_instance_id": n["attachment_instance_id"],
+ "attachment_instance_owner_id": n["attachment_instance_owner_id"],
+ "interface_type": n["interface_type"],
+ "requester_managed": n["requester_managed"],
+ "operator_managed": n["operator_managed"],
+ "operator_principal": n["operator_principal"],
+ "availability_zone": n["availability_zone"],
+ "subnet_id": n["subnet_id"],
+ "vpc_id": n["vpc_id"],
+ "private_ip_address": n["private_ip_address"],
+ "public_ip": n["public_ip"],
+ "description": n["description"],
+ "tag_set": n["tag_set"],
+ },
+ )
+ )
+
return findings
diff --git a/cleancloud/providers/aws/rules/nat_gateway_idle.py b/cleancloud/providers/aws/rules/nat_gateway_idle.py
index ba9b270..101f981 100644
--- a/cleancloud/providers/aws/rules/nat_gateway_idle.py
+++ b/cleancloud/providers/aws/rules/nat_gateway_idle.py
@@ -1,385 +1,378 @@
+"""
+Rule: aws.ec2.nat_gateway.idle
+
+ (spec — docs/specs/aws/nat_gateway_idle.md)
+
+Intent:
+ Detect NAT Gateways that are currently available, old enough to evaluate,
+ and show no trusted CloudWatch traffic/activity evidence during the
+ configured observation window, so they can be reviewed as possible cleanup
+ candidates.
+
+Exclusions:
+ - nat_gateway_id absent (malformed identity)
+ - normalized_state absent (missing current-state signal)
+ - normalized_state != "available"
+ - create_time_utc absent, naive, or in the future
+ - age_days < idle_days_threshold (too new to evaluate)
+ - any required CloudWatch metric has no datapoints (insufficient evidence)
+ - any required metric shows activity > 0
+
+Detection:
+ - nat_gateway_id present, normalized_state == "available"
+ - age_days >= idle_days_threshold
+ - all 5 required CloudWatch metrics return datapoints and are all zero
+
+Key rules:
+ - Missing CloudWatch datapoints → SKIP ITEM (not zero).
+ - CloudWatch API failure → FAIL RULE (not LOW-confidence finding).
+ - 5 required metrics: BytesOutToDestination, BytesInFromSource,
+ BytesInFromDestination, BytesOutToSource (Sum), ActiveConnectionCount (Maximum).
+ - Route-table context is contextual only; absence does not substitute
+ for CloudWatch evidence.
+ - Naive CreateTime → SKIP ITEM.
+ - estimated_monthly_cost_usd = None.
+  - Confidence: HIGH (route check succeeded with no references) or MEDIUM (referenced, or check unavailable).
+ - Risk: MEDIUM.
+
+Blind spots:
+ - planned future usage or DR/failover intent
+ - seasonal or cyclical usage outside the observation window
+ - organizational ownership or business intent
+ - exact region-specific pricing impact
+
+APIs:
+ - ec2:DescribeNatGateways
+ - cloudwatch:GetMetricStatistics
+ - ec2:DescribeRouteTables (contextual)
+"""
+
from datetime import datetime, timedelta, timezone
-from typing import List
+from typing import List, Optional, Tuple
import boto3
-from botocore.exceptions import ClientError
+from botocore.exceptions import BotoCoreError, ClientError
from cleancloud.core.confidence import ConfidenceLevel
from cleancloud.core.evidence import Evidence
from cleancloud.core.finding import Finding
from cleancloud.core.risk import RiskLevel
+# --- Module-level constants ---
-def find_idle_nat_gateways(
- session: boto3.Session,
- region: str,
- idle_days: int = 14,
-) -> List[Finding]:
- """
- Find NAT Gateways with no traffic for `idle_days` days.
-
- NAT Gateways incur a fixed hourly charge (~$0.045/hr ≈ $32.85/month) regardless of
- connectivity type (public or private), plus per-GB data processing fees
- ($0.045/GB for public, $0.01/GB for private). The hourly cost alone makes idle
- gateways a meaningful waste.
-
- Detection logic:
- - NAT Gateway state is 'available'
- - Older than `idle_days` (noise-reduction heuristic — new gateways may not have had
- time for operators to configure routing; this is NOT an AWS-defined grace period)
- - All four CloudWatch byte metrics are zero over `idle_days`
- - Not referenced by any VPC route table (corroborating idle signal)
-
- Notes on accuracy:
- - CloudWatch NAT Gateway metrics are eventually consistent and can lag by minutes
- to hours. More importantly, datapoints can be absent entirely for periods of low
- activity — CloudWatch omits zero-value datapoints rather than publishing them.
- Missing datapoints are treated as zero by this rule, which means low-but-nonzero
- traffic could be missed if it falls within a gap in metric publication.
- - Daily (86400s) granularity is used; within-day bursts contribute to that day's Sum,
- but a burst that happens to fall in a metric-publication gap may not appear.
- - Zero traffic may be intentional: DR/failover, pre-warmed infrastructure, or
- seasonal traffic patterns. Always review before acting.
- - Elastic IPs associated with a public NAT Gateway may incur idle charges even
- after the gateway is deleted; check and release them separately.
- - Route table references are a corroborating signal only — a referenced route table
- does not prove the gateway is actively used; it only means it is reachable.
-
- IAM permissions:
- - ec2:DescribeNatGateways
- - ec2:DescribeRouteTables
- - cloudwatch:GetMetricStatistics
- """
- ec2 = session.client("ec2", region_name=region)
- cloudwatch = session.client("cloudwatch", region_name=region)
+_DEFAULT_IDLE_DAYS_THRESHOLD = 14
+_ELIGIBLE_STATE = "available"
+_CW_NAMESPACE = "AWS/NATGateway"
+_CW_DIM = "NatGatewayId"
- now = datetime.now(timezone.utc)
- findings: List[Finding] = []
+# Required metrics in evaluation order: (metric_name, statistic, detail_key)
+_REQUIRED_METRICS: Tuple = (
+ ("BytesOutToDestination", "Sum", "bytes_out_to_destination"),
+ ("BytesInFromSource", "Sum", "bytes_in_from_source"),
+ ("BytesInFromDestination", "Sum", "bytes_in_from_destination"),
+ ("BytesOutToSource", "Sum", "bytes_out_to_source"),
+ ("ActiveConnectionCount", "Maximum", "active_connection_count_max"),
+)
- try:
- paginator = ec2.get_paginator("describe_nat_gateways")
+_FINDING_TITLE = "Idle NAT Gateway review candidate"
- for page in paginator.paginate():
- for nat_gw in page.get("NatGateways", []):
- # Only check available gateways
- state = nat_gw.get("State")
- if state != "available":
- continue
-
- nat_gw_id = nat_gw["NatGatewayId"]
- connectivity_type = nat_gw.get("ConnectivityType", "public")
-
- # Calculate age
- create_time = nat_gw.get("CreateTime")
- age_days = 0
- if create_time:
- try:
- age_days = (now - create_time).days
- except TypeError:
- pass
-
- # Noise-reduction heuristic: skip recently created gateways.
- # New gateways may not have had time for route tables to be configured.
- # This is NOT an AWS-defined grace period — adjust idle_days as needed.
- if age_days < idle_days:
- continue
-
- # Check CloudWatch metrics for traffic — all 4 direction metrics
- (
- has_traffic,
- fetch_failed,
- bytes_out_dest,
- bytes_in_src,
- bytes_in_dest,
- bytes_out_src,
- ) = _check_nat_gateway_traffic(cloudwatch, nat_gw_id, idle_days)
-
- # has_traffic=True with fetch_failed=False → confirmed traffic, skip.
- # has_traffic=True with fetch_failed=True → metric unreadable; create a
- # LOW-confidence finding so the operator knows to verify manually.
- if has_traffic and not fetch_failed:
- continue
-
- # Check route table associations — a NAT GW not referenced by any route
- # table is not reachable from any subnet (strong corroborating idle signal).
- in_route_tables, route_table_check_failed = _check_route_table_references(
- ec2, nat_gw_id
- )
+_SIGNALS_NOT_CHECKED = (
+ "Planned future usage or infrastructure pre-warming intent",
+ "Disaster recovery or failover intent — zero traffic may be intentional",
+ "Seasonal or cyclical usage patterns outside the observation window",
+ "Organizational ownership or business intent",
+ "Exact region-specific pricing impact",
+)
- # Get VPC and subnet info
- vpc_id = nat_gw.get("VpcId")
- subnet_id = nat_gw.get("SubnetId")
-
- # Get Elastic IP info
- addresses = nat_gw.get("NatGatewayAddresses", [])
- eip_info = []
- for addr in addresses:
- eip_info.append(
- {
- "allocation_id": addr.get("AllocationId"),
- "public_ip": addr.get("PublicIp"),
- "private_ip": addr.get("PrivateIp"),
- }
- )
-
- if fetch_failed:
- confidence = ConfidenceLevel.LOW
- risk = RiskLevel.MEDIUM
- title = "NAT Gateway Requires Traffic Verification"
- summary = (
- f"NAT Gateway '{nat_gw_id}' could not be verified as idle — "
- "CloudWatch traffic metrics were unreadable (transient/throttle error)."
- )
- reason = "NAT Gateway traffic metrics could not be fetched; idle status is unconfirmed"
- elif not in_route_tables and not route_table_check_failed:
- # Zero traffic confirmed AND no route table references the gateway —
- # two independent signals agree; HIGH confidence and risk warranted.
- confidence = ConfidenceLevel.HIGH
- risk = RiskLevel.HIGH
- title = f"Idle NAT Gateway (No Traffic for {idle_days}+ Days, Not Routed)"
- summary = (
- f"NAT Gateway '{nat_gw_id}' has had no traffic for {idle_days}+ days "
- "and is not referenced by any route table — it is unreachable and billing."
- )
- reason = (
- f"NAT Gateway has zero traffic for {idle_days}+ days "
- "and is not referenced by any VPC route table"
- )
- else:
- confidence = ConfidenceLevel.MEDIUM
- risk = RiskLevel.MEDIUM
- title = f"Idle NAT Gateway (No Traffic for {idle_days}+ Days)"
- summary = (
- f"NAT Gateway '{nat_gw_id}' has had no traffic for "
- f"{idle_days}+ days and is incurring ~$32.85/month in base charges."
- )
- reason = f"NAT Gateway has zero traffic for {idle_days}+ days"
-
- signals = []
- if fetch_failed:
- signals.append(
- "CloudWatch traffic metrics unreadable (transient/throttle error) — "
- "traffic status unverified"
- )
- else:
- signals.append(
- f"No traffic detected for {idle_days} days (all 4 CloudWatch direction metrics; "
- "note: metrics are eventually consistent and may lag by minutes to hours)"
- )
- signals.append(f"BytesOutToDestination: {bytes_out_dest} bytes")
- signals.append(f"BytesInFromSource: {bytes_in_src} bytes")
- signals.append(f"BytesInFromDestination: {bytes_in_dest} bytes")
- signals.append(f"BytesOutToSource: {bytes_out_src} bytes")
-
- signals.append(f"NAT Gateway state is '{state}'")
- signals.append(f"Connectivity type: {connectivity_type}")
-
- if not in_route_tables and not route_table_check_failed:
- signals.append(
- "Not referenced by any VPC route table — gateway is unreachable from all subnets"
- )
- elif in_route_tables:
- signals.append("Referenced by at least one VPC route table")
-
- if age_days > 0:
- signals.append(f"NAT Gateway is {age_days} days old")
-
- signals_not_checked = [
- "Planned future usage",
- "Disaster recovery or failover intent — zero traffic may be intentional for DR standby",
- "Blue/green deployment scenarios",
- "Seasonal traffic patterns",
- "Development/staging environment cycles",
- ]
- if fetch_failed:
- signals_not_checked.insert(
- 0,
- "Traffic metrics (BytesOutToDestination, BytesInFromSource, "
- "BytesInFromDestination, BytesOutToSource) — CloudWatch fetch failed; "
- "traffic status unverified",
- )
- if route_table_check_failed:
- signals_not_checked.append(
- "Route table associations — DescribeRouteTables failed; "
- "could not confirm whether gateway is referenced"
- )
- if connectivity_type == "public" and eip_info:
- signals_not_checked.append(
- "Elastic IP idle charges — associated EIPs may incur additional cost "
- "even after the NAT Gateway is deleted; release them separately"
- )
-
- evidence = Evidence(
- signals_used=signals,
- signals_not_checked=signals_not_checked,
- time_window=f"{idle_days} days",
- )
- data_processing_note = (
- "$0.045/GB for public, $0.01/GB for private"
- if connectivity_type == "public"
- else "$0.01/GB (private NAT Gateway)"
- )
- tags = nat_gw.get("Tags", [])
- name_tag = next((t["Value"] for t in tags if t.get("Key") == "Name"), None)
-
- findings.append(
- Finding(
- provider="aws",
- rule_id="aws.ec2.nat_gateway.idle",
- resource_type="aws.ec2.nat_gateway",
- resource_id=nat_gw_id,
- region=region,
- estimated_monthly_cost_usd=32.85,
- title=title,
- summary=summary,
- reason=reason,
- risk=risk,
- confidence=confidence,
- detected_at=now,
- evidence=evidence,
- details={
- "name": name_tag,
- "connectivity_type": connectivity_type,
- "state": state,
- "age_days": age_days,
- "create_time": (create_time.isoformat() if create_time else None),
- "vpc_id": vpc_id,
- "subnet_id": subnet_id,
- "elastic_ips": eip_info,
- "in_route_tables": in_route_tables,
- "bytes_out_to_destination": bytes_out_dest,
- "bytes_in_from_source": bytes_in_src,
- "bytes_in_from_destination": bytes_in_dest,
- "bytes_out_to_source": bytes_out_src,
- "idle_days_threshold": idle_days,
- "estimated_monthly_cost": (
- f"~$32.85/month base hourly cost (us-east-1 on-demand; "
- f"region-dependent; excludes data processing charges: "
- f"{data_processing_note})"
- ),
- "tags": tags,
- },
- )
- )
+def _str(value: object) -> Optional[str]:
+ """Return value as str only when it is a non-empty string; else None."""
+ return value if isinstance(value, str) and value else None
- except ClientError as e:
- code = e.response["Error"]["Code"]
- if code in ("UnauthorizedOperation", "AccessDenied"):
- raise PermissionError(
- "Missing required IAM permissions: "
- "ec2:DescribeNatGateways, ec2:DescribeRouteTables, cloudwatch:GetMetricStatistics"
- ) from e
- raise
- return findings
+def _choose_period(idle_days: int) -> int:
+ """Return a deterministic Period compliant with CloudWatch retention rules.
+ idle_days * 86400 is a multiple of 60, 300, and 3600, satisfying all three
+ CloudWatch retention constraints for the chosen lookback window.
+ """
+ return idle_days * 86400
-def _check_route_table_references(ec2, nat_gw_id: str) -> tuple:
- """Check whether any VPC route table has a route pointing to this NAT Gateway.
- A NAT Gateway not referenced by any route table is unreachable from all subnets
- and is therefore a strong corroborating idle signal.
+def _normalize_nat_gw(item: object, now_utc: datetime) -> Optional[dict]:
+ """Normalize a raw DescribeNatGateways item to the canonical field shape.
- Returns (in_route_tables: bool, check_failed: bool).
- check_failed is True if DescribeRouteTables raised a non-permission error.
+ Returns None when the item is not a dict or required identity/state/age fields
+ are absent or invalid — the caller must skip the item.
+ All rule logic must operate only on the returned normalized dict.
"""
- try:
- response = ec2.describe_route_tables(
- Filters=[{"Name": "route.nat-gateway-id", "Values": [nat_gw_id]}]
- )
- return len(response.get("RouteTables", [])) > 0, False
- except ClientError as e:
- code = e.response["Error"]["Code"]
- if code in ("AccessDenied", "UnauthorizedOperation"):
- # Surface as a check failure rather than raising — missing this permission
- # degrades the signal but should not abort the scan.
- return False, True
- return False, True
-
-
-def _check_nat_gateway_traffic(
+ if not isinstance(item, dict):
+ return None
+
+ # --- Identity fields (required; absent → skip) ---
+ nat_gateway_id = _str(item.get("NatGatewayId"))
+ if nat_gateway_id is None:
+ return None
+
+ # --- State fields (required; absent → skip) ---
+ normalized_state = _str(item.get("State"))
+ if normalized_state is None:
+ return None
+
+ # --- CreateTime (required; absent, naive, or future → skip) ---
+ raw_ct = item.get("CreateTime")
+ if not isinstance(raw_ct, datetime):
+ return None
+ if raw_ct.tzinfo is None:
+ # Naive datetime — cannot safely compare to UTC; treat as absent → skip.
+ return None
+ create_time_utc = raw_ct.astimezone(timezone.utc)
+ if create_time_utc > now_utc:
+ # Future CreateTime is invalid → skip.
+ return None
+ age_days = int((now_utc - create_time_utc).total_seconds() // 86400)
+
+ # --- Core context fields (optional → null / []) ---
+ connectivity_type = _str(item.get("ConnectivityType"))
+ availability_mode = _str(item.get("AvailabilityMode"))
+ vpc_id = _str(item.get("VpcId"))
+ subnet_id = _str(item.get("SubnetId"))
+
+ raw_addresses = item.get("NatGatewayAddresses")
+ nat_gateway_addresses = raw_addresses if isinstance(raw_addresses, list) else []
+
+ raw_appliances = item.get("AttachedAppliances")
+ attached_appliances = raw_appliances if isinstance(raw_appliances, list) else []
+
+ raw_tags = item.get("Tags")
+ tag_set: list = raw_tags if isinstance(raw_tags, list) else []
+
+ return {
+ "resource_id": nat_gateway_id,
+ "nat_gateway_id": nat_gateway_id,
+ "normalized_state": normalized_state,
+ "create_time_utc": create_time_utc,
+ "age_days": age_days,
+ "connectivity_type": connectivity_type,
+ "availability_mode": availability_mode,
+ "vpc_id": vpc_id,
+ "subnet_id": subnet_id,
+ "nat_gateway_addresses": nat_gateway_addresses,
+ "attached_appliances": attached_appliances,
+ "auto_scaling_ips": _str(item.get("AutoScalingIps")),
+ "auto_provision_zones": _str(item.get("AutoProvisionZones")),
+ "tag_set": tag_set,
+ }
+
+
+def _get_metric_value(
cloudwatch,
nat_gw_id: str,
- days: int,
-) -> tuple:
- """
- Check if NAT Gateway has had any traffic in the past `days` days.
-
- AWS publishes four directional metrics for NAT Gateways:
- - BytesOutToDestination: private subnet → internet (outbound requests)
- - BytesInFromSource: private subnet → NAT GW (client-side inbound)
- - BytesInFromDestination: internet → NAT GW (return traffic)
- - BytesOutToSource: NAT GW → private subnet (return traffic to client)
-
- All four are checked to avoid missing asymmetric or long-lived connections
- where only return-path traffic falls within the observation window.
+ metric_name: str,
+ statistic: str,
+ start_time: datetime,
+ end_time: datetime,
+ period: int,
+) -> Optional[float]:
+ """Fetch a single metric over the observation window.
- Returns (has_traffic, fetch_failed, bytes_out_dest, bytes_in_src, bytes_in_dest, bytes_out_src).
- fetch_failed is True if any metric fetch encountered a transient/throttle error.
- When fetch_failed is True, has_traffic is True (conservative) — but the caller
- should surface this uncertainty rather than silently treating the gateway as active.
+ Returns None if no datapoints (insufficient evidence → caller must SKIP ITEM).
+ Returns the aggregated value (>= 0.0) if datapoints are present.
+ Raises ClientError / BotoCoreError / PermissionError on API failure (caller → FAIL RULE).
"""
- now = datetime.now(timezone.utc)
- start_time = now - timedelta(days=days)
-
- def _fetch(metric_name: str) -> tuple:
- return _get_metric_sum(
- cloudwatch, "AWS/NATGateway", metric_name, "NatGatewayId", nat_gw_id, start_time, now
+ try:
+ resp = cloudwatch.get_metric_statistics(
+ Namespace=_CW_NAMESPACE,
+ MetricName=metric_name,
+ Dimensions=[{"Name": _CW_DIM, "Value": nat_gw_id}],
+ StartTime=start_time,
+ EndTime=end_time,
+ Period=period,
+ Statistics=[statistic],
)
+ except ClientError as exc:
+ if exc.response["Error"]["Code"] in ("AccessDenied", "UnauthorizedOperation"):
+ raise PermissionError(
+ "Missing required IAM permission: cloudwatch:GetMetricStatistics"
+ ) from exc
+ raise
+ except BotoCoreError:
+ raise
- out_dest, err1 = _fetch("BytesOutToDestination")
- in_src, err2 = _fetch("BytesInFromSource")
- in_dest, err3 = _fetch("BytesInFromDestination")
- out_src, err4 = _fetch("BytesOutToSource")
+ datapoints = resp.get("Datapoints", [])
+ if not datapoints:
+ return None # No datapoints → insufficient evidence → SKIP ITEM
- fetch_failed = err1 or err2 or err3 or err4
- has_traffic = (out_dest > 0) or (in_src > 0) or (in_dest > 0) or (out_src > 0)
- return has_traffic, fetch_failed, out_dest, in_src, in_dest, out_src
+ if statistic == "Sum":
+ return sum(dp.get("Sum", 0.0) for dp in datapoints)
+ if statistic == "Maximum":
+ return max(dp.get("Maximum", 0.0) for dp in datapoints)
+ # Fallback for any other statistic (not expected in this rule)
+ return sum(dp.get(statistic, 0.0) for dp in datapoints)
-def _get_metric_sum(
- cloudwatch,
- namespace: str,
- metric_name: str,
- dimension_name: str,
- dimension_value: str,
- start_time: datetime,
- end_time: datetime,
-) -> tuple:
- """Get sum of a CloudWatch metric over the time period.
-
- Returns (value: int, fetch_error: bool).
- - value: total bytes summed across all datapoints (0 if no data).
- - fetch_error: True if a non-permission error occurred (throttle, transient, etc.).
- When fetch_error is True, value is 1 (conservative — avoids false positives),
- but the caller should surface this to the operator via evidence.
+def _check_route_table_references(ec2, nat_gw_id: str) -> Tuple[Optional[bool], bool]:
+ """Check whether any VPC route table references this NAT Gateway.
+
+ Returns (route_table_referenced, check_succeeded):
+ - (False, True) — no route table references found
+ - (True, True) — at least one route table references the NAT Gateway
+ - (None, False) — DescribeRouteTables failed; context unavailable
"""
try:
- response = cloudwatch.get_metric_statistics(
- Namespace=namespace,
- MetricName=metric_name,
- Dimensions=[
- {
- "Name": dimension_name,
- "Value": dimension_value,
- }
- ],
- StartTime=start_time,
- EndTime=end_time,
- Period=86400, # 1 day in seconds
- Statistics=["Sum"],
+ response = ec2.describe_route_tables(
+ Filters=[{"Name": "route.nat-gateway-id", "Values": [nat_gw_id]}]
)
+ return len(response.get("RouteTables", [])) > 0, True
+ except Exception:
+ return None, False
- datapoints = response.get("Datapoints", [])
- total = sum(dp.get("Sum", 0) for dp in datapoints)
- return int(total), False
- except ClientError as e:
- if e.response["Error"]["Code"] in ("AccessDenied", "UnauthorizedOperation"):
+def find_idle_nat_gateways(
+ session: boto3.Session,
+ region: str,
+ idle_days_threshold: int = _DEFAULT_IDLE_DAYS_THRESHOLD,
+) -> List[Finding]:
+ ec2 = session.client("ec2", region_name=region)
+ cloudwatch = session.client("cloudwatch", region_name=region)
+
+ try:
+ paginator = ec2.get_paginator("describe_nat_gateways")
+ pages = list(paginator.paginate())
+ except ClientError as exc:
+ if exc.response["Error"]["Code"] in ("AccessDenied", "UnauthorizedOperation"):
raise PermissionError(
- "Missing required IAM permissions: cloudwatch:GetMetricStatistics"
- ) from e
- # Other errors (throttle, transient): assume traffic to avoid false positives,
- # but flag the error so the caller can surface it.
- return 1, True
+ "Missing required IAM permission: ec2:DescribeNatGateways"
+ ) from exc
+ raise
+ except BotoCoreError:
+ raise
+
+ now = datetime.now(timezone.utc)
+ window_start = now - timedelta(seconds=idle_days_threshold * 86400)
+ period = _choose_period(idle_days_threshold)
+ findings: List[Finding] = []
+
+ for page in pages:
+ for raw_item in page.get("NatGateways", []):
+ # --- Step 1: Normalize ---
+ n = _normalize_nat_gw(raw_item, now)
+ if n is None:
+ continue
+
+ # --- Step 2: EXCLUSION RULES ---
+
+ # EXCLUSION: state must be available
+ if n["normalized_state"] != _ELIGIBLE_STATE:
+ continue
+
+ # EXCLUSION: too young to evaluate
+ if n["age_days"] < idle_days_threshold:
+ continue
+
+ # --- Step 3: CloudWatch metrics (FAIL RULE on error; SKIP ITEM if no data) ---
+ metric_values: dict = {}
+ skip_insufficient = False
+
+ for metric_name, statistic, detail_key in _REQUIRED_METRICS:
+ value = _get_metric_value(
+ cloudwatch,
+ n["nat_gateway_id"],
+ metric_name,
+ statistic,
+ window_start,
+ now,
+ period,
+ )
+ if value is None:
+ # No datapoints for this metric → insufficient evidence → SKIP ITEM
+ skip_insufficient = True
+ break
+ metric_values[detail_key] = value
+
+ if skip_insufficient:
+ continue
+
+ # EXCLUSION: any metric shows activity
+ if any(v > 0 for v in metric_values.values()):
+ continue
+
+ # --- Step 4: Route-table context (optional; failure degrades context only) ---
+ route_table_referenced, rt_check_ok = _check_route_table_references(
+ ec2, n["nat_gateway_id"]
+ )
+
+ # --- Step 5: Confidence ---
+ if rt_check_ok and route_table_referenced is False:
+ confidence = ConfidenceLevel.HIGH
+ rt_signal = "No VPC route table references this NAT Gateway"
+ elif rt_check_ok and route_table_referenced is True:
+ confidence = ConfidenceLevel.MEDIUM
+ rt_signal = "At least one VPC route table still references this NAT Gateway"
+ else:
+ confidence = ConfidenceLevel.MEDIUM
+ rt_signal = "Route-table context unavailable (DescribeRouteTables failed)"
+
+ reason = (
+ f"NAT Gateway has no trusted CloudWatch traffic signal "
+ f"in the last {idle_days_threshold} days"
+ )
+
+ signals_used = [
+ f"NAT Gateway State is '{_ELIGIBLE_STATE}' (able to process traffic)",
+ f"Age is {n['age_days']} days, meeting the {idle_days_threshold}-day threshold",
+ "All 5 required CloudWatch activity metrics returned datapoints and are zero "
+ f"for the {idle_days_threshold}-day observation window "
+ "(CleanCloud-derived idle heuristic)",
+ rt_signal,
+ ]
+
+ findings.append(
+ Finding(
+ provider="aws",
+ rule_id="aws.ec2.nat_gateway.idle",
+ resource_type="aws.ec2.nat_gateway",
+ resource_id=n["nat_gateway_id"],
+ region=region,
+ estimated_monthly_cost_usd=None,
+ title=_FINDING_TITLE,
+ summary=(
+ f"NAT Gateway {n['nat_gateway_id']} has no trusted CloudWatch "
+ f"traffic signal in the last {idle_days_threshold} days"
+ ),
+ reason=reason,
+ risk=RiskLevel.MEDIUM,
+ confidence=confidence,
+ detected_at=now,
+ evidence=Evidence(
+ signals_used=signals_used,
+ signals_not_checked=list(_SIGNALS_NOT_CHECKED),
+ time_window=f"{idle_days_threshold} days",
+ ),
+ details={
+ "evaluation_path": "idle-nat-gateway-review-candidate",
+ "nat_gateway_id": n["nat_gateway_id"],
+ "normalized_state": n["normalized_state"],
+ "create_time": n["create_time_utc"].isoformat(),
+ "age_days": n["age_days"],
+ "idle_days_threshold": idle_days_threshold,
+ "connectivity_type": n["connectivity_type"],
+ "availability_mode": n["availability_mode"],
+ "vpc_id": n["vpc_id"],
+ "subnet_id": n["subnet_id"],
+ "bytes_out_to_destination": metric_values["bytes_out_to_destination"],
+ "bytes_in_from_source": metric_values["bytes_in_from_source"],
+ "bytes_in_from_destination": metric_values["bytes_in_from_destination"],
+ "bytes_out_to_source": metric_values["bytes_out_to_source"],
+ "active_connection_count_max": metric_values["active_connection_count_max"],
+ "route_table_referenced": route_table_referenced,
+ "nat_gateway_addresses": n["nat_gateway_addresses"],
+ "attached_appliances": n["attached_appliances"],
+ "auto_scaling_ips": n["auto_scaling_ips"],
+ "auto_provision_zones": n["auto_provision_zones"],
+ "tag_set": n["tag_set"],
+ },
+ )
+ )
+
+ return findings
diff --git a/cleancloud/providers/aws/rules/rds_idle.py b/cleancloud/providers/aws/rules/rds_idle.py
index d0e6287..dc4183d 100644
--- a/cleancloud/providers/aws/rules/rds_idle.py
+++ b/cleancloud/providers/aws/rules/rds_idle.py
@@ -1,542 +1,350 @@
+"""
+Rule: aws.rds.instance.idle
+
+ (spec — docs/specs/aws/rds_idle.md)
+
+Intent:
+ Detect provisioned standalone DB instances that are currently available, old
+ enough to evaluate, and show no trusted CloudWatch client-connection activity
+ for the configured observation window, so they can be reviewed as possible
+ cleanup candidates.
+
+Exclusions:
+ - db_instance_id absent (malformed identity)
+ - normalized_status absent (missing current-state signal)
+ - normalized_status != "available"
+ - db_cluster_identifier present (cluster member)
+ - read_replica_source_db_instance_identifier present (read replica)
+ - read_replica_source_db_cluster_identifier present (cross-cluster read replica)
+ - instance_create_time_utc absent, naive, or in the future
+ - age_days < idle_days_threshold (too new to evaluate)
+ - DatabaseConnections returns no datapoints (insufficient evidence)
+ - any DatabaseConnections Maximum > 0
+
+Detection:
+ - db_instance_id present, normalized_status == "available"
+ - standalone (not a cluster member or read replica)
+ - age_days >= idle_days_threshold
+ - DatabaseConnections Maximum returns datapoints and all are zero
+
+Key rules:
+ - DatabaseConnections Maximum is the sole required activity metric.
+ - Missing CloudWatch datapoints → SKIP ITEM (not zero).
+ - CloudWatch API failure → FAIL RULE (not LOW-confidence finding).
+ - No CPU or I/O thresholds — not required for baseline eligibility.
+ - estimated_monthly_cost_usd = None.
+ - Confidence: MEDIUM always.
+ - Risk: MEDIUM always.
+
+Blind spots:
+ - Sessions without network connections that the database hasn't cleaned up
+ - Sessions created by the database engine for its own purposes
+ - Sessions created by parallel execution capabilities or job schedulers
+ - Amazon RDS connections
+ - RDS Proxy, PgBouncer, and application connection pools that can hide real
+ usage while keeping observed client connection counts low or zero
+ - Planned future usage or disaster recovery intent
+ - Exact region-specific pricing impact
+
+APIs:
+ - rds:DescribeDBInstances
+ - cloudwatch:GetMetricStatistics
+"""
+
from datetime import datetime, timedelta, timezone
from typing import List, Optional
import boto3
-from botocore.exceptions import ClientError
+from botocore.exceptions import BotoCoreError, ClientError
from cleancloud.core.confidence import ConfidenceLevel
from cleancloud.core.evidence import Evidence
from cleancloud.core.finding import Finding
from cleancloud.core.risk import RiskLevel
+# --- Module-level constants ---
-def find_idle_rds_instances(
- session: boto3.Session,
- region: str,
- idle_days: int = 14,
-) -> List[Finding]:
- """
- Find RDS instances with zero database connections for `idle_days` days.
-
- RDS instances incur significant hourly charges depending on instance class
- and engine. Cost estimates in this rule are based on MySQL/PostgreSQL us-east-1
- on-demand pricing — Oracle, SQL Server, and other engines have different rates.
-
- Detection logic:
- - Instance status is 'available'
- - Instance is older than `idle_days` days
- - CloudWatch DatabaseConnections metric sum is 0 over `idle_days` period
- - Not a read replica (ReadReplicaSourceDBInstanceIdentifier is empty)
- - Not an Aurora cluster member (DBClusterIdentifier is empty)
-
- Confidence tiers:
- - MEDIUM: Zero connections + low peak CPU + low storage I/O (three-signal agreement)
- - LOW: Zero connections only, or CPU/IO data unavailable
-
- Risk tiers:
- - HIGH: MEDIUM confidence (multiple corroborating signals)
- - MEDIUM: LOW confidence (connections only, or metrics partially unavailable)
-
- Notes on accuracy:
- - DatabaseConnections == 0 does not guarantee no activity. Connection poolers
- (RDS Proxy, PgBouncer, application-level pools) may route queries without
- maintaining persistent connections visible to CloudWatch. Always verify
- application-level usage before acting on this finding.
- - CloudWatch publishes DatabaseConnections as a daily Sum. Zero datapoints
- (not zero values) means metric visibility is absent — this rule surfaces
- those as LOW-confidence "metrics unavailable" findings rather than skipping,
- so operators know the instance was not verified.
- - Storage cost estimate uses gp2/gp3 at ~$0.115/GB-month (us-east-1). io1/io2
- volumes are more expensive (~$0.125/GB + IOPS charge). Multi-AZ doubling is
- approximate: actual billing includes standby compute + storage nuances.
- - Automated backups and snapshots may justify retaining an otherwise idle instance.
-
- IAM permissions:
- - rds:DescribeDBInstances
- - cloudwatch:GetMetricStatistics
- """
- rds = session.client("rds", region_name=region)
- cloudwatch = session.client("cloudwatch", region_name=region)
-
- now = datetime.now(timezone.utc)
- findings: List[Finding] = []
+_DEFAULT_IDLE_DAYS_THRESHOLD = 14
+_ELIGIBLE_STATUS = "available"
+_CW_NAMESPACE = "AWS/RDS"
+_CW_DIM = "DBInstanceIdentifier"
- try:
- paginator = rds.get_paginator("describe_db_instances")
+_FINDING_TITLE = "Idle RDS instance review candidate"
- for page in paginator.paginate():
- for instance in page.get("DBInstances", []):
- # Only check available instances
- status = instance.get("DBInstanceStatus")
- if status != "available":
- continue
-
- db_instance_id = instance["DBInstanceIdentifier"]
-
- # Skip read replicas
- if instance.get("ReadReplicaSourceDBInstanceIdentifier"):
- continue
-
- # Skip Aurora cluster members — Aurora instances are managed at
- # the cluster level and may show zero connections on individual
- # reader/writer nodes even when the cluster is active.
- if instance.get("DBClusterIdentifier"):
- continue
-
- tags = instance.get("TagList", [])
-
- # Calculate age
- create_time = instance.get("InstanceCreateTime")
- age_days = 0
- if create_time:
- try:
- age_days = (now - create_time).days
- except TypeError:
- pass
-
- # Skip if instance is younger than the idle threshold
- if age_days < idle_days:
- continue
-
- start_time = now - timedelta(days=idle_days)
-
- # Check CloudWatch metrics for connections
- total_connections, conn_datapoints = _get_metric_sum(
- cloudwatch,
- "AWS/RDS",
- "DatabaseConnections",
- "DBInstanceIdentifier",
- db_instance_id,
- start_time,
- now,
- )
+_SIGNALS_NOT_CHECKED = (
+ "Sessions without network connections that the database hasn't cleaned up",
+ "Sessions created by the database engine for its own purposes",
+ "Sessions created by parallel execution capabilities",
+ "Sessions created by the database engine job scheduler",
+ "Amazon RDS connections",
+ "RDS Proxy, PgBouncer, and application connection pools that can hide real "
+ "usage while keeping observed client connection counts low or zero",
+ "Planned future usage or disaster recovery intent",
+ "Exact region-specific pricing impact",
+)
- if total_connections > 0:
- continue
-
- # Gather instance details (needed for both finding paths below)
- engine = instance.get("Engine", "unknown")
- engine_version = instance.get("EngineVersion", "unknown")
- instance_class = instance.get("DBInstanceClass", "unknown")
- multi_az = instance.get("MultiAZ", False)
- storage_gb = instance.get("AllocatedStorage", 0)
- compute_cost = _estimate_monthly_cost(instance_class, multi_az)
- compute_cost_usd = _estimate_monthly_cost_usd(instance_class, multi_az)
- storage_cost_usd = round(storage_gb * 0.115, 2) if storage_gb else 0.0
- storage_cost_usd = storage_cost_usd * 2 if multi_az else storage_cost_usd
- total_cost_usd = (
- (compute_cost_usd + storage_cost_usd) if compute_cost_usd is not None else None
- )
- if conn_datapoints == 0:
- # Zero datapoints means CloudWatch has no visibility — we cannot
- # distinguish "truly idle" from "metrics not published". Surface a
- # LOW-confidence finding so the operator knows to verify manually.
- findings.append(
- Finding(
- provider="aws",
- rule_id="aws.rds.instance.idle",
- resource_type="aws.rds.instance",
- resource_id=db_instance_id,
- region=region,
- estimated_monthly_cost_usd=total_cost_usd,
- title="RDS Instance Requires Connection Verification",
- summary=(
- f"RDS instance '{db_instance_id}' ({engine}, {instance_class}) "
- f"has no CloudWatch connection data — idle status is unconfirmed."
- ),
- reason=(
- "DatabaseConnections metric returned zero datapoints; "
- "idle status cannot be confirmed"
- ),
- risk=RiskLevel.MEDIUM,
- confidence=ConfidenceLevel.LOW,
- detected_at=now,
- evidence=Evidence(
- signals_used=[
- f"Instance status is '{status}'",
- f"Engine: {engine} {engine_version}",
- f"Instance class: {instance_class}",
- f"Instance is {age_days} days old",
- ],
- signals_not_checked=[
- "DatabaseConnections — CloudWatch returned zero datapoints; "
- "metric may not be published for this instance",
- "CPU utilisation",
- "Storage I/O (ReadIOPS / WriteIOPS)",
- "Planned future usage",
- "Disaster recovery intent",
- "Automated backups or snapshots that may justify retention",
- ],
- time_window=f"{idle_days} days",
- ),
- details={
- "engine": f"{engine} {engine_version}",
- "instance_class": instance_class,
- f"connections_{idle_days}d": None,
- "connections_datapoints": 0,
- "metrics_note": (
- "DatabaseConnections returned zero datapoints — "
- "metric visibility absent; idle status unconfirmed"
- ),
- "estimated_compute_cost": compute_cost,
- "estimated_storage_cost": (
- f"~${storage_cost_usd:.2f}/month "
- "(gp2/gp3 approx ~$0.115/GB; io1/io2 higher)"
- ),
- "multi_az": multi_az,
- "allocated_storage_gb": storage_gb,
- "age_days": age_days,
- "idle_days_threshold": idle_days,
- **({"tags": {t["Key"]: t["Value"] for t in tags}} if tags else {}),
- },
- )
- )
- continue
-
- # Corroborating signal 1: peak CPU utilisation.
- # Use Maximum (not Average) to catch bursty workloads — a single
- # high-CPU day within the window means the instance was active.
- peak_cpu, cpu_datapoints = _get_peak_cpu(
- cloudwatch, db_instance_id, start_time, now
- )
+def _str(value: object) -> Optional[str]:
+ """Return value as str only when it is a non-empty string; else None."""
+ return value if isinstance(value, str) and value else None
- if peak_cpu is not None and peak_cpu >= 5.0:
- # CPU active despite zero connections — unusual but skip to avoid FP
- continue
- # Corroborating signal 2: storage I/O (ReadIOPS + WriteIOPS).
- # If connections == 0 but IOPS > 0, a background process or connection
- # pooler may be active. If IOPS == 0, it corroborates idle.
- has_io, read_iops, write_iops, io_datapoints = _get_storage_io(
- cloudwatch, db_instance_id, start_time, now
- )
+def _choose_period(idle_days: int) -> int:
+ """Return a deterministic Period compliant with CloudWatch retention rules.
- if has_io:
- # Storage I/O active despite zero connections — skip to avoid FP
- continue
-
- signals_not_checked = [
- "Planned future usage",
- "Disaster recovery intent",
- "Seasonal traffic patterns",
- "Application deployment cycles",
- (
- "Connection poolers or proxies (RDS Proxy, PgBouncer) — "
- "may route queries without visible persistent connections"
- ),
- "External readers or indirect usage patterns",
- "Automated backups or snapshots that may justify retention",
- ]
-
- signals = [
- f"Zero database connections for {idle_days} days "
- f"({conn_datapoints} of up to {idle_days} daily datapoints)",
- f"DatabaseConnections sum: {total_connections}",
- f"Instance status is '{status}'",
- f"Engine: {engine} {engine_version}",
- f"Instance class: {instance_class}",
- ]
-
- cpu_confirmed = False
- if peak_cpu is not None:
- signals.append(
- f"Peak daily CPU utilisation: {peak_cpu:.1f}% "
- f"(threshold: 5%) — corroborating idle signal"
- )
- cpu_confirmed = True
- else:
- signals_not_checked.append("CPU utilisation (metric unavailable)")
-
- io_confirmed = False
- if io_datapoints > 0:
- signals.append(
- f"Storage I/O: ReadIOPS={read_iops}, WriteIOPS={write_iops} "
- f"— corroborating idle signal"
- )
- io_confirmed = True
- else:
- signals_not_checked.append("Storage I/O (ReadIOPS / WriteIOPS — no data)")
-
- if age_days > 0:
- signals.append(f"Instance is {age_days} days old")
-
- # MEDIUM confidence only when all three signals agree: zero connections,
- # low peak CPU, and low storage I/O. Any missing or inconclusive
- # corroborating signal leaves confidence at LOW.
- # Risk mirrors confidence: HIGH for MEDIUM confidence, MEDIUM for LOW.
- if cpu_confirmed and io_confirmed:
- confidence = ConfidenceLevel.MEDIUM
- risk = RiskLevel.HIGH
- else:
- confidence = ConfidenceLevel.LOW
- risk = RiskLevel.MEDIUM
-
- evidence = Evidence(
- signals_used=signals,
- signals_not_checked=signals_not_checked,
- time_window=f"{idle_days} days",
- )
+ idle_days * 86400 is a multiple of 60, 300, and 3600, satisfying all three
+ CloudWatch retention constraints for the chosen lookback window.
+ """
+ return idle_days * 86400
- findings.append(
- Finding(
- provider="aws",
- rule_id="aws.rds.instance.idle",
- resource_type="aws.rds.instance",
- resource_id=db_instance_id,
- region=region,
- estimated_monthly_cost_usd=total_cost_usd,
- title=f"Idle RDS Instance (No Connections for {idle_days}+ Days)",
- summary=(
- f"RDS instance '{db_instance_id}' ({engine}, {instance_class}) "
- f"has had zero database connections for {idle_days}+ days."
- ),
- reason=f"RDS instance has zero connections for {idle_days}+ days",
- risk=risk,
- confidence=confidence,
- detected_at=now,
- evidence=evidence,
- details={
- "engine": f"{engine} {engine_version}",
- "instance_class": instance_class,
- f"connections_{idle_days}d": total_connections,
- "connections_datapoints": conn_datapoints,
- "peak_cpu_pct": round(peak_cpu, 2) if peak_cpu is not None else None,
- "read_iops": read_iops,
- "write_iops": write_iops,
- "estimated_compute_cost": (
- compute_cost
- + " (MySQL/PostgreSQL us-east-1 rate; engine-dependent)"
- if compute_cost and "varies" not in compute_cost
- else compute_cost
- ),
- "estimated_storage_cost": (
- f"~${storage_cost_usd:.2f}/month "
- "(gp2/gp3 approx ~$0.115/GB; io1/io2 higher; Multi-AZ doubling approximate)"
- ),
- "multi_az": multi_az,
- "allocated_storage_gb": storage_gb,
- "age_days": age_days,
- "idle_days_threshold": idle_days,
- **({"tags": {t["Key"]: t["Value"] for t in tags}} if tags else {}),
- },
- )
- )
- except ClientError as e:
- code = e.response["Error"]["Code"]
- if code in ("UnauthorizedOperation", "AccessDenied"):
- raise PermissionError(
- "Missing required IAM permissions: "
- "rds:DescribeDBInstances, cloudwatch:GetMetricStatistics"
- ) from e
- raise
+def _normalize_db_instance(item: object, now_utc: datetime) -> Optional[dict]:
+ """Normalize a raw DescribeDBInstances item to the canonical field shape.
- return findings
+ Returns None when required identity/state/age fields are absent or invalid —
+ the caller must skip the item. All rule logic must operate only on the
+ returned normalized dict.
+ """
+ if not isinstance(item, dict):
+ return None
+
+ # --- Identity fields (required; absent → skip) ---
+ db_instance_id = _str(item.get("DBInstanceIdentifier"))
+ if db_instance_id is None:
+ return None
+
+ # --- State fields (required; absent → skip) ---
+ normalized_status = _str(item.get("DBInstanceStatus"))
+ if normalized_status is None:
+ return None
+
+ # --- InstanceCreateTime (required; absent, naive, or future → skip) ---
+ raw_ct = item.get("InstanceCreateTime")
+ if not isinstance(raw_ct, datetime):
+ return None
+ if raw_ct.tzinfo is None:
+ # Naive datetime — cannot safely compare to UTC; treat as absent → skip.
+ return None
+ instance_create_time_utc = raw_ct.astimezone(timezone.utc)
+ if instance_create_time_utc > now_utc:
+ # Future InstanceCreateTime is invalid → skip.
+ return None
+ age_days = int((now_utc - instance_create_time_utc).total_seconds() // 86400)
+
+ # --- Scope fields (optional → null) ---
+ db_cluster_identifier = _str(item.get("DBClusterIdentifier"))
+ read_replica_source_db_instance_identifier = _str(
+ item.get("ReadReplicaSourceDBInstanceIdentifier")
+ )
+ read_replica_source_db_cluster_identifier = _str(
+ item.get("ReadReplicaSourceDBClusterIdentifier")
+ )
+
+ # --- Core context fields (optional → null / []) ---
+ engine = _str(item.get("Engine"))
+ engine_version = _str(item.get("EngineVersion"))
+ db_instance_class = _str(item.get("DBInstanceClass"))
+ storage_type = _str(item.get("StorageType"))
+ dbi_resource_id = _str(item.get("DbiResourceId"))
+ db_instance_arn = _str(item.get("DBInstanceArn"))
+
+ raw_multi_az = item.get("MultiAZ")
+ multi_az = raw_multi_az if isinstance(raw_multi_az, bool) else None
+
+ raw_storage = item.get("AllocatedStorage")
+ allocated_storage_gib = raw_storage if isinstance(raw_storage, int) else None
+
+ raw_tags = item.get("TagList")
+ tag_set: list = raw_tags if isinstance(raw_tags, list) else []
+
+ return {
+ "resource_id": db_instance_id,
+ "db_instance_id": db_instance_id,
+ "normalized_status": normalized_status,
+ "instance_create_time_utc": instance_create_time_utc,
+ "age_days": age_days,
+ "db_cluster_identifier": db_cluster_identifier,
+ "read_replica_source_db_instance_identifier": read_replica_source_db_instance_identifier,
+ "read_replica_source_db_cluster_identifier": read_replica_source_db_cluster_identifier,
+ "engine": engine,
+ "engine_version": engine_version,
+ "db_instance_class": db_instance_class,
+ "multi_az": multi_az,
+ "allocated_storage_gib": allocated_storage_gib,
+ "storage_type": storage_type,
+ "dbi_resource_id": dbi_resource_id,
+ "db_instance_arn": db_instance_arn,
+ "tag_set": tag_set,
+ }
-def _get_metric_sum(
+def _get_database_connections_max(
cloudwatch,
- namespace: str,
- metric_name: str,
- dimension_name: str,
- dimension_value: str,
+ db_instance_id: str,
start_time: datetime,
end_time: datetime,
-) -> tuple:
- """Get sum of a CloudWatch metric over the time period.
-
- Returns (value, datapoint_count):
- - value: 1 if any datapoint has Sum > 0, else 0
- - datapoint_count: number of datapoints returned (0 = no metric visibility)
+ period: int,
+) -> Optional[float]:
+ """Fetch DatabaseConnections Maximum over the observation window.
- Zero datapoints is distinct from all-zero datapoints — the caller should
- handle datapoint_count == 0 as "unknown" rather than "confirmed idle".
+ Returns None if no datapoints (insufficient evidence → caller must SKIP ITEM).
+ Returns the maximum value (>= 0.0) if datapoints are present.
+ Raises ClientError / BotoCoreError / PermissionError on API failure (caller → FAIL RULE).
"""
try:
- response = cloudwatch.get_metric_statistics(
- Namespace=namespace,
- MetricName=metric_name,
- Dimensions=[
- {
- "Name": dimension_name,
- "Value": dimension_value,
- }
- ],
+ resp = cloudwatch.get_metric_statistics(
+ Namespace=_CW_NAMESPACE,
+ MetricName="DatabaseConnections",
+ Dimensions=[{"Name": _CW_DIM, "Value": db_instance_id}],
StartTime=start_time,
EndTime=end_time,
- Period=86400, # 1 day in seconds
- Statistics=["Sum"],
+ Period=period,
+ Statistics=["Maximum"],
)
-
- datapoints = response.get("Datapoints", [])
- count = len(datapoints)
- # Use any() instead of sum() — missing datapoints are omitted by
- # CloudWatch (not returned as 0), so summing could mask gaps.
- # any() is safer: if any single day had connections, it's not idle.
- if any(dp.get("Sum", 0) > 0 for dp in datapoints):
- return 1, count
- return 0, count
-
- except ClientError as e:
- if e.response["Error"]["Code"] in ("AccessDenied", "UnauthorizedOperation"):
+ except ClientError as exc:
+ if exc.response["Error"]["Code"] in ("AccessDenied", "UnauthorizedOperation"):
raise PermissionError(
- "Missing required IAM permissions: cloudwatch:GetMetricStatistics"
- ) from e
- # Other errors (throttle, transient): assume connections to avoid false positives
- return 1, -1
-
-
-def _get_peak_cpu(
- cloudwatch, db_instance_id: str, start_time: datetime, end_time: datetime
-) -> tuple:
- """Return (peak_cpu_pct, datapoint_count) for the RDS instance over the window.
-
- Uses Maximum statistic (not Average) to catch bursty workloads — a single
- high-CPU day means the instance was active during that window.
+ "Missing required IAM permission: cloudwatch:GetMetricStatistics"
+ ) from exc
+ raise
+ except BotoCoreError:
+ raise
- Returns (None, 0) on error — caller treats None as CPU signal unavailable.
- """
- try:
- response = cloudwatch.get_metric_statistics(
- Namespace="AWS/RDS",
- MetricName="CPUUtilization",
- Dimensions=[{"Name": "DBInstanceIdentifier", "Value": db_instance_id}],
- StartTime=start_time,
- EndTime=end_time,
- Period=86400,
- Statistics=["Maximum"],
- )
- datapoints = response.get("Datapoints", [])
- if not datapoints:
- return None, 0
- peak = max(dp["Maximum"] for dp in datapoints)
- return peak, len(datapoints)
- except ClientError:
- return None, 0
+ datapoints = resp.get("Datapoints", [])
+ if not datapoints:
+ return None # No datapoints → insufficient evidence → SKIP ITEM
+ return max(dp.get("Maximum", 0.0) for dp in datapoints)
-def _get_storage_io(
- cloudwatch, db_instance_id: str, start_time: datetime, end_time: datetime
-) -> tuple:
- """Return (has_io, read_iops_sum, write_iops_sum, datapoint_count).
- Checks ReadIOPS and WriteIOPS over the window. Any non-zero IOPS means
- the storage was active, which is a strong signal of actual database usage
- even if DatabaseConnections appears zero (e.g. via connection poolers).
+def find_idle_rds_instances(
+ session: boto3.Session,
+ region: str,
+ idle_days_threshold: int = _DEFAULT_IDLE_DAYS_THRESHOLD,
+) -> List[Finding]:
+ rds = session.client("rds", region_name=region)
+ cloudwatch = session.client("cloudwatch", region_name=region)
- datapoint_count is the combined count from both metrics; 0 means no data.
- """
try:
+ paginator = rds.get_paginator("describe_db_instances")
+ pages = list(paginator.paginate())
+ except ClientError as exc:
+ if exc.response["Error"]["Code"] in ("AccessDenied", "UnauthorizedOperation"):
+ raise PermissionError(
+ "Missing required IAM permission: rds:DescribeDBInstances"
+ ) from exc
+ raise
+ except BotoCoreError:
+ raise
- def _fetch(metric_name: str) -> tuple:
- response = cloudwatch.get_metric_statistics(
- Namespace="AWS/RDS",
- MetricName=metric_name,
- Dimensions=[{"Name": "DBInstanceIdentifier", "Value": db_instance_id}],
- StartTime=start_time,
- EndTime=end_time,
- Period=86400,
- Statistics=["Sum"],
- )
- datapoints = response.get("Datapoints", [])
- total = sum(dp.get("Sum", 0) for dp in datapoints)
- return int(total), len(datapoints)
-
- read_iops, read_count = _fetch("ReadIOPS")
- write_iops, write_count = _fetch("WriteIOPS")
- has_io = (read_iops > 0) or (write_iops > 0)
- return has_io, read_iops, write_iops, read_count + write_count
-
- except ClientError:
- # On error, assume no IO (don't skip the finding) but return 0 datapoints
- # so the caller knows IO was not verified.
- return False, 0, 0, 0
-
+ now = datetime.now(timezone.utc)
+ window_start = now - timedelta(seconds=idle_days_threshold * 86400)
+ period = _choose_period(idle_days_threshold)
+ findings: List[Finding] = []
-def _estimate_monthly_cost(instance_class: str, multi_az: bool) -> str:
- """Rough monthly cost estimate based on instance class.
+ for page in pages:
+ for raw_item in page.get("DBInstances", []):
+ # --- Step 1: Normalize ---
+ n = _normalize_db_instance(raw_item, now)
+ if n is None:
+ continue
+
+ # --- Step 2: EXCLUSION RULES ---
+
+ # EXCLUSION: status must be available
+ if n["normalized_status"] != _ELIGIBLE_STATUS:
+ continue
+
+ # EXCLUSION: not standalone — cluster member or any form of read replica
+ if n["db_cluster_identifier"] is not None:
+ continue
+ if n["read_replica_source_db_instance_identifier"] is not None:
+ continue
+ if n["read_replica_source_db_cluster_identifier"] is not None:
+ continue
+
+ # EXCLUSION: too young to evaluate
+ if n["age_days"] < idle_days_threshold:
+ continue
+
+ # --- Step 3: CloudWatch (FAIL RULE on error; SKIP ITEM if no data) ---
+ db_connections_max = _get_database_connections_max(
+ cloudwatch,
+ n["db_instance_id"],
+ window_start,
+ now,
+ period,
+ )
- Rates are approximate MySQL/PostgreSQL us-east-1 on-demand pricing.
- Oracle, SQL Server, and other engines have different (often higher) costs.
- """
- cost_map = {
- "db.t3.micro": 12,
- "db.t3.small": 24,
- "db.t3.medium": 49,
- "db.t3.large": 97,
- "db.t3.xlarge": 194,
- "db.t4g.micro": 11,
- "db.t4g.small": 22,
- "db.t4g.medium": 44,
- "db.t4g.large": 88,
- "db.t4g.xlarge": 175,
- "db.r5.large": 172,
- "db.r5.xlarge": 344,
- "db.r5.2xlarge": 688,
- "db.r6g.large": 155,
- "db.r6g.xlarge": 310,
- "db.r6i.large": 184,
- "db.r6i.xlarge": 368,
- "db.r6i.2xlarge": 736,
- "db.r7g.large": 175,
- "db.r7g.xlarge": 350,
- "db.r7g.2xlarge": 700,
- "db.m5.large": 125,
- "db.m5.xlarge": 250,
- "db.m6g.large": 113,
- "db.m6g.xlarge": 225,
- "db.m6i.large": 139,
- "db.m6i.xlarge": 277,
- "db.m6i.2xlarge": 554,
- "db.m7g.large": 130,
- "db.m7g.xlarge": 260,
- "db.m7g.2xlarge": 520,
- }
+ if db_connections_max is None:
+ # No datapoints → insufficient trusted evidence → SKIP ITEM
+ continue
+
+ # EXCLUSION: observed client connections
+ if db_connections_max > 0:
+ continue
+
+ # --- Step 4: EMIT ---
+ signals_used = [
+ f"DB instance Status is '{_ELIGIBLE_STATUS}' (able to accept connections)",
+ "DB instance is standalone — not a DB cluster member or read replica",
+ f"Age is {n['age_days']} days, meeting the {idle_days_threshold}-day threshold",
+ f"DatabaseConnections Maximum was zero across all datapoints in the "
+ f"{idle_days_threshold}-day observation window "
+ "(CleanCloud-derived idle heuristic based on observed client network connections)",
+ ]
+
+ findings.append(
+ Finding(
+ provider="aws",
+ rule_id="aws.rds.instance.idle",
+ resource_type="aws.rds.instance",
+ resource_id=n["db_instance_id"],
+ region=region,
+ estimated_monthly_cost_usd=None,
+ title=_FINDING_TITLE,
+ summary=(
+ f"RDS instance {n['db_instance_id']} has no observed client "
+ f"connection activity in the last {idle_days_threshold} days"
+ ),
+ reason=(
+ f"DB instance has no observed client connection activity "
+ f"via DatabaseConnections Maximum in the last {idle_days_threshold} days"
+ ),
+ risk=RiskLevel.MEDIUM,
+ confidence=ConfidenceLevel.MEDIUM,
+ detected_at=now,
+ evidence=Evidence(
+ signals_used=signals_used,
+ signals_not_checked=list(_SIGNALS_NOT_CHECKED),
+ time_window=f"{idle_days_threshold} days",
+ ),
+ details={
+ "evaluation_path": "idle-rds-instance-review-candidate",
+ "db_instance_id": n["db_instance_id"],
+ "normalized_status": n["normalized_status"],
+ "instance_create_time": n["instance_create_time_utc"].isoformat(),
+ "age_days": n["age_days"],
+ "idle_days_threshold": idle_days_threshold,
+ "engine": n["engine"],
+ "engine_version": n["engine_version"],
+ "db_instance_class": n["db_instance_class"],
+ "database_connections_max": db_connections_max,
+ "db_cluster_identifier": n["db_cluster_identifier"],
+ "read_replica_source_db_instance_identifier": n[
+ "read_replica_source_db_instance_identifier"
+ ],
+ "read_replica_source_db_cluster_identifier": n[
+ "read_replica_source_db_cluster_identifier"
+ ],
+ "multi_az": n["multi_az"],
+ "allocated_storage_gib": n["allocated_storage_gib"],
+ "storage_type": n["storage_type"],
+ "dbi_resource_id": n["dbi_resource_id"],
+ "db_instance_arn": n["db_instance_arn"],
+ "tag_set": n["tag_set"],
+ },
+ )
+ )
- base_cost = cost_map.get(instance_class)
- if base_cost:
- total = base_cost * 2 if multi_az else base_cost
- return f"~${total}/month (region dependent)"
- return "Cost varies by instance class (region dependent)"
-
-
-def _estimate_monthly_cost_usd(instance_class: str, multi_az: bool) -> Optional[float]:
- """Numeric monthly cost estimate for aggregation."""
- cost_map = {
- "db.t3.micro": 12,
- "db.t3.small": 24,
- "db.t3.medium": 49,
- "db.t3.large": 97,
- "db.t3.xlarge": 194,
- "db.t4g.micro": 11,
- "db.t4g.small": 22,
- "db.t4g.medium": 44,
- "db.t4g.large": 88,
- "db.t4g.xlarge": 175,
- "db.r5.large": 172,
- "db.r5.xlarge": 344,
- "db.r5.2xlarge": 688,
- "db.r6g.large": 155,
- "db.r6g.xlarge": 310,
- "db.r6i.large": 184,
- "db.r6i.xlarge": 368,
- "db.r6i.2xlarge": 736,
- "db.r7g.large": 175,
- "db.r7g.xlarge": 350,
- "db.r7g.2xlarge": 700,
- "db.m5.large": 125,
- "db.m5.xlarge": 250,
- "db.m6g.large": 113,
- "db.m6g.xlarge": 225,
- "db.m6i.large": 139,
- "db.m6i.xlarge": 277,
- "db.m6i.2xlarge": 554,
- "db.m7g.large": 130,
- "db.m7g.xlarge": 260,
- "db.m7g.2xlarge": 520,
- }
- base_cost = cost_map.get(instance_class)
- if base_cost:
- return float(base_cost * 2 if multi_az else base_cost)
- return None
+ return findings
diff --git a/docs/configuration.md b/docs/configuration.md
index 9737ff7..7a5259d 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -166,9 +166,9 @@ See [rules.md](rules.md) for the full list of rule IDs and their supported param
| Param | Rule ID | Default | Description |
|---|---|---|---|
-| `idle_days` | `aws.elbv2.load_balancer.idle` | 14 | Days of zero traffic before flagging |
-| `idle_days` | `aws.ec2.nat_gateway.idle` | 14 | Days of zero traffic before flagging |
-| `idle_days` | `aws.rds.instance.idle` | 14 | Days of no connections before flagging |
+| `idle_days_threshold` | `aws.elbv2.load_balancer.idle` | 14 | Days of zero traffic before flagging |
+| `idle_days_threshold` | `aws.ec2.nat_gateway.idle` | 14 | Days of zero traffic before flagging |
+| `idle_days_threshold` | `aws.rds.instance.idle` | 14 | Days of no connections before flagging |
| `idle_days` | `aws.sagemaker.endpoint.idle` | 14 | Days of zero invocations before flagging |
| `idle_days` | `aws.sagemaker.notebook.idle` | 14 | Days since last control-plane activity before flagging |
| `idle_days` | `azure.aml.compute.idle` | 14 | Days of no runs before flagging |
diff --git a/docs/specs/aws/elastic_ip_unattached.md b/docs/specs/aws/elastic_ip_unattached.md
new file mode 100644
index 0000000..1555725
--- /dev/null
+++ b/docs/specs/aws/elastic_ip_unattached.md
@@ -0,0 +1,350 @@
+# aws.ec2.elastic_ip.unattached — Canonical Rule Specification
+
+## 1. Intent
+
+Detect Elastic IP address records that are currently allocated to the account in the
+scanned Region and are not currently associated with an instance or network interface,
+so they can be reviewed for possible release if no longer needed.
+
+This is a **read-only review-candidate rule**. It is not a delete-safe rule and not
+proof that release is operationally safe.
+
+---
+
+## 2. AWS API Grounding
+
+Based on official EC2/VPC API and user-guide behavior.
+
+### Key DescribeAddresses fields
+
+| Field | Behaviour |
+|---|---|
+| `AllocationId` | Unique allocation identifier; present for VPC-domain addresses |
+| `PublicIp` | Public IPv4 address string; always present |
+| `CarrierIp` | Carrier IP for Wavelength zones; present when applicable |
+| `AssociationId` | Present when currently associated with an instance or ENI |
+| `InstanceId` | Present when associated with a specific instance |
+| `NetworkInterfaceId` | Present when associated with a specific ENI |
+| `PrivateIpAddress` | Present when currently associated |
+| `Domain` | `"vpc"` or `"standard"` |
+| `NetworkBorderGroup` | Network border group the address is in |
+| `PublicIpv4Pool` | BYOIP pool identifier |
+| `CustomerOwnedIp` | Customer-owned IP for Outposts |
+| `CustomerOwnedIpv4Pool` | Customer-owned IP pool |
+| `SubnetId` | Subnet for Wavelength addresses |
+| `NetworkInterfaceOwnerId` | Owner of the associated ENI |
+| `ServiceManaged` | Whether AWS manages the association on behalf of a service |
+| `Tags` | Key-value tags |
+
+### Critical AWS facts
+
+1. **No `AllocationTime`** — the documented `Address` shape does not include
+ `AllocationTime`, `AssociationTime`, `DisassociationTime`, or any canonical
+ `unattached_since`/`allocated_since` timestamp.
+
+2. **Billing** — AWS charges for all public IPv4 addresses, including Elastic IPs,
+ whether associated or unassociated. Unattached state alone is not a unique billing
+ trigger.
+
+3. **DescribeAddresses** is non-paginated; one successful call returns all addresses
+ for the scanned Region and caller scope.
+
+4. **Region-specific** — Elastic IPs are Region-scoped. Results from one Region
+ cannot prove absence from another.
+
+5. **Association signals** — an address can be associated via `AssociationId`,
+ `InstanceId`, `NetworkInterfaceId`, or `PrivateIpAddress`. All four must be absent
+ for the address to be considered currently unattached.
+
+### Rule-design consequence
+
+- Current association state is the only baseline eligibility signal this rule can
+ prove from `DescribeAddresses`.
+- No temporal predicate (allocation age, unattached duration) may be required for
+ baseline eligibility.
+- Undocumented fields such as `AllocationTime` must not be used.
+
+---
+
+## 3. Scope
+
+**Included:**
+- All addresses returned by `DescribeAddresses` with a stable `resource_id`
+- `currently_associated == False` (all canonical association fields absent)
+
+**Excluded:**
+- Addresses missing `AllocationId`, `PublicIp`, and `CarrierIp` (no stable identity)
+- Addresses with any canonical association field present
+
+---
+
+## 4. Canonical Definitions
+
+| Term | Definition |
+|---|---|
+| `resource_id` | `AllocationId` → `PublicIp` → `CarrierIp` → absent (skip item if absent) |
+| `currently_associated` | `True` when any of `association_id`, `instance_id`, `network_interface_id`, `private_ip_address` is present |
+| `currently_unattached` | All four canonical association fields absent |
+
+---
+
+## 5. Signal Model (Strict Separation)
+
+### Normalization Contract
+
+All rule logic must operate on normalized fields only.
+
+**Identity fields:**
+
+| Field | Derivation |
+|---|---|
+| `resource_id` | `address.AllocationId` → `address.PublicIp` → `address.CarrierIp` → absent (skip item if absent) |
+| `allocation_id` | `address.AllocationId` → `null` |
+| `public_ip` | `address.PublicIp` → `null` |
+| `carrier_ip` | `address.CarrierIp` → `null` |
+
+**Association fields (all must be absent for currently_unattached):**
+
+| Field | Derivation |
+|---|---|
+| `association_id` | `address.AssociationId` → `null` |
+| `instance_id` | `address.InstanceId` → `null` |
+| `network_interface_id` | `address.NetworkInterfaceId` → `null` |
+| `private_ip_address` | `address.PrivateIpAddress` → `null` |
+
+**Context fields:**
+
+| Field | Derivation |
+|---|---|
+| `domain` | `address.Domain` → `null` |
+| `network_interface_owner_id` | `address.NetworkInterfaceOwnerId` → `null` |
+| `network_border_group` | `address.NetworkBorderGroup` → `null` |
+| `public_ipv4_pool` | `address.PublicIpv4Pool` → `null` |
+| `customer_owned_ip` | `address.CustomerOwnedIp` → `null` |
+| `customer_owned_ipv4_pool` | `address.CustomerOwnedIpv4Pool` → `null` |
+| `subnet_id` | `address.SubnetId` → `null` |
+| `service_managed` | `address.ServiceManaged` → `null` |
+| `tags` | `address.Tags` → `[]` |
+
+String-valued fields must be normalized only from non-empty strings.
+Malformed or unexpected field types must not be converted into positive eligibility evidence.
+
+### A. EXCLUSION_RULES
+
+| Condition | Result |
+|---|---|
+| `resource_id` absent | **SKIP** (malformed identity) |
+| any canonical association field present | **SKIP** (currently associated) |
+
+There must be **no** exclusion for `service_managed`, tags, `domain`, BYOIP fields,
+`network_border_group`, or `public_ipv4_pool`.
+
+### B. DETECTION_SIGNAL
+
+| Condition | Result |
+|---|---|
+| `resource_id` present, all association fields absent | **EMIT** |
+
+### C. CONTEXTUAL_SIGNALS (non-detecting)
+
+All context fields are evidence/details only. `network_interface_owner_id` and
+`service_managed` are contextual and must not affect eligibility.
+
+---
+
+## 6. Evaluation Order (Mandatory)
+
+1. Call `DescribeAddresses` once for the scanned Region; fail rule on error.
+2. Validate that the top-level `Addresses` field is present and iterable; fail rule if not.
+3. Normalize each address item; skip items that return `None`.
+4. For each normalized address, apply EXCLUSION_RULES sequentially.
+5. Emit findings for remaining eligible addresses.
+
+No raw AWS field access after Step 3.
+
+---
+
+## 7. Confidence Model
+
+| Condition | Confidence |
+|---|---|
+| All exclusion checks passed | `HIGH` |
+
+High confidence refers to current unattached state, not to release safety or
+business irrelevance. `DescribeAddresses` deterministically reports association state.
+
+---
+
+## 8. Risk Model
+
+| Condition | Risk |
+|---|---|
+| Finding emitted | `LOW` |
+
+---
+
+## 9. Cost Model
+
+AWS charges for all public IPv4 addresses regardless of association state. Unattached
+state alone is not a unique billing trigger.
+
+- Do not present unattached state as a unique billing trigger.
+- Do not hardcode a fixed estimate such as `$3.75/month`.
+- `estimated_monthly_cost_usd` must be `None`.
+
+---
+
+## 10. Failure Behavior
+
+### Required API
+
+- `ec2:DescribeAddresses` — failure → **FAIL RULE**
+
+### Response integrity
+
+- `Addresses` key absent from response → **FAIL RULE**
+- `Addresses` value not iterable as a list → **FAIL RULE**
+
+### Item-level
+
+- Address missing stable identity (`resource_id` absent) → **SKIP** (not FAIL RULE)
+- Malformed contextual fields → **SKIP** that field; never fail the rule
+
+---
+
+## 11. Blind Spots
+
+Every finding must disclose in `signals_not_checked`:
+
+1. Future planned attachment or operational reserve intent not known
+2. DNS / allowlist / manual failover dependencies
+3. Application-level use of the reserved public IP
+4. Exact monthly pricing from the current pricing page
+5. Service-managed lifecycle expectations outside current association state
+
+---
+
+## 12. Evidence Contract
+
+Every finding **must** include all of the following (null allowed, never omitted):
+
+| Field | Requirement |
+|---|---|
+| `evaluation_path` | Exactly `"unattached-eip-review-candidate"` |
+| `resource_id` | Always present |
+| `allocation_id` | Present or `null` |
+| `public_ip` | Present or `null` |
+| `carrier_ip` | Present or `null` |
+| `domain` | Present or `null` |
+| `currently_associated` | Always `false` |
+| `association_id` | Always `null` |
+| `instance_id` | Always `null` |
+| `network_interface_id` | Always `null` |
+| `private_ip_address` | Always `null` |
+
+Optional contextual fields:
+- `network_interface_owner_id`, `network_border_group`, `public_ipv4_pool`,
+ `customer_owned_ip`, `customer_owned_ipv4_pool`, `subnet_id`, `service_managed`, `tags`
+
+---
+
+## 13. Title and Reason Contract
+
+| Field | Value |
+|---|---|
+| `title` | `"Unattached Elastic IP review candidate"` |
+| `reason` | `"Address has no current association per DescribeAddresses"` |
+
+**Hard rules:**
+- Do NOT call the address "safe to release"
+- Do NOT claim an allocation age or unattached duration
+- Do NOT use `AllocationTime` as evidence
+
+---
+
+## 14. API and IAM Contract
+
+**Required:** `ec2:DescribeAddresses`
+
+### API usage constraints
+
+- `DescribeAddresses` has no documented pagination; one call defines the full address set
+- No undocumented fields (`AllocationTime`, etc.) may be used
+
+---
+
+## 15. Acceptance Scenarios
+
+### Must emit
+
+1. VPC EIP with `AllocationId`, `PublicIp`, no association fields → EMIT HIGH
+2. Standard-domain address with no `InstanceId` or other association field → EMIT HIGH
+3. BYOIP / customer-owned / `service_managed` contextual fields present, no association fields → EMIT
+4. `CarrierIp` only (no `AllocationId`, no `PublicIp`) → EMIT; `CarrierIp` is `resource_id`
+
+### Must skip
+
+1. Address with `AssociationId` → SKIP
+2. Address with `NetworkInterfaceId` but no `AssociationId` → SKIP
+3. Address with `InstanceId` but no `AssociationId` → SKIP
+4. Address with `PrivateIpAddress` but no `AssociationId` → SKIP
+5. Address missing `AllocationId`, `PublicIp`, and `CarrierIp` → SKIP
+
+### Must fail
+
+1. `DescribeAddresses` unauthorized or request failure → FAIL RULE
+2. Response missing `Addresses` key → FAIL RULE
+3. Response `Addresses` not a list → FAIL RULE
+
+### Must NOT happen
+
+1. Temporal threshold applied to baseline eligibility
+2. `AllocationTime` used for any eligibility or evidence logic
+3. `$3.75` or any hardcoded cost in `estimated_monthly_cost_usd`
+4. `domain == "standard"` used as an exclusion
+5. `service_managed` used as an exclusion
+6. `AssociationId` as the sole association check (other association fields ignored)
+
+---
+
+## 16. In-File Contract
+
+```
+Rule: aws.ec2.elastic_ip.unattached
+
+Intent:
+ Detect Elastic IP address records that are currently allocated to the account
+ in the scanned Region and are not currently associated with an instance or
+ network interface.
+
+Exclusions:
+ - resource_id absent (malformed identity)
+ - any canonical association field present (currently associated)
+
+Detection:
+ - resource_id present
+ - association_id, instance_id, network_interface_id, private_ip_address all absent
+
+Key rules:
+ - This is a review-candidate rule, not a delete-safe rule.
+ - No temporal threshold — current unattached state is the sole eligibility signal.
+ - Do not use AllocationTime (undocumented field).
+ - All four canonical association fields must be checked, not only AssociationId.
+ - Missing/non-iterable Addresses response fails the rule.
+ - Do not hardcode a fixed monthly cost estimate.
+
+Blind spots:
+ - future planned attachment or operational reserve intent not known
+ - DNS / allowlist / manual failover dependencies
+ - application-level use of the reserved public IP
+ - service-managed lifecycle expectations outside current association state
+
+APIs:
+ - ec2:DescribeAddresses
+```
+
+---
+
+## 17. Implementation Constants
+
+No rule-level numeric constants required for baseline eligibility.
diff --git a/docs/specs/aws/elb_idle.md b/docs/specs/aws/elb_idle.md
new file mode 100644
index 0000000..0510581
--- /dev/null
+++ b/docs/specs/aws/elb_idle.md
@@ -0,0 +1,364 @@
+# aws.elbv2.alb.idle / aws.elbv2.nlb.idle / aws.elb.clb.idle — Canonical Rule Specification
+
+## 1. Intent
+
+Detect ALB, NLB, and CLB load balancers that are at least `idle_days_threshold` days old
+and show no trusted CloudWatch evidence of client traffic during the full lookback window,
+so they can be reviewed as potential cleanup candidates.
+
+This is a **read-only review-candidate rule family**. It is not a delete-safe rule family.
+
+---
+
+## 2. AWS API Grounding
+
+Based on official ELB / ELBv2 API and CloudWatch documentation.
+
+### Key facts
+
+1. ELBv2 `DescribeLoadBalancers` returns `LoadBalancerArn`, `LoadBalancerName`, `CreatedTime`,
+ `Scheme`, `VpcId`, `State`, and `Type` (`application`, `network`, `gateway`).
+2. Classic ELB `DescribeLoadBalancers` returns `LoadBalancerName`, `CreatedTime`, `Scheme`,
+ `VPCId`, `DNSName`, and `Instances`.
+3. ALB and CLB metrics are published only when requests flow; missing datapoints may be treated
+ as zero for ALB and CLB metrics.
+4. NLB metrics `NewFlowCount`, `ProcessedBytes`, and `ActiveFlowCount` are documented as always
+ reported. Missing datapoints for NLB metrics must be treated as incomplete / untrusted —
+ not as zero.
+5. ALB metrics are published under `AWS/ApplicationELB` using `LoadBalancer` dimension.
+6. NLB metrics are published under `AWS/NetworkELB` using `LoadBalancer` dimension.
+7. CLB metrics are published under `AWS/ELB` using `LoadBalancerName` dimension.
+8. The ELBv2 CloudWatch dimension value is the ARN suffix strictly after `loadbalancer/`.
+9. Gateway Load Balancers (`Type == "gateway"`) are out of scope.
+10. `CreatedTime` is a documented field and may be used for age calculation.
+
+---
+
+## 3. Scope and Terminology
+
+- ALB: ELBv2 `Type == "application"`
+- NLB: ELBv2 `Type == "network"`
+- CLB: Classic Load Balancer returned by the classic ELB API
+- Gateway LBs (`Type == "gateway"`) must be skipped
+- "idle over N days" means no trusted CloudWatch client-traffic signal over the full configured window
+
+---
+
+## 4. API and IAM Contract
+
+**Required:**
+- `elbv2:DescribeLoadBalancers` — failure → FAIL RULE for ELBv2 branch
+- `elb:DescribeLoadBalancers` — failure → FAIL RULE for CLB branch
+- `cloudwatch:GetMetricStatistics` — failure → FAIL RULE for the affected item's branch
+
+**Contextual (enrichment only; failure does not fail rule):**
+- `elbv2:DescribeTargetGroups`
+- `elbv2:DescribeTargetHealth`
+
+**Pagination:** ELBv2 and CLB pagination must be fully exhausted.
+
+---
+
+## 5. Normalization Contract
+
+All rule logic must operate on normalized fields only. No raw AWS field access after
+normalization.
+
+### ELBv2 Normalized Fields
+
+| Field | Derivation |
+|---|---|
+| `resource_id` | `LoadBalancerArn` → absent (skip) |
+| `lb_family` | `"alb"` when `Type == "application"`, `"nlb"` when `Type == "network"`, `"unsupported"` otherwise |
+| `load_balancer_name` | `LoadBalancerName` → null |
+| `load_balancer_arn` | `LoadBalancerArn` → null |
+| `created_time` | `CreatedTime` (timezone-aware UTC) → absent (skip) |
+| `age_days` | `floor((now_utc - created_time_utc) / 86400)` |
+| `scheme` | `Scheme` → null |
+| `dns_name` | `DNSName` → null |
+| `vpc_id` | `VpcId` → null |
+| `state_code` | `State.Code` → null |
+
+### CLB Normalized Fields
+
+| Field | Derivation |
+|---|---|
+| `resource_id` | `LoadBalancerName` → absent (skip) |
+| `lb_family` | Always `"clb"` |
+| `load_balancer_name` | `LoadBalancerName` → null |
+| `load_balancer_arn` | Always null |
+| `created_time` | `CreatedTime` (timezone-aware UTC) → absent (skip) |
+| `age_days` | `floor((now_utc - created_time_utc) / 86400)` |
+| `scheme` | `Scheme` → null |
+| `dns_name` | `DNSName` → null |
+| `vpc_id` | `VPCId` → null |
+| `state_code` | Always null |
+
+### Backend Context Fields (contextual only; never affect eligibility)
+
+ALB/NLB: `target_group_count`, `registered_target_count`, `has_registered_targets`
+CLB: `registered_instance_count`, `has_registered_instances`
+
+String fields must be normalized from non-empty strings only.
+
+---
+
+## 6. Trusted Traffic-Signal Contract
+
+### 6.1 ALB Traffic Contract
+
+Traffic present if any of:
+- `RequestCount` `Sum > 0`
+- `ProcessedBytes` `Sum > 0`
+- `ActiveConnectionCount` `Sum > 0`
+
+Namespace: `AWS/ApplicationELB`, dimension `LoadBalancer` = the ARN suffix strictly after `loadbalancer/`
+
+### 6.2 NLB Traffic Contract
+
+Traffic present if any of:
+- `NewFlowCount` `Sum > 0`
+- `ProcessedBytes` `Sum > 0`
+- `ActiveFlowCount` `Maximum > 0`
+
+Namespace: `AWS/NetworkELB`, dimension `LoadBalancer` = the ARN suffix strictly after `loadbalancer/`
+
+**NLB-specific:** Missing datapoints for any of these three metrics over the full lookback
+window must be treated as incomplete / untrusted → FAIL RULE.
+
+### 6.3 CLB Traffic Contract
+
+Traffic present if any of:
+- `RequestCount` `Sum > 0`
+- `EstimatedProcessedBytes` `Sum > 0`
+
+Namespace: `AWS/ELB`, dimension `LoadBalancerName` = the load balancer name
+
+### 6.4 Metric-Reading Rules
+
+- ALB/CLB: missing datapoints (none reported) may be treated as zero (no traffic).
+- NLB: missing datapoints over the full window must be treated as FAIL RULE.
+- Any required metric read failure (non-permission API error) → FAIL RULE.
+- Metric evaluation is deterministic: positive signal → traffic present; all-zero with
+ complete coverage → zero-traffic candidate; NLB incomplete → FAIL RULE.
+
+---
+
+## 7. Backend Registration Context Contract
+
+Backend registration is contextual only. Zero registered targets/instances increases
+confidence but does not independently qualify a load balancer as idle.
+
+ALBs can be useful with rules performing redirects or fixed responses;
+"no registered targets" must never be treated as equivalent to "unused."
+
+---
+
+## 8. Evaluation Order (Mandatory)
+
+**ELBv2 branch:**
+1. Retrieve and fully paginate ELBv2 load balancers; fail ELBv2 branch on error.
+2. Normalize each ELBv2 item.
+3. Skip items with `lb_family == "unsupported"`.
+4. Skip items without stable identity or without usable `created_time`.
+5. Skip items where `age_days < idle_days_threshold`.
+6. Skip items where `state_code` is not `"active"` or `"active_impaired"`.
+7. Retrieve CloudWatch traffic signals; fail rule on error.
+8. Skip items with trusted traffic present.
+9. Enrich with target-group/target-health context (best-effort; failure degrades context, not rule).
+10. Emit findings.
+
+**CLB branch:**
+11. Retrieve and fully paginate CLB inventory; fail CLB branch on error.
+12. Normalize each CLB item.
+13. Skip items without stable identity or without usable `created_time`.
+14. Skip items where `age_days < idle_days_threshold`.
+15. Retrieve CloudWatch traffic signals; fail rule on error.
+16. Skip items with trusted traffic present.
+17. Enrich with registered-instance context from normalized item.
+18. Emit findings.
+
+No raw AWS field access after normalization.
+
+---
+
+## 9. Exclusion Rules
+
+| Condition | Result |
+|---|---|
+| `resource_id` absent | **SKIP ITEM** |
+| `lb_family == "unsupported"` | **SKIP ITEM** |
+| `created_time` absent or not safely comparable | **SKIP ITEM** |
+| `age_days < idle_days_threshold` | **SKIP ITEM** |
+| ELBv2 `state_code` not `"active"` or `"active_impaired"` | **SKIP ITEM** |
+| Trusted traffic signal present | **SKIP ITEM** |
+| ELBv2 dimension unparsable from ARN | **SKIP ITEM** |
+
+No exclusion for: registered targets present, zero registered targets, scheme, VPC presence, tags.
+
+---
+
+## 10. Failure Model
+
+- `elbv2:DescribeLoadBalancers` error → **FAIL RULE** (ELBv2 branch)
+- `elb:DescribeLoadBalancers` error → **FAIL RULE** (CLB branch)
+- CloudWatch metric error for any evaluated item → **FAIL RULE**
+- NLB metric with no datapoints over full window → **FAIL RULE**
+- Target-group / target-health enrichment failure → degrade context only (not FAIL RULE)
+
+---
+
+## 11. Evidence and Cost Contract
+
+### 11.1 Required Evidence/Details Fields
+
+Every emitted finding must include:
+- `evaluation_path = "idle-load-balancer-review-candidate"`
+- `lb_family`
+- `resource_id`
+- `load_balancer_name`
+- `load_balancer_arn`
+- `scheme`
+- `dns_name`
+- `vpc_id`
+- `created_time`
+- `age_days`
+- `idle_days_threshold`
+- `traffic_window_days`
+- `traffic_signals_checked`
+- `traffic_detected = false`
+
+Family-specific:
+- ALB/NLB: `state_code`, `has_registered_targets`, `registered_target_count`, `target_group_count`
+- CLB: `has_registered_instances`, `registered_instance_count`
+
+### 11.2 Cost Estimation Boundary
+
+- `estimated_monthly_cost_usd = null`
+- Do not hardcode static cost guesses such as `~$16-22/month`.
+
+---
+
+## 12. Confidence Model
+
+| Condition | Confidence |
+|---|---|
+| Zero traffic AND no registered targets/instances | `HIGH` |
+| Zero traffic AND registered targets/instances still present | `MEDIUM` |
+
+No LOW-confidence finding may be emitted. Metric failure = FAIL RULE, not LOW finding.
+
+---
+
+## 13. Title and Reason Contract
+
+| Condition | Title | Reason |
+|---|---|---|
+| ALB finding | `"Idle ALB review candidate"` | `"ALB has no trusted CloudWatch traffic signal in the last {N} days"` |
+| NLB finding | `"Idle NLB review candidate"` | `"NLB has no trusted CloudWatch traffic signal in the last {N} days"` |
+| CLB finding | `"Idle CLB review candidate"` | `"CLB has no trusted CloudWatch traffic signal in the last {N} days"` |
+
+Do NOT claim the load balancer is safe to delete.
+
+---
+
+## 14. Risk Model
+
+| Condition | Risk |
+|---|---|
+| Finding emitted | `MEDIUM` |
+
+---
+
+## 15. Acceptance Scenarios
+
+### Must emit
+
+1. ALB older than threshold, `state_code == "active"`, no ALB traffic over full window, zero targets → EMIT, HIGH
+2. NLB older than threshold, `state_code == "active_impaired"`, zero NLB traffic with valid datapoints, registered targets → EMIT, MEDIUM
+3. CLB older than threshold, zero CLB traffic, no instances → EMIT, HIGH
+
+### Must skip
+
+4. ELBv2 `Type == "gateway"` → SKIP
+5. Load balancer younger than threshold → SKIP
+6. ALB/NLB with any metric > 0 → SKIP
+7. CLB with any metric > 0 → SKIP
+8. ELBv2 in `"provisioning"` or `"failed"` state → SKIP
+9. ELBv2 with ARN from which CloudWatch dimension cannot be extracted → SKIP
+
+### Must fail
+
+10. CloudWatch metric read failure for evaluated item → FAIL RULE
+11. Inventory pagination failure → FAIL RULE
+12. NLB metric missing datapoints over full window → FAIL RULE
+
+### Must NOT happen
+
+1. LOW-confidence finding emitted
+2. Metric failure → LOW finding
+3. Gateway LB evaluated
+4. `estimated_monthly_cost_usd` set to a non-null value
+5. `has_traffic=True, fetch_failed=True` producing any finding
+
+---
+
+## 16. In-File Contract
+
+```
+Rule: aws.elbv2.alb.idle
+Rule: aws.elbv2.nlb.idle
+Rule: aws.elb.clb.idle
+
+ (spec — docs/specs/aws/elb_idle.md)
+
+Intent:
+ Detect ALB, NLB, and CLB load balancers that are at least
+ idle_days_threshold days old and show no trusted CloudWatch evidence of
+ client traffic during the full lookback window, so they can be reviewed
+ as potential cleanup candidates.
+
+Exclusions:
+ - resource_id absent (malformed identity)
+ - lb_family == "unsupported" (gateway LB or unknown type)
+ - created_time absent or not safely comparable
+ - age_days < idle_days_threshold (too new to evaluate)
+ - ELBv2 state_code not "active" or "active_impaired"
+ - trusted traffic present (any CloudWatch signal > 0)
+ - ELBv2 ARN dimension unparsable
+
+Detection:
+ - resource_id present, lb_family in {"alb","nlb","clb"}
+ - age_days >= idle_days_threshold
+ - ELBv2: state_code "active" or "active_impaired"
+ - all traffic signals absent during full lookback window
+
+Key rules:
+ - ALB: RequestCount Sum>0, ProcessedBytes Sum>0, or ActiveConnectionCount Sum>0
+ - NLB: NewFlowCount Sum>0, ProcessedBytes Sum>0, or ActiveFlowCount Maximum>0
+ - NLB: missing datapoints over full window = FAIL RULE (not zero)
+ - CLB: RequestCount Sum>0 or EstimatedProcessedBytes Sum>0
+ - Any metric read failure = FAIL RULE; no LOW-confidence path
+ - ELBv2 dimension strictly from ARN suffix after loadbalancer/
+ - Backend registration is contextual only
+ - estimated_monthly_cost_usd = None
+
+Blind spots:
+ - planned future usage or blue/green staging
+ - seasonal traffic patterns outside the current lookback window
+ - DNS / allowlist / manual failover dependencies
+ - NLB traffic rejected by security groups (not in CloudWatch)
+
+APIs:
+ - elbv2:DescribeLoadBalancers
+ - elb:DescribeLoadBalancers
+ - cloudwatch:GetMetricStatistics
+ - elbv2:DescribeTargetGroups (contextual)
+ - elbv2:DescribeTargetHealth (contextual)
+```
+
+---
+
+## 17. Implementation Constants
+
+- `_DEFAULT_IDLE_DAYS_THRESHOLD = 14`
diff --git a/docs/specs/aws/eni_detached.md b/docs/specs/aws/eni_detached.md
new file mode 100644
index 0000000..afb2aff
--- /dev/null
+++ b/docs/specs/aws/eni_detached.md
@@ -0,0 +1,310 @@
+# aws.ec2.eni.detached — Canonical Rule Specification
+
+## 1. Intent
+
+Detect network interfaces that are currently not attached according to the EC2
+`DescribeNetworkInterfaces` contract, so they can be reviewed as possible cleanup
+candidates if no longer needed.
+
+This is a **read-only review-candidate rule**. It is not a delete-safe rule.
+
+---
+
+## 2. AWS API Grounding
+
+Based on official EC2 API and User Guide.
+
+### Key facts
+
+1. `DescribeNetworkInterfaces` is the canonical API for enumerating ENIs in the scanned
+ Region/account scope; AWS strongly recommends paginated requests.
+2. Top-level `Status` valid values: `available | associated | attaching | in-use | detaching`.
+3. AWS explicitly states: if an ENI is not attached, `Status == "available"`.
+4. `Attachment` is optional; `Attachment.Status` valid values: `attaching | attached | detaching | detached`.
+5. The documented shape does **not** include `CreateTime`, `DetachTime`, or any
+ `detached_since` / `allocated_since` timestamp.
+6. Requester-managed ENIs are created by AWS services on your behalf; if a service
+ detached an ENI but did not delete it, you can delete the detached ENI.
+
+### Rule-design consequences
+
+- Current not-attached state must be determined from documented current-state fields only.
+- No temporal inference (age, detach duration) may be used.
+- Top-level `Status` is the canonical state authority.
+- `requesterManaged`, `operator.managed`, `interfaceType`, and `description` are contextual
+ only — not eligibility gates.
+
+---
+
+## 3. Scope
+
+- "Not currently attached" means `Status == "available"` per the documented EC2 contract.
+- The rule is evaluated independently per Region.
+
+---
+
+## 4. API and IAM Contract
+
+**Required:** `ec2:DescribeNetworkInterfaces` — failure → FAIL RULE
+
+**Pagination:** Must be fully exhausted; no early exit.
+
+---
+
+## 5. Normalization Contract
+
+All rule logic must operate on normalized fields only. No raw AWS field access after
+normalization.
+
+### Identity fields
+
+| Field | Derivation |
+|---|---|
+| `resource_id` | `NetworkInterfaceId` → absent (skip) |
+| `network_interface_id` | `NetworkInterfaceId` → absent (skip) |
+
+### State fields
+
+| Field | Derivation |
+|---|---|
+| `normalized_status` | `Status` → absent |
+
+### Attachment fields
+
+| Field | Derivation |
+|---|---|
+| `attachment_status` | `Attachment.Status` → null |
+| `attachment_id` | `Attachment.AttachmentId` → null |
+| `attachment_instance_id` | `Attachment.InstanceId` → null |
+| `attachment_instance_owner_id` | `Attachment.InstanceOwnerId` → null |
+
+### Ownership / service-context fields
+
+| Field | Derivation |
+|---|---|
+| `interface_type` | `InterfaceType` → null |
+| `requester_managed` | `RequesterManaged` (bool only) → null |
+| `operator_managed` | `Operator.Managed` (bool only) → null |
+| `operator_principal` | `Operator.Principal` → null |
+
+### Network / resource-metadata fields
+
+| Field | Derivation |
+|---|---|
+| `description` | `Description` → null |
+| `availability_zone` | `AvailabilityZone` → null |
+| `subnet_id` | `SubnetId` → null |
+| `vpc_id` | `VpcId` → null |
+| `private_ip_address` | `PrivateIpAddress` → null |
+| `public_ip` | `Association.PublicIp` → null |
+| `tag_set` | `TagSet` → `[]` |
+
+Normalization requirements:
+- String fields: normalized only from non-empty strings.
+- Boolean fields: normalized only from actual `bool` values.
+- Malformed contextual fields must not produce positive eligibility evidence.
+
+---
+
+## 6. Current Attachment-State Determination
+
+Top-level `normalized_status` is the **sole** state authority.
+
+| `normalized_status` | Eligibility |
+|---|---|
+| `"available"` | **ELIGIBLE** (not currently attached) |
+| `"in-use"` | SKIP |
+| `"attaching"` | SKIP |
+| `"detaching"` | SKIP |
+| `"associated"` | SKIP |
+
+**Attachment consistency check:**
+- If `normalized_status == "available"` and `attachment_status` is `"attached"`,
+ `"attaching"`, or `"detaching"` → structural inconsistency → **SKIP ITEM**.
+- `attachment_status` is validation only; it does not override `normalized_status`.
+
+---
+
+## 7. Service-Managed / Requester-Managed Handling
+
+`requester_managed`, `operator_managed`, and `interface_type` are contextual only.
+None of them exclude an ENI from evaluation. AWS documents that if a service detached
+an ENI and did not delete it, the ENI is a valid deletion candidate.
+
+---
+
+## 8. Evaluation Order (Mandatory)
+
+1. Retrieve and fully paginate `DescribeNetworkInterfaces`; fail rule on error.
+2. Normalize each ENI item; skip non-dict or identity-absent items.
+3. Skip items with absent `normalized_status`.
+4. Skip items where `normalized_status != "available"`.
+5. Skip items where `attachment_status` conflicts with the available state.
+6. Emit findings for remaining items.
+
+No raw AWS field access after Step 2.
+
+---
+
+## 9. Exclusion Rules
+
+| Condition | Result |
+|---|---|
+| `network_interface_id` absent | **SKIP ITEM** |
+| `normalized_status` absent | **SKIP ITEM** |
+| `normalized_status != "available"` | **SKIP ITEM** |
+| `normalized_status == "available"` and `attachment_status` in `{"attached","attaching","detaching"}` | **SKIP ITEM** |
+
+No exclusion for: `requester_managed`, `operator_managed`, `interface_type`, tags, description.
+
+---
+
+## 10. Failure Model
+
+- `DescribeNetworkInterfaces` request/pagination error → **FAIL RULE**
+- Non-dict ENI item → SKIP ITEM (not FAIL RULE)
+- Missing identity → SKIP ITEM (not FAIL RULE)
+
+---
+
+## 11. Evidence and Cost Contract
+
+### 11.1 Required Evidence/Details Fields
+
+| Field | Requirement |
+|---|---|
+| `evaluation_path` | Exactly `"detached-eni-review-candidate"` |
+| `network_interface_id` | Always present |
+| `normalized_status` | Always `"available"` |
+| `attachment_status` | Present or null |
+| `interface_type` | Present or null |
+| `requester_managed` | Present or null |
+| `operator_managed` | Present or null |
+| `operator_principal` | Present or null |
+| `availability_zone` | Present or null |
+| `subnet_id` | Present or null |
+| `vpc_id` | Present or null |
+| `private_ip_address` | Present or null |
+| `public_ip` | Present or null |
+
+Optional: `attachment_id`, `attachment_instance_id`, `attachment_instance_owner_id`,
+`description`, `tag_set`.
+
+### 11.2 Cost Estimation Boundary
+
+- `estimated_monthly_cost_usd = null`
+- Do not hardcode a generic detached-ENI monthly cost estimate.
+
+---
+
+## 12. Confidence Model
+
+| Condition | Confidence |
+|---|---|
+| `normalized_status == "available"` and no structural conflict | `HIGH` |
+
+High confidence refers to current not-attached state, not delete safety.
+
+---
+
+## 13. Title and Reason Contract
+
+| Field | Value |
+|---|---|
+| `title` | `"Detached ENI review candidate"` |
+| `reason` | `"ENI Status is 'available' — not currently attached per DescribeNetworkInterfaces"` |
+
+Do NOT claim the ENI is safe to delete.
+
+---
+
+## 14. Risk Model
+
+| Condition | Risk |
+|---|---|
+| Finding emitted | `LOW` |
+
+---
+
+## 15. Acceptance Scenarios
+
+### Must emit
+
+1. ENI with `Status == "available"`, no attachment object → EMIT HIGH
+2. ENI with `Status == "available"`, `Attachment.Status == "detached"` → EMIT HIGH
+3. Requester-managed ENI with `Status == "available"` → EMIT (include context)
+4. Operator-managed ENI with `Status == "available"` → EMIT (include context)
+5. Any `interface_type` value, `Status == "available"` → EMIT (no type exclusion)
+
+### Must skip
+
+6. ENI with `Status == "in-use"` → SKIP
+7. ENI with `Status == "attaching"`, `"detaching"`, or `"associated"` → SKIP
+8. ENI with `Status == "available"` and `Attachment.Status == "attached"` → SKIP
+9. ENI missing `NetworkInterfaceId` → SKIP
+10. ENI missing `Status` → SKIP
+
+### Must fail
+
+11. `DescribeNetworkInterfaces` request/pagination failure → FAIL RULE
+
+### Must NOT happen
+
+1. Temporal threshold applied to eligibility
+2. `CreateTime` or age used for any eligibility or evidence logic
+3. `interface_type` used as an exclusion
+4. `requester_managed == true` used as an exclusion
+5. MEDIUM or LOW confidence for a valid not-attached ENI
+6. Hardcoded cost estimate in `estimated_monthly_cost_usd`
+
+---
+
+## 16. In-File Contract
+
+```
+Rule: aws.ec2.eni.detached
+
+ (spec — docs/specs/aws/eni_detached.md)
+
+Intent:
+ Detect network interfaces that are currently not attached according to the
+ EC2 DescribeNetworkInterfaces contract, so they can be reviewed as possible
+ cleanup candidates if no longer needed.
+
+Exclusions:
+ - network_interface_id absent (malformed identity)
+ - normalized_status absent (missing current-state signal)
+ - normalized_status != "available" (attached or other non-eligible state)
+ - structural inconsistency: normalized_status == "available" but
+ attachment_status in {"attached","attaching","detaching"}
+
+Detection:
+ - network_interface_id present
+ - normalized_status == "available"
+ - attachment_status absent, null, or "detached"
+
+Key rules:
+ - Top-level Status is the sole state authority; attachment_status is validation only.
+ - No temporal threshold — current not-attached state is the sole eligibility signal.
+ - No exclusion for interface_type, requester_managed, or operator_managed.
+ - Do not use CreateTime or any age/duration field for eligibility.
+ - estimated_monthly_cost_usd = None.
+ - Confidence: HIGH.
+ - Risk: LOW.
+
+Blind spots:
+ - how long the ENI has been in a not-currently-attached state
+ - previous attachment history
+ - whether an AWS service expects to recycle or clean up this ENI
+ - application, failover, or operational intent
+ - exact pricing impact
+
+APIs:
+ - ec2:DescribeNetworkInterfaces
+```
+
+---
+
+## 17. Implementation Constants
+
+No rule-level numeric constants required for baseline eligibility.
diff --git a/docs/specs/aws/nat_gateway_idle.md b/docs/specs/aws/nat_gateway_idle.md
new file mode 100644
index 0000000..bf934fb
--- /dev/null
+++ b/docs/specs/aws/nat_gateway_idle.md
@@ -0,0 +1,347 @@
+# aws.ec2.nat_gateway.idle — Canonical Rule Specification
+
+## 1. Intent
+
+Detect NAT Gateways that are currently `available`, old enough to evaluate, and show no
+trusted CloudWatch traffic/activity evidence during the configured observation window, so
+they can be reviewed as possible cleanup candidates.
+
+This is a **read-only review-candidate rule**. It is not a delete-safe rule.
+
+---
+
+## 2. AWS API Grounding
+
+Based on official EC2/VPC API and CloudWatch documentation.
+
+### Key facts
+
+1. `DescribeNatGateways` is the canonical API for enumerating NAT Gateways in the scanned
+ Region/account scope and supports pagination.
+2. `NatGateway.State` valid values: `pending | failed | available | deleting | deleted`.
+3. AWS documents that `available` means the NAT Gateway is able to process traffic and that
+ this status remains until you delete it; it does not indicate usage.
+4. `NatGateway.CreateTime` is a documented timestamp field.
+5. NAT Gateways have `ConnectivityType` values `public | private`.
+6. AWS documents NAT Gateway CloudWatch metrics in namespace `AWS/NATGateway` with dimension
+ `NatGatewayId`.
+7. Required CloudWatch metrics and statistics:
+ - `BytesOutToDestination` → `Sum`
+ - `BytesInFromSource` → `Sum`
+ - `BytesInFromDestination` → `Sum`
+ - `BytesOutToSource` → `Sum`
+ - `ActiveConnectionCount` → `Maximum`
+8. AWS states `ActiveConnectionCount == 0` indicates no active TCP connections.
+9. AWS pricing: charged per hour available and per GB processed. No canonical fixed monthly
+ USD value exists in the product docs used by this rule.
+10. `GetMetricStatistics` does not guarantee ordered datapoints. Missing datapoints must not
+ be assumed to mean zero activity.
+
+### Rule-design consequences
+
+- Only `available` NAT Gateways are eligible.
+- Age thresholding is valid because `CreateTime` is documented.
+- CloudWatch is the sole trusted activity source; missing datapoints → SKIP ITEM.
+- Route-table references are contextual only, not eligibility gates.
+
+---
+
+## 3. Scope
+
+- "idle" is a CleanCloud-derived heuristic: no trusted CloudWatch activity over the full
+ observation window.
+- `age_days = floor((now_utc - create_time_utc) / 86400)`
+- Observation window: `now_utc - idle_days_threshold * 86400` → `now_utc`
+- Rule is evaluated independently per Region.
+
+---
+
+## 4. API and IAM Contract
+
+**Required:** `ec2:DescribeNatGateways` — failure → FAIL RULE
+**Required:** `cloudwatch:GetMetricStatistics` — failure → FAIL RULE
+**Optional:** `ec2:DescribeRouteTables` — failure → degrade context, do not fail rule
+
+**Pagination:** `DescribeNatGateways` must be fully exhausted; no early exit.
+
+---
+
+## 5. Normalization Contract
+
+All rule logic must operate on normalized fields only. No raw AWS field access after
+normalization.
+
+### Identity fields
+
+| Field | Derivation |
+|---|---|
+| `resource_id` | `NatGatewayId` → absent (skip) |
+| `nat_gateway_id` | `NatGatewayId` → absent (skip) |
+
+### State / age fields
+
+| Field | Derivation |
+|---|---|
+| `normalized_state` | `State` → absent (skip) |
+| `create_time_utc` | `CreateTime` (timezone-aware UTC only) → absent (skip) |
+| `age_days` | `floor((now_utc - create_time_utc) / 86400)` if valid and not future → absent (skip) |
+
+### Core context fields
+
+| Field | Derivation |
+|---|---|
+| `connectivity_type` | `ConnectivityType` → null |
+| `availability_mode` | `AvailabilityMode` → null |
+| `vpc_id` | `VpcId` → null |
+| `subnet_id` | `SubnetId` → null |
+| `nat_gateway_addresses` | `NatGatewayAddresses` → `[]` |
+| `attached_appliances` | `AttachedAppliances` → `[]` |
+| `auto_scaling_ips` | `AutoScalingIps` → null |
+| `auto_provision_zones` | `AutoProvisionZones` → null |
+| `tag_set` | `Tags` → `[]` |
+
+Normalization requirements:
+- String fields: normalized only from non-empty strings.
+- Timestamp: timezone-aware UTC only; naive datetime → absent (skip).
+- Future `CreateTime` → absent (skip).
+- Malformed contextual fields must not produce positive idle evidence.
+
+---
+
+## 6. CloudWatch Traffic Contract
+
+### 6.1 Required Metrics
+
+| Metric | Statistic | Activity if |
+|---|---|---|
+| `BytesOutToDestination` | `Sum` | `Sum > 0` |
+| `BytesInFromSource` | `Sum` | `Sum > 0` |
+| `BytesInFromDestination` | `Sum` | `Sum > 0` |
+| `BytesOutToSource` | `Sum` | `Sum > 0` |
+| `ActiveConnectionCount` | `Maximum` | `Maximum > 0` |
+
+Namespace: `AWS/NATGateway`, dimension `NatGatewayId = nat_gateway_id`
+
+### 6.2 Datapoint Completeness
+
+- Missing datapoints for any required metric must not be treated as zero.
+- If any required metric returns no datapoints → **SKIP ITEM** (insufficient evidence).
+- If any required metric request fails → **FAIL RULE**.
+
+### 6.3 Period Selection
+
+Period must be chosen deterministically from the configured lookback age:
+
+| Window age | Period requirement |
+|---|---|
+| < 15 days | Multiple of 60 seconds |
+| 15–63 days | Multiple of 300 seconds |
+| > 63 days | Multiple of 3600 seconds |
+
+Using `idle_days_threshold * 86400` as the Period satisfies all three constraints (86400 is a
+multiple of 60, 300, and 3600) and produces a single full-window aggregate bucket.
+
+---
+
+## 7. Route-Table Handling
+
+Route-table references are contextual only.
+
+- A route targeting `nat-gateway-id` may be surfaced as evidence.
+- Route-table presence must not suppress an otherwise valid idle finding.
+- Route-table absence must not compensate for missing or incomplete CloudWatch evidence.
+- `DescribeRouteTables` failure → degrade context, do not fail rule.
+
+---
+
+## 8. Evaluation Order (Mandatory)
+
+1. Retrieve and fully paginate `DescribeNatGateways`; fail rule on error.
+2. Normalize each item.
+3. Skip items with absent identity, state, `create_time_utc`, or `age_days`.
+4. Skip items where `normalized_state != "available"`.
+5. Skip items where `age_days < idle_days_threshold`.
+6. Retrieve required CloudWatch metrics; fail rule on API error.
+7. Skip items where any required metric returns no datapoints.
+8. Skip items where any metric shows activity (`> 0`).
+9. Retrieve route-table context (best-effort).
+10. Emit findings.
+
+No raw AWS field access after Step 2.
+
+---
+
+## 9. Exclusion Rules
+
+| Condition | Result |
+|---|---|
+| `nat_gateway_id` absent | **SKIP ITEM** |
+| `normalized_state` absent | **SKIP ITEM** |
+| `normalized_state != "available"` | **SKIP ITEM** |
+| `create_time_utc` absent / naive / future | **SKIP ITEM** |
+| `age_days < idle_days_threshold` | **SKIP ITEM** |
+| Any required metric has no datapoints | **SKIP ITEM** |
+| Any required metric shows activity | **SKIP ITEM** |
+
+No exclusion for: `connectivity_type`, `availability_mode`, tags, route-table presence.
+
+---
+
+## 10. Failure Model
+
+- `DescribeNatGateways` error → **FAIL RULE**
+- CloudWatch metric API error → **FAIL RULE**
+- `DescribeRouteTables` error → degrade context only
+
+---
+
+## 11. Evidence and Cost Contract
+
+### 11.1 Required Evidence/Details Fields
+
+| Field | Requirement |
+|---|---|
+| `evaluation_path` | `"idle-nat-gateway-review-candidate"` |
+| `nat_gateway_id` | Always present |
+| `normalized_state` | Always `"available"` |
+| `create_time` | ISO 8601 UTC string |
+| `age_days` | Integer |
+| `idle_days_threshold` | Integer |
+| `connectivity_type` | Present or null |
+| `availability_mode` | Present or null |
+| `vpc_id` | Present or null |
+| `subnet_id` | Present or null |
+| `bytes_out_to_destination` | Numeric (0.0 if metric zero) |
+| `bytes_in_from_source` | Numeric |
+| `bytes_in_from_destination` | Numeric |
+| `bytes_out_to_source` | Numeric |
+| `active_connection_count_max` | Numeric |
+
+Optional: `nat_gateway_addresses`, `attached_appliances`, `route_table_referenced`,
+`auto_scaling_ips`, `auto_provision_zones`, `tag_set`.
+
+### 11.2 Cost Estimation Boundary
+
+- `estimated_monthly_cost_usd = null`
+- Do not hardcode a fixed NAT Gateway monthly cost estimate.
+
+---
+
+## 12. Confidence Model
+
+| Condition | Confidence |
+|---|---|
+| Zero traffic AND route-table confirms no reference | `HIGH` |
+| Zero traffic AND (route-table referenced OR route-table context unavailable) | `MEDIUM` |
+
+No LOW-confidence finding may be emitted. Metric failure = FAIL RULE.
+
+---
+
+## 13. Title and Reason Contract
+
+| Field | Value |
+|---|---|
+| `title` | `"Idle NAT Gateway review candidate"` |
+| `reason` | `"NAT Gateway has no trusted CloudWatch traffic signal in the last {N} days"` |
+
+Do NOT claim the NAT Gateway is safe to delete.
+
+---
+
+## 14. Risk Model
+
+| Condition | Risk |
+|---|---|
+| Finding emitted | `MEDIUM` |
+
+---
+
+## 15. Acceptance Scenarios
+
+### Must emit
+
+1. Available, old enough, all metrics zero, no route-table reference → EMIT HIGH
+2. Available, old enough, all metrics zero, route-table still references → EMIT MEDIUM
+3. Available, old enough, all metrics zero, route-table lookup failed → EMIT MEDIUM
+
+### Must skip
+
+4. State `pending`, `failed`, `deleting`, or `deleted` → SKIP
+5. Available but younger than threshold → SKIP
+6. Any byte metric `Sum > 0` → SKIP
+7. `ActiveConnectionCount Maximum > 0` → SKIP
+8. Absent/naive/future `CreateTime` → SKIP
+9. Any required metric returns no datapoints → SKIP
+
+### Must fail
+
+10. `DescribeNatGateways` failure → FAIL RULE
+11. CloudWatch metric fetch failure → FAIL RULE
+
+### Must NOT happen
+
+1. LOW-confidence finding emitted
+2. CloudWatch metric failure → LOW-confidence finding
+3. Missing datapoints treated as zero activity
+4. `estimated_monthly_cost_usd` set to non-null
+5. Route-table absence used as traffic evidence substitute
+
+---
+
+## 16. In-File Contract
+
+```
+Rule: aws.ec2.nat_gateway.idle
+
+ (spec — docs/specs/aws/nat_gateway_idle.md)
+
+Intent:
+ Detect NAT Gateways that are currently available, old enough to evaluate,
+ and show no trusted CloudWatch traffic/activity evidence during the
+ configured observation window, so they can be reviewed as possible cleanup
+ candidates.
+
+Exclusions:
+ - nat_gateway_id absent (malformed identity)
+ - normalized_state absent (missing current-state signal)
+ - normalized_state != "available"
+ - create_time_utc absent, naive, or in the future
+ - age_days < idle_days_threshold (too new to evaluate)
+ - any required CloudWatch metric has no datapoints (insufficient evidence)
+ - any required metric shows activity > 0
+
+Detection:
+ - nat_gateway_id present, normalized_state == "available"
+ - age_days >= idle_days_threshold
+ - all 5 required CloudWatch metrics return datapoints and are all zero
+
+Key rules:
+ - Missing CloudWatch datapoints → SKIP ITEM (not zero).
+ - CloudWatch API failure → FAIL RULE (not LOW-confidence finding).
+ - 5 required metrics: BytesOutToDestination, BytesInFromSource,
+ BytesInFromDestination, BytesOutToSource (Sum), ActiveConnectionCount (Maximum).
+ - Route-table context is contextual only; absence does not substitute
+ for CloudWatch evidence.
+ - Naive CreateTime → SKIP ITEM.
+ - estimated_monthly_cost_usd = None.
+ - Confidence: HIGH (no route ref) or MEDIUM (route ref or unavailable).
+ - Risk: MEDIUM.
+
+Blind spots:
+ - planned future usage or DR/failover intent
+ - seasonal or cyclical usage outside the observation window
+ - organizational ownership or business intent
+ - exact region-specific pricing impact
+
+APIs:
+ - ec2:DescribeNatGateways
+ - cloudwatch:GetMetricStatistics
+ - ec2:DescribeRouteTables (contextual)
+```
+
+---
+
+## 17. Implementation Constants
+
+- `_DEFAULT_IDLE_DAYS_THRESHOLD = 14`
diff --git a/docs/specs/aws/rds_idle.md b/docs/specs/aws/rds_idle.md
new file mode 100644
index 0000000..c0ecdf3
--- /dev/null
+++ b/docs/specs/aws/rds_idle.md
@@ -0,0 +1,313 @@
+# aws.rds.instance.idle — Canonical Rule Specification
+
+## 1. Intent
+
+Detect provisioned standalone DB instances that are currently `available`, old enough to
+evaluate, and show no trusted CloudWatch client-connection activity for the configured
+observation window, so they can be reviewed as possible cleanup candidates.
+
+This is a **CleanCloud-derived review heuristic**, not an AWS-native DB instance state.
+It is a **read-only review-candidate rule** — not a delete-safe rule.
+
+---
+
+## 2. AWS API Grounding
+
+Based on official RDS, CloudWatch, and pricing documentation.
+
+### Key facts
+
+1. `DescribeDBInstances` is the canonical API for enumerating provisioned DB instances in
+ the scanned Region/account scope and supports pagination.
+2. AWS explicitly notes that `DescribeDBInstances` can also return Amazon Neptune and Amazon
+ DocumentDB DB instances.
+3. `DBInstance.InstanceCreateTime` is a documented timestamp field.
+4. `DBInstance.DBInstanceStatus` is a documented state field with many values including
+ `available`, `creating`, `starting`, `stopped`, `stopping`, `backing-up`, `modifying`.
+5. The RDS status guide states that `available` DB instances are billed.
+6. `DBInstance.ReadReplicaSourceDBInstanceIdentifier`,
+ `DBInstance.ReadReplicaSourceDBClusterIdentifier`, and `DBInstance.DBClusterIdentifier`
+ are documented scope fields.
+7. RDS publishes instance-level metrics in CloudWatch namespace `AWS/RDS`.
+8. `DatabaseConnections` is the number of client network connections to the DB instance.
+9. AWS explicitly states that `DatabaseConnections` does **not** include:
+ - sessions that no longer have a network connection but which the database hasn't cleaned up
+ - sessions created by the database engine for its own purposes
+ - sessions created by the database engine's parallel execution capabilities
+ - sessions created by the database engine job scheduler
+ - Amazon RDS connections
+10. CloudWatch `GetMetricStatistics` uses inclusive `StartTime`, exclusive `EndTime`, rounds
+ `StartTime` based on lookback age, does not guarantee datapoint order, and imposes
+ retention / `Period` constraints.
+11. AWS pricing docs state that billing for DB instance hours starts when a DB instance
+ becomes available and continues while it is running in an available state.
+12. Fixed monthly USD cost estimates are not canonical from AWS docs.
+
+### Implications
+
+- Only `available` DB instances are eligible.
+- Age thresholding is supportable because `InstanceCreateTime` is documented.
+- `DatabaseConnections` Maximum is the sole required activity metric for this rule.
+- `DatabaseConnections == 0` does not prove total absence of all engine activity; it only
+ proves absence of observed client network connections in the metric contract.
+- Connection pooling and proxy layers (RDS Proxy, PgBouncer, application connection pools)
+ can reduce the reliability of instance-level observed client connection counts.
+- `estimated_monthly_cost_usd = null`.
+
+---
+
+## 3. Scope and Terminology
+
+- **"DB instance"** — an item returned by `DescribeDBInstances`.
+- **"standalone"** — not a read replica of another DB instance, not a read replica of a
+ DB cluster, and not a member of a DB cluster.
+- **"idle"** — no observed client connection activity via trusted CloudWatch
+ `DatabaseConnections` metric evidence for the full configured observation window.
+- `idle_days_threshold` — operator-configurable, default 14.
+- `observation_window_start_utc = now_utc − idle_days_threshold × 86400 seconds`
+- `observation_window_end_utc = now_utc`
+- `age_days = floor((now_utc − instance_create_time_utc) / 86400 seconds)`
+- The rule is evaluated independently per Region.
+
+**Scope boundary:** standalone provisioned DB instances only. Read replicas and cluster
+members are out of scope.
+
+---
+
+## 4. Canonical Rule Statement
+
+A DB instance is eligible only when **all** of the following are true:
+
+- Stable DB instance identity exists
+- `DBInstanceStatus == "available"`
+- The instance is standalone
+- `age_days >= idle_days_threshold`
+- All `DatabaseConnections Maximum` datapoints in the observation window are exactly zero
+
+No additional predicate may be required for baseline eligibility, including:
+CPU utilisation thresholds, storage I/O thresholds, engine family, instance class,
+Multi-AZ setting, allocated storage size, or tag presence/absence.
+
+---
+
+## 5. Normalization Contract
+
+All rule logic must operate on normalized fields only.
+
+| Canonical field | Source field | Absent / invalid |
+|---|---|---|
+| `resource_id` | `DBInstanceIdentifier` | skip item |
+| `db_instance_id` | `DBInstanceIdentifier` | skip item |
+| `normalized_status` | `DBInstanceStatus` | skip item |
+| `instance_create_time_utc` | `InstanceCreateTime` (tz-aware UTC) | skip item |
+| `age_days` | floor((now − create_time) / 86400) | skip item |
+| `db_cluster_identifier` | `DBClusterIdentifier` | null |
+| `read_replica_source_db_instance_identifier` | `ReadReplicaSourceDBInstanceIdentifier` | null |
+| `read_replica_source_db_cluster_identifier` | `ReadReplicaSourceDBClusterIdentifier` | null |
+| `engine` | `Engine` | null |
+| `engine_version` | `EngineVersion` | null |
+| `db_instance_class` | `DBInstanceClass` | null |
+| `multi_az` | `MultiAZ` (bool only) | null |
+| `allocated_storage_gib` | `AllocatedStorage` (int only) | null |
+| `storage_type` | `StorageType` | null |
+| `dbi_resource_id` | `DbiResourceId` | null |
+| `db_instance_arn` | `DBInstanceArn` | null |
+| `tag_set` | `TagList` (list only) | `[]` |
+
+Normalization requirements:
+- String-valued fields: normalize only from non-empty strings.
+- Timestamp fields: must be timezone-aware UTC before use; naive → skip item.
+- Future `InstanceCreateTime` → skip item.
+
+---
+
+## 6. Idle-Activity Determination
+
+CloudWatch is the **sole trusted activity source** for this rule.
+
+**Required metric:**
+
+| Field | Value |
+|---|---|
+| Namespace | `AWS/RDS` |
+| Dimension | `DBInstanceIdentifier = db_instance_id` |
+| Metric | `DatabaseConnections` |
+| Statistic | `Maximum` |
+| Period | `idle_days_threshold × 86400` (satisfies all CloudWatch retention constraints) |
+
+**Interpretation:**
+
+- If `DatabaseConnections Maximum > 0` anywhere in the observation window → **not idle** (skip item).
+- The DB instance is idle only when datapoints exist and all `Maximum` values are exactly `0`.
+
+**Datapoint completeness:**
+
+- Missing datapoints **must not** be interpreted as zero connections.
+- If `DatabaseConnections` returns no datapoints → **SKIP ITEM** (insufficient evidence).
+- If `DatabaseConnections` retrieval fails → **FAIL RULE**.
+
+---
+
+## 7. Pricing / Cost Boundary
+
+- `estimated_monthly_cost_usd = null` — no hardcoded per-engine or per-class estimates.
+
+---
+
+## 8. Deterministic Evaluation Order
+
+1. Retrieve and fully paginate `DescribeDBInstances`.
+2. Normalize each item.
+3. For each normalized item:
+ - `db_instance_id` absent → **SKIP ITEM**
+ - `normalized_status` absent → **SKIP ITEM**
+ - `normalized_status != "available"` → **SKIP ITEM**
+ - `db_cluster_identifier` present → **SKIP ITEM**
+ - `read_replica_source_db_instance_identifier` present → **SKIP ITEM**
+ - `read_replica_source_db_cluster_identifier` present → **SKIP ITEM**
+ - `instance_create_time_utc` absent/invalid/future → **SKIP ITEM**
+ - `age_days < idle_days_threshold` → **SKIP ITEM**
+ - Retrieve `DatabaseConnections Maximum`
+ - API failure → **FAIL RULE**
+ - No datapoints → **SKIP ITEM**
+ - Any `Maximum > 0` → **SKIP ITEM**
+ - Otherwise → **EMIT**
+
+---
+
+## 9. Exclusion Rules
+
+1. `db_instance_id` absent → malformed identity
+2. `normalized_status` absent → missing state signal
+3. `normalized_status != "available"` → not currently evaluable
+4. `db_cluster_identifier` present → cluster member (out of scope)
+5. `read_replica_source_db_instance_identifier` present → DB instance read replica
+6. `read_replica_source_db_cluster_identifier` present → cross-cluster read replica
+7. `instance_create_time_utc` absent/naive/future → missing/invalid age source
+8. `age_days < idle_days_threshold` → too young
+9. `DatabaseConnections` returns no datapoints → insufficient trusted evidence
+10. Any `DatabaseConnections Maximum > 0` → observed client connections
+
+---
+
+## 10. Failure Model
+
+**Rule-level failures (FAIL RULE):**
+- `DescribeDBInstances` request/pagination failure
+- `DatabaseConnections` CloudWatch retrieval failure
+- Permission failure for required APIs
+
+**Item-level skips (SKIP ITEM):**
+- Missing identity, status, or create-time
+- Non-available status
+- Replica / cluster-member scope exclusions
+- Too young
+- Insufficient CloudWatch datapoints
+- Observed client connections
+
+---
+
+## 11. Evidence / Details Contract
+
+### Required details fields
+
+```
+evaluation_path = "idle-rds-instance-review-candidate"
+db_instance_id
+normalized_status = "available"
+instance_create_time (ISO-8601 UTC)
+age_days
+idle_days_threshold
+engine
+engine_version
+db_instance_class
+database_connections_max
+```
+
+### Optional context fields
+
+```
+db_cluster_identifier
+read_replica_source_db_instance_identifier
+read_replica_source_db_cluster_identifier
+multi_az
+allocated_storage_gib
+storage_type
+dbi_resource_id
+db_instance_arn
+tag_set
+```
+
+### Required evidence wording
+
+**Signals used** must state:
+- DB instance Status is `available`
+- The DB instance is standalone (not a read replica or cluster member)
+- The DB instance age met the configured threshold
+- `DatabaseConnections` Maximum was zero across the observation window
+- The finding is based on a CleanCloud-derived idle heuristic over observed client network connections
+
+**Signals not checked** must state major blind spots:
+- Sessions without network connections that the database hasn't cleaned up
+- Sessions created by the database engine for its own purposes
+- Sessions created by parallel execution capabilities or job schedulers
+- Amazon RDS connections
+- RDS Proxy, PgBouncer, and application connection pools that can hide real usage while keeping observed client connection counts low or zero
+- Planned future usage or disaster recovery intent
+- Exact region-specific pricing impact
+
+---
+
+## 12. Confidence and Risk
+
+| Condition | Confidence | Risk |
+|---|---|---|
+| Datapoints present, all `Maximum == 0`, all gates satisfied | `MEDIUM` | `MEDIUM` |
+
+- **Do not** emit LOW-confidence findings when required metric data is unavailable — SKIP ITEM or FAIL RULE instead.
+- `DatabaseConnections` has documented blind spots (§2 item 9), so `MEDIUM` (not `HIGH`) is the ceiling.
+
+---
+
+## 13. Non-Goals / Blind Spots
+
+This rule does not prove:
+- The DB instance is safe to delete
+- The DB instance has no engine-internal activity
+- The DB instance had no uncounted sessions
+- The DB instance will not be used again
+- CPU or storage I/O was zero
+- Backup, snapshot, or retention needs have been evaluated
+
+---
+
+## 14. Acceptance Scenarios
+
+| # | Scenario | Expected |
+|---|---|---|
+| 1 | Standalone `available` instance, old enough, `DatabaseConnections Maximum == 0` across all datapoints | EMIT — confidence MEDIUM |
+| 2 | Instance status not `available` | SKIP ITEM |
+| 3 | DB instance read replica (`ReadReplicaSourceDBInstanceIdentifier` set) | SKIP ITEM |
+| 4 | Cross-cluster read replica (`ReadReplicaSourceDBClusterIdentifier` set) | SKIP ITEM |
+| 5 | DB cluster member (`DBClusterIdentifier` set) | SKIP ITEM |
+| 6 | Younger than `idle_days_threshold` | SKIP ITEM |
+| 7 | Any `DatabaseConnections Maximum > 0` | SKIP ITEM |
+| 8 | `DatabaseConnections` returns no datapoints | SKIP ITEM |
+| 9 | Missing/naive/future `InstanceCreateTime` | SKIP ITEM |
+| 10 | `DescribeDBInstances` fails | FAIL RULE |
+| 11 | `DatabaseConnections` retrieval fails | FAIL RULE |
+
+---
+
+## 15. Implementation Constraints
+
+- Use `DescribeDBInstances` as the sole required inventory source.
+- Use `DatabaseConnections Maximum` as the sole required activity metric.
+- Exhaust pagination; no early exit.
+- Use top-level `DBInstanceStatus` as the canonical state signal.
+- Use documented `InstanceCreateTime` for age gating; naive → skip.
+- Do not interpret missing datapoints as zero connections.
+- Do not emit LOW-confidence findings when required CloudWatch data is absent.
+- Do not require CPU or I/O metrics for baseline eligibility.
+- Do not hardcode engine/class/storage monthly cost estimates.
+- `estimated_monthly_cost_usd = null`.
diff --git a/pyproject.toml b/pyproject.toml
index b84c282..9df6169 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "cleancloud"
-version = "1.19.0"
+version = "1.20.0"
description = "Read-only cloud hygiene for AWS, Azure, and GCP. Multi-account org scanning, CI/CD enforcement, and deterministic cost modeling. No agents, no telemetry."
readme = "README.md"
requires-python = ">=3.10"
diff --git a/tests/cleancloud/providers/aws/test_aws_elastic_ip_unattached.py b/tests/cleancloud/providers/aws/test_aws_elastic_ip_unattached.py
index 4550763..17cbf8b 100644
--- a/tests/cleancloud/providers/aws/test_aws_elastic_ip_unattached.py
+++ b/tests/cleancloud/providers/aws/test_aws_elastic_ip_unattached.py
@@ -1,158 +1,487 @@
-from datetime import datetime, timedelta, timezone
+import pytest
+from botocore.exceptions import BotoCoreError, ClientError
from cleancloud.core.confidence import ConfidenceLevel
-from cleancloud.providers.aws.rules.elastic_ip_unattached import (
- find_unattached_elastic_ips,
-)
-
-
-def test_find_unattached_elastic_ips(mock_boto3_session):
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- old_date = datetime.now(timezone.utc) - timedelta(days=60)
- recent_date = datetime.now(timezone.utc) - timedelta(days=10)
-
- # Mock describe_addresses (non-paginated by AWS)
- ec2.describe_addresses.return_value = {
- "Addresses": [
- {
- "AllocationId": "eipalloc-1",
- "PublicIp": "203.0.113.1",
- "Domain": "vpc",
- "AllocationTime": old_date,
- # No AssociationId = unattached
- },
- {
- "AllocationId": "eipalloc-2",
- "PublicIp": "203.0.113.2",
- "Domain": "vpc",
- "AllocationTime": old_date,
- "AssociationId": "eipassoc-123", # attached
- "InstanceId": "i-123",
- },
- {
- "AllocationId": "eipalloc-3",
- "PublicIp": "203.0.113.3",
- "Domain": "vpc",
- "AllocationTime": recent_date, # too young
- # No AssociationId = unattached but recent
- },
- {
- # No AllocationId (genuine EC2-Classic EIP — identified by PublicIp)
- "PublicIp": "203.0.113.4",
- "Domain": "standard",
- # No AllocationTime
- # No AssociationId = unattached
- },
- {
- "AllocationId": "eipalloc-5",
- "PublicIp": "203.0.113.5",
- "Domain": "vpc",
- # No AllocationTime (VPC EIP missing timestamp — should be skipped)
- # No AssociationId = unattached
- },
- ]
- }
-
- findings = find_unattached_elastic_ips(mock_boto3_session, region)
- eip_ids = {f.resource_id for f in findings}
- findings_by_id = {f.resource_id: f for f in findings}
-
- # Positive: old (60 days) unattached EIP
- assert "eipalloc-1" in eip_ids
-
- # Positive: classic EIP (domain=standard) without AllocationTime (flagged conservatively)
- # Uses PublicIp as resource_id since Classic EIPs have no AllocationId
- assert "203.0.113.4" in eip_ids
-
- # Negative: attached EIP
- assert "eipalloc-2" not in eip_ids
-
- # Negative: unattached but too young (10 days < 30 day threshold)
- assert "eipalloc-3" not in eip_ids
-
- # Negative: VPC EIP without AllocationTime — cannot determine age, skip
- assert "eipalloc-5" not in eip_ids
-
- assert len(findings) == 2
-
- # Verify cost estimate ($3.75/month for each EIP)
- for f in findings:
- assert f.estimated_monthly_cost_usd == 3.75
-
- # Verify title includes "(Review Recommended)"
- for f in findings:
- assert f.title == "Unattached Elastic IP (Review Recommended)"
-
- # Verify confidence is HIGH for all findings
- for f in findings:
- assert f.confidence == ConfidenceLevel.HIGH
-
- # Verify VPC EIP details and wording
- f1 = findings_by_id["eipalloc-1"]
- assert f1.details["is_classic"] is False
- assert f1.details["age_days"] == 60
- assert "allocation_time" in f1.details
- assert "allocated" in f1.summary and "currently unattached" in f1.summary
- assert "allocated" in f1.evidence.signals_used[1]
-
- # Verify Classic EIP details, wording, and PublicIp fallback for resource_id
- f4 = findings_by_id["203.0.113.4"]
- assert f4.resource_id == "203.0.113.4"
- assert f4.details["is_classic"] is True
- assert "age_days" not in f4.details
- assert "allocation_time" not in f4.details
- assert "Classic" in f4.summary
- assert any("Classic EIP" in s for s in f4.evidence.signals_used)
- assert any("deprecated" in s for s in f4.evidence.signals_used)
- assert f4.evidence.time_window == "Unknown (Classic EIP, no AllocationTime)"
-
-
-def test_find_unattached_elastic_ips_custom_threshold(mock_boto3_session):
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- date_45_days_ago = datetime.now(timezone.utc) - timedelta(days=45)
-
- ec2.describe_addresses.return_value = {
- "Addresses": [
- {
- "AllocationId": "eipalloc-5",
- "PublicIp": "203.0.113.5",
- "Domain": "vpc",
- "AllocationTime": date_45_days_ago,
- }
+from cleancloud.core.risk import RiskLevel
+from cleancloud.providers.aws.rules.elastic_ip_unattached import find_unattached_elastic_ips
+
+# ---------------------------------------------------------------------------
+# Test helpers
+# ---------------------------------------------------------------------------
+
+_REGION = "us-east-1"
+
+
+def _eip(
+ allocation_id: str | None = "eipalloc-001",
+ public_ip: str | None = "203.0.113.1",
+ domain: str | None = "vpc",
+ association_id: str | None = None,
+ instance_id: str | None = None,
+ network_interface_id: str | None = None,
+ private_ip_address: str | None = None,
+ carrier_ip: str | None = None,
+ tags: list | None = None,
+ **extra,
+) -> dict:
+ raw: dict = {}
+ if allocation_id is not None:
+ raw["AllocationId"] = allocation_id
+ if public_ip is not None:
+ raw["PublicIp"] = public_ip
+ if domain is not None:
+ raw["Domain"] = domain
+ if association_id is not None:
+ raw["AssociationId"] = association_id
+ if instance_id is not None:
+ raw["InstanceId"] = instance_id
+ if network_interface_id is not None:
+ raw["NetworkInterfaceId"] = network_interface_id
+ if private_ip_address is not None:
+ raw["PrivateIpAddress"] = private_ip_address
+ if carrier_ip is not None:
+ raw["CarrierIp"] = carrier_ip
+ if tags is not None:
+ raw["Tags"] = tags
+ raw.update(extra)
+ return raw
+
+
+def _run(mock_boto3_session, addresses: list) -> list:
+ mock_boto3_session._ec2.describe_addresses.return_value = {"Addresses": addresses}
+ return find_unattached_elastic_ips(mock_boto3_session, _REGION)
+
+
+# ---------------------------------------------------------------------------
+# TestMustEmit
+# ---------------------------------------------------------------------------
+
+
+class TestMustEmit:
+ def test_vpc_eip_no_association_fields(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert len(findings) == 1
+ f = findings[0]
+ assert f.resource_id == "eipalloc-001"
+ assert f.rule_id == "aws.ec2.elastic_ip.unattached"
+ assert f.provider == "aws"
+ assert f.resource_type == "aws.ec2.elastic_ip"
+ assert f.region == _REGION
+
+ def test_standard_domain_no_association_fields(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(allocation_id=None, public_ip="203.0.113.10", domain="standard")],
+ )
+ assert len(findings) == 1
+ assert findings[0].resource_id == "203.0.113.10"
+
+ def test_carrier_ip_only_as_resource_id(self, mock_boto3_session):
+ """When only CarrierIp is present, it becomes the resource_id."""
+ findings = _run(
+ mock_boto3_session,
+ [_eip(allocation_id=None, public_ip=None, carrier_ip="203.0.113.20")],
+ )
+ assert len(findings) == 1
+ assert findings[0].resource_id == "203.0.113.20"
+
+ def test_byoip_and_service_managed_contextual_only(self, mock_boto3_session):
+ """BYOIP / service_managed fields are contextual and must not suppress the finding."""
+ findings = _run(
+ mock_boto3_session,
+ [
+ _eip(
+ PublicIpv4Pool="ipv4pool-ec2-abc",
+ ServiceManaged=True,
+ CustomerOwnedIp="10.0.0.5",
+ CustomerOwnedIpv4Pool="coip-pool-001",
+ )
+ ],
+ )
+ assert len(findings) == 1
+
+ def test_service_managed_false_still_emits(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip(ServiceManaged=False)])
+ assert len(findings) == 1
+
+ def test_tags_present_still_emits(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(tags=[{"Key": "env", "Value": "prod"}])],
+ )
+ assert len(findings) == 1
+
+ def test_multiple_unattached_all_emitted(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [
+ _eip(allocation_id="eipalloc-a", public_ip="1.2.3.4"),
+ _eip(allocation_id="eipalloc-b", public_ip="1.2.3.5"),
+ ],
+ )
+ assert {f.resource_id for f in findings} == {"eipalloc-a", "eipalloc-b"}
+
+ def test_empty_addresses_returns_empty(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [])
+ assert findings == []
+
+
+# ---------------------------------------------------------------------------
+# TestMustSkip
+# ---------------------------------------------------------------------------
+
+
+class TestMustSkip:
+ def test_association_id_present(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(association_id="eipassoc-123")],
+ )
+ assert findings == []
+
+ def test_instance_id_present_no_association_id(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip(instance_id="i-abc")])
+ assert findings == []
+
+ def test_network_interface_id_present_no_association_id(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip(network_interface_id="eni-abc")])
+ assert findings == []
+
+ def test_private_ip_address_present_no_association_id(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip(private_ip_address="10.0.0.5")])
+ assert findings == []
+
+ def test_missing_all_identity_fields(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(allocation_id=None, public_ip=None, carrier_ip=None)],
+ )
+ assert findings == []
+
+ def test_mixed_attached_and_unattached(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [
+ _eip(allocation_id="eipalloc-attached", association_id="eipassoc-x"),
+ _eip(allocation_id="eipalloc-free"),
+ ],
+ )
+ assert len(findings) == 1
+ assert findings[0].resource_id == "eipalloc-free"
+
+ def test_non_dict_item_skipped_not_raised(self, mock_boto3_session):
+ """Non-dict items in Addresses must be skipped, not raise."""
+ valid = _eip()
+ for bad in (None, "string", 42, ["list"]):
+ mock_boto3_session._ec2.describe_addresses.return_value = {"Addresses": [bad, valid]}
+ findings = find_unattached_elastic_ips(mock_boto3_session, _REGION)
+ assert len(findings) == 1, f"Expected 1 finding with bad item={bad!r}"
+
+ def test_all_four_association_fields_each_independently_skip(self, mock_boto3_session):
+ """Each of the four association fields independently triggers SKIP."""
+ for field, value in [
+ ("association_id", "eipassoc-1"),
+ ("instance_id", "i-001"),
+ ("network_interface_id", "eni-001"),
+ ("private_ip_address", "10.0.0.1"),
+ ]:
+ findings = _run(mock_boto3_session, [_eip(**{field: value})])
+ assert findings == [], f"Expected SKIP when {field} is present"
+
+
+# ---------------------------------------------------------------------------
+# TestMustFailRule
+# ---------------------------------------------------------------------------
+
+
+class TestMustFailRule:
+ def test_describe_addresses_unauthorized(self, mock_boto3_session):
+ mock_boto3_session._ec2.describe_addresses.side_effect = ClientError(
+ {"Error": {"Code": "UnauthorizedOperation", "Message": "denied"}},
+ "DescribeAddresses",
+ )
+ with pytest.raises(PermissionError, match="ec2:DescribeAddresses"):
+ find_unattached_elastic_ips(mock_boto3_session, _REGION)
+
+ def test_describe_addresses_access_denied(self, mock_boto3_session):
+ mock_boto3_session._ec2.describe_addresses.side_effect = ClientError(
+ {"Error": {"Code": "AccessDenied", "Message": "denied"}},
+ "DescribeAddresses",
+ )
+ with pytest.raises(PermissionError, match="ec2:DescribeAddresses"):
+ find_unattached_elastic_ips(mock_boto3_session, _REGION)
+
+ def test_describe_addresses_client_error_propagates(self, mock_boto3_session):
+ mock_boto3_session._ec2.describe_addresses.side_effect = ClientError(
+ {"Error": {"Code": "RequestExpired", "Message": "expired"}},
+ "DescribeAddresses",
+ )
+ with pytest.raises(ClientError):
+ find_unattached_elastic_ips(mock_boto3_session, _REGION)
+
+ def test_describe_addresses_botocore_error_propagates(self, mock_boto3_session):
+ mock_boto3_session._ec2.describe_addresses.side_effect = BotoCoreError()
+ with pytest.raises(BotoCoreError):
+ find_unattached_elastic_ips(mock_boto3_session, _REGION)
+
+ def test_addresses_key_absent_fails_rule(self, mock_boto3_session):
+ mock_boto3_session._ec2.describe_addresses.return_value = {}
+ with pytest.raises(RuntimeError, match="Addresses"):
+ find_unattached_elastic_ips(mock_boto3_session, _REGION)
+
+ def test_addresses_not_a_list_fails_rule(self, mock_boto3_session):
+ mock_boto3_session._ec2.describe_addresses.return_value = {"Addresses": "bad"}
+ with pytest.raises(RuntimeError, match="Addresses"):
+ find_unattached_elastic_ips(mock_boto3_session, _REGION)
+
+ def test_addresses_none_fails_rule(self, mock_boto3_session):
+ mock_boto3_session._ec2.describe_addresses.return_value = {"Addresses": None}
+ with pytest.raises(RuntimeError, match="Addresses"):
+ find_unattached_elastic_ips(mock_boto3_session, _REGION)
+
+
+# ---------------------------------------------------------------------------
+# TestNormalization
+# ---------------------------------------------------------------------------
+
+
+class TestNormalization:
+ def test_allocation_id_is_preferred_resource_id(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(allocation_id="eipalloc-pref", public_ip="1.2.3.4")],
+ )
+ assert findings[0].resource_id == "eipalloc-pref"
+
+ def test_public_ip_fallback_when_no_allocation_id(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(allocation_id=None, public_ip="5.6.7.8")],
+ )
+ assert findings[0].resource_id == "5.6.7.8"
+
+ def test_carrier_ip_fallback_when_no_allocation_or_public(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(allocation_id=None, public_ip=None, carrier_ip="9.10.11.12")],
+ )
+ assert findings[0].resource_id == "9.10.11.12"
+
+ def test_domain_absent_is_null_in_details(self, mock_boto3_session):
+ raw = {"AllocationId": "eipalloc-nd", "PublicIp": "1.2.3.4"}
+ _run(mock_boto3_session, [raw])
+ mock_boto3_session._ec2.describe_addresses.return_value = {"Addresses": [raw]}
+ findings = find_unattached_elastic_ips(mock_boto3_session, _REGION)
+ assert findings[0].details["domain"] is None
+
+ def test_empty_string_fields_treated_as_absent(self, mock_boto3_session):
+ """Empty string AllocationId must not be used as resource_id."""
+ raw = {"AllocationId": "", "PublicIp": "1.2.3.4"}
+ mock_boto3_session._ec2.describe_addresses.return_value = {"Addresses": [raw]}
+ findings = find_unattached_elastic_ips(mock_boto3_session, _REGION)
+ assert findings[0].resource_id == "1.2.3.4"
+
+ def test_optional_context_fields_captured(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [
+ _eip(
+ NetworkBorderGroup="us-east-1-wl1-bos-wlz-1",
+ PublicIpv4Pool="ipv4pool-ec2-abc",
+ SubnetId="subnet-xyz",
+ NetworkInterfaceOwnerId="111122223333",
+ CustomerOwnedIp="10.0.0.5",
+ CustomerOwnedIpv4Pool="coip-001",
+ )
+ ],
+ )
+ d = findings[0].details
+ assert d["network_border_group"] == "us-east-1-wl1-bos-wlz-1"
+ assert d["public_ipv4_pool"] == "ipv4pool-ec2-abc"
+ assert d["subnet_id"] == "subnet-xyz"
+ assert d["network_interface_owner_id"] == "111122223333"
+ assert d["customer_owned_ip"] == "10.0.0.5"
+ assert d["customer_owned_ipv4_pool"] == "coip-001"
+
+ def test_service_managed_string_enum_captured(self, mock_boto3_session):
+ """ServiceManaged is a string enum — captured as string context."""
+ for value in ("alb", "nlb", "rnat", "rds"):
+ findings = _run(mock_boto3_session, [_eip(ServiceManaged=value)])
+ assert findings[0].details["service_managed"] == value
+
+ def test_service_managed_non_string_not_in_details(self, mock_boto3_session):
+ """Non-string values (e.g. bool) must not be treated as valid string enum."""
+ for bad in (True, False, 1, None):
+ findings = _run(mock_boto3_session, [_eip(ServiceManaged=bad)])
+ assert "service_managed" not in findings[0].details
+
+ def test_service_managed_empty_string_not_in_details(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip(ServiceManaged="")])
+ assert "service_managed" not in findings[0].details
+
+ def test_tags_normalized_to_dict(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(tags=[{"Key": "env", "Value": "prod"}, {"Key": "team", "Value": "ops"}])],
+ )
+ assert findings[0].details["tags"] == {"env": "prod", "team": "ops"}
+
+ def test_empty_tags_not_in_details(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip(tags=[])])
+ assert "tags" not in findings[0].details
+
+ def test_allocation_id_null_in_details_when_absent(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(allocation_id=None, public_ip="1.2.3.4")],
+ )
+ assert findings[0].details["allocation_id"] is None
+
+ def test_carrier_ip_null_in_details_when_absent(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert findings[0].details["carrier_ip"] is None
+
+
+# ---------------------------------------------------------------------------
+# TestConfidenceModel
+# ---------------------------------------------------------------------------
+
+
+class TestConfidenceModel:
+ def test_always_high_confidence(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert findings[0].confidence == ConfidenceLevel.HIGH
+
+ def test_standard_domain_also_high(self, mock_boto3_session):
+ findings = _run(
+ mock_boto3_session,
+ [_eip(allocation_id=None, public_ip="1.2.3.4", domain="standard")],
+ )
+ assert findings[0].confidence == ConfidenceLevel.HIGH
+
+
+# ---------------------------------------------------------------------------
+# TestRiskModel
+# ---------------------------------------------------------------------------
+
+
+class TestRiskModel:
+ def test_risk_is_low(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert findings[0].risk == RiskLevel.LOW
+
+
+# ---------------------------------------------------------------------------
+# TestCostModel
+# ---------------------------------------------------------------------------
+
+
+class TestCostModel:
+ def test_estimated_monthly_cost_always_none(self, mock_boto3_session):
+ """No hardcoded cost estimate allowed — must be None."""
+ findings = _run(mock_boto3_session, [_eip()])
+ assert findings[0].estimated_monthly_cost_usd is None
+
+
+# ---------------------------------------------------------------------------
+# TestEvidenceContract
+# ---------------------------------------------------------------------------
+
+
+class TestEvidenceContract:
+ def test_all_required_fields_present(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ d = findings[0].details
+ required = [
+ "evaluation_path",
+ "resource_id",
+ "allocation_id",
+ "public_ip",
+ "carrier_ip",
+ "domain",
+ "currently_associated",
+ "association_id",
+ "instance_id",
+ "network_interface_id",
+ "private_ip_address",
]
- }
+ for field in required:
+ assert field in d, f"Missing required field: {field}"
+
+ def test_evaluation_path_exact(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert findings[0].details["evaluation_path"] == "unattached-eip-review-candidate"
+
+ def test_currently_associated_always_false(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert findings[0].details["currently_associated"] is False
+
+ def test_association_fields_always_null(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ d = findings[0].details
+ assert d["association_id"] is None
+ assert d["instance_id"] is None
+ assert d["network_interface_id"] is None
+ assert d["private_ip_address"] is None
+
+ def test_signals_used_mention_not_associated(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ combined = " ".join(findings[0].evidence.signals_used).lower()
+ assert "not associated" in combined or "currently not associated" in combined
+
+ def test_signals_used_mention_allocated(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ combined = " ".join(findings[0].evidence.signals_used).lower()
+ assert "allocated" in combined
+
+ def test_signals_used_mention_aws_recommends(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ combined = " ".join(findings[0].evidence.signals_used).lower()
+ assert "aws recommends" in combined or "recommends" in combined
+
+ def test_signals_not_checked_include_blind_spots(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ snc = " ".join(findings[0].evidence.signals_not_checked).lower()
+ assert "dns" in snc or "failover" in snc
+ assert "application" in snc or "app" in snc
+ assert "service-managed" in snc or "service managed" in snc
- # Test with custom 60-day threshold
- findings = find_unattached_elastic_ips(mock_boto3_session, region, days_unattached=60)
- eip_ids = {f.resource_id for f in findings}
+ def test_time_window_is_none(self, mock_boto3_session):
+ """No temporal threshold — time_window must be None."""
+ findings = _run(mock_boto3_session, [_eip()])
+ assert findings[0].evidence.time_window is None
- # Should NOT be detected (45 days < 60 days threshold)
- assert "eipalloc-5" not in eip_ids
- # Test with custom 30-day threshold (default)
- findings = find_unattached_elastic_ips(mock_boto3_session, region, days_unattached=30)
- eip_ids = {f.resource_id for f in findings}
+# ---------------------------------------------------------------------------
+# TestTitleAndReasonContract
+# ---------------------------------------------------------------------------
- # Should be detected (45 days >= 30 days threshold)
- assert "eipalloc-5" in eip_ids
- # Verify wording uses allocation age, not unattached duration
- f = findings[0]
- assert "allocated" in f.summary
- assert "currently unattached" in f.summary
- assert f.evidence.time_window == "30 days since allocation"
+class TestTitleAndReasonContract:
+ def test_title(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert findings[0].title == "Unattached Elastic IP review candidate"
+ def test_reason(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert findings[0].reason == "Address has no current association per DescribeAddresses"
-def test_find_unattached_elastic_ips_empty(mock_boto3_session):
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
+ def test_summary_contains_resource_id(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert "eipalloc-001" in findings[0].summary
- ec2.describe_addresses.return_value = {"Addresses": []}
+ def test_title_does_not_claim_safe_to_release(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ combined = (findings[0].title + findings[0].summary + findings[0].reason).lower()
+ assert "safe to release" not in combined
- findings = find_unattached_elastic_ips(mock_boto3_session, region)
+ def test_no_allocation_age_in_title_or_reason(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ combined = (findings[0].title + findings[0].summary + findings[0].reason).lower()
+ assert "days ago" not in combined
+ assert "allocated" not in combined or "no longer needed" in findings[0].summary.lower()
- assert len(findings) == 0
+ def test_no_hardcoded_cost_in_summary(self, mock_boto3_session):
+ findings = _run(mock_boto3_session, [_eip()])
+ assert "$3.75" not in findings[0].summary
+ assert "$3.75" not in findings[0].reason
diff --git a/tests/cleancloud/providers/aws/test_aws_elb_idle.py b/tests/cleancloud/providers/aws/test_aws_elb_idle.py
index afd0811..807cede 100644
--- a/tests/cleancloud/providers/aws/test_aws_elb_idle.py
+++ b/tests/cleancloud/providers/aws/test_aws_elb_idle.py
@@ -1,14 +1,36 @@
+"""Tests for cleancloud/providers/aws/rules/elb_idle.py
+
+Covers all spec acceptance scenarios:
+ Must emit / Must skip / Must fail / Normalization / Traffic signals /
+ Confidence model / Cost model / Evidence contract / Title-and-reason contract /
+ Backend enrichment / Pagination / NLB missing-datapoints behaviour
+"""
+
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock
+import pytest
+from botocore.exceptions import BotoCoreError, ClientError
+
from cleancloud.providers.aws.rules.elb_idle import find_idle_load_balancers
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+_REGION = "us-east-1"
+_ACCOUNT = "123456789012"
+_THRESHOLD = 14
+
+
+def _now():
+ return datetime.now(timezone.utc)
+
def _make_session(elbv2, elb, cloudwatch):
- """Create a mock session that returns the given clients."""
session = MagicMock()
- def client_side_effect(service_name, *args, **kwargs):
+ def _client(service_name, *args, **kwargs):
if service_name == "elbv2":
return elbv2
if service_name == "elb":
@@ -17,242 +39,1298 @@ def client_side_effect(service_name, *args, **kwargs):
return cloudwatch
raise ValueError(f"Unexpected service: {service_name}")
- session.client.side_effect = client_side_effect
+ session.client.side_effect = _client
return session
-def _make_elbv2_lb(
+def _elbv2_lb(
name="test-alb",
lb_type="application",
- age_days=30,
+ age_days=20,
state="active",
+ arn_suffix=None,
):
- now = datetime.now(timezone.utc)
- arn = f"arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/{name}/abc123"
+ suffix = arn_suffix if arn_suffix is not None else f"app/{name}/abc123"
+ arn = f"arn:aws:elasticloadbalancing:{_REGION}:{_ACCOUNT}:loadbalancer/{suffix}"
if lb_type == "network":
- arn = f"arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/net/{name}/abc123"
+ suffix = arn_suffix if arn_suffix is not None else f"net/{name}/abc123"
+ arn = f"arn:aws:elasticloadbalancing:{_REGION}:{_ACCOUNT}:loadbalancer/{suffix}"
return {
"LoadBalancerArn": arn,
"LoadBalancerName": name,
"Type": lb_type,
- "CreatedTime": now - timedelta(days=age_days),
+ "CreatedTime": _now() - timedelta(days=age_days),
"State": {"Code": state},
- "DNSName": f"{name}.us-east-1.elb.amazonaws.com",
+ "DNSName": f"{name}.{_REGION}.elb.amazonaws.com",
"VpcId": "vpc-12345",
+ "Scheme": "internet-facing",
}
-def _make_clb(name="test-clb", age_days=30, instances=None):
- now = datetime.now(timezone.utc)
+def _clb(name="test-clb", age_days=20, instances=None):
return {
"LoadBalancerName": name,
- "CreatedTime": now - timedelta(days=age_days),
- "DNSName": f"{name}.us-east-1.elb.amazonaws.com",
+ "CreatedTime": _now() - timedelta(days=age_days),
+ "DNSName": f"{name}.{_REGION}.elb.amazonaws.com",
"VPCId": "vpc-12345",
- "Instances": instances or [],
+ "Scheme": "internet-facing",
+ "Instances": instances if instances is not None else [],
}
-def test_idle_alb_detected():
- """Idle ALB with zero requests and no targets should be flagged as HIGH confidence."""
- elbv2 = MagicMock()
- elb = MagicMock()
- cloudwatch = MagicMock()
+def _setup_elbv2(elbv2, lbs, tg_pages=None, target_health=None):
+ """Configure elbv2 mock with LB and target-group paginators."""
+ lb_pag = MagicMock()
+ lb_pag.paginate.return_value = [{"LoadBalancers": lbs}]
- # ALB setup
- paginator = elbv2.get_paginator.return_value
- paginator.paginate.return_value = [
- {"LoadBalancers": [_make_elbv2_lb(name="idle-alb", age_days=30)]}
- ]
+ tg_pag = MagicMock()
+ tg_pag.paginate.return_value = tg_pages if tg_pages is not None else [{"TargetGroups": []}]
- elbv2.describe_target_groups.return_value = {"TargetGroups": []}
- cloudwatch.get_metric_statistics.return_value = {"Datapoints": []}
+ def _pag(name):
+ if name == "describe_load_balancers":
+ return lb_pag
+ if name == "describe_target_groups":
+ return tg_pag
+ raise ValueError(f"Unexpected paginator: {name}")
- # CLB setup - empty
- elb_paginator = elb.get_paginator.return_value
- elb_paginator.paginate.return_value = [{"LoadBalancerDescriptions": []}]
+ elbv2.get_paginator.side_effect = _pag
- session = _make_session(elbv2, elb, cloudwatch)
- findings = find_idle_load_balancers(session, "us-east-1")
+ if target_health is not None:
+ elbv2.describe_target_health.return_value = target_health
+ else:
+ elbv2.describe_target_health.return_value = {"TargetHealthDescriptions": []}
- assert len(findings) == 1
- f = findings[0]
- assert f.rule_id == "aws.elbv2.alb.idle"
- assert f.resource_type == "aws.elbv2.load_balancer"
- assert f.confidence.value == "high"
- assert f.risk.value == "medium"
- assert f.details["type"] == "application"
- assert f.details["has_targets"] is False
- assert "idle-alb" in f.resource_id
- assert f.estimated_monthly_cost_usd == 18.0
+def _setup_clb(elb, lbs):
+ pag = elb.get_paginator.return_value
+ pag.paginate.return_value = [{"LoadBalancerDescriptions": lbs}]
-def test_active_alb_skipped():
- """ALB with traffic should NOT be flagged."""
- elbv2 = MagicMock()
- elb = MagicMock()
- cloudwatch = MagicMock()
- paginator = elbv2.get_paginator.return_value
- paginator.paginate.return_value = [
- {"LoadBalancers": [_make_elbv2_lb(name="active-alb", age_days=30)]}
- ]
+def _cw_no_traffic():
+ """CloudWatch mock returning empty datapoints for all metrics."""
+ cw = MagicMock()
+ cw.get_metric_statistics.return_value = {"Datapoints": []}
+ return cw
- # Has traffic
- cloudwatch.get_metric_statistics.return_value = {"Datapoints": [{"Sum": 1000}]}
- elb_paginator = elb.get_paginator.return_value
- elb_paginator.paginate.return_value = [{"LoadBalancerDescriptions": []}]
+def _cw_nlb_zero_traffic(num_datapoints=None):
+ """NLB needs enough zero-valued datapoints to satisfy full-window completeness.
- session = _make_session(elbv2, elb, cloudwatch)
- findings = find_idle_load_balancers(session, "us-east-1")
+ Spec requires at least expected_days - 1 datapoints. Default to _THRESHOLD
+ datapoints so the completeness check (>= _THRESHOLD - 1) passes.
+ """
+ n = num_datapoints if num_datapoints is not None else _THRESHOLD
+ cw = MagicMock()
- assert len(findings) == 0
+ def _side(**kwargs):
+ stat = kwargs.get("Statistics", ["Sum"])[0]
+ return {"Datapoints": [{stat: 0}] * n}
+ cw.get_metric_statistics.side_effect = _side
+ return cw
-def test_idle_nlb_detected_unhealthy_targets():
- """Idle NLB with zero flows and only unhealthy targets should be MEDIUM confidence.
- Unhealthy targets are still *registered* targets — treating them as absent would
- produce a false HIGH-confidence finding. has_targets=True → MEDIUM confidence.
+def _cw_metric_with_signal(
+ trigger_metric: str, trigger_stat: str = "Sum", trigger_value: float = 100.0
+):
+ """CloudWatch mock that returns traffic only for the specified metric."""
+ cw = MagicMock()
+
+ def _side(**kwargs):
+ if kwargs.get("MetricName") == trigger_metric:
+ return {"Datapoints": [{trigger_stat: trigger_value}]}
+ stat = kwargs.get("Statistics", ["Sum"])[0]
+ return {"Datapoints": [{stat: 0}]}
+
+ cw.get_metric_statistics.side_effect = _side
+ return cw
+
+
+def _cw_nlb_missing_metric(missing_metric: str):
+ """NLB CloudWatch mock where one metric returns no datapoints (FAIL RULE).
+
+ Non-missing metrics return full-window coverage (_THRESHOLD datapoints)
+ so the completeness check passes for those metrics before we reach the
+ missing one.
"""
- elbv2 = MagicMock()
- elb = MagicMock()
- cloudwatch = MagicMock()
-
- nlb = _make_elbv2_lb(name="idle-nlb", lb_type="network", age_days=20)
- paginator = elbv2.get_paginator.return_value
- paginator.paginate.return_value = [{"LoadBalancers": [nlb]}]
-
- elbv2.describe_target_groups.return_value = {"TargetGroups": [{"TargetGroupArn": "arn:tg"}]}
- elbv2.describe_target_health.return_value = {
- "TargetHealthDescriptions": [
- {"Target": {"Id": "i-123"}, "TargetHealth": {"State": "unhealthy"}}
- ]
+ cw = MagicMock()
+
+ def _side(**kwargs):
+ metric = kwargs.get("MetricName", "")
+ stat = kwargs.get("Statistics", ["Sum"])[0]
+ if metric == missing_metric:
+ return {"Datapoints": []}
+ return {"Datapoints": [{stat: 0}] * _THRESHOLD}
+
+ cw.get_metric_statistics.side_effect = _side
+ return cw
+
+
+def _cw_error(metric_name: str | None = None):
+ """CloudWatch mock that raises ClientError for the given metric (or all)."""
+ cw = MagicMock()
+ err = ClientError(
+ {"Error": {"Code": "ThrottlingException", "Message": "x"}}, "GetMetricStatistics"
+ )
+
+ def _side(**kwargs):
+ if metric_name is None or kwargs.get("MetricName") == metric_name:
+ raise err
+ stat = kwargs.get("Statistics", ["Sum"])[0]
+ return {"Datapoints": [{stat: 0}]}
+
+ cw.get_metric_statistics.side_effect = _side
+ return cw
+
+
+def _run(session, threshold=_THRESHOLD):
+ return find_idle_load_balancers(session, _REGION, idle_days_threshold=threshold)
+
+
+# ---------------------------------------------------------------------------
+# TestMustEmit
+# ---------------------------------------------------------------------------
+
+
+class TestMustEmit:
+ def test_idle_alb_zero_targets_emits_high(self):
+ """ALB older than threshold, active, no traffic, no targets → EMIT HIGH."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="idle-alb", age_days=20)])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+
+ assert len(findings) == 1
+ f = findings[0]
+ assert f.rule_id == "aws.elbv2.alb.idle"
+ assert f.confidence.value == "high"
+ assert f.risk.value == "medium"
+
+ def test_idle_alb_with_targets_emits_medium(self):
+ """ALB older than threshold, no traffic, but registered targets → EMIT MEDIUM."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(
+ elbv2,
+ [_elbv2_lb(name="idle-alb-targets", age_days=20)],
+ tg_pages=[{"TargetGroups": [{"TargetGroupArn": "arn:tg:1"}]}],
+ target_health={"TargetHealthDescriptions": [{"Target": {"Id": "i-1"}}]},
+ )
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+
+ assert len(findings) == 1
+ assert findings[0].confidence.value == "medium"
+ assert findings[0].details["has_registered_targets"] is True
+
+ def test_idle_nlb_active_impaired_zero_traffic_with_targets_emits_medium(self):
+ """NLB in active_impaired state, zero NLB traffic with valid datapoints, has targets → EMIT MEDIUM."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_nlb_zero_traffic()
+ nlb = _elbv2_lb(name="idle-nlb", lb_type="network", age_days=20, state="active_impaired")
+ _setup_elbv2(
+ elbv2,
+ [nlb],
+ tg_pages=[{"TargetGroups": [{"TargetGroupArn": "arn:tg:1"}]}],
+ target_health={"TargetHealthDescriptions": [{"Target": {"Id": "i-1"}}]},
+ )
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+
+ assert len(findings) == 1
+ f = findings[0]
+ assert f.rule_id == "aws.elbv2.nlb.idle"
+ assert f.confidence.value == "medium"
+
+ def test_idle_nlb_no_targets_emits_high(self):
+ """NLB older than threshold, zero NLB traffic with valid datapoints, no targets → EMIT HIGH."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_nlb_zero_traffic()
+ nlb = _elbv2_lb(name="idle-nlb", lb_type="network", age_days=20)
+ _setup_elbv2(elbv2, [nlb])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+
+ assert len(findings) == 1
+ assert findings[0].rule_id == "aws.elbv2.nlb.idle"
+ assert findings[0].confidence.value == "high"
+
+ def test_idle_clb_no_instances_emits_high(self):
+ """CLB older than threshold, no traffic, no instances → EMIT HIGH."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(name="idle-clb", age_days=20)])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+
+ assert len(findings) == 1
+ f = findings[0]
+ assert f.rule_id == "aws.elb.clb.idle"
+ assert f.resource_type == "aws.elb.load_balancer"
+ assert f.resource_id == "idle-clb"
+ assert f.confidence.value == "high"
+
+ def test_idle_clb_with_instances_emits_medium(self):
+ """CLB no traffic but has registered instances → EMIT MEDIUM."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(name="idle-clb", age_days=20, instances=[{"InstanceId": "i-1"}])])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+
+ assert len(findings) == 1
+ assert findings[0].confidence.value == "medium"
+ assert findings[0].details["registered_instance_count"] == 1
+
+
+# ---------------------------------------------------------------------------
+# TestMustSkip
+# ---------------------------------------------------------------------------
+
+
+class TestMustSkip:
+ def test_gateway_lb_skipped(self):
+ """ELBv2 with Type='gateway' must be skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb = _elbv2_lb(name="gwlb", lb_type="gateway", age_days=20)
+ _setup_elbv2(elbv2, [lb])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_unknown_type_skipped(self):
+ """ELBv2 with an unrecognised Type must be skipped as unsupported."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb = _elbv2_lb(name="mystery", lb_type="classic_compat", age_days=20)
+ _setup_elbv2(elbv2, [lb])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_alb_younger_than_threshold_skipped(self):
+ """ALB younger than idle_days_threshold is skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="new-alb", age_days=5)])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_clb_younger_than_threshold_skipped(self):
+ """CLB younger than idle_days_threshold is skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(name="new-clb", age_days=3)])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_alb_in_provisioning_state_skipped(self):
+ """ELBv2 in 'provisioning' state must be skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="prov-alb", age_days=20, state="provisioning")])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_alb_in_failed_state_skipped(self):
+ """ELBv2 in 'failed' state must be skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="failed-alb", age_days=20, state="failed")])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_elbv2_unparsable_arn_dimension_skipped(self):
+ """ELBv2 ARN without 'loadbalancer/' cannot yield a CW dimension → SKIP ITEM."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ # Build a LB with a raw bad ARN
+ lb = {
+ "LoadBalancerArn": "bad-arn-no-loadbalancer-segment",
+ "LoadBalancerName": "bad-lb",
+ "Type": "application",
+ "CreatedTime": _now() - timedelta(days=20),
+ "State": {"Code": "active"},
+ "DNSName": "bad.dns",
+ "VpcId": "vpc-1",
+ "Scheme": "internet-facing",
+ }
+ _setup_elbv2(elbv2, [lb])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_elbv2_missing_arn_skipped(self):
+ """ELBv2 without LoadBalancerArn must be skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb = {
+ "LoadBalancerName": "no-arn",
+ "Type": "application",
+ "CreatedTime": _now() - timedelta(days=20),
+ "State": {"Code": "active"},
+ }
+ _setup_elbv2(elbv2, [lb])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_elbv2_missing_created_time_skipped(self):
+ """ELBv2 without CreatedTime must be skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb = {
+ "LoadBalancerArn": f"arn:aws:elasticloadbalancing:{_REGION}:{_ACCOUNT}:loadbalancer/app/no-time/abc",
+ "LoadBalancerName": "no-time",
+ "Type": "application",
+ "State": {"Code": "active"},
+ }
+ _setup_elbv2(elbv2, [lb])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_clb_missing_name_skipped(self):
+ """CLB without LoadBalancerName must be skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ lb = {"CreatedTime": _now() - timedelta(days=20), "Instances": []}
+ _setup_clb(elb, [lb])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_clb_missing_created_time_skipped(self):
+ """CLB without CreatedTime must be skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ lb = {"LoadBalancerName": "no-time", "Instances": []}
+ _setup_clb(elb, [lb])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_non_dict_elbv2_item_skipped(self):
+ """Non-dict ELBv2 item must be skipped without raising."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb_pag = MagicMock()
+ lb_pag.paginate.return_value = [{"LoadBalancers": ["not-a-dict"]}]
+ tg_pag = MagicMock()
+ tg_pag.paginate.return_value = [{"TargetGroups": []}]
+ elbv2.get_paginator.side_effect = lambda n: (
+ lb_pag if n == "describe_load_balancers" else tg_pag
+ )
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_non_dict_clb_item_skipped(self):
+ """Non-dict CLB item must be skipped without raising."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ pag = elb.get_paginator.return_value
+ pag.paginate.return_value = [{"LoadBalancerDescriptions": ["not-a-dict"]}]
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+
+# ---------------------------------------------------------------------------
+# TestTrafficSignals
+# ---------------------------------------------------------------------------
+
+
+class TestTrafficSignals:
+ """Each traffic metric independently causes a skip when > 0."""
+
+ # --- ALB ---
+
+ def test_alb_request_count_triggers_skip(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_metric_with_signal("RequestCount")
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw)) == []
+
+ def test_alb_processed_bytes_triggers_skip(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_metric_with_signal("ProcessedBytes")
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw)) == []
+
+ def test_alb_active_connection_count_triggers_skip(self):
+ """ActiveConnectionCount is the third ALB signal; > 0 must prevent emission."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_metric_with_signal("ActiveConnectionCount")
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw)) == []
+
+ # --- NLB ---
+
+ @staticmethod
+ def _nlb_traffic_cw(trigger_metric, trigger_stat):
+ """Return full-window datapoints for all metrics; trigger metric has traffic."""
+ cw = MagicMock()
+
+ def _side(**kwargs):
+ metric = kwargs.get("MetricName", "")
+ stat = kwargs.get("Statistics", ["Sum"])[0]
+ if metric == trigger_metric:
+ return {"Datapoints": [{trigger_stat: 1}] * _THRESHOLD}
+ return {"Datapoints": [{stat: 0}] * _THRESHOLD}
+
+ cw.get_metric_statistics.side_effect = _side
+ return cw
+
+ def test_nlb_new_flow_count_triggers_skip(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = TestTrafficSignals._nlb_traffic_cw("NewFlowCount", "Sum")
+ nlb = _elbv2_lb(lb_type="network", age_days=20)
+ _setup_elbv2(elbv2, [nlb])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw)) == []
+
+ def test_nlb_processed_bytes_triggers_skip(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = TestTrafficSignals._nlb_traffic_cw("ProcessedBytes", "Sum")
+ nlb = _elbv2_lb(lb_type="network", age_days=20)
+ _setup_elbv2(elbv2, [nlb])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw)) == []
+
+ def test_nlb_active_flow_count_triggers_skip(self):
+ """ActiveFlowCount Maximum is the third NLB signal; > 0 must prevent emission."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = TestTrafficSignals._nlb_traffic_cw("ActiveFlowCount", "Maximum")
+ nlb = _elbv2_lb(lb_type="network", age_days=20)
+ _setup_elbv2(elbv2, [nlb])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw)) == []
+
+ # --- CLB ---
+
+ def test_clb_request_count_triggers_skip(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_metric_with_signal("RequestCount")
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
+ assert _run(_make_session(elbv2, elb, cw)) == []
+
+ def test_clb_estimated_processed_bytes_triggers_skip(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_metric_with_signal("EstimatedProcessedBytes")
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
+ assert _run(_make_session(elbv2, elb, cw)) == []
+
+
+# ---------------------------------------------------------------------------
+# TestMustFailRule
+# ---------------------------------------------------------------------------
+
+
+class TestMustFailRule:
+ def test_elbv2_inventory_client_error_raises(self):
+ """ELBv2 DescribeLoadBalancers failure raises (FAIL RULE)."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ exc = ClientError(
+ {"Error": {"Code": "InternalError", "Message": "x"}}, "DescribeLoadBalancers"
+ )
+ lb_pag = MagicMock()
+ lb_pag.paginate.side_effect = exc
+ elbv2.get_paginator.return_value = lb_pag
+ _setup_clb(elb, [])
+
+ with pytest.raises(ClientError):
+ _run(_make_session(elbv2, elb, cw))
+
+    def test_elbv2_inventory_botocore_error_raises(self):
+        """ELBv2 inventory BotoCoreError propagates (FAIL RULE)."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb_pag = MagicMock()
+ lb_pag.paginate.side_effect = BotoCoreError()
+ elbv2.get_paginator.return_value = lb_pag
+ _setup_clb(elb, [])
+
+ with pytest.raises(BotoCoreError):
+ _run(_make_session(elbv2, elb, cw))
+
+ def test_clb_inventory_client_error_raises(self):
+ """CLB DescribeLoadBalancers failure raises (FAIL RULE)."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ exc = ClientError(
+ {"Error": {"Code": "InternalError", "Message": "x"}}, "DescribeLoadBalancers"
+ )
+ pag = elb.get_paginator.return_value
+ pag.paginate.side_effect = exc
+
+ with pytest.raises(ClientError):
+ _run(_make_session(elbv2, elb, cw))
+
+ def test_alb_cloudwatch_error_raises(self):
+ """CloudWatch error during ALB metric read raises (FAIL RULE, no LOW finding)."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_error()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+
+ with pytest.raises(ClientError):
+ _run(_make_session(elbv2, elb, cw))
+
+ def test_clb_cloudwatch_error_raises(self):
+ """CloudWatch error during CLB metric read raises (FAIL RULE)."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_error()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
+
+ with pytest.raises(ClientError):
+ _run(_make_session(elbv2, elb, cw))
+
+ def test_cloudwatch_permission_error_raises(self):
+ """CloudWatch AccessDenied raises PermissionError (FAIL RULE)."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = MagicMock()
+ cw.get_metric_statistics.side_effect = ClientError(
+ {"Error": {"Code": "AccessDenied", "Message": "x"}}, "GetMetricStatistics"
+ )
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+
+ with pytest.raises(PermissionError):
+ _run(_make_session(elbv2, elb, cw))
+
+ def test_nlb_missing_new_flow_count_raises(self):
+ """NLB with missing NewFlowCount datapoints raises RuntimeError (FAIL RULE)."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_nlb_missing_metric("NewFlowCount")
+ nlb = _elbv2_lb(lb_type="network", age_days=20)
+ _setup_elbv2(elbv2, [nlb])
+ _setup_clb(elb, [])
+
+ with pytest.raises(RuntimeError):
+ _run(_make_session(elbv2, elb, cw))
+
+ def test_nlb_missing_processed_bytes_raises(self):
+ """NLB with missing ProcessedBytes datapoints raises RuntimeError (FAIL RULE)."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_nlb_missing_metric("ProcessedBytes")
+ nlb = _elbv2_lb(lb_type="network", age_days=20)
+ _setup_elbv2(elbv2, [nlb])
+ _setup_clb(elb, [])
+
+ with pytest.raises(RuntimeError):
+ _run(_make_session(elbv2, elb, cw))
+
+ def test_nlb_missing_active_flow_count_raises(self):
+ """NLB with missing ActiveFlowCount datapoints raises RuntimeError (FAIL RULE)."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_nlb_missing_metric("ActiveFlowCount")
+ nlb = _elbv2_lb(lb_type="network", age_days=20)
+ _setup_elbv2(elbv2, [nlb])
+ _setup_clb(elb, [])
+
+ with pytest.raises(RuntimeError):
+ _run(_make_session(elbv2, elb, cw))
+
+ # --- Gap 2: NLB insufficient datapoints (partial coverage) also FAIL RULE ---
+
+ def test_nlb_insufficient_new_flow_count_coverage_raises(self):
+ """NLB NewFlowCount with only 1 datapoint (far below window) raises RuntimeError."""
+ elbv2, elb = MagicMock(), MagicMock()
+ # 1 datapoint for a 14-day window is incomplete coverage
+ cw = _cw_nlb_zero_traffic(num_datapoints=1)
+ nlb = _elbv2_lb(lb_type="network", age_days=20)
+ _setup_elbv2(elbv2, [nlb])
+ _setup_clb(elb, [])
+
+ with pytest.raises(RuntimeError, match="NewFlowCount"):
+ _run(_make_session(elbv2, elb, cw))
+
+ def test_nlb_one_below_expected_days_raises(self):
+ """Spec requires full-window coverage; expected_days - 1 datapoints is a gap → FAIL RULE."""
+ elbv2, elb = MagicMock(), MagicMock()
+ # 13 datapoints for a 14-day window → 1-day gap → FAIL RULE (no tolerance)
+ cw = _cw_nlb_zero_traffic(num_datapoints=_THRESHOLD - 1)
+ nlb = _elbv2_lb(lb_type="network", age_days=20)
+ _setup_elbv2(elbv2, [nlb])
+ _setup_clb(elb, [])
+
+ with pytest.raises(RuntimeError):
+ _run(_make_session(elbv2, elb, cw))
+
+ def test_no_low_confidence_finding_on_metric_failure(self):
+ """Metric failure must never produce a LOW-confidence finding."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_error()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+
+ # Must raise, not emit
+ with pytest.raises(Exception):
+ findings = _run(_make_session(elbv2, elb, cw))
+ # If somehow no raise, ensure no LOW finding
+ for f in findings:
+ assert f.confidence.value != "low", "LOW confidence finding must never be emitted"
+
+
+# ---------------------------------------------------------------------------
+# TestNormalization
+# ---------------------------------------------------------------------------
+
+
+class TestNormalization:
+ def test_alb_lb_family_assigned(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="alb1", lb_type="application", age_days=20)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.details["lb_family"] == "alb"
+
+ def test_nlb_lb_family_assigned(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_nlb_zero_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="nlb1", lb_type="network", age_days=20)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.details["lb_family"] == "nlb"
+
+ def test_clb_lb_family_assigned(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(name="clb1", age_days=20)])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.details["lb_family"] == "clb"
+
+ def test_clb_uses_vpcid_key(self):
+ """CLB spec uses 'VPCId' (capital), not 'VpcId'."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(name="vpc-clb", age_days=20)])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.details["vpc_id"] == "vpc-12345"
+
+ def test_elbv2_state_code_captured(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="a", age_days=20, state="active")])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.details["state_code"] == "active"
+
+ def test_age_days_exact_threshold_emits(self):
+ """age_days == idle_days_threshold exactly — must emit (>= check, not >)."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="exact", age_days=_THRESHOLD)])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert len(findings) == 1
+
+ def test_age_days_one_below_threshold_skips(self):
+ """age_days == threshold - 1 must be skipped."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="almost", age_days=_THRESHOLD - 1)])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == []
+
+ def test_alb_active_impaired_passes_state_check(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="imp", age_days=20, state="active_impaired")])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert len(findings) == 1
+
+ # --- Gap 1: naive CreatedTime must be SKIP, not coerced ---
+
+ def test_elbv2_naive_created_time_skipped(self):
+ """ELBv2 with a naive (tz-unaware) CreatedTime must be skipped, not coerced to UTC."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb = {
+ "LoadBalancerArn": f"arn:aws:elasticloadbalancing:{_REGION}:{_ACCOUNT}:loadbalancer/app/naive/abc",
+ "LoadBalancerName": "naive-alb",
+ "Type": "application",
+ # Naive datetime — no tzinfo
+ "CreatedTime": datetime.now() - timedelta(days=30),
+ "State": {"Code": "active"},
+ "DNSName": "naive.elb.amazonaws.com",
+ "VpcId": "vpc-1",
+ "Scheme": "internet-facing",
+ }
+ _setup_elbv2(elbv2, [lb])
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == [], "Naive ELBv2 CreatedTime must cause SKIP, not emit"
+
+ def test_clb_naive_created_time_skipped(self):
+ """CLB with a naive (tz-unaware) CreatedTime must be skipped, not coerced to UTC."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ lb = {
+ "LoadBalancerName": "naive-clb",
+ # Naive datetime — no tzinfo
+ "CreatedTime": datetime.now() - timedelta(days=30),
+ "DNSName": "naive.elb.amazonaws.com",
+ "VPCId": "vpc-1",
+ "Scheme": "internet-facing",
+ "Instances": [],
+ }
+ _setup_clb(elb, [lb])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert findings == [], "Naive CLB CreatedTime must cause SKIP, not emit"
+
+ def test_clb_load_balancer_arn_always_null(self):
+ """CLB details must have load_balancer_arn = None (CLBs have no ARN)."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.details["load_balancer_arn"] is None
+
+
+# ---------------------------------------------------------------------------
+# TestConfidenceModel
+# ---------------------------------------------------------------------------
+
+
+class TestConfidenceModel:
+ def test_alb_no_targets_high(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw))[0].confidence.value == "high"
+
+ def test_alb_with_targets_medium(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(
+ elbv2,
+ [_elbv2_lb(age_days=20)],
+ tg_pages=[{"TargetGroups": [{"TargetGroupArn": "arn:tg:1"}]}],
+ target_health={"TargetHealthDescriptions": [{"Target": {"Id": "i-1"}}]},
+ )
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw))[0].confidence.value == "medium"
+
+ def test_alb_enrichment_failure_medium(self):
+ """When target-group enrichment fails, confidence degrades to MEDIUM (not HIGH)."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb_pag = MagicMock()
+ lb_pag.paginate.return_value = [{"LoadBalancers": [_elbv2_lb(age_days=20)]}]
+ tg_pag = MagicMock()
+ tg_pag.paginate.side_effect = ClientError(
+ {"Error": {"Code": "InternalError", "Message": "x"}}, "DescribeTargetGroups"
+ )
+
+ def _pag(name):
+ if name == "describe_load_balancers":
+ return lb_pag
+ return tg_pag
+
+ elbv2.get_paginator.side_effect = _pag
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert len(findings) == 1
+ assert findings[0].confidence.value == "medium"
+
+ def test_clb_no_instances_high(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
+ assert _run(_make_session(elbv2, elb, cw))[0].confidence.value == "high"
+
+ def test_clb_with_instances_medium(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20, instances=[{"InstanceId": "i-1"}])])
+ assert _run(_make_session(elbv2, elb, cw))[0].confidence.value == "medium"
+
+ def test_no_low_confidence_ever_emitted(self):
+ """Confidence must only be HIGH or MEDIUM — never LOW."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [_clb(age_days=20)])
+
+ for f in _run(_make_session(elbv2, elb, cw)):
+ assert f.confidence.value != "low"
+
+
+# ---------------------------------------------------------------------------
+# TestCostModel
+# ---------------------------------------------------------------------------
+
+
+class TestCostModel:
+ def test_alb_estimated_cost_null(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw))[0].estimated_monthly_cost_usd is None
+
+ def test_nlb_estimated_cost_null(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_nlb_zero_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(lb_type="network", age_days=20)])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw))[0].estimated_monthly_cost_usd is None
+
+ def test_clb_estimated_cost_null(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
+ assert _run(_make_session(elbv2, elb, cw))[0].estimated_monthly_cost_usd is None
+
+ def test_no_hardcoded_cost_string_in_details(self):
+ """Details must not contain any hardcoded cost string like '~$16-22/month'."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [_clb(age_days=20)])
+
+ for f in _run(_make_session(elbv2, elb, cw)):
+ details_str = str(f.details)
+ assert "$" not in details_str, "Hardcoded cost string found in details"
+
+
+# ---------------------------------------------------------------------------
+# TestEvidenceContract
+# ---------------------------------------------------------------------------
+
+
+class TestEvidenceContract:
+ """Every emitted finding must include all required evidence/details fields."""
+
+ _ALB_REQUIRED = {
+ "evaluation_path",
+ "lb_family",
+ "resource_id",
+ "load_balancer_name",
+ "load_balancer_arn",
+ "scheme",
+ "dns_name",
+ "vpc_id",
+ "created_time",
+ "age_days",
+ "idle_days_threshold",
+ "traffic_window_days",
+ "traffic_signals_checked",
+ "traffic_detected",
+ "state_code",
+ "has_registered_targets",
+ "registered_target_count",
+ "target_group_count",
}
- cloudwatch.get_metric_statistics.return_value = {"Datapoints": []}
- elb_paginator = elb.get_paginator.return_value
- elb_paginator.paginate.return_value = [{"LoadBalancerDescriptions": []}]
+ _CLB_REQUIRED = {
+ "evaluation_path",
+ "lb_family",
+ "resource_id",
+ "load_balancer_name",
+ "load_balancer_arn",
+ "scheme",
+ "dns_name",
+ "vpc_id",
+ "created_time",
+ "age_days",
+ "idle_days_threshold",
+ "traffic_window_days",
+ "traffic_signals_checked",
+ "traffic_detected",
+ "has_registered_instances",
+ "registered_instance_count",
+ }
+
+ def test_alb_required_details_present(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ for key in self._ALB_REQUIRED:
+ assert key in f.details, f"Missing required details key: {key}"
- session = _make_session(elbv2, elb, cloudwatch)
- findings = find_idle_load_balancers(session, "us-east-1")
+ def test_clb_required_details_present(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
- nlb_findings = [f for f in findings if f.rule_id == "aws.elbv2.nlb.idle"]
- assert len(nlb_findings) == 1
- # Unhealthy but registered targets → has_targets=True → MEDIUM confidence
- assert nlb_findings[0].confidence.value == "medium"
- assert nlb_findings[0].details["has_targets"] is True
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ for key in self._CLB_REQUIRED:
+ assert key in f.details, f"Missing required details key: {key}"
+ def test_evaluation_path_exact_value(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
-def test_idle_nlb_healthy_targets_medium_confidence():
- """Idle NLB with zero flows but healthy targets should be MEDIUM confidence."""
- elbv2 = MagicMock()
- elb = MagicMock()
- cloudwatch = MagicMock()
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.details["evaluation_path"] == "idle-load-balancer-review-candidate"
- nlb = _make_elbv2_lb(name="idle-nlb", lb_type="network", age_days=20)
- paginator = elbv2.get_paginator.return_value
- paginator.paginate.return_value = [{"LoadBalancers": [nlb]}]
+ def test_traffic_detected_always_false(self):
+ """traffic_detected must always be False for emitted findings."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [_clb(age_days=20)])
- elbv2.describe_target_groups.return_value = {"TargetGroups": [{"TargetGroupArn": "arn:tg"}]}
- elbv2.describe_target_health.return_value = {
- "TargetHealthDescriptions": [
- {"Target": {"Id": "i-123"}, "TargetHealth": {"State": "healthy"}}
+ for f in _run(_make_session(elbv2, elb, cw)):
+ assert f.details["traffic_detected"] is False
+
+ def test_alb_traffic_signals_checked_contains_active_connection_count(self):
+ """ALB traffic_signals_checked must include ActiveConnectionCount:Sum."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert "ActiveConnectionCount:Sum" in f.details["traffic_signals_checked"]
+
+ def test_nlb_traffic_signals_checked_contains_active_flow_count(self):
+ """NLB traffic_signals_checked must include ActiveFlowCount:Maximum."""
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_nlb_zero_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(lb_type="network", age_days=20)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert "ActiveFlowCount:Maximum" in f.details["traffic_signals_checked"]
+
+ def test_clb_traffic_signals_checked_contains_estimated_bytes(self):
+ """CLB traffic_signals_checked must include EstimatedProcessedBytes:Sum."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert "EstimatedProcessedBytes:Sum" in f.details["traffic_signals_checked"]
+
+ def test_idle_days_threshold_in_details(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=30)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw), threshold=14)[0]
+ assert f.details["idle_days_threshold"] == 14
+ assert f.details["traffic_window_days"] == 14
+
+
+# ---------------------------------------------------------------------------
+# TestTitleAndReasonContract
+# ---------------------------------------------------------------------------
+
+
+class TestTitleAndReasonContract:
+ def test_alb_title(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.title == "Idle ALB review candidate"
+
+ def test_nlb_title(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ cw = _cw_nlb_zero_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(lb_type="network", age_days=20)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.title == "Idle NLB review candidate"
+
+ def test_clb_title(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.title == "Idle CLB review candidate"
+
+ def test_alb_reason_contains_threshold(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=30)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw), threshold=21)[0]
+ assert "21" in f.reason
+ assert "ALB" in f.reason
+
+ def test_nlb_reason_contains_threshold(self):
+ elbv2, elb = MagicMock(), MagicMock()
+ # Provide 21 datapoints so completeness check passes for threshold=21
+ cw = _cw_nlb_zero_traffic(num_datapoints=21)
+ _setup_elbv2(elbv2, [_elbv2_lb(lb_type="network", age_days=30)])
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw), threshold=21)[0]
+ assert "NLB" in f.reason
+ assert "21" in f.reason
+
+ def test_clb_reason_contains_threshold(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=30)])
+
+ f = _run(_make_session(elbv2, elb, cw), threshold=21)[0]
+ assert "CLB" in f.reason
+ assert "21" in f.reason
+
+ def test_title_does_not_claim_safe_to_delete(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [_clb(age_days=20)])
+
+ for f in _run(_make_session(elbv2, elb, cw)):
+ assert "safe" not in f.title.lower()
+ assert "delete" not in f.title.lower()
+
+
+# ---------------------------------------------------------------------------
+# TestRiskModel
+# ---------------------------------------------------------------------------
+
+
+class TestRiskModel:
+ def test_alb_risk_medium(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(age_days=20)])
+ _setup_clb(elb, [])
+ assert _run(_make_session(elbv2, elb, cw))[0].risk.value == "medium"
+
+ def test_clb_risk_medium(self):
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(elb, [_clb(age_days=20)])
+ assert _run(_make_session(elbv2, elb, cw))[0].risk.value == "medium"
+
+
+# ---------------------------------------------------------------------------
+# TestBackendEnrichment
+# ---------------------------------------------------------------------------
+
+
+class TestBackendEnrichment:
+ def test_target_enrichment_failure_does_not_fail_rule(self):
+ """Target-group enrichment failure must not raise — finding still emitted."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb_pag = MagicMock()
+ lb_pag.paginate.return_value = [{"LoadBalancers": [_elbv2_lb(age_days=20)]}]
+ tg_pag = MagicMock()
+ tg_pag.paginate.side_effect = ClientError(
+ {"Error": {"Code": "ServiceUnavailableException", "Message": "x"}},
+ "DescribeTargetGroups",
+ )
+
+ def _pag(name):
+ return lb_pag if name == "describe_load_balancers" else tg_pag
+
+ elbv2.get_paginator.side_effect = _pag
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ # Enrichment failure → finding still emitted, no exception
+ assert len(findings) == 1
+
+ def test_clb_instances_from_normalized_item(self):
+ """CLB backend context comes directly from the Instances field."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
+ _setup_clb(
+ elb, [_clb(age_days=20, instances=[{"InstanceId": "i-1"}, {"InstanceId": "i-2"}])]
+ )
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.details["registered_instance_count"] == 2
+ assert f.details["has_registered_instances"] is True
+
+ def test_enrichment_failure_counts_are_none_not_zero(self):
+ """Gap 3: when enrichment fails, registered_target_count and target_group_count
+ must be None (unknown), not silently set to 0 (which would look like zero targets)."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ lb_pag = MagicMock()
+ lb_pag.paginate.return_value = [{"LoadBalancers": [_elbv2_lb(age_days=20)]}]
+ tg_pag = MagicMock()
+ tg_pag.paginate.side_effect = ClientError(
+ {"Error": {"Code": "ServiceUnavailableException", "Message": "x"}},
+ "DescribeTargetGroups",
+ )
+
+ def _pag(name):
+ return lb_pag if name == "describe_load_balancers" else tg_pag
+
+ elbv2.get_paginator.side_effect = _pag
+ _setup_clb(elb, [])
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert len(findings) == 1
+ f = findings[0]
+ assert f.details["has_registered_targets"] is None
+ assert f.details["registered_target_count"] is None, "Must be None, not 0"
+ assert f.details["target_group_count"] is None, "Must be None, not 0"
+
+ def test_unhealthy_targets_count_as_registered(self):
+ """Any non-empty TargetHealthDescriptions entry counts as a registered target."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(
+ elbv2,
+ [_elbv2_lb(age_days=20)],
+ tg_pages=[{"TargetGroups": [{"TargetGroupArn": "arn:tg:1"}]}],
+ target_health={
+ "TargetHealthDescriptions": [
+ {"Target": {"Id": "i-1"}, "TargetHealth": {"State": "unhealthy"}}
+ ]
+ },
+ )
+ _setup_clb(elb, [])
+
+ f = _run(_make_session(elbv2, elb, cw))[0]
+ assert f.details["has_registered_targets"] is True
+ assert f.confidence.value == "medium"
+
+
+# ---------------------------------------------------------------------------
+# TestPagination
+# ---------------------------------------------------------------------------
+
+
+class TestPagination:
+ def test_elbv2_multiple_pages_all_processed(self):
+ """ELBv2 paginator with two pages — both pages' LBs are evaluated."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+
+ lb_pag = MagicMock()
+ lb_pag.paginate.return_value = [
+ {"LoadBalancers": [_elbv2_lb(name="alb1", age_days=20)]},
+ {"LoadBalancers": [_elbv2_lb(name="alb2", age_days=25)]},
]
- }
- cloudwatch.get_metric_statistics.return_value = {"Datapoints": []}
+ tg_pag = MagicMock()
+ tg_pag.paginate.return_value = [{"TargetGroups": []}]
+ elbv2.get_paginator.side_effect = lambda n: (
+ lb_pag if n == "describe_load_balancers" else tg_pag
+ )
+ _setup_clb(elb, [])
- elb_paginator = elb.get_paginator.return_value
- elb_paginator.paginate.return_value = [{"LoadBalancerDescriptions": []}]
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert len(findings) == 2
- session = _make_session(elbv2, elb, cloudwatch)
- findings = find_idle_load_balancers(session, "us-east-1")
+ def test_clb_multiple_pages_all_processed(self):
+ """CLB paginator with two pages — both pages' LBs are evaluated."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [])
- nlb_findings = [f for f in findings if f.rule_id == "aws.elbv2.nlb.idle"]
- assert len(nlb_findings) == 1
- # Healthy targets but no traffic -> MEDIUM confidence
- assert nlb_findings[0].confidence.value == "medium"
- assert nlb_findings[0].details["has_targets"] is True
+ pag = elb.get_paginator.return_value
+ pag.paginate.return_value = [
+ {"LoadBalancerDescriptions": [_clb(name="clb1", age_days=20)]},
+ {"LoadBalancerDescriptions": [_clb(name="clb2", age_days=25)]},
+ ]
+
+ findings = _run(_make_session(elbv2, elb, cw))
+ assert len(findings) == 2
+ def test_both_branches_run(self):
+ """ALB and CLB findings are both collected in a single call."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ _setup_elbv2(elbv2, [_elbv2_lb(name="idle-alb", age_days=20)])
+ _setup_clb(elb, [_clb(name="idle-clb", age_days=20)])
-def test_idle_clb_detected():
- """Idle CLB with zero requests and no instances should be flagged."""
- elbv2 = MagicMock()
- elb = MagicMock()
- cloudwatch = MagicMock()
+ findings = _run(_make_session(elbv2, elb, cw))
+ rule_ids = {f.rule_id for f in findings}
+ assert "aws.elbv2.alb.idle" in rule_ids
+ assert "aws.elb.clb.idle" in rule_ids
- # elbv2 - empty
- elbv2_paginator = elbv2.get_paginator.return_value
- elbv2_paginator.paginate.return_value = [{"LoadBalancers": []}]
- # CLB setup
- clb = _make_clb(name="idle-clb", age_days=30, instances=[])
- elb_paginator = elb.get_paginator.return_value
- elb_paginator.paginate.return_value = [{"LoadBalancerDescriptions": [clb]}]
+# ---------------------------------------------------------------------------
+# TestBranchIsolation
+# ---------------------------------------------------------------------------
- cloudwatch.get_metric_statistics.return_value = {"Datapoints": []}
- session = _make_session(elbv2, elb, cloudwatch)
- findings = find_idle_load_balancers(session, "us-east-1")
+class TestBranchIsolation:
+ """ELBv2 and CLB branches must run independently.
- assert len(findings) == 1
- f = findings[0]
- assert f.rule_id == "aws.elb.clb.idle"
- assert f.resource_type == "aws.elb.load_balancer"
- assert f.resource_id == "idle-clb"
- assert f.confidence.value == "high" # No instances + no traffic
- assert f.details["has_instances"] is False
- assert f.estimated_monthly_cost_usd == 18.0
+ A failure in one branch must not prevent the other branch from being evaluated.
+ Both branches are always attempted; the first exception is re-raised afterward.
+ """
+ def test_elbv2_failure_does_not_prevent_clb_evaluation(self):
+ """ELBv2 inventory failure → CLB paginator is still called."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
-def test_young_lb_skipped():
- """LB younger than threshold should NOT be flagged."""
- elbv2 = MagicMock()
- elb = MagicMock()
- cloudwatch = MagicMock()
+ # Make ELBv2 inventory fail
+ exc = ClientError(
+ {"Error": {"Code": "InternalError", "Message": "x"}}, "DescribeLoadBalancers"
+ )
+ lb_pag = MagicMock()
+ lb_pag.paginate.side_effect = exc
+ elbv2.get_paginator.return_value = lb_pag
- # Young ALB (5 days old)
- paginator = elbv2.get_paginator.return_value
- paginator.paginate.return_value = [
- {"LoadBalancers": [_make_elbv2_lb(name="young-alb", age_days=5)]}
- ]
+ # CLB has a valid idle LB
+ _setup_clb(elb, [_clb(name="surviving-clb", age_days=20)])
- # Young CLB (3 days old)
- elb_paginator = elb.get_paginator.return_value
- elb_paginator.paginate.return_value = [
- {"LoadBalancerDescriptions": [_make_clb(name="young-clb", age_days=3)]}
- ]
+ with pytest.raises(ClientError):
+ _run(_make_session(elbv2, elb, cw))
- session = _make_session(elbv2, elb, cloudwatch)
- findings = find_idle_load_balancers(session, "us-east-1")
+ # CLB paginator must have been called despite ELBv2 failure
+ elb.get_paginator.assert_called()
- assert len(findings) == 0
+ def test_clb_failure_does_not_prevent_elbv2_evaluation(self):
+ """CLB inventory failure → ELBv2 paginator was still called and evaluated."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+ # ELBv2 has a valid idle ALB
+ _setup_elbv2(elbv2, [_elbv2_lb(name="surviving-alb", age_days=20)])
-def test_clb_with_instances_medium_confidence():
- """CLB with instances but no traffic should be MEDIUM confidence."""
- elbv2 = MagicMock()
- elb = MagicMock()
- cloudwatch = MagicMock()
+ # Make CLB inventory fail
+ exc = ClientError(
+ {"Error": {"Code": "InternalError", "Message": "x"}}, "DescribeLoadBalancers"
+ )
+ clb_pag = elb.get_paginator.return_value
+ clb_pag.paginate.side_effect = exc
- elbv2_paginator = elbv2.get_paginator.return_value
- elbv2_paginator.paginate.return_value = [{"LoadBalancers": []}]
+ with pytest.raises(ClientError):
+ _run(_make_session(elbv2, elb, cw))
- clb = _make_clb(
- name="idle-with-instances",
- age_days=30,
- instances=[{"InstanceId": "i-123"}],
- )
- elb_paginator = elb.get_paginator.return_value
- elb_paginator.paginate.return_value = [{"LoadBalancerDescriptions": [clb]}]
+ # ELBv2 paginator must have been called (its branch completed)
+ elbv2.get_paginator.assert_called()
+
+ def test_elbv2_and_clb_both_fail_raises_elbv2_exception(self):
+ """When both branches fail, the ELBv2 exception (first) is re-raised."""
+ elbv2, elb, cw = MagicMock(), MagicMock(), _cw_no_traffic()
+
+ elbv2_exc = ClientError(
+ {"Error": {"Code": "ELBv2Error", "Message": "x"}}, "DescribeLoadBalancers"
+ )
+ clb_exc = ClientError(
+ {"Error": {"Code": "CLBError", "Message": "x"}}, "DescribeLoadBalancers"
+ )
+
+ lb_pag = MagicMock()
+ lb_pag.paginate.side_effect = elbv2_exc
+ elbv2.get_paginator.return_value = lb_pag
- cloudwatch.get_metric_statistics.return_value = {"Datapoints": []}
+ clb_pag = elb.get_paginator.return_value
+ clb_pag.paginate.side_effect = clb_exc
- session = _make_session(elbv2, elb, cloudwatch)
- findings = find_idle_load_balancers(session, "us-east-1")
+ with pytest.raises(ClientError) as exc_info:
+ _run(_make_session(elbv2, elb, cw))
- assert len(findings) == 1
- assert findings[0].confidence.value == "medium"
- assert findings[0].details["has_instances"] is True
- assert findings[0].details["instance_count"] == 1
+ # Must be the ELBv2 exception (first branch failure)
+ assert exc_info.value.response["Error"]["Code"] == "ELBv2Error"
diff --git a/tests/cleancloud/providers/aws/test_aws_eni_detached.py b/tests/cleancloud/providers/aws/test_aws_eni_detached.py
index 06a9537..90ff98a 100644
--- a/tests/cleancloud/providers/aws/test_aws_eni_detached.py
+++ b/tests/cleancloud/providers/aws/test_aws_eni_detached.py
@@ -1,289 +1,741 @@
-from datetime import datetime, timedelta, timezone
-
-from cleancloud.core.confidence import ConfidenceLevel
-from cleancloud.providers.aws.rules.eni_detached import find_detached_enis
+"""Tests for aws.ec2.eni.detached rule.
+Covers all acceptance scenarios from docs/specs/aws/eni_detached.md §15 and
+the normalization, evidence, confidence, cost, risk, title/reason, failure, and
+pagination contracts from the same spec.
+"""
-def test_find_detached_enis(mock_boto3_session):
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- old_date = datetime.now(timezone.utc) - timedelta(days=90) # Older than 60-day threshold
- recent_date = datetime.now(timezone.utc) - timedelta(days=30) # Younger than 60-day threshold
-
- # Mock paginator for describe_network_interfaces
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "NetworkInterfaces": [
- {
- "NetworkInterfaceId": "eni-1",
- "Status": "available", # Detached
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "User-created ENI",
- "InterfaceType": "interface", # Standard ENI
- "TagSet": [{"Key": "Name", "Value": "test-eni"}],
- },
- {
- "NetworkInterfaceId": "eni-2",
- "Status": "in-use", # Attached
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "Attached ENI",
- "InterfaceType": "interface",
- },
- {
- "NetworkInterfaceId": "eni-3",
- "Status": "available", # Detached but recent
- "CreateTime": recent_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "Recently created",
- "InterfaceType": "interface",
- },
- {
- "NetworkInterfaceId": "eni-4",
- "Status": "available", # AWS infrastructure (Load Balancer)
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "ELB app/my-alb/1234567890",
- "InterfaceType": "load_balancer", # AWS infrastructure - exclude
- },
- {
- "NetworkInterfaceId": "eni-5",
- "Status": "available", # Detached Lambda ENI (USER resource - should flag!)
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "AWS Lambda VPC ENI-my-function",
- "InterfaceType": "interface", # Standard ENI type
- "RequesterManaged": True, # Created by AWS service, but YOUR resource
- },
- {
- "NetworkInterfaceId": "eni-6",
- "Status": "available", # Detached, old, no tags
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "",
- "InterfaceType": "interface",
- "TagSet": [],
- },
- ]
- }
- ]
-
- findings = find_detached_enis(mock_boto3_session, region)
- eni_ids = {f.resource_id for f in findings}
- findings_by_id = {f.resource_id: f for f in findings}
-
- # Positive: old (90 days) detached standard ENI with tags
- assert "eni-1" in eni_ids
-
- # Positive: old (90 days) detached Lambda ENI (RequesterManaged but user resource!)
- assert "eni-5" in eni_ids
-
- # Positive: old (90 days) detached ENI without tags
- assert "eni-6" in eni_ids
-
- # Negative: attached ENI
- assert "eni-2" not in eni_ids
-
- # Negative: detached but too young (30 days < 60 day threshold)
- assert "eni-3" not in eni_ids
-
- # Negative: AWS infrastructure (Load Balancer)
- assert "eni-4" not in eni_ids
-
- # Verify we got exactly 3 findings (including Lambda ENI)
- assert len(findings) == 3
-
- # Verify title includes "(Review Recommended)"
- for f in findings:
- assert f.title == "Detached Network Interface (Review Recommended)"
-
- # Verify confidence is MEDIUM for all findings
- for f in findings:
- assert f.confidence == ConfidenceLevel.MEDIUM
-
- # Verify standard ENI details
- f1 = findings_by_id["eni-1"]
- assert f1.details["interface_type"] == "interface"
- assert f1.details["requester_managed"] is False
- assert f1.details["age_days"] == 90
- assert "created" in f1.summary and "currently detached" in f1.summary
-
- # Verify Lambda ENI details and requester-managed signal
- f5 = findings_by_id["eni-5"]
- assert f5.details["interface_type"] == "interface"
- assert f5.details["requester_managed"] is True
- assert any("requester-managed" in s for s in f5.evidence.signals_used)
-
- # Verify untagged ENI has "no tags" signal
- f6 = findings_by_id["eni-6"]
- assert f6.details["requester_managed"] is False
- assert any("no tags" in s for s in f6.evidence.signals_used)
-
- # Verify Hyperplane in signals_not_checked
- for f in findings:
- assert any("Hyperplane" in s for s in f.evidence.signals_not_checked)
-
-
-def test_find_detached_enis_custom_threshold(mock_boto3_session):
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- date_45_days_ago = datetime.now(timezone.utc) - timedelta(days=45)
-
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "NetworkInterfaces": [
- {
- "NetworkInterfaceId": "eni-7",
- "Status": "available",
- "CreateTime": date_45_days_ago,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "Test ENI",
- "InterfaceType": "interface",
- }
- ]
- }
- ]
-
- # Test with custom 60-day threshold
- findings = find_detached_enis(mock_boto3_session, region, max_age_days=60)
- eni_ids = {f.resource_id for f in findings}
-
- # Should NOT be detected (45 days < 60 days threshold)
- assert "eni-7" not in eni_ids
-
- # Test with custom 30-day threshold
- findings = find_detached_enis(mock_boto3_session, region, max_age_days=30)
- eni_ids = {f.resource_id for f in findings}
-
- # Should be detected (45 days >= 30 days threshold)
- assert "eni-7" in eni_ids
-
- # Verify wording uses creation age, not detached duration
- f = findings[0]
- assert "created" in f.summary
- assert "currently detached" in f.summary
- assert f.evidence.time_window == "30 days since creation"
-
-
-def test_find_detached_enis_empty(mock_boto3_session):
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [{"NetworkInterfaces": []}]
-
- findings = find_detached_enis(mock_boto3_session, region)
-
- assert len(findings) == 0
-
-
-def test_find_detached_enis_interface_types(mock_boto3_session):
- """Test that InterfaceType correctly distinguishes AWS infrastructure from user resources."""
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- old_date = datetime.now(timezone.utc) - timedelta(days=60)
-
- # Test various interface types
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "NetworkInterfaces": [
- {
- "NetworkInterfaceId": "eni-user-1",
- "Status": "available",
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "User-created ENI",
- "InterfaceType": "interface", # Standard - should be flagged
- },
- {
- "NetworkInterfaceId": "eni-lambda-1",
- "Status": "available",
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "Lambda VPC ENI",
- "InterfaceType": "interface", # Lambda = user resource - should be flagged!
- "RequesterManaged": True,
- },
- {
- "NetworkInterfaceId": "eni-elb-1",
- "Status": "available",
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "ELB app/my-alb/1234567890",
- "InterfaceType": "load_balancer", # AWS infrastructure - exclude
- },
- {
- "NetworkInterfaceId": "eni-nat-1",
- "Status": "available",
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "NAT Gateway",
- "InterfaceType": "nat_gateway", # AWS infrastructure - exclude
- },
- {
- "NetworkInterfaceId": "eni-vpce-1",
- "Status": "available",
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "AvailabilityZone": "us-east-1a",
- "Description": "VPC Endpoint",
- "InterfaceType": "vpc_endpoint", # AWS infrastructure - exclude
- },
- ]
- }
- ]
+from unittest.mock import MagicMock
- findings = find_detached_enis(mock_boto3_session, region)
- eni_ids = {f.resource_id for f in findings}
- findings_by_id = {f.resource_id: f for f in findings}
+import pytest
+from botocore.exceptions import BotoCoreError, ClientError
- # Should flag user resources (including Lambda!)
- assert "eni-user-1" in eni_ids
- assert "eni-lambda-1" in eni_ids # Lambda ENI is a user resource!
-
- # Should exclude AWS infrastructure
- assert "eni-elb-1" not in eni_ids
- assert "eni-nat-1" not in eni_ids
- assert "eni-vpce-1" not in eni_ids
-
- assert len(findings) == 2 # Only user-1 and lambda-1
+from cleancloud.core.confidence import ConfidenceLevel
+from cleancloud.core.risk import RiskLevel
+from cleancloud.providers.aws.rules.eni_detached import find_detached_enis
- # Verify interface_type and requester_managed in details
- f_user = findings_by_id["eni-user-1"]
- assert f_user.details["interface_type"] == "interface"
- assert f_user.details["requester_managed"] is False
+_REGION = "us-east-1"
- f_lambda = findings_by_id["eni-lambda-1"]
- assert f_lambda.details["interface_type"] == "interface"
- assert f_lambda.details["requester_managed"] is True
- assert any("requester-managed" in s for s in f_lambda.evidence.signals_used)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_session(ec2: MagicMock) -> MagicMock:
+ session = MagicMock()
+ session.client.return_value = ec2
+ return session
+
+
+def _setup_ec2(enis: list) -> MagicMock:
+ """Return an ec2 client mock whose paginator yields one page of ENIs."""
+ ec2 = MagicMock()
+ paginator = MagicMock()
+ ec2.get_paginator.return_value = paginator
+ paginator.paginate.return_value = [{"NetworkInterfaces": enis}]
+ return ec2
+
+
+def _eni(
+ eni_id: str = "eni-aabbccdd",
+ status: str = "available",
+ **extra,
+) -> dict:
+ """Build a minimal ENI dict with defaults that pass all exclusion rules."""
+ base = {
+ "NetworkInterfaceId": eni_id,
+ "Status": status,
+ }
+ base.update(extra)
+ return base
+
+
+def _run(session: MagicMock) -> list:
+ return find_detached_enis(session, _REGION)
+
+
+def _client_error(code: str = "SomeError") -> ClientError:
+ return ClientError({"Error": {"Code": code, "Message": "test"}}, "DescribeNetworkInterfaces")
+
+
+# ---------------------------------------------------------------------------
+# §15 Must Emit
+# ---------------------------------------------------------------------------
+
+
+class TestMustEmit:
+ def test_available_no_attachment_object(self):
+ """Scenario 1: ENI available, no Attachment object → EMIT HIGH."""
+ ec2 = _setup_ec2([_eni("eni-1", "available")])
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1
+ assert findings[0].resource_id == "eni-1"
+ assert findings[0].confidence == ConfidenceLevel.HIGH
+
+ def test_available_attachment_detached(self):
+ """Scenario 2: ENI available, Attachment.Status == 'detached' → EMIT HIGH."""
+ eni = _eni(
+ "eni-2", "available", Attachment={"Status": "detached", "AttachmentId": "eni-attach-01"}
+ )
+ ec2 = _setup_ec2([eni])
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1
+ assert findings[0].resource_id == "eni-2"
+ assert findings[0].confidence == ConfidenceLevel.HIGH
+
+ def test_requester_managed_available(self):
+ """Scenario 3: Requester-managed ENI available → EMIT (no exclusion)."""
+ eni = _eni("eni-3", "available", RequesterManaged=True)
+ ec2 = _setup_ec2([eni])
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1
+ assert findings[0].resource_id == "eni-3"
+
+ def test_operator_managed_available(self):
+ """Scenario 4: Operator-managed ENI available → EMIT (no exclusion)."""
+ eni = _eni("eni-4", "available", Operator={"Managed": True, "Principal": "some-service"})
+ ec2 = _setup_ec2([eni])
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1
+ assert findings[0].resource_id == "eni-4"
+
+ def test_any_interface_type_available(self):
+ """Scenario 5: Any InterfaceType, Status available → EMIT (no type exclusion)."""
+ for itype in ("interface", "load_balancer", "nat_gateway", "vpc_endpoint", "efa", "branch"):
+ eni = _eni(f"eni-{itype}", "available", InterfaceType=itype)
+ ec2 = _setup_ec2([eni])
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1, f"Expected emit for InterfaceType={itype!r}"
+ assert findings[0].resource_id == f"eni-{itype}"
+
+
+# ---------------------------------------------------------------------------
+# §15 Must Skip
+# ---------------------------------------------------------------------------
+
+
+class TestMustSkip:
+ def test_in_use_skipped(self):
+ """Scenario 6: Status == 'in-use' → SKIP."""
+ ec2 = _setup_ec2([_eni("eni-inuse", "in-use")])
+ findings = _run(_make_session(ec2))
+ assert findings == []
+
+ def test_attaching_skipped(self):
+ """Scenario 7a: Status == 'attaching' → SKIP."""
+ ec2 = _setup_ec2([_eni("eni-attaching", "attaching")])
+ assert _run(_make_session(ec2)) == []
+
+ def test_detaching_skipped(self):
+ """Scenario 7b: Status == 'detaching' → SKIP."""
+ ec2 = _setup_ec2([_eni("eni-detaching", "detaching")])
+ assert _run(_make_session(ec2)) == []
+
+ def test_associated_skipped(self):
+ """Scenario 7c: Status == 'associated' → SKIP."""
+ ec2 = _setup_ec2([_eni("eni-associated", "associated")])
+ assert _run(_make_session(ec2)) == []
+
+ def test_available_attachment_attached_skipped(self):
+ """Scenario 8: Status 'available' but Attachment.Status 'attached' → SKIP (inconsistency)."""
+ eni = _eni("eni-conflict", "available", Attachment={"Status": "attached"})
+ ec2 = _setup_ec2([eni])
+ assert _run(_make_session(ec2)) == []
+
+ def test_available_attachment_attaching_skipped(self):
+ """Structural inconsistency: 'available' + Attachment.Status 'attaching' → SKIP."""
+ eni = _eni("eni-conflict2", "available", Attachment={"Status": "attaching"})
+ ec2 = _setup_ec2([eni])
+ assert _run(_make_session(ec2)) == []
+
+ def test_available_attachment_detaching_skipped(self):
+ """Structural inconsistency: 'available' + Attachment.Status 'detaching' → SKIP."""
+ eni = _eni("eni-conflict3", "available", Attachment={"Status": "detaching"})
+ ec2 = _setup_ec2([eni])
+ assert _run(_make_session(ec2)) == []
+
+ def test_missing_network_interface_id_skipped(self):
+ """Scenario 9: Missing NetworkInterfaceId → SKIP."""
+ eni = {"Status": "available"}
+ ec2 = _setup_ec2([eni])
+ assert _run(_make_session(ec2)) == []
+
+ def test_missing_status_skipped(self):
+ """Scenario 10: Missing Status → SKIP."""
+ eni = {"NetworkInterfaceId": "eni-nostatus"}
+ ec2 = _setup_ec2([eni])
+ assert _run(_make_session(ec2)) == []
+
+
+# ---------------------------------------------------------------------------
+# §15 Must Fail
+# ---------------------------------------------------------------------------
+
+
+class TestMustFailRule:
+ def test_client_error_raises(self):
+ """Scenario 11: DescribeNetworkInterfaces request failure → FAIL RULE (re-raise)."""
+ ec2 = MagicMock()
+ ec2.get_paginator.return_value.paginate.side_effect = _client_error("AccessDenied")
+ with pytest.raises(ClientError):
+ _run(_make_session(ec2))
+
+ def test_unauthorized_operation_raises_permission_error(self):
+ ec2 = MagicMock()
+ ec2.get_paginator.return_value.paginate.side_effect = _client_error("UnauthorizedOperation")
+ with pytest.raises(PermissionError):
+ _run(_make_session(ec2))
+
+ def test_botocore_error_raises(self):
+ ec2 = MagicMock()
+ ec2.get_paginator.return_value.paginate.side_effect = BotoCoreError()
+ with pytest.raises(BotoCoreError):
+ _run(_make_session(ec2))
+
+
+# ---------------------------------------------------------------------------
+# §15 Must NOT Happen
+# ---------------------------------------------------------------------------
+
+
+class TestMustNotHappen:
+ def test_no_temporal_threshold_applied(self):
+ """No temporal threshold — any available ENI regardless of creation age emits."""
+ # Provide ENI with no CreateTime at all — must still emit.
+ ec2 = _setup_ec2([_eni("eni-notime", "available")])
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1
+
+ def test_create_time_not_in_details(self):
+ """CreateTime must not appear in details — no temporal claim from DescribeNetworkInterfaces."""
+ ec2 = _setup_ec2([_eni("eni-ct", "available")])
+ findings = _run(_make_session(ec2))
+ assert "create_time" not in findings[0].details
+ assert "age_days" not in findings[0].details
+
+ def test_interface_type_not_exclusion(self):
+ """No interface_type may be used as an exclusion gate."""
+ excluded_types = [
+ "load_balancer",
+ "nat_gateway",
+ "vpc_endpoint",
+ "gateway_load_balancer",
+ "gateway_load_balancer_endpoint",
+ ]
+ for itype in excluded_types:
+ eni = _eni(f"eni-{itype}", "available", InterfaceType=itype)
+ ec2 = _setup_ec2([eni])
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1, f"interface_type={itype!r} must not be excluded"
+
+ def test_requester_managed_true_not_exclusion(self):
+ """requester_managed == True must not exclude the ENI."""
+ eni = _eni("eni-rm", "available", RequesterManaged=True)
+ ec2 = _setup_ec2([eni])
+ assert len(_run(_make_session(ec2))) == 1
+
+ def test_cost_estimate_is_none(self):
+ """estimated_monthly_cost_usd must always be None."""
+ ec2 = _setup_ec2([_eni("eni-cost", "available")])
+ findings = _run(_make_session(ec2))
+ assert findings[0].estimated_monthly_cost_usd is None
+
+ def test_confidence_never_medium_or_low(self):
+ """HIGH confidence only; MEDIUM and LOW must not appear."""
+ ec2 = _setup_ec2([_eni("eni-conf", "available")])
+ f = _run(_make_session(ec2))[0]
+ assert f.confidence not in (ConfidenceLevel.MEDIUM, ConfidenceLevel.LOW)
+
+
+# ---------------------------------------------------------------------------
+# Normalization contract
+# ---------------------------------------------------------------------------
+
+
+class TestNormalization:
+ def test_non_dict_eni_skipped(self):
+ """Non-dict item in NetworkInterfaces → SKIP (not FAIL RULE)."""
+ ec2 = _setup_ec2(["not-a-dict", None, 42])
+ assert _run(_make_session(ec2)) == []
+
+ def test_empty_string_network_interface_id_skipped(self):
+ """Empty string NetworkInterfaceId treated as absent → SKIP."""
+ eni = {"NetworkInterfaceId": "", "Status": "available"}
+ ec2 = _setup_ec2([eni])
+ assert _run(_make_session(ec2)) == []
+
+ def test_empty_string_status_skipped(self):
+ """Empty string Status treated as absent → SKIP."""
+ eni = {"NetworkInterfaceId": "eni-x", "Status": ""}
+ ec2 = _setup_ec2([eni])
+ assert _run(_make_session(ec2)) == []
+
+ def test_requester_managed_string_treated_as_null(self):
+ """RequesterManaged as string → not a bool → normalized to null (not excluded)."""
+ eni = _eni("eni-rmstr", "available", RequesterManaged="true")
+ ec2 = _setup_ec2([eni])
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1
+ assert findings[0].details["requester_managed"] is None
+
+ def test_requester_managed_false_stored_correctly(self):
+ eni = _eni("eni-rmf", "available", RequesterManaged=False)
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert findings[0].details["requester_managed"] is False
+
+ def test_requester_managed_true_stored_correctly(self):
+ eni = _eni("eni-rmt", "available", RequesterManaged=True)
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert findings[0].details["requester_managed"] is True
+
+ def test_operator_managed_non_bool_treated_as_null(self):
+ """Operator.Managed as string → null (not an exclusion)."""
+ eni = _eni("eni-opstr", "available", Operator={"Managed": "yes"})
+ ec2 = _setup_ec2([eni])
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1
+ assert findings[0].details["operator_managed"] is None
+
+ def test_operator_managed_true_stored_and_not_excluded(self):
+ eni = _eni("eni-opt", "available", Operator={"Managed": True, "Principal": "svc"})
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert len(findings) == 1
+ assert findings[0].details["operator_managed"] is True
+ assert findings[0].details["operator_principal"] == "svc"
+
+ def test_operator_non_dict_yields_null_fields(self):
+ eni = _eni("eni-opnd", "available", Operator="bad")
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert len(findings) == 1
+ assert findings[0].details["operator_managed"] is None
+ assert findings[0].details["operator_principal"] is None
+
+ def test_tag_set_absent_yields_empty_list(self):
+ eni = _eni("eni-notag", "available")
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert findings[0].details["tag_set"] == []
+
+ def test_tag_set_list_preserved(self):
+ tags = [{"Key": "Name", "Value": "my-eni"}]
+ eni = _eni("eni-tag", "available", TagSet=tags)
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert findings[0].details["tag_set"] == tags
+
+ def test_tag_set_non_list_yields_empty_list(self):
+ eni = _eni("eni-badtag", "available", TagSet="not-a-list")
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert findings[0].details["tag_set"] == []
+
+ def test_public_ip_from_association(self):
+ eni = _eni("eni-pub", "available", Association={"PublicIp": "1.2.3.4"})
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert findings[0].details["public_ip"] == "1.2.3.4"
+
+ def test_public_ip_absent_when_no_association(self):
+ eni = _eni("eni-nopub", "available")
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert findings[0].details["public_ip"] is None
+
+ def test_association_non_dict_yields_null_public_ip(self):
+ eni = _eni("eni-assocstr", "available", Association="bad")
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert findings[0].details["public_ip"] is None
+
+ def test_empty_string_contextual_fields_yield_null(self):
+ """Empty strings for contextual string fields must normalize to null."""
+ eni = _eni(
+ "eni-emptyctx",
+ "available",
+ InterfaceType="",
+ AvailabilityZone="",
+ SubnetId="",
+ VpcId="",
+ PrivateIpAddress="",
+ Description="",
+ )
+ findings = _run(_make_session(_setup_ec2([eni])))
+ d = findings[0].details
+ assert d["interface_type"] is None
+ assert d["availability_zone"] is None
+ assert d["subnet_id"] is None
+ assert d["vpc_id"] is None
+ assert d["private_ip_address"] is None
+ assert d["description"] is None
+
+ def test_attachment_absent_yields_null_attachment_fields(self):
+ eni = _eni("eni-noatt", "available")
+ findings = _run(_make_session(_setup_ec2([eni])))
+ d = findings[0].details
+ assert d["attachment_status"] is None
+ assert d["attachment_id"] is None
+ assert d["attachment_instance_id"] is None
+ assert d["attachment_instance_owner_id"] is None
+
+ def test_attachment_fields_populated_from_object(self):
+ att = {
+ "Status": "detached",
+ "AttachmentId": "eni-attach-01",
+ "InstanceId": "i-abc123",
+ "InstanceOwnerId": "123456789012",
+ }
+ eni = _eni("eni-att", "available", Attachment=att)
+ findings = _run(_make_session(_setup_ec2([eni])))
+ d = findings[0].details
+ assert d["attachment_status"] == "detached"
+ assert d["attachment_id"] == "eni-attach-01"
+ assert d["attachment_instance_id"] == "i-abc123"
+ assert d["attachment_instance_owner_id"] == "123456789012"
+
+ def test_malformed_attachment_non_dict_yields_null_fields(self):
+ eni = _eni("eni-badatt", "available", Attachment="not-a-dict")
+ findings = _run(_make_session(_setup_ec2([eni])))
+ d = findings[0].details
+ assert d["attachment_status"] is None
+ assert d["attachment_id"] is None
+
+
+# ---------------------------------------------------------------------------
+# Attachment consistency (structural inconsistency rule)
+# ---------------------------------------------------------------------------
+
+
+class TestAttachmentConsistency:
+ def test_available_plus_detached_attachment_emits(self):
+ """available + Attachment.Status 'detached' → consistent → EMIT."""
+ eni = _eni("eni-detatt", "available", Attachment={"Status": "detached"})
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert len(findings) == 1
+
+ def test_available_plus_null_attachment_status_emits(self):
+ """available + Attachment object missing Status → null attachment_status → EMIT."""
+ eni = _eni("eni-noastatus", "available", Attachment={"AttachmentId": "eni-attach-01"})
+ findings = _run(_make_session(_setup_ec2([eni])))
+ assert len(findings) == 1
+
+ def test_available_plus_attached_skipped(self):
+ eni = _eni("eni-inconsis1", "available", Attachment={"Status": "attached"})
+ assert _run(_make_session(_setup_ec2([eni]))) == []
+
+ def test_available_plus_attaching_skipped(self):
+ eni = _eni("eni-inconsis2", "available", Attachment={"Status": "attaching"})
+ assert _run(_make_session(_setup_ec2([eni]))) == []
+
+ def test_available_plus_detaching_skipped(self):
+ eni = _eni("eni-inconsis3", "available", Attachment={"Status": "detaching"})
+ assert _run(_make_session(_setup_ec2([eni]))) == []
+
+ def test_available_plus_unknown_attachment_status_skipped(self):
+ """Unknown/malformed attachment_status (e.g. 'foo') → SKIP; only null/'detached' emits."""
+ eni = _eni("eni-unknown-att", "available", Attachment={"Status": "foo"})
+ assert _run(_make_session(_setup_ec2([eni]))) == []
+
+ def test_available_plus_arbitrary_string_attachment_status_skipped(self):
+ """Any non-null, non-'detached' attachment_status string → SKIP."""
+ for bad_status in ("pending", "error", "unknown", "AVAILABLE", ""):
+ eni_id = f"eni-bad-{bad_status or 'empty'}"
+ # Empty string normalizes to None via _str(), so it should emit.
+ # Non-empty unknown strings should skip.
+ eni = _eni(eni_id, "available", Attachment={"Status": bad_status})
+ findings = _run(_make_session(_setup_ec2([eni])))
+ if bad_status == "":
+ # Empty string → attachment_status normalizes to None → emit
+ assert (
+ len(findings) == 1
+ ), "Empty attachment Status should emit (normalized to null)"
+ else:
+ assert findings == [], f"attachment_status={bad_status!r} should skip"
+
+ def test_attachment_status_does_not_override_top_level_status(self):
+ """attachment_status is validation only; it must not independently produce eligibility."""
+ eni = _eni("eni-auth", "in-use", Attachment={"Status": "detached"})
+ assert _run(_make_session(_setup_ec2([eni]))) == []
+
+
+# ---------------------------------------------------------------------------
+# Signals used (§11.3)
+# ---------------------------------------------------------------------------
+
+
+class TestSignalsUsed:
+ def test_top_level_status_signal_always_present(self):
+ ec2 = _setup_ec2([_eni("eni-sig1", "available")])
+ signals = _run(_make_session(ec2))[0].evidence.signals_used
+ assert any("'available'" in s for s in signals)
+
+ def test_requester_managed_true_adds_signal(self):
+ eni = _eni("eni-rm-sig", "available", RequesterManaged=True)
+ signals = _run(_make_session(_setup_ec2([eni])))[0].evidence.signals_used
+ assert any("requester-managed" in s.lower() for s in signals)
+
+ def test_requester_managed_false_no_extra_signal(self):
+ eni = _eni("eni-rmf-sig", "available", RequesterManaged=False)
+ signals = _run(_make_session(_setup_ec2([eni])))[0].evidence.signals_used
+ assert not any("requester-managed" in s.lower() for s in signals)
+
+ def test_requester_managed_null_no_extra_signal(self):
+ eni = _eni("eni-rmn-sig", "available")
+ signals = _run(_make_session(_setup_ec2([eni])))[0].evidence.signals_used
+ assert not any("requester-managed" in s.lower() for s in signals)
+
+ def test_operator_managed_true_adds_signal(self):
+ eni = _eni("eni-op-sig", "available", Operator={"Managed": True, "Principal": "svc-x"})
+ signals = _run(_make_session(_setup_ec2([eni])))[0].evidence.signals_used
+ assert any("operator-managed" in s.lower() for s in signals)
+ assert any("svc-x" in s for s in signals)
+
+ def test_operator_managed_true_no_principal_uses_unknown(self):
+ eni = _eni("eni-op-noprinc", "available", Operator={"Managed": True})
+ signals = _run(_make_session(_setup_ec2([eni])))[0].evidence.signals_used
+ assert any("operator-managed" in s.lower() for s in signals)
+ assert any("unknown" in s for s in signals)
+
+ def test_operator_managed_false_no_extra_signal(self):
+ eni = _eni("eni-opf-sig", "available", Operator={"Managed": False})
+ signals = _run(_make_session(_setup_ec2([eni])))[0].evidence.signals_used
+ assert not any("operator-managed" in s.lower() for s in signals)
+
+ def test_both_requester_and_operator_managed_both_signals_present(self):
+ eni = _eni(
+ "eni-both-sig",
+ "available",
+ RequesterManaged=True,
+ Operator={"Managed": True, "Principal": "svc-y"},
+ )
+ signals = _run(_make_session(_setup_ec2([eni])))[0].evidence.signals_used
+ assert any("requester-managed" in s.lower() for s in signals)
+ assert any("operator-managed" in s.lower() for s in signals)
+
+
+# ---------------------------------------------------------------------------
+# Evidence contract (§11)
+# ---------------------------------------------------------------------------
+
+
+class TestEvidenceContract:
+ def test_required_details_fields_present(self):
+ """All required evidence/details fields must be present in every finding."""
+ eni = _eni(
+ "eni-evid",
+ "available",
+ InterfaceType="interface",
+ RequesterManaged=True,
+ Operator={"Managed": False, "Principal": "svc"},
+ AvailabilityZone="us-east-1a",
+ SubnetId="subnet-aaa",
+ VpcId="vpc-bbb",
+ PrivateIpAddress="10.0.0.5",
+ Association={"PublicIp": "52.1.2.3"},
+ )
+ findings = _run(_make_session(_setup_ec2([eni])))
+ d = findings[0].details
+
+ required_fields = [
+ "evaluation_path",
+ "network_interface_id",
+ "normalized_status",
+ "attachment_status",
+ "interface_type",
+ "requester_managed",
+ "operator_managed",
+ "operator_principal",
+ "availability_zone",
+ "subnet_id",
+ "vpc_id",
+ "private_ip_address",
+ "public_ip",
+ ]
+ for field in required_fields:
+ assert field in d, f"Required field '{field}' missing from details"
+
+ def test_evaluation_path_exact_value(self):
+ ec2 = _setup_ec2([_eni("eni-ep", "available")])
+ findings = _run(_make_session(ec2))
+ assert findings[0].details["evaluation_path"] == "detached-eni-review-candidate"
+
+ def test_normalized_status_always_available_in_details(self):
+ ec2 = _setup_ec2([_eni("eni-ns", "available")])
+ findings = _run(_make_session(ec2))
+ assert findings[0].details["normalized_status"] == "available"
+
+ def test_network_interface_id_in_details(self):
+ ec2 = _setup_ec2([_eni("eni-id-check", "available")])
+ findings = _run(_make_session(ec2))
+ assert findings[0].details["network_interface_id"] == "eni-id-check"
+
+
+# ---------------------------------------------------------------------------
+# Confidence model (§12)
+# ---------------------------------------------------------------------------
+
+
+class TestConfidenceModel:
+ def test_high_confidence_for_available_no_conflict(self):
+ ec2 = _setup_ec2([_eni("eni-conf1", "available")])
+ assert _run(_make_session(ec2))[0].confidence == ConfidenceLevel.HIGH
+
+ def test_high_confidence_with_detached_attachment(self):
+ eni = _eni("eni-conf2", "available", Attachment={"Status": "detached"})
+ assert _run(_make_session(_setup_ec2([eni])))[0].confidence == ConfidenceLevel.HIGH
+
+ def test_high_confidence_requester_managed(self):
+ eni = _eni("eni-conf3", "available", RequesterManaged=True)
+ assert _run(_make_session(_setup_ec2([eni])))[0].confidence == ConfidenceLevel.HIGH
+
+ def test_high_confidence_operator_managed(self):
+ eni = _eni("eni-conf4", "available", Operator={"Managed": True})
+ assert _run(_make_session(_setup_ec2([eni])))[0].confidence == ConfidenceLevel.HIGH
+
+
+# ---------------------------------------------------------------------------
+# Cost model (§11.2)
+# ---------------------------------------------------------------------------
+
+
+class TestCostModel:
+ def test_estimated_monthly_cost_always_none(self):
+ ec2 = _setup_ec2([_eni("eni-cost1", "available")])
+ assert _run(_make_session(ec2))[0].estimated_monthly_cost_usd is None
+
+
+# ---------------------------------------------------------------------------
+# Risk model (§14)
+# ---------------------------------------------------------------------------
+
+
+class TestRiskModel:
+ def test_risk_is_low(self):
+ ec2 = _setup_ec2([_eni("eni-risk", "available")])
+ assert _run(_make_session(ec2))[0].risk == RiskLevel.LOW
+
+
+# ---------------------------------------------------------------------------
+# Title and reason contract (§13)
+# ---------------------------------------------------------------------------
+
+
+class TestTitleAndReasonContract:
+ def test_title_exact(self):
+ ec2 = _setup_ec2([_eni("eni-title", "available")])
+ assert _run(_make_session(ec2))[0].title == "ENI not currently attached review candidate"
+
+ def test_reason_exact(self):
+ ec2 = _setup_ec2([_eni("eni-reason", "available")])
+ reason = _run(_make_session(ec2))[0].reason
+ assert (
+ reason
+ == "ENI Status is 'available' — not currently attached per DescribeNetworkInterfaces"
+ )
+
+ def test_title_does_not_claim_safe_to_delete(self):
+ ec2 = _setup_ec2([_eni("eni-safe", "available")])
+ title = _run(_make_session(ec2))[0].title
+ assert "delete" not in title.lower()
+ assert "safe" not in title.lower()
+
+
+# ---------------------------------------------------------------------------
+# Pagination exhaustion
+# ---------------------------------------------------------------------------
+
+
+class TestPagination:
+ def test_multiple_pages_all_evaluated(self):
+ """Pagination must be fully exhausted — all pages contribute findings."""
+ ec2 = MagicMock()
+ paginator = MagicMock()
+ ec2.get_paginator.return_value = paginator
+ paginator.paginate.return_value = [
+ {"NetworkInterfaces": [_eni("eni-p1", "available")]},
+ {"NetworkInterfaces": [_eni("eni-p2", "available")]},
+ {"NetworkInterfaces": [_eni("eni-p3", "in-use")]},
+ ]
+ findings = _run(_make_session(ec2))
+ ids = {f.resource_id for f in findings}
+ assert "eni-p1" in ids
+ assert "eni-p2" in ids
+ assert "eni-p3" not in ids
+ assert len(findings) == 2
+
+ def test_empty_page_yields_no_findings(self):
+ ec2 = _setup_ec2([])
+ assert _run(_make_session(ec2)) == []
+
+ def test_paginator_called_with_correct_operation(self):
+ ec2 = _setup_ec2([])
+ _run(_make_session(ec2))
+ ec2.get_paginator.assert_called_once_with("describe_network_interfaces")
+
+ def test_mixed_valid_and_malformed_items(self):
+ """Malformed items in a page are silently skipped; valid items emit."""
+ ec2 = MagicMock()
+ paginator = MagicMock()
+ ec2.get_paginator.return_value = paginator
+ paginator.paginate.return_value = [
+ {
+ "NetworkInterfaces": [
+ "not-a-dict",
+ None,
+ {"Status": "available"}, # missing NetworkInterfaceId
+ _eni("eni-valid", "available"),
+ ]
+ }
+ ]
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1
+ assert findings[0].resource_id == "eni-valid"
+
+
+# ---------------------------------------------------------------------------
+# Additional correctness checks
+# ---------------------------------------------------------------------------
+
+
+class TestCorrectness:
+ def test_resource_id_matches_network_interface_id(self):
+ ec2 = _setup_ec2([_eni("eni-rid", "available")])
+ f = _run(_make_session(ec2))[0]
+ assert f.resource_id == "eni-rid"
+ assert f.details["network_interface_id"] == "eni-rid"
+
+ def test_region_in_finding(self):
+ ec2 = _setup_ec2([_eni("eni-reg", "available")])
+ session = MagicMock()
+ session.client.return_value = ec2
+ findings = find_detached_enis(session, "eu-west-1")
+ assert findings[0].region == "eu-west-1"
+
+ def test_rule_id_correct(self):
+ ec2 = _setup_ec2([_eni("eni-ruleid", "available")])
+ assert _run(_make_session(ec2))[0].rule_id == "aws.ec2.eni.detached"
+
+ def test_provider_is_aws(self):
+ ec2 = _setup_ec2([_eni("eni-prov", "available")])
+ assert _run(_make_session(ec2))[0].provider == "aws"
+
+ def test_multiple_available_enis_all_emit(self):
+ """All available ENIs in one page emit, regardless of other attributes."""
+ enis = [
+ _eni("eni-a1", "available"),
+ _eni("eni-a2", "available", RequesterManaged=True),
+ _eni("eni-a3", "available", InterfaceType="load_balancer"),
+ _eni("eni-a4", "available", Operator={"Managed": True}),
+ _eni("eni-a5", "available", Attachment={"Status": "detached"}),
+ ]
+ ec2 = _setup_ec2(enis)
+ findings = _run(_make_session(ec2))
+ ids = {f.resource_id for f in findings}
+ assert ids == {"eni-a1", "eni-a2", "eni-a3", "eni-a4", "eni-a5"}
+
+ def test_mixed_statuses_only_available_emits(self):
+ enis = [
+ _eni("eni-av", "available"),
+ _eni("eni-iu", "in-use"),
+ _eni("eni-at", "attaching"),
+ _eni("eni-dt", "detaching"),
+ _eni("eni-as", "associated"),
+ ]
+ ec2 = _setup_ec2(enis)
+ findings = _run(_make_session(ec2))
+ assert len(findings) == 1
+ assert findings[0].resource_id == "eni-av"
diff --git a/tests/cleancloud/providers/aws/test_aws_nat_gateway_idle.py b/tests/cleancloud/providers/aws/test_aws_nat_gateway_idle.py
index 0fd4db3..8c7f34f 100644
--- a/tests/cleancloud/providers/aws/test_aws_nat_gateway_idle.py
+++ b/tests/cleancloud/providers/aws/test_aws_nat_gateway_idle.py
@@ -1,341 +1,723 @@
+"""Tests for aws.ec2.nat_gateway.idle rule.
+
+Covers all acceptance scenarios from docs/specs/aws/nat_gateway_idle.md §15
+and the normalization, evidence, confidence, cost, risk, title/reason,
+failure, and pagination contracts.
+"""
+
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock
-from botocore.exceptions import ClientError
+import pytest
+from botocore.exceptions import BotoCoreError, ClientError
+from cleancloud.core.confidence import ConfidenceLevel
+from cleancloud.core.risk import RiskLevel
from cleancloud.providers.aws.rules.nat_gateway_idle import find_idle_nat_gateways
+_REGION = "us-east-1"
+_THRESHOLD = 14
-def test_find_idle_nat_gateways(mock_boto3_session):
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- now = datetime.now(timezone.utc)
- old_date = now - timedelta(days=30)
- recent_date = now - timedelta(days=5)
-
- # Mock paginator for describe_nat_gateways
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "NatGateways": [
- # Idle NAT Gateway (30 days old, no traffic) - should be flagged
- {
- "NatGatewayId": "nat-idle123",
- "State": "available",
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "NatGatewayAddresses": [
- {
- "AllocationId": "eipalloc-123",
- "PublicIp": "54.1.2.3",
- "PrivateIp": "10.0.1.100",
- }
- ],
- "Tags": [{"Key": "Name", "Value": "idle-nat-gateway"}],
- },
- # Active NAT Gateway (has traffic) - should NOT be flagged
- {
- "NatGatewayId": "nat-active456",
- "State": "available",
- "CreateTime": old_date,
- "VpcId": "vpc-456",
- "SubnetId": "subnet-456",
- "NatGatewayAddresses": [],
- "Tags": [],
- },
- # Young NAT Gateway (5 days old) - should NOT be flagged
- {
- "NatGatewayId": "nat-young789",
- "State": "available",
- "CreateTime": recent_date,
- "VpcId": "vpc-789",
- "SubnetId": "subnet-789",
- "NatGatewayAddresses": [],
- "Tags": [],
- },
- # Pending NAT Gateway - should NOT be flagged
- {
- "NatGatewayId": "nat-pending000",
- "State": "pending",
- "CreateTime": old_date,
- "VpcId": "vpc-000",
- "SubnetId": "subnet-000",
- "NatGatewayAddresses": [],
- "Tags": [],
- },
- ]
- }
- ]
-
- # Mock CloudWatch client
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- ec2 if service == "ec2" else cloudwatch_mock
- )
-
- # Mock CloudWatch metrics - idle for nat-idle123, active for nat-active456
- def mock_get_metric_statistics(**kwargs):
- nat_id = kwargs["Dimensions"][0]["Value"]
- if nat_id == "nat-idle123":
- # No traffic
- return {"Datapoints": []}
- elif nat_id == "nat-active456":
- # Has traffic
- return {"Datapoints": [{"Sum": 1000000}]}
- else:
- return {"Datapoints": []}
-
- cloudwatch_mock.get_metric_statistics.side_effect = mock_get_metric_statistics
-
- findings = find_idle_nat_gateways(mock_boto3_session, region)
- nat_ids = {f.resource_id for f in findings}
-
- # Should flag idle NAT Gateway
- assert "nat-idle123" in nat_ids
-
- # Should NOT flag active NAT Gateway (has traffic)
- assert "nat-active456" not in nat_ids
-
- # Should NOT flag young NAT Gateway
- assert "nat-young789" not in nat_ids
-
- # Should NOT flag pending NAT Gateway
- assert "nat-pending000" not in nat_ids
-
- # Verify finding details
- assert len(findings) == 1
- finding = findings[0]
- assert finding.provider == "aws"
- assert finding.rule_id == "aws.ec2.nat_gateway.idle"
- # Zero traffic + no route table references → HIGH confidence and risk
- assert finding.confidence.value == "high"
- assert finding.risk.value == "high"
- assert finding.details["name"] == "idle-nat-gateway"
- assert finding.details["vpc_id"] == "vpc-123"
- assert "~$32.85/month" in finding.details["estimated_monthly_cost"]
- assert finding.estimated_monthly_cost_usd == 32.85
-
-
-def test_find_idle_nat_gateways_empty_account(mock_boto3_session):
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [{"NatGateways": []}]
-
- # Mock CloudWatch client (needed even for empty results)
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- ec2 if service == "ec2" else cloudwatch_mock
- )
-
- findings = find_idle_nat_gateways(mock_boto3_session, region)
- assert findings == []
-
-
-def test_find_idle_nat_gateways_custom_threshold(mock_boto3_session):
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- now = datetime.now(timezone.utc)
- # NAT Gateway is 20 days old
- creation_date = now - timedelta(days=20)
-
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "NatGateways": [
- {
- "NatGatewayId": "nat-test",
- "State": "available",
- "CreateTime": creation_date,
- "VpcId": "vpc-test",
- "SubnetId": "subnet-test",
- "NatGatewayAddresses": [],
- "Tags": [],
- },
- ]
- }
- ]
-
- # Mock CloudWatch - no traffic
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- ec2 if service == "ec2" else cloudwatch_mock
- )
- cloudwatch_mock.get_metric_statistics.return_value = {"Datapoints": []}
-
- # With 30-day threshold, should NOT be flagged (only 20 days old)
- findings_30 = find_idle_nat_gateways(mock_boto3_session, region, idle_days=30)
- assert len(findings_30) == 0
- # With 14-day threshold, should be flagged (20 > 14)
- findings_14 = find_idle_nat_gateways(mock_boto3_session, region, idle_days=14)
- assert len(findings_14) == 1
- assert findings_14[0].resource_id == "nat-test"
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
-def test_find_idle_nat_gateways_with_traffic(mock_boto3_session):
- """NAT Gateway with traffic should not be flagged."""
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
+def _make_session(ec2: MagicMock, cw: MagicMock) -> MagicMock:
+ session = MagicMock()
- now = datetime.now(timezone.utc)
- old_date = now - timedelta(days=30)
-
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "NatGateways": [
- {
- "NatGatewayId": "nat-active",
- "State": "available",
- "CreateTime": old_date,
- "VpcId": "vpc-123",
- "SubnetId": "subnet-123",
- "NatGatewayAddresses": [],
- "Tags": [],
- },
- ]
- }
- ]
+ def _client(service, **kwargs):
+ if service == "ec2":
+ return ec2
+ if service == "cloudwatch":
+ return cw
+ raise ValueError(f"Unexpected service: {service}")
- # Mock CloudWatch - has traffic
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- ec2 if service == "ec2" else cloudwatch_mock
- )
- cloudwatch_mock.get_metric_statistics.return_value = {
- "Datapoints": [
- {"Sum": 50000000}, # 50 MB of traffic
- {"Sum": 100000000}, # 100 MB of traffic
- ]
- }
-
- findings = find_idle_nat_gateways(mock_boto3_session, region)
- assert findings == []
-
-
-def test_find_idle_nat_gateways_title_includes_threshold(mock_boto3_session):
- """Verify title includes the days_idle threshold."""
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
-
- now = datetime.now(timezone.utc)
- old_date = now - timedelta(days=30)
-
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "NatGateways": [
- {
- "NatGatewayId": "nat-test",
- "State": "available",
- "CreateTime": old_date,
- "VpcId": "vpc-test",
- "SubnetId": "subnet-test",
- "NatGatewayAddresses": [],
- "Tags": [],
- },
- ]
- }
- ]
+ session.client.side_effect = _client
+ return session
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- ec2 if service == "ec2" else cloudwatch_mock
- )
- cloudwatch_mock.get_metric_statistics.return_value = {"Datapoints": []}
- # Test with custom threshold
- findings = find_idle_nat_gateways(mock_boto3_session, region, idle_days=7)
- assert len(findings) == 1
- assert "7+ Days" in findings[0].title
+def _setup_ec2(nat_gws: list) -> MagicMock:
+ ec2 = MagicMock()
+ paginator = MagicMock()
+ ec2.get_paginator.return_value = paginator
+ paginator.paginate.return_value = [{"NatGateways": nat_gws}]
+ ec2.describe_route_tables.return_value = {"RouteTables": []}
+ return ec2
-def _make_nat_gw(nat_id, age_days=30):
+def _nat_gw(
+ gw_id: str = "nat-aabbccdd",
+ state: str = "available",
+ age_days: int = 20,
+ **extra,
+) -> dict:
now = datetime.now(timezone.utc)
- return {
- "NatGatewayId": nat_id,
- "State": "available",
+ base = {
+ "NatGatewayId": gw_id,
+ "State": state,
"CreateTime": now - timedelta(days=age_days),
"VpcId": "vpc-test",
"SubnetId": "subnet-test",
- "NatGatewayAddresses": [],
- "Tags": [],
+ "ConnectivityType": "public",
}
+ base.update(extra)
+ return base
-def test_metric_fetch_failure_produces_low_confidence_finding(mock_boto3_session):
- """When CloudWatch metrics fail with a transient error, a LOW-confidence finding
- is created with an 'unverified' title instead of being silently suppressed."""
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
+def _cw_zero_traffic(num_datapoints: int = 1) -> MagicMock:
+ """CloudWatch mock that returns `num_datapoints` zero-valued datapoints for every metric."""
+ cw = MagicMock()
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [{"NatGateways": [_make_nat_gw("nat-fetch-fail")]}]
+ def _get_stats(**kwargs):
+ stat = kwargs["Statistics"][0]
+ return {"Datapoints": [{stat: 0.0} for _ in range(num_datapoints)]}
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- ec2 if service == "ec2" else cloudwatch_mock
- )
+ cw.get_metric_statistics.side_effect = _get_stats
+ return cw
- error_response = {"Error": {"Code": "Throttling", "Message": "Rate exceeded"}}
- cloudwatch_mock.get_metric_statistics.side_effect = ClientError(
- error_response, "GetMetricStatistics"
- )
- findings = find_idle_nat_gateways(mock_boto3_session, region)
- assert len(findings) == 1
- f = findings[0]
- assert f.confidence.value == "low"
- assert "Requires Traffic Verification" in f.title
- assert "unverified" in f.reason.lower() or "could not be fetched" in f.reason.lower()
- signals_not_checked = [s.lower() for s in f.evidence.signals_not_checked]
- assert any("fetch failed" in s for s in signals_not_checked)
+def _cw_no_datapoints() -> MagicMock:
+ """CloudWatch mock that returns empty datapoints for every metric."""
+ cw = MagicMock()
+ cw.get_metric_statistics.return_value = {"Datapoints": []}
+ return cw
-def test_not_in_route_tables_signal(mock_boto3_session):
- """When no route table references the NAT GW, the finding includes a signal noting it."""
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
+def _cw_with_traffic(trigger_metric: str, trigger_stat: str, value: float) -> MagicMock:
+    """CloudWatch mock with traffic on one metric (trigger_stat is unused)."""
+    cw = MagicMock()
-    paginator = ec2.get_paginator.return_value
-    paginator.paginate.return_value = [{"NatGateways": [_make_nat_gw("nat-no-routes")]}]
+    def _get_stats(**kwargs):
+        metric = kwargs["MetricName"]
+        stat = kwargs["Statistics"][0]
+        if metric == trigger_metric:
+            return {"Datapoints": [{stat: value}]}
+        # All non-trigger metrics report one zero-valued datapoint keyed by
+        # the requested statistic. The previous and/or expression always
+        # evaluated to exactly this {stat: 0.0} dict (its left operand,
+        # dict.get(stat, 0.0), is falsy for every key), so this is equivalent.
+        return {"Datapoints": [{stat: 0.0}]}
+
+    cw.get_metric_statistics.side_effect = _get_stats
+    return cw
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- ec2 if service == "ec2" else cloudwatch_mock
- )
- cloudwatch_mock.get_metric_statistics.return_value = {"Datapoints": []}
- ec2.describe_route_tables.return_value = {"RouteTables": []}
- findings = find_idle_nat_gateways(mock_boto3_session, region)
- assert len(findings) == 1
- signals = findings[0].evidence.signals_used
- assert any("not referenced by any vpc route table" in s.lower() for s in signals)
- assert findings[0].details["in_route_tables"] is False
+def _cw_active_connection(value: float = 5.0) -> MagicMock:
+ """CloudWatch mock where ActiveConnectionCount Maximum > 0."""
+ cw = MagicMock()
+ def _get_stats(**kwargs):
+ metric = kwargs["MetricName"]
+ if metric == "ActiveConnectionCount":
+ return {"Datapoints": [{"Maximum": value}]}
+ return {"Datapoints": [{"Sum": 0.0}]}
-def test_in_route_tables_signal(mock_boto3_session):
- """When a route table references the NAT GW, the finding notes it (still idle by traffic)."""
- region = "us-east-1"
- ec2 = mock_boto3_session._ec2
+ cw.get_metric_statistics.side_effect = _get_stats
+ return cw
- paginator = ec2.get_paginator.return_value
- paginator.paginate.return_value = [{"NatGateways": [_make_nat_gw("nat-has-routes")]}]
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- ec2 if service == "ec2" else cloudwatch_mock
+def _cw_error(code: str = "Throttling") -> MagicMock:
+ cw = MagicMock()
+ cw.get_metric_statistics.side_effect = ClientError(
+ {"Error": {"Code": code, "Message": "test"}}, "GetMetricStatistics"
)
- cloudwatch_mock.get_metric_statistics.return_value = {"Datapoints": []}
- ec2.describe_route_tables.return_value = {"RouteTables": [{"RouteTableId": "rtb-abc"}]}
-
- findings = find_idle_nat_gateways(mock_boto3_session, region)
- assert len(findings) == 1
- signals = findings[0].evidence.signals_used
- assert any("referenced by at least one vpc route table" in s.lower() for s in signals)
- assert findings[0].details["in_route_tables"] is True
+ return cw
+
+
+def _run(session: MagicMock, threshold: int = _THRESHOLD) -> list:
+ return find_idle_nat_gateways(session, _REGION, idle_days_threshold=threshold)
+
+
+def _client_error(code: str = "SomeError") -> ClientError:
+ return ClientError({"Error": {"Code": code, "Message": "test"}}, "DescribeNatGateways")
+
+
+# ---------------------------------------------------------------------------
+# §15 Must Emit
+# ---------------------------------------------------------------------------
+
+
+class TestMustEmit:
+ def test_available_old_enough_zero_traffic_no_route_ref_high(self):
+ """Scenario 1: Available, old, zero traffic, no route ref → EMIT HIGH."""
+ ec2 = _setup_ec2([_nat_gw("nat-1", age_days=20)])
+ ec2.describe_route_tables.return_value = {"RouteTables": []}
+ cw = _cw_zero_traffic()
+ findings = _run(_make_session(ec2, cw))
+ assert len(findings) == 1
+ assert findings[0].resource_id == "nat-1"
+ assert findings[0].confidence == ConfidenceLevel.HIGH
+
+ def test_available_old_enough_zero_traffic_route_ref_medium(self):
+ """Scenario 2: Available, old, zero traffic, route table still references → EMIT MEDIUM."""
+ ec2 = _setup_ec2([_nat_gw("nat-2", age_days=20)])
+ ec2.describe_route_tables.return_value = {"RouteTables": [{"RouteTableId": "rtb-abc"}]}
+ cw = _cw_zero_traffic()
+ findings = _run(_make_session(ec2, cw))
+ assert len(findings) == 1
+ assert findings[0].confidence == ConfidenceLevel.MEDIUM
+
+ def test_available_old_enough_zero_traffic_rt_lookup_failed_medium(self):
+ """Scenario 3a: DescribeRouteTables ClientError → EMIT MEDIUM, context unavailable."""
+ ec2 = _setup_ec2([_nat_gw("nat-3", age_days=20)])
+ ec2.describe_route_tables.side_effect = _client_error("AccessDenied")
+ cw = _cw_zero_traffic()
+ findings = _run(_make_session(ec2, cw))
+ assert len(findings) == 1
+ assert findings[0].confidence == ConfidenceLevel.MEDIUM
+
+ def test_available_old_enough_zero_traffic_rt_botocore_error_medium(self):
+ """Scenario 3b: DescribeRouteTables BotoCoreError → EMIT MEDIUM, context unavailable."""
+ ec2 = _setup_ec2([_nat_gw("nat-3b", age_days=20)])
+ ec2.describe_route_tables.side_effect = BotoCoreError()
+ cw = _cw_zero_traffic()
+ findings = _run(_make_session(ec2, cw))
+ assert len(findings) == 1
+ assert findings[0].confidence == ConfidenceLevel.MEDIUM
+ assert findings[0].details["route_table_referenced"] is None
+
+ def test_rt_any_exception_degrades_gracefully(self):
+ """Any exception from DescribeRouteTables degrades context — scan never blows up."""
+ ec2 = _setup_ec2([_nat_gw("nat-rtexc", age_days=20)])
+ ec2.describe_route_tables.side_effect = RuntimeError("unexpected")
+ cw = _cw_zero_traffic()
+ findings = _run(_make_session(ec2, cw))
+ assert len(findings) == 1
+ assert findings[0].confidence == ConfidenceLevel.MEDIUM
+ assert findings[0].details["route_table_referenced"] is None
+
+
+# ---------------------------------------------------------------------------
+# §15 Must Skip
+# ---------------------------------------------------------------------------
+
+
+class TestMustSkip:
+ def test_state_pending_skipped(self):
+ """Scenario 4a: State pending → SKIP."""
+ ec2 = _setup_ec2([_nat_gw("nat-pend", state="pending")])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_state_failed_skipped(self):
+ ec2 = _setup_ec2([_nat_gw("nat-fail", state="failed")])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_state_deleting_skipped(self):
+ ec2 = _setup_ec2([_nat_gw("nat-del", state="deleting")])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_state_deleted_skipped(self):
+ ec2 = _setup_ec2([_nat_gw("nat-deld", state="deleted")])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_too_young_skipped(self):
+ """Scenario 5: Available but younger than threshold → SKIP."""
+ ec2 = _setup_ec2([_nat_gw("nat-young", age_days=5)])
+ assert _run(_make_session(ec2, _cw_zero_traffic()), threshold=14) == []
+
+ def test_bytes_out_to_destination_nonzero_skipped(self):
+ """Scenario 6: BytesOutToDestination Sum > 0 → SKIP."""
+ ec2 = _setup_ec2([_nat_gw("nat-bytes")])
+ cw = MagicMock()
+
+ def _get_stats(**kwargs):
+ metric = kwargs["MetricName"]
+ stat = kwargs["Statistics"][0]
+ if metric == "BytesOutToDestination":
+ return {"Datapoints": [{"Sum": 100.0}]}
+ return {"Datapoints": [{stat: 0.0}]}
+
+ cw.get_metric_statistics.side_effect = _get_stats
+ assert _run(_make_session(ec2, cw)) == []
+
+ def test_bytes_in_from_source_nonzero_skipped(self):
+ ec2 = _setup_ec2([_nat_gw("nat-bifs")])
+ cw = MagicMock()
+
+ def _get_stats(**kwargs):
+ metric = kwargs["MetricName"]
+ stat = kwargs["Statistics"][0]
+ if metric == "BytesInFromSource":
+ return {"Datapoints": [{"Sum": 1.0}]}
+ return {"Datapoints": [{stat: 0.0}]}
+
+ cw.get_metric_statistics.side_effect = _get_stats
+ assert _run(_make_session(ec2, cw)) == []
+
+ def test_active_connection_count_nonzero_skipped(self):
+ """Scenario 7: ActiveConnectionCount Maximum > 0 → SKIP."""
+ ec2 = _setup_ec2([_nat_gw("nat-acc")])
+ cw = _cw_active_connection(value=3.0)
+ assert _run(_make_session(ec2, cw)) == []
+
+ def test_missing_create_time_skipped(self):
+ """Scenario 8a: Missing CreateTime → SKIP."""
+ gw = {"NatGatewayId": "nat-noct", "State": "available"}
+ ec2 = _setup_ec2([gw])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_naive_create_time_skipped(self):
+ """Scenario 8b: Naive (timezone-unaware) CreateTime → SKIP."""
+ gw = _nat_gw("nat-naive")
+ gw["CreateTime"] = datetime.now() # naive, no tzinfo
+ ec2 = _setup_ec2([gw])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_future_create_time_skipped(self):
+ """Scenario 8c: Future CreateTime → SKIP."""
+ gw = _nat_gw("nat-future")
+ gw["CreateTime"] = datetime.now(timezone.utc) + timedelta(days=10)
+ ec2 = _setup_ec2([gw])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_no_datapoints_any_metric_skipped(self):
+ """Scenario 9: Any required metric returns no datapoints → SKIP ITEM."""
+ ec2 = _setup_ec2([_nat_gw("nat-nodata")])
+ cw = _cw_no_datapoints()
+ assert _run(_make_session(ec2, cw)) == []
+
+ def test_partial_datapoints_missing_one_metric_skipped(self):
+ """If one metric has no datapoints but others do → SKIP ITEM."""
+ ec2 = _setup_ec2([_nat_gw("nat-partial")])
+ cw = MagicMock()
+ call_count = [0]
+
+ def _get_stats(**kwargs):
+ call_count[0] += 1
+ # First metric returns data; second metric returns nothing
+ if call_count[0] == 1:
+ return {"Datapoints": [{"Sum": 0.0}]}
+ return {"Datapoints": []}
+
+ cw.get_metric_statistics.side_effect = _get_stats
+ assert _run(_make_session(ec2, cw)) == []
+
+
+# ---------------------------------------------------------------------------
+# §15 Must Fail
+# ---------------------------------------------------------------------------
+
+
+class TestMustFailRule:
+ def test_describe_nat_gateways_client_error_raises(self):
+ """Scenario 10: DescribeNatGateways ClientError → FAIL RULE."""
+ ec2 = MagicMock()
+ ec2.get_paginator.return_value.paginate.side_effect = _client_error("InternalServerError")
+ with pytest.raises(ClientError):
+ _run(_make_session(ec2, _cw_zero_traffic()))
+
+ def test_describe_nat_gateways_unauthorized_raises_permission_error(self):
+ ec2 = MagicMock()
+ ec2.get_paginator.return_value.paginate.side_effect = _client_error("UnauthorizedOperation")
+ with pytest.raises(PermissionError):
+ _run(_make_session(ec2, _cw_zero_traffic()))
+
+ def test_describe_nat_gateways_botocore_error_raises(self):
+ ec2 = MagicMock()
+ ec2.get_paginator.return_value.paginate.side_effect = BotoCoreError()
+ with pytest.raises(BotoCoreError):
+ _run(_make_session(ec2, _cw_zero_traffic()))
+
+ def test_cloudwatch_client_error_raises(self):
+ """Scenario 11: CloudWatch metric fetch ClientError → FAIL RULE."""
+ ec2 = _setup_ec2([_nat_gw("nat-cwerr")])
+ cw = _cw_error("InternalServerError")
+ with pytest.raises(ClientError):
+ _run(_make_session(ec2, cw))
+
+ def test_cloudwatch_botocore_error_raises(self):
+ ec2 = _setup_ec2([_nat_gw("nat-cwboto")])
+ cw = MagicMock()
+ cw.get_metric_statistics.side_effect = BotoCoreError()
+ with pytest.raises(BotoCoreError):
+ _run(_make_session(ec2, cw))
+
+ def test_cloudwatch_unauthorized_raises_permission_error(self):
+ ec2 = _setup_ec2([_nat_gw("nat-cwunauth")])
+ cw = _cw_error("UnauthorizedOperation")
+ with pytest.raises(PermissionError):
+ _run(_make_session(ec2, cw))
+
+ def test_cloudwatch_throttle_raises_not_low_confidence(self):
+ """Throttling error must raise (FAIL RULE), NOT produce a LOW-confidence finding."""
+ ec2 = _setup_ec2([_nat_gw("nat-throttle")])
+ cw = _cw_error("Throttling")
+ with pytest.raises(ClientError):
+ _run(_make_session(ec2, cw))
+
+
+# ---------------------------------------------------------------------------
+# §15 Must NOT Happen
+# ---------------------------------------------------------------------------
+
+
+class TestMustNotHappen:
+ def test_low_confidence_never_emitted(self):
+ """LOW confidence finding must never be emitted."""
+ ec2 = _setup_ec2([_nat_gw("nat-nolow")])
+ cw = _cw_zero_traffic()
+ findings = _run(_make_session(ec2, cw))
+ for f in findings:
+ assert f.confidence != ConfidenceLevel.LOW
+
+ def test_missing_datapoints_not_treated_as_zero(self):
+ """Missing datapoints → SKIP ITEM, not zero traffic → no finding."""
+ ec2 = _setup_ec2([_nat_gw("nat-nodata2")])
+ cw = _cw_no_datapoints()
+ assert _run(_make_session(ec2, cw)) == []
+
+ def test_cost_is_none(self):
+ """estimated_monthly_cost_usd must always be None."""
+ ec2 = _setup_ec2([_nat_gw("nat-cost")])
+ cw = _cw_zero_traffic()
+ f = _run(_make_session(ec2, cw))[0]
+ assert f.estimated_monthly_cost_usd is None
+
+ def test_route_table_absence_not_substitute_for_cloudwatch(self):
+ """Route-table absence must not compensate for missing CloudWatch evidence."""
+ ec2 = _setup_ec2([_nat_gw("nat-rt-subst")])
+ ec2.describe_route_tables.return_value = {"RouteTables": []}
+ cw = _cw_no_datapoints() # No CW data → must skip
+ assert _run(_make_session(ec2, cw)) == []
+
+
+# ---------------------------------------------------------------------------
+# Normalization contract
+# ---------------------------------------------------------------------------
+
+
+class TestNormalization:
+ def test_non_dict_item_skipped(self):
+ ec2 = MagicMock()
+ paginator = MagicMock()
+ ec2.get_paginator.return_value = paginator
+ paginator.paginate.return_value = [{"NatGateways": ["not-a-dict", None, 42]}]
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_missing_nat_gateway_id_skipped(self):
+ gw = {"State": "available", "CreateTime": datetime.now(timezone.utc) - timedelta(days=20)}
+ ec2 = _setup_ec2([gw])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_empty_string_nat_gateway_id_skipped(self):
+ gw = _nat_gw("nat-x")
+ gw["NatGatewayId"] = ""
+ ec2 = _setup_ec2([gw])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_missing_state_skipped(self):
+ gw = {
+ "NatGatewayId": "nat-nostate",
+ "CreateTime": datetime.now(timezone.utc) - timedelta(days=20),
+ }
+ ec2 = _setup_ec2([gw])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_age_exactly_at_threshold_emits(self):
+ """age_days == threshold → eligible (>= check)."""
+ ec2 = _setup_ec2([_nat_gw("nat-exact", age_days=_THRESHOLD)])
+ cw = _cw_zero_traffic()
+ findings = _run(_make_session(ec2, cw))
+ assert len(findings) == 1
+
+ def test_age_one_below_threshold_skipped(self):
+ ec2 = _setup_ec2([_nat_gw("nat-below", age_days=_THRESHOLD - 1)])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_tags_absent_yields_empty_list(self):
+ gw = _nat_gw("nat-notag")
+ gw.pop("Tags", None)
+ ec2 = _setup_ec2([gw])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.details["tag_set"] == []
+
+ def test_tags_list_preserved(self):
+ tags = [{"Key": "Name", "Value": "my-nat"}]
+ ec2 = _setup_ec2([_nat_gw("nat-tag", Tags=tags)])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.details["tag_set"] == tags
+
+ def test_nat_gateway_addresses_absent_yields_empty_list(self):
+ gw = _nat_gw("nat-noaddr")
+ gw.pop("NatGatewayAddresses", None)
+ ec2 = _setup_ec2([gw])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.details["nat_gateway_addresses"] == []
+
+ def test_connectivity_type_null_when_absent(self):
+ gw = _nat_gw("nat-notype")
+ gw.pop("ConnectivityType", None)
+ ec2 = _setup_ec2([gw])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.details["connectivity_type"] is None
+
+
+# ---------------------------------------------------------------------------
+# Evidence contract (§11)
+# ---------------------------------------------------------------------------
+
+
+class TestEvidenceContract:
+ def test_required_details_fields_present(self):
+ ec2 = _setup_ec2([_nat_gw("nat-evid")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ d = f.details
+ required = [
+ "evaluation_path",
+ "nat_gateway_id",
+ "normalized_state",
+ "create_time",
+ "age_days",
+ "idle_days_threshold",
+ "connectivity_type",
+ "availability_mode",
+ "vpc_id",
+ "subnet_id",
+ "bytes_out_to_destination",
+ "bytes_in_from_source",
+ "bytes_in_from_destination",
+ "bytes_out_to_source",
+ "active_connection_count_max",
+ ]
+ for field in required:
+ assert field in d, f"Required field '{field}' missing"
+
+ def test_evaluation_path_exact(self):
+ ec2 = _setup_ec2([_nat_gw("nat-ep")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.details["evaluation_path"] == "idle-nat-gateway-review-candidate"
+
+ def test_normalized_state_is_available(self):
+ ec2 = _setup_ec2([_nat_gw("nat-ns")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.details["normalized_state"] == "available"
+
+ def test_all_metric_values_zero_in_details(self):
+ ec2 = _setup_ec2([_nat_gw("nat-mv")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ d = f.details
+ assert d["bytes_out_to_destination"] == 0.0
+ assert d["bytes_in_from_source"] == 0.0
+ assert d["bytes_in_from_destination"] == 0.0
+ assert d["bytes_out_to_source"] == 0.0
+ assert d["active_connection_count_max"] == 0.0
+
+ def test_route_table_referenced_false_in_details(self):
+ ec2 = _setup_ec2([_nat_gw("nat-rtf")])
+ ec2.describe_route_tables.return_value = {"RouteTables": []}
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.details["route_table_referenced"] is False
+
+ def test_route_table_referenced_true_in_details(self):
+ ec2 = _setup_ec2([_nat_gw("nat-rtt")])
+ ec2.describe_route_tables.return_value = {"RouteTables": [{"RouteTableId": "rtb-x"}]}
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.details["route_table_referenced"] is True
+
+ def test_route_table_referenced_none_when_check_fails(self):
+ ec2 = _setup_ec2([_nat_gw("nat-rtn")])
+ ec2.describe_route_tables.side_effect = _client_error("AccessDenied")
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.details["route_table_referenced"] is None
+
+ def test_idle_days_threshold_in_details(self):
+ ec2 = _setup_ec2([_nat_gw("nat-thresh", age_days=30)])
+ f = _run(_make_session(ec2, _cw_zero_traffic()), threshold=21)[0]
+ assert f.details["idle_days_threshold"] == 21
+
+ def test_active_connection_count_max_in_details(self):
+ ec2 = _setup_ec2([_nat_gw("nat-acc")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert "active_connection_count_max" in f.details
+
+
+# ---------------------------------------------------------------------------
+# Confidence model (§12)
+# ---------------------------------------------------------------------------
+
+
+class TestConfidenceModel:
+ def test_high_when_no_route_ref(self):
+ ec2 = _setup_ec2([_nat_gw("nat-ch")])
+ ec2.describe_route_tables.return_value = {"RouteTables": []}
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.confidence == ConfidenceLevel.HIGH
+
+ def test_medium_when_route_table_referenced(self):
+ ec2 = _setup_ec2([_nat_gw("nat-cm-rt")])
+ ec2.describe_route_tables.return_value = {"RouteTables": [{"RouteTableId": "rtb-x"}]}
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.confidence == ConfidenceLevel.MEDIUM
+
+ def test_medium_when_route_table_check_fails(self):
+ ec2 = _setup_ec2([_nat_gw("nat-cm-fail")])
+ ec2.describe_route_tables.side_effect = _client_error("AccessDenied")
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.confidence == ConfidenceLevel.MEDIUM
+
+ def test_low_confidence_never_emitted(self):
+ ec2 = _setup_ec2([_nat_gw("nat-nolow")])
+ cw = _cw_zero_traffic()
+ for f in _run(_make_session(ec2, cw)):
+ assert f.confidence != ConfidenceLevel.LOW
+
+
+# ---------------------------------------------------------------------------
+# Cost model (§11.2)
+# ---------------------------------------------------------------------------
+
+
+class TestCostModel:
+ def test_estimated_monthly_cost_always_none(self):
+ ec2 = _setup_ec2([_nat_gw("nat-cost")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.estimated_monthly_cost_usd is None
+
+ def test_no_hardcoded_cost_in_details(self):
+ """No dollar-amount cost estimate should appear in details."""
+ ec2 = _setup_ec2([_nat_gw("nat-nodetcost")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ detail_str = str(f.details).lower()
+ assert "$32" not in detail_str
+ assert "estimated_monthly_cost" not in f.details
+
+
+# ---------------------------------------------------------------------------
+# Risk model (§14)
+# ---------------------------------------------------------------------------
+
+
+class TestRiskModel:
+ def test_risk_is_medium_no_route_ref(self):
+ ec2 = _setup_ec2([_nat_gw("nat-risk1")])
+ ec2.describe_route_tables.return_value = {"RouteTables": []}
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.risk == RiskLevel.MEDIUM
+
+ def test_risk_is_medium_with_route_ref(self):
+ ec2 = _setup_ec2([_nat_gw("nat-risk2")])
+ ec2.describe_route_tables.return_value = {"RouteTables": [{"RouteTableId": "rtb-x"}]}
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.risk == RiskLevel.MEDIUM
+
+ def test_risk_never_high(self):
+ ec2 = _setup_ec2([_nat_gw("nat-nohigh")])
+ for f in _run(_make_session(ec2, _cw_zero_traffic())):
+ assert f.risk != RiskLevel.HIGH
+
+
+# ---------------------------------------------------------------------------
+# Title and reason contract (§13)
+# ---------------------------------------------------------------------------
+
+
+class TestTitleAndReasonContract:
+ def test_title_exact(self):
+ ec2 = _setup_ec2([_nat_gw("nat-title")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.title == "Idle NAT Gateway review candidate"
+
+ def test_reason_contains_threshold(self):
+ ec2 = _setup_ec2([_nat_gw("nat-reason", age_days=30)])
+ f = _run(_make_session(ec2, _cw_zero_traffic()), threshold=21)[0]
+ assert "21" in f.reason
+
+ def test_title_does_not_claim_safe_to_delete(self):
+ ec2 = _setup_ec2([_nat_gw("nat-safe")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert "delete" not in f.title.lower()
+ assert "safe" not in f.title.lower()
+
+
+# ---------------------------------------------------------------------------
+# Pagination
+# ---------------------------------------------------------------------------
+
+
+class TestPagination:
+ def test_multiple_pages_all_evaluated(self):
+ ec2 = MagicMock()
+ paginator = MagicMock()
+ ec2.get_paginator.return_value = paginator
+ ec2.describe_route_tables.return_value = {"RouteTables": []}
+ paginator.paginate.return_value = [
+ {"NatGateways": [_nat_gw("nat-p1")]},
+ {"NatGateways": [_nat_gw("nat-p2")]},
+ {"NatGateways": [_nat_gw("nat-p3", state="deleted")]},
+ ]
+ findings = _run(_make_session(ec2, _cw_zero_traffic()))
+ ids = {f.resource_id for f in findings}
+ assert "nat-p1" in ids
+ assert "nat-p2" in ids
+ assert "nat-p3" not in ids
+
+ def test_empty_page_yields_no_findings(self):
+ ec2 = _setup_ec2([])
+ assert _run(_make_session(ec2, _cw_zero_traffic())) == []
+
+ def test_paginator_called_with_correct_operation(self):
+ ec2 = _setup_ec2([])
+ _run(_make_session(ec2, _cw_zero_traffic()))
+ ec2.get_paginator.assert_called_once_with("describe_nat_gateways")
+
+
+# ---------------------------------------------------------------------------
+# Additional correctness
+# ---------------------------------------------------------------------------
+
+
+class TestCorrectness:
+ def test_resource_id_matches_nat_gateway_id(self):
+ ec2 = _setup_ec2([_nat_gw("nat-rid")])
+ f = _run(_make_session(ec2, _cw_zero_traffic()))[0]
+ assert f.resource_id == "nat-rid"
+ assert f.details["nat_gateway_id"] == "nat-rid"
+
+ def test_rule_id_correct(self):
+ ec2 = _setup_ec2([_nat_gw("nat-ruleid")])
+ assert _run(_make_session(ec2, _cw_zero_traffic()))[0].rule_id == "aws.ec2.nat_gateway.idle"
+
+ def test_provider_is_aws(self):
+ ec2 = _setup_ec2([_nat_gw("nat-prov")])
+ assert _run(_make_session(ec2, _cw_zero_traffic()))[0].provider == "aws"
+
+ def test_active_connection_count_metric_is_checked(self):
+ """ActiveConnectionCount must be in the required metrics; missing data → SKIP."""
+ ec2 = _setup_ec2([_nat_gw("nat-acccheck")])
+ cw = MagicMock()
+ metrics_queried = []
+
+ def _get_stats(**kwargs):
+ metrics_queried.append(kwargs["MetricName"])
+            # Return a single zero datapoint keyed by the requested
+            # statistic. (The `X and Y or Z` expression this replaces
+            # always evaluated to exactly this zero datapoint.)
+            stat = kwargs["Statistics"][0]
+            return {
+                "Datapoints": [{stat: 0.0}]
+            }
+
+ cw.get_metric_statistics.side_effect = _get_stats
+ _run(_make_session(ec2, cw))
+ assert "ActiveConnectionCount" in metrics_queried
+
+ def test_five_metrics_queried_per_nat_gateway(self):
+ """Exactly 5 CloudWatch metric calls must be made per evaluated NAT Gateway."""
+ ec2 = _setup_ec2([_nat_gw("nat-5m")])
+ cw = _cw_zero_traffic()
+ _run(_make_session(ec2, cw))
+ assert cw.get_metric_statistics.call_count == 5
+
+ def test_connectivity_type_private_emits(self):
+ ec2 = _setup_ec2([_nat_gw("nat-priv", ConnectivityType="private")])
+ findings = _run(_make_session(ec2, _cw_zero_traffic()))
+ assert len(findings) == 1
+ assert findings[0].details["connectivity_type"] == "private"
+
+ def test_multiple_available_old_zero_traffic_nat_gws_all_emit(self):
+ nat_gws = [_nat_gw(f"nat-multi-{i}", age_days=20) for i in range(3)]
+ ec2 = _setup_ec2(nat_gws)
+ findings = _run(_make_session(ec2, _cw_zero_traffic()))
+ assert len(findings) == 3
diff --git a/tests/cleancloud/providers/aws/test_aws_rds_idle.py b/tests/cleancloud/providers/aws/test_aws_rds_idle.py
index 3125316..f4a064a 100644
--- a/tests/cleancloud/providers/aws/test_aws_rds_idle.py
+++ b/tests/cleancloud/providers/aws/test_aws_rds_idle.py
@@ -1,345 +1,954 @@
+"""
+Tests for aws.rds.instance.idle rule.
+
+Test class overview:
+ TestMustEmit — canonical detection path
+ TestMustSkip — all exclusion rules
+ TestMustFailRule — required API failure behaviour
+ TestNormalization — _normalize_db_instance field extraction
+ TestCloudWatchContract — metric name, statistic, period, dimension
+ TestEvidenceContract — signals_used, signals_not_checked, evaluation_path
+ TestConfidenceModel — always MEDIUM
+ TestCostModel — estimated_monthly_cost_usd always None
+ TestRiskModel — always MEDIUM
+ TestTitleAndReasonContract — exact title and evaluation_path
+ TestPagination — multi-page exhaustion
+ TestStandaloneScope — all three scope exclusion fields
+"""
+
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock
-from cleancloud.providers.aws.rules.rds_idle import find_idle_rds_instances
+import pytest
+from botocore.exceptions import BotoCoreError, ClientError
+from cleancloud.providers.aws.rules.rds_idle import (
+ _normalize_db_instance,
+ find_idle_rds_instances,
+)
-def _make_rds_paginator(mock_boto3_session, instances):
- rds = mock_boto3_session._rds
- paginator = rds.get_paginator.return_value
- paginator.paginate.return_value = [{"DBInstances": instances}]
- return rds
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+_REGION = "us-east-1"
-def _make_cw_side_effect(responses_by_db_and_metric):
- """Build a side_effect that routes by (db_id, metric_name)."""
- def side_effect(**kwargs):
- db_id = kwargs["Dimensions"][0]["Value"]
- metric = kwargs["MetricName"]
- return responses_by_db_and_metric.get(
- (db_id, metric),
- responses_by_db_and_metric.get(db_id, {"Datapoints": []}),
- )
+def _now() -> datetime:
+ return datetime.now(timezone.utc)
- return side_effect
+def _old() -> datetime:
+ """30 days ago — always older than the default 14-day threshold."""
+ return datetime.now(timezone.utc) - timedelta(days=30)
-def test_find_idle_rds_instances(mock_boto3_session):
- region = "us-east-1"
- now = datetime.now(timezone.utc)
- old_date = now - timedelta(days=30)
- recent_date = now - timedelta(days=5)
- rds = _make_rds_paginator(
- mock_boto3_session,
- [
- # Idle instance (30 days old, no connections) — should be flagged
- {
- "DBInstanceIdentifier": "idle-db",
- "DBInstanceStatus": "available",
- "InstanceCreateTime": old_date,
- "DBInstanceClass": "db.t3.medium",
- "Engine": "mysql",
- "EngineVersion": "8.0.35",
- "MultiAZ": False,
- "AllocatedStorage": 100,
- "ReadReplicaSourceDBInstanceIdentifier": None,
- "DBClusterIdentifier": None,
- "TagList": [{"Key": "env", "Value": "dev"}],
- },
- # Active instance (has connections) — should NOT be flagged
- {
- "DBInstanceIdentifier": "active-db",
- "DBInstanceStatus": "available",
- "InstanceCreateTime": old_date,
- "DBInstanceClass": "db.r5.large",
- "Engine": "postgres",
- "EngineVersion": "15.4",
- "MultiAZ": True,
- "AllocatedStorage": 200,
- "ReadReplicaSourceDBInstanceIdentifier": None,
- "DBClusterIdentifier": None,
- "TagList": [],
- },
- # Young instance (5 days old) — should NOT be flagged
- {
- "DBInstanceIdentifier": "young-db",
- "DBInstanceStatus": "available",
- "InstanceCreateTime": recent_date,
- "DBInstanceClass": "db.t3.micro",
- "Engine": "mysql",
- "EngineVersion": "8.0.35",
- "MultiAZ": False,
- "AllocatedStorage": 20,
- "ReadReplicaSourceDBInstanceIdentifier": None,
- "DBClusterIdentifier": None,
- "TagList": [],
- },
- # Read replica — should NOT be flagged
- {
- "DBInstanceIdentifier": "replica-db",
- "DBInstanceStatus": "available",
- "InstanceCreateTime": old_date,
- "DBInstanceClass": "db.t3.medium",
- "Engine": "mysql",
- "EngineVersion": "8.0.35",
- "MultiAZ": False,
- "AllocatedStorage": 100,
- "ReadReplicaSourceDBInstanceIdentifier": "source-db",
- "DBClusterIdentifier": None,
- "TagList": [],
- },
- # Aurora cluster member — should NOT be flagged
- {
- "DBInstanceIdentifier": "aurora-db",
- "DBInstanceStatus": "available",
- "InstanceCreateTime": old_date,
- "DBInstanceClass": "db.r5.large",
- "Engine": "aurora-mysql",
- "EngineVersion": "8.0.mysql_aurora.3.04.0",
- "MultiAZ": False,
- "AllocatedStorage": 0,
- "ReadReplicaSourceDBInstanceIdentifier": None,
- "DBClusterIdentifier": "my-aurora-cluster",
- "TagList": [],
- },
- ],
- )
-
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- rds if service == "rds" else cloudwatch_mock
- )
- cloudwatch_mock.get_metric_statistics.side_effect = _make_cw_side_effect(
- {
- # idle-db: zero connections + low CPU + zero IO — all three signals agree
- ("idle-db", "DatabaseConnections"): {"Datapoints": [{"Sum": 0}]},
- ("idle-db", "CPUUtilization"): {"Datapoints": [{"Maximum": 2.0}]},
- ("idle-db", "ReadIOPS"): {"Datapoints": [{"Sum": 0}]},
- ("idle-db", "WriteIOPS"): {"Datapoints": [{"Sum": 0}]},
- # active-db: has connections
- ("active-db", "DatabaseConnections"): {"Datapoints": [{"Sum": 500}]},
- }
- )
-
- findings = find_idle_rds_instances(mock_boto3_session, region)
- db_ids = {f.resource_id for f in findings}
-
- assert "idle-db" in db_ids
- assert "active-db" not in db_ids
- assert "young-db" not in db_ids
- assert "replica-db" not in db_ids
- assert "aurora-db" not in db_ids
-
- assert len(findings) == 1
- finding = findings[0]
- assert finding.provider == "aws"
- assert finding.rule_id == "aws.rds.instance.idle"
- assert finding.resource_type == "aws.rds.instance"
- assert finding.confidence.value == "medium" # three-signal: connections + CPU + IO
- assert finding.risk.value == "high"
- assert finding.details["engine"] == "mysql 8.0.35"
- assert finding.details["instance_class"] == "db.t3.medium"
- assert finding.details["connections_14d"] == 0
- assert finding.details["allocated_storage_gb"] == 100
- assert "~$49/month" in finding.details["estimated_compute_cost"]
- assert finding.estimated_monthly_cost_usd is not None
- assert finding.estimated_monthly_cost_usd > 0
- assert finding.details["tags"] == {"env": "dev"}
- assert "cluster_id" not in finding.details
- assert "peak_cpu_pct" in finding.details
-
-
-def test_find_idle_rds_instances_empty(mock_boto3_session):
- region = "us-east-1"
- rds = mock_boto3_session._rds
-
- paginator = rds.get_paginator.return_value
- paginator.paginate.return_value = [{"DBInstances": []}]
-
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- rds if service == "rds" else cloudwatch_mock
- )
-
- findings = find_idle_rds_instances(mock_boto3_session, region)
- assert findings == []
-
-
-def test_find_idle_rds_instances_custom_threshold(mock_boto3_session):
- region = "us-east-1"
- rds = mock_boto3_session._rds
- now = datetime.now(timezone.utc)
- creation_date = now - timedelta(days=20)
-
- paginator = rds.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "DBInstances": [
- {
- "DBInstanceIdentifier": "test-db",
- "DBInstanceStatus": "available",
- "InstanceCreateTime": creation_date,
- "DBInstanceClass": "db.t3.small",
- "Engine": "postgres",
- "EngineVersion": "15.4",
- "MultiAZ": False,
- "AllocatedStorage": 50,
- "ReadReplicaSourceDBInstanceIdentifier": None,
- "DBClusterIdentifier": None,
- "TagList": [],
- },
- ]
- }
- ]
-
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- rds if service == "rds" else cloudwatch_mock
- )
- cloudwatch_mock.get_metric_statistics.side_effect = _make_cw_side_effect(
- {
- ("test-db", "DatabaseConnections"): {"Datapoints": [{"Sum": 0}]},
- ("test-db", "CPUUtilization"): {"Datapoints": [{"Maximum": 1.0}]},
- }
- )
-
- # With 30-day threshold, should NOT be flagged (only 20 days old)
- findings_30 = find_idle_rds_instances(mock_boto3_session, region, idle_days=30)
- assert len(findings_30) == 0
-
- # With 14-day threshold, should be flagged (20 > 14)
- findings_14 = find_idle_rds_instances(mock_boto3_session, region, idle_days=14)
- assert len(findings_14) == 1
- assert findings_14[0].resource_id == "test-db"
-
-
-def test_find_idle_rds_instances_with_connections(mock_boto3_session):
- """RDS instance with connections should not be flagged."""
- region = "us-east-1"
- rds = mock_boto3_session._rds
- now = datetime.now(timezone.utc)
- old_date = now - timedelta(days=30)
-
- paginator = rds.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "DBInstances": [
- {
- "DBInstanceIdentifier": "active-db",
- "DBInstanceStatus": "available",
- "InstanceCreateTime": old_date,
- "DBInstanceClass": "db.r5.large",
- "Engine": "postgres",
- "EngineVersion": "15.4",
- "MultiAZ": True,
- "AllocatedStorage": 200,
- "ReadReplicaSourceDBInstanceIdentifier": None,
- "DBClusterIdentifier": None,
- "TagList": [],
- },
- ]
- }
- ]
-
- cloudwatch_mock = MagicMock()
- mock_boto3_session.client.side_effect = lambda service, **kwargs: (
- rds if service == "rds" else cloudwatch_mock
- )
- cloudwatch_mock.get_metric_statistics.return_value = {
- "Datapoints": [{"Sum": 150}, {"Sum": 200}]
+def _young() -> datetime:
+ """5 days ago — always younger than the default 14-day threshold."""
+ return datetime.now(timezone.utc) - timedelta(days=5)
+
+
def _client_error(code: str) -> ClientError:
    """Build a minimal botocore ClientError whose Code and Message are *code*."""
    error_response = {"Error": {"Code": code, "Message": code}}
    return ClientError(error_response, "op")
+
+
def _botocore_error() -> BotoCoreError:
    """Return a bare BotoCoreError (a non-ClientError transport-level failure)."""
    return BotoCoreError()
+
+
def _make_instance(**overrides) -> dict:
    """Return a minimal valid DescribeDBInstances item; keyword overrides win."""
    defaults = {
        "DBInstanceIdentifier": "test-db",
        "DBInstanceStatus": "available",
        "InstanceCreateTime": _old(),
        "Engine": "mysql",
        "EngineVersion": "8.0.35",
        "DBInstanceClass": "db.t3.medium",
        "MultiAZ": False,
        "AllocatedStorage": 100,
        "StorageType": "gp2",
        "DBClusterIdentifier": None,
        "ReadReplicaSourceDBInstanceIdentifier": None,
        "ReadReplicaSourceDBClusterIdentifier": None,
        "TagList": [],
    }
    # Merge rather than mutate so each call hands back a fresh dict.
    return {**defaults, **overrides}
- findings = find_idle_rds_instances(mock_boto3_session, region)
- assert findings == []
-
-
-def test_find_idle_rds_no_datapoints_skipped(mock_boto3_session):
- """Instance where CW returns zero datapoints should be skipped (no metric visibility)."""
- region = "us-east-1"
- rds = mock_boto3_session._rds
- now = datetime.now(timezone.utc)
- old_date = now - timedelta(days=30)
-
- paginator = rds.get_paginator.return_value
- paginator.paginate.return_value = [
- {
- "DBInstances": [
- {
- "DBInstanceIdentifier": "no-data-db",
- "DBInstanceStatus": "available",
- "InstanceCreateTime": old_date,
- "DBInstanceClass": "db.t3.medium",
- "Engine": "mysql",
- "EngineVersion": "8.0.35",
- "MultiAZ": False,
- "AllocatedStorage": 100,
- "ReadReplicaSourceDBInstanceIdentifier": None,
- "DBClusterIdentifier": None,
- "TagList": [],
- },
+
+def _zero_connections_response() -> dict:
+ """CloudWatch response: datapoints present, all Maximum == 0."""
+ return {"Datapoints": [{"Maximum": 0.0}]}
+
+
+def _nonzero_connections_response(val: float = 5.0) -> dict:
+ return {"Datapoints": [{"Maximum": val}]}
+
+
+def _no_datapoints_response() -> dict:
+ return {"Datapoints": []}
+
+
+def _setup(
+ mock_boto3_session,
+ instances: list,
+ cw_response=None,
+ cw_side_effect=None,
+):
+ """Wire up RDS paginator and CloudWatch mock, return (rds, cloudwatch)."""
+ rds = MagicMock()
+ paginator = MagicMock()
+ paginator.paginate.return_value = [{"DBInstances": instances}]
+ rds.get_paginator.return_value = paginator
+
+ cloudwatch = MagicMock()
+ if cw_side_effect is not None:
+ cloudwatch.get_metric_statistics.side_effect = cw_side_effect
+ elif cw_response is not None:
+ cloudwatch.get_metric_statistics.return_value = cw_response
+
+ def client_side_effect(service, **kwargs):
+ if service == "rds":
+ return rds
+ if service == "cloudwatch":
+ return cloudwatch
+ raise ValueError(f"Unexpected service: {service}")
+
+ mock_boto3_session.client.side_effect = client_side_effect
+ return rds, cloudwatch
+
+
+# ---------------------------------------------------------------------------
+# TestMustEmit
+# ---------------------------------------------------------------------------
+
+
class TestMustEmit:
    """Cases that must produce exactly one finding."""

    def test_standalone_available_old_zero_connections_emits(self, mock_boto3_session):
        """Canonical path: standalone, available, old enough, zero DatabaseConnections."""
        _setup(mock_boto3_session, [_make_instance()], cw_response=_zero_connections_response())
        findings = find_idle_rds_instances(mock_boto3_session, _REGION)
        assert len(findings) == 1
        finding = findings[0]
        assert finding.provider == "aws"
        assert finding.rule_id == "aws.rds.instance.idle"
        assert finding.resource_type == "aws.rds.instance"
        assert finding.resource_id == "test-db"
        assert finding.region == _REGION

    def test_multiple_datapoints_all_zero_emits(self, mock_boto3_session):
        """Several datapoints, every Maximum == 0 → still EMIT."""
        all_zero = {"Datapoints": [{"Maximum": 0.0} for _ in range(3)]}
        _setup(mock_boto3_session, [_make_instance()], cw_response=all_zero)
        assert len(find_idle_rds_instances(mock_boto3_session, _REGION)) == 1

    def test_details_database_connections_max_zero(self, mock_boto3_session):
        """The emitted finding records database_connections_max == 0.0."""
        _setup(mock_boto3_session, [_make_instance()], cw_response=_zero_connections_response())
        findings = find_idle_rds_instances(mock_boto3_session, _REGION)
        assert findings[0].details["database_connections_max"] == 0.0

    def test_details_required_fields_present(self, mock_boto3_session):
        """All required details fields from spec §11.1 must be present."""
        required_keys = (
            "evaluation_path",
            "db_instance_id",
            "normalized_status",
            "instance_create_time",
            "age_days",
            "idle_days_threshold",
            "engine",
            "engine_version",
            "db_instance_class",
            "database_connections_max",
        )
        _setup(mock_boto3_session, [_make_instance()], cw_response=_zero_connections_response())
        details = find_idle_rds_instances(mock_boto3_session, _REGION)[0].details
        for key in required_keys:
            assert key in details, f"Missing required detail key: {key}"

    def test_optional_context_fields_present(self, mock_boto3_session):
        """Optional context fields from spec §11.1 must be present in details."""
        context_keys = (
            "db_cluster_identifier",
            "read_replica_source_db_instance_identifier",
            "read_replica_source_db_cluster_identifier",
            "multi_az",
            "allocated_storage_gib",
            "storage_type",
            "tag_set",
        )
        _setup(mock_boto3_session, [_make_instance()], cw_response=_zero_connections_response())
        details = find_idle_rds_instances(mock_boto3_session, _REGION)[0].details
        for key in context_keys:
            assert key in details, f"Missing optional context detail: {key}"
+
+
+# ---------------------------------------------------------------------------
+# TestMustSkip
+# ---------------------------------------------------------------------------
+
+
class TestMustSkip:
    """Eligibility gates: each one independently suppresses the finding."""

    def _scan(self, mock_boto3_session, instances, response):
        # Shared driver: wire mocks, run the rule, hand back its findings.
        _setup(mock_boto3_session, instances, cw_response=response)
        return find_idle_rds_instances(mock_boto3_session, _REGION)

    def test_non_available_status_skipped(self, mock_boto3_session):
        for status in ("stopped", "stopping", "creating", "modifying", "backing-up"):
            out = self._scan(
                mock_boto3_session,
                [_make_instance(DBInstanceStatus=status)],
                _zero_connections_response(),
            )
            assert out == [], f"Expected skip for status={status}"

    def test_cluster_member_skipped(self, mock_boto3_session):
        """DB cluster member (DBClusterIdentifier present) → SKIP ITEM."""
        out = self._scan(
            mock_boto3_session,
            [_make_instance(DBClusterIdentifier="my-cluster")],
            _zero_connections_response(),
        )
        assert out == []

    def test_read_replica_db_instance_source_skipped(self, mock_boto3_session):
        """ReadReplicaSourceDBInstanceIdentifier present → SKIP ITEM."""
        out = self._scan(
            mock_boto3_session,
            [_make_instance(ReadReplicaSourceDBInstanceIdentifier="source-db")],
            _zero_connections_response(),
        )
        assert out == []

    def test_read_replica_db_cluster_source_skipped(self, mock_boto3_session):
        """ReadReplicaSourceDBClusterIdentifier present → SKIP ITEM."""
        out = self._scan(
            mock_boto3_session,
            [_make_instance(ReadReplicaSourceDBClusterIdentifier="source-cluster")],
            _zero_connections_response(),
        )
        assert out == []

    def test_too_young_skipped(self, mock_boto3_session):
        """Instance younger than idle_days_threshold → SKIP ITEM."""
        out = self._scan(
            mock_boto3_session,
            [_make_instance(InstanceCreateTime=_young())],
            _zero_connections_response(),
        )
        assert out == []

    def test_exactly_at_threshold_emits(self, mock_boto3_session):
        """age_days == idle_days_threshold satisfies the >= check → EMIT."""
        # 14 days + 1 hour guarantees floor(total_seconds / 86400) == 14.
        boundary = datetime.now(timezone.utc) - timedelta(days=14, hours=1)
        out = self._scan(
            mock_boto3_session,
            [_make_instance(InstanceCreateTime=boundary)],
            _zero_connections_response(),
        )
        assert len(out) == 1

    def test_no_datapoints_skipped(self, mock_boto3_session):
        """DatabaseConnections returns no datapoints → SKIP ITEM (not LOW finding)."""
        out = self._scan(mock_boto3_session, [_make_instance()], _no_datapoints_response())
        assert out == []

    def test_nonzero_connections_skipped(self, mock_boto3_session):
        """DatabaseConnections Maximum > 0 → SKIP ITEM."""
        out = self._scan(
            mock_boto3_session,
            [_make_instance()],
            _nonzero_connections_response(val=1.0),
        )
        assert out == []

    def test_any_nonzero_datapoint_skipped(self, mock_boto3_session):
        """One nonzero datapoint among zeros is enough to skip."""
        mixed = {"Datapoints": [{"Maximum": 0.0}, {"Maximum": 3.0}]}
        out = self._scan(mock_boto3_session, [_make_instance()], mixed)
        assert out == []

    def test_missing_db_instance_identifier_skipped(self, mock_boto3_session):
        """Missing DBInstanceIdentifier → SKIP ITEM."""
        item = _make_instance()
        del item["DBInstanceIdentifier"]
        assert self._scan(mock_boto3_session, [item], _zero_connections_response()) == []

    def test_missing_status_skipped(self, mock_boto3_session):
        """Missing DBInstanceStatus → SKIP ITEM."""
        item = _make_instance()
        del item["DBInstanceStatus"]
        assert self._scan(mock_boto3_session, [item], _zero_connections_response()) == []

    def test_missing_create_time_skipped(self, mock_boto3_session):
        """Missing InstanceCreateTime → SKIP ITEM."""
        item = _make_instance()
        del item["InstanceCreateTime"]
        assert self._scan(mock_boto3_session, [item], _zero_connections_response()) == []

    def test_naive_create_time_skipped(self, mock_boto3_session):
        """Naive InstanceCreateTime (no tzinfo) → SKIP ITEM."""
        naive = _old().replace(tzinfo=None)
        out = self._scan(
            mock_boto3_session,
            [_make_instance(InstanceCreateTime=naive)],
            _zero_connections_response(),
        )
        assert out == []

    def test_future_create_time_skipped(self, mock_boto3_session):
        """InstanceCreateTime in the future → SKIP ITEM."""
        future = datetime.now(timezone.utc) + timedelta(days=1)
        out = self._scan(
            mock_boto3_session,
            [_make_instance(InstanceCreateTime=future)],
            _zero_connections_response(),
        )
        assert out == []

    def test_non_dict_item_skipped(self, mock_boto3_session):
        """Non-dict items in DBInstances list → silently skipped."""
        out = self._scan(
            mock_boto3_session,
            [None, "string", 42, _make_instance()],
            _zero_connections_response(),
        )
        assert len(out) == 1

    def test_empty_db_instances_no_findings(self, mock_boto3_session):
        assert self._scan(mock_boto3_session, [], _zero_connections_response()) == []
+
+
+# ---------------------------------------------------------------------------
+# TestMustFailRule
+# ---------------------------------------------------------------------------
+
+
class TestMustFailRule:
    """Failures of required AWS calls must abort the rule, never degrade silently."""

    @staticmethod
    def _wire_failing_describe(mock_boto3_session, error):
        """Route clients so rds.get_paginator raises *error*; CloudWatch stays inert.

        Extracted to remove the four-way duplication of identical mock wiring
        across the DescribeDBInstances failure tests.
        """
        rds = MagicMock()
        rds.get_paginator.side_effect = error
        cloudwatch = MagicMock()
        mock_boto3_session.client.side_effect = (
            lambda service, **kwargs: rds if service == "rds" else cloudwatch
        )

    def test_describe_db_instances_access_denied_raises_permission_error(self, mock_boto3_session):
        self._wire_failing_describe(mock_boto3_session, _client_error("AccessDenied"))
        with pytest.raises(PermissionError, match="rds:DescribeDBInstances"):
            find_idle_rds_instances(mock_boto3_session, _REGION)

    def test_describe_db_instances_unauthorized_raises_permission_error(self, mock_boto3_session):
        self._wire_failing_describe(mock_boto3_session, _client_error("UnauthorizedOperation"))
        with pytest.raises(PermissionError):
            find_idle_rds_instances(mock_boto3_session, _REGION)

    def test_describe_db_instances_other_client_error_propagates(self, mock_boto3_session):
        self._wire_failing_describe(mock_boto3_session, _client_error("InternalServerError"))
        with pytest.raises(ClientError):
            find_idle_rds_instances(mock_boto3_session, _REGION)

    def test_describe_db_instances_botocore_error_propagates(self, mock_boto3_session):
        self._wire_failing_describe(mock_boto3_session, _botocore_error())
        with pytest.raises(BotoCoreError):
            find_idle_rds_instances(mock_boto3_session, _REGION)

    def test_cloudwatch_access_denied_raises_permission_error(self, mock_boto3_session):
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_side_effect=_client_error("AccessDenied"),
        )
        with pytest.raises(PermissionError, match="cloudwatch:GetMetricStatistics"):
            find_idle_rds_instances(mock_boto3_session, _REGION)

    def test_cloudwatch_unauthorized_raises_permission_error(self, mock_boto3_session):
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_side_effect=_client_error("UnauthorizedOperation"),
        )
        with pytest.raises(PermissionError):
            find_idle_rds_instances(mock_boto3_session, _REGION)

    def test_cloudwatch_throttle_raises_not_skipped(self, mock_boto3_session):
        """Throttling is a required-call failure → FAIL RULE, not SKIP or LOW confidence."""
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_side_effect=_client_error("ThrottlingException"),
        )
        with pytest.raises(ClientError):
            find_idle_rds_instances(mock_boto3_session, _REGION)

    def test_cloudwatch_internal_error_raises(self, mock_boto3_session):
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_side_effect=_client_error("InternalServerError"),
        )
        with pytest.raises(ClientError):
            find_idle_rds_instances(mock_boto3_session, _REGION)

    def test_cloudwatch_botocore_error_raises(self, mock_boto3_session):
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_side_effect=_botocore_error(),
        )
        with pytest.raises(BotoCoreError):
            find_idle_rds_instances(mock_boto3_session, _REGION)
+
+
+# ---------------------------------------------------------------------------
+# TestNormalization
+# ---------------------------------------------------------------------------
+
+
class TestNormalization:
    """Direct unit tests for _normalize_db_instance."""

    def test_string_fields_extracted(self):
        norm = _normalize_db_instance(_make_instance(), _now())
        assert norm is not None
        expected = {
            "db_instance_id": "test-db",
            "normalized_status": "available",
            "engine": "mysql",
            "engine_version": "8.0.35",
            "db_instance_class": "db.t3.medium",
            "storage_type": "gp2",
        }
        for key, value in expected.items():
            assert norm[key] == value

    def test_age_days_computed(self):
        now = _now()
        created = now - timedelta(days=20)
        norm = _normalize_db_instance(_make_instance(InstanceCreateTime=created), now)
        assert norm is not None
        assert norm["age_days"] == 20

    def test_multi_az_bool_preserved(self):
        norm = _normalize_db_instance(_make_instance(MultiAZ=True), _now())
        assert norm["multi_az"] is True

    def test_multi_az_non_bool_nulled(self):
        norm = _normalize_db_instance(_make_instance(MultiAZ="yes"), _now())
        assert norm["multi_az"] is None

    def test_allocated_storage_int_preserved(self):
        norm = _normalize_db_instance(_make_instance(AllocatedStorage=200), _now())
        assert norm["allocated_storage_gib"] == 200

    def test_allocated_storage_non_int_nulled(self):
        norm = _normalize_db_instance(_make_instance(AllocatedStorage="200"), _now())
        assert norm["allocated_storage_gib"] is None

    def test_tag_list_preserved(self):
        tags = [{"Key": "env", "Value": "dev"}]
        norm = _normalize_db_instance(_make_instance(TagList=tags), _now())
        assert norm["tag_set"] == tags

    def test_tag_list_absent_defaults_to_empty_list(self):
        item = _make_instance()
        del item["TagList"]
        assert _normalize_db_instance(item, _now())["tag_set"] == []

    def test_scope_fields_null_when_absent(self):
        norm = _normalize_db_instance(_make_instance(), _now())
        for key in (
            "db_cluster_identifier",
            "read_replica_source_db_instance_identifier",
            "read_replica_source_db_cluster_identifier",
        ):
            assert norm[key] is None

    def test_scope_fields_extracted_when_present(self):
        norm = _normalize_db_instance(
            _make_instance(
                DBClusterIdentifier="my-cluster",
                ReadReplicaSourceDBInstanceIdentifier="source-db",
                ReadReplicaSourceDBClusterIdentifier="source-cluster",
            ),
            _now(),
        )
        assert norm["db_cluster_identifier"] == "my-cluster"
        assert norm["read_replica_source_db_instance_identifier"] == "source-db"
        assert norm["read_replica_source_db_cluster_identifier"] == "source-cluster"

    def test_empty_string_db_instance_id_returns_none(self):
        assert _normalize_db_instance(_make_instance(DBInstanceIdentifier=""), _now()) is None

    def test_non_dict_returns_none(self):
        now = _now()
        for bad in (None, "string", 42):
            assert _normalize_db_instance(bad, now) is None

    def test_naive_create_time_returns_none(self):
        naive = _old().replace(tzinfo=None)
        assert _normalize_db_instance(_make_instance(InstanceCreateTime=naive), _now()) is None

    def test_future_create_time_returns_none(self):
        now = _now()
        future = now + timedelta(days=1)
        assert _normalize_db_instance(_make_instance(InstanceCreateTime=future), now) is None

    def test_non_datetime_create_time_returns_none(self):
        item = _make_instance(InstanceCreateTime="2025-01-01T00:00:00Z")
        assert _normalize_db_instance(item, _now()) is None

    def test_resource_id_equals_db_instance_id(self):
        norm = _normalize_db_instance(_make_instance(), _now())
        assert norm["resource_id"] == norm["db_instance_id"] == "test-db"
+
+
+# ---------------------------------------------------------------------------
+# TestCloudWatchContract
+# ---------------------------------------------------------------------------
+
+
class TestCloudWatchContract:
    """Pin down the exact GetMetricStatistics request the rule is allowed to make."""

    def _run_and_get_kwargs(self, mock_boto3_session, **rule_kwargs):
        # Run the rule on a canonical idle instance and capture the CW call.
        _, cw = _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_response=_zero_connections_response(),
        )
        find_idle_rds_instances(mock_boto3_session, _REGION, **rule_kwargs)
        return cw, cw.get_metric_statistics.call_args.kwargs

    def test_database_connections_maximum_statistic_used(self, mock_boto3_session):
        """Spec requires DatabaseConnections with the Maximum statistic — not Sum."""
        _, kwargs = self._run_and_get_kwargs(mock_boto3_session)
        assert kwargs["MetricName"] == "DatabaseConnections"
        assert kwargs["Statistics"] == ["Maximum"]

    def test_correct_namespace(self, mock_boto3_session):
        _, kwargs = self._run_and_get_kwargs(mock_boto3_session)
        assert kwargs["Namespace"] == "AWS/RDS"

    def test_correct_dimension(self, mock_boto3_session):
        _, kwargs = self._run_and_get_kwargs(mock_boto3_session)
        assert kwargs["Dimensions"] == [{"Name": "DBInstanceIdentifier", "Value": "test-db"}]

    def test_period_is_idle_days_times_86400(self, mock_boto3_session):
        """Period = idle_days_threshold * 86400 satisfies all CW retention constraints."""
        _, kwargs = self._run_and_get_kwargs(mock_boto3_session, idle_days_threshold=14)
        assert kwargs["Period"] == 14 * 86400

    def test_exactly_one_metric_queried(self, mock_boto3_session):
        """Only DatabaseConnections — no CPU or I/O metrics."""
        cw, _ = self._run_and_get_kwargs(mock_boto3_session)
        assert cw.get_metric_statistics.call_count == 1

    def test_missing_datapoints_not_treated_as_zero(self, mock_boto3_session):
        """An empty datapoint list must not be interpreted as zero connections."""
        _setup(mock_boto3_session, [_make_instance()], cw_response=_no_datapoints_response())
        assert find_idle_rds_instances(mock_boto3_session, _REGION) == []

    def test_no_cpu_or_io_gates(self, mock_boto3_session):
        """Zero connections alone emits; CPU and storage I/O are not eligibility gates."""
        seen_metrics = []

        def record(**kwargs):
            seen_metrics.append(kwargs["MetricName"])
            return _zero_connections_response()

        _setup(mock_boto3_session, [_make_instance()], cw_side_effect=record)
        assert len(find_idle_rds_instances(mock_boto3_session, _REGION)) == 1
        for banned in ("CPUUtilization", "ReadIOPS", "WriteIOPS"):
            assert banned not in seen_metrics
+
+
+# ---------------------------------------------------------------------------
+# TestEvidenceContract
+# ---------------------------------------------------------------------------
+
+
class TestEvidenceContract:
    """The finding's evidence block must name what was and wasn't checked."""

    def _get_finding(self, mock_boto3_session, **rule_kwargs):
        # Produce exactly one canonical finding for inspection.
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_response=_zero_connections_response(),
        )
        findings = find_idle_rds_instances(mock_boto3_session, _REGION, **rule_kwargs)
        assert len(findings) == 1
        return findings[0]

    def test_evaluation_path(self, mock_boto3_session):
        finding = self._get_finding(mock_boto3_session)
        assert finding.details["evaluation_path"] == "idle-rds-instance-review-candidate"

    def test_signals_used_not_empty(self, mock_boto3_session):
        finding = self._get_finding(mock_boto3_session)
        assert len(finding.evidence.signals_used) >= 1

    def test_signals_used_mentions_available_status(self, mock_boto3_session):
        joined = " ".join(self._get_finding(mock_boto3_session).evidence.signals_used)
        assert "available" in joined

    def test_signals_used_mentions_standalone(self, mock_boto3_session):
        joined = " ".join(self._get_finding(mock_boto3_session).evidence.signals_used).lower()
        assert "standalone" in joined or "read replica" in joined

    def test_signals_used_mentions_database_connections(self, mock_boto3_session):
        joined = " ".join(self._get_finding(mock_boto3_session).evidence.signals_used)
        assert "DatabaseConnections" in joined

    def test_signals_not_checked_mentions_proxy_layers(self, mock_boto3_session):
        joined = " ".join(self._get_finding(mock_boto3_session).evidence.signals_not_checked)
        assert any(term in joined for term in ("RDS Proxy", "PgBouncer", "connection pool"))

    def test_time_window_matches_threshold(self, mock_boto3_session):
        finding = self._get_finding(mock_boto3_session, idle_days_threshold=30)
        assert "30" in finding.evidence.time_window

    def test_normalized_status_in_details(self, mock_boto3_session):
        finding = self._get_finding(mock_boto3_session)
        assert finding.details["normalized_status"] == "available"
+
+
+# ---------------------------------------------------------------------------
+# TestConfidenceModel
+# ---------------------------------------------------------------------------
+
+
class TestConfidenceModel:
    """Confidence is fixed at MEDIUM; there is no LOW path."""

    def test_always_medium_confidence(self, mock_boto3_session):
        """Spec §12: MEDIUM confidence when datapoints exist and are all zero."""
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_response=_zero_connections_response(),
        )
        findings = find_idle_rds_instances(mock_boto3_session, _REGION)
        assert findings[0].confidence.value == "medium"

    def test_no_low_confidence_path(self, mock_boto3_session):
        """Missing datapoints mean SKIP ITEM — never a LOW-confidence finding."""
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_response=_no_datapoints_response(),
        )
        assert find_idle_rds_instances(mock_boto3_session, _REGION) == []
+
+
+# ---------------------------------------------------------------------------
+# TestCostModel
+# ---------------------------------------------------------------------------
+
+
class TestCostModel:
    """No cost estimation of any kind is permitted for this rule."""

    def test_estimated_monthly_cost_usd_is_none(self, mock_boto3_session):
        """Spec §7: no hardcoded cost estimates — the field stays None."""
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_response=_zero_connections_response(),
        )
        findings = find_idle_rds_instances(mock_boto3_session, _REGION)
        assert findings[0].estimated_monthly_cost_usd is None

    def test_no_cost_fields_in_details(self, mock_boto3_session):
        """details must not smuggle in compute/storage cost figures."""
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_response=_zero_connections_response(),
        )
        details = find_idle_rds_instances(mock_boto3_session, _REGION)[0].details
        offenders = [key for key in details if "cost" in key.lower()]
        assert not offenders, f"Unexpected cost field: {offenders}"
+
+
+# ---------------------------------------------------------------------------
+# TestRiskModel
+# ---------------------------------------------------------------------------
+
+
class TestRiskModel:
    """Risk is fixed at MEDIUM for this rule."""

    def test_always_medium_risk(self, mock_boto3_session):
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_response=_zero_connections_response(),
        )
        findings = find_idle_rds_instances(mock_boto3_session, _REGION)
        assert findings[0].risk.value == "medium"
+
+
+# ---------------------------------------------------------------------------
+# TestTitleAndReasonContract
+# ---------------------------------------------------------------------------
+
+
class TestTitleAndReasonContract:
    """Exact-match identity fields on the emitted finding."""

    def _one_finding(self, mock_boto3_session):
        # All three checks inspect the same canonical finding.
        _setup(
            mock_boto3_session,
            [_make_instance()],
            cw_response=_zero_connections_response(),
        )
        return find_idle_rds_instances(mock_boto3_session, _REGION)[0]

    def test_title_exact(self, mock_boto3_session):
        assert self._one_finding(mock_boto3_session).title == "Idle RDS instance review candidate"

    def test_rule_id_exact(self, mock_boto3_session):
        assert self._one_finding(mock_boto3_session).rule_id == "aws.rds.instance.idle"

    def test_resource_type_exact(self, mock_boto3_session):
        assert self._one_finding(mock_boto3_session).resource_type == "aws.rds.instance"
+
+
+# ---------------------------------------------------------------------------
+# TestPagination
+# ---------------------------------------------------------------------------
+
+
class TestPagination:
    """The rule must exhaust every page of DescribeDBInstances."""

    @staticmethod
    def _wire_pages(mock_boto3_session, pages):
        # Multi-page wiring that _setup (single-page) cannot express.
        rds = MagicMock()
        rds.get_paginator.return_value.paginate.return_value = pages
        cloudwatch = MagicMock()
        cloudwatch.get_metric_statistics.return_value = _zero_connections_response()
        mock_boto3_session.client.side_effect = (
            lambda service, **kwargs: rds if service == "rds" else cloudwatch
        )

    def test_multiple_pages_all_processed(self, mock_boto3_session):
        """Findings from every page are emitted."""
        self._wire_pages(
            mock_boto3_session,
            [
                {"DBInstances": [_make_instance(DBInstanceIdentifier="db-1")]},
                {"DBInstances": [_make_instance(DBInstanceIdentifier="db-2")]},
                {"DBInstances": [_make_instance(DBInstanceIdentifier="db-3")]},
            ],
        )
        findings = find_idle_rds_instances(mock_boto3_session, _REGION)
        assert {f.resource_id for f in findings} == {"db-1", "db-2", "db-3"}

    def test_mixed_instances_across_pages(self, mock_boto3_session):
        """Idle and non-eligible instances may be interleaved across pages."""
        self._wire_pages(
            mock_boto3_session,
            [
                {"DBInstances": [_make_instance(DBInstanceIdentifier="idle-db")]},
                {
                    "DBInstances": [
                        _make_instance(
                            DBInstanceIdentifier="active-db",
                            DBInstanceStatus="stopped",
                        )
                    ]
                },
            ],
        )
        findings = find_idle_rds_instances(mock_boto3_session, _REGION)
        assert len(findings) == 1
        assert findings[0].resource_id == "idle-db"
+
+
+# ---------------------------------------------------------------------------
+# TestStandaloneScope
+# ---------------------------------------------------------------------------
+
+
+class TestStandaloneScope:
+ def test_all_three_scope_fields_independently_exclude(self, mock_boto3_session):
+ """Each standalone-scope exclusion field independently causes SKIP ITEM."""
+ scope_cases = [
+ {"DBClusterIdentifier": "cluster-a"},
+ {"ReadReplicaSourceDBInstanceIdentifier": "source-instance"},
+ {"ReadReplicaSourceDBClusterIdentifier": "source-cluster"},
+ ]
+ for overrides in scope_cases:
+ _setup(
+ mock_boto3_session,
+ [_make_instance(**overrides)],
+ cw_response=_zero_connections_response(),
+ )
+ findings = find_idle_rds_instances(mock_boto3_session, _REGION)
+ assert findings == [], f"Expected skip for scope overrides={overrides}"
+
+ def test_standalone_with_none_scope_fields_emits(self, mock_boto3_session):
+ """Explicit None values for all scope fields → standalone → EMIT."""
+ _setup(
+ mock_boto3_session,
+ [
+ _make_instance(
+ DBClusterIdentifier=None,
+ ReadReplicaSourceDBInstanceIdentifier=None,
+ ReadReplicaSourceDBClusterIdentifier=None,
+ )
+ ],
+ cw_response=_zero_connections_response(),
+ )
+ findings = find_idle_rds_instances(mock_boto3_session, _REGION)
+ assert len(findings) == 1
+
+ def test_read_replica_source_cluster_identifier_field_checked(self, mock_boto3_session):
+ """ReadReplicaSourceDBClusterIdentifier is the third scope field — must be checked."""
+ _setup(
+ mock_boto3_session,
+ [_make_instance(ReadReplicaSourceDBClusterIdentifier="my-source-cluster")],
+ cw_response=_zero_connections_response(),
+ )
+ findings = find_idle_rds_instances(mock_boto3_session, _REGION)
+ assert findings == []
+
+ def test_custom_threshold(self, mock_boto3_session):
+ """idle_days_threshold parameter controls age gate correctly."""
+ create_time = datetime.now(timezone.utc) - timedelta(days=20)
+ _setup(
+ mock_boto3_session,
+ [_make_instance(InstanceCreateTime=create_time)],
+ cw_response=_zero_connections_response(),
+ )
+ # 30-day threshold: 20 days < 30 → SKIP
+ findings_30 = find_idle_rds_instances(mock_boto3_session, _REGION, idle_days_threshold=30)
+ assert findings_30 == []
- findings = find_idle_rds_instances(mock_boto3_session, region)
- assert len(findings) == 1
- assert findings[0].confidence.value == "low"
+ # 14-day threshold: 20 days >= 14 → EMIT
+ findings_14 = find_idle_rds_instances(mock_boto3_session, _REGION, idle_days_threshold=14)
+ assert len(findings_14) == 1