Merged
120 changes: 86 additions & 34 deletions README.md
@@ -1,22 +1,39 @@
# gpuaudit

Scan your AWS account for GPU waste and get actionable recommendations to cut your cloud spend.
Scan your cloud for GPU waste and get actionable recommendations to cut your spend.

```
$ gpuaudit scan --profile ml-prod
$ gpuaudit scan --skip-eks

GPU Fleet Summary
Total GPU instances: 14
Total monthly GPU spend: $47,832
Estimated monthly waste: $18,240 (38%)
Found 103 GPU nodes across 111 nodes in ml-prod-iad

CRITICAL (3 instances, $8,940/mo potential savings)
gpuaudit — GPU Cost Audit for AWS
Account: 123456789012 | Regions: us-east-1 | Duration: 4.2s

i-0a1b2c3d4e g5.12xlarge (4x A10G) $4,380/mo Idle — no activity for 18 days → terminate
i-9f8e7d6c5b p4d.24xlarge (8x A100) $23,652/mo Idle — <1% CPU for 6 days → terminate
sagemaker:asr ml.g6.48xlarge (8x L40S) $9,490/mo GPU util avg 8% → downsize to ml.g5.xlarge
┌──────────────────────────────────────────────────────────┐
│ GPU Fleet Summary │
├──────────────────────────────────────────────────────────┤
│ Total GPU instances: 103 │
│ Total monthly GPU spend: $365155 │
│ Estimated monthly waste: $23408 ( 6%) │
└──────────────────────────────────────────────────────────┘

CRITICAL — 4 instance(s), $21728/mo potential savings

Instance Type Monthly Signal Recommendation
──────────────────────────────────── ────────────────────────── ──────── ──────────────── ──────────────────────────────────────────────
ml-prod-iad/ip-10-15-255-248 g6e.16xlarge (1× L40S) $ 6752 idle Node up 13 days with 0 GPU pods scheduled.
ml-prod-iad/ip-10-22-250-15 g6e.16xlarge (1× L40S) $ 6752 idle Node up 1 day with 0 GPU pods scheduled.
...
```

## What it scans

- **EC2** — GPU instances (g4dn, g5, g6, g6e, p4d, p4de, p5, inf2, trn1) with CloudWatch metrics
- **SageMaker** — Endpoints with GPU utilization and invocation metrics
- **EKS** — Managed GPU node groups via the AWS EKS API
- **Kubernetes** — GPU nodes and pod allocation via the Kubernetes API (Karpenter, self-managed, any CNI)

## What it detects

- **Idle GPU instances** — running but doing nothing (low CPU + near-zero network for 24+ hours)
@@ -25,6 +42,7 @@ $ gpuaudit scan --profile ml-prod
- **Stale instances** — non-production instances running 90+ days
- **SageMaker low utilization** — endpoints with <10% GPU utilization
- **SageMaker oversized** — endpoints using <30% GPU memory on multi-GPU instances
- **K8s unallocated GPUs** — nodes with GPU capacity but no pods requesting GPUs
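The K8s unallocated-GPU check reduces to comparing a node's advertised `nvidia.com/gpu` capacity against the sum of GPU requests from pods bound to that node. A minimal sketch of that comparison (the `nodeGPU` type is hypothetical and simplified; the real scanner reads capacity and pod requests from the Kubernetes API):

```go
package main

import "fmt"

// nodeGPU is a hypothetical, simplified view of what the Kubernetes
// scanner reads: a node's nvidia.com/gpu capacity and the GPU requests
// of the pods scheduled on it.
type nodeGPU struct {
	name     string
	capacity int64
	requests map[string]int64 // pod name -> nvidia.com/gpu request
}

// unallocatedGPUs returns how many of the node's GPUs no pod has requested.
func unallocatedGPUs(n nodeGPU) int64 {
	var requested int64
	for _, r := range n.requests {
		requested += r
	}
	if requested >= n.capacity {
		return 0
	}
	return n.capacity - requested
}

func main() {
	idle := nodeGPU{name: "ip-10-15-255-248", capacity: 1, requests: map[string]int64{}}
	fmt.Println(unallocatedGPUs(idle)) // 1 — node up with 0 GPU pods scheduled
}
```

A node where this returns its full capacity is what the report flags as idle.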

## Install

@@ -36,7 +54,7 @@ Or build from source:

```bash
git clone https://github.com/gpuaudit/cli.git
cd gpuaudit
cd cli
go build -o gpuaudit ./cmd/gpuaudit
```

@@ -49,22 +67,57 @@ gpuaudit scan
# Specific profile and region
gpuaudit scan --profile production --region us-east-1

# Kubernetes cluster scan (uses KUBECONFIG or ~/.kube/config)
gpuaudit scan --skip-eks

# Specific kubeconfig and context
gpuaudit scan --kubeconfig ~/.kube/config --kube-context ml-prod-iad

# JSON output for automation
gpuaudit scan --format json --output report.json
gpuaudit scan --format json -o report.json

# Markdown for docs/PRs
gpuaudit scan --format markdown
# Compare two scans to see what changed
gpuaudit diff old-report.json new-report.json

# Slack Block Kit payload (pipe to webhook)
gpuaudit scan --format slack --output - | curl -X POST -H 'Content-Type: application/json' -d @- $SLACK_WEBHOOK

# Skip CloudWatch metrics (faster, less accurate)
gpuaudit scan --skip-metrics
gpuaudit scan --format slack -o - | \
  curl -X POST -H 'Content-Type: application/json' -d @- "$SLACK_WEBHOOK"

# Skip SageMaker scanning
# Skip specific scanners
gpuaudit scan --skip-metrics # faster, less accurate
gpuaudit scan --skip-sagemaker
gpuaudit scan --skip-eks # skip AWS EKS API (use --skip-k8s for Kubernetes API)
gpuaudit scan --skip-k8s
```

## Comparing scans

Save scan results as JSON, then diff them later:

```bash
gpuaudit scan --format json -o scan-apr-08.json
# ... time passes, changes happen ...
gpuaudit scan --format json -o scan-apr-15.json
gpuaudit diff scan-apr-08.json scan-apr-15.json
```

```
gpuaudit diff — 2026-04-08 12:00 UTC → 2026-04-15 12:00 UTC

┌──────────────────────────────────────────────────────────┐
│ Cost Delta │
├──────────────────────────────────────────────────────────┤
│ Monthly spend: $372000 → $365155 (-$6845) │
│ Estimated waste: $189000 → $23408 (-$165592) │
│ Instances: 116 → 103 (-13 removed, +0 added) │
└──────────────────────────────────────────────────────────┘

REMOVED — 13 instance(s), -$6845/mo
...
```

`gpuaudit diff` matches instances by ID and reports added, removed, and changed instances with per-field diffs (instance type, pricing model, cost, state, GPU allocation, waste severity).
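The matching is essentially two ID-keyed maps. A simplified sketch (the `inst` type here is a stand-in; the real `models.ScanResult` records carry many more fields and per-field diffs):

```go
package main

import "fmt"

// inst is a simplified stand-in for a scan's instance record.
type inst struct {
	ID         string
	MonthlyUSD float64
}

// diffByID matches instances across two scans by ID and partitions them
// into added, removed, and changed (here: cost changed) sets.
func diffByID(prev, curr []inst) (added, removed, changed []string) {
	prevByID := make(map[string]inst, len(prev))
	for _, i := range prev {
		prevByID[i.ID] = i
	}
	currByID := make(map[string]inst, len(curr))
	for _, i := range curr {
		currByID[i.ID] = i
	}
	for _, i := range curr {
		p, ok := prevByID[i.ID]
		switch {
		case !ok:
			added = append(added, i.ID)
		case p.MonthlyUSD != i.MonthlyUSD:
			changed = append(changed, i.ID)
		}
	}
	for _, i := range prev {
		if _, ok := currByID[i.ID]; !ok {
			removed = append(removed, i.ID)
		}
	}
	return added, removed, changed
}

func main() {
	a, r, c := diffByID(
		[]inst{{"i-1", 6752}, {"i-2", 4380}},
		[]inst{{"i-2", 2190}, {"i-3", 950}},
	)
	fmt.Println(a, r, c) // [i-3] [i-1] [i-2]
}
```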

## IAM permissions

gpuaudit is read-only. It never modifies your infrastructure. Generate the minimal IAM policy:
@@ -73,7 +126,7 @@
gpuaudit iam-policy
```

This outputs a JSON policy requiring only `Describe*`, `List*`, `Get*` permissions for EC2, SageMaker, CloudWatch, Cost Explorer, and Pricing APIs.
For Kubernetes scanning, gpuaudit needs `get`/`list` on `nodes` and `pods` cluster-wide.

## GPU pricing reference

@@ -83,27 +136,25 @@ gpuaudit pricing

# Filter by GPU model
gpuaudit pricing --gpu H100
gpuaudit pricing --gpu A10G
gpuaudit pricing --gpu T4
gpuaudit pricing --gpu L4
```

## Output formats

| Format | Flag | Use case |
|---|---|---|
| Table | `--format table` (default) | Terminal viewing |
| JSON | `--format json` | Automation, CI/CD pipelines |
| JSON | `--format json` | Automation, CI/CD, `gpuaudit diff` |
| Markdown | `--format markdown` | PRs, wikis, docs |
| Slack | `--format slack` | Slack webhook integration |

## How it works

1. **Discovery** — Scans EC2 and SageMaker across multiple regions for GPU instance families (g4dn, g5, g6, g6e, p4d, p4de, p5, inf2, trn1)
1. **Discovery** — Scans EC2, SageMaker, EKS node groups, and Kubernetes API across multiple regions for GPU resources
2. **Metrics** — Collects 7-day CloudWatch metrics: CPU, network I/O for EC2; GPU utilization, GPU memory, invocations for SageMaker
3. **Analysis** — Applies 6 waste detection rules with severity levels (critical/warning)
4. **Recommendations** — Generates specific actions (terminate, downsize, switch pricing) with estimated monthly savings

Regions scanned by default: us-east-1, us-east-2, us-west-2, eu-west-1, eu-west-2, eu-central-1, ap-southeast-1, ap-northeast-1, ap-south-1.
3. **K8s allocation** — Lists pods requesting `nvidia.com/gpu` resources and maps them to nodes
4. **Analysis** — Applies 7 waste detection rules with severity levels (critical/warning/info)
5. **Recommendations** — Generates specific actions (terminate, downsize, switch pricing) with estimated monthly savings
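The idle rule from the list above (low CPU plus near-zero network over the collected window) comes down to a threshold check. A hedged sketch — the thresholds and the `metricWindow` summary type are illustrative, not gpuaudit's actual cutoffs or internals:

```go
package main

import "fmt"

// metricWindow is an illustrative summary of the 7-day CloudWatch
// window the scanner collects per EC2 instance.
type metricWindow struct {
	avgCPUPercent      float64
	avgNetBytesPerHour float64
	hoursObserved      int
}

// isIdle applies the "idle GPU instance" rule: low CPU plus near-zero
// network for at least 24 observed hours. Thresholds are made up for
// the example.
func isIdle(w metricWindow) bool {
	return w.hoursObserved >= 24 &&
		w.avgCPUPercent < 1.0 &&
		w.avgNetBytesPerHour < 1_000_000
}

func main() {
	fmt.Println(isIdle(metricWindow{avgCPUPercent: 0.4, avgNetBytesPerHour: 12_000, hoursObserved: 144})) // true
	fmt.Println(isIdle(metricWindow{avgCPUPercent: 35, avgNetBytesPerHour: 9e9, hoursObserved: 144}))     // false
}
```

Instances that trip a rule like this are what the report surfaces as critical, with the terminate/downsize recommendation attached.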

## Project structure

@@ -113,21 +164,22 @@ gpuaudit/
├── internal/
│ ├── models/ Core data types (GPUInstance, WasteSignal, Recommendation)
│ ├── pricing/ Bundled GPU pricing database (40+ instance types)
│ ├── analysis/ Waste detection rules engine
│ ├── output/ Formatters (table, JSON, markdown, Slack)
│ └── providers/aws/ EC2, SageMaker, CloudWatch, scanner orchestrator
│ ├── analysis/ Waste detection rules engine (7 rules)
│ ├── diff/ Scan comparison logic
│ ├── output/ Formatters (table, JSON, markdown, Slack, diff)
│ └── providers/
│ ├── aws/ EC2, SageMaker, EKS, CloudWatch, Cost Explorer
│ └── k8s/ Kubernetes API GPU node/pod discovery
└── LICENSE Apache 2.0
```

## Roadmap

- [ ] AWS Cost Explorer integration (actual vs projected spend)
- [ ] EKS GPU pod discovery
- [ ] DCGM GPU metrics via Kubernetes (actual GPU utilization, not just allocation)
- [ ] SageMaker training job analysis
- [ ] Multi-account (AWS Organizations) scanning
- [ ] GCP + Azure support
- [ ] GitHub Action for scheduled scans
- [ ] Historical scan comparison (`gpuaudit diff`)

## License

55 changes: 52 additions & 3 deletions cmd/gpuaudit/main.go
@@ -13,12 +13,13 @@ import (

"github.com/spf13/cobra"

"github.com/gpuaudit/cli/internal/models"
"github.com/gpuaudit/cli/internal/analysis"
awsprovider "github.com/gpuaudit/cli/internal/providers/aws"
k8sprovider "github.com/gpuaudit/cli/internal/providers/k8s"
"github.com/gpuaudit/cli/internal/diff"
"github.com/gpuaudit/cli/internal/models"
"github.com/gpuaudit/cli/internal/output"
"github.com/gpuaudit/cli/internal/pricing"
awsprovider "github.com/gpuaudit/cli/internal/providers/aws"
k8sprovider "github.com/gpuaudit/cli/internal/providers/k8s"
)

var version = "dev"
@@ -53,6 +54,17 @@ var (
scanMinUptimeDays int
)

// --- diff command ---

var diffFormat string

var diffCmd = &cobra.Command{
Use: "diff <old.json> <new.json>",
Short: "Compare two scan results and show what changed",
Args: cobra.ExactArgs(2),
RunE: runDiff,
}

var scanCmd = &cobra.Command{
Use: "scan",
Short: "Scan AWS account for GPU waste",
@@ -74,7 +86,10 @@ func init() {
scanCmd.Flags().StringSliceVar(&scanExcludeTags, "exclude-tag", nil, "Exclude instances matching tag (key=value, repeatable)")
scanCmd.Flags().IntVar(&scanMinUptimeDays, "min-uptime-days", 0, "Only flag instances running for at least this many days")

diffCmd.Flags().StringVar(&diffFormat, "format", "table", "Output format: table, json")

rootCmd.AddCommand(scanCmd)
rootCmd.AddCommand(diffCmd)
rootCmd.AddCommand(pricingCmd)
rootCmd.AddCommand(iamPolicyCmd)
rootCmd.AddCommand(versionCmd)
@@ -144,6 +159,40 @@ func runScan(cmd *cobra.Command, args []string) error {
return nil
}

func runDiff(cmd *cobra.Command, args []string) error {
old, err := loadScanResult(args[0])
if err != nil {
return fmt.Errorf("loading old scan: %w", err)
}
newRes, err := loadScanResult(args[1])
if err != nil {
return fmt.Errorf("loading new scan: %w", err)
}

result := diff.Compare(old, newRes)

switch strings.ToLower(diffFormat) {
case "json":
return output.FormatDiffJSON(os.Stdout, result)
default:
output.FormatDiffTable(os.Stdout, result)
}

return nil
}

func loadScanResult(path string) (*models.ScanResult, error) {
data, err := os.ReadFile(path)
if err != nil {
return nil, err
}
var result models.ScanResult
if err := json.Unmarshal(data, &result); err != nil {
return nil, fmt.Errorf("parsing %s: %w", path, err)
}
return &result, nil
}

// --- pricing command ---

var pricingGPU string