Zipstack · hari-kuriakose · May 19, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/.github/scripts/combine-test-reports.sh b/.github/scripts/combine-test-reports.sh
@@ -1,171 +1,27 @@
 #!/bin/bash
+# Thin wrapper around `python3 -m tests.rig report combine`.
+#
+# Kept for backward compatibility with any external script or local workflow
+# that still invokes this path. Prefer calling the rig directly.
 set -euo pipefail
 
-# Script to combine multiple test reports into a single markdown file
-# Usage: ./combine-test-reports.sh
-
-OUTPUT_FILE="combined-test-report.md"
-REPORTS=()
-
-# Find all test report files
-for report in runner-report.md sdk1-report.md; do
-    if [ -f "$report" ]; then
-        REPORTS+=("$report")
-    fi
-done
-
-# Exit if no reports found
-if [ ${#REPORTS[@]} -eq 0 ]; then
-    echo "No test reports found. Skipping report generation."
-    exit 0
+REPORTS_DIR="${REPORTS_DIR:-reports}"
+
+# Stock Ubuntu CI runners only ship `python3`, not `python`. Pick whichever is
+# on PATH; bail loudly if neither.
+if command -v python3 >/dev/null 2>&1; then
+    PY=python3
+elif command -v python >/dev/null 2>&1; then
+    PY=python
+else
+    echo "combine-test-reports.sh: no python interpreter on PATH" >&2
+    exit 1
 fi
 
-# Function to strip LaTeX formatting from pytest-md-report output
-# Converts $$\textcolor{...}{\tt{VALUE}}$$ to just VALUE
-strip_latex() {
-    local text="$1"
-    # Extract content between \tt{ and }}
-    if [[ "$text" =~ \\tt\{([^}]+)\} ]]; then
-        echo "${BASH_REMATCH[1]}"
-    else
-        echo "$text"
-    fi
-}
-
-# Function to extract test counts from pytest-md-report markdown table
-extract_test_counts() {
-    local report_file=$1
-    local passed=0
-    local failed=0
-    local total=0
-
-    # Find the header row to determine column positions
-    local header_line=$(grep -E '^\|.*filepath' "$report_file" | head -1)
-
-    if [ -z "$header_line" ]; then
-        echo "0:0:0"
-        return
-    fi
-
-    # Extract column names and find positions (strip LaTeX from headers)
-    IFS='|' read -ra headers <<< "$header_line"
-    local passed_col=-1
-    local failed_col=-1
-    local subtotal_col=-1
-
-    for i in "${!headers[@]}"; do
-        local col=$(strip_latex "${headers[$i]}" | tr -d ' ' | tr '[:upper:]' '[:lower:]')
-        case "$col" in
-            passed) passed_col=$i ;;
-            failed) failed_col=$i ;;
-            subtotal|sub) subtotal_col=$i ;;
-        esac
-    done
-
-    # Find the TOTAL row (TOTAL appears in first column, not as SUBTOTAL in header)
-    local total_line=$(grep -E '^\|.*\\tt\{TOTAL\}' "$report_file" | head -1)
-
-    if [ -z "$total_line" ]; then
-        echo "0:0:0"
-        return
-    fi
-
-    # Parse the TOTAL row values
-    IFS='|' read -ra values <<< "$total_line"
-
-    # Extract passed count (strip LaTeX and get number)
-    if [ "$passed_col" -ge 0 ] && [ "$passed_col" -lt "${#values[@]}" ]; then
-        local clean_value=$(strip_latex "${values[$passed_col]}")
-        passed=$(echo "$clean_value" | tr -d ' ' | grep -oE '[0-9]+' | head -1 || echo "0")
-    fi
-
-    # Extract failed count (strip LaTeX and get number)
-    if [ "$failed_col" -ge 0 ] && [ "$failed_col" -lt "${#values[@]}" ]; then
-        local clean_value=$(strip_latex "${values[$failed_col]}")
-        failed=$(echo "$clean_value" | tr -d ' ' | grep -oE '[0-9]+' | head -1 || echo "0")
-    fi
-
-    # Extract total from SUBTOTAL column (strip LaTeX and get number)
-    if [ "$subtotal_col" -ge 0 ] && [ "$subtotal_col" -lt "${#values[@]}" ]; then
-        local clean_value=$(strip_latex "${values[$subtotal_col]}")
-        total=$(echo "$clean_value" | tr -d ' ' | grep -oE '[0-9]+' | head -1 || echo "0")
-    fi
-
-    # If total is still 0, calculate from passed + failed
-    if [ "$total" -eq 0 ]; then
-        total=$((passed + failed))
-    fi
-
-    echo "${total}:${passed}:${failed}"
-}
+"$PY" -m tests.rig report combine --reports-dir "$REPORTS_DIR"
 
-# Initialize the combined report with collapsed summary
-cat > "$OUTPUT_FILE" << 'EOF'
-# Test Results
-
-<details open>
-<summary><b>Summary</b></summary>
-
-EOF
-
-# Extract and display summary for each report
-for report in "${REPORTS[@]}"; do
-    report_name=$(basename "$report" .md)
-
-    # Convert report name to title case
-    if [ "$report_name" = "runner-report" ]; then
-        title="Runner Tests"
-    elif [ "$report_name" = "sdk1-report" ]; then
-        title="SDK1 Tests"
-    else
-        title="${report_name}"
-    fi
-
-    # Extract counts
-    counts=$(extract_test_counts "$report")
-    IFS=':' read -r total passed failed <<< "$counts"
-
-    # Determine status icon
-    if [ "$failed" -gt 0 ]; then
-        status="❌"
-    elif [ "$passed" -gt 0 ]; then
-        status="✅"
-    else
-        status="⚠️"
-    fi
-
-    echo "- ${status} **${title}**: ${passed} passed, ${failed} failed (${total} total)" >> "$OUTPUT_FILE"
-done
-
-cat >> "$OUTPUT_FILE" << 'EOF'
-
-</details>
-
----
-
-EOF
-
-# Combine all reports with collapsible sections
-for report in "${REPORTS[@]}"; do
-    report_name=$(basename "$report" .md)
-
-    # Convert report name to title case
-    if [ "$report_name" = "runner-report" ]; then
-        title="Runner Tests"
-    elif [ "$report_name" = "sdk1-report" ]; then
-        title="SDK1 Tests"
-    else
-        title="${report_name}"
-    fi
-
-    echo "<details>" >> "$OUTPUT_FILE"
-    echo "<summary><b>${title} - Full Report</b></summary>" >> "$OUTPUT_FILE"
-    echo "" >> "$OUTPUT_FILE"
-    cat "$report" >> "$OUTPUT_FILE"
-    echo "" >> "$OUTPUT_FILE"
-    echo "</details>" >> "$OUTPUT_FILE"
-    echo "" >> "$OUTPUT_FILE"
-done
-
-echo "Combined test report created: $OUTPUT_FILE"
-echo "Included reports: ${REPORTS[*]}"
+# Backward-compat copy at the repo root, refreshed every run like the old script's
+# output; -ef skips the copy when both paths are already the same file.
+if [ -f "$REPORTS_DIR/combined-test-report.md" ] && [ ! "$REPORTS_DIR/combined-test-report.md" -ef combined-test-report.md ]; then
+    cp -f "$REPORTS_DIR/combined-test-report.md" combined-test-report.md
+fi
diff --git a/.github/workflows/ci-test-e2e.yaml b/.github/workflows/ci-test-e2e.yaml
@@ -0,0 +1,85 @@
+name: Run e2e tests (rig + docker compose)
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    types: [labeled, synchronize]
+    branches: [main]
+  schedule:
+    # Nightly at 02:00 UTC.
+    - cron: "0 2 * * *"
+  workflow_dispatch:
+
+jobs:
+  e2e:
+    # Only run on PRs that opt in via the `run-e2e` label, plus main + nightly + manual.
+    if: >
+      github.event_name != 'pull_request' ||
+      contains(github.event.pull_request.labels.*.name, 'run-e2e')
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+        with:
+          version: "0.6.14"
+          python-version: 3.12.9
+
+      - name: Install tox with UV
+        run: uv tool install tox --with tox-uv
+
+      - name: Validate test manifests
+        run: tox -e rig -- validate
+
+      - name: Restore main-branch test baseline
+        # See ci-test.yaml for the rationale on namespacing per-workflow.
+        uses: actions/cache@v5
+        with:
+          path: reports/previous-summary.json
+          key: unstract-test-baseline-e2e-main-${{ github.run_id }}
+          restore-keys: |
+            unstract-test-baseline-e2e-main-
+            unstract-test-baseline-e2e-
+
+      - name: Run e2e tier via docker compose
+        env:
+          UNSTRACT_E2E_RUNTIME: compose
+        run: |
+          # Use --tier e2e (not `all`) so this workflow runs only e2e groups.
+          if [ "$GITHUB_REF" = "refs/heads/main" ]; then  # runner env var (same value as github.ref); nothing is interpolated into the script
+            tox -e e2e -- --fail-on-critical-gap --update-baseline
+          else
+            tox -e e2e
+          fi
+
+      - name: Capture docker compose logs on failure
+        if: failure()
+        run: |
+          mkdir -p reports
+          docker compose -p unstract-test \
+            -f docker/docker-compose.yaml \
+            -f tests/compose/docker-compose.test.yaml \
+            logs --no-color > reports/docker-compose-logs.txt || true
+
+      - name: Output e2e report to job summary
+        if: always() && hashFiles('reports/summary.md') != ''  # skip when no summary file exists
+        shell: bash
+        run: |
+          cat reports/summary.md >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload e2e reports artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-reports-e2e
+          path: reports/
+          if-no-files-found: ignore
+          retention-days: 30
diff --git a/.github/workflows/ci-test.yaml b/.github/workflows/ci-test.yaml
@@ -1,4 +1,4 @@
-name: Run tox tests with UV
+name: Run unit + integration tests (rig)
 
 on:
   push:
@@ -24,11 +24,12 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v6
+        with:
+          fetch-depth: 0  # rig --changed-only needs git history
 
       - name: Install uv
         uses: astral-sh/setup-uv@v7
         with:
-          # Install a specific version of uv.
           version: "0.6.14"
           python-version: 3.12.9
 
@@ -40,29 +41,78 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-tox-uv-
 
+      - name: Restore main-branch test baseline (for regression detection)
+        # actions/cache only saves on a cache miss. Include the run id in the
+        # key so each main build writes a fresh cache entry; the prefix in
+        # restore-keys pulls the most recent baseline.
+        #
+        # The unit/integration lane keeps a SEPARATE baseline from the e2e
+        # workflow because their `scope_groups` don't overlap — restoring an
+        # e2e-tier baseline here would flag every e2e-covered path as a
+        # regression in this lane (and vice versa). Each workflow is the
+        # source of truth for the paths covered by its own tiers.
+        uses: actions/cache@v5
+        with:
+          path: reports/previous-summary.json
+          key: unstract-test-baseline-ut-main-${{ github.run_id }}
+          restore-keys: |
+            unstract-test-baseline-ut-main-
+            unstract-test-baseline-ut-
+
       - name: Install tox with UV
         run: uv tool install tox --with tox-uv
 
-      - name: Run tox
-        id: tox
+      - name: Validate test manifests
+        # Cheap pre-flight: catches groups.yaml / critical_paths.yaml schema
+        # errors before we spend minutes on tier runs. Also catches malformed
+        # manifests on PRs that only touch paths-ignored files (because this
+        # step always runs).
+        run: tox -e rig -- validate
+
+      - name: Run unit tier
+        # Each tier runs as a separate rig invocation so its results land in
+        # reports/<group>/ before the next tier starts. --update-baseline (on
+        # main only) merges this tier's covered paths into the cached
+        # previous-summary.json; later tiers union on top.
         run: |
-          tox
+          if [ "$GITHUB_REF" = "refs/heads/main" ]; then  # runner env var (same value as github.ref); nothing is interpolated into the script
+            tox -e unit -- --fail-on-critical-gap --update-baseline
+          else
+            tox -e unit
+          fi
 
-      - name: Combine test reports
-        if: always() && (hashFiles('runner-report.md') != '' || hashFiles('sdk1-report.md') != '')
+      - name: Run integration tier
+        if: always()  # still run (and report) when the unit tier step failed
         run: |
-          bash .github/scripts/combine-test-reports.sh
+          if [ "$GITHUB_REF" = "refs/heads/main" ]; then  # runner env var (same value as github.ref); nothing is interpolated into the script
+            tox -e integration -- --fail-on-critical-gap --update-baseline
+          else
+            tox -e integration
+          fi
+
+      - name: Re-aggregate reports from both tiers
+        if: always()
+        run: tox -e rig -- report combine
 
       - name: Render combined test report to PR
         uses: marocchino/sticky-pull-request-comment@70d2764d1a7d5d9560b100cbea0077fc8f633987 # v3.0.2
-        if: always() && hashFiles('combined-test-report.md') != '' && github.event.pull_request.head.repo.fork == false
+        if: always() && hashFiles('reports/combined-test-report.md') != '' && github.event.pull_request.head.repo.fork == false
         with:
           header: test-results
           recreate: true
-          path: combined-test-report.md
+          path: reports/combined-test-report.md
 
       - name: Output combined report to job summary
-        if: always() && hashFiles('combined-test-report.md') != ''
+        if: always() && hashFiles('reports/summary.md') != ''  # skip when no summary file exists
         shell: bash
         run: |
-          cat combined-test-report.md >> $GITHUB_STEP_SUMMARY
+          cat reports/summary.md >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload reports artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-reports-unit-integration
+          path: reports/
+          if-no-files-found: ignore
+          retention-days: 14
diff --git a/.gitignore b/.gitignore
@@ -713,3 +713,7 @@ AGENTS.md
 
 # MCP servers
 .serena
+
+# Unstract test rig
+reports/
+.test-selection