diff --git a/sandbox-sidecar/src/routes/runRoutes.ts b/sandbox-sidecar/src/routes/runRoutes.ts index 25988baea..2d7e72156 100644 --- a/sandbox-sidecar/src/routes/runRoutes.ts +++ b/sandbox-sidecar/src/routes/runRoutes.ts @@ -17,6 +17,14 @@ export function createRunRouter( router.post("/api/v1/sandboxes/runs", (req, res, next) => { try { const parsed = runRequestSchema.parse(req.body); + + // Debug: log received metadata including AWS region + console.log("Received run request metadata:", { + hasMetadata: !!parsed.metadata, + awsRegion: parsed.metadata?.AWS_REGION || "(not set)", + awsKeyLength: parsed.metadata?.AWS_ACCESS_KEY_ID?.length || 0, + }); + const payload: SandboxRunPayload = { operation: parsed.operation, runId: parsed.run_id, diff --git a/sandbox-sidecar/src/runners/e2bRunner.ts b/sandbox-sidecar/src/runners/e2bRunner.ts index 84a6323a0..aa2760df2 100644 --- a/sandbox-sidecar/src/runners/e2bRunner.ts +++ b/sandbox-sidecar/src/runners/e2bRunner.ts @@ -47,13 +47,15 @@ export class E2BSandboxRunner implements SandboxRunner { appendLog?.(chunk); }; - // Run terraform init + // Run terraform init (with AWS creds if configured for benchmark) + const metadata = job.payload.metadata; await this.runTerraformCommand( sandbox, workDir, ["init", "-input=false", "-no-color"], logs, streamLog, + metadata, ); // Run terraform plan @@ -61,13 +63,16 @@ export class E2BSandboxRunner implements SandboxRunner { if (job.payload.isDestroy) { planArgs.splice(1, 0, "-destroy"); } - await this.runTerraformCommand(sandbox, workDir, planArgs, logs, streamLog); + await this.runTerraformCommand(sandbox, workDir, planArgs, logs, streamLog, metadata); // Get plan JSON const showResult = await this.runTerraformCommand( sandbox, workDir, ["show", "-json", "tfplan.binary"], + undefined, + undefined, + metadata, ); const planJSON = showResult.stdout; @@ -89,7 +94,15 @@ export class E2BSandboxRunner implements SandboxRunner { private async runApply(job: SandboxRunRecord, appendLog?: 
(chunk: string) => void): Promise { const requestedVersion = job.payload.terraformVersion || "1.5.7"; const requestedEngine = job.payload.engine || "terraform"; + const startTime = Date.now(); const { sandbox, needsInstall } = await this.createSandbox(requestedVersion, requestedEngine); + + logger.info({ + sandboxId: sandbox.sandboxId, + workingDir: job.payload.workingDirectory, + isDestroy: job.payload.isDestroy, + }, "Starting apply operation"); + try { // Install IaC tool if using fallback template if (needsInstall) { @@ -103,35 +116,100 @@ export class E2BSandboxRunner implements SandboxRunner { appendLog?.(chunk); }; - // Run terraform init + // Run terraform init (with AWS creds if configured for benchmark) + const metadata = job.payload.metadata; + + logger.info({ sandboxId: sandbox.sandboxId, elapsed: Date.now() - startTime }, "Starting terraform init"); await this.runTerraformCommand( sandbox, workDir, ["init", "-input=false", "-no-color"], logs, streamLog, + metadata, ); + logger.info({ sandboxId: sandbox.sandboxId, elapsed: Date.now() - startTime }, "Terraform init completed"); // Run terraform apply/destroy const applyCommand = job.payload.isDestroy ? 
"destroy" : "apply"; - await this.runTerraformCommand( + logger.info({ sandboxId: sandbox.sandboxId, command: applyCommand, elapsed: Date.now() - startTime }, "Starting terraform apply/destroy"); + const applyResult = await this.runTerraformCommand( sandbox, workDir, [applyCommand, "-auto-approve", "-input=false", "-no-color"], logs, streamLog, + metadata, ); + logger.info({ sandboxId: sandbox.sandboxId, command: applyCommand, elapsed: Date.now() - startTime }, "Terraform apply/destroy completed"); + + // Log the apply output for debugging + logger.info({ + stdout: applyResult.stdout.slice(-500), + stderr: applyResult.stderr.slice(-500), + }, "terraform apply output (last 500 chars)"); + + // Read the actual terraform.tfstate file (not terraform show -json which is different format) + // Check both standard location and workspace location + let stateBase64 = ""; + + try { + // Try standard location first + let statePath = `${workDir}/terraform.tfstate`; + let stateContent: string | null = null; + + try { + stateContent = await sandbox.files.read(statePath); + logger.info({ path: statePath }, "found state file at standard location"); + } catch { + // Try workspace location - find the workspace state directory + const lsResult = await sandbox.commands.run(`find ${workDir} -name "terraform.tfstate" -type f 2>/dev/null | head -1`); + const foundPath = lsResult.stdout.trim(); + if (foundPath) { + stateContent = await sandbox.files.read(foundPath); + logger.info({ path: foundPath }, "found state file at workspace location"); + } + } + + if (stateContent && stateContent.trim()) { + stateBase64 = Buffer.from(stateContent, "utf8").toString("base64"); + logger.info({ stateSize: stateContent.length }, "captured terraform.tfstate file"); + } else { + logger.info("no terraform.tfstate file found"); + } + } catch (err) { + // State doesn't exist - this is OK for empty applies or destroys + logger.warn({ error: err }, "no state found after apply (may be empty apply)"); + } - // 
Read the state file - const statePath = `${workDir}/terraform.tfstate`; - const stateContent = await sandbox.files.read(statePath); const result: SandboxRunResult = { - state: Buffer.from(stateContent, "utf8").toString("base64"), + state: stateBase64, }; + logger.info({ sandboxId: sandbox.sandboxId, elapsed: Date.now() - startTime }, "Apply operation completed successfully"); return { logs: logs.join(""), result }; + } catch (err) { + const elapsed = Date.now() - startTime; + const errorMessage = err instanceof Error ? err.message : String(err); + + // Log detailed error info for debugging sandbox termination issues + logger.error({ + sandboxId: sandbox.sandboxId, + elapsed, + elapsedSeconds: Math.round(elapsed / 1000), + errorMessage, + errorType: err instanceof Error ? err.constructor.name : typeof err, + workingDir: job.payload.workingDirectory, + }, "Apply operation failed - sandbox may have been terminated"); + + throw err; } finally { - await sandbox.kill(); + try { + await sandbox.kill(); + } catch (killErr) { + // Sandbox may already be dead, that's fine + logger.debug({ killErr }, "Failed to kill sandbox (may already be terminated)"); + } } } @@ -157,11 +235,21 @@ export class E2BSandboxRunner implements SandboxRunner { logger.warn({ templateId, engine, version }, "no pre-built template found, will install at runtime"); } - logger.info({ templateId }, "creating E2B sandbox"); + + const sandboxTimeoutSeconds = 60 * 60; // 1 hour + + + + logger.info({ + templateId, + timeoutSeconds: sandboxTimeoutSeconds, + }, "creating E2B sandbox"); + const sandbox = await Sandbox.create(templateId, { apiKey: this.options.apiKey, + timeoutMs: sandboxTimeoutSeconds * 1000, }); - logger.info({ sandboxId: sandbox.sandboxId }, "E2B sandbox created"); + logger.info({ sandboxId: sandbox.sandboxId }, "E2B sandbox created with extended timeout"); // Store engine metadata for command execution (sandbox as any)._requestedEngine = engine; @@ -169,6 +257,33 @@ export class 
E2BSandboxRunner implements SandboxRunner { return { sandbox, needsInstall }; } + /** + * Build environment variables for Terraform execution. + * Includes AWS credentials if provided in metadata for benchmark runs. + */ + private buildTerraformEnvs(metadata?: Record): Record { + const envs: Record = { + TF_IN_AUTOMATION: "1", + }; + + // Inject AWS credentials if provided (for benchmark runs with real resources) + if (metadata?.AWS_ACCESS_KEY_ID) { + envs.AWS_ACCESS_KEY_ID = metadata.AWS_ACCESS_KEY_ID; + envs.AWS_SECRET_ACCESS_KEY = metadata.AWS_SECRET_ACCESS_KEY || ""; + envs.AWS_REGION = metadata.AWS_REGION || "us-east-1"; + // Also set default region for AWS SDK + envs.AWS_DEFAULT_REGION = envs.AWS_REGION; + logger.info({ + region: envs.AWS_REGION, + keyLength: envs.AWS_ACCESS_KEY_ID.length, + }, "AWS credentials injected into terraform environment"); + } else { + logger.warn("No AWS credentials in metadata - AWS resources will fail"); + } + + return envs; + } + private async installIacTool(sandbox: Sandbox, engine: string, version: string): Promise { logger.info({ engine, version }, "installing IaC tool at runtime"); @@ -231,6 +346,20 @@ export class E2BSandboxRunner implements SandboxRunner { // Use gunzip + tar separately for better compatibility across tar versions await sandbox.commands.run(`cd ${workDir} && gunzip -c bundle.tar.gz | tar -x --exclude='terraform.tfstate' --exclude='terraform.tfstate.backup'`); + // Debug: List extracted files to understand archive structure + const listResult = await sandbox.commands.run(`find ${workDir} -type f -name "*.tf" | head -20`); + logger.info({ + tfFiles: listResult.stdout.trim().split('\n').filter(Boolean), + workDir, + workingDirectory: job.payload.workingDirectory || '(none)' + }, "extracted terraform files"); + + // Also list all files for debugging + const allFilesResult = await sandbox.commands.run(`ls -la ${workDir}`); + logger.info({ + files: allFilesResult.stdout + }, "workspace directory listing"); + // 
Determine the execution directory const execDir = job.payload.workingDirectory ? `${workDir}/${job.payload.workingDirectory}` @@ -273,6 +402,7 @@ export class E2BSandboxRunner implements SandboxRunner { args: string[], logBuffer?: string[], appendLog?: (chunk: string) => void, + metadata?: Record, ): Promise<{ stdout: string; stderr: string }> { const engine = (sandbox as any)._requestedEngine || "terraform"; const binaryName = engine === "tofu" ? "tofu" : "terraform"; @@ -289,13 +419,25 @@ export class E2BSandboxRunner implements SandboxRunner { appendLog?.(chunk); }; + // Use long timeout for benchmarks (1 hour) - EKS and large operations need this + // Pro tier supports up to 24 hours, Hobby up to 1 hour + const timeoutMs = 60 * 60 * 1000; // 1 hour + + // Explicitly extend sandbox lifetime before running long commands + // This ensures the sandbox won't be killed mid-operation + try { + await Sandbox.setTimeout(sandbox.sandboxId, timeoutMs, { apiKey: this.options.apiKey }); + logger.info({ sandboxId: sandbox.sandboxId, timeoutMs }, "Extended sandbox timeout before command"); + } catch (err) { + logger.warn({ err, sandboxId: sandbox.sandboxId }, "Failed to extend sandbox timeout (continuing anyway)"); + } + const result = await sandbox.commands.run(cmdStr, { cwd, - envs: { - TF_IN_AUTOMATION: "1", - }, + envs: this.buildTerraformEnvs(metadata), onStdout: pipeChunk, onStderr: pipeChunk, + timeoutMs, }); const stdout = result.stdout; diff --git a/sandbox-sidecar/src/templateRegistry.ts b/sandbox-sidecar/src/templateRegistry.ts index ae0760aea..5cd4e708a 100644 --- a/sandbox-sidecar/src/templateRegistry.ts +++ b/sandbox-sidecar/src/templateRegistry.ts @@ -8,7 +8,7 @@ export interface TemplateInfo { } // Template version - bump this when the build recipe changes -const TEMPLATE_VERSION = "0.1.2"; +const TEMPLATE_VERSION = "0.1.3"; // Generate alias matching the build system function aliasFor(engine: string, version: string, tplVersion: string): string { diff --git 
a/sandbox-sidecar/templates/build-all.ts b/sandbox-sidecar/templates/build-all.ts index 99c88c747..e8551f819 100644 --- a/sandbox-sidecar/templates/build-all.ts +++ b/sandbox-sidecar/templates/build-all.ts @@ -28,8 +28,8 @@ async function main() { await Template.build(buildTemplateObject(spec), { alias, - cpuCount: 2, - memoryMB: 4096, + cpuCount: 8, // Max for Pro tier (was 2) + memoryMB: 8192, // 8GB - Max for Pro tier (was 4GB) onBuildLogs: defaultBuildLogger(), }); diff --git a/sandbox-sidecar/templates/build.ts b/sandbox-sidecar/templates/build.ts index 0f2d0f79f..18f5bfe68 100644 --- a/sandbox-sidecar/templates/build.ts +++ b/sandbox-sidecar/templates/build.ts @@ -5,8 +5,8 @@ import { template } from "./test-template.ts"; async function main() { const buildInfo = await Template.build(template, { alias: "terraform-prebuilt-new", // template name / alias - cpuCount: 4, - memoryMB: 2048, + cpuCount: 8, // Max for Pro tier + memoryMB: 8192, // 8GB - Max for Pro tier onBuildLogs: defaultBuildLogger(), }); diff --git a/sandbox-sidecar/templates/manifest.ts b/sandbox-sidecar/templates/manifest.ts index 3748542e2..bc091889e 100644 --- a/sandbox-sidecar/templates/manifest.ts +++ b/sandbox-sidecar/templates/manifest.ts @@ -7,7 +7,7 @@ export interface TemplateSpec { tplVersion: string; } -export const TEMPLATE_VERSION = "0.1.2"; // bump this when recipe changes +export const TEMPLATE_VERSION = "0.1.3"; // bump this when recipe changes export const TEMPLATES: TemplateSpec[] = [ { engine: "terraform", engineVersion: "1.0.11", tplVersion: TEMPLATE_VERSION }, diff --git a/taco/cmd/statesman/main.go b/taco/cmd/statesman/main.go index 360ddc7a7..0ec098c6b 100644 --- a/taco/cmd/statesman/main.go +++ b/taco/cmd/statesman/main.go @@ -8,6 +8,7 @@ import ( "net/http" "os" "os/signal" + "strings" "time" "github.com/diggerhq/digger/opentaco/internal/analytics" @@ -21,6 +22,7 @@ import ( "github.com/diggerhq/digger/opentaco/internal/repositories" 
"github.com/diggerhq/digger/opentaco/internal/sandbox" "github.com/diggerhq/digger/opentaco/internal/storage" + "github.com/google/uuid" "github.com/kelseyhightower/envconfig" "github.com/labstack/echo/v4" echomiddleware "github.com/labstack/echo/v4/middleware" @@ -101,7 +103,16 @@ func main() { if err != nil { slog.Warn("Failed to list units from storage", "error", err) } else { + syncedCount := 0 + skippedCount := 0 for _, unit := range units { + // Skip non-unit paths (config-versions, plans, runs, etc.) + // Valid unit paths are: {org-uuid}/{unit-uuid} + if !isValidUnitPath(unit.ID) { + skippedCount++ + continue + } + if err := queryStore.SyncEnsureUnit(context.Background(), unit.ID); err != nil { slog.Warn("Failed to sync unit", "unit_id", unit.ID, "error", err) continue @@ -110,8 +121,9 @@ func main() { if err := queryStore.SyncUnitMetadata(context.Background(), unit.ID, unit.Size, unit.Updated); err != nil { slog.Warn("Failed to sync metadata for unit", "unit_id", unit.ID, "error", err) } + syncedCount++ } - slog.Info("Synced units from storage to database", "count", len(units)) + slog.Info("Synced units from storage to database", "synced", syncedCount, "skipped_non_units", skippedCount) } } else { slog.Info("Query backend already has units, skipping sync", "count", len(existingUnits)) @@ -275,3 +287,22 @@ func main() { analytics.SendEssential("server_shutdown_complete") slog.Info("Server shutdown complete") } + +// isValidUnitPath checks if a storage path matches the expected unit format: {org-uuid}/{unit-uuid} +// This filters out TFE-related paths like config-versions/, plans/, runs/, etc. 
+func isValidUnitPath(path string) bool { + parts := strings.SplitN(strings.Trim(path, "/"), "/", 2) + if len(parts) != 2 { + return false + } + + // Both parts must be valid UUIDs + if _, err := uuid.Parse(parts[0]); err != nil { + return false + } + if _, err := uuid.Parse(parts[1]); err != nil { + return false + } + + return true +} diff --git a/taco/internal/api/routes.go b/taco/internal/api/routes.go index a2245194c..fe64d14e2 100644 --- a/taco/internal/api/routes.go +++ b/taco/internal/api/routes.go @@ -5,9 +5,11 @@ import ( "fmt" "log" "net/http" + "os" "time" "github.com/diggerhq/digger/opentaco/internal/analytics" + "github.com/diggerhq/digger/opentaco/internal/github" "github.com/diggerhq/digger/opentaco/internal/tfe" authpkg "github.com/diggerhq/digger/opentaco/internal/auth" @@ -362,6 +364,48 @@ func RegisterRoutes(e *echo.Echo, deps Dependencies) { }) }) + // Register GitHub webhook for benchmarks (if OPENTACO_GITHUB_TOKEN is set) + RegisterGitHubWebhook(e, deps) + // Register webhook-authenticated internal routes (if OPENTACO_ENABLE_INTERNAL_ENDPOINTS is set) RegisterInternalRoutes(e, deps) } + +// RegisterGitHubWebhook registers the GitHub webhook endpoint for benchmark operations. +// This enables /opentaco plan, /opentaco apply, /opentaco destroy commands via PR comments. 
+// Required env vars (BOTH must be set to enable): +// - OPENTACO_GITHUB_TOKEN: GitHub personal access token or app token +// - OPENTACO_GITHUB_WEBHOOK_SECRET: Secret for validating webhook signatures (required for security) +func RegisterGitHubWebhook(e *echo.Echo, deps Dependencies) { + githubToken := os.Getenv("OPENTACO_GITHUB_TOKEN") + webhookSecret := os.Getenv("OPENTACO_GITHUB_WEBHOOK_SECRET") + + // Require BOTH token and secret to enable - security by default + if githubToken == "" || webhookSecret == "" { + if githubToken != "" && webhookSecret == "" { + log.Println("WARNING: OPENTACO_GITHUB_TOKEN set but OPENTACO_GITHUB_WEBHOOK_SECRET missing - webhook disabled for security") + } + return + } + + log.Println("Registering GitHub webhook endpoint at /webhooks/github") + + // Create GitHub client + ghClient := github.NewClient(githubToken) + + // Create command executor with sandbox and storage + executor := github.NewCommandExecutor( + ghClient, + deps.Sandbox, + deps.Repository, + deps.BlobStore, + ) + + // Create webhook handler + handler := github.NewWebhookHandler(ghClient, executor) + + // Register the webhook endpoint (no auth required - uses webhook signature validation) + e.POST("/webhooks/github", handler.HandleWebhook) + + log.Println("GitHub webhook registered successfully") +} diff --git a/taco/internal/github/client.go b/taco/internal/github/client.go new file mode 100644 index 000000000..c9d49feac --- /dev/null +++ b/taco/internal/github/client.go @@ -0,0 +1,372 @@ +package github + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "path/filepath" + "strings" + "time" +) + +// Client provides GitHub API operations for the webhook handler +type Client struct { + token string + httpClient *http.Client + baseURL string +} + +// NewClient creates a new GitHub API client +func NewClient(token string) *Client { + return &Client{ + token: token, + httpClient: 
&http.Client{ + Timeout: 30 * time.Second, + }, + baseURL: "https://api.github.com", + } +} + +// NewClientFromEnv creates a client from environment variables +func NewClientFromEnv() (*Client, error) { + token := os.Getenv("OPENTACO_GITHUB_TOKEN") + if token == "" { + return nil, fmt.Errorf("OPENTACO_GITHUB_TOKEN is required") + } + return NewClient(token), nil +} + +// PostComment posts a comment on an issue or PR +func (c *Client) PostComment(ctx context.Context, owner, repo string, issueNumber int, body string) error { + url := fmt.Sprintf("%s/repos/%s/%s/issues/%d/comments", c.baseURL, owner, repo, issueNumber) + + payload := map[string]string{"body": body} + jsonBody, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("failed to marshal comment: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(jsonBody)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to post comment: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("GitHub API returned %d: %s", resp.StatusCode, string(body)) + } + + slog.Info("Posted comment to GitHub", + slog.String("repo", fmt.Sprintf("%s/%s", owner, repo)), + slog.Int("issue", issueNumber)) + + return nil +} + +// UpdateComment updates an existing comment +func (c *Client) UpdateComment(ctx context.Context, owner, repo string, commentID int64, body string) error { + url := fmt.Sprintf("%s/repos/%s/%s/issues/comments/%d", c.baseURL, owner, repo, commentID) + + payload := map[string]string{"body": body} + jsonBody, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("failed to marshal comment: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPatch, url, bytes.NewReader(jsonBody)) + if err != nil { + return fmt.Errorf("failed 
to create request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to update comment: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("GitHub API returned %d: %s", resp.StatusCode, string(body)) + } + + return nil +} + +// GetPullRequest fetches PR details +func (c *Client) GetPullRequest(ctx context.Context, owner, repo string, number int) (*PullRequest, error) { + url := fmt.Sprintf("%s/repos/%s/%s/pulls/%d", c.baseURL, owner, repo, number) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to get PR: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("GitHub API returned %d: %s", resp.StatusCode, string(body)) + } + + var pr PullRequest + if err := json.NewDecoder(resp.Body).Decode(&pr); err != nil { + return nil, fmt.Errorf("failed to decode PR: %w", err) + } + + return &pr, nil +} + +// DownloadRepoArchive downloads the repository at a specific ref as a tar.gz archive +func (c *Client) DownloadRepoArchive(ctx context.Context, owner, repo, ref string) ([]byte, error) { + // GitHub provides tarball downloads at /repos/{owner}/{repo}/tarball/{ref} + url := fmt.Sprintf("%s/repos/%s/%s/tarball/%s", c.baseURL, owner, repo, ref) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to download tarball: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + 
return nil, fmt.Errorf("GitHub API returned %d: %s", resp.StatusCode, string(body)) + } + + // Read the tarball + tarball, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read tarball: %w", err) + } + + slog.Info("Downloaded repo archive", + slog.String("repo", fmt.Sprintf("%s/%s", owner, repo)), + slog.String("ref", ref), + slog.Int("size", len(tarball))) + + return tarball, nil +} + +// DownloadAndRepackage downloads the repo and repackages it without the GitHub wrapper directory +// GitHub tarballs have a root directory like "owner-repo-sha/", this removes it +func (c *Client) DownloadAndRepackage(ctx context.Context, owner, repo, ref string) ([]byte, error) { + tarball, err := c.DownloadRepoArchive(ctx, owner, repo, ref) + if err != nil { + return nil, err + } + + // Repackage to remove the GitHub wrapper directory + return repackageTarball(tarball) +} + +// repackageTarball removes the root directory wrapper from a GitHub tarball +func repackageTarball(input []byte) ([]byte, error) { + // Open the gzipped input + gzr, err := gzip.NewReader(bytes.NewReader(input)) + if err != nil { + return nil, fmt.Errorf("failed to create gzip reader: %w", err) + } + defer gzr.Close() + + tr := tar.NewReader(gzr) + + // Create output buffer + var buf bytes.Buffer + gzw := gzip.NewWriter(&buf) + tw := tar.NewWriter(gzw) + + // Find the prefix (first directory) to strip + var prefix string + var fileCount int + var tfFileCount int + + slog.Info("Starting tarball repackage", slog.Int("input_size", len(input))) + + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("failed to read tar: %w", err) + } + + // Skip PAX extended headers (GitHub tarballs use PAX format) + // These have Typeflag 'x' (PAX header) or 'g' (global PAX header) + // or names like "pax_global_header" or "PaxHeader/" + if hdr.Typeflag == tar.TypeXHeader || hdr.Typeflag == tar.TypeXGlobalHeader { + continue + } + if 
strings.HasPrefix(hdr.Name, "pax_global_header") || strings.HasPrefix(hdr.Name, "PaxHeader") { + continue + } + + // Detect and strip the prefix directory (the GitHub wrapper like "owner-repo-sha/") + if prefix == "" { + parts := strings.SplitN(hdr.Name, "/", 2) + if len(parts) > 0 && parts[0] != "" { + prefix = parts[0] + "/" + slog.Info("Detected GitHub tarball prefix to strip", slog.String("prefix", prefix)) + } + } + + // Skip the root directory entry itself + if hdr.Name == prefix || hdr.Name == strings.TrimSuffix(prefix, "/") { + continue + } + + // Strip the prefix from the path + newName := strings.TrimPrefix(hdr.Name, prefix) + if newName == "" { + continue + } + + fileCount++ + if strings.HasSuffix(newName, ".tf") { + tfFileCount++ + } + + // Create new header with stripped path + newHdr := &tar.Header{ + Name: newName, + Mode: hdr.Mode, + Size: hdr.Size, + ModTime: hdr.ModTime, + Typeflag: hdr.Typeflag, + } + + if err := tw.WriteHeader(newHdr); err != nil { + return nil, fmt.Errorf("failed to write header: %w", err) + } + + if hdr.Size > 0 { + if _, err := io.Copy(tw, tr); err != nil { + return nil, fmt.Errorf("failed to copy file: %w", err) + } + } + } + + slog.Info("Repackaged tarball completed", + slog.String("stripped_prefix", prefix), + slog.Int("total_files", fileCount), + slog.Int("tf_files", tfFileCount)) + + if tfFileCount == 0 { + slog.Warn("No .tf files found in archive after repackaging!") + } + + if err := tw.Close(); err != nil { + return nil, fmt.Errorf("failed to close tar writer: %w", err) + } + if err := gzw.Close(); err != nil { + return nil, fmt.Errorf("failed to close gzip writer: %w", err) + } + + return buf.Bytes(), nil +} + +// CreateRepoArchiveFromDir creates a tar.gz archive from a local directory +// This is useful for testing or when the repo is already cloned +func CreateRepoArchiveFromDir(dir string) ([]byte, error) { + var buf bytes.Buffer + gzw := gzip.NewWriter(&buf) + tw := tar.NewWriter(gzw) + + err := 
filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Get relative path + relPath, err := filepath.Rel(dir, path) + if err != nil { + return err + } + + // Skip the root + if relPath == "." { + return nil + } + + // Skip .git directory + if strings.HasPrefix(relPath, ".git") { + if info.IsDir() { + return filepath.SkipDir + } + return nil + } + + hdr, err := tar.FileInfoHeader(info, "") + if err != nil { + return err + } + hdr.Name = relPath + + if err := tw.WriteHeader(hdr); err != nil { + return err + } + + if !info.IsDir() { + f, err := os.Open(path) + if err != nil { + return err + } + defer f.Close() + if _, err := io.Copy(tw, f); err != nil { + return err + } + } + + return nil + }) + + if err != nil { + return nil, fmt.Errorf("failed to create archive: %w", err) + } + + if err := tw.Close(); err != nil { + return nil, fmt.Errorf("failed to close tar: %w", err) + } + if err := gzw.Close(); err != nil { + return nil, fmt.Errorf("failed to close gzip: %w", err) + } + + return buf.Bytes(), nil +} + +func (c *Client) setHeaders(req *http.Request) { + req.Header.Set("Authorization", "Bearer "+c.token) + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + req.Header.Set("Content-Type", "application/json") +} + diff --git a/taco/internal/github/commands.go b/taco/internal/github/commands.go new file mode 100644 index 000000000..f2b240f11 --- /dev/null +++ b/taco/internal/github/commands.go @@ -0,0 +1,602 @@ +package github + +import ( + "context" + "fmt" + "log/slog" + "os" + "regexp" + "strings" + "time" + + "github.com/diggerhq/digger/opentaco/internal/domain" + "github.com/diggerhq/digger/opentaco/internal/sandbox" + "github.com/diggerhq/digger/opentaco/internal/storage" + "github.com/google/uuid" +) + +// CommandExecutor executes terraform commands via the sandbox +type CommandExecutor struct { + client *Client + sandbox sandbox.Sandbox + 
unitRepo domain.UnitRepository + store storage.UnitStore +} + +// OrgID used for GitHub benchmark operations +const benchmarkOrgID = "github-benchmark" + +// ExecuteRequest contains everything needed to execute a command +type ExecuteRequest struct { + Command Command + Owner string + Repo string + PRNumber int + Branch string + CommitSHA string +} + +// NewCommandExecutor creates a new command executor +func NewCommandExecutor( + client *Client, + sandbox sandbox.Sandbox, + unitRepo domain.UnitRepository, + store storage.UnitStore, +) *CommandExecutor { + return &CommandExecutor{ + client: client, + sandbox: sandbox, + unitRepo: unitRepo, + store: store, + } +} + +// Execute runs the terraform command and returns the result +func (e *CommandExecutor) Execute(ctx context.Context, req *ExecuteRequest) *CommandResult { + result := &CommandResult{ + Command: req.Command, + Success: false, + } + + totalStart := time.Now() + + logger := slog.Default().With( + slog.String("action", req.Command.Action), + slog.String("repo", fmt.Sprintf("%s/%s", req.Owner, req.Repo)), + slog.Int("pr", req.PRNumber), + ) + + // 1. Download repository + logger.Info("Downloading repository") + cloneStart := time.Now() + archive, err := e.client.DownloadAndRepackage(ctx, req.Owner, req.Repo, req.CommitSHA) + if err != nil { + result.Error = fmt.Sprintf("Failed to download repository: %v", err) + logger.Error("Download failed", slog.String("error", err.Error())) + result.Timing.Total = time.Since(totalStart) + return result + } + result.Timing.Clone = time.Since(cloneStart) + logger.Info("Repository downloaded", slog.Duration("duration", result.Timing.Clone)) + + // 2. Generate unit ID for state storage + // Format: github/{owner}/{repo}/pr-{number} + unitID := fmt.Sprintf("github/%s/%s/pr-%d", req.Owner, req.Repo, req.PRNumber) + + // 3. 
Load existing state if any + var state []byte + logger.Info("Looking for existing state", slog.String("unit_id", unitID)) + if meta, err := e.unitRepo.Get(ctx, unitID); err == nil && meta != nil { + logger.Info("Unit found, downloading state...") + if stateData, err := e.store.Download(ctx, unitID); err == nil { + state = stateData + logger.Info("Loaded existing state", slog.Int("size", len(state))) + } else { + logger.Warn("Failed to download state", slog.String("error", err.Error())) + } + } else { + logger.Info("No existing unit/state found", slog.String("error", fmt.Sprintf("%v", err))) + } + + // 4. Get terraform version from options or use default + tfVersion := req.Command.Options["version"] + if tfVersion == "" { + tfVersion = os.Getenv("OPENTACO_DEFAULT_TF_VERSION") + if tfVersion == "" { + tfVersion = "1.5.7" // Default version + } + } + + // 5. Get engine (terraform or tofu) + engine := req.Command.Options["engine"] + if engine == "" { + engine = os.Getenv("OPENTACO_DEFAULT_ENGINE") + if engine == "" { + engine = "terraform" + } + } + + // 6. Get working directory if specified (empty means root of repo) + workingDir := req.Command.Options["dir"] + // Don't default to "." - the sandbox handles empty string as the root workspace + + // 7. 
Generate run ID + runID := uuid.New().String() + + // Create sandbox request metadata + metadata := map[string]string{ + "github_owner": req.Owner, + "github_repo": req.Repo, + "github_pr": fmt.Sprintf("%d", req.PRNumber), + "github_branch": req.Branch, + "github_sha": req.CommitSHA, + "command_action": req.Command.Action, + "benchmark": "true", // Flag for benchmark mode + } + + // Pass AWS credentials to sandbox if configured + // Note: Only passed in metadata, never logged for security + if awsKey := os.Getenv("AWS_ACCESS_KEY_ID"); awsKey != "" { + metadata["AWS_ACCESS_KEY_ID"] = awsKey + metadata["AWS_SECRET_ACCESS_KEY"] = os.Getenv("AWS_SECRET_ACCESS_KEY") + metadata["AWS_REGION"] = os.Getenv("AWS_REGION") + if metadata["AWS_REGION"] == "" { + metadata["AWS_REGION"] = os.Getenv("AWS_DEFAULT_REGION") + } + if metadata["AWS_REGION"] == "" { + metadata["AWS_REGION"] = "us-east-1" + } + // Log that credentials are configured (not the values) + logger.Info("AWS credentials configured for sandbox", + slog.String("region", metadata["AWS_REGION"]), + slog.Int("key_length", len(awsKey))) + } else { + logger.Warn("AWS_ACCESS_KEY_ID not set - AWS resources will fail") + } + + // Save clone timing before the switch (since result gets reassigned) + cloneTime := result.Timing.Clone + + // 8. 
Execute based on action + switch req.Command.Action { + case "plan": + result = e.executePlan(ctx, logger, req, runID, unitID, archive, state, tfVersion, engine, workingDir, metadata, totalStart) + case "apply": + result = e.executeApply(ctx, logger, req, runID, unitID, archive, state, tfVersion, engine, workingDir, metadata, totalStart, false) + case "destroy": + result = e.executeApply(ctx, logger, req, runID, unitID, archive, state, tfVersion, engine, workingDir, metadata, totalStart, true) + case "benchmark": + result = e.executeBenchmark(ctx, logger, req, runID, unitID, archive, tfVersion, engine, workingDir, metadata, totalStart) + // Restore the clone time we measured before the switch + result.Timing.Clone = cloneTime + default: + result.Error = fmt.Sprintf("Unknown action: %s", req.Command.Action) + } + + // For non-benchmark actions, recalculate Clone as total minus init/execute + if req.Command.Action != "benchmark" { + result.Timing.Clone = time.Since(cloneStart) - result.Timing.Init - result.Timing.Execute + if result.Timing.Clone < 0 { + result.Timing.Clone = 0 + } + } + + return result +} + +func (e *CommandExecutor) executePlan( + ctx context.Context, + logger *slog.Logger, + req *ExecuteRequest, + runID, unitID string, + archive, state []byte, + tfVersion, engine, workingDir string, + metadata map[string]string, + totalStart time.Time, +) *CommandResult { + result := &CommandResult{ + Command: req.Command, + Success: false, + } + + if e.sandbox == nil { + result.Error = "Sandbox provider not configured" + result.Timing.Total = time.Since(totalStart) + return result + } + + // Generate a config version ID for the sandbox + configVersionID := fmt.Sprintf("cv-%s", uuid.New().String()[:8]) + + planReq := &sandbox.PlanRequest{ + RunID: runID, + PlanID: uuid.New().String(), + OrgID: "github-benchmark", + UnitID: unitID, + ConfigurationVersionID: configVersionID, + TerraformVersion: tfVersion, + Engine: engine, + WorkingDirectory: workingDir, + 
ConfigArchive: archive, + State: state, + Metadata: metadata, + } + + logger.Info("Executing plan in sandbox", + slog.String("run_id", runID), + slog.String("engine", engine), + slog.String("version", tfVersion)) + + executeStart := time.Now() + planResult, err := e.sandbox.ExecutePlan(ctx, planReq) + result.Timing.Execute = time.Since(executeStart) + + if err != nil { + result.Error = fmt.Sprintf("Plan execution failed: %v", err) + result.Timing.Total = time.Since(totalStart) + logger.Error("Plan failed", slog.String("error", err.Error())) + return result + } + + result.Success = true + result.Output = planResult.Logs + result.Summary = formatPlanSummary(planResult) + + // Parse init time from logs if available + result.Timing.Init = parseInitTime(planResult.Logs) + // Adjust execute time to be plan-only (subtract init) + if result.Timing.Init > 0 && result.Timing.Execute > result.Timing.Init { + result.Timing.Execute = result.Timing.Execute - result.Timing.Init + } + + result.Timing.Total = time.Since(totalStart) + + logger.Info("Plan completed", + slog.Bool("has_changes", planResult.HasChanges), + slog.Int("additions", planResult.ResourceAdditions), + slog.Int("changes", planResult.ResourceChanges), + slog.Int("destructions", planResult.ResourceDestructions), + slog.Duration("total", result.Timing.Total)) + + return result +} + +func (e *CommandExecutor) executeApply( + ctx context.Context, + logger *slog.Logger, + req *ExecuteRequest, + runID, unitID string, + archive, state []byte, + tfVersion, engine, workingDir string, + metadata map[string]string, + totalStart time.Time, + isDestroy bool, +) *CommandResult { + result := &CommandResult{ + Command: req.Command, + Success: false, + } + + if e.sandbox == nil { + result.Error = "Sandbox provider not configured" + result.Timing.Total = time.Since(totalStart) + return result + } + + // Generate a config version ID for the sandbox + configVersionID := fmt.Sprintf("cv-%s", uuid.New().String()[:8]) + + applyReq := 
&sandbox.ApplyRequest{ + RunID: runID, + PlanID: uuid.New().String(), + OrgID: "github-benchmark", + UnitID: unitID, + ConfigurationVersionID: configVersionID, + IsDestroy: isDestroy, + TerraformVersion: tfVersion, + Engine: engine, + WorkingDirectory: workingDir, + ConfigArchive: archive, + State: state, + Metadata: metadata, + } + + action := "apply" + if isDestroy { + action = "destroy" + } + + logger.Info(fmt.Sprintf("Executing %s in sandbox", action), + slog.String("run_id", runID), + slog.String("engine", engine), + slog.String("version", tfVersion), + slog.Bool("is_destroy", isDestroy)) + + executeStart := time.Now() + applyResult, err := e.sandbox.ExecuteApply(ctx, applyReq) + result.Timing.Execute = time.Since(executeStart) + + if err != nil { + result.Error = fmt.Sprintf("%s execution failed: %v", strings.Title(action), err) + result.Timing.Total = time.Since(totalStart) + logger.Error(fmt.Sprintf("%s failed", action), slog.String("error", err.Error())) + return result + } + + // Save the new state + logger.Info("Apply result received", + slog.Int("state_size", len(applyResult.State)), + slog.Int("logs_size", len(applyResult.Logs)), + slog.Bool("is_destroy", isDestroy)) + + if len(applyResult.State) > 0 && !isDestroy { + if err := e.saveState(ctx, unitID, applyResult.State); err != nil { + logger.Warn("Failed to save state", slog.String("error", err.Error())) + } else { + logger.Info("State saved successfully", + slog.String("unit_id", unitID), + slog.Int("size", len(applyResult.State))) + } + } else if !isDestroy { + logger.Warn("No state returned from apply - state will not persist!") + } + + // For destroy, clean up the state + if isDestroy { + if err := e.cleanupState(ctx, unitID); err != nil { + logger.Warn("Failed to cleanup state", slog.String("error", err.Error())) + } else { + logger.Info("State cleaned up after destroy") + } + } + + result.Success = true + result.Output = applyResult.Logs + result.Summary = formatApplySummary(applyResult.Logs, 
isDestroy) + + // Parse init time from logs if available + result.Timing.Init = parseInitTime(applyResult.Logs) + // Adjust execute time + if result.Timing.Init > 0 && result.Timing.Execute > result.Timing.Init { + result.Timing.Execute = result.Timing.Execute - result.Timing.Init + } + + result.Timing.Total = time.Since(totalStart) + + logger.Info(fmt.Sprintf("%s completed", action), + slog.Duration("total", result.Timing.Total)) + + return result +} + +// executeBenchmark runs apply followed by destroy in a single flow +// This keeps state in the sandbox and ensures resources are cleaned up +func (e *CommandExecutor) executeBenchmark( + ctx context.Context, + logger *slog.Logger, + req *ExecuteRequest, + runID, unitID string, + archive []byte, + tfVersion, engine, workingDir string, + metadata map[string]string, + totalStart time.Time, +) *CommandResult { + result := &CommandResult{ + Command: req.Command, + Success: false, + } + + if e.sandbox == nil { + result.Error = "Sandbox provider not configured" + result.Timing.Total = time.Since(totalStart) + return result + } + + // Generate a config version ID for the sandbox + configVersionID := fmt.Sprintf("cv-%s", uuid.New().String()[:8]) + + logger.Info("Starting benchmark: apply + destroy cycle", + slog.String("run_id", runID), + slog.String("engine", engine), + slog.String("version", tfVersion)) + + var allLogs strings.Builder + + // Phase 1: Apply + applyStart := time.Now() + applyReq := &sandbox.ApplyRequest{ + RunID: runID, + PlanID: uuid.New().String(), + OrgID: "github-benchmark", + UnitID: unitID, + ConfigurationVersionID: configVersionID, + IsDestroy: false, + TerraformVersion: tfVersion, + Engine: engine, + WorkingDirectory: workingDir, + ConfigArchive: archive, + State: nil, // Fresh apply + Metadata: metadata, + } + + applyResult, err := e.sandbox.ExecuteApply(ctx, applyReq) + result.Timing.Apply = time.Since(applyStart) + + if err != nil { + result.Error = fmt.Sprintf("Apply phase failed: %v", err) + 
result.Timing.Total = time.Since(totalStart) + logger.Error("Benchmark apply failed", slog.String("error", err.Error())) + return result + } + + allLogs.WriteString("=== APPLY PHASE ===\n") + allLogs.WriteString(applyResult.Logs) + allLogs.WriteString("\n\n") + + logger.Info("Benchmark apply completed", + slog.Duration("duration", result.Timing.Apply), + slog.Int("state_size", len(applyResult.State))) + + // Phase 2: Destroy (using state from apply) + destroyStart := time.Now() + destroyReq := &sandbox.ApplyRequest{ + RunID: runID + "-destroy", + PlanID: uuid.New().String(), + OrgID: "github-benchmark", + UnitID: unitID, + ConfigurationVersionID: configVersionID, + IsDestroy: true, + TerraformVersion: tfVersion, + Engine: engine, + WorkingDirectory: workingDir, + ConfigArchive: archive, + State: applyResult.State, // Use state from apply + Metadata: metadata, + } + + destroyResult, err := e.sandbox.ExecuteApply(ctx, destroyReq) + result.Timing.Destroy = time.Since(destroyStart) + + if err != nil { + result.Error = fmt.Sprintf("Destroy phase failed (resources may be orphaned!): %v", err) + result.Timing.Total = time.Since(totalStart) + logger.Error("Benchmark destroy failed", slog.String("error", err.Error())) + return result + } + + allLogs.WriteString("=== DESTROY PHASE ===\n") + allLogs.WriteString(destroyResult.Logs) + + logger.Info("Benchmark destroy completed", + slog.Duration("duration", result.Timing.Destroy)) + + // Success! 
+ result.Success = true + result.Output = allLogs.String() + result.Summary = fmt.Sprintf("Apply: %.2fs | Destroy: %.2fs | Total: %.2fs", + result.Timing.Apply.Seconds(), + result.Timing.Destroy.Seconds(), + time.Since(totalStart).Seconds()) + + result.Timing.Total = time.Since(totalStart) + + logger.Info("Benchmark completed successfully", + slog.Duration("apply", result.Timing.Apply), + slog.Duration("destroy", result.Timing.Destroy), + slog.Duration("total", result.Timing.Total)) + + return result +} + +func (e *CommandExecutor) saveState(ctx context.Context, unitID string, state []byte) error { + // Check if unit exists, create if not + if _, err := e.unitRepo.Get(ctx, unitID); err != nil { + // Create the unit with the benchmark org ID + _, err = e.unitRepo.Create(ctx, benchmarkOrgID, unitID) + if err != nil { + return fmt.Errorf("failed to create unit: %w", err) + } + } + + // Save state (empty lock ID since we're not holding a lock) + if err := e.store.Upload(ctx, unitID, state, ""); err != nil { + return fmt.Errorf("failed to save state: %w", err) + } + + return nil +} + +func (e *CommandExecutor) cleanupState(ctx context.Context, unitID string) error { + // Delete the unit and state + if err := e.unitRepo.Delete(ctx, unitID); err != nil { + return fmt.Errorf("failed to delete unit: %w", err) + } + return nil +} + +// formatPlanSummary creates a summary from plan results +func formatPlanSummary(result *sandbox.PlanResult) string { + if !result.HasChanges { + return "No changes. Infrastructure is up-to-date." + } + return fmt.Sprintf("%d to add, %d to change, %d to destroy", + result.ResourceAdditions, + result.ResourceChanges, + result.ResourceDestructions) +} + +// formatApplySummary creates a summary from apply output +func formatApplySummary(logs string, isDestroy bool) string { + // Try to extract the summary line from terraform output + // Example: "Apply complete! Resources: 10 added, 0 changed, 0 destroyed." + // Example: "Destroy complete! 
Resources: 10 destroyed." + + lines := strings.Split(logs, "\n") + for _, line := range lines { + if strings.Contains(line, "Apply complete!") || strings.Contains(line, "Destroy complete!") { + return strings.TrimSpace(line) + } + } + + if isDestroy { + return "Destroy completed" + } + return "Apply completed" +} + +// parseInitTime attempts to extract init duration from terraform logs +func parseInitTime(logs string) time.Duration { + // Look for patterns like "Initializing..." to "Terraform has been successfully initialized!" + // This is approximate since terraform doesn't output exact timing + + // Try to find "initialized" marker and estimate based on log positions + // For now, return 0 and let the caller use the full execute time + // In a more sophisticated implementation, we could: + // 1. Have the sandbox report separate init/execute times + // 2. Parse timestamps from logs if available + + // Check for common patterns that indicate init completed + if strings.Contains(logs, "Terraform has been successfully initialized") || + strings.Contains(logs, "OpenTofu has been successfully initialized") { + // Init marker found, but the logs carry no timestamps to measure its duration + // This is a placeholder - real timing should come from sandbox + return 0 + } + + return 0 +} + +// extractResourceCounts extracts add/change/destroy counts from terraform output +func extractResourceCounts(logs string) (add, change, destroy int) { + // Pattern: "Plan: X to add, Y to change, Z to destroy" + // Pattern: "Apply complete! 
Resources: X added, Y changed, Z destroyed" + + patterns := []struct { + regex *regexp.Regexp + addIdx int + chgIdx int + desIdx int + }{ + { + regexp.MustCompile(`Plan: (\d+) to add, (\d+) to change, (\d+) to destroy`), + 1, 2, 3, + }, + { + regexp.MustCompile(`Resources: (\d+) added, (\d+) changed, (\d+) destroyed`), + 1, 2, 3, + }, + } + + for _, p := range patterns { + matches := p.regex.FindStringSubmatch(logs) + if len(matches) >= 4 { + fmt.Sscanf(matches[p.addIdx], "%d", &add) + fmt.Sscanf(matches[p.chgIdx], "%d", &change) + fmt.Sscanf(matches[p.desIdx], "%d", &destroy) + return + } + } + + return 0, 0, 0 +} + diff --git a/taco/internal/github/types.go b/taco/internal/github/types.go new file mode 100644 index 000000000..5ccc69bfb --- /dev/null +++ b/taco/internal/github/types.go @@ -0,0 +1,143 @@ +package github + +import "time" + +// Webhook event types from GitHub +// Reference: https://docs.github.com/en/webhooks/webhook-events-and-payloads + +// IssueCommentEvent is triggered when a comment is created on an issue or PR +type IssueCommentEvent struct { + Action string `json:"action"` // created, edited, deleted + Issue Issue `json:"issue"` + Comment Comment `json:"comment"` + Repo Repo `json:"repository"` + Sender User `json:"sender"` +} + +// PullRequestEvent is triggered when a PR is opened, synchronized, etc. 
+type PullRequestEvent struct { + Action string `json:"action"` // opened, synchronize, closed, reopened + Number int `json:"number"` + PullRequest PullRequest `json:"pull_request"` + Repo Repo `json:"repository"` + Sender User `json:"sender"` +} + +// Issue represents a GitHub issue (PRs are also issues) +type Issue struct { + ID int64 `json:"id"` + Number int `json:"number"` + Title string `json:"title"` + Body string `json:"body"` + State string `json:"state"` // open, closed + User User `json:"user"` + Labels []Label `json:"labels"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` + // If this issue is a PR, pull_request will be non-nil + PullRequest *IssuePR `json:"pull_request,omitempty"` +} + +// IssuePR contains PR-specific fields when an issue is actually a PR +type IssuePR struct { + URL string `json:"url"` + HTMLURL string `json:"html_url"` + DiffURL string `json:"diff_url"` + PatchURL string `json:"patch_url"` +} + +// PullRequest represents a GitHub pull request +type PullRequest struct { + ID int64 `json:"id"` + Number int `json:"number"` + Title string `json:"title"` + Body string `json:"body"` + State string `json:"state"` // open, closed + Draft bool `json:"draft"` + Merged bool `json:"merged"` + User User `json:"user"` + Head Branch `json:"head"` + Base Branch `json:"base"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} + +// Branch represents a git branch reference +type Branch struct { + Ref string `json:"ref"` // branch name + SHA string `json:"sha"` // commit SHA + Repo Repo `json:"repo"` // repo containing the branch +} + +// Comment represents a GitHub comment +type Comment struct { + ID int64 `json:"id"` + Body string `json:"body"` + User User `json:"user"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} + +// Repo represents a GitHub repository +type Repo struct { + ID int64 `json:"id"` + Name string `json:"name"` + FullName string 
`json:"full_name"` // owner/repo + Owner User `json:"owner"` + Private bool `json:"private"` + HTMLURL string `json:"html_url"` + CloneURL string `json:"clone_url"` + SSHURL string `json:"ssh_url"` + DefaultBranch string `json:"default_branch"` +} + +// User represents a GitHub user +type User struct { + ID int64 `json:"id"` + Login string `json:"login"` + Type string `json:"type"` // User, Bot, Organization + Email string `json:"email,omitempty"` +} + +// Label represents a GitHub issue/PR label +type Label struct { + ID int64 `json:"id"` + Name string `json:"name"` + Color string `json:"color"` +} + +// Command represents a parsed /opentaco command +type Command struct { + Action string // plan, apply, destroy + Options map[string]string // additional flags + Raw string // original comment text +} + +// CommandResult holds the result of executing a command +type CommandResult struct { + Command Command + Success bool + Error string + Timing TimingBreakdown + Output string + Summary string +} + +// TimingBreakdown holds timing for each phase +type TimingBreakdown struct { + Clone time.Duration `json:"clone"` + Init time.Duration `json:"init"` + Execute time.Duration `json:"execute"` // plan, apply, or destroy time + Apply time.Duration `json:"apply,omitempty"` // for benchmark: apply phase + Destroy time.Duration `json:"destroy,omitempty"` // for benchmark: destroy phase + Total time.Duration `json:"total"` +} + +// WebhookConfig holds configuration for the GitHub webhook handler +type WebhookConfig struct { + WebhookSecret string // Secret for validating webhook signatures + AppID string // GitHub App ID (optional, for App auth) + PrivateKeyPath string // Path to GitHub App private key (optional) + Token string // Personal access token (alternative to App auth) +} + diff --git a/taco/internal/github/webhook.go b/taco/internal/github/webhook.go new file mode 100644 index 000000000..1b213ba16 --- /dev/null +++ b/taco/internal/github/webhook.go @@ -0,0 +1,333 @@ 
+package github + +import ( + "context" + "crypto/hmac" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "strings" + + "github.com/labstack/echo/v4" +) + +// WebhookHandler handles GitHub webhook events +type WebhookHandler struct { + client *Client + executor *CommandExecutor + secret string +} + +// NewWebhookHandler creates a new webhook handler +// Note: The webhook secret is required at registration time (see RegisterGitHubWebhook) +func NewWebhookHandler(client *Client, executor *CommandExecutor) *WebhookHandler { + return &WebhookHandler{ + client: client, + executor: executor, + secret: os.Getenv("OPENTACO_GITHUB_WEBHOOK_SECRET"), + } +} + +// HandleWebhook is the main webhook endpoint handler +func (h *WebhookHandler) HandleWebhook(c echo.Context) error { + // Read the body + body, err := io.ReadAll(c.Request().Body) + if err != nil { + slog.Error("Failed to read webhook body", slog.String("error", err.Error())) + return c.JSON(http.StatusBadRequest, map[string]string{"error": "failed to read body"}) + } + + // Validate webhook signature (required - enforced at registration) + signature := c.Request().Header.Get("X-Hub-Signature-256") + if !h.validateSignature(body, signature) { + slog.Warn("Invalid webhook signature received") + return c.JSON(http.StatusUnauthorized, map[string]string{"error": "invalid signature"}) + } + + // Get event type + eventType := c.Request().Header.Get("X-GitHub-Event") + deliveryID := c.Request().Header.Get("X-GitHub-Delivery") + + slog.Info("Received GitHub webhook", + slog.String("event", eventType), + slog.String("delivery_id", deliveryID)) + + switch eventType { + case "issue_comment": + return h.handleIssueComment(c, body) + case "pull_request": + // Optional: auto-plan on PR open + return h.handlePullRequest(c, body) + case "ping": + return c.JSON(http.StatusOK, map[string]string{"message": "pong"}) + default: + slog.Debug("Ignoring unhandled event type", 
slog.String("event", eventType)) + return c.JSON(http.StatusOK, map[string]string{"message": "event ignored"}) + } +} + +// handleIssueComment processes issue/PR comment events +func (h *WebhookHandler) handleIssueComment(c echo.Context, body []byte) error { + ctx := c.Request().Context() + + var event IssueCommentEvent + if err := json.Unmarshal(body, &event); err != nil { + slog.Error("Failed to parse issue comment event", slog.String("error", err.Error())) + return c.JSON(http.StatusBadRequest, map[string]string{"error": "failed to parse event"}) + } + + // Only process new comments + if event.Action != "created" { + return c.JSON(http.StatusOK, map[string]string{"message": "ignoring non-created action"}) + } + + // Only process comments on PRs (issues with pull_request field) + if event.Issue.PullRequest == nil { + return c.JSON(http.StatusOK, map[string]string{"message": "ignoring non-PR comment"}) + } + + // Ignore bot comments to prevent loops + if event.Sender.Type == "Bot" { + return c.JSON(http.StatusOK, map[string]string{"message": "ignoring bot comment"}) + } + + // Parse command from comment + cmd := ParseCommand(event.Comment.Body) + if cmd == nil { + return c.JSON(http.StatusOK, map[string]string{"message": "no command found"}) + } + + slog.Info("Processing command", + slog.String("action", cmd.Action), + slog.String("repo", event.Repo.FullName), + slog.Int("pr", event.Issue.Number), + slog.String("user", event.Sender.Login)) + + // Parse owner/repo + parts := strings.Split(event.Repo.FullName, "/") + if len(parts) != 2 { + return c.JSON(http.StatusBadRequest, map[string]string{"error": "invalid repo name"}) + } + owner, repo := parts[0], parts[1] + + // Get PR details to find the branch + pr, err := h.client.GetPullRequest(ctx, owner, repo, event.Issue.Number) + if err != nil { + slog.Error("Failed to get PR details", slog.String("error", err.Error())) + // Post error comment + h.client.PostComment(ctx, owner, repo, event.Issue.Number, + 
fmt.Sprintf("❌ **OpenTaco Error**\n\nFailed to get PR details: %s", err.Error())) + return c.JSON(http.StatusOK, map[string]string{"message": "failed to get PR"}) + } + + // Post acknowledgment comment + ackMsg := fmt.Sprintf("🚀 **OpenTaco** starting `%s`...\n\n_Downloading repository and preparing sandbox..._", cmd.Action) + h.client.PostComment(ctx, owner, repo, event.Issue.Number, ackMsg) + + // Execute command asynchronously with background context + // (the HTTP request context is canceled after response is sent) + go func() { + // Use background context since HTTP request will complete before execution finishes + bgCtx := context.Background() + + result := h.executor.Execute(bgCtx, &ExecuteRequest{ + Command: *cmd, + Owner: owner, + Repo: repo, + PRNumber: event.Issue.Number, + Branch: pr.Head.Ref, + CommitSHA: pr.Head.SHA, + }) + + // Post result comment + resultComment := FormatResult(result) + if err := h.client.PostComment(bgCtx, owner, repo, event.Issue.Number, resultComment); err != nil { + slog.Error("Failed to post result comment", slog.String("error", err.Error())) + } + }() + + return c.JSON(http.StatusOK, map[string]string{"message": "command accepted"}) +} + +// handlePullRequest processes PR events (optional auto-plan) +func (h *WebhookHandler) handlePullRequest(c echo.Context, body []byte) error { + var event PullRequestEvent + if err := json.Unmarshal(body, &event); err != nil { + slog.Error("Failed to parse PR event", slog.String("error", err.Error())) + return c.JSON(http.StatusBadRequest, map[string]string{"error": "failed to parse event"}) + } + + // Only process opened/synchronize if auto-plan is enabled + autoPlan := os.Getenv("OPENTACO_GITHUB_AUTO_PLAN") == "true" + if !autoPlan { + return c.JSON(http.StatusOK, map[string]string{"message": "auto-plan disabled"}) + } + + if event.Action != "opened" && event.Action != "synchronize" { + return c.JSON(http.StatusOK, map[string]string{"message": "ignoring action"}) + } + + // TODO: Implement 
auto-plan on PR open/sync + slog.Info("Would auto-plan for PR", + slog.String("repo", event.Repo.FullName), + slog.Int("pr", event.Number), + slog.String("action", event.Action)) + + return c.JSON(http.StatusOK, map[string]string{"message": "auto-plan not yet implemented"}) +} + +// validateSignature validates the webhook signature +func (h *WebhookHandler) validateSignature(body []byte, signature string) bool { + if !strings.HasPrefix(signature, "sha256=") { + return false + } + + expected := signature[7:] // Remove "sha256=" prefix + + mac := hmac.New(sha256.New, []byte(h.secret)) + mac.Write(body) + computed := hex.EncodeToString(mac.Sum(nil)) + + return hmac.Equal([]byte(expected), []byte(computed)) +} + +// ParseCommand parses an /opentaco command from comment text +func ParseCommand(text string) *Command { + lines := strings.Split(text, "\n") + + for _, line := range lines { + line = strings.TrimSpace(line) + + // Check for /opentaco command + if !strings.HasPrefix(line, "/opentaco ") && line != "/opentaco" { + continue + } + + // Parse the command + parts := strings.Fields(line) + if len(parts) < 2 { + continue + } + + action := strings.ToLower(parts[1]) + + // Validate action + switch action { + case "plan", "apply", "destroy", "benchmark": + cmd := &Command{ + Action: action, + Options: make(map[string]string), + Raw: line, + } + + // Parse additional options + for i := 2; i < len(parts); i++ { + opt := parts[i] + if strings.HasPrefix(opt, "--") { + // Handle --key=value or --flag + opt = strings.TrimPrefix(opt, "--") + if idx := strings.Index(opt, "="); idx > 0 { + cmd.Options[opt[:idx]] = opt[idx+1:] + } else { + cmd.Options[opt] = "true" + } + } + } + + return cmd + default: + // Unknown action, skip + continue + } + } + + return nil +} + +// FormatResult formats a command result as a markdown comment +func FormatResult(result *CommandResult) string { + var sb strings.Builder + + // Header based on action + switch result.Command.Action { + case "plan": 
+ if result.Success { + sb.WriteString("## ✅ OpenTaco Plan\n\n") + } else { + sb.WriteString("## ❌ OpenTaco Plan Failed\n\n") + } + case "apply": + if result.Success { + sb.WriteString("## ✅ OpenTaco Apply\n\n") + } else { + sb.WriteString("## ❌ OpenTaco Apply Failed\n\n") + } + case "destroy": + if result.Success { + sb.WriteString("## ✅ OpenTaco Destroy\n\n") + } else { + sb.WriteString("## ❌ OpenTaco Destroy Failed\n\n") + } + case "benchmark": + if result.Success { + sb.WriteString("## ✅ OpenTaco Benchmark Complete\n\n") + } else { + sb.WriteString("## ❌ OpenTaco Benchmark Failed\n\n") + } + } + + // Timing breakdown + sb.WriteString(fmt.Sprintf("**Total Duration:** %.2fs\n\n", result.Timing.Total.Seconds())) + sb.WriteString("| Phase | Duration |\n") + sb.WriteString("|-------|----------|\n") + if result.Timing.Clone > 0 { + sb.WriteString(fmt.Sprintf("| Clone | %.2fs |\n", result.Timing.Clone.Seconds())) + } + if result.Timing.Init > 0 { + sb.WriteString(fmt.Sprintf("| Init | %.2fs |\n", result.Timing.Init.Seconds())) + } + + switch result.Command.Action { + case "plan": + sb.WriteString(fmt.Sprintf("| Plan | %.2fs |\n", result.Timing.Execute.Seconds())) + case "apply": + sb.WriteString(fmt.Sprintf("| Apply | %.2fs |\n", result.Timing.Execute.Seconds())) + case "destroy": + sb.WriteString(fmt.Sprintf("| Destroy | %.2fs |\n", result.Timing.Execute.Seconds())) + case "benchmark": + sb.WriteString(fmt.Sprintf("| Apply | %.2fs |\n", result.Timing.Apply.Seconds())) + sb.WriteString(fmt.Sprintf("| Destroy | %.2fs |\n", result.Timing.Destroy.Seconds())) + } + + sb.WriteString("\n") + + // Summary + if result.Summary != "" { + sb.WriteString(fmt.Sprintf("**Summary:** %s\n\n", result.Summary)) + } + + // Error message if failed + if !result.Success && result.Error != "" { + sb.WriteString(fmt.Sprintf("**Error:** %s\n\n", result.Error)) + } + + // Full output in collapsible section + if result.Output != "" { + sb.WriteString("
\nFull Output\n\n```\n") + // Truncate if too long + output := result.Output + if len(output) > 50000 { + output = output[:50000] + "\n... (truncated)" + } + sb.WriteString(output) + sb.WriteString("\n```\n
\n") + } + + return sb.String() +} + diff --git a/ui/src/routes/statesman/github/webhook.tsx b/ui/src/routes/statesman/github/webhook.tsx new file mode 100644 index 000000000..2e166f655 --- /dev/null +++ b/ui/src/routes/statesman/github/webhook.tsx @@ -0,0 +1,59 @@ +import { createFileRoute } from '@tanstack/react-router'; + +// GitHub webhook passthrough to Statesman +// This route receives GitHub webhooks and forwards them to the internal Statesman service +// Enable by setting STATESMAN_GITHUB_WEBHOOK_ENABLED=true +export const Route = createFileRoute('/statesman/github/webhook')({ + server: { + handlers: { + POST: async ({ request }) => { + // Check if GitHub webhook is enabled + if (process.env.STATESMAN_GITHUB_WEBHOOK_ENABLED !== 'true') { + console.log('GitHub webhook disabled (STATESMAN_GITHUB_WEBHOOK_ENABLED not set)'); + return new Response(JSON.stringify({ error: 'GitHub webhook not enabled' }), { + status: 404, + headers: { 'Content-Type': 'application/json' } + }); + } + + const statesmanUrl = process.env.STATESMAN_BACKEND_URL; + if (!statesmanUrl) { + console.error('STATESMAN_BACKEND_URL not configured'); + return new Response(JSON.stringify({ error: 'Backend not configured' }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + + try { + console.log('Forwarding GitHub webhook to Statesman'); + + // Forward all headers (including GitHub signature headers) + const response = await fetch(`${statesmanUrl}/webhooks/github`, { + method: 'POST', + headers: request.headers, + body: request.body, + // @ts-expect-error: 'duplex' is required by Node/undici for streaming bodies + duplex: 'half', + }); + + // Return the response from Statesman + const responseBody = await response.text(); + return new Response(responseBody, { + status: response.status, + headers: { + 'Content-Type': response.headers.get('Content-Type') || 'application/json', + }, + }); + } catch (error) { + console.error('Error forwarding GitHub webhook to Statesman:', 
error); + return new Response(JSON.stringify({ error: 'Internal server error' }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + }, + }, + }, +}); +