Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions sandbox-sidecar/src/routes/runRoutes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ export function createRunRouter(
router.post("/api/v1/sandboxes/runs", (req, res, next) => {
try {
const parsed = runRequestSchema.parse(req.body);

// Debug: log received metadata including AWS region
console.log("Received run request metadata:", {
hasMetadata: !!parsed.metadata,
awsRegion: parsed.metadata?.AWS_REGION || "(not set)",
awsKeyLength: parsed.metadata?.AWS_ACCESS_KEY_ID?.length || 0,
});

const payload: SandboxRunPayload = {
operation: parsed.operation,
runId: parsed.run_id,
Expand Down
170 changes: 156 additions & 14 deletions sandbox-sidecar/src/runners/e2bRunner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,27 +47,32 @@ export class E2BSandboxRunner implements SandboxRunner {
appendLog?.(chunk);
};

// Run terraform init
// Run terraform init (with AWS creds if configured for benchmark)
const metadata = job.payload.metadata;
await this.runTerraformCommand(
sandbox,
workDir,
["init", "-input=false", "-no-color"],
logs,
streamLog,
metadata,
);

// Run terraform plan
const planArgs = ["plan", "-input=false", "-no-color", "-out=tfplan.binary"];
if (job.payload.isDestroy) {
planArgs.splice(1, 0, "-destroy");
}
await this.runTerraformCommand(sandbox, workDir, planArgs, logs, streamLog);
await this.runTerraformCommand(sandbox, workDir, planArgs, logs, streamLog, metadata);

// Get plan JSON
const showResult = await this.runTerraformCommand(
sandbox,
workDir,
["show", "-json", "tfplan.binary"],
undefined,
undefined,
metadata,
);

const planJSON = showResult.stdout;
Expand All @@ -89,7 +94,15 @@ export class E2BSandboxRunner implements SandboxRunner {
private async runApply(job: SandboxRunRecord, appendLog?: (chunk: string) => void): Promise<RunnerOutput> {
const requestedVersion = job.payload.terraformVersion || "1.5.7";
const requestedEngine = job.payload.engine || "terraform";
const startTime = Date.now();
const { sandbox, needsInstall } = await this.createSandbox(requestedVersion, requestedEngine);

logger.info({
sandboxId: sandbox.sandboxId,
workingDir: job.payload.workingDirectory,
isDestroy: job.payload.isDestroy,
}, "Starting apply operation");

try {
// Install IaC tool if using fallback template
if (needsInstall) {
Expand All @@ -103,35 +116,100 @@ export class E2BSandboxRunner implements SandboxRunner {
appendLog?.(chunk);
};

// Run terraform init
// Run terraform init (with AWS creds if configured for benchmark)
const metadata = job.payload.metadata;

logger.info({ sandboxId: sandbox.sandboxId, elapsed: Date.now() - startTime }, "Starting terraform init");
await this.runTerraformCommand(
sandbox,
workDir,
["init", "-input=false", "-no-color"],
logs,
streamLog,
metadata,
);
logger.info({ sandboxId: sandbox.sandboxId, elapsed: Date.now() - startTime }, "Terraform init completed");

// Run terraform apply/destroy
const applyCommand = job.payload.isDestroy ? "destroy" : "apply";
await this.runTerraformCommand(
logger.info({ sandboxId: sandbox.sandboxId, command: applyCommand, elapsed: Date.now() - startTime }, "Starting terraform apply/destroy");
const applyResult = await this.runTerraformCommand(
sandbox,
workDir,
[applyCommand, "-auto-approve", "-input=false", "-no-color"],
logs,
streamLog,
metadata,
);
logger.info({ sandboxId: sandbox.sandboxId, command: applyCommand, elapsed: Date.now() - startTime }, "Terraform apply/destroy completed");

// Log the apply output for debugging
logger.info({
stdout: applyResult.stdout.slice(-500),
stderr: applyResult.stderr.slice(-500),
}, "terraform apply output (last 500 chars)");

// Read the actual terraform.tfstate file (not terraform show -json which is different format)
// Check both standard location and workspace location
let stateBase64 = "";

try {
// Try standard location first
let statePath = `${workDir}/terraform.tfstate`;
let stateContent: string | null = null;

try {
stateContent = await sandbox.files.read(statePath);
logger.info({ path: statePath }, "found state file at standard location");
} catch {
// Try workspace location - find the workspace state directory
const lsResult = await sandbox.commands.run(`find ${workDir} -name "terraform.tfstate" -type f 2>/dev/null | head -1`);
const foundPath = lsResult.stdout.trim();
if (foundPath) {
stateContent = await sandbox.files.read(foundPath);
logger.info({ path: foundPath }, "found state file at workspace location");
}
}

if (stateContent && stateContent.trim()) {
stateBase64 = Buffer.from(stateContent, "utf8").toString("base64");
logger.info({ stateSize: stateContent.length }, "captured terraform.tfstate file");
} else {
logger.info("no terraform.tfstate file found");
}
} catch (err) {
// State doesn't exist - this is OK for empty applies or destroys
logger.warn({ error: err }, "no state found after apply (may be empty apply)");
}

// Read the state file
const statePath = `${workDir}/terraform.tfstate`;
const stateContent = await sandbox.files.read(statePath);
const result: SandboxRunResult = {
state: Buffer.from(stateContent, "utf8").toString("base64"),
state: stateBase64,
};

logger.info({ sandboxId: sandbox.sandboxId, elapsed: Date.now() - startTime }, "Apply operation completed successfully");
return { logs: logs.join(""), result };
} catch (err) {
const elapsed = Date.now() - startTime;
const errorMessage = err instanceof Error ? err.message : String(err);

// Log detailed error info for debugging sandbox termination issues
logger.error({
sandboxId: sandbox.sandboxId,
elapsed,
elapsedSeconds: Math.round(elapsed / 1000),
errorMessage,
errorType: err instanceof Error ? err.constructor.name : typeof err,
workingDir: job.payload.workingDirectory,
}, "Apply operation failed - sandbox may have been terminated");

throw err;
} finally {
await sandbox.kill();
try {
await sandbox.kill();
} catch (killErr) {
// Sandbox may already be dead, that's fine
logger.debug({ killErr }, "Failed to kill sandbox (may already be terminated)");
}
}
}

Expand All @@ -157,18 +235,55 @@ export class E2BSandboxRunner implements SandboxRunner {
logger.warn({ templateId, engine, version }, "no pre-built template found, will install at runtime");
}

logger.info({ templateId }, "creating E2B sandbox");

const sandboxTimeoutSeconds = 60 * 60; // 1 hour



logger.info({
templateId,
timeoutSeconds: sandboxTimeoutSeconds,
}, "creating E2B sandbox");

const sandbox = await Sandbox.create(templateId, {
apiKey: this.options.apiKey,
timeoutMs: sandboxTimeoutSeconds * 1000,
});
logger.info({ sandboxId: sandbox.sandboxId }, "E2B sandbox created");
logger.info({ sandboxId: sandbox.sandboxId }, "E2B sandbox created with extended timeout");

// Store engine metadata for command execution
(sandbox as any)._requestedEngine = engine;

return { sandbox, needsInstall };
}

/**
 * Build the environment variables used for a Terraform/OpenTofu command.
 *
 * `TF_IN_AUTOMATION=1` is always set. When `metadata` contains a non-empty
 * `AWS_ACCESS_KEY_ID`, the AWS credential and region variables are added as
 * well (used for benchmark runs that create real resources):
 *
 * - `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` are copied from metadata
 *   (the secret falls back to an empty string, with a warning, if absent).
 * - `AWS_SESSION_TOKEN` is forwarded when present so that temporary (STS)
 *   credentials work; without it such credentials are rejected by AWS.
 * - `AWS_REGION` defaults to `us-east-1`, and `AWS_DEFAULT_REGION` mirrors it.
 *
 * Secret values are never logged; only the region, the key-id length and
 * whether a session token was supplied are recorded.
 *
 * @param metadata - Optional run metadata; only the `AWS_*` keys are read.
 * @returns A new environment map; `metadata` is not modified.
 */
private buildTerraformEnvs(metadata?: Record<string, string>): Record<string, string> {
  const envs: Record<string, string> = {
    TF_IN_AUTOMATION: "1",
  };

  // No access key id means no AWS configuration at all for this run.
  if (!metadata?.AWS_ACCESS_KEY_ID) {
    logger.warn("No AWS credentials in metadata - AWS resources will fail");
    return envs;
  }

  envs.AWS_ACCESS_KEY_ID = metadata.AWS_ACCESS_KEY_ID;

  // `||` is intentional: an empty string is treated the same as a missing value.
  envs.AWS_SECRET_ACCESS_KEY = metadata.AWS_SECRET_ACCESS_KEY || "";
  if (!envs.AWS_SECRET_ACCESS_KEY) {
    // Surface the misconfiguration instead of failing later with an opaque provider error.
    logger.warn("AWS_ACCESS_KEY_ID provided without AWS_SECRET_ACCESS_KEY - AWS authentication will fail");
  }

  // Temporary credentials are only valid together with their session token.
  if (metadata.AWS_SESSION_TOKEN) {
    envs.AWS_SESSION_TOKEN = metadata.AWS_SESSION_TOKEN;
  }

  envs.AWS_REGION = metadata.AWS_REGION || "us-east-1";
  // Also set default region for AWS SDK
  envs.AWS_DEFAULT_REGION = envs.AWS_REGION;

  logger.info({
    region: envs.AWS_REGION,
    keyLength: envs.AWS_ACCESS_KEY_ID.length,
    hasSessionToken: Boolean(envs.AWS_SESSION_TOKEN),
  }, "AWS credentials injected into terraform environment");

  return envs;
}


private async installIacTool(sandbox: Sandbox, engine: string, version: string): Promise<void> {
logger.info({ engine, version }, "installing IaC tool at runtime");
Expand Down Expand Up @@ -231,6 +346,20 @@ export class E2BSandboxRunner implements SandboxRunner {
// Use gunzip + tar separately for better compatibility across tar versions
await sandbox.commands.run(`cd ${workDir} && gunzip -c bundle.tar.gz | tar -x --exclude='terraform.tfstate' --exclude='terraform.tfstate.backup'`);

// Debug: List extracted files to understand archive structure
const listResult = await sandbox.commands.run(`find ${workDir} -type f -name "*.tf" | head -20`);
logger.info({
tfFiles: listResult.stdout.trim().split('\n').filter(Boolean),
workDir,
workingDirectory: job.payload.workingDirectory || '(none)'
}, "extracted terraform files");

// Also list all files for debugging
const allFilesResult = await sandbox.commands.run(`ls -la ${workDir}`);
logger.info({
files: allFilesResult.stdout
}, "workspace directory listing");

// Determine the execution directory
const execDir = job.payload.workingDirectory
? `${workDir}/${job.payload.workingDirectory}`
Expand Down Expand Up @@ -273,6 +402,7 @@ export class E2BSandboxRunner implements SandboxRunner {
args: string[],
logBuffer?: string[],
appendLog?: (chunk: string) => void,
metadata?: Record<string, string>,
): Promise<{ stdout: string; stderr: string }> {
const engine = (sandbox as any)._requestedEngine || "terraform";
const binaryName = engine === "tofu" ? "tofu" : "terraform";
Expand All @@ -289,13 +419,25 @@ export class E2BSandboxRunner implements SandboxRunner {
appendLog?.(chunk);
};

// Use long timeout for benchmarks (1 hour) - EKS and large operations need this
// Pro tier supports up to 24 hours, Hobby up to 1 hour
const timeoutMs = 60 * 60 * 1000; // 1 hour

// Explicitly extend sandbox lifetime before running long commands
// This ensures the sandbox won't be killed mid-operation
try {
await Sandbox.setTimeout(sandbox.sandboxId, timeoutMs, { apiKey: this.options.apiKey });
logger.info({ sandboxId: sandbox.sandboxId, timeoutMs }, "Extended sandbox timeout before command");
} catch (err) {
logger.warn({ err, sandboxId: sandbox.sandboxId }, "Failed to extend sandbox timeout (continuing anyway)");
}

const result = await sandbox.commands.run(cmdStr, {
cwd,
envs: {
TF_IN_AUTOMATION: "1",
},
envs: this.buildTerraformEnvs(metadata),
onStdout: pipeChunk,
onStderr: pipeChunk,
timeoutMs,
});

const stdout = result.stdout;
Expand Down
2 changes: 1 addition & 1 deletion sandbox-sidecar/src/templateRegistry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export interface TemplateInfo {
}

// Template version - bump this when the build recipe changes
const TEMPLATE_VERSION = "0.1.2";
const TEMPLATE_VERSION = "0.1.3";

// Generate alias matching the build system
function aliasFor(engine: string, version: string, tplVersion: string): string {
Expand Down
4 changes: 2 additions & 2 deletions sandbox-sidecar/templates/build-all.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ async function main() {

await Template.build(buildTemplateObject(spec), {
alias,
cpuCount: 2,
memoryMB: 4096,
cpuCount: 8, // Max for Pro tier (was 2)
memoryMB: 8192, // 8GB - Max for Pro tier (was 4GB)
onBuildLogs: defaultBuildLogger(),
});

Expand Down
4 changes: 2 additions & 2 deletions sandbox-sidecar/templates/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import { template } from "./test-template.ts";
async function main() {
const buildInfo = await Template.build(template, {
alias: "terraform-prebuilt-new", // template name / alias
cpuCount: 4,
memoryMB: 2048,
cpuCount: 8, // Max for Pro tier
memoryMB: 8192, // 8GB - Max for Pro tier
onBuildLogs: defaultBuildLogger(),
});

Expand Down
2 changes: 1 addition & 1 deletion sandbox-sidecar/templates/manifest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export interface TemplateSpec {
tplVersion: string;
}

export const TEMPLATE_VERSION = "0.1.2"; // bump this when recipe changes
export const TEMPLATE_VERSION = "0.1.3"; // bump this when recipe changes

export const TEMPLATES: TemplateSpec[] = [
{ engine: "terraform", engineVersion: "1.0.11", tplVersion: TEMPLATE_VERSION },
Expand Down
33 changes: 32 additions & 1 deletion taco/cmd/statesman/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"net/http"
"os"
"os/signal"
"strings"
"time"

"github.com/diggerhq/digger/opentaco/internal/analytics"
Expand All @@ -21,6 +22,7 @@ import (
"github.com/diggerhq/digger/opentaco/internal/repositories"
"github.com/diggerhq/digger/opentaco/internal/sandbox"
"github.com/diggerhq/digger/opentaco/internal/storage"
"github.com/google/uuid"
"github.com/kelseyhightower/envconfig"
"github.com/labstack/echo/v4"
echomiddleware "github.com/labstack/echo/v4/middleware"
Expand Down Expand Up @@ -101,7 +103,16 @@ func main() {
if err != nil {
slog.Warn("Failed to list units from storage", "error", err)
} else {
syncedCount := 0
skippedCount := 0
for _, unit := range units {
// Skip non-unit paths (config-versions, plans, runs, etc.)
// Valid unit paths are: {org-uuid}/{unit-uuid}
if !isValidUnitPath(unit.ID) {
skippedCount++
continue
}

if err := queryStore.SyncEnsureUnit(context.Background(), unit.ID); err != nil {
slog.Warn("Failed to sync unit", "unit_id", unit.ID, "error", err)
continue
Expand All @@ -110,8 +121,9 @@ func main() {
if err := queryStore.SyncUnitMetadata(context.Background(), unit.ID, unit.Size, unit.Updated); err != nil {
slog.Warn("Failed to sync metadata for unit", "unit_id", unit.ID, "error", err)
}
syncedCount++
}
slog.Info("Synced units from storage to database", "count", len(units))
slog.Info("Synced units from storage to database", "synced", syncedCount, "skipped_non_units", skippedCount)
}
} else {
slog.Info("Query backend already has units, skipping sync", "count", len(existingUnits))
Expand Down Expand Up @@ -275,3 +287,22 @@ func main() {
analytics.SendEssential("server_shutdown_complete")
slog.Info("Server shutdown complete")
}

// isValidUnitPath reports whether a storage path has the unit layout
// {org-uuid}/{unit-uuid}. Leading and trailing slashes are ignored. The path
// must contain a separator, and both the text before the first "/" and all of
// the text after it must parse as UUIDs, so deeper paths and non-UUID
// prefixes (for example config-versions/, plans/, runs/) are rejected.
func isValidUnitPath(path string) bool {
	orgPart, unitPart, hasSeparator := strings.Cut(strings.Trim(path, "/"), "/")
	if !hasSeparator {
		// A single segment cannot be an org/unit pair.
		return false
	}

	// Each of the two segments has to be a well-formed UUID.
	for _, segment := range []string{orgPart, unitPart} {
		if _, err := uuid.Parse(segment); err != nil {
			return false
		}
	}

	return true
}
Loading
Loading