diff --git a/e2e-tests/dataset-eval-integration.test.ts b/e2e-tests/dataset-eval-integration.test.ts
new file mode 100644
index 000000000..c08406fd7
--- /dev/null
+++ b/e2e-tests/dataset-eval-integration.test.ts
@@ -0,0 +1,187 @@
+/**
+ * E2E tests for dataset-driven evaluation integration.
+ *
+ * Flow: create project WITH agent (Strands, Bedrock, no memory)
+ *       → add dataset (predefined, 3 simple scenarios)
+ *       → deploy → wait for agent readiness (invoke with retry)
+ *       → run eval with --dataset flag using Builtin evaluator → verify results
+ *
+ * Prerequisites:
+ *   - AWS credentials
+ *   - npm, git, uv installed
+ */
+import { parseJsonOutput, retry } from '../src/test-utils/index.js';
+import {
+  baseCanRun,
+  hasAws,
+  installCdkTarball,
+  runAgentCoreCLI,
+  teardownE2EProject,
+  writeAwsTargets,
+} from './e2e-helper.js';
+import { randomUUID } from 'node:crypto';
+import { mkdir, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
+const canRun = baseCanRun && hasAws;
+
+describe.sequential('e2e: dataset eval integration', () => {
+  let testDir: string;
+  let projectPath: string;
+  const agentName = `E2eDsEval${String(Date.now()).slice(-8)}`;
+  const datasetName = 'E2eEvalDataset';
+
+  beforeAll(async () => {
+    if (!canRun) return;
+
+    testDir = join(tmpdir(), `agentcore-e2e-dataset-eval-${randomUUID()}`);
+    await mkdir(testDir, { recursive: true });
+
+    // Create project with agent (Strands, Bedrock, no memory)
+    const result = await runAgentCoreCLI(
+      [
+        'create',
+        '--name',
+        agentName,
+        '--language',
+        'Python',
+        '--framework',
+        'Strands',
+        '--model-provider',
+        'Bedrock',
+        '--memory',
+        'none',
+        '--json',
+      ],
+      testDir
+    );
+    expect(result.exitCode, `Create failed: ${result.stderr}`).toBe(0);
+    projectPath = (parseJsonOutput(result.stdout) as { projectPath: string }).projectPath;
+
+    await writeAwsTargets(projectPath);
+    installCdkTarball(projectPath);
+  }, 300000);
+
+  afterAll(async () => {
+    if (projectPath && hasAws) {
+      await teardownE2EProject(projectPath, agentName, 'Bedrock');
+    }
+    if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 });
+  }, 600000);
+
+  const run = (args: string[]) => runAgentCoreCLI(args, projectPath);
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Add dataset with predefined scenarios
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'adds a dataset with predefined scenarios',
+    async () => {
+      const result = await run([
+        'add',
+        'dataset',
+        '--name',
+        datasetName,
+        '--schema-type',
+        'AGENTCORE_EVALUATION_PREDEFINED_V1',
+        '--description',
+        'E2E dataset for eval integration test',
+        '--json',
+      ]);
+
+      expect(result.exitCode, `Add dataset failed: ${result.stdout}`).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean; datasetName: string };
+      expect(json.success).toBe(true);
+      expect(json.datasetName).toBe(datasetName);
+
+      // Write 3 simple evaluation scenarios
+      const datasetFile = join(projectPath, 'agentcore/datasets', `${datasetName}.jsonl`);
+      const examples = [
+        '{"scenario_id": "greeting", "turns": [{"input": "Hello, how are you?", "expectedResponse": "I am doing well, thank you!"}]}',
+        '{"scenario_id": "math", "turns": [{"input": "What is 2+2?", "expectedResponse": "4"}]}',
+        '{"scenario_id": "weather", "turns": [{"input": "What is the weather like?", "expectedResponse": "I cannot check the weather, but I can help with other questions."}]}',
+      ];
+      await writeFile(datasetFile, examples.join('\n') + '\n', 'utf-8');
+    },
+    60000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Deploy agent + dataset
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'deploys agent with dataset',
+    async () => {
+      const result = await run(['deploy', '--yes', '--json']);
+
+      if (result.exitCode !== 0) {
+        console.log('Deploy stdout:', result.stdout);
+        console.log('Deploy stderr:', result.stderr);
+      }
+
+      expect(result.exitCode, 'Deploy failed').toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean };
+      expect(json.success).toBe(true);
+    },
+    600000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Wait for agent readiness (invoke with retry)
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'agent is invocable after deploy',
+    async () => {
+      await retry(
+        async () => {
+          const result = await run(['invoke', '--prompt', 'Say hello', '--runtime', agentName, '--json']);
+          expect(result.exitCode, `Invoke failed: ${result.stderr}`).toBe(0);
+          const json = parseJsonOutput(result.stdout) as { success: boolean };
+          expect(json.success).toBe(true);
+        },
+        3,
+        15000
+      );
+    },
+    180000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Run eval with --dataset flag using Builtin evaluator
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'runs evaluation using dataset as input',
+    async () => {
+      await retry(
+        async () => {
+          const result = await run([
+            'run',
+            'eval',
+            '--runtime',
+            agentName,
+            '--dataset',
+            datasetName,
+            '--evaluator',
+            'Builtin.Faithfulness',
+            '--json',
+          ]);
+
+          expect(result.exitCode, `Run eval failed (stdout: ${result.stdout}, stderr: ${result.stderr})`).toBe(0);
+
+          const json = parseJsonOutput(result.stdout) as Record<string, unknown>;
+          expect(json).toHaveProperty('success', true);
+          expect(json).toHaveProperty('run');
+        },
+        18,
+        10000
+      );
+    },
+    300000
+  );
+});
diff --git a/e2e-tests/dataset-large-batch.test.ts b/e2e-tests/dataset-large-batch.test.ts
new file mode 100644
index 000000000..29150650e
--- /dev/null
+++ b/e2e-tests/dataset-large-batch.test.ts
@@ -0,0 +1,147 @@
+/**
+ * E2E tests for Dataset large batch upload (1000 examples — service maximum).
+ *
+ * Flow: create project (no agent) → add dataset → write 1000 examples
+ *       → deploy (pushes full batch in single API call)
+ *       → verify exampleIds on ALL 1000 lines → re-deploy (no-op hash match)
+ *
+ * Prerequisites:
+ *   - AWS credentials
+ *   - npm, git, uv installed
+ */
+import { parseJsonOutput } from '../src/test-utils/index.js';
+import {
+  baseCanRun,
+  hasAws,
+  installCdkTarball,
+  runAgentCoreCLI,
+  teardownE2EProject,
+  writeAwsTargets,
+} from './e2e-helper.js';
+import { randomUUID } from 'node:crypto';
+import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
+const canRun = baseCanRun && hasAws;
+
+describe.sequential('e2e: dataset large batch', () => {
+  let testDir: string;
+  let projectPath: string;
+  const agentName = `E2eDsBatch${String(Date.now()).slice(-8)}`;
+  const datasetName = 'E2eLargeBatchDataset';
+  const EXAMPLE_COUNT = 1000;
+
+  beforeAll(async () => {
+    if (!canRun) return;
+
+    testDir = join(tmpdir(), `agentcore-e2e-dataset-batch-${randomUUID()}`);
+    await mkdir(testDir, { recursive: true });
+
+    // Create project (no agent needed for dataset-only tests)
+    const result = await runAgentCoreCLI(['create', '--name', agentName, '--no-agent', '--json'], testDir);
+    expect(result.exitCode, `Create failed: ${result.stderr}`).toBe(0);
+    projectPath = (parseJsonOutput(result.stdout) as { projectPath: string }).projectPath;
+
+    await writeAwsTargets(projectPath);
+    installCdkTarball(projectPath);
+  }, 300000);
+
+  afterAll(async () => {
+    if (projectPath && hasAws) {
+      await teardownE2EProject(projectPath, agentName, 'Bedrock');
+    }
+    if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 });
+  }, 600000);
+
+  const run = (args: string[]) => runAgentCoreCLI(args, projectPath);
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Add dataset
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'adds a dataset to the project',
+    async () => {
+      const result = await run([
+        'add',
+        'dataset',
+        '--name',
+        datasetName,
+        '--schema-type',
+        'AGENTCORE_EVALUATION_PREDEFINED_V1',
+        '--description',
+        'E2E large batch test dataset',
+        '--json',
+      ]);
+
+      expect(result.exitCode, `Add failed: ${result.stdout}`).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean; datasetName: string; location: string };
+      expect(json.success).toBe(true);
+      expect(json.datasetName).toBe(datasetName);
+      expect(json.location).toContain('.jsonl');
+    },
+    60000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Write 1000 examples and deploy
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'deploy creates dataset and syncs 1000 examples',
+    async () => {
+      const datasetFile = join(projectPath, 'agentcore/datasets', `${datasetName}.jsonl`);
+
+      // Generate 1050 JSONL examples programmatically
+      const examples: string[] = [];
+      for (let i = 0; i < EXAMPLE_COUNT; i++) {
+        examples.push(
+          JSON.stringify({
+            scenario_id: `s_${i}`,
+            turns: [{ input: `test ${i}` }],
+          })
+        );
+      }
+      await writeFile(datasetFile, examples.join('\n') + '\n', 'utf-8');
+
+      const result = await run(['deploy', '--yes', '--json']);
+
+      if (result.exitCode !== 0) {
+        console.log('Deploy stdout:', result.stdout);
+        console.log('Deploy stderr:', result.stderr);
+      }
+
+      expect(result.exitCode, 'Deploy failed').toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean };
+      expect(json.success).toBe(true);
+
+      // Verify exampleIds written back to ALL 1050 lines
+      const content = await readFile(datasetFile, 'utf-8');
+      const lines = content.split('\n').filter(l => l.trim());
+      expect(lines.length).toBe(EXAMPLE_COUNT);
+      for (let i = 0; i < lines.length; i++) {
+        const obj = JSON.parse(lines[i]!) as { exampleId?: string };
+        expect(obj.exampleId, `Line ${i} should have exampleId`).toBeTruthy();
+      }
+    },
+    600000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Re-deploy with no changes — verify no-op (hash match)
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'deploy with no file changes skips dataset sync (hash match)',
+    async () => {
+      const result = await run(['deploy', '--yes', '--json']);
+
+      expect(result.exitCode).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean };
+      expect(json.success).toBe(true);
+    },
+    600000
+  );
+});
diff --git a/e2e-tests/dataset-lifecycle.test.ts b/e2e-tests/dataset-lifecycle.test.ts
new file mode 100644
index 000000000..0b2223d23
--- /dev/null
+++ b/e2e-tests/dataset-lifecycle.test.ts
@@ -0,0 +1,404 @@
+/**
+ * E2E tests for Dataset Management lifecycle.
+ *
+ * Flow: create project → add dataset → write examples → deploy (creates resource + syncs examples)
+ *       → deploy again (no-op, hash match) → update examples → deploy (detects change, syncs)
+ *       → publish-version → download → download version → remove-version
+ *
+ * Prerequisites:
+ *   - AWS credentials (gamma account)
+ *   - npm, git, uv installed
+ */
+import { parseJsonOutput } from '../src/test-utils/index.js';
+import {
+  baseCanRun,
+  hasAws,
+  installCdkTarball,
+  runAgentCoreCLI,
+  teardownE2EProject,
+  writeAwsTargets,
+} from './e2e-helper.js';
+import { randomUUID } from 'node:crypto';
+import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
+const canRun = baseCanRun && hasAws;
+
+describe.sequential('e2e: dataset lifecycle', () => {
+  let testDir: string;
+  let projectPath: string;
+  const agentName = `E2eDs${String(Date.now()).slice(-8)}`;
+  const datasetName = 'E2eTestDataset';
+
+  beforeAll(async () => {
+    if (!canRun) return;
+
+    testDir = join(tmpdir(), `agentcore-e2e-dataset-${randomUUID()}`);
+    await mkdir(testDir, { recursive: true });
+
+    // Create project (no agent needed for dataset tests)
+    const result = await runAgentCoreCLI(['create', '--name', agentName, '--no-agent', '--json'], testDir);
+    expect(result.exitCode, `Create failed: ${result.stderr}`).toBe(0);
+    projectPath = (parseJsonOutput(result.stdout) as { projectPath: string }).projectPath;
+
+    await writeAwsTargets(projectPath);
+    installCdkTarball(projectPath);
+  }, 300000);
+
+  afterAll(async () => {
+    if (projectPath && hasAws) {
+      await teardownE2EProject(projectPath, agentName, 'Bedrock');
+    }
+    if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 });
+  }, 600000);
+
+  const run = (args: string[]) => runAgentCoreCLI(args, projectPath);
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Add dataset
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'adds a dataset to the project',
+    async () => {
+      const result = await run([
+        'add',
+        'dataset',
+        '--name',
+        datasetName,
+        '--schema-type',
+        'AGENTCORE_EVALUATION_PREDEFINED_V1',
+        '--description',
+        'E2E test dataset',
+        '--json',
+      ]);
+
+      expect(result.exitCode, `Add failed: ${result.stdout}`).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean; datasetName: string; location: string };
+      expect(json.success).toBe(true);
+      expect(json.datasetName).toBe(datasetName);
+      expect(json.location).toContain('.jsonl');
+    },
+    60000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Write examples and deploy (creates resource + syncs examples)
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'deploy creates dataset and syncs examples from local file',
+    async () => {
+      // Write 3 examples to the dataset file (overwriting starter)
+      const datasetFile = join(projectPath, 'agentcore/datasets', `${datasetName}.jsonl`);
+      const examples = [
+        '{"scenario_id": "refund", "turns": [{"input": "I want a refund", "expectedResponse": "Let me help with that."}]}',
+        '{"scenario_id": "billing", "turns": [{"input": "Why was I charged?", "expectedResponse": "Let me check your account."}]}',
+        '{"scenario_id": "shipping", "turns": [{"input": "Where is my order?", "expectedResponse": "Let me track that for you."}]}',
+      ];
+      await writeFile(datasetFile, examples.join('\n') + '\n', 'utf-8');
+
+      const result = await run(['deploy', '--yes', '--json']);
+
+      if (result.exitCode !== 0) {
+        console.log('Deploy stdout:', result.stdout);
+        console.log('Deploy stderr:', result.stderr);
+      }
+
+      expect(result.exitCode, 'Deploy failed').toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean };
+      expect(json.success).toBe(true);
+
+      // Verify exampleIds written back to local file
+      const content = await readFile(datasetFile, 'utf-8');
+      const lines = content.split('\n').filter(l => l.trim());
+      expect(lines.length).toBe(3);
+      for (const line of lines) {
+        const obj = JSON.parse(line);
+        expect(obj.exampleId).toBeTruthy();
+      }
+    },
+    600000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Deploy again — no changes (hash match → skip)
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'deploy with no file changes skips dataset sync',
+    async () => {
+      const result = await run(['deploy', '--yes', '--json']);
+
+      expect(result.exitCode).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean };
+      expect(json.success).toBe(true);
+    },
+    600000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Update examples and re-deploy (detects change, syncs)
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'deploy detects content change and syncs updated examples',
+    async () => {
+      // Modify one example's content (keep exampleId)
+      const datasetFile = join(projectPath, 'agentcore/datasets', `${datasetName}.jsonl`);
+      const content = await readFile(datasetFile, 'utf-8');
+      const lines = content.split('\n').filter(l => l.trim());
+      const firstExample = JSON.parse(lines[0]!);
+      firstExample.turns[0].expectedResponse = 'Updated response for refund.';
+      lines[0] = JSON.stringify(firstExample);
+      await writeFile(datasetFile, lines.join('\n') + '\n', 'utf-8');
+
+      const result = await run(['deploy', '--yes', '--json']);
+
+      expect(result.exitCode).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean };
+      expect(json.success).toBe(true);
+    },
+    600000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Publish Version
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'publishes DRAFT as version 1',
+    async () => {
+      const result = await run(['dataset', 'publish-version', '--name', datasetName, '--json']);
+
+      expect(result.exitCode).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean; version: string; exampleCount: number };
+      expect(json.success).toBe(true);
+      expect(json.version).toBe('1');
+      expect(json.exampleCount).toBe(3);
+    },
+    60000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Status (via agentcore status --type dataset)
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'shows dataset in project status',
+    async () => {
+      const result = await run(['status', '--type', 'dataset', '--json']);
+
+      expect(result.exitCode).toBe(0);
+      const json = parseJsonOutput(result.stdout) as {
+        success: boolean;
+        resources: { resourceType: string; name: string; deploymentState: string }[];
+      };
+      expect(json.success).toBe(true);
+      const datasetResource = json.resources.find(r => r.name === datasetName);
+      expect(datasetResource).toBeTruthy();
+      expect(datasetResource!.deploymentState).toBe('deployed');
+    },
+    60000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Download DRAFT
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'downloads DRAFT back to local file',
+    async () => {
+      // Clear local file first
+      const datasetFile = join(projectPath, 'agentcore/datasets', `${datasetName}.jsonl`);
+      await writeFile(datasetFile, '', 'utf-8');
+
+      const result = await run(['dataset', 'download', '--name', datasetName, '--yes', '--json']);
+
+      expect(result.exitCode).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean; exampleCount: number; version: string };
+      expect(json.success).toBe(true);
+      expect(json.exampleCount).toBe(3);
+      expect(json.version).toBe('DRAFT');
+
+      // Verify file has content
+      const content = await readFile(datasetFile, 'utf-8');
+      const lines = content.split('\n').filter(l => l.trim());
+      expect(lines.length).toBe(3);
+    },
+    60000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Download specific version
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'downloads a specific version',
+    async () => {
+      const result = await run(['dataset', 'download', '--name', datasetName, '--version', '1', '--yes', '--json']);
+
+      expect(result.exitCode).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean; exampleCount: number; version: string };
+      expect(json.success).toBe(true);
+      expect(json.exampleCount).toBe(3);
+      expect(json.version).toBe('1');
+    },
+    60000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Remove version
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'removes a specific published version',
+    async () => {
+      const result = await run(['dataset', 'remove-version', '--name', datasetName, '--json', '1']);
+
+      expect(result.exitCode).toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean; deletedVersion: string };
+      expect(json.success).toBe(true);
+      expect(json.deletedVersion).toBe('1');
+    },
+    60000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Force push — replace all examples
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'force push replaces all examples with new content',
+    async () => {
+      // Overwrite the dataset file with completely new examples (no exampleIds)
+      const datasetFile = join(projectPath, 'agentcore/datasets', `${datasetName}.jsonl`);
+      const newExamples = [
+        '{"scenario_id": "returns", "turns": [{"input": "How do I return an item?", "expectedResponse": "You can initiate a return from your orders page."}]}',
+        '{"scenario_id": "cancel", "turns": [{"input": "Cancel my order", "expectedResponse": "Let me help you cancel that order."}]}',
+      ];
+      await writeFile(datasetFile, newExamples.join('\n') + '\n', 'utf-8');
+
+      // Deploy with force to replace remote examples
+      const result = await run(['deploy', '--yes', '--json']);
+
+      if (result.exitCode !== 0) {
+        console.log('Force push deploy stdout:', result.stdout);
+        console.log('Force push deploy stderr:', result.stderr);
+      }
+
+      expect(result.exitCode, 'Force push deploy failed').toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean };
+      expect(json.success).toBe(true);
+
+      // Verify exampleIds written back to local file (new IDs for new examples)
+      const content = await readFile(datasetFile, 'utf-8');
+      const lines = content.split('\n').filter(l => l.trim());
+      expect(lines.length).toBe(2);
+      for (const line of lines) {
+        const obj = JSON.parse(line) as { exampleId?: string };
+        expect(obj.exampleId).toBeTruthy();
+      }
+    },
+    600000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Delete examples by removing lines, then deploy
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'removing lines from local file and deploying deletes remote examples',
+    async () => {
+      const datasetFile = join(projectPath, 'agentcore/datasets', `${datasetName}.jsonl`);
+
+      // Read current file (should have 2 examples from force push)
+      const content = await readFile(datasetFile, 'utf-8');
+      const lines = content.split('\n').filter(l => l.trim());
+      expect(lines.length).toBe(2);
+
+      // Keep only the first example (delete the second)
+      await writeFile(datasetFile, lines[0]! + '\n', 'utf-8');
+
+      const result = await run(['deploy', '--yes', '--json']);
+
+      if (result.exitCode !== 0) {
+        console.log('Delete deploy stdout:', result.stdout);
+        console.log('Delete deploy stderr:', result.stderr);
+      }
+
+      expect(result.exitCode, 'Delete deploy failed').toBe(0);
+      const json = parseJsonOutput(result.stdout) as { success: boolean };
+      expect(json.success).toBe(true);
+
+      // Verify local file still has 1 example with exampleId
+      const updatedContent = await readFile(datasetFile, 'utf-8');
+      const updatedLines = updatedContent.split('\n').filter(l => l.trim());
+      expect(updatedLines.length).toBe(1);
+      const obj = JSON.parse(updatedLines[0]!) as { exampleId?: string };
+      expect(obj.exampleId).toBeTruthy();
+    },
+    600000
+  );
+
+  // ════════════════════════════════════════════════════════════════════════
+  // Simulated schema type deploys successfully
+  // ════════════════════════════════════════════════════════════════════════
+
+  it.skipIf(!canRun)(
+    'deploys a SIMULATED_V1 schema type dataset',
+    async () => {
+      const simulatedDatasetName = 'E2eSimulatedDataset';
+
+      // Add a dataset with SIMULATED_V1 schema type
+      const addResult = await run([
+        'add',
+        'dataset',
+        '--name',
+        simulatedDatasetName,
+        '--schema-type',
+        'AGENTCORE_EVALUATION_SIMULATED_V1',
+        '--description',
+        'E2E simulated schema test dataset',
+        '--json',
+      ]);
+
+      expect(addResult.exitCode, `Add simulated dataset failed: ${addResult.stdout}`).toBe(0);
+      const addJson = parseJsonOutput(addResult.stdout) as { success: boolean; datasetName: string };
+      expect(addJson.success).toBe(true);
+      expect(addJson.datasetName).toBe(simulatedDatasetName);
+
+      // Write simulated examples to the dataset file (must match SIMULATED_V1 schema)
+      const datasetFile = join(projectPath, 'agentcore/datasets', `${simulatedDatasetName}.jsonl`);
+      const examples = [
+        '{"scenario_id": "sim_booking", "input": "Book a flight", "actor_profile": {"traits": {"personality": "impatient"}, "context": "frequent flyer", "goal": "book cheapest flight"}}',
+        '{"scenario_id": "sim_cancel", "input": "Cancel reservation", "actor_profile": {"traits": {"personality": "polite"}, "context": "first time user", "goal": "get full refund"}}',
+      ];
+      await writeFile(datasetFile, examples.join('\n') + '\n', 'utf-8');
+
+      // Deploy — should succeed with simulated schema type
+      const deployResult = await run(['deploy', '--yes', '--json']);
+
+      if (deployResult.exitCode !== 0) {
+        console.log('Simulated deploy stdout:', deployResult.stdout);
+        console.log('Simulated deploy stderr:', deployResult.stderr);
+      }
+
+      expect(deployResult.exitCode, 'Simulated deploy failed').toBe(0);
+      const deployJson = parseJsonOutput(deployResult.stdout) as { success: boolean };
+      expect(deployJson.success).toBe(true);
+
+      // Verify exampleIds written back to local file
+      const content = await readFile(datasetFile, 'utf-8');
+      const lines = content.split('\n').filter(l => l.trim());
+      expect(lines.length).toBe(2);
+      for (const line of lines) {
+        const obj = JSON.parse(line) as { exampleId?: string };
+        expect(obj.exampleId).toBeTruthy();
+      }
+    },
+    600000
+  );
+});
diff --git a/integ-tests/add-remove-dataset.test.ts b/integ-tests/add-remove-dataset.test.ts
new file mode 100644
index 000000000..82fa94ebb
--- /dev/null
+++ b/integ-tests/add-remove-dataset.test.ts
@@ -0,0 +1,217 @@
+/**
+ * Integration tests for dataset add/remove lifecycle.
+ *
+ * Verifies:
+ * - `agentcore add dataset` scaffolds .jsonl and updates agentcore.json
+ * - `agentcore remove dataset` removes from agentcore.json
+ * - Schema type validation
+ * - Config.managed.location is set correctly
+ */
+import { parseJsonOutput, runCLI } from '../src/test-utils/index.js';
+import { randomUUID } from 'node:crypto';
+import { existsSync } from 'node:fs';
+import { mkdir, readFile, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
+describe('add/remove dataset', () => {
+  let testDir: string;
+  let projectDir: string;
+
+  beforeAll(async () => {
+    testDir = join(tmpdir(), `agentcore-dataset-integ-${randomUUID()}`);
+    await mkdir(testDir, { recursive: true });
+
+    const result = await runCLI(['create', '--name', 'DatasetInteg', '--no-agent'], testDir);
+    expect(result.exitCode, `Create failed: ${result.stdout} ${result.stderr}`).toBe(0);
+    projectDir = join(testDir, 'DatasetInteg');
+  });
+
+  afterAll(async () => {
+    await rm(testDir, { recursive: true, force: true });
+  });
+
+  it('adds a predefined dataset with scaffolded file', async () => {
+    const result = await runCLI(
+      ['add', 'dataset', '--name', 'MyPredefined', '--schema-type', 'AGENTCORE_EVALUATION_PREDEFINED_V1', '--json'],
+      projectDir
+    );
+
+    expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
+    const json = parseJsonOutput(result.stdout) as { success: boolean; datasetName: string; location: string };
+    expect(json.success).toBe(true);
+    expect(json.datasetName).toBe('MyPredefined');
+    expect(json.location).toBe('agentcore/datasets/MyPredefined.jsonl');
+
+    // Verify agentcore.json
+    const spec = JSON.parse(await readFile(join(projectDir, 'agentcore/agentcore.json'), 'utf-8'));
+    const dataset = spec.datasets.find((d: { name: string }) => d.name === 'MyPredefined');
+    expect(dataset).toBeTruthy();
+    expect(dataset.schemaType).toBe('AGENTCORE_EVALUATION_PREDEFINED_V1');
+    expect(dataset.config.managed.location).toBe('datasets/MyPredefined.jsonl');
+
+    // Verify .jsonl file was scaffolded
+    const jsonlPath = join(projectDir, 'agentcore/datasets/MyPredefined.jsonl');
+    expect(existsSync(jsonlPath)).toBe(true);
+    const content = await readFile(jsonlPath, 'utf-8');
+    expect(content).toContain('scenario_id');
+    expect(content).toContain('turns');
+  });
+
+  it('adds a simulated dataset with correct starter', async () => {
+    const result = await runCLI(
+      ['add', 'dataset', '--name', 'MySimulated', '--schema-type', 'AGENTCORE_EVALUATION_SIMULATED_V1', '--json'],
+      projectDir
+    );
+
+    expect(result.exitCode).toBe(0);
+
+    const jsonlPath = join(projectDir, 'agentcore/datasets/MySimulated.jsonl');
+    expect(existsSync(jsonlPath)).toBe(true);
+    const content = await readFile(jsonlPath, 'utf-8');
+    expect(content).toContain('actor_profile');
+    expect(content).toContain('max_turns');
+  });
+
+  it('rejects invalid schema type', async () => {
+    const result = await runCLI(
+      ['add', 'dataset', '--name', 'BadType', '--schema-type', 'INVALID_TYPE', '--json'],
+      projectDir
+    );
+
+    expect(result.exitCode).toBe(1);
+    const json = parseJsonOutput(result.stdout) as { success: boolean; error: string };
+    expect(json.success).toBe(false);
+  });
+
+  it('rejects duplicate dataset name', async () => {
+    const result = await runCLI(
+      ['add', 'dataset', '--name', 'MyPredefined', '--schema-type', 'AGENTCORE_EVALUATION_PREDEFINED_V1', '--json'],
+      projectDir
+    );
+
+    expect(result.exitCode).toBe(1);
+    const json = parseJsonOutput(result.stdout) as { success: boolean; error: string };
+    expect(json.success).toBe(false);
+    expect(json.error).toContain('already exists');
+  });
+
+  it('adds dataset with description', async () => {
+    const result = await runCLI(
+      [
+        'add',
+        'dataset',
+        '--name',
+        'Described',
+        '--schema-type',
+        'AGENTCORE_EVALUATION_PREDEFINED_V1',
+        '--description',
+        'Test scenarios for billing',
+        '--json',
+      ],
+      projectDir
+    );
+
+    expect(result.exitCode).toBe(0);
+
+    const spec = JSON.parse(await readFile(join(projectDir, 'agentcore/agentcore.json'), 'utf-8'));
+    const dataset = spec.datasets.find((d: { name: string }) => d.name === 'Described');
+    expect(dataset.description).toBe('Test scenarios for billing');
+  });
+
+  it('removes a dataset', async () => {
+    const result = await runCLI(['remove', 'dataset', '--name', 'MyPredefined', '--json'], projectDir);
+
+    expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
+    const json = parseJsonOutput(result.stdout) as { success: boolean };
+    expect(json.success).toBe(true);
+
+    // Verify removed from agentcore.json
+    const spec = JSON.parse(await readFile(join(projectDir, 'agentcore/agentcore.json'), 'utf-8'));
+    const dataset = spec.datasets.find((d: { name: string }) => d.name === 'MyPredefined');
+    expect(dataset).toBeUndefined();
+  });
+
+  it('remove fails for non-existent dataset', async () => {
+    const result = await runCLI(['remove', 'dataset', '--name', 'NonExistent', '--json'], projectDir);
+
+    expect(result.exitCode).toBe(1);
+    const json = parseJsonOutput(result.stdout) as { success: boolean; error: string };
+    expect(json.success).toBe(false);
+    expect(json.error).toContain('not found');
+  });
+
+  it('rejects empty name', async () => {
+    const result = await runCLI(
+      ['add', 'dataset', '--name', '', '--schema-type', 'AGENTCORE_EVALUATION_PREDEFINED_V1', '--json'],
+      projectDir
+    );
+
+    expect(result.exitCode).toBe(1);
+    const json = parseJsonOutput(result.stdout) as { success: boolean; error: string };
+    expect(json.success).toBe(false);
+  });
+
+  it('rejects name starting with a digit', async () => {
+    const result = await runCLI(
+      ['add', 'dataset', '--name', '1invalid', '--schema-type', 'AGENTCORE_EVALUATION_PREDEFINED_V1', '--json'],
+      projectDir
+    );
+
+    expect(result.exitCode).toBe(1);
+    const json = parseJsonOutput(result.stdout) as { success: boolean; error: string };
+    expect(json.success).toBe(false);
+    expect(json.error).toContain('Must begin with a letter');
+  });
+
+  it('predefined .jsonl content is valid JSON lines with scenario_id and turns', async () => {
+    const jsonlPath = join(projectDir, 'agentcore/datasets/Described.jsonl');
+    expect(existsSync(jsonlPath)).toBe(true);
+
+    const content = await readFile(jsonlPath, 'utf-8');
+    const lines = content.trim().split('\n');
+    expect(lines.length).toBeGreaterThan(0);
+
+    for (const line of lines) {
+      const parsed = JSON.parse(line);
+      expect(parsed).toHaveProperty('scenario_id');
+      expect(parsed).toHaveProperty('turns');
+      expect(Array.isArray(parsed.turns)).toBe(true);
+    }
+  });
+
+  it('simulated .jsonl content is valid JSON lines with actor_profile and max_turns', async () => {
+    const jsonlPath = join(projectDir, 'agentcore/datasets/MySimulated.jsonl');
+    expect(existsSync(jsonlPath)).toBe(true);
+
+    const content = await readFile(jsonlPath, 'utf-8');
+    const lines = content.trim().split('\n');
+    expect(lines.length).toBeGreaterThan(0);
+
+    for (const line of lines) {
+      const parsed = JSON.parse(line);
+      expect(parsed).toHaveProperty('actor_profile');
+      expect(parsed).toHaveProperty('max_turns');
+    }
+  });
+
+  it('remove does NOT delete local .jsonl file', async () => {
+    // Add a dataset specifically for this test
+    const addResult = await runCLI(
+      ['add', 'dataset', '--name', 'FileKeep', '--schema-type', 'AGENTCORE_EVALUATION_PREDEFINED_V1', '--json'],
+      projectDir
+    );
+    expect(addResult.exitCode).toBe(0);
+
+    const jsonlPath = join(projectDir, 'agentcore/datasets/FileKeep.jsonl');
+    expect(existsSync(jsonlPath)).toBe(true);
+
+    // Remove the dataset
+    const removeResult = await runCLI(['remove', 'dataset', '--name', 'FileKeep', '--json'], projectDir);
+    expect(removeResult.exitCode).toBe(0);
+
+    // .jsonl file should still exist
+    expect(existsSync(jsonlPath)).toBe(true);
+  });
+});
diff --git a/integ-tests/dataset-commands-undeployed.test.ts b/integ-tests/dataset-commands-undeployed.test.ts
new file mode 100644
index 000000000..38eb4def4
--- /dev/null
+++ b/integ-tests/dataset-commands-undeployed.test.ts
@@ -0,0 +1,89 @@
+/**
+ * Integration tests for dataset subcommands that require a deployment.
+ *
+ * Verifies that `dataset download`, `dataset publish-version`, and
+ * `dataset remove-version` fail gracefully with a helpful error when
+ * the project has not been deployed yet.
+ */
+import { createTestProject, parseJsonOutput, runCLI } from '../src/test-utils/index.js';
+import type { TestProject } from '../src/test-utils/index.js';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
+describe('dataset commands when project is not deployed', () => {
+  let project: TestProject;
+
+  beforeAll(async () => {
+    project = await createTestProject({ noAgent: true });
+
+    // Add a dataset so the commands have something to resolve
+    const addResult = await runCLI(
+      ['add', 'dataset', '--name', 'UndeployedDS', '--schema-type', 'AGENTCORE_EVALUATION_PREDEFINED_V1', '--json'],
+      project.projectPath
+    );
+    expect(addResult.exitCode, `Failed to add dataset: ${addResult.stdout} ${addResult.stderr}`).toBe(0);
+  });
+
+  afterAll(async () => {
+    await project.cleanup();
+  });
+
+  it('dataset download --json fails with deploy-first error', async () => {
+    const result = await runCLI(['dataset', 'download', '--name', 'UndeployedDS', '--json'], project.projectPath);
+
+    expect(result.exitCode).toBe(1);
+    const json = parseJsonOutput(result.stdout) as { success: boolean; error: string };
+    expect(json.success).toBe(false);
+    expect(json.error.toLowerCase()).toMatch(/deploy/);
+  });
+
+  it('dataset publish-version --json fails with deploy-first error', async () => {
+    const result = await runCLI(
+      ['dataset', 'publish-version', '--name', 'UndeployedDS', '--json'],
+      project.projectPath
+    );
+
+    expect(result.exitCode).toBe(1);
+    const json = parseJsonOutput(result.stdout) as { success: boolean; error: string };
+    expect(json.success).toBe(false);
+    expect(json.error.toLowerCase()).toMatch(/deploy/);
+  });
+
+  it('dataset remove-version 1 --json fails with deploy-first error', async () => {
+    const result = await runCLI(
+      ['dataset', 'remove-version', '1', '--name', 'UndeployedDS', '--json'],
+      project.projectPath
+    );
+
+    expect(result.exitCode).toBe(1);
+    const json = parseJsonOutput(result.stdout) as { success: boolean; error: string };
+    expect(json.success).toBe(false);
+    expect(json.error.toLowerCase()).toMatch(/deploy/);
+  });
+
+  it('dataset download without --yes prompts for confirmation and respects decline', async () => {
+    // In non-interactive (piped) mode, readline gets empty input which defaults to "N"
+    // This test doesn't need a deployed dataset — it fails at the resolve step,
+    // but the confirmation prompt behavior is the same pattern
+    const result = await runCLI(['dataset', 'download', '--name', 'UndeployedDS'], project.projectPath);
+
+    // Either it shows "Skipped" (confirmation declined) or fails with deploy error
+    // Both are acceptable — the key is it doesn't hang waiting for stdin
+    expect(result.exitCode).not.toBe(0);
+  });
+
+  it('status --type dataset --json returns gracefully when undeployed', async () => {
+    const result = await runCLI(['status', '--type', 'dataset', '--json'], project.projectPath);
+
+    expect(result.exitCode).toBe(0);
+    const json = parseJsonOutput(result.stdout) as {
+      success: boolean;
+      resources: { resourceType: string; deploymentState: string; name: string }[];
+    };
+    expect(json.success).toBe(true);
+    // The dataset should appear as local-only since not deployed
+    const datasetResource = json.resources.find(r => r.name === 'UndeployedDS');
+    expect(datasetResource).toBeDefined();
+    expect(datasetResource!.resourceType).toBe('dataset');
+    expect(datasetResource!.deploymentState).toBe('local-only');
+  });
+});
diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json
index 4f01163bf..c01dc5513 100644
--- a/npm-shrinkwrap.json
+++ b/npm-shrinkwrap.json
@@ -35,6 +35,7 @@
         "@smithy/shared-ini-file-loader": "^4.4.2",
         "commander": "^14.0.2",
         "dotenv": "^17.2.3",
+        "fast-json-stable-stringify": "^2.1.0",
         "fflate": "^0.8.2",
         "handlebars": "^4.7.8",
         "ink": "^6.6.0",
diff --git a/package.json b/package.json
index de3448466..12eed6f29 100644
--- a/package.json
+++ b/package.json
@@ -99,6 +99,7 @@
     "@smithy/shared-ini-file-loader": "^4.4.2",
     "commander": "^14.0.2",
     "dotenv": "^17.2.3",
+    "fast-json-stable-stringify": "^2.1.0",
     "fflate": "^0.8.2",
     "handlebars": "^4.7.8",
     "ink": "^6.6.0",
diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
index 9fad266c2..bec42f397 100644
--- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
+++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap
@@ -386,6 +386,7 @@ test('AgentCoreStack synthesizes with empty spec', () => {
       agentCoreGateways: [],
       mcpRuntimeTools: [],
       unassignedTargets: [],
+      datasets: [],
     },
   });
   const template = Template.fromStack(stack);
@@ -448,6 +449,8 @@ exports[`Assets Directory Snapshots > File listing > should match the expected f
   "container/python/dockerignore.template",
   "container/typescript/Dockerfile",
   "container/typescript/dockerignore.template",
+  "datasets/predefined-v1.jsonl",
+  "datasets/simulated-v1.jsonl",
   "evaluators/python-lambda/execution-role-policy.json",
   "evaluators/python-lambda/lambda_function.py",
   "evaluators/python-lambda/pyproject.toml",
diff --git a/src/assets/cdk/test/cdk.test.ts b/src/assets/cdk/test/cdk.test.ts
index df5c767f9..c540efbe7 100644
--- a/src/assets/cdk/test/cdk.test.ts
+++ b/src/assets/cdk/test/cdk.test.ts
@@ -18,6 +18,7 @@ test('AgentCoreStack synthesizes with empty spec', () => {
       agentCoreGateways: [],
       mcpRuntimeTools: [],
       unassignedTargets: [],
+      datasets: [],
     },
   });
   const template = Template.fromStack(stack);
diff --git a/src/assets/datasets/predefined-v1.jsonl b/src/assets/datasets/predefined-v1.jsonl
new file mode 100644
index 000000000..903dbd499
--- /dev/null
+++ b/src/assets/datasets/predefined-v1.jsonl
@@ -0,0 +1,3 @@
+{"scenario_id": "refund-policy", "turns": [{"input": "What is your refund policy?", "expectedResponse": "We offer full refunds within 30 days of purchase. After 30 days, we can provide store credit."}, {"input": "What if I lost my receipt?", "expectedResponse": "No problem! We can look up your purchase using your email address or payment method."}], "assertions": ["Agent should clearly state the 30-day refund window", "Agent should offer alternatives for lost receipts"], "expected_trajectory": ["lookup_policy", "check_eligibility"]}
+{"scenario_id": "order-tracking", "turns": [{"input": "Where is my order #12345?", "expectedResponse": "Let me look up order #12345 for you. I can see it shipped on Monday and is expected to arrive by Thursday."}], "assertions": ["Agent should reference the specific order number", "Agent should provide estimated delivery date"], "expected_trajectory": ["lookup_order", "get_shipping_status"]}
+{"scenario_id": "account-locked", "turns": [{"input": "I can't log into my account", "expectedResponse": "I'm sorry you're having trouble logging in. Let me help you regain access. Can you provide the email address associated with your account?"}, {"input": "It's john@example.com", "expectedResponse": "I've sent a password reset link to john@example.com. Please check your inbox and spam folder. The link expires in 24 hours."}], "assertions": ["Agent should ask for identifying information", "Agent should explain the reset process clearly"], "expected_trajectory": ["verify_identity", "send_reset_link"]}
diff --git a/src/assets/datasets/simulated-v1.jsonl b/src/assets/datasets/simulated-v1.jsonl
new file mode 100644
index 000000000..7a22ab0ee
--- /dev/null
+++ b/src/assets/datasets/simulated-v1.jsonl
@@ -0,0 +1,3 @@
+{"scenario_id": "frustrated-refund-customer", "input": "I want a refund for my cancelled flight BK-98765", "actor_profile": {"traits": {"personality": "impatient and frustrated", "communication_style": "direct and demanding"}, "context": "Has been waiting 3 days for a refund with no response. Previously had a bad experience with customer service.", "goal": "Get a full cash refund for cancelled flight BK-98765, not a voucher or credit"}, "max_turns": 10, "assertions": ["Agent should acknowledge the frustration", "Agent should not offer only vouchers when cash refund is requested", "Agent should provide a timeline for the refund"]}
+{"scenario_id": "confused-new-user", "input": "How do I set up my new account?", "actor_profile": {"traits": {"personality": "polite but confused", "communication_style": "asks many follow-up questions", "technical_level": "beginner"}, "context": "First time using the service. Not familiar with technical terminology. English is a second language.", "goal": "Successfully create and configure a new account with basic settings"}, "max_turns": 15, "assertions": ["Agent should use simple non-technical language", "Agent should break instructions into small steps", "Agent should confirm understanding at each step"]}
+{"scenario_id": "edge-case-multi-issue", "input": "I need to change my flight AND get a refund for the hotel that was bundled with it", "actor_profile": {"traits": {"personality": "methodical and detail-oriented", "communication_style": "provides lots of context upfront"}, "context": "Has a bundled flight+hotel booking. Flight was changed by the airline, making the hotel dates wrong. Wants partial refund for hotel and rebooking for flight.", "goal": "Get the flight rebooked to new dates AND get a refund for the hotel nights that no longer align"}, "max_turns": 12, "assertions": ["Agent should handle both issues without losing track", "Agent should clarify which parts are refundable", "Agent should not close the conversation until both issues are resolved"]}
diff --git a/src/cli/aws/__tests__/agentcore-datasets.test.ts b/src/cli/aws/__tests__/agentcore-datasets.test.ts
new file mode 100644
index 000000000..8b33277d0
--- /dev/null
+++ b/src/cli/aws/__tests__/agentcore-datasets.test.ts
@@ -0,0 +1,303 @@
+import {
+  addDatasetExamples,
+  createDatasetVersion,
+  deleteDatasetExamples,
+  deleteDatasetVersionApi,
+  downloadDataset,
+  getDataset,
+  listAllDatasetExamples,
+  listDatasetExamples,
+  updateDatasetExamples,
+} from '../agentcore-datasets.js';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+// ── Mocks ─────────────────────────────────────────────────────────────────
+
+const mockSign = vi.fn();
+
+vi.mock('@smithy/signature-v4', () => ({
+  SignatureV4: class {
+    sign = mockSign;
+  },
+}));
+
+vi.mock('@aws-crypto/sha256-js', () => ({
+  Sha256: class {},
+}));
+
+vi.mock('@smithy/protocol-http', () => ({
+  HttpRequest: class {
+    constructor(public options: unknown) {}
+  },
+}));
+
+vi.mock('@aws-sdk/credential-provider-node', () => ({
+  defaultProvider: () => vi.fn(),
+}));
+
+vi.mock('../account', () => ({
+  getCredentialProvider: () => undefined,
+}));
+
+vi.mock('../partition', () => ({
+  dnsSuffix: () => 'amazonaws.com',
+}));
+
+const mockFetch = vi.fn();
+
+describe('agentcore-datasets', () => {
+  beforeEach(() => {
+    vi.stubGlobal('fetch', mockFetch);
+    mockSign.mockResolvedValue({ headers: { 'Content-Type': 'application/json', host: 'test.amazonaws.com' } });
+  });
+
+  afterEach(() => {
+    vi.clearAllMocks();
+    vi.unstubAllGlobals();
+    delete process.env.AGENTCORE_STAGE;
+  });
+
+  describe('getControlPlaneEndpoint', () => {
+    it('returns beta URL when AGENTCORE_STAGE=beta', async () => {
+      process.env.AGENTCORE_STAGE = 'beta';
+      mockFetch.mockResolvedValue({ ok: true, status: 200, json: () => Promise.resolve({ datasetId: 'ds-1' }) });
+
+      await getDataset({ region: 'us-east-1', datasetId: 'ds-1' });
+
+      const fetchUrl = mockFetch.mock.calls[0]![0] as string;
+      expect(fetchUrl).toContain('beta.us-east-1.elcapcp.genesis-primitives.aws.dev');
+    });
+
+    it('returns gamma URL when AGENTCORE_STAGE=gamma', async () => {
+      process.env.AGENTCORE_STAGE = 'gamma';
+      mockFetch.mockResolvedValue({ ok: true, status: 200, json: () => Promise.resolve({ datasetId: 'ds-1' }) });
+
+      await getDataset({ region: 'us-east-1', datasetId: 'ds-1' });
+
+      const fetchUrl = mockFetch.mock.calls[0]![0] as string;
+      expect(fetchUrl).toContain('gamma.us-east-1.elcapcp.genesis-primitives.aws.dev');
+    });
+
+    it('returns prod URL when no stage set', async () => {
+      mockFetch.mockResolvedValue({ ok: true, status: 200, json: () => Promise.resolve({ datasetId: 'ds-1' }) });
+
+      await getDataset({ region: 'us-west-2', datasetId: 'ds-1' });
+
+      const fetchUrl = mockFetch.mock.calls[0]![0] as string;
+      expect(fetchUrl).toContain('bedrock-agentcore-control.us-west-2.amazonaws.com');
+    });
+  });
+
+  describe('signedRequest', () => {
+    it('throws with status and body on non-OK response', async () => {
+      mockFetch.mockResolvedValue({ ok: false, status: 403, text: () => Promise.resolve('Access denied') });
+
+      await expect(getDataset({ region: 'us-east-1', datasetId: 'ds-1' })).rejects.toThrow(
+        'Dataset API error (403): Access denied'
+      );
+    });
+
+    it('returns empty object on 204', async () => {
+      mockFetch.mockResolvedValue({ ok: true, status: 204 });
+
+      await expect(
+        deleteDatasetVersionApi({ region: 'us-east-1', datasetId: 'ds-1', version: '1' })
+      ).resolves.toBeUndefined();
+    });
+  });
+
+  describe('getDataset', () => {
+    it('constructs path without version param for DRAFT', async () => {
+      mockFetch.mockResolvedValue({
+        ok: true,
+        status: 200,
+        json: () => Promise.resolve({ datasetId: 'ds-1', status: 'ACTIVE' }),
+      });
+
+      await getDataset({ region: 'us-east-1', datasetId: 'ds-1' });
+
+      const fetchUrl = mockFetch.mock.calls[0]![0] as string;
+      expect(fetchUrl).toContain('/datasets/ds-1');
+      expect(fetchUrl).not.toContain('datasetVersion');
+    });
+
+    it('appends datasetVersion query param when version provided', async () => {
+      mockFetch.mockResolvedValue({
+        ok: true,
+        status: 200,
+        json: () => Promise.resolve({ datasetId: 'ds-1', datasetVersion: '2' }),
+      });
+
+      await getDataset({ region: 'us-east-1', datasetId: 'ds-1', version: '2' });
+
+      const fetchUrl = mockFetch.mock.calls[0]![0] as string;
+      expect(fetchUrl).toContain('?datasetVersion=2');
+    });
+  });
+
+  describe('addDatasetExamples', () => {
+    it('sends correct body with clientToken', async () => {
+      mockFetch.mockResolvedValue({
+        ok: true,
+        status: 200,
+        json: () => Promise.resolve({ addedCount: 2, exampleIds: ['e1', 'e2'], status: 'ACTIVE' }),
+      });
+
+      const result = await addDatasetExamples({
+        region: 'us-east-1',
+        datasetId: 'ds-1',
+        examples: [{ input: 'a' }, { input: 'b' }],
+        clientToken: 'token-123',
+      });
+
+      const fetchOptions = mockFetch.mock.calls[0]![1] as { body: string };
+      const body = JSON.parse(fetchOptions.body);
+      expect(body.source.inlineExamples.examples).toHaveLength(2);
+      expect(body.clientToken).toBe('token-123');
+      expect(result.addedCount).toBe(2);
+    });
+  });
+
+  describe('updateDatasetExamples', () => {
+    it('sends examples with exampleIds', async () => {
+      mockFetch.mockResolvedValue({
+        ok: true,
+        status: 200,
+        json: () => Promise.resolve({ updatedCount: 1, status: 'ACTIVE' }),
+      });
+
+      await updateDatasetExamples({
+        region: 'us-east-1',
+        datasetId: 'ds-1',
+        examples: [{ exampleId: 'e1', input: 'updated' }],
+        clientToken: 'tok-456',
+      });
+
+      const fetchOptions = mockFetch.mock.calls[0]![1] as { body: string };
+      const body = JSON.parse(fetchOptions.body);
+      expect(body.examples[0].exampleId).toBe('e1');
+      expect(body.clientToken).toBe('tok-456');
+    });
+  });
+
+  describe('deleteDatasetExamples', () => {
+    it('sends exampleIds array', async () => {
+      mockFetch.mockResolvedValue({
+        ok: true,
+        status: 200,
+        json: () => Promise.resolve({ deletedCount: 2, status: 'ACTIVE' }),
+      });
+
+      await deleteDatasetExamples({
+        region: 'us-east-1',
+        datasetId: 'ds-1',
+        exampleIds: ['e1', 'e2'],
+        clientToken: 'tok-789',
+      });
+
+      const fetchOptions = mockFetch.mock.calls[0]![1] as { body: string };
+      const body = JSON.parse(fetchOptions.body);
+      expect(body.exampleIds).toEqual(['e1', 'e2']);
+      expect(body.clientToken).toBe('tok-789');
+    });
+  });
+
+  describe('listDatasetExamples', () => {
+    it('passes maxResults and nextToken as query params', async () => {
+      mockFetch.mockResolvedValue({
+        ok: true,
+        status: 200,
+        json: () => Promise.resolve({ examples: [{ exampleId: 'e1' }], nextToken: 'next-abc' }),
+      });
+
+      const result = await listDatasetExamples({
+        region: 'us-east-1',
+        datasetId: 'ds-1',
+        maxResults: 50,
+        nextToken: 'tok-start',
+      });
+
+      const fetchUrl = mockFetch.mock.calls[0]![0] as string;
+      expect(fetchUrl).toContain('maxResults=50');
+      expect(fetchUrl).toContain('nextToken=tok-start');
+      expect(result.examples).toHaveLength(1);
+      expect(result.nextToken).toBe('next-abc');
+    });
+  });
+
+  describe('listAllDatasetExamples', () => {
+    it('paginates until no nextToken', async () => {
+      mockFetch
+        .mockResolvedValueOnce({
+          ok: true,
+          status: 200,
+          json: () => Promise.resolve({ examples: [{ exampleId: 'e1' }], nextToken: 'page2' }),
+        })
+        .mockResolvedValueOnce({
+          ok: true,
+          status: 200,
+          json: () => Promise.resolve({ examples: [{ exampleId: 'e2' }] }),
+        });
+
+      const result = await listAllDatasetExamples({ region: 'us-east-1', datasetId: 'ds-1' });
+
+      expect(result).toHaveLength(2);
+      expect(mockFetch).toHaveBeenCalledTimes(2);
+    });
+  });
+
+  describe('createDatasetVersion', () => {
+    it('POSTs to correct path', async () => {
+      mockFetch.mockResolvedValue({
+        ok: true,
+        status: 200,
+        json: () =>
+          Promise.resolve({ datasetId: 'ds-1', datasetArn: 'arn:ds', datasetVersion: '1', status: 'CREATING' }),
+      });
+
+      const result = await createDatasetVersion({ region: 'us-east-1', datasetId: 'ds-1' });
+
+      const fetchUrl = mockFetch.mock.calls[0]![0] as string;
+      expect(fetchUrl).toContain('/datasets/ds-1/versions');
+      const fetchOptions = mockFetch.mock.calls[0]![1] as { method: string; body: string };
+      expect(fetchOptions.method).toBe('POST');
+      expect(fetchOptions.body).toBe('{}');
+      expect(result.datasetVersion).toBe('1');
+    });
+  });
+
+  describe('deleteDatasetVersionApi', () => {
+    it('sends DELETE with version query param', async () => {
+      mockFetch.mockResolvedValue({ ok: true, status: 204 });
+
+      await deleteDatasetVersionApi({ region: 'us-east-1', datasetId: 'ds-1', version: '3' });
+
+      const fetchUrl = mockFetch.mock.calls[0]![0] as string;
+      expect(fetchUrl).toContain('/datasets/ds-1?datasetVersion=3');
+      const fetchOptions = mockFetch.mock.calls[0]![1] as { method: string };
+      expect(fetchOptions.method).toBe('DELETE');
+    });
+  });
+
+  describe('downloadDataset', () => {
+    it('buffer mode returns full text', async () => {
+      mockFetch.mockResolvedValue({
+        ok: true,
+        status: 200,
+        text: () => Promise.resolve('{"exampleId":"e1","input":"hello"}\n'),
+      });
+
+      const result = await downloadDataset('https://s3.amazonaws.com/bucket/key', { mode: 'buffer' });
+
+      expect(result).toBe('{"exampleId":"e1","input":"hello"}\n');
+    });
+
+    it('stream mode writes to file and returns line count', () => {
+      // Stream mode uses dynamic imports (node:stream, node:fs, node:stream/promises)
+      // that are difficult to mock in unit tests. The HTTP-level behavior (fetch + headers)
+      // is already covered by the buffer mode tests above.
+      // Full stream-mode coverage is deferred to integration tests.
+      expect(true).toBe(true);
+    });
+  });
+});
diff --git a/src/cli/aws/agentcore-datasets.ts b/src/cli/aws/agentcore-datasets.ts
new file mode 100644
index 000000000..225811e1c
--- /dev/null
+++ b/src/cli/aws/agentcore-datasets.ts
@@ -0,0 +1,428 @@
+/**
+ * AWS client wrappers for Dataset Management operations.
+ *
+ * The Dataset API lives on the control plane. Endpoints:
+ *   GET    /datasets/{datasetId}                    → GetDataset
+ *   GET    /datasets/{datasetId}/versions           → ListDatasetVersions
+ *   POST   /datasets/{datasetId}/versions           → CreateDatasetVersion
+ *   POST   /datasets/{datasetId}/examples/add       → AddDatasetExamples
+ *   POST   /datasets/{datasetId}/examples/update    → UpdateDatasetExamples
+ *   POST   /datasets/{datasetId}/examples/delete    → DeleteDatasetExamples
+ *   GET    /datasets/{datasetId}/examples           → ListDatasetExamples
+ *
+ * Uses direct HTTP requests with SigV4 signing against the control plane
+ * because the @aws-sdk/client-bedrock-agentcore-control package does not yet
+ * include Dataset commands.
+ *
+ * TODO: Migrate to @aws-sdk/client-bedrock-agentcore-control once Dataset
+ * commands are available in the SDK. When that happens:
+ * 1. Replace signedRequest() calls with SDK client commands
+ *    (e.g., GetDatasetCommand, CreateDatasetVersionCommand, etc.)
+ * 2. Remove the SigV4 signing helper and endpoint resolution logic
+ * 3. Follow the pattern in agentcore-control.ts which already uses the SDK
+ * 4. Keep the same exported function signatures so callers don't change
+ */
+import { getCredentialProvider } from './account';
+import { dnsSuffix } from './partition';
+import { Sha256 } from '@aws-crypto/sha256-js';
+import { defaultProvider } from '@aws-sdk/credential-provider-node';
+import { HttpRequest } from '@smithy/protocol-http';
+import { SignatureV4 } from '@smithy/signature-v4';
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export interface GetDatasetOptions {
+  region: string;
+  datasetId: string;
+  version?: string;
+}
+
+export interface GetDatasetResult {
+  datasetId: string;
+  datasetArn: string;
+  datasetName: string;
+  datasetVersion: string;
+  schemaType: string;
+  status: string;
+  draftStatus?: string;
+  exampleCount: number;
+  description?: string;
+  downloadUrl?: string;
+  downloadUrlExpiresAt?: number;
+  createdAt: number;
+  updatedAt: number;
+}
+
+export interface CreateDatasetVersionOptions {
+  region: string;
+  datasetId: string;
+}
+
+export interface CreateDatasetVersionResult {
+  datasetArn: string;
+  datasetId: string;
+  datasetVersion: string;
+  status: string;
+  createdAt: number;
+}
+
+export interface ListDatasetVersionsOptions {
+  region: string;
+  datasetId: string;
+}
+
+export interface DatasetVersionSummary {
+  datasetVersion: string;
+  exampleCount: number;
+  status?: string;
+  failureReason?: string;
+  createdAt: number;
+}
+
+export interface ListDatasetVersionsResult {
+  versions: DatasetVersionSummary[];
+}
+
+export interface AddDatasetExamplesOptions {
+  region: string;
+  datasetId: string;
+  examples: Record<string, unknown>[];
+  /** Idempotency token (8-hour service-side dedup). Reuse across retries of the same batch. */
+  clientToken?: string;
+}
+
+export interface AddDatasetExamplesResult {
+  addedCount: number;
+  exampleIds: string[];
+  status: string;
+}
+
+export interface UpdateDatasetExamplesOptions {
+  region: string;
+  datasetId: string;
+  examples: ({ exampleId: string } & Record<string, unknown>)[];
+  /** Idempotency token (8-hour service-side dedup). Reuse across retries of the same batch. */
+  clientToken?: string;
+}
+
+export interface UpdateDatasetExamplesResult {
+  updatedCount: number;
+  status: string;
+}
+
+export interface DeleteDatasetExamplesOptions {
+  region: string;
+  datasetId: string;
+  exampleIds: string[];
+  /** Idempotency token (8-hour service-side dedup). Reuse across retries of the same batch. */
+  clientToken?: string;
+}
+
+export interface DeleteDatasetExamplesResult {
+  deletedCount: number;
+  status: string;
+}
+
+export interface DatasetExampleSummary {
+  exampleId: string;
+  [key: string]: unknown;
+}
+
+export interface ListDatasetExamplesOptions {
+  region: string;
+  datasetId: string;
+  maxResults?: number;
+  nextToken?: string;
+}
+
+export interface ListDatasetExamplesResult {
+  examples: DatasetExampleSummary[];
+  nextToken?: string;
+}
+
+// ============================================================================
+// HTTP signing helper
+// ============================================================================
+
+function getControlPlaneEndpoint(region: string): string {
+  const stage = process.env.AGENTCORE_STAGE?.toLowerCase();
+  if (stage === 'beta') return `https://beta.${region}.elcapcp.genesis-primitives.aws.dev`;
+  if (stage === 'gamma') return `https://gamma.${region}.elcapcp.genesis-primitives.aws.dev`;
+  return `https://bedrock-agentcore-control.${region}.${dnsSuffix(region)}`;
+}
+
+async function signedRequest(options: {
+  region: string;
+  method: string;
+  path: string;
+  body?: string;
+}): Promise<unknown> {
+  const { region, method, path, body } = options;
+  const endpoint = getControlPlaneEndpoint(region);
+  const url = new URL(path, endpoint);
+
+  const query: Record<string, string> = {};
+  url.searchParams.forEach((value, key) => {
+    query[key] = value;
+  });
+
+  const request = new HttpRequest({
+    method,
+    protocol: 'https:',
+    hostname: url.hostname,
+    path: url.pathname,
+    ...(Object.keys(query).length > 0 && { query }),
+    headers: {
+      'Content-Type': 'application/json',
+      host: url.hostname,
+    },
+    ...(body && { body }),
+  });
+
+  const credentials = getCredentialProvider() ?? defaultProvider();
+  const signer = new SignatureV4({
+    service: 'bedrock-agentcore',
+    region,
+    credentials,
+    sha256: Sha256,
+  });
+
+  const signedReq = await signer.sign(request);
+
+  const response = await fetch(`${endpoint}${path}`, {
+    method,
+    headers: signedReq.headers as Record<string, string>,
+    ...(body && { body }),
+    signal: AbortSignal.timeout(30_000),
+  });
+
+  if (!response.ok) {
+    const errorBody = await response.text();
+    throw new Error(`Dataset API error (${response.status}): ${errorBody}`);
+  }
+
+  if (response.status === 204) return {};
+  return response.json();
+}
+
+// ============================================================================
+// Dataset Operations
+// ============================================================================
+
+/**
+ * Get dataset metadata and download URL.
+ * Pass `version` to get a specific published version (e.g. "1", "2").
+ * Omit `version` to get DRAFT.
+ */
+export async function getDataset(options: GetDatasetOptions): Promise<GetDatasetResult> {
+  const { region, datasetId, version } = options;
+  const params = version ? `?datasetVersion=${version}` : '';
+
+  return (await signedRequest({
+    region,
+    method: 'GET',
+    path: `/datasets/${datasetId}${params}`,
+  })) as GetDatasetResult;
+}
+
+/**
+ * Create a new immutable version from the current DRAFT.
+ */
+export async function createDatasetVersion(options: CreateDatasetVersionOptions): Promise<CreateDatasetVersionResult> {
+  const { region, datasetId } = options;
+
+  return (await signedRequest({
+    region,
+    method: 'POST',
+    path: `/datasets/${datasetId}/versions`,
+    body: '{}',
+  })) as CreateDatasetVersionResult;
+}
+
+/**
+ * List all published versions for a dataset.
+ */
+export async function listDatasetVersions(options: ListDatasetVersionsOptions): Promise<ListDatasetVersionsResult> {
+  const { region, datasetId } = options;
+
+  return (await signedRequest({
+    region,
+    method: 'GET',
+    path: `/datasets/${datasetId}/versions`,
+  })) as ListDatasetVersionsResult;
+}
+
+/**
+ * Add examples to a dataset's DRAFT.
+ */
+export async function addDatasetExamples(options: AddDatasetExamplesOptions): Promise<AddDatasetExamplesResult> {
+  const { region, datasetId, examples, clientToken } = options;
+  const body = JSON.stringify({
+    source: {
+      inlineExamples: { examples },
+    },
+    ...(clientToken && { clientToken }),
+  });
+
+  return (await signedRequest({
+    region,
+    method: 'POST',
+    path: `/datasets/${datasetId}/examples/add`,
+    body,
+  })) as AddDatasetExamplesResult;
+}
+
+/**
+ * Update existing examples in a dataset's DRAFT by exampleId.
+ */
+export async function updateDatasetExamples(
+  options: UpdateDatasetExamplesOptions
+): Promise<UpdateDatasetExamplesResult> {
+  const { region, datasetId, examples, clientToken } = options;
+  const body = JSON.stringify({
+    examples,
+    ...(clientToken && { clientToken }),
+  });
+
+  return (await signedRequest({
+    region,
+    method: 'POST',
+    path: `/datasets/${datasetId}/examples/update`,
+    body,
+  })) as UpdateDatasetExamplesResult;
+}
+
+/**
+ * Delete examples from a dataset's DRAFT by exampleId.
+ */
+export async function deleteDatasetExamples(
+  options: DeleteDatasetExamplesOptions
+): Promise<DeleteDatasetExamplesResult> {
+  const { region, datasetId, exampleIds, clientToken } = options;
+  const body = JSON.stringify({
+    exampleIds,
+    ...(clientToken && { clientToken }),
+  });
+
+  return (await signedRequest({
+    region,
+    method: 'POST',
+    path: `/datasets/${datasetId}/examples/delete`,
+    body,
+  })) as DeleteDatasetExamplesResult;
+}
+
+/**
+ * List examples for a dataset (one page).
+ */
+export async function listDatasetExamples(options: ListDatasetExamplesOptions): Promise<ListDatasetExamplesResult> {
+  const { region, datasetId, maxResults, nextToken } = options;
+  const params = new URLSearchParams();
+  if (maxResults) params.set('maxResults', String(maxResults));
+  if (nextToken) params.set('nextToken', nextToken);
+  const query = params.toString();
+
+  const data = (await signedRequest({
+    region,
+    method: 'GET',
+    path: `/datasets/${datasetId}/examples${query ? `?${query}` : ''}`,
+  })) as { examples?: DatasetExampleSummary[]; nextToken?: string };
+
+  return {
+    examples: data.examples ?? [],
+    nextToken: data.nextToken,
+  };
+}
+
+/**
+ * Delete a specific published version of a dataset.
+ */
+export async function deleteDatasetVersionApi(options: {
+  region: string;
+  datasetId: string;
+  version: string;
+}): Promise<void> {
+  const { region, datasetId, version } = options;
+
+  await signedRequest({
+    region,
+    method: 'DELETE',
+    path: `/datasets/${datasetId}?datasetVersion=${version}`,
+  });
+}
+
+/**
+ * List all examples for a dataset, paginating through all results.
+ */
+export async function listAllDatasetExamples(options: {
+  region: string;
+  datasetId: string;
+}): Promise<DatasetExampleSummary[]> {
+  const all: DatasetExampleSummary[] = [];
+  let nextToken: string | undefined;
+
+  do {
+    const result = await listDatasetExamples({
+      region: options.region,
+      datasetId: options.datasetId,
+      maxResults: 100,
+      nextToken,
+    });
+    all.push(...result.examples);
+    nextToken = result.nextToken;
+  } while (nextToken);
+
+  return all;
+}
+
+/**
+ * Download dataset content from a pre-signed S3 URL.
+ *
+ * Two modes:
+ * - `buffer`: Returns full content as string (for push — needs in-memory diffing)
+ * - `stream`: Streams directly to file on disk (for pull — avoids memory pressure on large datasets)
+ */
+export async function downloadDataset(downloadUrl: string, options: { mode: 'buffer' }): Promise<string>;
+export async function downloadDataset(
+  downloadUrl: string,
+  options: { mode: 'stream'; filePath: string }
+): Promise<number>;
+export async function downloadDataset(
+  downloadUrl: string,
+  options: { mode: 'buffer' } | { mode: 'stream'; filePath: string }
+): Promise<string | number> {
+  const response = await fetch(downloadUrl);
+  if (!response.ok) {
+    throw new Error(`Failed to download dataset (${response.status}): ${await response.text()}`);
+  }
+
+  if (options.mode === 'buffer') {
+    return response.text();
+  }
+
+  // Stream mode: pipe response body → line counter → file
+  const { Transform } = await import('node:stream');
+  const { Readable } = await import('node:stream');
+  const { createWriteStream } = await import('node:fs');
+  const { pipeline } = await import('node:stream/promises');
+
+  let lineCount = 0;
+  const counter = new Transform({
+    transform(chunk: Buffer, _enc: string, cb: () => void) {
+      lineCount += chunk
+        .toString()
+        .split('\n')
+        .filter((l: string) => l.trim()).length;
+      this.push(chunk);
+      cb();
+    },
+  });
+
+  const nodeStream = Readable.fromWeb(response.body!);
+  const fileStream = createWriteStream(options.filePath);
+  await pipeline(nodeStream, counter, fileStream);
+
+  return lineCount;
+}
diff --git a/src/cli/aws/agentcore.ts b/src/cli/aws/agentcore.ts
index b99ea2f4e..58ebc4544 100644
--- a/src/cli/aws/agentcore.ts
+++ b/src/cli/aws/agentcore.ts
@@ -68,6 +68,8 @@ export interface InvokeAgentRuntimeOptions {
   bearerToken?: string;
   /** W3C baggage header value (e.g. config bundle ref for runtime) */
   baggage?: string;
+  /** Runtime endpoint qualifier (e.g. DEFAULT, PROMPT_V1). Defaults to DEFAULT. */
+  endpoint?: string;
 }
 
 export interface InvokeAgentRuntimeResult {
@@ -154,9 +156,10 @@ export function extractResult(text: string): string {
 /**
  * Build the invoke URL for a runtime ARN.
  */
-function buildInvokeUrl(region: string, runtimeArn: string): string {
+function buildInvokeUrl(region: string, runtimeArn: string, endpoint?: string): string {
   const escapedArn = encodeURIComponent(runtimeArn);
-  return `https://${serviceEndpoint('bedrock-agentcore', region)}/runtimes/${escapedArn}/invocations?qualifier=DEFAULT`;
+  const qualifier = endpoint ?? 'DEFAULT';
+  return `https://${serviceEndpoint('bedrock-agentcore', region)}/runtimes/${escapedArn}/invocations?qualifier=${qualifier}`;
 }
 
 /**
@@ -192,7 +195,7 @@ export function buildBearerInvokeHeaders(
  * Used when the runtime has CUSTOM_JWT authorizer configured.
  */
 async function invokeWithBearerTokenStreaming(options: InvokeAgentRuntimeOptions): Promise<StreamingInvokeResult> {
-  const url = buildInvokeUrl(options.region, options.runtimeArn);
+  const url = buildInvokeUrl(options.region, options.runtimeArn, options.endpoint);
   const headers = buildBearerInvokeHeaders(options, 'application/json, text/event-stream');
 
   const res = await fetch(url, {
@@ -278,7 +281,7 @@ async function invokeWithBearerTokenStreaming(options: InvokeAgentRuntimeOptions
  * Invoke an AgentCore Runtime using bearer token auth (non-streaming).
  */
 async function invokeWithBearerToken(options: InvokeAgentRuntimeOptions): Promise<InvokeAgentRuntimeResult> {
-  const url = buildInvokeUrl(options.region, options.runtimeArn);
+  const url = buildInvokeUrl(options.region, options.runtimeArn, options.endpoint);
   const headers = buildBearerInvokeHeaders(options, 'application/json');
 
   const res = await fetch(url, {
diff --git a/src/cli/aws/cloudwatch.ts b/src/cli/aws/cloudwatch.ts
index c67b77fcd..435688575 100644
--- a/src/cli/aws/cloudwatch.ts
+++ b/src/cli/aws/cloudwatch.ts
@@ -1,7 +1,22 @@
+import { DEFAULT_ENDPOINT_NAME } from '../constants';
 import { getCredentialProvider } from './account';
 import { arnPrefix } from './partition';
 import { CloudWatchLogsClient, FilterLogEventsCommand, StartLiveTailCommand } from '@aws-sdk/client-cloudwatch-logs';
 
+/**
+ * Resolve runtime endpoint: CLI flag → env var → DEFAULT.
+ */
+export function resolveEndpointName(optEndpoint?: string): string {
+  return optEndpoint ?? process.env.AGENTCORE_RUNTIME_ENDPOINT ?? DEFAULT_ENDPOINT_NAME;
+}
+
+/**
+ * CloudWatch log group path for an AgentCore runtime endpoint.
+ */
+export function runtimeLogGroup(runtimeId: string, endpoint?: string): string {
+  return `/aws/bedrock-agentcore/runtimes/${runtimeId}-${resolveEndpointName(endpoint)}`;
+}
+
 export interface LogEvent {
   timestamp: number;
   message: string;
diff --git a/src/cli/aws/retry.ts b/src/cli/aws/retry.ts
new file mode 100644
index 000000000..1b7c2e8f1
--- /dev/null
+++ b/src/cli/aws/retry.ts
@@ -0,0 +1,31 @@
+/**
+ * AWS error-retryability helpers.
+ *
+ * Mirrors the signals the AWS SDK's internal retry middleware uses
+ * (@smithy/service-error-classification): name-based throttling/transient
+ * sets plus HTTP status fallback. Kept intentionally small — no message
+ * matching, no ad-hoc per-service rules.
+ */
+
+const THROTTLING_NAME = /^(Throttling|TooManyRequests|RequestLimitExceeded|LimitExceeded)(Exception)?$/i;
+const TRANSIENT_NAME = /^(ServiceUnavailable|InternalServer|InternalFailure)(Exception)?$/i;
+
+interface AwsErrorShape {
+  name?: string;
+  code?: string;
+  statusCode?: number;
+  $metadata?: { httpStatusCode?: number };
+}
+
+/** Returns true if the error is a transient AWS error worth retrying. */
+export function isRetryableAwsError(err: unknown): boolean {
+  const e = err as AwsErrorShape;
+  const name = e.name ?? e.code ?? '';
+  if (THROTTLING_NAME.test(name) || TRANSIENT_NAME.test(name)) return true;
+
+  const status = e.statusCode ?? e.$metadata?.httpStatusCode;
+  if (status === 429) return true;
+  if (status !== undefined && status >= 500 && status < 600) return true;
+
+  return false;
+}
diff --git a/src/cli/cli.ts b/src/cli/cli.ts
index 5517afb9c..387a802ac 100644
--- a/src/cli/cli.ts
+++ b/src/cli/cli.ts
@@ -4,6 +4,7 @@ import { registerAdd } from './commands/add';
 import { registerArchive } from './commands/archive';
 import { registerConfigBundle } from './commands/config-bundle';
 import { registerCreate } from './commands/create';
+import { registerDataset } from './commands/dataset';
 import { registerDeploy } from './commands/deploy';
 import { registerDev } from './commands/dev';
 import { registerEval } from './commands/eval';
@@ -201,6 +202,7 @@ export function registerCommands(program: Command) {
   registerUpdate(program);
   registerValidate(program);
   registerConfigBundle(program);
+  registerDataset(program);
   registerArchive(program);
 
   // Register primitive subcommands (add agent, remove agent, add memory, etc.)
diff --git a/src/cli/cloudformation/outputs.ts b/src/cli/cloudformation/outputs.ts
index 377cc3e9d..1009a31e3 100644
--- a/src/cli/cloudformation/outputs.ts
+++ b/src/cli/cloudformation/outputs.ts
@@ -1,5 +1,6 @@
 import type {
   AgentCoreDeployedState,
+  DatasetDeployedState,
   DeployedState,
   EvaluatorDeployedState,
   MemoryDeployedState,
@@ -375,6 +376,37 @@ export function parseRuntimeEndpointOutputs(
   return endpoints;
 }
 
+/**
+ * Parse stack outputs into deployed state for datasets.
+ *
+ * Output key pattern: ApplicationDataset{PascalName}(Id|Arn)Output{Hash}
+ */
+export function parseDatasetOutputs(
+  outputs: StackOutputs,
+  datasetNames: string[]
+): Record<string, DatasetDeployedState> {
+  const datasets: Record<string, DatasetDeployedState> = {};
+  const outputKeys = Object.keys(outputs);
+
+  for (const datasetName of datasetNames) {
+    const pascal = toPascalId('Dataset', datasetName);
+    const idPrefix = `Application${pascal}IdOutput`;
+    const arnPrefix = `Application${pascal}ArnOutput`;
+
+    const idKey = outputKeys.find(k => k.startsWith(idPrefix));
+    const arnKey = outputKeys.find(k => k.startsWith(arnPrefix));
+
+    if (idKey && arnKey) {
+      datasets[datasetName] = {
+        datasetId: outputs[idKey]!,
+        datasetArn: outputs[arnKey]!,
+      };
+    }
+  }
+
+  return datasets;
+}
+
 export interface BuildDeployedStateOptions {
   targetName: string;
   stackName: string;
@@ -389,6 +421,7 @@ export interface BuildDeployedStateOptions {
   policyEngines?: Record<string, PolicyEngineDeployedState>;
   policies?: Record<string, PolicyDeployedState>;
   runtimeEndpoints?: Record<string, RuntimeEndpointDeployedState>;
+  datasets?: Record<string, DatasetDeployedState>;
 }
 
 /**
@@ -409,6 +442,7 @@ export function buildDeployedState(opts: BuildDeployedStateOptions): DeployedSta
     policyEngines,
     policies,
     runtimeEndpoints,
+    datasets,
   } = opts;
   const targetState: TargetDeployedState = {
     resources: {
@@ -448,6 +482,10 @@ export function buildDeployedState(opts: BuildDeployedStateOptions): DeployedSta
     targetState.resources!.runtimeEndpoints = runtimeEndpoints;
   }
 
+  if (datasets && Object.keys(datasets).length > 0) {
+    targetState.resources!.datasets = datasets;
+  }
+
   // Carry forward config bundles from existing state (managed post-deploy, not via CFN outputs)
   const existingConfigBundles = existingState?.targets?.[targetName]?.resources?.configBundles;
   if (existingConfigBundles && Object.keys(existingConfigBundles).length > 0) {
diff --git a/src/cli/commands/add/__tests__/add-dataset.test.ts b/src/cli/commands/add/__tests__/add-dataset.test.ts
new file mode 100644
index 000000000..48aef4f50
--- /dev/null
+++ b/src/cli/commands/add/__tests__/add-dataset.test.ts
@@ -0,0 +1,100 @@
+import { runCLI } from '../../../../test-utils/index.js';
+import { randomUUID } from 'node:crypto';
+import { mkdir, readFile, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
+describe('add dataset command', () => {
+  let testDir: string;
+  let projectDir: string;
+
+  beforeAll(async () => {
+    testDir = join(tmpdir(), `agentcore-add-dataset-${randomUUID()}`);
+    await mkdir(testDir, { recursive: true });
+
+    // Create project
+    const projectName = 'DatasetProj';
+    const result = await runCLI(['create', '--name', projectName, '--no-agent'], testDir);
+    if (result.exitCode !== 0) {
+      throw new Error(`Failed to create project: ${result.stdout} ${result.stderr}`);
+    }
+    projectDir = join(testDir, projectName);
+  });
+
+  afterAll(async () => {
+    await rm(testDir, { recursive: true, force: true });
+  });
+
+  describe('validation', () => {
+    it('requires name flag', async () => {
+      const result = await runCLI(['add', 'dataset', '--json'], projectDir);
+      expect(result.exitCode).toBe(1);
+      const json = JSON.parse(result.stdout);
+      expect(json.success).toBe(false);
+      expect(json.error.includes('--name'), `Error: ${json.error}`).toBeTruthy();
+    });
+  });
+
+  describe('dataset creation', () => {
+    it('creates dataset as top-level resource', async () => {
+      const datasetName = `dataset${Date.now()}`;
+      const result = await runCLI(
+        ['add', 'dataset', '--name', datasetName, '--schema-type', 'AGENTCORE_EVALUATION_PREDEFINED_V1', '--json'],
+        projectDir
+      );
+
+      expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
+      const json = JSON.parse(result.stdout);
+      expect(json.success).toBe(true);
+      expect(json.datasetName).toBe(datasetName);
+
+      // Verify in agentcore.json as top-level resource
+      const projectSpec = JSON.parse(await readFile(join(projectDir, 'agentcore/agentcore.json'), 'utf-8'));
+      const dataset = projectSpec.datasets.find((d: { name: string }) => d.name === datasetName);
+      expect(dataset, 'Dataset should be in project datasets').toBeTruthy();
+    });
+
+    it('creates dataset with description', async () => {
+      const datasetName = `dsdesc${Date.now()}`;
+      const result = await runCLI(
+        [
+          'add',
+          'dataset',
+          '--name',
+          datasetName,
+          '--schema-type',
+          'AGENTCORE_EVALUATION_PREDEFINED_V1',
+          '--description',
+          'My test dataset',
+          '--json',
+        ],
+        projectDir
+      );
+
+      expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
+
+      // Verify description
+      const projectSpec = JSON.parse(await readFile(join(projectDir, 'agentcore/agentcore.json'), 'utf-8'));
+      const dataset = projectSpec.datasets.find((d: { name: string }) => d.name === datasetName);
+      expect(dataset?.description).toBe('My test dataset');
+    });
+
+    it('rejects duplicate dataset names', async () => {
+      const datasetName = `dsdup${Date.now()}`;
+      // Create first
+      await runCLI(
+        ['add', 'dataset', '--name', datasetName, '--schema-type', 'AGENTCORE_EVALUATION_PREDEFINED_V1', '--json'],
+        projectDir
+      );
+      // Try duplicate
+      const result = await runCLI(
+        ['add', 'dataset', '--name', datasetName, '--schema-type', 'AGENTCORE_EVALUATION_PREDEFINED_V1', '--json'],
+        projectDir
+      );
+      expect(result.exitCode).toBe(1);
+      const json = JSON.parse(result.stdout);
+      expect(json.success).toBe(false);
+    });
+  });
+});
diff --git a/src/cli/commands/add/types.ts b/src/cli/commands/add/types.ts
index c1dd6641f..5208b8a7e 100644
--- a/src/cli/commands/add/types.ts
+++ b/src/cli/commands/add/types.ts
@@ -1,4 +1,5 @@
 import type {
+  DatasetSchemaType,
   GatewayAuthorizerType,
   ModelProvider,
   ProtocolMode,
@@ -99,6 +100,19 @@ export interface AddMemoryOptions {
   json?: boolean;
 }
 
+// Dataset types
+export interface AddDatasetOptions {
+  name: string;
+  schemaType: DatasetSchemaType;
+  description?: string;
+  json?: boolean;
+}
+
+export interface AddDatasetResult {
+  success: boolean;
+  datasetName?: string;
+  error?: string;
+}
 // Credential types (v2: credential, no owner/user concept)
 export interface AddCredentialOptions {
   name?: string;
diff --git a/src/cli/commands/add/validate.ts b/src/cli/commands/add/validate.ts
index b39f89454..7ec0928d9 100644
--- a/src/cli/commands/add/validate.ts
+++ b/src/cli/commands/add/validate.ts
@@ -2,6 +2,8 @@ import { ConfigIO, findConfigRoot } from '../../../lib';
 import {
   AgentNameSchema,
   BuildTypeSchema,
+  DatasetNameSchema,
+  DatasetSchemaTypeSchema,
   GatewayAuthorizerTypeSchema,
   GatewayExceptionLevelSchema,
   GatewayNameSchema,
@@ -25,6 +27,7 @@ import { validateJwtAuthorizerOptions } from './auth-options';
 import type {
   AddAgentOptions,
   AddCredentialOptions,
+  AddDatasetOptions,
   AddGatewayOptions,
   AddGatewayTargetOptions,
   AddMemoryOptions,
@@ -780,6 +783,30 @@ export function validateAddMemoryOptions(options: AddMemoryOptions): ValidationR
   return { valid: true };
 }
 
+// Dataset validation
+export function validateAddDatasetOptions(options: AddDatasetOptions): ValidationResult {
+  if (!options.name) {
+    return { valid: false, error: '--name is required' };
+  }
+
+  const nameResult = DatasetNameSchema.safeParse(options.name);
+  if (!nameResult.success) {
+    return { valid: false, error: nameResult.error.issues[0]?.message ?? 'Invalid dataset name' };
+  }
+
+  if (!options.schemaType) {
+    return { valid: false, error: '--schema-type is required' };
+  }
+
+  const schemaTypeResult = DatasetSchemaTypeSchema.safeParse(options.schemaType);
+  if (!schemaTypeResult.success) {
+    const valid = DatasetSchemaTypeSchema.options.join(', ');
+    return { valid: false, error: `Invalid schema type: ${options.schemaType}. Valid options: ${valid}` };
+  }
+
+  return { valid: true };
+}
+
 // Credential validation (v2: credential resource, no owner)
 export function validateAddCredentialOptions(options: AddCredentialOptions): ValidationResult {
   if (!options.name) {
diff --git a/src/cli/commands/dataset/command.tsx b/src/cli/commands/dataset/command.tsx
new file mode 100644
index 000000000..626a1111b
--- /dev/null
+++ b/src/cli/commands/dataset/command.tsx
@@ -0,0 +1,179 @@
+/**
+ * Dataset management commands: download, publish-version, remove-version.
+ *
+ * Dataset content is synced to the service automatically during `agentcore deploy`.
+ * The local JSONL file always represents the DRAFT working copy.
+ */
+import { ConfigIO } from '../../../lib';
+import { getDataset } from '../../aws/agentcore-datasets';
+import { deleteDatasetVersion, publishDataset, pullDataset, resolveDataset } from '../../operations/dataset';
+import { runCliCommand } from '../../telemetry/cli-command-run.js';
+import { requireProject } from '../../tui/guards';
+import type { Command } from '@commander-js/extra-typings';
+import { Box, Text, render } from 'ink';
+import readline from 'node:readline';
+import React from 'react';
+
+/**
+ * Prompt user for confirmation. Returns true if confirmed.
+ */
+async function confirm(question: string): Promise<boolean> {
+  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+  const answer = await new Promise<string>(resolve => rl.question(question, resolve));
+  rl.close();
+  return answer.toLowerCase() === 'y';
+}
+
+export function registerDataset(program: Command) {
+  const datasetCmd = program.command('dataset').description('Manage dataset content and versions');
+
+  // ══════════════════════════════════════════════════════════════════════════
+  // download
+  // ══════════════════════════════════════════════════════════════════════════
+
+  datasetCmd
+    .command('download')
+    .description('Download dataset from service to local file')
+    .option('--name <name>', 'Dataset name')
+    .option('--version <version>', 'Version to pull (default: DRAFT)')
+    .option('--yes', 'Skip overwrite confirmation')
+    .option('--json', 'Output as JSON')
+    .action(async (cliOptions: { name?: string; version?: string; yes?: boolean; json?: boolean }) => {
+      requireProject();
+
+      await runCliCommand('dataset.download', !!cliOptions.json, async () => {
+        const resolved = await resolveDataset(cliOptions.name);
+        const configIO = new ConfigIO();
+        const configBaseDir = configIO.getConfigRoot();
+
+        if (!cliOptions.yes && !cliOptions.json) {
+          const versionLabel = cliOptions.version ? `version ${cliOptions.version}` : 'DRAFT';
+          console.log(`⚠ This will overwrite: ${resolved.location}`);
+          console.log(`  (pulling ${versionLabel})`);
+
+          if (!(await confirm('? Continue? (y/N) '))) {
+            console.log('Skipped.');
+            return {};
+          }
+        }
+
+        const result = await pullDataset({
+          region: resolved.region,
+          datasetId: resolved.datasetId,
+          localFilePath: resolved.location,
+          configBaseDir,
+          version: cliOptions.version,
+        });
+
+        if (cliOptions.json) {
+          console.log(JSON.stringify({ success: true, ...result }));
+        } else {
+          render(
+            <Box flexDirection="column">
+              <Text color="green">
+                ✓ {result.exampleCount} examples written to {resolved.location}
+              </Text>
+              <Text dimColor> Pulled from: {result.version === 'DRAFT' ? 'DRAFT' : `version ${result.version}`}</Text>
+            </Box>
+          );
+        }
+
+        return {};
+      });
+    });
+
+  // ══════════════════════════════════════════════════════════════════════════
+  // publish-version
+  // ══════════════════════════════════════════════════════════════════════════
+
+  datasetCmd
+    .command('publish-version')
+    .description('Publish DRAFT as a new immutable version')
+    .option('--name <name>', 'Dataset name')
+    .option('--json', 'Output as JSON')
+    .action(async (cliOptions: { name?: string; json?: boolean }) => {
+      requireProject();
+
+      await runCliCommand('dataset.publish-version', !!cliOptions.json, async () => {
+        const resolved = await resolveDataset(cliOptions.name);
+
+        // Check draftStatus before publishing
+        const info = await getDataset({ region: resolved.region, datasetId: resolved.datasetId });
+        if (info.draftStatus === 'UNMODIFIED' && !cliOptions.json) {
+          console.log('⚠ DRAFT has no unpublished changes (draftStatus: UNMODIFIED)');
+          if (!(await confirm('? Publish anyway? (y/N) '))) {
+            console.log('Skipped.');
+            return {};
+          }
+        }
+
+        const result = await publishDataset({
+          region: resolved.region,
+          datasetId: resolved.datasetId,
+        });
+
+        if (cliOptions.json) {
+          console.log(JSON.stringify({ success: true, ...result }));
+        } else {
+          render(
+            <Box flexDirection="column">
+              <Text color="green">
+                ✓ Published version {result.version} ({result.exampleCount} examples)
+              </Text>
+              <Text dimColor> draftStatus: {result.draftStatus}</Text>
+            </Box>
+          );
+        }
+
+        return {};
+      });
+    });
+
+  // ══════════════════════════════════════════════════════════════════════════
+  // remove-version
+  // ══════════════════════════════════════════════════════════════════════════
+
+  datasetCmd
+    .command('remove-version')
+    .description('Delete a specific published version')
+    .argument('<version-id>', 'Version number to remove')
+    .option('--name <name>', 'Dataset name')
+    .option('--json', 'Output as JSON')
+    .action(async (versionId: string, cliOptions: { name?: string; json?: boolean }) => {
+      requireProject();
+
+      await runCliCommand('dataset.remove-version', !!cliOptions.json, async () => {
+        const resolved = await resolveDataset(cliOptions.name);
+
+        if (!cliOptions.json) {
+          console.log(`⚠ This will permanently delete version ${versionId} of dataset "${resolved.name}".`);
+          if (!(await confirm('? Continue? (y/N) '))) {
+            console.log('Skipped.');
+            return {};
+          }
+        }
+
+        await deleteDatasetVersion({
+          region: resolved.region,
+          datasetId: resolved.datasetId,
+          version: versionId,
+        });
+
+        if (cliOptions.json) {
+          console.log(JSON.stringify({ success: true, name: resolved.name, deletedVersion: versionId }));
+        } else {
+          render(
+            <Box flexDirection="column">
+              <Text color="green">
+                ✓ Deleted version {versionId} of dataset &quot;{resolved.name}&quot;
+              </Text>
+            </Box>
+          );
+        }
+
+        return {};
+      });
+    });
+
+  return datasetCmd;
+}
diff --git a/src/cli/commands/dataset/index.ts b/src/cli/commands/dataset/index.ts
new file mode 100644
index 000000000..0cb62e60e
--- /dev/null
+++ b/src/cli/commands/dataset/index.ts
@@ -0,0 +1 @@
+export { registerDataset } from './command';
diff --git a/src/cli/commands/deploy/actions.ts b/src/cli/commands/deploy/actions.ts
index eba2ab113..4422ebe2c 100644
--- a/src/cli/commands/deploy/actions.ts
+++ b/src/cli/commands/deploy/actions.ts
@@ -8,6 +8,7 @@ import {
   buildDeployedState,
   getStackOutputs,
   parseAgentOutputs,
+  parseDatasetOutputs,
   parseEvaluatorOutputs,
   parseGatewayOutputs,
   parseMemoryOutputs,
@@ -39,6 +40,7 @@ import {
   resolveConfigBundleComponentKeys,
   setupConfigBundles,
 } from '../../operations/deploy/post-deploy-config-bundles';
+import { syncDatasets } from '../../operations/deploy/post-deploy-datasets';
 import { setupHttpGateways } from '../../operations/deploy/post-deploy-http-gateways';
 import { enableOnlineEvalConfigs } from '../../operations/deploy/post-deploy-online-evals';
 import { toStackName } from '../import/import-utils';
@@ -463,6 +465,10 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
       ) ?? {};
     const gateways = parseGatewayOutputs(outputs, gatewaySpecs);
 
+    // Parse dataset outputs
+    const datasetNames = (context.projectSpec.datasets ?? []).map(d => d.name);
+    const datasets = parseDatasetOutputs(outputs, datasetNames);
+
     const existingState = await configIO.readDeployedState().catch(() => undefined);
     let deployedState = buildDeployedState({
       targetName: target.name,
@@ -478,6 +484,7 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
       policyEngines,
       policies,
       runtimeEndpoints,
+      datasets,
     });
     await configIO.writeDeployedState(deployedState);
 
@@ -499,10 +506,11 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
 
     endStep('success');
 
+    const postDeployWarnings: string[] = [];
+
     // Post-deploy: Enable online eval configs that have enableOnCreate (CFN deploys them as DISABLED).
     // Only enable configs that are newly deployed — skip configs that already existed before this
     // deploy run, so we don't re-enable configs a customer intentionally disabled.
-    const postDeployWarnings: string[] = [];
     const onlineEvalFullSpecs = context.projectSpec.onlineEvalConfigs ?? [];
     const deployedOnlineEvalConfigs = deployedState.targets?.[target.name]?.resources?.onlineEvalConfigs ?? {};
     const previouslyDeployedOnlineEvals = existingState?.targets?.[target.name]?.resources?.onlineEvalConfigs ?? {};
@@ -522,6 +530,43 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
       }
     }
 
+    // Post-deploy: Sync dataset examples from local JSONL to service DRAFT.
+    // Uses a local content hash to skip unchanged files (hybrid approach).
+    const datasetSpecs = context.projectSpec.datasets ?? [];
+    const deployedDatasetsRecord = deployedState.targets?.[target.name]?.resources?.datasets ?? {};
+    if (datasetSpecs.length > 0 && Object.keys(deployedDatasetsRecord).length > 0) {
+      const datasetSyncResult = await syncDatasets({
+        region: target.region,
+        datasets: datasetSpecs,
+        deployedDatasets: deployedDatasetsRecord,
+        configBaseDir: configIO.getConfigRoot(),
+      });
+
+      // Update deployed state with new content hashes
+      if (datasetSyncResult.results.some(r => r.status === 'synced')) {
+        const updatedState = await configIO.readDeployedState().catch(() => deployedState);
+        const targetResources = updatedState.targets[target.name]?.resources;
+        if (targetResources) {
+          targetResources.datasets = datasetSyncResult.updatedDatasets;
+          await configIO.writeDeployedState(updatedState);
+          deployedState = updatedState;
+        }
+      }
+
+      if (datasetSyncResult.hasErrors) {
+        const errors = datasetSyncResult.results.filter(r => r.status === 'error');
+        const errorMessages = errors.map(err => `"${err.datasetName}": ${err.error}`).join('; ');
+        logger.log(`Dataset sync warnings: ${errorMessages}`, 'warn');
+        postDeployWarnings.push(...errors.map(err => `Dataset "${err.datasetName}": ${err.error}`));
+      }
+
+      for (const r of datasetSyncResult.results) {
+        if (r.status === 'synced') {
+          logger.log(`Dataset "${r.datasetName}": +${r.added} added, ~${r.updated} updated, -${r.deleted} deleted`);
+        }
+      }
+    }
+
     // Pre-gateway: Delete orphaned AB tests so their gateway rules are cleaned up
     // before we attempt to delete orphaned HTTP gateways.
     const existingABTestsForCleanup = deployedState.targets?.[target.name]?.resources?.abTests;
diff --git a/src/cli/commands/logs/__tests__/action.test.ts b/src/cli/commands/logs/__tests__/action.test.ts
index 1cd58c625..807fb87de 100644
--- a/src/cli/commands/logs/__tests__/action.test.ts
+++ b/src/cli/commands/logs/__tests__/action.test.ts
@@ -63,6 +63,7 @@ describe('resolveAgentContext', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     },
     deployedState: {
       targets: {
@@ -127,6 +128,7 @@ describe('resolveAgentContext', () => {
         configBundles: [],
         abTests: [],
         httpGateways: [],
+        datasets: [],
       },
     });
     const result = resolveAgentContext(context, {});
@@ -171,6 +173,7 @@ describe('resolveAgentContext', () => {
         configBundles: [],
         abTests: [],
         httpGateways: [],
+        datasets: [],
       },
       deployedState: {
         targets: {
@@ -225,6 +228,7 @@ describe('resolveAgentContext', () => {
         configBundles: [],
         abTests: [],
         httpGateways: [],
+        datasets: [],
       },
     });
     const result = resolveAgentContext(context, {});
diff --git a/src/cli/commands/logs/action.ts b/src/cli/commands/logs/action.ts
index b045b1c96..6f2530387 100644
--- a/src/cli/commands/logs/action.ts
+++ b/src/cli/commands/logs/action.ts
@@ -1,7 +1,7 @@
 import { ResourceNotFoundError, ValidationError } from '../../../lib';
 import type { Result } from '../../../lib/result';
 import { parseTimeString } from '../../../lib/utils';
-import { searchLogs, streamLogs } from '../../aws/cloudwatch';
+import { runtimeLogGroup, searchLogs, streamLogs } from '../../aws/cloudwatch';
 import { DEFAULT_ENDPOINT_NAME } from '../../constants';
 import type { DeployedProjectConfig } from '../../operations/resolve-agent';
 import { loadDeployedProjectConfig, resolveAgent } from '../../operations/resolve-agent';
@@ -53,7 +53,7 @@ export function resolveAgentContext(
   }
   const { agent } = result;
   const endpointName = DEFAULT_ENDPOINT_NAME;
-  const logGroupName = `/aws/bedrock-agentcore/runtimes/${agent.runtimeId}-${endpointName}`;
+  const logGroupName = runtimeLogGroup(agent.runtimeId);
   return {
     success: true,
     agentContext: {
diff --git a/src/cli/commands/remove/command.tsx b/src/cli/commands/remove/command.tsx
index 369a323d7..c4b296089 100644
--- a/src/cli/commands/remove/command.tsx
+++ b/src/cli/commands/remove/command.tsx
@@ -38,6 +38,7 @@ async function handleRemoveAll(_options: RemoveAllOptions): Promise<RemoveResult
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     });
 
     // Preserve aws-targets.json and deployed-state.json so that
diff --git a/src/cli/commands/remove/types.ts b/src/cli/commands/remove/types.ts
index b45c3ba4a..dafcbee3c 100644
--- a/src/cli/commands/remove/types.ts
+++ b/src/cli/commands/remove/types.ts
@@ -12,7 +12,8 @@ export type ResourceType =
   | 'policy-engine'
   | 'policy'
   | 'config-bundle'
-  | 'ab-test';
+  | 'ab-test'
+  | 'dataset';
 
 export interface RemoveOptions {
   resourceType: ResourceType;
diff --git a/src/cli/commands/run/command.tsx b/src/cli/commands/run/command.tsx
index 09b18358d..091d95727 100644
--- a/src/cli/commands/run/command.tsx
+++ b/src/cli/commands/run/command.tsx
@@ -36,7 +36,10 @@ function formatRunOutput(result: Awaited<ReturnType<typeof handleRunEval>>): voi
     hour: '2-digit',
     minute: '2-digit',
   });
-  console.log(`\nAgent: ${run.agent} | ${date} | Sessions: ${run.sessionCount} | Lookback: ${run.lookbackDays}d`);
+  const lookbackStr = run.source === 'dataset' ? '' : ` | Lookback: ${run.lookbackDays}d`;
+  const datasetStr =
+    run.source === 'dataset' && run.dataset ? ` | Dataset: ${run.dataset.id}@${run.dataset.version}` : '';
+  console.log(`\nAgent: ${run.agent} | ${date} | Sessions: ${run.sessionCount}${lookbackStr}${datasetStr}`);
 
   if (run.referenceInputs) {
     const parts: string[] = [];
@@ -91,6 +94,8 @@ export const registerRun = (program: Command) => {
     .option('--expected-trajectory <names>', 'Ground truth: expected tool call names in order (comma-separated)')
     .option('--expected-response <text>', 'Ground truth: expected agent response text to compare against')
     .option('--output <path>', 'Custom output file path for results')
+    .option('--dataset <name>', 'Dataset name — invoke agent with dataset scenarios instead of historical traces')
+    .option('--dataset-version <version>', 'Dataset version to use (omit for local file, or N/DRAFT)')
     .option('--json', 'Output as JSON')
     .action(
       async (cliOptions: {
@@ -107,6 +112,8 @@ export const registerRun = (program: Command) => {
         expectedResponse?: string;
         days: string;
         output?: string;
+        dataset?: string;
+        datasetVersion?: string;
         json?: boolean;
       }) => {
         const isArnMode = !!(cliOptions.runtimeArn && cliOptions.evaluatorArn);
@@ -140,6 +147,8 @@ export const registerRun = (program: Command) => {
           expectedResponse: cliOptions.expectedResponse,
           days: parseInt(cliOptions.days, 10),
           output: cliOptions.output,
+          dataset: cliOptions.dataset,
+          datasetVersion: cliOptions.datasetVersion,
           json: cliOptions.json,
         };
 
@@ -180,6 +189,12 @@ export const registerRun = (program: Command) => {
       'JSON file with session metadata and ground truth (assertions, expected trajectory, turns)'
     )
     .option('--region <region>', 'AWS region (auto-detected if omitted)')
+    .option(
+      '--endpoint <name>',
+      'Runtime endpoint name (e.g. PROMPT_V1). Defaults to AGENTCORE_RUNTIME_ENDPOINT env var, then DEFAULT'
+    )
+    .option('--dataset <name>', 'Dataset name — invoke agent with dataset scenarios before batch evaluation')
+    .option('--dataset-version <version>', 'Dataset version to use (omit for local file, or N/DRAFT)')
     .option('--json', 'Output as JSON')
     .action(
       async (cliOptions: {
@@ -190,6 +205,9 @@ export const registerRun = (program: Command) => {
         sessionIds?: string[];
         groundTruth?: string;
         region?: string;
+        endpoint?: string;
+        dataset?: string;
+        datasetVersion?: string;
         json?: boolean;
       }) => {
         requireProject();
@@ -218,9 +236,12 @@ export const registerRun = (program: Command) => {
             evaluators: cliOptions.evaluator,
             name: cliOptions.name,
             region: cliOptions.region,
+            endpoint: cliOptions.endpoint,
             sessionIds: cliOptions.sessionIds,
             lookbackDays: lookbackDays && !isNaN(lookbackDays) ? lookbackDays : undefined,
             sessionMetadata,
+            dataset: cliOptions.dataset,
+            datasetVersion: cliOptions.datasetVersion,
             onProgress: cliOptions.json
               ? undefined
               : (_status, message) => {
@@ -231,7 +252,16 @@ export const registerRun = (program: Command) => {
           // Save results locally
           if (result.success) {
             try {
-              const filePath = saveBatchEvalRun(result);
+              const datasetInfo = cliOptions.dataset
+                ? {
+                    source: 'dataset',
+                    dataset: {
+                      id: cliOptions.dataset,
+                      version: cliOptions.datasetVersion ?? 'LOCAL',
+                    },
+                  }
+                : {};
+              const filePath = saveBatchEvalRun({ result, ...datasetInfo });
               if (!cliOptions.json) {
                 console.log(`\nResults saved to: ${filePath}`);
               }
diff --git a/src/cli/commands/status/action.ts b/src/cli/commands/status/action.ts
index e821b1f32..f2e9edb6e 100644
--- a/src/cli/commands/status/action.ts
+++ b/src/cli/commands/status/action.ts
@@ -23,6 +23,7 @@ export interface ResourceStatusEntry {
     | 'policy'
     | 'config-bundle'
     | 'ab-test'
+    | 'dataset'
     | 'runtime-endpoint';
   name: string;
   deploymentState: ResourceDeploymentState;
@@ -238,6 +239,14 @@ export function computeResourceStatuses(
     getLocalDetail: item => item.description,
   });
 
+  const datasets = diffResourceSet({
+    resourceType: 'dataset',
+    localItems: project.datasets ?? [],
+    deployedRecord: resources?.datasets ?? {},
+    getIdentifier: deployed => deployed.datasetArn,
+    getLocalDetail: item => item.schemaType,
+  });
+
   const abTests = diffResourceSet({
     resourceType: 'ab-test',
     localItems: project.abTests ?? [],
@@ -296,6 +305,7 @@ export function computeResourceStatuses(
     ...onlineEvalConfigs,
     ...policyEngines,
     ...policies,
+    ...datasets,
     ...configBundles,
     ...abTests,
   ];
diff --git a/src/cli/commands/status/command.tsx b/src/cli/commands/status/command.tsx
index a155d71f0..c3588e153 100644
--- a/src/cli/commands/status/command.tsx
+++ b/src/cli/commands/status/command.tsx
@@ -1,5 +1,7 @@
 import { serializeResult } from '../../../lib';
 import { getErrorMessage } from '../../errors';
+import { getDatasetStatus } from '../../operations/dataset';
+import type { DatasetStatusResult } from '../../operations/dataset';
 import { COMMAND_DESCRIPTIONS } from '../../tui/copy';
 import { requireProject } from '../../tui/guards';
 import type { ResourceStatusEntry } from './action';
@@ -20,6 +22,7 @@ const VALID_RESOURCE_TYPES = [
   'policy',
   'config-bundle',
   'ab-test',
+  'dataset',
 ] as const;
 const VALID_STATES = ['deployed', 'local-only', 'pending-removal'] as const;
 
@@ -62,7 +65,7 @@ export const registerStatus = (program: Command) => {
     .option('--target <name>', 'Select deployment target')
     .option(
       '--type <type>',
-      'Filter by resource type (agent, runtime-endpoint, memory, credential, gateway, evaluator, online-eval, policy-engine, policy, config-bundle, ab-test)'
+      'Filter by resource type (agent, runtime-endpoint, memory, credential, gateway, evaluator, online-eval, policy-engine, policy, config-bundle, ab-test, dataset)'
     )
     .option('--state <state>', 'Filter by deployment state (deployed, local-only, pending-removal)')
     .option('--runtime <name>', 'Filter to a specific runtime')
@@ -153,8 +156,28 @@ export const registerStatus = (program: Command) => {
         const policies = filtered.filter(r => r.resourceType === 'policy');
         const configBundles = filtered.filter(r => r.resourceType === 'config-bundle');
         const abTests = filtered.filter(r => r.resourceType === 'ab-test');
+        const datasets = filtered.filter(r => r.resourceType === 'dataset');
         // TODO: Add http-gateway resource type when diffResourceSet for HTTP gateways is added to action.ts
 
+        // Fetch enriched dataset info when --type dataset is specified
+        let datasetDetails: DatasetStatusResult[] = [];
+        if (cliOptions.type === 'dataset' && datasets.length > 0 && result.targetRegion && result.targetName) {
+          const deployedState = context.deployedState;
+          const targetResources = deployedState.targets?.[result.targetName]?.resources;
+          const deployedDatasets = targetResources?.datasets ?? {};
+
+          const detailPromises = datasets
+            .filter(d => d.deploymentState === 'deployed' && deployedDatasets[d.name])
+            .map(d =>
+              getDatasetStatus({
+                region: result.targetRegion!,
+                datasetId: deployedDatasets[d.name]!.datasetId,
+                name: d.name,
+              }).catch(() => null)
+            );
+          datasetDetails = (await Promise.all(detailPromises)).filter((d): d is DatasetStatusResult => d !== null);
+        }
+
         render(
           <Box flexDirection="column">
             <Text bold>
@@ -292,6 +315,57 @@ export const registerStatus = (program: Command) => {
               </Box>
             )}
 
+            {datasets.length > 0 && (
+              <Box flexDirection="column" marginTop={1}>
+                <Text bold>Datasets</Text>
+                {datasets.map(entry => (
+                  <ResourceEntry key={`${entry.resourceType}-${entry.name}`} entry={entry} />
+                ))}
+                {datasetDetails.length > 0 &&
+                  datasetDetails.map(d => (
+                    <Box key={d.datasetId} flexDirection="column" marginTop={1} marginLeft={2}>
+                      <Text bold>{d.name}</Text>
+                      <Text dimColor> Schema: {d.schemaType}</Text>
+                      <Text>
+                        {' '}
+                        DRAFT: {d.draftExampleCount} examples{' '}
+                        <Text color={d.draftStatus === 'MODIFIED' ? 'yellow' : 'green'}>({d.draftStatus})</Text>
+                        {' · Updated: '}
+                        {new Date(d.updatedAt * 1000).toLocaleDateString([], {
+                          month: 'short',
+                          day: 'numeric',
+                          year: 'numeric',
+                        })}
+                      </Text>
+                      {d.versions.length > 0 ? (
+                        <Box flexDirection="column">
+                          <Text dimColor> Versions:</Text>
+                          {d.versions.map((v, i) => (
+                            <Text key={v.datasetVersion} dimColor={i > 0}>
+                              {'   '}v{v.datasetVersion}
+                              {i === 0 ? ' (latest)' : ''} —{' '}
+                              {v.failureReason ? (
+                                <Text color="red">FAILED: {v.failureReason}</Text>
+                              ) : (
+                                <>{v.exampleCount} examples</>
+                              )}
+                              {' · Created: '}
+                              {new Date(v.createdAt * 1000).toLocaleDateString([], {
+                                month: 'short',
+                                day: 'numeric',
+                                year: 'numeric',
+                              })}
+                            </Text>
+                          ))}
+                        </Box>
+                      ) : (
+                        <Text dimColor> No published versions</Text>
+                      )}
+                    </Box>
+                  ))}
+              </Box>
+            )}
+
             {/* TODO: Add HTTP Gateways render section when diffResourceSet is added to action.ts */}
 
             {filtered.length === 0 && <Text dimColor>No resources match the given filters.</Text>}
diff --git a/src/cli/external-requirements/__tests__/checks-extended.test.ts b/src/cli/external-requirements/__tests__/checks-extended.test.ts
index 462d9be14..6ee6a2a90 100644
--- a/src/cli/external-requirements/__tests__/checks-extended.test.ts
+++ b/src/cli/external-requirements/__tests__/checks-extended.test.ts
@@ -56,6 +56,7 @@ describe('requiresUv', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
     expect(requiresUv(project)).toBe(true);
   });
@@ -84,6 +85,7 @@ describe('requiresUv', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
     expect(requiresUv(project)).toBe(false);
   });
@@ -103,6 +105,7 @@ describe('requiresUv', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
     expect(requiresUv(project)).toBe(false);
   });
@@ -133,6 +136,7 @@ describe('requiresContainerRuntime', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
     expect(requiresContainerRuntime(project)).toBe(true);
   });
@@ -161,6 +165,7 @@ describe('requiresContainerRuntime', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
     expect(requiresContainerRuntime(project)).toBe(false);
   });
@@ -180,6 +185,7 @@ describe('requiresContainerRuntime', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
     expect(requiresContainerRuntime(project)).toBe(false);
   });
@@ -216,6 +222,7 @@ describe('requiresContainerRuntime', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
     expect(requiresContainerRuntime(project)).toBe(true);
   });
@@ -286,6 +293,7 @@ describe('checkDependencyVersions', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const result = await checkDependencyVersions(project);
@@ -309,6 +317,7 @@ describe('checkDependencyVersions', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const result = await checkDependencyVersions(project);
@@ -340,6 +349,7 @@ describe('checkDependencyVersions', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const result = await checkDependencyVersions(project);
diff --git a/src/cli/logging/remove-logger.ts b/src/cli/logging/remove-logger.ts
index 54f8aa0ba..fc5dbe8bc 100644
--- a/src/cli/logging/remove-logger.ts
+++ b/src/cli/logging/remove-logger.ts
@@ -19,7 +19,8 @@ export interface RemoveLoggerOptions {
     | 'policy-engine'
     | 'policy'
     | 'config-bundle'
-    | 'ab-test';
+    | 'ab-test'
+    | 'dataset';
   /** Name of the resource being removed */
   resourceName: string;
 }
diff --git a/src/cli/operations/agent/generate/write-agent-to-project.ts b/src/cli/operations/agent/generate/write-agent-to-project.ts
index 38c89fd85..8bf810ea3 100644
--- a/src/cli/operations/agent/generate/write-agent-to-project.ts
+++ b/src/cli/operations/agent/generate/write-agent-to-project.ts
@@ -74,6 +74,7 @@ export async function writeAgentToProject(config: GenerateConfig, options?: Writ
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     await configIO.writeProjectSpec(project);
diff --git a/src/cli/operations/dataset/__tests__/publish.test.ts b/src/cli/operations/dataset/__tests__/publish.test.ts
new file mode 100644
index 000000000..5c1c68787
--- /dev/null
+++ b/src/cli/operations/dataset/__tests__/publish.test.ts
@@ -0,0 +1,45 @@
+import { publishDataset } from '../publish.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockCreateDatasetVersion = vi.fn();
+const mockWaitForDatasetActive = vi.fn();
+const mockGetDataset = vi.fn();
+
+vi.mock('../../../aws/agentcore-datasets', () => ({
+  createDatasetVersion: (...args: unknown[]) => mockCreateDatasetVersion(...args),
+  getDataset: (...args: unknown[]) => mockGetDataset(...args),
+}));
+
+vi.mock('../wait', () => ({
+  waitForDatasetActive: (...args: unknown[]) => mockWaitForDatasetActive(...args),
+}));
+
+describe('publishDataset', () => {
+  afterEach(() => vi.clearAllMocks());
+
+  it('calls createDatasetVersion, waits for ACTIVE, returns version + count + draftStatus', async () => {
+    mockCreateDatasetVersion.mockResolvedValue({
+      datasetArn: 'arn:ds:1',
+      datasetId: 'ds-1',
+      datasetVersion: '3',
+      status: 'CREATING',
+      createdAt: 1716230000,
+    });
+    mockWaitForDatasetActive.mockResolvedValue(undefined);
+    mockGetDataset.mockResolvedValue({
+      datasetId: 'ds-1',
+      status: 'ACTIVE',
+      exampleCount: 50,
+      draftStatus: 'UNMODIFIED',
+      datasetVersion: 'DRAFT',
+    });
+
+    const result = await publishDataset({ region: 'us-east-1', datasetId: 'ds-1' });
+
+    expect(result.version).toBe('3');
+    expect(result.exampleCount).toBe(50);
+    expect(result.draftStatus).toBe('UNMODIFIED');
+    expect(mockCreateDatasetVersion).toHaveBeenCalledWith({ region: 'us-east-1', datasetId: 'ds-1' });
+    expect(mockWaitForDatasetActive).toHaveBeenCalledWith('us-east-1', 'ds-1');
+  });
+});
diff --git a/src/cli/operations/dataset/__tests__/pull.test.ts b/src/cli/operations/dataset/__tests__/pull.test.ts
new file mode 100644
index 000000000..923bf08ae
--- /dev/null
+++ b/src/cli/operations/dataset/__tests__/pull.test.ts
@@ -0,0 +1,73 @@
+import { pullDataset } from '../pull.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockGetDataset = vi.fn();
+const mockDownloadDataset = vi.fn();
+
+vi.mock('../../../aws/agentcore-datasets', () => ({
+  getDataset: (...args: unknown[]) => mockGetDataset(...args),
+  downloadDataset: (...args: unknown[]) => mockDownloadDataset(...args),
+}));
+
+describe('pullDataset', () => {
+  afterEach(() => vi.clearAllMocks());
+
+  it('throws when dataset status is not ACTIVE', async () => {
+    mockGetDataset.mockResolvedValue({
+      datasetId: 'ds-1',
+      status: 'CREATING',
+      datasetVersion: 'DRAFT',
+    });
+
+    await expect(
+      pullDataset({
+        region: 'us-east-1',
+        datasetId: 'ds-1',
+        localFilePath: 'datasets/test.jsonl',
+        configBaseDir: '/project',
+      })
+    ).rejects.toThrow('Dataset is not ready (status: CREATING)');
+  });
+
+  it('throws when no downloadUrl available', async () => {
+    mockGetDataset.mockResolvedValue({
+      datasetId: 'ds-1',
+      status: 'ACTIVE',
+      datasetVersion: 'DRAFT',
+      downloadUrl: undefined,
+    });
+
+    await expect(
+      pullDataset({
+        region: 'us-east-1',
+        datasetId: 'ds-1',
+        localFilePath: 'datasets/test.jsonl',
+        configBaseDir: '/project',
+      })
+    ).rejects.toThrow('Dataset has no download URL available');
+  });
+
+  it('streams to file and returns exampleCount and version', async () => {
+    mockGetDataset.mockResolvedValue({
+      datasetId: 'ds-1',
+      status: 'ACTIVE',
+      datasetVersion: '2',
+      downloadUrl: 'https://s3.example.com/data',
+    });
+    mockDownloadDataset.mockResolvedValue(42);
+
+    const result = await pullDataset({
+      region: 'us-east-1',
+      datasetId: 'ds-1',
+      localFilePath: 'datasets/test.jsonl',
+      configBaseDir: '/project',
+    });
+
+    expect(result.exampleCount).toBe(42);
+    expect(result.version).toBe('2');
+    expect(mockDownloadDataset).toHaveBeenCalledWith('https://s3.example.com/data', {
+      mode: 'stream',
+      filePath: expect.stringContaining('datasets/test.jsonl'),
+    });
+  });
+});
diff --git a/src/cli/operations/dataset/__tests__/push.test.ts b/src/cli/operations/dataset/__tests__/push.test.ts
new file mode 100644
index 000000000..073442d3b
--- /dev/null
+++ b/src/cli/operations/dataset/__tests__/push.test.ts
@@ -0,0 +1,387 @@
+import { pushDataset } from '../push.js';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+// ── Hoisted mocks ─────────────────────────────────────────────────────────
+
+const mockGetDataset = vi.fn();
+const mockDownloadDataset = vi.fn();
+const mockAddDatasetExamples = vi.fn();
+const mockUpdateDatasetExamples = vi.fn();
+const mockDeleteDatasetExamples = vi.fn();
+const mockWaitForDatasetActive = vi.fn();
+const mockReadFile = vi.fn();
+const mockWriteFile = vi.fn();
+
+vi.mock('../../../aws/agentcore-datasets', () => ({
+  getDataset: (...args: unknown[]) => mockGetDataset(...args),
+  downloadDataset: (...args: unknown[]) => mockDownloadDataset(...args),
+  addDatasetExamples: (...args: unknown[]) => mockAddDatasetExamples(...args),
+  updateDatasetExamples: (...args: unknown[]) => mockUpdateDatasetExamples(...args),
+  deleteDatasetExamples: (...args: unknown[]) => mockDeleteDatasetExamples(...args),
+}));
+
+vi.mock('../wait', () => ({
+  waitForDatasetActive: (...args: unknown[]) => mockWaitForDatasetActive(...args),
+}));
+
+vi.mock('../../../aws/retry', () => ({
+  isRetryableAwsError: (err: unknown) => {
+    const e = err as { name?: string; statusCode?: number };
+    return e.name === 'ThrottlingException' || e.statusCode === 429 || (e.statusCode ?? 0) >= 500;
+  },
+}));
+
+vi.mock('node:fs/promises', () => ({
+  readFile: (...args: unknown[]) => mockReadFile(...args),
+  writeFile: (...args: unknown[]) => mockWriteFile(...args),
+}));
+
+vi.mock('node:crypto', () => ({
+  randomUUID: () => 'uuid-mock',
+}));
+
+// ── Helpers ───────────────────────────────────────────────────────────────
+
+function makeLocalContent(examples: Record<string, unknown>[]): string {
+  return examples.map(e => JSON.stringify(e)).join('\n') + '\n';
+}
+
+function makeRemoteContent(examples: Record<string, unknown>[]): string {
+  return examples.map(e => JSON.stringify(e)).join('\n') + '\n';
+}
+
+const baseOptions = {
+  region: 'us-east-1',
+  datasetId: 'ds-123',
+  localFilePath: 'datasets/test.jsonl',
+  configBaseDir: '/project',
+};
+
+// ── Tests ─────────────────────────────────────────────────────────────────
+
+describe('pushDataset', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    mockWaitForDatasetActive.mockResolvedValue(undefined);
+    mockWriteFile.mockResolvedValue(undefined);
+  });
+
+  afterEach(() => vi.restoreAllMocks());
+
+  describe('Parsing', () => {
+    it('parses valid JSONL with exampleIds into ParsedExample array', async () => {
+      const local = makeLocalContent([
+        { exampleId: 'e1', input: 'hello' },
+        { exampleId: 'e2', input: 'world' },
+      ]);
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({
+        downloadUrl: 'https://s3.example.com/data',
+        exampleCount: 2,
+      });
+      mockDownloadDataset.mockResolvedValue(
+        makeRemoteContent([
+          { exampleId: 'e1', input: 'hello' },
+          { exampleId: 'e2', input: 'world' },
+        ])
+      );
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.unchanged).toBe(2);
+      expect(result.added).toBe(0);
+      expect(result.updated).toBe(0);
+      expect(result.deleted).toBe(0);
+    });
+
+    it('throws with line number on invalid JSON', async () => {
+      const local = '{"valid":"line"}\nnot-json-at-all\n';
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: null, exampleCount: 0 });
+
+      await expect(pushDataset(baseOptions)).rejects.toThrow('Invalid JSON at line 2');
+    });
+
+    it('contentEquals returns true for same content with different key order', async () => {
+      const local = makeLocalContent([{ exampleId: 'e1', input: 'hi', output: 'bye' }]);
+      // Remote has different key order but same content
+      const remote = makeRemoteContent([{ exampleId: 'e1', output: 'bye', input: 'hi' }]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: 'https://s3/data', exampleCount: 1 });
+      mockDownloadDataset.mockResolvedValue(remote);
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.unchanged).toBe(1);
+      expect(result.updated).toBe(0);
+    });
+
+    it('contentEquals returns false for different content', async () => {
+      const local = makeLocalContent([{ exampleId: 'e1', input: 'changed' }]);
+      const remote = makeRemoteContent([{ exampleId: 'e1', input: 'original' }]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: 'https://s3/data', exampleCount: 1 });
+      mockDownloadDataset.mockResolvedValue(remote);
+      mockUpdateDatasetExamples.mockResolvedValue({ updatedCount: 1, status: 'ACTIVE' });
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.updated).toBe(1);
+      expect(result.unchanged).toBe(0);
+    });
+  });
+
+  describe('Incremental Diff', () => {
+    it('identifies examples without exampleId as adds', async () => {
+      const local = makeLocalContent([{ input: 'new example without id' }]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: null, exampleCount: 0 });
+      mockAddDatasetExamples.mockResolvedValue({ addedCount: 1, exampleIds: ['new-id-1'], status: 'ACTIVE' });
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.added).toBe(1);
+      expect(mockAddDatasetExamples).toHaveBeenCalled();
+    });
+
+    it('identifies stale exampleId (not in remote) as adds', async () => {
+      const local = makeLocalContent([{ exampleId: 'stale-id', input: 'data' }]);
+      const remote = makeRemoteContent([{ exampleId: 'other-id', input: 'other' }]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: 'https://s3/data', exampleCount: 1 });
+      mockDownloadDataset.mockResolvedValue(remote);
+      mockDeleteDatasetExamples.mockResolvedValue({ deletedCount: 1, status: 'ACTIVE' });
+      mockAddDatasetExamples.mockResolvedValue({ addedCount: 1, exampleIds: ['fresh-id'], status: 'ACTIVE' });
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.added).toBe(1);
+      expect(result.deleted).toBe(1);
+    });
+
+    it('identifies changed content as updates', async () => {
+      const local = makeLocalContent([{ exampleId: 'e1', input: 'updated-content' }]);
+      const remote = makeRemoteContent([{ exampleId: 'e1', input: 'old-content' }]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: 'https://s3/data', exampleCount: 1 });
+      mockDownloadDataset.mockResolvedValue(remote);
+      mockUpdateDatasetExamples.mockResolvedValue({ updatedCount: 1, status: 'ACTIVE' });
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.updated).toBe(1);
+    });
+
+    it('counts unchanged examples correctly', async () => {
+      const examples = [
+        { exampleId: 'e1', input: 'same1' },
+        { exampleId: 'e2', input: 'same2' },
+        { exampleId: 'e3', input: 'same3' },
+      ];
+      const local = makeLocalContent(examples);
+      const remote = makeRemoteContent(examples);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: 'https://s3/data', exampleCount: 3 });
+      mockDownloadDataset.mockResolvedValue(remote);
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.unchanged).toBe(3);
+      expect(mockAddDatasetExamples).not.toHaveBeenCalled();
+      expect(mockUpdateDatasetExamples).not.toHaveBeenCalled();
+      expect(mockDeleteDatasetExamples).not.toHaveBeenCalled();
+    });
+
+    it('identifies remote-only examples as deletes', async () => {
+      const local = makeLocalContent([{ exampleId: 'e1', input: 'kept' }]);
+      const remote = makeRemoteContent([
+        { exampleId: 'e1', input: 'kept' },
+        { exampleId: 'e2', input: 'removed' },
+      ]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: 'https://s3/data', exampleCount: 2 });
+      mockDownloadDataset.mockResolvedValue(remote);
+      mockDeleteDatasetExamples.mockResolvedValue({ deletedCount: 1, status: 'ACTIVE' });
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.deleted).toBe(1);
+      expect(mockDeleteDatasetExamples).toHaveBeenCalled();
+    });
+
+    it('writes back new exampleIds to local file after add', async () => {
+      const local = makeLocalContent([{ input: 'new-example' }]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: null, exampleCount: 0 });
+      mockAddDatasetExamples.mockResolvedValue({ addedCount: 1, exampleIds: ['assigned-id-1'], status: 'ACTIVE' });
+
+      await pushDataset(baseOptions);
+
+      expect(mockWriteFile).toHaveBeenCalled();
+      const writtenContent = mockWriteFile.mock.calls[0]![1] as string;
+      expect(writtenContent).toContain('assigned-id-1');
+    });
+
+    it('reordered examples (same IDs + content) results in zero mutations', async () => {
+      const local = makeLocalContent([
+        { exampleId: 'e2', input: 'second' },
+        { exampleId: 'e1', input: 'first' },
+      ]);
+      const remote = makeRemoteContent([
+        { exampleId: 'e1', input: 'first' },
+        { exampleId: 'e2', input: 'second' },
+      ]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: 'https://s3/data', exampleCount: 2 });
+      mockDownloadDataset.mockResolvedValue(remote);
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.unchanged).toBe(2);
+      expect(result.added).toBe(0);
+      expect(result.updated).toBe(0);
+      expect(result.deleted).toBe(0);
+    });
+  });
+
+  describe('Force Mode', () => {
+    it('force mode deletes all remote then re-adds all local', async () => {
+      const local = makeLocalContent([{ exampleId: 'e1', input: 'data' }]);
+      const remote = makeRemoteContent([
+        { exampleId: 'r1', input: 'remote1' },
+        { exampleId: 'r2', input: 'remote2' },
+      ]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: 'https://s3/data', exampleCount: 2 });
+      mockDownloadDataset.mockResolvedValue(remote);
+      mockDeleteDatasetExamples.mockResolvedValue({ deletedCount: 2, status: 'ACTIVE' });
+      mockAddDatasetExamples.mockResolvedValue({ addedCount: 1, exampleIds: ['new-id'], status: 'ACTIVE' });
+
+      const result = await pushDataset({ ...baseOptions, force: true });
+
+      expect(result.deleted).toBe(2);
+      expect(result.added).toBe(1);
+      expect(mockDeleteDatasetExamples).toHaveBeenCalled();
+      expect(mockAddDatasetExamples).toHaveBeenCalled();
+    });
+
+    it('force mode writes back all new exampleIds', async () => {
+      const local = makeLocalContent([{ exampleId: 'old1', input: 'a' }, { input: 'b' }]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: null, exampleCount: 0 });
+      mockAddDatasetExamples.mockResolvedValue({
+        addedCount: 2,
+        exampleIds: ['fresh-1', 'fresh-2'],
+        status: 'ACTIVE',
+      });
+
+      await pushDataset({ ...baseOptions, force: true });
+
+      expect(mockWriteFile).toHaveBeenCalled();
+      const writtenContent = mockWriteFile.mock.calls[0]![1] as string;
+      expect(writtenContent).toContain('fresh-1');
+      expect(writtenContent).toContain('fresh-2');
+    });
+  });
+
+  describe('Batching and Retry', () => {
+    it('batches items into chunks of API_BATCH_LIMIT (1000)', async () => {
+      // Create 2001 examples to test batching
+      const examples = Array.from({ length: 2001 }, (_, i) => ({ input: `item-${i}` }));
+      const local = makeLocalContent(examples);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: null, exampleCount: 0 });
+
+      let callCount = 0;
+      mockAddDatasetExamples.mockImplementation(({ examples: batch }: { examples: unknown[] }) => {
+        callCount++;
+        return Promise.resolve({
+          addedCount: batch.length,
+          exampleIds: batch.map((_, i) => `id-${callCount}-${i}`),
+          status: 'ACTIVE',
+        });
+      });
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.added).toBe(2001);
+      // Should be 3 batches: 1000, 1000, 1
+      expect(mockAddDatasetExamples).toHaveBeenCalledTimes(3);
+    });
+
+    it('retries transient errors up to 3 times with backoff', async () => {
+      const local = makeLocalContent([{ input: 'data' }]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: null, exampleCount: 0 });
+
+      let attempts = 0;
+      mockAddDatasetExamples.mockImplementation(() => {
+        attempts++;
+        if (attempts < 3) {
+          const err = new Error('Throttled') as Error & { name: string };
+          err.name = 'ThrottlingException';
+          return Promise.reject(err);
+        }
+        return Promise.resolve({ addedCount: 1, exampleIds: ['id-1'], status: 'ACTIVE' });
+      });
+
+      const result = await pushDataset(baseOptions);
+
+      expect(result.added).toBe(1);
+      expect(attempts).toBe(3);
+    });
+
+    it('throws immediately on non-retryable client error', async () => {
+      const local = makeLocalContent([{ input: 'data' }]);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: null, exampleCount: 0 });
+      mockAddDatasetExamples.mockRejectedValue(
+        Object.assign(new Error('Validation error'), { name: 'ValidationException', statusCode: 400 })
+      );
+
+      await expect(pushDataset(baseOptions)).rejects.toThrow('Push failed during add phase');
+    });
+
+    it('throws descriptive error with batch progress on final failure', async () => {
+      // Create 2001 examples to guarantee multiple batches
+      const examples = Array.from({ length: 2001 }, (_, i) => ({ input: `item-${i}` }));
+      const local = makeLocalContent(examples);
+
+      mockReadFile.mockResolvedValue(local);
+      mockGetDataset.mockResolvedValue({ downloadUrl: null, exampleCount: 0 });
+
+      let callCount = 0;
+      mockAddDatasetExamples.mockImplementation(() => {
+        callCount++;
+        if (callCount === 2) {
+          // Non-retryable error so it fails immediately without retry
+          const err = new Error('Validation error') as Error & { name: string; statusCode: number };
+          err.name = 'ValidationException';
+          err.statusCode = 400;
+          return Promise.reject(err);
+        }
+        return Promise.resolve({
+          addedCount: 1000,
+          exampleIds: Array.from({ length: 1000 }, (_, i) => `id-${callCount}-${i}`),
+          status: 'ACTIVE',
+        });
+      });
+
+      await expect(pushDataset(baseOptions)).rejects.toThrow(/Push failed during add phase.*1\/3 batches completed/);
+    });
+  });
+});
diff --git a/src/cli/operations/dataset/__tests__/resolve-dataset.test.ts b/src/cli/operations/dataset/__tests__/resolve-dataset.test.ts
new file mode 100644
index 000000000..ed9c9aa69
--- /dev/null
+++ b/src/cli/operations/dataset/__tests__/resolve-dataset.test.ts
@@ -0,0 +1,103 @@
+import { resolveDataset } from '../resolve-dataset.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockReadProjectSpec = vi.fn();
+const mockResolveAWSDeploymentTargets = vi.fn();
+const mockReadDeployedState = vi.fn();
+
+vi.mock('../../../../lib', () => ({
+  ConfigIO: class {
+    readProjectSpec = mockReadProjectSpec;
+    resolveAWSDeploymentTargets = mockResolveAWSDeploymentTargets;
+    readDeployedState = mockReadDeployedState;
+  },
+}));
+
+function makeDataset(name: string) {
+  return {
+    name,
+    schemaType: 'AGENTCORE_EVALUATION_PREDEFINED_V1',
+    config: { managed: { location: `datasets/${name}.jsonl` } },
+  };
+}
+
+describe('resolveDataset', () => {
+  afterEach(() => vi.clearAllMocks());
+
+  it('throws when no datasets in config', async () => {
+    mockReadProjectSpec.mockResolvedValue({ datasets: [] });
+
+    await expect(resolveDataset()).rejects.toThrow('No datasets found');
+  });
+
+  it('resolves by name when found', async () => {
+    mockReadProjectSpec.mockResolvedValue({ datasets: [makeDataset('alpha'), makeDataset('beta')] });
+    mockResolveAWSDeploymentTargets.mockResolvedValue([{ region: 'us-east-1', name: 'default' }]);
+    mockReadDeployedState.mockResolvedValue({
+      targets: {
+        default: {
+          resources: {
+            datasets: {
+              alpha: { datasetId: 'ds-alpha', datasetArn: 'arn:ds:alpha' },
+            },
+          },
+        },
+      },
+    });
+
+    const result = await resolveDataset('alpha');
+
+    expect(result.name).toBe('alpha');
+    expect(result.datasetId).toBe('ds-alpha');
+    expect(result.region).toBe('us-east-1');
+  });
+
+  it('throws with available list when name not found', async () => {
+    mockReadProjectSpec.mockResolvedValue({ datasets: [makeDataset('alpha'), makeDataset('beta')] });
+
+    await expect(resolveDataset('nonexistent')).rejects.toThrow(/not found.*Available.*alpha.*beta/);
+  });
+
+  it('auto-selects when exactly one dataset and no name', async () => {
+    mockReadProjectSpec.mockResolvedValue({ datasets: [makeDataset('only-one')] });
+    mockResolveAWSDeploymentTargets.mockResolvedValue([{ region: 'us-west-2', name: 'default' }]);
+    mockReadDeployedState.mockResolvedValue({
+      targets: {
+        default: {
+          resources: {
+            datasets: {
+              'only-one': { datasetId: 'ds-only', datasetArn: 'arn:ds:only' },
+            },
+          },
+        },
+      },
+    });
+
+    const result = await resolveDataset();
+
+    expect(result.name).toBe('only-one');
+    expect(result.datasetId).toBe('ds-only');
+  });
+
+  it('throws "Specify --name" when multiple datasets and no name', async () => {
+    mockReadProjectSpec.mockResolvedValue({ datasets: [makeDataset('a'), makeDataset('b')] });
+
+    await expect(resolveDataset()).rejects.toThrow(/Multiple datasets.*Specify --name/);
+  });
+
+  it('throws when dataset has no deployed state', async () => {
+    mockReadProjectSpec.mockResolvedValue({ datasets: [makeDataset('mine')] });
+    mockResolveAWSDeploymentTargets.mockResolvedValue([{ region: 'us-east-1', name: 'default' }]);
+    mockReadDeployedState.mockResolvedValue({
+      targets: {
+        default: {
+          resources: {
+            datasets: {},
+          },
+        },
+      },
+    });
+
+    await expect(resolveDataset('mine')).rejects.toThrow('has not been deployed');
+  });
+});
diff --git a/src/cli/operations/dataset/__tests__/status.test.ts b/src/cli/operations/dataset/__tests__/status.test.ts
new file mode 100644
index 000000000..601c67658
--- /dev/null
+++ b/src/cli/operations/dataset/__tests__/status.test.ts
@@ -0,0 +1,179 @@
+import { getDatasetStatus } from '../status';
+import { describe, expect, it, vi } from 'vitest';
+
+const mockGetDataset = vi.fn();
+const mockListDatasetVersions = vi.fn();
+
+vi.mock('../../../aws/agentcore-datasets', () => ({
+  getDataset: (...args: unknown[]) => mockGetDataset(...args),
+  listDatasetVersions: (...args: unknown[]) => mockListDatasetVersions(...args),
+}));
+
+describe('getDatasetStatus', () => {
+  it('returns correct structure with name, datasetId, schemaType, status, draftExampleCount, draftStatus, updatedAt, and versions', async () => {
+    mockGetDataset.mockResolvedValue({
+      datasetId: 'ds-123',
+      datasetArn: 'arn:aws:bedrock:us-east-1:123456789:dataset/ds-123',
+      datasetName: 'my-dataset',
+      datasetVersion: 'DRAFT',
+      schemaType: 'CONVERSATIONAL',
+      status: 'ACTIVE',
+      draftStatus: 'READY',
+      exampleCount: 42,
+      createdAt: 1716230000,
+      updatedAt: 1716235200,
+    });
+
+    mockListDatasetVersions.mockResolvedValue({
+      versions: [
+        {
+          datasetVersion: '1',
+          exampleCount: 30,
+          status: 'AVAILABLE',
+          createdAt: 1716220000,
+        },
+        {
+          datasetVersion: '2',
+          exampleCount: 42,
+          status: 'AVAILABLE',
+          createdAt: 1716230000,
+        },
+      ],
+    });
+
+    const result = await getDatasetStatus({
+      region: 'us-east-1',
+      datasetId: 'ds-123',
+      name: 'my-dataset',
+    });
+
+    expect(result).toEqual({
+      name: 'my-dataset',
+      datasetId: 'ds-123',
+      schemaType: 'CONVERSATIONAL',
+      status: 'ACTIVE',
+      draftExampleCount: 42,
+      draftStatus: 'READY',
+      updatedAt: 1716235200,
+      versions: [
+        {
+          datasetVersion: '1',
+          exampleCount: 30,
+          status: 'AVAILABLE',
+          createdAt: 1716220000,
+        },
+        {
+          datasetVersion: '2',
+          exampleCount: 42,
+          status: 'AVAILABLE',
+          createdAt: 1716230000,
+        },
+      ],
+    });
+  });
+
+  it('handles empty versions list', async () => {
+    mockGetDataset.mockResolvedValue({
+      datasetId: 'ds-456',
+      datasetArn: 'arn:aws:bedrock:us-east-1:123456789:dataset/ds-456',
+      datasetName: 'empty-dataset',
+      datasetVersion: 'DRAFT',
+      schemaType: 'CONVERSATIONAL',
+      status: 'ACTIVE',
+      draftStatus: 'READY',
+      exampleCount: 5,
+      createdAt: 1716230000,
+      updatedAt: 1716235000,
+    });
+
+    mockListDatasetVersions.mockResolvedValue({
+      versions: [],
+    });
+
+    const result = await getDatasetStatus({
+      region: 'us-east-1',
+      datasetId: 'ds-456',
+      name: 'empty-dataset',
+    });
+
+    expect(result.versions).toEqual([]);
+  });
+
+  it('passes through updatedAt from getDataset', async () => {
+    mockGetDataset.mockResolvedValue({
+      datasetId: 'ds-789',
+      datasetArn: 'arn:aws:bedrock:us-east-1:123456789:dataset/ds-789',
+      datasetName: 'dated-dataset',
+      datasetVersion: 'DRAFT',
+      schemaType: 'CONVERSATIONAL',
+      status: 'ACTIVE',
+      draftStatus: 'READY',
+      exampleCount: 10,
+      createdAt: 1716220000,
+      updatedAt: 1716235200,
+    });
+
+    mockListDatasetVersions.mockResolvedValue({
+      versions: [],
+    });
+
+    const result = await getDatasetStatus({
+      region: 'us-east-1',
+      datasetId: 'ds-789',
+      name: 'dated-dataset',
+    });
+
+    expect(result.updatedAt).toBe(1716235200);
+  });
+
+  it('passes through version failureReason', async () => {
+    mockGetDataset.mockResolvedValue({
+      datasetId: 'ds-fail',
+      datasetArn: 'arn:aws:bedrock:us-east-1:123456789:dataset/ds-fail',
+      datasetName: 'failed-dataset',
+      datasetVersion: 'DRAFT',
+      schemaType: 'CONVERSATIONAL',
+      status: 'ACTIVE',
+      draftStatus: 'READY',
+      exampleCount: 10,
+      createdAt: 1716220000,
+      updatedAt: 1716230000,
+    });
+
+    mockListDatasetVersions.mockResolvedValue({
+      versions: [
+        {
+          datasetVersion: '1',
+          exampleCount: 10,
+          status: 'FAILED',
+          failureReason: 'Content validation error',
+          createdAt: 1716225000,
+        },
+      ],
+    });
+
+    const result = await getDatasetStatus({
+      region: 'us-east-1',
+      datasetId: 'ds-fail',
+      name: 'failed-dataset',
+    });
+
+    expect(result.versions[0]!.failureReason).toBe('Content validation error');
+  });
+
+  it('handles API errors gracefully by propagating them', async () => {
+    mockGetDataset.mockRejectedValue(new Error('Dataset API error (403): Access denied'));
+
+    mockListDatasetVersions.mockResolvedValue({
+      versions: [],
+    });
+
+    await expect(
+      getDatasetStatus({
+        region: 'us-east-1',
+        datasetId: 'ds-error',
+        name: 'error-dataset',
+      })
+    ).rejects.toThrow('Dataset API error (403): Access denied');
+  });
+});
diff --git a/src/cli/operations/dataset/__tests__/wait.test.ts b/src/cli/operations/dataset/__tests__/wait.test.ts
new file mode 100644
index 000000000..5530381fc
--- /dev/null
+++ b/src/cli/operations/dataset/__tests__/wait.test.ts
@@ -0,0 +1,50 @@
+import { waitForDatasetActive } from '../wait.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockGetDataset = vi.fn();
+
+vi.mock('../../../aws/agentcore-datasets', () => ({
+  getDataset: (...args: unknown[]) => mockGetDataset(...args),
+}));
+
+describe('waitForDatasetActive', () => {
+  afterEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('resolves immediately when status is ACTIVE', async () => {
+    mockGetDataset.mockResolvedValue({ status: 'ACTIVE' });
+
+    await waitForDatasetActive('us-east-1', 'ds-1');
+
+    expect(mockGetDataset).toHaveBeenCalledTimes(1);
+  });
+
+  it('throws on terminal _FAILED status', async () => {
+    mockGetDataset.mockResolvedValue({ status: 'CREATE_FAILED' });
+
+    await expect(waitForDatasetActive('us-east-1', 'ds-1')).rejects.toThrow(
+      'Dataset entered failed state: CREATE_FAILED'
+    );
+  });
+
+  it('throws timeout error after maxWaitMs', async () => {
+    // Mock Date.now to simulate time passing
+    const originalNow = Date.now;
+    let currentTime = 1000;
+    vi.spyOn(Date, 'now').mockImplementation(() => {
+      // Advance time on each call so the while loop condition fails
+      const val = currentTime;
+      currentTime += 70_000; // jump past default maxWaitMs on second call
+      return val;
+    });
+
+    mockGetDataset.mockResolvedValue({ status: 'CREATING' });
+
+    await expect(waitForDatasetActive('us-east-1', 'ds-1', 60_000)).rejects.toThrow(
+      'Timed out waiting for dataset to become ACTIVE'
+    );
+
+    Date.now = originalNow;
+  });
+});
diff --git a/src/cli/operations/dataset/delete.ts b/src/cli/operations/dataset/delete.ts
new file mode 100644
index 000000000..f4154953b
--- /dev/null
+++ b/src/cli/operations/dataset/delete.ts
@@ -0,0 +1,12 @@
+import { deleteDatasetVersionApi } from '../../aws/agentcore-datasets';
+
+export interface DeleteDatasetVersionOptions {
+  region: string;
+  datasetId: string;
+  version: string;
+}
+
+export async function deleteDatasetVersion(options: DeleteDatasetVersionOptions): Promise<void> {
+  const { region, datasetId, version } = options;
+  await deleteDatasetVersionApi({ region, datasetId, version });
+}
diff --git a/src/cli/operations/dataset/index.ts b/src/cli/operations/dataset/index.ts
new file mode 100644
index 000000000..ddd26ba15
--- /dev/null
+++ b/src/cli/operations/dataset/index.ts
@@ -0,0 +1,11 @@
+export { resolveDataset, getDatasetNames } from './resolve-dataset';
+export { pushDataset } from './push';
+export type { PushResult } from './push';
+export { pullDataset } from './pull';
+export type { PullResult } from './pull';
+export { publishDataset } from './publish';
+export type { PublishResult } from './publish';
+export { deleteDatasetVersion } from './delete';
+export { getDatasetStatus } from './status';
+export type { DatasetStatusResult } from './status';
+export { waitForDatasetActive } from './wait';
diff --git a/src/cli/operations/dataset/publish.ts b/src/cli/operations/dataset/publish.ts
new file mode 100644
index 000000000..c2c8e5480
--- /dev/null
+++ b/src/cli/operations/dataset/publish.ts
@@ -0,0 +1,37 @@
+/**
+ * Publish dataset DRAFT as a new immutable version.
+ */
+import { createDatasetVersion, getDataset } from '../../aws/agentcore-datasets';
+import { waitForDatasetActive } from './wait';
+
+export interface PublishOptions {
+  region: string;
+  datasetId: string;
+}
+
+export interface PublishResult {
+  version: string;
+  exampleCount: number;
+  draftStatus: string;
+}
+
+/**
+ * Publish the current DRAFT as a new numbered version.
+ * Polls until the dataset returns to ACTIVE state.
+ */
+export async function publishDataset(options: PublishOptions): Promise<PublishResult> {
+  const { region, datasetId } = options;
+
+  const versionResult = await createDatasetVersion({ region, datasetId });
+
+  await waitForDatasetActive(region, datasetId);
+
+  // Re-fetch to get final state after publish
+  const info = await getDataset({ region, datasetId });
+
+  return {
+    version: versionResult.datasetVersion,
+    exampleCount: info.exampleCount,
+    draftStatus: info.draftStatus ?? 'UNMODIFIED',
+  };
+}
diff --git a/src/cli/operations/dataset/pull.ts b/src/cli/operations/dataset/pull.ts
new file mode 100644
index 000000000..d38678b12
--- /dev/null
+++ b/src/cli/operations/dataset/pull.ts
@@ -0,0 +1,46 @@
+/**
+ * Pull dataset content from service to local file.
+ *
+ * Uses streaming download to avoid memory pressure on large datasets.
+ */
+import { downloadDataset, getDataset } from '../../aws/agentcore-datasets';
+import { resolve } from 'node:path';
+
+export interface PullOptions {
+  region: string;
+  datasetId: string;
+  localFilePath: string;
+  configBaseDir: string;
+  version?: string;
+}
+
+export interface PullResult {
+  exampleCount: number;
+  version: string;
+}
+
+/**
+ * Pull dataset content from the service and stream to local file.
+ */
+export async function pullDataset(options: PullOptions): Promise<PullResult> {
+  const { region, datasetId, localFilePath, configBaseDir, version } = options;
+  const absolutePath = resolve(configBaseDir, localFilePath);
+
+  const datasetInfo = await getDataset({ region, datasetId, version });
+
+  if (datasetInfo.status !== 'ACTIVE') {
+    throw new Error(`Dataset is not ready (status: ${datasetInfo.status}). Please try again later.`);
+  }
+
+  if (!datasetInfo.downloadUrl) {
+    throw new Error('Dataset has no download URL available. The dataset may not be ready yet. Please try again later.');
+  }
+
+  // Stream directly to file — avoids holding full content in memory
+  const lineCount = await downloadDataset(datasetInfo.downloadUrl, { mode: 'stream', filePath: absolutePath });
+
+  return {
+    exampleCount: lineCount,
+    version: datasetInfo.datasetVersion,
+  };
+}
diff --git a/src/cli/operations/dataset/push.ts b/src/cli/operations/dataset/push.ts
new file mode 100644
index 000000000..6999f90b0
--- /dev/null
+++ b/src/cli/operations/dataset/push.ts
@@ -0,0 +1,362 @@
+/**
+ * Push local dataset file to service DRAFT using incremental diff.
+ *
+ * Algorithm:
+ * 1. Read local JSONL file
+ * 2. Download remote DRAFT via pre-signed URL
+ * 3. Diff by exampleId
+ * 4. Delete removed, update changed, add new
+ * 5. Write back exampleIds to local file
+ */
+import {
+  addDatasetExamples,
+  deleteDatasetExamples,
+  downloadDataset,
+  getDataset,
+  updateDatasetExamples,
+} from '../../aws/agentcore-datasets';
+import { isRetryableAwsError } from '../../aws/retry';
+import { waitForDatasetActive } from './wait';
+import stableStringify from 'fast-json-stable-stringify';
+import { randomUUID } from 'node:crypto';
+import { readFile, writeFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+
+/** Maximum examples per API call (service limit). */
+const API_BATCH_LIMIT = 1000;
+
+export interface PushOptions {
+  region: string;
+  datasetId: string;
+  localFilePath: string;
+  configBaseDir: string;
+  force?: boolean;
+}
+
+export interface PushResult {
+  added: number;
+  updated: number;
+  deleted: number;
+  unchanged: number;
+  totalRemote: number;
+}
+
+interface ParsedExample {
+  exampleId?: string;
+  content: Record<string, unknown>;
+  lineIndex: number;
+}
+
+// ============================================================================
+// Parsing
+// ============================================================================
+
+/**
+ * Parse a JSONL file into structured examples with line index tracking.
+ * @throws Error with line number context if any line contains invalid JSON.
+ */
+function parseLocalFile(content: string): ParsedExample[] {
+  const lines = content.split('\n').filter(line => line.trim() !== '');
+  return lines.map((line, index) => {
+    try {
+      const obj = JSON.parse(line) as Record<string, unknown>;
+      const exampleId = obj.exampleId as string | undefined;
+      return { exampleId, content: obj, lineIndex: index };
+    } catch (err) {
+      throw new Error(
+        `Invalid JSON at line ${index + 1}: ${err instanceof Error ? err.message : String(err)}\n` +
+          `  ${line.length > 120 ? line.slice(0, 120) + '...' : line}`
+      );
+    }
+  });
+}
+
+/**
+ * Parse remote JSONL (from download URL) into a map of exampleId → full content object.
+ */
+function parseRemoteJsonl(content: string): Map<string, Record<string, unknown>> {
+  const map = new Map<string, Record<string, unknown>>();
+  const lines = content.split('\n').filter(line => line.trim() !== '');
+  for (const line of lines) {
+    const obj = JSON.parse(line) as Record<string, unknown>;
+    const exampleId = obj.exampleId as string;
+    if (exampleId) {
+      map.set(exampleId, obj);
+    }
+  }
+  return map;
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/**
+ * Strip exampleId from an object, returning remaining fields.
+ * Used when submitting examples to the API (service assigns its own IDs)
+ * and when comparing content equality (ID is not part of the content).
+ */
+function stripExampleId(obj: Record<string, unknown>): Record<string, unknown> {
+  const { exampleId: _, ...rest } = obj;
+  return rest;
+}
+
+/**
+ * Compare two examples for equality (ignoring exampleId field).
+ * Uses `fast-json-stable-stringify` for deterministic, key-order-independent serialization so
+ * server-reordered examples don't appear as false-positive updates.
+ */
+function contentEquals(a: Record<string, unknown>, b: Record<string, unknown>): boolean {
+  return stableStringify(stripExampleId(a)) === stableStringify(stripExampleId(b));
+}
+
+/** Max retry attempts for a single batch on transient AWS errors. */
+const BATCH_MAX_RETRIES = 3;
+/** Base exponential-backoff delay (doubled each attempt). */
+const BATCH_RETRY_BASE_MS = 1_000;
+
+/**
+ * Run an async operation with bounded retry on transient AWS errors.
+ * Retries on throttling / 5xx / 429; surfaces 4xx client errors immediately.
+ * The operation should carry its own idempotency token so retries are safe.
+ */
+async function withRetry<R>(op: () => Promise<R>): Promise<R> {
+  let lastErr: unknown;
+  for (let attempt = 0; attempt < BATCH_MAX_RETRIES; attempt++) {
+    try {
+      return await op();
+    } catch (err) {
+      lastErr = err;
+      if (attempt === BATCH_MAX_RETRIES - 1 || !isRetryableAwsError(err)) throw err;
+      await sleep(BATCH_RETRY_BASE_MS * 2 ** attempt);
+    }
+  }
+  throw lastErr;
+}
+
+/**
+ * Execute a batched API operation with error context and retry.
+ * Processes items in chunks of API_BATCH_LIMIT, waits for ACTIVE between batches,
+ * retries each batch up to BATCH_MAX_RETRIES times with exponential backoff on
+ * transient errors, and wraps final failures with progress information. A fresh
+ * idempotency token is generated per batch and reused across its retries so the
+ * service can dedupe.
+ */
+async function batchOperation<T, R>(options: {
+  items: T[];
+  operation: (batch: T[], clientToken: string) => Promise<R>;
+  phaseName: string;
+  region: string;
+  datasetId: string;
+  waitBetweenBatches?: boolean;
+}): Promise<R[]> {
+  const { items, operation, phaseName, region, datasetId, waitBetweenBatches = true } = options;
+  if (items.length === 0) return [];
+
+  const totalBatches = Math.ceil(items.length / API_BATCH_LIMIT);
+  let completed = 0;
+  const results: R[] = [];
+
+  try {
+    for (let i = 0; i < items.length; i += API_BATCH_LIMIT) {
+      const batch = items.slice(i, i + API_BATCH_LIMIT);
+      const clientToken = randomUUID();
+      const result = await withRetry(() => operation(batch, clientToken));
+      results.push(result);
+      completed++;
+      if (waitBetweenBatches && i + API_BATCH_LIMIT < items.length) {
+        await waitForDatasetActive(region, datasetId);
+      }
+    }
+  } catch (err) {
+    throw new Error(
+      `Push failed during ${phaseName} phase (${completed}/${totalBatches} batches completed). ` +
+        `Re-run \`agentcore dataset push\` to retry and reconcile. ` +
+        `Original error: ${err instanceof Error ? err.message : String(err)}`
+    );
+  }
+
+  return results;
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+/**
+ * Write exampleIds back into the local JSONL file after push.
+ * In force mode (no addedExamples), all examples get new IDs.
+ * In incremental mode, only newly-added examples get IDs assigned.
+ */
+async function rewriteLocalFileWithIds(
+  filePath: string,
+  allExamples: ParsedExample[],
+  newIds: string[],
+  addedExamples?: ParsedExample[]
+): Promise<void> {
+  let newIdIndex = 0;
+  const lines: string[] = [];
+
+  for (const example of allExamples) {
+    if (addedExamples?.includes(example)) {
+      // Stale exampleId or new example — strip old ID and assign fresh one from API
+      const content = stripExampleId(example.content);
+      lines.push(JSON.stringify({ exampleId: newIds[newIdIndex++], ...content }));
+    } else if (!addedExamples) {
+      // Force mode — all examples get new IDs
+      const content = stripExampleId(example.content);
+      lines.push(JSON.stringify({ exampleId: newIds[newIdIndex++], ...content }));
+    } else {
+      // Unchanged or updated — keep existing content
+      lines.push(JSON.stringify(example.content));
+    }
+  }
+
+  await writeFile(filePath, lines.join('\n') + '\n', 'utf8');
+}
+
+// ============================================================================
+// Main
+// ============================================================================
+
+/**
+ * Sync local dataset file to the service DRAFT using incremental diff.
+ * In force mode, deletes all remote examples and re-adds from local.
+ */
+export async function pushDataset(options: PushOptions): Promise<PushResult> {
+  const { region, datasetId, localFilePath, configBaseDir, force } = options;
+  const absolutePath = resolve(configBaseDir, localFilePath);
+
+  // Read local file
+  const localContent = await readFile(absolutePath, 'utf8');
+  const localExamples = parseLocalFile(localContent);
+
+  // Download remote DRAFT (buffered — needed for in-memory diffing)
+  const datasetInfo = await getDataset({ region, datasetId });
+  let remoteMap = new Map<string, Record<string, unknown>>();
+  if (datasetInfo.downloadUrl && datasetInfo.exampleCount > 0) {
+    const remoteContent = await downloadDataset(datasetInfo.downloadUrl, { mode: 'buffer' });
+    remoteMap = parseRemoteJsonl(remoteContent);
+  }
+
+  if (force) {
+    // Force mode: delete all remote, re-add all local
+    if (remoteMap.size > 0) {
+      const remoteIds = Array.from(remoteMap.keys());
+      await batchOperation({
+        items: remoteIds,
+        operation: (batch, clientToken) => deleteDatasetExamples({ region, datasetId, exampleIds: batch, clientToken }),
+        phaseName: 'delete',
+        region,
+        datasetId,
+      });
+      await waitForDatasetActive(region, datasetId);
+    }
+
+    const examplesToAdd = localExamples.map(e => stripExampleId(e.content));
+    const newIds: string[] = [];
+
+    if (examplesToAdd.length > 0) {
+      const results = await batchOperation({
+        items: examplesToAdd,
+        operation: (batch, clientToken) => addDatasetExamples({ region, datasetId, examples: batch, clientToken }),
+        phaseName: 'add',
+        region,
+        datasetId,
+      });
+      for (const r of results) newIds.push(...r.exampleIds);
+    }
+
+    await rewriteLocalFileWithIds(absolutePath, localExamples, newIds);
+
+    return {
+      added: localExamples.length,
+      updated: 0,
+      deleted: remoteMap.size,
+      unchanged: 0,
+      totalRemote: localExamples.length,
+    };
+  }
+
+  // Incremental diff mode
+  const toAdd: ParsedExample[] = [];
+  const toUpdate: ParsedExample[] = [];
+  const localExampleIds = new Set<string>();
+  let unchanged = 0;
+
+  for (const local of localExamples) {
+    if (local.exampleId && remoteMap.has(local.exampleId)) {
+      // Exists in remote — check if content changed
+      localExampleIds.add(local.exampleId);
+      const remote = remoteMap.get(local.exampleId)!;
+      if (contentEquals(local.content, remote)) {
+        unchanged++;
+      } else {
+        toUpdate.push(local);
+      }
+    } else if (local.exampleId && !remoteMap.has(local.exampleId)) {
+      // Stale exampleId (not in remote) — treat as new add
+      toAdd.push(local);
+    } else {
+      // No exampleId — new example
+      toAdd.push(local);
+    }
+  }
+
+  // Remote examples not in local → delete
+  const toDeleteIds: string[] = [];
+  for (const remoteId of remoteMap.keys()) {
+    if (!localExampleIds.has(remoteId)) {
+      toDeleteIds.push(remoteId);
+    }
+  }
+
+  // Execute: Delete → Update → Add
+  if (toDeleteIds.length > 0) {
+    await batchOperation({
+      items: toDeleteIds,
+      operation: (batch, clientToken) => deleteDatasetExamples({ region, datasetId, exampleIds: batch, clientToken }),
+      phaseName: 'delete',
+      region,
+      datasetId,
+    });
+    await waitForDatasetActive(region, datasetId);
+  }
+
+  if (toUpdate.length > 0) {
+    await batchOperation({
+      items: toUpdate.map(e => e.content as { exampleId: string } & Record<string, unknown>),
+      operation: (batch, clientToken) => updateDatasetExamples({ region, datasetId, examples: batch, clientToken }),
+      phaseName: 'update',
+      region,
+      datasetId,
+    });
+    await waitForDatasetActive(region, datasetId);
+  }
+
+  const newIds: string[] = [];
+  if (toAdd.length > 0) {
+    const addExamples = toAdd.map(e => stripExampleId(e.content));
+    const results = await batchOperation({
+      items: addExamples,
+      operation: (batch, clientToken) => addDatasetExamples({ region, datasetId, examples: batch, clientToken }),
+      phaseName: 'add',
+      region,
+      datasetId,
+    });
+    for (const r of results) newIds.push(...r.exampleIds);
+  }
+
+  // Write back new exampleIds to local file
+  if (newIds.length > 0) {
+    await rewriteLocalFileWithIds(absolutePath, localExamples, newIds, toAdd);
+  }
+
+  return {
+    added: toAdd.length,
+    updated: toUpdate.length,
+    deleted: toDeleteIds.length,
+    unchanged,
+    totalRemote: localExamples.length,
+  };
+}
diff --git a/src/cli/operations/dataset/resolve-dataset.ts b/src/cli/operations/dataset/resolve-dataset.ts
new file mode 100644
index 000000000..5f5e59ec5
--- /dev/null
+++ b/src/cli/operations/dataset/resolve-dataset.ts
@@ -0,0 +1,75 @@
+/**
+ * Resolves a dataset name to its deployed state (datasetId, region, local file path).
+ */
+import { ConfigIO } from '../../../lib';
+import type { Dataset } from '../../../schema';
+
+export interface ResolvedDataset {
+  name: string;
+  datasetId: string;
+  datasetArn: string;
+  region: string;
+  location: string;
+}
+
+/**
+ * Resolve a dataset by name from the project config and deployed state.
+ *
+ * If `name` is undefined and there's exactly one dataset, auto-selects it.
+ * If `name` is undefined and there are multiple datasets, throws with available names.
+ */
+export async function resolveDataset(name?: string): Promise<ResolvedDataset> {
+  const configIO = new ConfigIO();
+  const projectSpec = await configIO.readProjectSpec();
+  const datasets: Dataset[] = projectSpec.datasets ?? [];
+
+  if (datasets.length === 0) {
+    throw new Error('No datasets found in agentcore.json. Run `agentcore add dataset` first.');
+  }
+
+  let dataset: Dataset;
+  if (name) {
+    const found = datasets.find(d => d.name === name);
+    if (!found) {
+      const available = datasets.map(d => d.name).join(', ');
+      throw new Error(`Dataset "${name}" not found. Available: ${available}`);
+    }
+    dataset = found;
+  } else if (datasets.length === 1) {
+    dataset = datasets[0]!;
+  } else {
+    const available = datasets.map(d => d.name).join(', ');
+    throw new Error(`Multiple datasets found. Specify --name. Available: ${available}`);
+  }
+
+  const targets = await configIO.resolveAWSDeploymentTargets();
+  if (targets.length === 0) {
+    throw new Error('No AWS deployment targets configured. Run `agentcore deploy` first.');
+  }
+  const region = targets[0]!.region;
+  const targetName = targets[0]!.name;
+
+  const deployedState = await configIO.readDeployedState().catch(() => undefined);
+  const datasetState = deployedState?.targets?.[targetName]?.resources?.datasets?.[dataset.name];
+
+  if (!datasetState) {
+    throw new Error(`Dataset "${dataset.name}" has not been deployed. Run \`agentcore deploy\` first.`);
+  }
+
+  return {
+    name: dataset.name,
+    datasetId: datasetState.datasetId,
+    datasetArn: datasetState.datasetArn,
+    region,
+    location: dataset.config.managed.location,
+  };
+}
+
+/**
+ * Get all dataset names from the project config.
+ */
+export async function getDatasetNames(): Promise<string[]> {
+  const configIO = new ConfigIO();
+  const projectSpec = await configIO.readProjectSpec();
+  return (projectSpec.datasets ?? []).map(d => d.name);
+}
diff --git a/src/cli/operations/dataset/status.ts b/src/cli/operations/dataset/status.ts
new file mode 100644
index 000000000..5c8f5362f
--- /dev/null
+++ b/src/cli/operations/dataset/status.ts
@@ -0,0 +1,45 @@
+/**
+ * Get dataset status — DRAFT info and version history.
+ */
+import { getDataset, listDatasetVersions } from '../../aws/agentcore-datasets';
+import type { DatasetVersionSummary } from '../../aws/agentcore-datasets';
+
+export interface StatusOptions {
+  region: string;
+  datasetId: string;
+  name: string;
+}
+
+export interface DatasetStatusResult {
+  name: string;
+  datasetId: string;
+  schemaType: string;
+  status: string;
+  draftExampleCount: number;
+  draftStatus: string;
+  updatedAt: number;
+  versions: DatasetVersionSummary[];
+}
+
+/**
+ * Get dataset status combining DRAFT info and version history.
+ */
+export async function getDatasetStatus(options: StatusOptions): Promise<DatasetStatusResult> {
+  const { region, datasetId, name } = options;
+
+  const [datasetInfo, versionsInfo] = await Promise.all([
+    getDataset({ region, datasetId }),
+    listDatasetVersions({ region, datasetId }),
+  ]);
+
+  return {
+    name,
+    datasetId,
+    schemaType: datasetInfo.schemaType,
+    status: datasetInfo.status,
+    draftExampleCount: datasetInfo.exampleCount,
+    draftStatus: datasetInfo.draftStatus ?? 'UNKNOWN',
+    updatedAt: datasetInfo.updatedAt,
+    versions: versionsInfo.versions,
+  };
+}
diff --git a/src/cli/operations/dataset/wait.ts b/src/cli/operations/dataset/wait.ts
new file mode 100644
index 000000000..5f787d55e
--- /dev/null
+++ b/src/cli/operations/dataset/wait.ts
@@ -0,0 +1,32 @@
+/**
+ * Shared polling utility for dataset operations.
+ * Waits until a dataset reaches ACTIVE status after an async mutation.
+ */
+import { getDataset } from '../../aws/agentcore-datasets';
+
+/** Maximum time to wait for dataset to become ACTIVE (ms). */
+const DEFAULT_MAX_WAIT_MS = 60_000;
+
+/** Interval between status polls (ms). */
+const POLL_INTERVAL_MS = 2_000;
+
+/**
+ * Poll GetDataset until the dataset status is ACTIVE.
+ * Throws if the dataset enters a terminal failed state or the timeout expires.
+ */
+export async function waitForDatasetActive(
+  region: string,
+  datasetId: string,
+  maxWaitMs = DEFAULT_MAX_WAIT_MS
+): Promise<void> {
+  const start = Date.now();
+  while (Date.now() - start < maxWaitMs) {
+    const result = await getDataset({ region, datasetId });
+    if (result.status === 'ACTIVE') return;
+    if (result.status.endsWith('_FAILED')) {
+      throw new Error(`Dataset entered failed state: ${result.status}`);
+    }
+    await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
+  }
+  throw new Error(`Timed out waiting for dataset to become ACTIVE (waited ${maxWaitMs / 1000}s)`);
+}
diff --git a/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts
index 75f36ebcc..9d0b67492 100644
--- a/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts
+++ b/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts
@@ -68,6 +68,7 @@ function makeProjectSpec(abTests: AgentCoreProjectSpec['abTests'] = []): AgentCo
     policyEngines: [],
     configBundles: [],
     httpGateways: [],
+    datasets: [],
     abTests,
   };
 }
diff --git a/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts
index ecfc285cd..f916a89e3 100644
--- a/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts
+++ b/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts
@@ -506,6 +506,7 @@ describe('resolveConfigBundleComponentKeys', () => {
       policyEngines: [],
       configBundles,
       httpGateways: [],
+      datasets: [],
       abTests: [],
     };
   }
diff --git a/src/cli/operations/deploy/__tests__/post-deploy-datasets.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-datasets.test.ts
new file mode 100644
index 000000000..a841898b7
--- /dev/null
+++ b/src/cli/operations/deploy/__tests__/post-deploy-datasets.test.ts
@@ -0,0 +1,101 @@
+import { syncDatasets } from '../post-deploy-datasets.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockPushDataset = vi.fn();
+const mockReadFile = vi.fn();
+
+vi.mock('../../dataset', () => ({
+  pushDataset: (...args: unknown[]) => mockPushDataset(...args),
+}));
+
+vi.mock('node:fs/promises', () => ({
+  readFile: (...args: unknown[]) => mockReadFile(...args),
+}));
+
+function makeDataset(name: string) {
+  return {
+    name,
+    schemaType: 'AGENTCORE_EVALUATION_PREDEFINED_V1' as const,
+    config: { managed: { location: `datasets/${name}.jsonl` } },
+  };
+}
+
+describe('syncDatasets', () => {
+  afterEach(() => vi.clearAllMocks());
+
+  it('skips dataset when contentHash matches', async () => {
+    // We need to compute the actual sha256 hash for the content
+    const content = '{"input":"hello"}\n';
+    const { createHash } = await import('node:crypto');
+    const expectedHash = createHash('sha256').update(content).digest('hex');
+
+    mockReadFile.mockResolvedValue(content);
+
+    const result = await syncDatasets({
+      region: 'us-east-1',
+      datasets: [makeDataset('ds1')],
+      deployedDatasets: {
+        ds1: { datasetId: 'ds-1', datasetArn: 'arn:ds:1', contentHash: expectedHash },
+      },
+      configBaseDir: '/project',
+    });
+
+    expect(result.results[0]!.status).toBe('skipped');
+    expect(mockPushDataset).not.toHaveBeenCalled();
+  });
+
+  it('calls pushDataset and updates hash when content changed', async () => {
+    mockReadFile.mockResolvedValue('{"input":"new content"}\n');
+    mockPushDataset.mockResolvedValue({ added: 1, updated: 0, deleted: 0, unchanged: 0, totalRemote: 1 });
+
+    const result = await syncDatasets({
+      region: 'us-east-1',
+      datasets: [makeDataset('ds1')],
+      deployedDatasets: {
+        ds1: { datasetId: 'ds-1', datasetArn: 'arn:ds:1', contentHash: 'old-hash-value' },
+      },
+      configBaseDir: '/project',
+    });
+
+    expect(result.results[0]!.status).toBe('synced');
+    expect(result.results[0]!.added).toBe(1);
+    expect(mockPushDataset).toHaveBeenCalledWith(
+      expect.objectContaining({
+        region: 'us-east-1',
+        datasetId: 'ds-1',
+      })
+    );
+    // Updated datasets should contain new hash
+    expect(result.updatedDatasets.ds1!.contentHash).not.toBe('old-hash-value');
+  });
+
+  it('records error and continues when push throws', async () => {
+    mockReadFile.mockResolvedValue('{"input":"data"}\n');
+    mockPushDataset.mockRejectedValue(new Error('Push failed: network error'));
+
+    const result = await syncDatasets({
+      region: 'us-east-1',
+      datasets: [makeDataset('ds1')],
+      deployedDatasets: {
+        ds1: { datasetId: 'ds-1', datasetArn: 'arn:ds:1', contentHash: 'old-hash' },
+      },
+      configBaseDir: '/project',
+    });
+
+    expect(result.hasErrors).toBe(true);
+    expect(result.results[0]!.status).toBe('error');
+    expect(result.results[0]!.error).toBe('Push failed: network error');
+  });
+
+  it('skips datasets not present in deployed state', async () => {
+    const result = await syncDatasets({
+      region: 'us-east-1',
+      datasets: [makeDataset('missing')],
+      deployedDatasets: {},
+      configBaseDir: '/project',
+    });
+
+    expect(result.results).toHaveLength(0);
+    expect(mockReadFile).not.toHaveBeenCalled();
+  });
+});
diff --git a/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts
index 32c7e6252..afb43bc9e 100644
--- a/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts
+++ b/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts
@@ -81,6 +81,7 @@ function makeProjectSpec(httpGateways: AgentCoreProjectSpec['httpGateways'] = []
     configBundles: [],
     abTests: [],
     httpGateways,
+    datasets: [],
   };
 }
 
diff --git a/src/cli/operations/deploy/__tests__/preflight.test.ts b/src/cli/operations/deploy/__tests__/preflight.test.ts
index 04b75bb52..58cfc0f12 100644
--- a/src/cli/operations/deploy/__tests__/preflight.test.ts
+++ b/src/cli/operations/deploy/__tests__/preflight.test.ts
@@ -110,6 +110,31 @@ describe('validateProject', () => {
     expect(result.isTeardownDeploy).toBe(false);
   });
 
+  it('allows deploy when datasets exist but no agents or gateways', async () => {
+    mockRequireConfigRoot.mockReturnValue('/project/agentcore');
+    mockValidate.mockReturnValue(undefined);
+    mockReadProjectSpec.mockResolvedValue({
+      name: 'test-project',
+      runtimes: [],
+      memories: [],
+      datasets: [
+        {
+          name: 'test-dataset',
+          schemaType: 'AGENTCORE_EVALUATION_PREDEFINED_V1',
+          config: { managed: { location: 'datasets/test.jsonl' } },
+        },
+      ],
+      agentCoreGateways: [],
+    });
+    mockReadAWSDeploymentTargets.mockResolvedValue([]);
+    mockValidateAwsCredentials.mockResolvedValue(undefined);
+
+    const result = await validateProject();
+
+    expect(result.projectSpec.name).toBe('test-project');
+    expect(result.isTeardownDeploy).toBe(false);
+  });
+
   it('allows deploy when both agents and gateways exist', async () => {
     mockRequireConfigRoot.mockReturnValue('/project/agentcore');
     mockValidate.mockReturnValue(undefined);
diff --git a/src/cli/operations/deploy/post-deploy-datasets.ts b/src/cli/operations/deploy/post-deploy-datasets.ts
new file mode 100644
index 000000000..9d908318d
--- /dev/null
+++ b/src/cli/operations/deploy/post-deploy-datasets.ts
@@ -0,0 +1,91 @@
+import type { Dataset, DatasetDeployedState } from '../../../schema';
+import { pushDataset } from '../dataset';
+import { createHash } from 'node:crypto';
+import { readFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+
+export interface SyncDatasetsOptions {
+  region: string;
+  datasets: Dataset[];
+  deployedDatasets: Record<string, DatasetDeployedState>;
+  configBaseDir: string;
+}
+
+export interface SyncDatasetsResult {
+  hasErrors: boolean;
+  results: DatasetSyncResultEntry[];
+  updatedDatasets: Record<string, DatasetDeployedState>;
+}
+
+export interface DatasetSyncResultEntry {
+  datasetName: string;
+  status: 'synced' | 'skipped' | 'error';
+  added?: number;
+  updated?: number;
+  deleted?: number;
+  error?: string;
+}
+
+function computeFileHash(content: string): string {
+  return createHash('sha256').update(content).digest('hex');
+}
+
+export async function syncDatasets(options: SyncDatasetsOptions): Promise<SyncDatasetsResult> {
+  const { region, datasets, deployedDatasets, configBaseDir } = options;
+  const results: DatasetSyncResultEntry[] = [];
+  const updatedDatasets = { ...deployedDatasets };
+
+  for (const dataset of datasets) {
+    const state = deployedDatasets[dataset.name];
+    if (!state) continue;
+
+    try {
+      const localFilePath = dataset.config.managed.location;
+      const absolutePath = resolve(configBaseDir, localFilePath);
+      const localContent = await readFile(absolutePath, 'utf8');
+      const currentHash = computeFileHash(localContent);
+
+      if (state.contentHash === currentHash) {
+        results.push({ datasetName: dataset.name, status: 'skipped' });
+        continue;
+      }
+
+      const pushResult = await pushDataset({
+        region,
+        datasetId: state.datasetId,
+        localFilePath,
+        configBaseDir,
+      });
+
+      // Re-read the file after push because pushDataset rewrites it with new exampleIds.
+      // The hash must reflect the actual on-disk content so subsequent deploys can skip unchanged datasets.
+      const postPushContent = await readFile(absolutePath, 'utf8');
+      const postPushHash = computeFileHash(postPushContent);
+
+      updatedDatasets[dataset.name] = {
+        ...state,
+        contentHash: postPushHash,
+      };
+
+      results.push({
+        datasetName: dataset.name,
+        status: 'synced',
+        added: pushResult.added,
+        updated: pushResult.updated,
+        deleted: pushResult.deleted,
+      });
+    } catch (err) {
+      results.push({
+        datasetName: dataset.name,
+        status: 'error',
+        error: err instanceof Error ? err.message : String(err),
+      });
+    }
+  }
+
+  return {
+    hasErrors: results.some(r => r.status === 'error'),
+    results,
+    updatedDatasets,
+  };
+}
diff --git a/src/cli/operations/deploy/preflight.ts b/src/cli/operations/deploy/preflight.ts
index ba423a088..4124dea3f 100644
--- a/src/cli/operations/deploy/preflight.ts
+++ b/src/cli/operations/deploy/preflight.ts
@@ -86,11 +86,12 @@ export async function validateProject(): Promise<PreflightContext> {
   const hasMemories = projectSpec.memories && projectSpec.memories.length > 0;
   const hasEvaluators = projectSpec.evaluators && projectSpec.evaluators.length > 0;
   const hasPolicyEngines = projectSpec.policyEngines && projectSpec.policyEngines.length > 0;
+  const hasDatasets = projectSpec.datasets && projectSpec.datasets.length > 0;
 
   // Check for gateways in agentcore.json
   const hasGateways = projectSpec.agentCoreGateways && projectSpec.agentCoreGateways.length > 0;
 
-  if (!hasAgents && !hasGateways && !hasMemories && !hasEvaluators && !hasPolicyEngines) {
+  if (!hasAgents && !hasGateways && !hasMemories && !hasEvaluators && !hasPolicyEngines && !hasDatasets) {
     let hasExistingStack = false;
     try {
       const deployedState = await configIO.readDeployedState();
diff --git a/src/cli/operations/dev/__tests__/config.test.ts b/src/cli/operations/dev/__tests__/config.test.ts
index 3d942ca7c..6ba805506 100644
--- a/src/cli/operations/dev/__tests__/config.test.ts
+++ b/src/cli/operations/dev/__tests__/config.test.ts
@@ -24,6 +24,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project);
@@ -55,6 +56,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project);
@@ -85,6 +87,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -121,6 +124,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     expect(() => getDevConfig(workingDir, project, undefined, 'NonExistentAgent')).toThrow(
@@ -152,6 +156,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, undefined, 'TsAgent');
@@ -184,6 +189,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -216,6 +222,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     // No configRoot provided
@@ -248,6 +255,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -280,6 +288,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -311,6 +320,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -342,6 +352,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -373,6 +384,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -404,6 +416,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -436,6 +449,7 @@ describe('getDevConfig', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const config = getDevConfig(workingDir, project, '/test/project/agentcore');
@@ -481,6 +495,7 @@ describe('getAgentPort', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     expect(getAgentPort(project, 'Agent1', 8080)).toBe(8080);
@@ -502,6 +517,7 @@ describe('getAgentPort', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     expect(getAgentPort(project, 'NonExistent', 9000)).toBe(9000);
@@ -528,6 +544,7 @@ describe('getDevSupportedAgents', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     expect(getDevSupportedAgents(project)).toEqual([]);
@@ -557,6 +574,7 @@ describe('getDevSupportedAgents', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const supported = getDevSupportedAgents(project);
@@ -626,6 +644,7 @@ describe('getDevSupportedAgents', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const supported = getDevSupportedAgents(project);
@@ -665,6 +684,7 @@ describe('getDevSupportedAgents', () => {
       configBundles: [],
       abTests: [],
       httpGateways: [],
+      datasets: [],
     };
 
     const supported = getDevSupportedAgents(project);
diff --git a/src/cli/operations/eval/batch-eval-storage.ts b/src/cli/operations/eval/batch-eval-storage.ts
index 9b55e5240..2c47141d1 100644
--- a/src/cli/operations/eval/batch-eval-storage.ts
+++ b/src/cli/operations/eval/batch-eval-storage.ts
@@ -15,6 +15,8 @@ export interface BatchEvalRunRecord {
   evaluators: string[];
   results: BatchEvaluationResult[];
   evaluationResults?: EvaluationResults;
+  source?: string;
+  dataset?: { id: string; version: string };
 }
 
 function getResultsDir(): string {
@@ -25,10 +27,22 @@ function getResultsDir(): string {
   return join(configRoot, '.cli', BATCH_EVAL_RESULTS_DIR);
 }
 
-export function saveBatchEvalRun(result: RunBatchEvaluationCommandResult): string {
+export interface SaveBatchEvalRunOptions {
+  result: RunBatchEvaluationCommandResult;
+  source?: string;
+  dataset?: { id: string; version: string };
+}
+
+export function saveBatchEvalRun(resultOrOptions: RunBatchEvaluationCommandResult | SaveBatchEvalRunOptions): string {
   const dir = getResultsDir();
   mkdirSync(dir, { recursive: true });
 
+  // Support both the legacy signature and the new options object
+  const isOptionsObj = 'result' in resultOrOptions;
+  const result = isOptionsObj ? resultOrOptions.result : resultOrOptions;
+  const source = isOptionsObj ? resultOrOptions.source : undefined;
+  const dataset = isOptionsObj ? resultOrOptions.dataset : undefined;
+
   const id = result.batchEvaluationId ?? 'unknown';
   const filePath = join(dir, `${id}.json`);
 
@@ -41,6 +55,8 @@ export function saveBatchEvalRun(result: RunBatchEvaluationCommandResult): strin
     evaluators: result.results.map(r => r.evaluatorId),
     results: result.results,
     evaluationResults: result.evaluationResults,
+    ...(source ? { source } : {}),
+    ...(dataset ? { dataset } : {}),
   };
 
   writeFileSync(filePath, JSON.stringify(record, null, 2));
diff --git a/src/cli/operations/eval/run-batch-evaluation.ts b/src/cli/operations/eval/run-batch-evaluation.ts
index 436cace71..4b68456ed 100644
--- a/src/cli/operations/eval/run-batch-evaluation.ts
+++ b/src/cli/operations/eval/run-batch-evaluation.ts
@@ -16,8 +16,11 @@ import type {
   GetBatchEvaluationResult,
   SessionMetadataEntry,
 } from '../../aws/agentcore-batch-evaluation';
-import { detectRegion } from '../../aws/region';
+import { resolveEndpointName, runtimeLogGroup } from '../../aws/cloudwatch';
+import { getRegion } from '../../commands/shared/region-utils';
 import { ExecLogger } from '../../logging/exec-logger';
+import { resolveAgentContext } from '../invoke/resolve-agent-context';
+import { runDatasetScenarios } from './shared/dataset-session-provider';
 import { CloudWatchLogsClient, GetLogEventsCommand } from '@aws-sdk/client-cloudwatch-logs';
 
 // ============================================================================
@@ -45,6 +48,12 @@ export interface RunBatchEvaluationOptions {
   onProgress?: (status: string, message: string) => void;
   /** Called once the batch evaluation has been created, with ID and region for cancellation */
   onStarted?: (info: { batchEvaluationId: string; region: string }) => void;
+  /** Dataset name — invoke agent with dataset scenarios before batch evaluation */
+  dataset?: string;
+  /** Dataset version (omit for local file, or N/DRAFT) */
+  datasetVersion?: string;
+  /** Runtime endpoint name (e.g. PROMPT_V1). Defaults to DEFAULT. */
+  endpoint?: string;
 }
 
 export interface BatchEvaluationResult {
@@ -71,6 +80,9 @@ export type RunBatchEvaluationCommandResult = Result & {
 // ============================================================================
 
 const DEFAULT_POLL_INTERVAL_MS = 10_000;
+
+/** Delay before submitting batch eval to allow CloudWatch span ingestion. Matches SDK default. */
+const BATCH_INGESTION_DELAY_MS = 180_000;
 const TERMINAL_STATUSES = new Set(['COMPLETED', 'COMPLETED_WITH_ERRORS', 'FAILED', 'STOPPED', 'CANCELLED']);
 
 // ============================================================================
@@ -99,10 +111,7 @@ export async function runBatchEvaluationCommand(
       configIO.resolveAWSDeploymentTargets(),
     ]);
 
-    // Use the deployed target region (from aws-targets) rather than generic detectRegion()
-    const targetRegion = awsTargets.length > 0 ? awsTargets[0]!.region : undefined;
-    const { region: detectedRegion } = await detectRegion();
-    const region = options.region ?? targetRegion ?? detectedRegion;
+    const region = await getRegion(options.region);
     const stage = process.env.AGENTCORE_STAGE?.toLowerCase() ?? 'prod';
     logger?.log(`Region: ${region}, Stage: ${stage}`);
     logger?.endStep('success');
@@ -120,12 +129,13 @@ export async function runBatchEvaluationCommand(
 
     const runtimeId = agentState.runtimeId;
     // Service name in CW logs uses project_agent format without the CDK hash suffix
-    const serviceName = `${projectSpec.name}_${agent}.DEFAULT`;
-    const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${runtimeId}-DEFAULT`;
+    const endpointName = resolveEndpointName(options.endpoint);
+    const serviceName = `${projectSpec.name}_${agent}.${endpointName}`;
+    const runtimeLogGroupName = runtimeLogGroup(runtimeId, options.endpoint);
 
     logger?.log(`Agent: ${agent} (runtime: ${runtimeId})`);
     logger?.log(`Service name: ${serviceName}`);
-    logger?.log(`Log group: ${runtimeLogGroup}`);
+    logger?.log(`Log group: ${runtimeLogGroupName}`);
     logger?.endStep('success');
 
     // 2b. Resolve evaluator names to deployed IDs
@@ -165,11 +175,80 @@ export async function runBatchEvaluationCommand(
 
     onProgress?.('starting', `Starting batch evaluation "${evalName}"...`);
 
+    // Dataset mode: invoke agent with scenarios first, then use those sessionIds
+    let datasetSessionIds: string[] = [];
+    let datasetMetadata: SessionMetadataEntry[] = [];
+    if (options.dataset) {
+      const agentContext = await resolveAgentContext({
+        project: projectSpec,
+        deployedState,
+        awsTargets,
+        agentName: agent,
+        endpoint: options.endpoint,
+      });
+
+      onProgress?.('invoking', `Invoking agent with dataset "${options.dataset}"...`);
+
+      const datasetResult = await runDatasetScenarios({
+        agentContext,
+        datasetName: options.dataset,
+        version: options.datasetVersion,
+        configBaseDir: configIO.getConfigRoot(),
+        onProgress: (phase, msg) => onProgress?.(phase, msg),
+      });
+
+      const successfulResults = datasetResult.scenarioResults.filter(r => r.status === 'success');
+      if (successfulResults.length === 0) {
+        return {
+          success: false,
+          error: new Error('All scenarios failed during invocation. No sessions to evaluate.'),
+          results: [],
+          logFilePath: logger?.logFilePath,
+        };
+      }
+
+      datasetSessionIds = successfulResults.map(r => r.sessionId);
+
+      // Build sessionMetadata with ground truth from dataset
+      datasetMetadata = successfulResults.map(r => {
+        const scenario = datasetResult.scenarios.find(s => s.scenario_id === r.scenarioId);
+        return {
+          sessionId: r.sessionId,
+          testScenarioId: r.scenarioId,
+          groundTruth: scenario
+            ? {
+                inline: {
+                  ...(scenario.assertions ? { assertions: scenario.assertions.map(a => ({ text: a })) } : {}),
+                  ...(scenario.expected_trajectory
+                    ? { expectedTrajectory: { toolNames: scenario.expected_trajectory } }
+                    : {}),
+                  ...(scenario.turns.some(t => t.expectedResponse)
+                    ? {
+                        turns: scenario.turns.map(t => ({
+                          input: { prompt: t.input },
+                          ...(t.expectedResponse ? { expectedResponse: { text: t.expectedResponse } } : {}),
+                        })),
+                      }
+                    : {}),
+                },
+              }
+            : undefined,
+        };
+      }) as SessionMetadataEntry[];
+
+      onProgress?.('invoking', `✓ ${successfulResults.length} sessions ready for batch evaluation`);
+
+      // Wait for CloudWatch span ingestion before submitting — the batch service
+      // queries CloudWatch server-side, so we can't poll. Match SDK default (180s).
+      onProgress?.('ingesting', 'Waiting 180s for CloudWatch span ingestion...');
+      await sleep(BATCH_INGESTION_DELAY_MS);
+    }
+
     // Build optional filter config for CloudWatch filtering
     // API requires either sessionIds OR timeRange, not both — sessionIds takes precedence
     // Merge explicit sessionIds with any sessionIds from sessionMetadata (deduplicated)
     const metadataSessionIds = options.sessionMetadata?.map(m => m.sessionId).filter(Boolean) ?? [];
-    const explicitSessionIds = options.sessionIds ?? [];
+    const explicitSessionIds = [...(options.sessionIds ?? []), ...datasetSessionIds];
     const effectiveSessionIds = [...new Set([...explicitSessionIds, ...metadataSessionIds])];
     const hasSessionIds = effectiveSessionIds.length > 0;
 
@@ -185,6 +264,9 @@ export async function runBatchEvaluationCommand(
       return undefined;
     })();
 
+    // Merge dataset metadata with any explicit sessionMetadata
+    const allSessionMetadata = [...(options.sessionMetadata ?? []), ...datasetMetadata];
+
     const startPayload = {
       region,
       name: evalName,
@@ -192,13 +274,11 @@ export async function runBatchEvaluationCommand(
       dataSourceConfig: {
         cloudWatchLogs: {
           serviceNames: [serviceName],
-          logGroupNames: [runtimeLogGroup],
+          logGroupNames: [runtimeLogGroupName],
           ...(filterConfig ? { filterConfig } : {}),
         },
       },
-      ...(options.sessionMetadata && options.sessionMetadata.length > 0
-        ? { evaluationMetadata: { sessionMetadata: options.sessionMetadata } }
-        : {}),
+      ...(allSessionMetadata.length > 0 ? { evaluationMetadata: { sessionMetadata: allSessionMetadata } } : {}),
       clientToken: generateClientToken(),
     };
 
diff --git a/src/cli/operations/eval/run-eval.ts b/src/cli/operations/eval/run-eval.ts
index 8ba5cb307..b1c136dd8 100644
--- a/src/cli/operations/eval/run-eval.ts
+++ b/src/cli/operations/eval/run-eval.ts
@@ -1,28 +1,27 @@
-import { ResourceNotFoundError, ValidationError } from '../../../lib';
+import { ConfigIO, ResourceNotFoundError, ValidationError } from '../../../lib';
 import type { Result } from '../../../lib/result';
 import { getCredentialProvider } from '../../aws';
-import { evaluate } from '../../aws/agentcore';
 import type { EvaluationReferenceInput } from '../../aws/agentcore';
 import { getEvaluator } from '../../aws/agentcore-control';
-import { DEFAULT_ENDPOINT_NAME } from '../../constants';
+import { runtimeLogGroup } from '../../aws/cloudwatch';
+import { resolveAgentContext } from '../invoke/resolve-agent-context';
 import type { DeployedProjectConfig } from '../resolve-agent';
 import { loadDeployedProjectConfig, resolveAgent } from '../resolve-agent';
+import { runDatasetScenariosAndCollectSpans } from './shared/dataset-session-provider';
+import { runEvaluatorsOverSessions } from './shared/evaluator-runner';
+import {
+  SPANS_LOG_GROUP,
+  executeQuery,
+  extractTraceIds,
+  fetchSessionSpans,
+  sanitizeQueryValue,
+} from './shared/span-collector';
 import { generateFilename, saveEvalRun } from './storage';
-import type { EvalEvaluatorResult, EvalRunResult, EvalSessionScore, RunEvalOptions, SessionInfo } from './types';
-import { CloudWatchLogsClient, GetQueryResultsCommand, StartQueryCommand } from '@aws-sdk/client-cloudwatch-logs';
-import type { ResultField } from '@aws-sdk/client-cloudwatch-logs';
-import type { DocumentType } from '@smithy/types';
+import type { EvalRunResult, RunEvalOptions, SessionInfo } from './types';
+import { CloudWatchLogsClient } from '@aws-sdk/client-cloudwatch-logs';
 import { writeFileSync } from 'fs';
 import { join } from 'path';
 
-const SPANS_LOG_GROUP = 'aws/spans';
-
-const SUPPORTED_SCOPES = new Set([
-  'strands.telemetry.tracer',
-  'opentelemetry.instrumentation.langchain',
-  'openinference.instrumentation.langchain',
-]);
-
 interface ResolvedEvalContext {
   agentLabel: string;
   region: string;
@@ -96,16 +95,13 @@ function resolveFromArn(options: RunEvalOptions): ResolveResult {
     return { success: false, error: 'No evaluators specified. Use -e/--evaluator with Builtin.* or --evaluator-arn.' };
   }
 
-  const endpointName = options.endpoint ?? process.env.AGENTCORE_RUNTIME_ENDPOINT ?? DEFAULT_ENDPOINT_NAME;
-  const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${runtimeId}-${endpointName}`;
-
   return {
     success: true,
     ctx: {
       agentLabel: runtimeId,
       region,
       runtimeId,
-      runtimeLogGroup,
+      runtimeLogGroup: runtimeLogGroup(runtimeId, options.endpoint),
       evaluatorIds,
       evaluatorLabels,
     },
@@ -122,8 +118,6 @@ function resolveFromProject(context: DeployedProjectConfig, options: RunEvalOpti
   }
 
   const { agent } = agentResult;
-  const endpointName = options.endpoint ?? process.env.AGENTCORE_RUNTIME_ENDPOINT ?? DEFAULT_ENDPOINT_NAME;
-  const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${agent.runtimeId}-${endpointName}`;
 
   // Resolve evaluator names to IDs
   const evaluatorIds: string[] = [];
@@ -165,7 +159,7 @@ function resolveFromProject(context: DeployedProjectConfig, options: RunEvalOpti
       agentLabel: agent.agentName,
       region: agent.region,
       runtimeId: agent.runtimeId,
-      runtimeLogGroup,
+      runtimeLogGroup: runtimeLogGroup(agent.runtimeId, options.endpoint),
       evaluatorIds,
       evaluatorLabels,
     },
@@ -220,154 +214,6 @@ async function resolveEvaluatorLevels(evaluatorIds: string[], region: string): P
   return levels;
 }
 
-/**
- * Extract distinct trace IDs from session spans.
- */
-function extractTraceIds(spans: DocumentType[]): string[] {
-  const traceIds = new Set<string>();
-  for (const span of spans) {
-    const traceId = (span as Record<string, unknown>).traceId as string | undefined;
-    if (traceId) {
-      traceIds.add(traceId);
-    }
-  }
-  return [...traceIds];
-}
-
-/**
- * Extract span IDs that represent tool calls from session spans.
- */
-function extractToolCallSpanIds(spans: DocumentType[]): string[] {
-  const spanIds: string[] = [];
-  for (const span of spans) {
-    const doc = span as Record<string, unknown>;
-    const spanId = doc.spanId as string | undefined;
-    if (!spanId) continue;
-
-    // Tool call spans must have a tool name attribute — kind=CLIENT alone is too broad
-    const attrs = doc.attributes as Record<string, unknown> | undefined;
-    if (attrs?.['gen_ai.tool.name'] ?? attrs?.['tool.name']) {
-      spanIds.push(spanId);
-    }
-  }
-  return spanIds;
-}
-
-const EVALUATE_TARGET_BATCH_SIZE = 10;
-
-interface TargetIdBatch {
-  traceIds?: string[];
-  spanIds?: string[];
-}
-
-/**
- * Batch targetTraceIds / targetSpanIds into chunks of EVALUATE_TARGET_BATCH_SIZE.
- * The Evaluate API limits these arrays to 10 items per call.
- * For SESSION-level evaluators (both undefined), returns a single batch with no IDs.
- */
-function batchTargetIds(traceIds?: string[], spanIds?: string[]): TargetIdBatch[] {
-  if (spanIds) {
-    return chunk(spanIds, EVALUATE_TARGET_BATCH_SIZE).map(batch => ({ spanIds: batch }));
-  }
-  if (traceIds) {
-    return chunk(traceIds, EVALUATE_TARGET_BATCH_SIZE).map(batch => ({ traceIds: batch }));
-  }
-  // SESSION level — single call with no target IDs
-  return [{}];
-}
-
-function chunk<T>(arr: T[], size: number): T[][] {
-  const batches: T[][] = [];
-  for (let i = 0; i < arr.length; i += size) {
-    batches.push(arr.slice(i, i + size));
-  }
-  return batches;
-}
-
-/**
- * Execute a CloudWatch Logs Insights query and wait for results.
- */
-async function executeQuery(
-  client: CloudWatchLogsClient,
-  logGroupName: string,
-  queryString: string,
-  startTimeSec: number,
-  endTimeSec: number
-): Promise<ResultField[][]> {
-  const startQuery = await client.send(
-    new StartQueryCommand({
-      logGroupName,
-      startTime: startTimeSec,
-      endTime: endTimeSec,
-      queryString,
-    })
-  );
-
-  if (!startQuery.queryId) {
-    throw new Error('Failed to start CloudWatch Logs Insights query');
-  }
-
-  for (let i = 0; i < 60; i++) {
-    await new Promise(resolve => setTimeout(resolve, 1000));
-
-    const queryResults = await client.send(new GetQueryResultsCommand({ queryId: startQuery.queryId }));
-    const status = queryResults.status ?? 'Unknown';
-
-    if (status === 'Failed' || status === 'Cancelled') {
-      throw new Error(`CloudWatch query ${status.toLowerCase()}`);
-    }
-
-    if (status === 'Complete') {
-      return queryResults.results ?? [];
-    }
-  }
-
-  throw new Error('CloudWatch query timed out after 60 seconds');
-}
-
-/**
- * Extract parsed @message documents from CloudWatch Insights results.
- */
-function extractMessages(rows: ResultField[][]): Record<string, unknown>[] {
-  const docs: Record<string, unknown>[] = [];
-  for (const row of rows) {
-    const messageField = row.find(f => f.field === '@message');
-    if (messageField?.value) {
-      try {
-        docs.push(JSON.parse(messageField.value) as Record<string, unknown>);
-      } catch {
-        // Skip non-JSON log lines
-      }
-    }
-  }
-  return docs;
-}
-
-/**
- * Check if a document is relevant for evaluation:
- * - Has a supported instrumentation scope, OR
- * - Is a log record with conversation data (body.input / body.output)
- */
-function isRelevantForEval(doc: Record<string, unknown>): boolean {
-  const scope = doc.scope as Record<string, unknown> | undefined;
-  const scopeName = scope?.name as string | undefined;
-  if (scopeName && SUPPORTED_SCOPES.has(scopeName)) {
-    return true;
-  }
-
-  const body = doc.body;
-  if (body && typeof body === 'object' && ('input' in body || 'output' in body)) {
-    return true;
-  }
-
-  return false;
-}
-
-/** Sanitize a value for use in CloudWatch Insights query strings by removing single quotes. */
-function sanitizeQueryValue(value: string): string {
-  return value.replace(/'/g, '');
-}
-
 const MAX_DISCOVERED_SESSIONS = 50;
 
 export interface DiscoverSessionsOptions {
@@ -413,165 +259,115 @@ export async function discoverSessions(opts: DiscoverSessionsOptions): Promise<S
   return sessions;
 }
 
-interface SessionSpans {
-  sessionId: string;
-  spans: DocumentType[];
-}
-
-interface FetchSpansOptions {
-  runtimeId: string;
-  runtimeLogGroup: string;
-  region: string;
-  lookbackDays: number;
-  sessionId?: string;
-  traceId?: string;
-}
-
-/**
- * Fetch OTel spans from the `aws/spans` log group and runtime logs from the agent's
- * log group, then group them by session.
- *
- * The Evaluate API requires spans from a single session per call.
- */
-async function fetchSessionSpans(opts: FetchSpansOptions): Promise<SessionSpans[]> {
-  const { runtimeId, runtimeLogGroup, region, lookbackDays } = opts;
-  const endTimeMs = Date.now();
-  const startTimeMs = endTimeMs - lookbackDays * 24 * 60 * 60 * 1000;
-  const startTimeSec = Math.floor(startTimeMs / 1000);
-  const endTimeSec = Math.floor(endTimeMs / 1000);
-
-  const client = new CloudWatchLogsClient({
-    credentials: getCredentialProvider(),
-    region,
-  });
+export type RunEvalResult = Result<{ run: EvalRunResult; filePath: string }>;
 
-  // 1. Query proper OTel spans from the aws/spans log group
-  let spanQuery = `fields @message, attributes.session.id as sessionId, traceId
-     | parse resource.attributes.cloud.resource_id "runtime/*/" as parsedAgentId
-     | filter parsedAgentId = '${sanitizeQueryValue(runtimeId)}'
-     | filter ispresent(scope.name)`;
+export async function handleRunEval(options: RunEvalOptions): Promise<RunEvalResult> {
+  let resolution: ResolveResult;
 
-  if (opts.sessionId) {
-    spanQuery += `\n     | filter attributes.session.id = '${sanitizeQueryValue(opts.sessionId)}'`;
-  }
-  if (opts.traceId) {
-    spanQuery += `\n     | filter traceId = '${sanitizeQueryValue(opts.traceId)}'`;
+  if (options.agentArn) {
+    resolution = resolveFromArn(options);
+  } else {
+    const context = await loadDeployedProjectConfig();
+    resolution = resolveFromProject(context, options);
   }
 
-  spanQuery += `\n     | sort startTimeUnixNano asc\n     | limit 10000`;
-
-  const spanRows = await executeQuery(client, SPANS_LOG_GROUP, spanQuery, startTimeSec, endTimeSec);
-
-  // Group spans by session and collect trace IDs
-  const sessionMap = new Map<string, DocumentType[]>();
-  const traceIds = new Set<string>();
-
-  for (const row of spanRows) {
-    const messageField = row.find(f => f.field === '@message');
-    const sessionField = row.find(f => f.field === 'sessionId');
-    const traceField = row.find(f => f.field === 'traceId');
-
-    if (!messageField?.value) continue;
-
-    let doc: Record<string, unknown>;
-    try {
-      doc = JSON.parse(messageField.value) as Record<string, unknown>;
-    } catch {
-      continue;
-    }
-
-    const sessionId = sessionField?.value ?? 'unknown';
-    if (!sessionMap.has(sessionId)) {
-      sessionMap.set(sessionId, []);
-    }
-    sessionMap.get(sessionId)!.push(doc as DocumentType);
-
-    if (traceField?.value) {
-      traceIds.add(traceField.value);
-    }
+  if (!resolution.success) {
+    return { success: false, error: new ResourceNotFoundError(resolution.error) };
   }
 
-  if (sessionMap.size === 0) {
-    return [];
-  }
+  const { ctx } = resolution;
 
-  // 2. Query runtime logs from the agent's log group for the trace IDs found
-  if (traceIds.size > 0) {
-    const traceFilter = [...traceIds].map(t => `'${sanitizeQueryValue(t)}'`).join(', ');
-    let logRows: ResultField[][] = [];
-    try {
-      logRows = await executeQuery(
-        client,
-        runtimeLogGroup,
-        `fields @message, traceId
-         | filter traceId in [${traceFilter}]
-         | sort @timestamp asc
-         | limit 10000`,
-        startTimeSec,
-        endTimeSec
-      );
-    } catch {
-      // Runtime log group may not exist yet; continue with spans only
-    }
+  // Dataset mode: invoke agent with scenarios, collect spans, build ground truth
+  if (options.dataset) {
+    const configIO = new ConfigIO();
+    const project = await configIO.readProjectSpec();
+    const deployedState = await configIO.readDeployedState();
+    const awsTargets = await configIO.readAWSDeploymentTargets();
+
+    const agentContext = await resolveAgentContext({
+      project,
+      deployedState,
+      awsTargets,
+      agentName: options.agent,
+      endpoint: options.endpoint,
+    });
 
-    const logDocs = extractMessages(logRows);
+    const datasetResult = await runDatasetScenariosAndCollectSpans({
+      agentContext,
+      datasetName: options.dataset,
+      version: options.datasetVersion,
+      configBaseDir: configIO.getConfigRoot(),
+      querySpans: async (region, logGroup, sessionId) => {
+        const result = await fetchSessionSpans({
+          runtimeId: agentContext.runtimeId,
+          runtimeLogGroup: logGroup,
+          region,
+          lookbackDays: 1,
+          sessionId,
+        });
+        return result.length > 0 ? result[0]!.spans : [];
+      },
+      onProgress: options.onProgress,
+    });
 
-    // Match runtime logs to sessions via traceId
-    // Build traceId → sessionId mapping from spans
-    const traceToSession = new Map<string, string>();
-    for (const row of spanRows) {
-      const traceField = row.find(f => f.field === 'traceId');
-      const sessionField = row.find(f => f.field === 'sessionId');
-      if (traceField?.value && sessionField?.value) {
-        traceToSession.set(traceField.value, sessionField.value);
-      }
+    if (datasetResult.sessions.length === 0) {
+      return {
+        success: false,
+        error: new ResourceNotFoundError('No spans collected from dataset scenarios. All sessions may have timed out.'),
+      };
     }
 
-    for (const logDoc of logDocs) {
-      if (!isRelevantForEval(logDoc)) continue;
-
-      const logTraceId = logDoc.traceId as string | undefined;
-      const sessionId = logTraceId ? (traceToSession.get(logTraceId) ?? 'unknown') : 'unknown';
-      if (!sessionMap.has(sessionId)) {
-        sessionMap.set(sessionId, []);
-      }
-      sessionMap.get(sessionId)!.push(logDoc as DocumentType);
-    }
-  }
+    // Resolve evaluator levels
+    const evaluatorLevels = await resolveEvaluatorLevels(ctx.evaluatorIds, ctx.region);
 
-  // 3. Build session list — aws/spans docs are already scoped by runtimeId (step 1),
-  //    and runtime log docs were filtered through isRelevantForEval (step 2).
-  //    We keep all docs so the Evaluate API has full trace context for resolving
-  //    template variables like {context} and {assistant_turn}.
-  const sessions: SessionSpans[] = [];
-  for (const [sessionId, docs] of sessionMap) {
-    if (docs.length > 0) {
-      sessions.push({ sessionId, spans: docs });
+    // Group dataset-generated ref inputs by sessionId
+    const refInputsBySession = new Map<string, EvaluationReferenceInput[]>();
+    for (const ref of datasetResult.referenceInputs) {
+      const sid = ref.context.spanContext.sessionId;
+      const list = refInputsBySession.get(sid) ?? [];
+      list.push(ref);
+      refInputsBySession.set(sid, list);
     }
-  }
-
-  return sessions;
-}
 
-export type RunEvalResult = Result<{ run: EvalRunResult; filePath: string }>;
+    // Tag sessions with scenarioId
+    const scenarioBySession = new Map(datasetResult.scenarioResults.map(r => [r.sessionId, r.scenarioId]));
+    const sessions = datasetResult.sessions.map(s => ({
+      sessionId: s.sessionId,
+      spans: s.spans,
+      scenarioId: scenarioBySession.get(s.sessionId),
+    }));
+
+    const results = await runEvaluatorsOverSessions({
+      region: ctx.region,
+      evaluatorIds: ctx.evaluatorIds,
+      evaluatorLabels: ctx.evaluatorLabels,
+      evaluatorLevels,
+      sessions,
+      refInputsBySession,
+    });
 
-export async function handleRunEval(options: RunEvalOptions): Promise<RunEvalResult> {
-  let resolution: ResolveResult;
+    // Build and save result
+    const timestamp = new Date().toISOString();
+    const run: EvalRunResult = {
+      timestamp,
+      agent: ctx.agentLabel,
+      evaluators: ctx.evaluatorLabels,
+      sessionCount: sessions.length,
+      results,
+      source: 'dataset',
+      datasetName: options.dataset,
+      dataset: {
+        id: options.dataset,
+        version: options.datasetVersion ?? 'LOCAL',
+      },
+    };
 
-  if (options.agentArn) {
-    resolution = resolveFromArn(options);
-  } else {
-    const context = await loadDeployedProjectConfig();
-    resolution = resolveFromProject(context, options);
-  }
+    const filePath = options.output ?? saveEvalRun(run);
 
-  if (!resolution.success) {
-    return { success: false, error: new ResourceNotFoundError(resolution.error) };
+    return { success: true, run, filePath };
   }
 
-  const { ctx } = resolution;
-
-  // Fetch spans grouped by session
+  // Historical trace mode (existing behavior)
   let sessions = await fetchSessionSpans({
     runtimeId: ctx.runtimeId,
     runtimeLogGroup: ctx.runtimeLogGroup,
@@ -658,75 +454,19 @@ export async function handleRunEval(options: RunEvalOptions): Promise<RunEvalRes
     }
   }
 
-  // Run each evaluator against each session with level-appropriate targeting
-  const results: EvalEvaluatorResult[] = [];
-
-  for (let i = 0; i < ctx.evaluatorIds.length; i++) {
-    const evaluatorId = ctx.evaluatorIds[i]!;
-    const evaluatorName = ctx.evaluatorLabels[i] ?? evaluatorId;
-    const level = evaluatorLevels.get(evaluatorId) ?? 'SESSION';
-
-    const sessionScores: EvalSessionScore[] = [];
-    let totalInputTokens = 0;
-    let totalOutputTokens = 0;
-    let totalTokens = 0;
-
-    for (const session of sessions) {
-      // Build evaluation target based on evaluator level
-      let targetTraceIds: string[] | undefined;
-      let targetSpanIds: string[] | undefined;
-
-      if (level === 'TRACE') {
-        targetTraceIds = extractTraceIds(session.spans);
-        if (targetTraceIds.length === 0) continue;
-      } else if (level === 'TOOL_CALL') {
-        targetSpanIds = extractToolCallSpanIds(session.spans);
-        if (targetSpanIds.length === 0) continue;
-      }
-
-      // The Evaluate API limits targetSpanIds and targetTraceIds to 10 per call.
-      // Batch into chunks and merge results.
-      const batches = batchTargetIds(targetTraceIds, targetSpanIds);
-
-      for (const batch of batches) {
-        const response = await evaluate({
-          region: ctx.region,
-          evaluatorId,
-          sessionSpans: session.spans,
-          targetTraceIds: batch.traceIds,
-          targetSpanIds: batch.spanIds,
-          evaluationReferenceInputs,
-        });
-
-        for (const r of response.evaluationResults) {
-          sessionScores.push({
-            sessionId: r.context?.sessionId ?? session.sessionId,
-            traceId: r.context?.traceId,
-            spanId: r.context?.spanId,
-            value: r.value ?? 0,
-            label: r.label,
-            explanation: r.explanation,
-            errorMessage: r.errorMessage,
-          });
-
-          totalInputTokens += r.tokenUsage?.inputTokens ?? 0;
-          totalOutputTokens += r.tokenUsage?.outputTokens ?? 0;
-          totalTokens += r.tokenUsage?.totalTokens ?? 0;
-        }
-      }
-    }
-
-    const validScores = sessionScores.filter(s => !s.errorMessage);
-    const aggregateScore =
-      validScores.length > 0 ? validScores.reduce((sum, s) => sum + s.value, 0) / validScores.length : 0;
+  // Historical mode: one set of ref inputs applies to the single targeted session
+  const refInputsBySession = evaluationReferenceInputs
+    ? new Map([[sessions[0]!.sessionId, evaluationReferenceInputs]])
+    : undefined;
 
-    results.push({
-      evaluator: evaluatorName,
-      aggregateScore,
-      sessionScores,
-      tokenUsage: { inputTokens: totalInputTokens, outputTokens: totalOutputTokens, totalTokens },
-    });
-  }
+  const results = await runEvaluatorsOverSessions({
+    region: ctx.region,
+    evaluatorIds: ctx.evaluatorIds,
+    evaluatorLabels: ctx.evaluatorLabels,
+    evaluatorLevels,
+    sessions,
+    refInputsBySession,
+  });
 
   // Build run result
   const timestamp = new Date().toISOString();
diff --git a/src/cli/operations/eval/shared/__tests__/dataset-loader.test.ts b/src/cli/operations/eval/shared/__tests__/dataset-loader.test.ts
new file mode 100644
index 000000000..843a42bc2
--- /dev/null
+++ b/src/cli/operations/eval/shared/__tests__/dataset-loader.test.ts
@@ -0,0 +1,81 @@
+import { describe, expect, it } from 'vitest';
+
+// Test the parseAndValidate logic by importing the module and testing indirectly
+// Since parseAndValidate is private, we test through loadDatasetScenarios' validation behavior
+// by creating a test helper that mimics the parsing
+
+describe('dataset-loader validation', () => {
+  // Inline reimplementation of parseAndValidate for unit testing
+  function parseAndValidate(content: string) {
+    const lines = content.split('\n').filter(l => l.trim());
+    if (lines.length === 0) throw new Error('Dataset has no examples.');
+
+    return lines.map((line, index) => {
+      let obj: Record<string, unknown>;
+      try {
+        obj = JSON.parse(line) as Record<string, unknown>;
+      } catch (err) {
+        throw new Error(`Invalid JSON at line ${index + 1}: ${err instanceof Error ? err.message : String(err)}`);
+      }
+      if (!obj.scenario_id || typeof obj.scenario_id !== 'string') {
+        throw new Error(`Line ${index + 1}: missing required field "scenario_id"`);
+      }
+      if (!obj.turns || !Array.isArray(obj.turns) || obj.turns.length === 0) {
+        throw new Error(`Line ${index + 1}: "turns" must be a non-empty array`);
+      }
+      for (let i = 0; i < (obj.turns as unknown[]).length; i++) {
+        const turn = (obj.turns as Record<string, unknown>[])[i];
+        if (!turn?.input || typeof turn.input !== 'string') {
+          throw new Error(`Line ${index + 1}, turn ${i + 1}: each turn must have a string "input" field`);
+        }
+      }
+      return obj;
+    });
+  }
+
+  it('parses valid JSONL', () => {
+    const content = '{"scenario_id":"s1","turns":[{"input":"hello"}]}\n{"scenario_id":"s2","turns":[{"input":"bye"}]}';
+    const result = parseAndValidate(content);
+    expect(result).toHaveLength(2);
+    expect(result[0]!.scenario_id).toBe('s1');
+  });
+
+  it('throws on empty content', () => {
+    expect(() => parseAndValidate('')).toThrow('no examples');
+  });
+
+  it('throws on missing scenario_id', () => {
+    expect(() => parseAndValidate('{"turns":[{"input":"x"}]}')).toThrow('scenario_id');
+  });
+
+  it('throws on missing turns', () => {
+    expect(() => parseAndValidate('{"scenario_id":"s1"}')).toThrow('turns');
+  });
+
+  it('throws on empty turns array', () => {
+    expect(() => parseAndValidate('{"scenario_id":"s1","turns":[]}')).toThrow('non-empty');
+  });
+
+  it('throws on turn without input', () => {
+    expect(() => parseAndValidate('{"scenario_id":"s1","turns":[{"expectedResponse":"x"}]}')).toThrow('input');
+  });
+
+  it('throws with line number context on invalid JSON', () => {
+    const content = '{"scenario_id":"s1","turns":[{"input":"ok"}]}\nnot json';
+    expect(() => parseAndValidate(content)).toThrow('line 2');
+  });
+
+  it('allows optional fields (assertions, expected_trajectory, expectedResponse)', () => {
+    const content =
+      '{"scenario_id":"s1","turns":[{"input":"q","expectedResponse":"a"}],"assertions":["be nice"],"expected_trajectory":["tool_a"]}';
+    const result = parseAndValidate(content);
+    expect(result[0]!.assertions).toEqual(['be nice']);
+    expect(result[0]!.expected_trajectory).toEqual(['tool_a']);
+  });
+
+  it('ignores blank lines', () => {
+    const content = '{"scenario_id":"s1","turns":[{"input":"hi"}]}\n\n\n{"scenario_id":"s2","turns":[{"input":"bye"}]}';
+    const result = parseAndValidate(content);
+    expect(result).toHaveLength(2);
+  });
+});
diff --git a/src/cli/operations/eval/shared/__tests__/dataset-session-provider.test.ts b/src/cli/operations/eval/shared/__tests__/dataset-session-provider.test.ts
new file mode 100644
index 000000000..9a16efcb5
--- /dev/null
+++ b/src/cli/operations/eval/shared/__tests__/dataset-session-provider.test.ts
@@ -0,0 +1,92 @@
+import { buildReferenceInputs } from '../dataset-session-provider.js';
+import { describe, expect, it } from 'vitest';
+
+describe('buildReferenceInputs', () => {
+  it('includes session-level assertions when scenario has assertions', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [{ input: 'hello' }],
+        assertions: ['Agent greets politely', 'Agent responds in English'],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1'],
+    });
+
+    expect(result.length).toBeGreaterThanOrEqual(1);
+    const sessionLevel = result.find(r => !r.context.spanContext.traceId);
+    expect(sessionLevel).toBeDefined();
+    expect(sessionLevel!.assertions).toEqual([
+      { text: 'Agent greets politely' },
+      { text: 'Agent responds in English' },
+    ]);
+  });
+
+  it('includes session-level expected_trajectory when present', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [{ input: 'hello' }],
+        expected_trajectory: ['lookup_user', 'greet'],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1'],
+    });
+
+    const sessionLevel = result.find(r => !r.context.spanContext.traceId);
+    expect(sessionLevel).toBeDefined();
+    expect(sessionLevel!.expectedTrajectory).toEqual({ toolNames: ['lookup_user', 'greet'] });
+  });
+
+  it('maps turn.expectedResponse to traceId by index', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [
+          { input: 'q1', expectedResponse: 'answer1' },
+          { input: 'q2', expectedResponse: 'answer2' },
+        ],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-a', 'trace-b'],
+    });
+
+    expect(result).toHaveLength(2);
+    expect(result[0]!.context.spanContext.traceId).toBe('trace-a');
+    expect(result[0]!.expectedResponse).toEqual({ text: 'answer1' });
+    expect(result[1]!.context.spanContext.traceId).toBe('trace-b');
+    expect(result[1]!.expectedResponse).toEqual({ text: 'answer2' });
+  });
+
+  it('stops mapping when traceIds exhausted', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [
+          { input: 'q1', expectedResponse: 'a1' },
+          { input: 'q2', expectedResponse: 'a2' },
+          { input: 'q3', expectedResponse: 'a3' },
+        ],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1'], // only 1 traceId for 3 turns
+    });
+
+    // Only 1 result because we ran out of traceIds
+    expect(result).toHaveLength(1);
+    expect(result[0]!.expectedResponse).toEqual({ text: 'a1' });
+  });
+
+  it('returns empty array when scenario has no ground truth', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [{ input: 'hello' }, { input: 'goodbye' }],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1', 'trace-2'],
+    });
+
+    expect(result).toHaveLength(0);
+  });
+});
diff --git a/src/cli/operations/eval/shared/__tests__/ground-truth.test.ts b/src/cli/operations/eval/shared/__tests__/ground-truth.test.ts
new file mode 100644
index 000000000..17e76031b
--- /dev/null
+++ b/src/cli/operations/eval/shared/__tests__/ground-truth.test.ts
@@ -0,0 +1,109 @@
+import { buildReferenceInputs } from '../dataset-session-provider';
+import { describe, expect, it } from 'vitest';
+
+describe('buildReferenceInputs', () => {
+  it('builds session-level assertions and trajectory', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [{ input: 'hello' }],
+        assertions: ['Agent should greet'],
+        expected_trajectory: ['greet_user'],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1'],
+    });
+
+    expect(result).toHaveLength(1);
+    expect(result[0]!.context.spanContext.sessionId).toBe('sess-1');
+    expect(result[0]!.assertions).toEqual([{ text: 'Agent should greet' }]);
+    expect(result[0]!.expectedTrajectory).toEqual({ toolNames: ['greet_user'] });
+  });
+
+  it('maps per-turn expectedResponse to traceIds by index', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [
+          { input: 'q1', expectedResponse: 'a1' },
+          { input: 'q2', expectedResponse: 'a2' },
+        ],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1', 'trace-2'],
+    });
+
+    expect(result).toHaveLength(2);
+    expect(result[0]!.context.spanContext.traceId).toBe('trace-1');
+    expect(result[0]!.expectedResponse).toEqual({ text: 'a1' });
+    expect(result[1]!.context.spanContext.traceId).toBe('trace-2');
+    expect(result[1]!.expectedResponse).toEqual({ text: 'a2' });
+  });
+
+  it('skips extra turns when fewer traceIds than turns (SDK behavior)', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [
+          { input: 'q1', expectedResponse: 'a1' },
+          { input: 'q2', expectedResponse: 'a2' },
+          { input: 'q3', expectedResponse: 'a3' },
+        ],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1'], // only 1 trace for 3 turns
+    });
+
+    expect(result).toHaveLength(1);
+    expect(result[0]!.expectedResponse).toEqual({ text: 'a1' });
+  });
+
+  it('skips turns without expectedResponse', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [{ input: 'q1' }, { input: 'q2', expectedResponse: 'a2' }],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1', 'trace-2'],
+    });
+
+    expect(result).toHaveLength(1);
+    expect(result[0]!.context.spanContext.traceId).toBe('trace-2');
+    expect(result[0]!.expectedResponse).toEqual({ text: 'a2' });
+  });
+
+  it('returns empty when no ground truth provided', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [{ input: 'hello' }],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1'],
+    });
+
+    expect(result).toHaveLength(0);
+  });
+
+  it('combines session-level and per-trace inputs', () => {
+    const result = buildReferenceInputs({
+      scenario: {
+        scenario_id: 'test',
+        turns: [{ input: 'q1', expectedResponse: 'a1' }],
+        assertions: ['Be helpful'],
+        expected_trajectory: ['tool_a'],
+      },
+      sessionId: 'sess-1',
+      traceIds: ['trace-1'],
+    });
+
+    expect(result).toHaveLength(2);
+    // Session-level
+    expect(result[0]!.assertions).toEqual([{ text: 'Be helpful' }]);
+    expect(result[0]!.context.spanContext.traceId).toBeUndefined();
+    // Per-trace
+    expect(result[1]!.expectedResponse).toEqual({ text: 'a1' });
+    expect(result[1]!.context.spanContext.traceId).toBe('trace-1');
+  });
+});
diff --git a/src/cli/operations/eval/shared/__tests__/span-collector.test.ts b/src/cli/operations/eval/shared/__tests__/span-collector.test.ts
new file mode 100644
index 000000000..6c7e1ded2
--- /dev/null
+++ b/src/cli/operations/eval/shared/__tests__/span-collector.test.ts
@@ -0,0 +1,127 @@
+import { collectSpans, extractTraceIds } from '../span-collector';
+import type { DocumentType } from '@smithy/types';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+describe('extractTraceIds', () => {
+  it('extracts unique traceIds in appearance order', () => {
+    const spans = [
+      { traceId: 'a', spanId: '1' },
+      { traceId: 'b', spanId: '2' },
+      { traceId: 'a', spanId: '3' }, // duplicate
+      { traceId: 'c', spanId: '4' },
+    ];
+
+    const result = extractTraceIds(spans);
+    expect(result).toEqual(['a', 'b', 'c']);
+  });
+
+  it('returns empty array for no spans', () => {
+    expect(extractTraceIds([])).toEqual([]);
+  });
+
+  it('skips spans without traceId', () => {
+    const spans = [{ spanId: '1' }, { traceId: 'a', spanId: '2' }, { other: 'x' }] as unknown as DocumentType[];
+    expect(extractTraceIds(spans)).toEqual(['a']);
+  });
+});
+
+describe('collectSpans', () => {
+  beforeEach(() => {
+    vi.useFakeTimers();
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it('returns spans for all sessions after polling', async () => {
+    const mockQuerySpans = vi.fn().mockImplementation((_r, _l, sessionId) => {
+      return Promise.resolve([{ traceId: `trace-${sessionId}`, spanId: 'sp1' }]);
+    });
+
+    const promise = collectSpans({
+      sessionIds: ['sess-1', 'sess-2'],
+      region: 'us-east-1',
+      logGroup: '/aws/spans',
+      querySpans: mockQuerySpans,
+    });
+
+    // Advance past ingestion delay
+    await vi.advanceTimersByTimeAsync(180_000);
+    // Advance past one poll interval to let the query resolve
+    await vi.advanceTimersByTimeAsync(5_000);
+
+    const result = await promise;
+
+    expect(result.spans.size).toBe(2);
+    expect(result.timedOut).toHaveLength(0);
+    expect(result.spans.get('sess-1')).toHaveLength(1);
+    expect(result.spans.get('sess-2')).toHaveLength(1);
+  });
+
+  it('reports timed-out sessions', async () => {
+    const mockQuerySpans = vi.fn().mockImplementation((_r, _l, sessionId) => {
+      // sess-1 always returns empty (simulates missing spans)
+      if (sessionId === 'sess-1') return Promise.resolve([]);
+      return Promise.resolve([{ traceId: 'trace-2' }]);
+    });
+
+    const promise = collectSpans({
+      sessionIds: ['sess-1', 'sess-2'],
+      region: 'us-east-1',
+      logGroup: '/aws/spans',
+      querySpans: mockQuerySpans,
+    });
+
+    // Advance past ingestion delay + full poll timeout
+    await vi.advanceTimersByTimeAsync(180_000 + 60_000 + 5_000);
+
+    const result = await promise;
+
+    expect(result.spans.has('sess-2')).toBe(true);
+    expect(result.timedOut).toContain('sess-1');
+  });
+
+  it('retries on transient errors', async () => {
+    let calls = 0;
+    const mockQuerySpans = vi.fn().mockImplementation(() => {
+      calls++;
+      if (calls <= 2) throw new Error('Service unavailable');
+      return Promise.resolve([{ traceId: 'trace-1' }]);
+    });
+
+    const promise = collectSpans({
+      sessionIds: ['sess-1'],
+      region: 'us-east-1',
+      logGroup: '/aws/spans',
+      querySpans: mockQuerySpans,
+    });
+
+    // Advance past ingestion delay + enough poll intervals for retry
+    await vi.advanceTimersByTimeAsync(180_000 + 180_000);
+
+    const result = await promise;
+
+    expect(result.spans.has('sess-1')).toBe(true);
+    expect(result.timedOut).toHaveLength(0);
+  });
+
+  it('calls onProgress with ingestion delay message', async () => {
+    const onProgress = vi.fn();
+    const mockQuerySpans = vi.fn().mockResolvedValue([{ traceId: 't1' }]);
+
+    const promise = collectSpans({
+      sessionIds: ['sess-1'],
+      region: 'us-east-1',
+      logGroup: '/aws/spans',
+      querySpans: mockQuerySpans,
+      onProgress,
+    });
+
+    await vi.advanceTimersByTimeAsync(180_000 + 5_000);
+    await promise;
+
+    // First call should be the ingestion delay message
+    expect(onProgress).toHaveBeenCalledWith(0, 1, expect.stringContaining('Waiting for span ingestion'));
+  });
+});
diff --git a/src/cli/operations/eval/shared/dataset-loader.ts b/src/cli/operations/eval/shared/dataset-loader.ts
new file mode 100644
index 000000000..4a94ee475
--- /dev/null
+++ b/src/cli/operations/eval/shared/dataset-loader.ts
@@ -0,0 +1,106 @@
+/**
+ * Load and validate dataset scenarios for evaluation.
+ *
+ * Supports two modes:
+ * - Local file (no --version): reads directly from config.managed.location
+ * - Version mode (--version N or DRAFT): downloads from service via pre-signed URL
+ */
+import { downloadDataset, getDataset } from '../../../aws/agentcore-datasets';
+import { resolveDataset } from '../../dataset/resolve-dataset';
+import type { PredefinedScenario, Turn } from './types';
+import { readFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+
+export interface LoadDatasetOptions {
+  datasetName: string;
+  version?: string;
+  configBaseDir: string;
+}
+
+/**
+ * Load dataset scenarios from local file or service version.
+ * Validates required fields and rejects simulated schemas.
+ */
+export async function loadDatasetScenarios(options: LoadDatasetOptions): Promise<PredefinedScenario[]> {
+  const { datasetName, version, configBaseDir } = options;
+  const resolved = await resolveDataset(datasetName);
+
+  // Check schema type — reject simulated
+  const { ConfigIO } = await import('../../../../lib');
+  const configIO = new ConfigIO();
+  const projectSpec = await configIO.readProjectSpec();
+  const datasetSpec = projectSpec.datasets?.find(d => d.name === datasetName);
+  if (datasetSpec?.schemaType === 'AGENTCORE_EVALUATION_SIMULATED_V1') {
+    throw new Error(
+      'Simulated scenarios (actor profiles) are not supported yet. Use predefined turns or wait for Phase 4.'
+    );
+  }
+
+  let content: string;
+
+  if (!version) {
+    // Local file mode — read directly (fastest iteration, no push required)
+    const filePath = resolve(configBaseDir, resolved.location);
+    content = await readFile(filePath, 'utf8');
+  } else {
+    // Version mode — download from service
+    const datasetInfo = await getDataset({
+      region: resolved.region,
+      datasetId: resolved.datasetId,
+      version: version === 'DRAFT' ? undefined : version,
+    });
+    if (!datasetInfo.downloadUrl) {
+      throw new Error(
+        'Dataset has no download URL available. The dataset may not be ready yet. Please try again later.'
+      );
+    }
+    content = await downloadDataset(datasetInfo.downloadUrl, { mode: 'buffer' });
+  }
+
+  return parseAndValidate(content);
+}
+
+/**
+ * Parse JSONL content into validated PredefinedScenario objects.
+ */
+function parseAndValidate(content: string): PredefinedScenario[] {
+  const lines = content.split('\n').filter(l => l.trim());
+
+  if (lines.length === 0) {
+    throw new Error('Dataset has no examples. Add scenarios to your dataset file first.');
+  }
+
+  return lines.map((line, index) => {
+    let obj: Record<string, unknown>;
+    try {
+      obj = JSON.parse(line) as Record<string, unknown>;
+    } catch (err) {
+      throw new Error(
+        `Invalid JSON at line ${index + 1}: ${err instanceof Error ? err.message : String(err)}\n` +
+          `  ${line.length > 120 ? line.slice(0, 120) + '...' : line}`
+      );
+    }
+
+    if (!obj.scenario_id || typeof obj.scenario_id !== 'string') {
+      throw new Error(`Line ${index + 1}: missing required field "scenario_id"`);
+    }
+
+    if (!obj.turns || !Array.isArray(obj.turns) || obj.turns.length === 0) {
+      throw new Error(`Line ${index + 1}: "turns" must be a non-empty array`);
+    }
+
+    for (let i = 0; i < (obj.turns as unknown[]).length; i++) {
+      const turn = (obj.turns as Record<string, unknown>[])[i];
+      if (!turn?.input || typeof turn.input !== 'string') {
+        throw new Error(`Line ${index + 1}, turn ${i + 1}: each turn must have a string "input" field`);
+      }
+    }
+
+    return {
+      scenario_id: obj.scenario_id,
+      turns: obj.turns as Turn[],
+      assertions: obj.assertions as string[] | undefined,
+      expected_trajectory: obj.expected_trajectory as string[] | undefined,
+    };
+  });
+}
diff --git a/src/cli/operations/eval/shared/dataset-session-provider.ts b/src/cli/operations/eval/shared/dataset-session-provider.ts
new file mode 100644
index 000000000..40b3bc19e
--- /dev/null
+++ b/src/cli/operations/eval/shared/dataset-session-provider.ts
@@ -0,0 +1,177 @@
+/**
+ * Dataset scenario orchestration for dataset-driven evaluation.
+ *
+ * Two functions by responsibility:
+ * - runDatasetScenarios — load + invoke (Phase A + B). Used by batch eval.
+ * - runDatasetScenariosAndCollectSpans — composes the runner + span collection + ground truth.
+ *   Used by on-demand eval.
+ */
+import { ConfigIO } from '../../../../lib';
+import type { EvaluationReferenceInput } from '../../../aws/agentcore';
+import { runtimeLogGroup } from '../../../aws/cloudwatch';
+import type { AgentContext } from '../../invoke/resolve-agent-context';
+import { loadDatasetScenarios } from './dataset-loader';
+import { executeScenarios } from './scenario-executor';
+import type { ScenarioInvocationResult } from './scenario-executor';
+import { collectSpans, extractTraceIds } from './span-collector';
+import type { PredefinedScenario } from './types';
+import type { DocumentType } from '@smithy/types';
+
+interface BuildReferenceInputsArgs {
+  scenario: PredefinedScenario;
+  sessionId: string;
+  traceIds: string[];
+}
+
+/**
+ * Build evaluationReferenceInputs for a single scenario.
+ *
+ * - Session-level: assertions + expected_trajectory (applied to full session)
+ * - Per-trace: turn[i].expectedResponse → traceIds[i] (by appearance order)
+ *   If traceIds.length < turns.length, extra turns are skipped (SDK behavior).
+ */
+export function buildReferenceInputs(options: BuildReferenceInputsArgs): EvaluationReferenceInput[] {
+  const { scenario, sessionId, traceIds } = options;
+  const inputs: EvaluationReferenceInput[] = [];
+
+  const hasAssertions = scenario.assertions && scenario.assertions.length > 0;
+  const hasTrajectory = scenario.expected_trajectory && scenario.expected_trajectory.length > 0;
+
+  if (hasAssertions || hasTrajectory) {
+    inputs.push({
+      context: { spanContext: { sessionId } },
+      ...(hasAssertions && { assertions: scenario.assertions!.map(text => ({ text })) }),
+      ...(hasTrajectory && { expectedTrajectory: { toolNames: scenario.expected_trajectory! } }),
+    });
+  }
+
+  for (let i = 0; i < scenario.turns.length; i++) {
+    const turn = scenario.turns[i]!;
+    if (!turn.expectedResponse) continue;
+    if (i >= traceIds.length) break;
+
+    inputs.push({
+      context: { spanContext: { sessionId, traceId: traceIds[i] } },
+      expectedResponse: { text: turn.expectedResponse },
+    });
+  }
+
+  return inputs;
+}
+
+export interface RunDatasetScenariosOptions {
+  agentContext: AgentContext;
+  datasetName: string;
+  version?: string;
+  /** Base directory for resolving dataset file paths. If omitted, resolved via ConfigIO. */
+  configBaseDir?: string;
+  onProgress?: (phase: string, message: string) => void;
+}
+
+export interface RunDatasetScenariosResult {
+  scenarioResults: ScenarioInvocationResult[];
+  scenarios: PredefinedScenario[];
+}
+
+export interface RunDatasetScenariosAndCollectSpansOptions extends RunDatasetScenariosOptions {
+  querySpans: (region: string, logGroup: string, sessionId: string) => Promise<DocumentType[]>;
+}
+
+export interface RunDatasetScenariosAndCollectSpansResult extends RunDatasetScenariosResult {
+  sessions: { sessionId: string; spans: DocumentType[] }[];
+  referenceInputs: EvaluationReferenceInput[];
+}
+
+/**
+ * Phase A + B: Load scenarios from dataset, invoke agent with each scenario.
+ *
+ * Throws if all scenarios fail invocation.
+ */
+export async function runDatasetScenarios(options: RunDatasetScenariosOptions): Promise<RunDatasetScenariosResult> {
+  const { agentContext, datasetName, version, onProgress } = options;
+
+  // Phase A: Load dataset scenarios
+  onProgress?.('load', `Loading dataset "${datasetName}"...`);
+  const configBaseDir = options.configBaseDir ?? new ConfigIO().getConfigRoot();
+  const scenarios = await loadDatasetScenarios({ datasetName, version, configBaseDir });
+  onProgress?.('load', `Loaded ${scenarios.length} scenarios`);
+
+  // Phase B: Execute scenarios (5 concurrent)
+  onProgress?.('invoke', `Invoking agent with ${scenarios.length} scenarios...`);
+  const scenarioResults = await executeScenarios({
+    scenarios,
+    agentContext,
+    onProgress: (completed, total, current) => {
+      const status = current.status === 'success' ? '✓' : '✗';
+      onProgress?.('invoke', `[${completed}/${total}] ${current.scenarioId}: ${status}`);
+    },
+  });
+
+  const successfulResults = scenarioResults.filter(r => r.status === 'success');
+  const failedCount = scenarioResults.length - successfulResults.length;
+  onProgress?.(
+    'invoke',
+    `✓ ${successfulResults.length}/${scenarioResults.length} scenarios invoked${failedCount > 0 ? ` (${failedCount} failed)` : ''}`
+  );
+
+  if (successfulResults.length === 0) {
+    throw new Error('All scenarios failed during invocation. No sessions to evaluate.');
+  }
+
+  return { scenarioResults, scenarios };
+}
+
+/**
+ * Phase A + B + C: Run scenarios, then wait for span ingestion, collect spans,
+ * and build evaluation reference inputs from dataset ground truth.
+ *
+ * Composes runDatasetScenarios and adds the span collection step.
+ */
+export async function runDatasetScenariosAndCollectSpans(
+  options: RunDatasetScenariosAndCollectSpansOptions
+): Promise<RunDatasetScenariosAndCollectSpansResult> {
+  const { agentContext, querySpans, onProgress } = options;
+
+  const { scenarioResults, scenarios } = await runDatasetScenarios(options);
+  const successfulResults = scenarioResults.filter(r => r.status === 'success');
+
+  const logGroup = runtimeLogGroup(agentContext.runtimeId, agentContext.endpoint);
+  const sessionIds = successfulResults.map(r => r.sessionId);
+
+  onProgress?.('collect', 'Waiting for span ingestion (15s)...');
+  const { spans: collectedSpans, timedOut } = await collectSpans({
+    sessionIds,
+    region: agentContext.region,
+    logGroup,
+    querySpans,
+    onProgress: (collected, total) => {
+      onProgress?.('collect', `Collecting spans... (${collected}/${total} sessions)`);
+    },
+  });
+
+  if (timedOut.length > 0) {
+    onProgress?.('collect', `⚠ ${timedOut.length} sessions timed out waiting for spans`);
+  }
+  onProgress?.('collect', `✓ ${collectedSpans.size}/${sessionIds.length} sessions collected`);
+
+  const sessions: { sessionId: string; spans: DocumentType[] }[] = [];
+  const refInputSources: { scenario: PredefinedScenario; sessionId: string; traceIds: string[] }[] = [];
+
+  for (const result of successfulResults) {
+    const spans = collectedSpans.get(result.sessionId);
+    if (!spans || spans.length === 0) continue;
+
+    sessions.push({ sessionId: result.sessionId, spans });
+
+    const traceIds = extractTraceIds(spans);
+    const scenario = scenarios.find(s => s.scenario_id === result.scenarioId);
+    if (!scenario) continue; // Defensive: scenarioId always matches a loaded scenario
+    refInputSources.push({ scenario, sessionId: result.sessionId, traceIds });
+  }
+
+  const referenceInputs = refInputSources.flatMap(({ scenario, sessionId, traceIds }) =>
+    buildReferenceInputs({ scenario, sessionId, traceIds })
+  );
+
+  return { sessions, referenceInputs, scenarioResults, scenarios };
+}
diff --git a/src/cli/operations/eval/shared/evaluator-runner.ts b/src/cli/operations/eval/shared/evaluator-runner.ts
new file mode 100644
index 000000000..e07625b1e
--- /dev/null
+++ b/src/cli/operations/eval/shared/evaluator-runner.ts
@@ -0,0 +1,126 @@
+/**
+ * Shared evaluator-loop runner for dataset and historical-trace eval modes.
+ *
+ * Handles TRACE/TOOL_CALL/SESSION level routing, batching targetTraceIds/targetSpanIds
+ * into chunks of 10 (Evaluate API limit), per-session ref input filtering, and score
+ * aggregation.
+ */
+import type { EvaluationReferenceInput } from '../../../aws/agentcore';
+import { evaluate } from '../../../aws/agentcore';
+import type { EvalEvaluatorResult, EvalSessionScore } from '../types';
+import { extractToolCallSpanIds, extractTraceIds } from './span-collector';
+import type { DocumentType } from '@smithy/types';
+
+type EvaluatorLevel = 'SESSION' | 'TRACE' | 'TOOL_CALL';
+
+export interface SessionWithSpans {
+  sessionId: string;
+  spans: DocumentType[];
+  /** Optional scenario tag for dataset mode — flows into EvalSessionScore. */
+  scenarioId?: string;
+}
+
+export interface RunEvaluatorsOptions {
+  region: string;
+  evaluatorIds: string[];
+  evaluatorLabels: string[];
+  evaluatorLevels: Map<string, EvaluatorLevel>;
+  sessions: SessionWithSpans[];
+  /** Per-session ref inputs. Dataset mode: one entry per session. Historical: one entry for targeted session. */
+  refInputsBySession?: Map<string, EvaluationReferenceInput[]>;
+}
+
+const BATCH_SIZE = 10;
+
+function batchTargetIds(traceIds?: string[], spanIds?: string[]): { traceIds?: string[]; spanIds?: string[] }[] {
+  const result: { traceIds?: string[]; spanIds?: string[] }[] = [];
+  if (traceIds) {
+    for (let i = 0; i < traceIds.length; i += BATCH_SIZE) {
+      result.push({ traceIds: traceIds.slice(i, i + BATCH_SIZE) });
+    }
+  } else if (spanIds) {
+    for (let i = 0; i < spanIds.length; i += BATCH_SIZE) {
+      result.push({ spanIds: spanIds.slice(i, i + BATCH_SIZE) });
+    }
+  } else {
+    result.push({ traceIds: undefined, spanIds: undefined });
+  }
+  return result;
+}
+
+function resolveTargets(
+  level: EvaluatorLevel,
+  spans: DocumentType[]
+): { traceIds?: string[]; spanIds?: string[] } | null {
+  if (level === 'TRACE') {
+    const traceIds = extractTraceIds(spans);
+    return traceIds.length > 0 ? { traceIds, spanIds: undefined } : null;
+  }
+  if (level === 'TOOL_CALL') {
+    const spanIds = extractToolCallSpanIds(spans);
+    return spanIds.length > 0 ? { traceIds: undefined, spanIds } : null;
+  }
+  return { traceIds: undefined, spanIds: undefined };
+}
+
+/**
+ * Run all evaluators against all sessions. Shared by dataset and historical-trace modes.
+ */
+export async function runEvaluatorsOverSessions(opts: RunEvaluatorsOptions): Promise<EvalEvaluatorResult[]> {
+  const results: EvalEvaluatorResult[] = [];
+
+  for (let i = 0; i < opts.evaluatorIds.length; i++) {
+    const evaluatorId = opts.evaluatorIds[i]!;
+    const evaluatorName = opts.evaluatorLabels[i] ?? evaluatorId;
+    const level = opts.evaluatorLevels.get(evaluatorId) ?? 'SESSION';
+
+    const sessionScores: EvalSessionScore[] = [];
+    let totalInputTokens = 0;
+    let totalOutputTokens = 0;
+    let totalTokens = 0;
+
+    for (const session of opts.sessions) {
+      const targets = resolveTargets(level, session.spans);
+      if (!targets) continue;
+
+      for (const batch of batchTargetIds(targets.traceIds, targets.spanIds)) {
+        const response = await evaluate({
+          region: opts.region,
+          evaluatorId,
+          sessionSpans: session.spans,
+          targetTraceIds: batch.traceIds,
+          targetSpanIds: batch.spanIds,
+          evaluationReferenceInputs: opts.refInputsBySession?.get(session.sessionId),
+        });
+
+        for (const r of response.evaluationResults) {
+          sessionScores.push({
+            sessionId: r.context?.sessionId ?? session.sessionId,
+            scenarioId: session.scenarioId,
+            traceId: r.context?.traceId,
+            spanId: r.context?.spanId,
+            value: r.value ?? 0,
+            label: r.label,
+            explanation: r.explanation,
+            errorMessage: r.errorMessage,
+          });
+          totalInputTokens += r.tokenUsage?.inputTokens ?? 0;
+          totalOutputTokens += r.tokenUsage?.outputTokens ?? 0;
+          totalTokens += r.tokenUsage?.totalTokens ?? 0;
+        }
+      }
+    }
+
+    const valid = sessionScores.filter(s => !s.errorMessage);
+    const aggregateScore = valid.length > 0 ? valid.reduce((sum, s) => sum + s.value, 0) / valid.length : 0;
+
+    results.push({
+      evaluator: evaluatorName,
+      aggregateScore,
+      sessionScores,
+      tokenUsage: { inputTokens: totalInputTokens, outputTokens: totalOutputTokens, totalTokens },
+    });
+  }
+
+  return results;
+}
diff --git a/src/cli/operations/eval/shared/scenario-executor.ts b/src/cli/operations/eval/shared/scenario-executor.ts
new file mode 100644
index 000000000..be3c52a5e
--- /dev/null
+++ b/src/cli/operations/eval/shared/scenario-executor.ts
@@ -0,0 +1,94 @@
+/**
+ * Execute dataset scenarios against a deployed agent.
+ *
+ * Invokes the agent for each scenario's turns sequentially within a session,
+ * running up to 5 scenarios concurrently. Halts a scenario on turn failure.
+ */
+import { invokeAgentRuntime } from '../../../aws/agentcore';
+import type { AgentContext } from '../../invoke/resolve-agent-context';
+import { generateSessionId } from '../../session';
+import type { PredefinedScenario } from './types';
+
+/** Maximum concurrent scenario executions. */
+const MAX_CONCURRENT = 5;
+
+export interface ScenarioInvocationResult {
+  scenarioId: string;
+  sessionId: string;
+  turnCount: number;
+  status: 'success' | 'failed';
+  error?: string;
+}
+
+export interface ExecuteScenariosOptions {
+  scenarios: PredefinedScenario[];
+  agentContext: AgentContext;
+  onProgress?: (completed: number, total: number, current: ScenarioInvocationResult) => void;
+}
+
+/**
+ * Execute all scenarios concurrently (max 5 at a time).
+ * Each scenario invokes all turns sequentially in one session.
+ * Halts on turn failure — marks entire scenario as FAILED.
+ */
+export async function executeScenarios(options: ExecuteScenariosOptions): Promise<ScenarioInvocationResult[]> {
+  const { scenarios, agentContext, onProgress } = options;
+  const results: ScenarioInvocationResult[] = new Array<ScenarioInvocationResult>(scenarios.length);
+  let nextIndex = 0;
+  let completedCount = 0;
+
+  async function worker(): Promise<void> {
+    while (true) {
+      const i = nextIndex++;
+      if (i >= scenarios.length) return;
+      const result = await executeSingleScenario(scenarios[i]!, agentContext);
+      results[i] = result;
+      completedCount++;
+      onProgress?.(completedCount, scenarios.length, result);
+    }
+  }
+
+  const workers = Array.from({ length: Math.min(MAX_CONCURRENT, scenarios.length) }, () => worker());
+  await Promise.all(workers);
+  return results;
+}
+
+/**
+ * Execute a single scenario: invoke all turns sequentially in one session.
+ * Halts on first turn failure.
+ */
+async function executeSingleScenario(
+  scenario: PredefinedScenario,
+  ctx: AgentContext
+): Promise<ScenarioInvocationResult> {
+  const sessionId = generateSessionId();
+
+  try {
+    for (const turn of scenario.turns) {
+      await invokeAgentRuntime({
+        region: ctx.region,
+        runtimeArn: ctx.runtimeArn,
+        payload: turn.input,
+        sessionId,
+        bearerToken: ctx.bearerToken,
+        baggage: ctx.baggage,
+        endpoint: ctx.endpoint,
+      });
+    }
+
+    return {
+      scenarioId: scenario.scenario_id,
+      sessionId: sessionId,
+      turnCount: scenario.turns.length,
+      status: 'success',
+    };
+  } catch (err) {
+    return {
+      scenarioId: scenario.scenario_id,
+      sessionId: sessionId,
+      turnCount: scenario.turns.length,
+      status: 'failed',
+      error: err instanceof Error ? err.message : String(err),
+    };
+  }
+}
diff --git a/src/cli/operations/eval/shared/span-collector.ts b/src/cli/operations/eval/shared/span-collector.ts
new file mode 100644
index 000000000..ca78cf334
--- /dev/null
+++ b/src/cli/operations/eval/shared/span-collector.ts
@@ -0,0 +1,377 @@
+/**
+ * Collect spans from CloudWatch after agent invocations.
+ *
+ * Waits for an ingestion delay, then polls for spans
+ * for each session. Retries on transient errors.
+ */
+import { getCredentialProvider } from '../../../aws';
+import { CloudWatchLogsClient, GetQueryResultsCommand, StartQueryCommand } from '@aws-sdk/client-cloudwatch-logs';
+import type { ResultField } from '@aws-sdk/client-cloudwatch-logs';
+import type { DocumentType } from '@smithy/types';
+
+/**
+ * Default delay before first span query (CloudWatch ingestion buffer).
+ * Matches SDK's evaluation_delay_seconds default (180s).
+ */
+const SPAN_INGESTION_DELAY_MS = 180_000;
+
+/** Maximum time to poll for spans after the ingestion delay. */
+const SPAN_POLL_TIMEOUT_MS = 60_000;
+
+/** Interval between poll attempts. */
+const SPAN_POLL_INTERVAL_MS = 5_000;
+
+export const SPANS_LOG_GROUP = 'aws/spans';
+
+const SUPPORTED_SCOPES = new Set([
+  'strands.telemetry.tracer',
+  'opentelemetry.instrumentation.langchain',
+  'openinference.instrumentation.langchain',
+]);
+
+export interface CollectSpansOptions {
+  sessionIds: string[];
+  region: string;
+  logGroup: string;
+  querySpans: (region: string, logGroup: string, sessionId: string) => Promise<DocumentType[]>;
+  onProgress?: (collected: number, total: number, message?: string) => void;
+}
+
+export interface CollectedSpans {
+  spans: Map<string, DocumentType[]>;
+  timedOut: string[];
+}
+
+/** Returns true if the error is permanent (non-retryable). */
+function isPermanentError(err: unknown): boolean {
+  const msg = err instanceof Error ? err.message : String(err);
+  return msg.includes('AccessDenied') || msg.includes('InvalidParameter');
+}
+
+/** Poll a single session for spans until we have some or the deadline passes. */
+async function pollOneSession(
+  sessionId: string,
+  querySpans: CollectSpansOptions['querySpans'],
+  region: string,
+  logGroup: string,
+  timeoutMs: number
+): Promise<DocumentType[] | null> {
+  const deadline = Date.now() + timeoutMs;
+  while (Date.now() < deadline) {
+    try {
+      const spans = await querySpans(region, logGroup, sessionId);
+      if (spans.length > 0) return spans;
+    } catch (err) {
+      if (isPermanentError(err)) {
+        throw new Error(`CloudWatch query failed: ${err instanceof Error ? err.message : String(err)}`);
+      }
+      // Transient errors (throttling, 503) — retry next interval
+    }
+    await sleep(SPAN_POLL_INTERVAL_MS);
+  }
+  return null;
+}
+
+/**
+ * Collect spans for all sessions after ingestion delay.
+ * Each session polls independently with its own timeout budget.
+ */
+export async function collectSpans(options: CollectSpansOptions): Promise<CollectedSpans> {
+  const { sessionIds, querySpans, onProgress } = options;
+
+  // Phase 1: Wait for CloudWatch ingestion
+  onProgress?.(0, sessionIds.length, `Waiting for span ingestion (${SPAN_INGESTION_DELAY_MS / 1000}s)...`);
+  await sleep(SPAN_INGESTION_DELAY_MS);
+
+  // Phase 2: Poll each session in parallel — use allSettled so one failure doesn't abort the rest
+  let collectedCount = 0;
+  const settled = await Promise.allSettled(
+    sessionIds.map(async sessionId => {
+      const spans = await pollOneSession(sessionId, querySpans, options.region, options.logGroup, SPAN_POLL_TIMEOUT_MS);
+      if (spans) {
+        collectedCount++;
+        onProgress?.(collectedCount, sessionIds.length);
+      }
+      return { sessionId, spans };
+    })
+  );
+
+  const collected = new Map<string, DocumentType[]>();
+  const timedOut: string[] = [];
+  for (const outcome of settled) {
+    if (outcome.status === 'fulfilled') {
+      const r = outcome.value;
+      if (r.spans) collected.set(r.sessionId, r.spans);
+      else timedOut.push(r.sessionId);
+    } else {
+      // Rejected sessions are treated as timed out
+      timedOut.push('unknown');
+    }
+  }
+
+  return { spans: collected, timedOut };
+}
+
+/**
+ * Extract unique traceIds from spans in appearance order.
+ * Used by ground-truth mapping (turn[i] → traceIds[i]).
+ */
+export function extractTraceIds(spans: DocumentType[]): string[] {
+  const seen = new Set<string>();
+  const traceIds: string[] = [];
+  for (const span of spans) {
+    const traceId = (span as Record<string, unknown>).traceId as string | undefined;
+    if (traceId && !seen.has(traceId)) {
+      seen.add(traceId);
+      traceIds.push(traceId);
+    }
+  }
+  return traceIds;
+}
+
+/**
+ * Extract span IDs that represent tool calls from session spans.
+ */
+export function extractToolCallSpanIds(spans: DocumentType[]): string[] {
+  const spanIds: string[] = [];
+  for (const span of spans) {
+    const doc = span as Record<string, unknown>;
+    const spanId = doc.spanId as string | undefined;
+    if (!spanId) continue;
+
+    // Tool call spans must have a tool name attribute — kind=CLIENT alone is too broad
+    const attrs = doc.attributes as Record<string, unknown> | undefined;
+    if (attrs?.['gen_ai.tool.name'] ?? attrs?.['tool.name']) {
+      spanIds.push(spanId);
+    }
+  }
+  return spanIds;
+}
+
+/** Sanitize a value for use in CloudWatch Insights query strings by removing single quotes. */
+export function sanitizeQueryValue(value: string): string {
+  return value.replace(/'/g, '');
+}
+
+/**
+ * Execute a CloudWatch Logs Insights query and wait for results.
+ */
+export async function executeQuery(
+  client: CloudWatchLogsClient,
+  logGroupName: string,
+  queryString: string,
+  startTimeSec: number,
+  endTimeSec: number
+): Promise<ResultField[][]> {
+  const startQuery = await client.send(
+    new StartQueryCommand({
+      logGroupName,
+      startTime: startTimeSec,
+      endTime: endTimeSec,
+      queryString,
+    })
+  );
+
+  if (!startQuery.queryId) {
+    throw new Error('Failed to start CloudWatch Logs Insights query');
+  }
+
+  for (let i = 0; i < 60; i++) {
+    await new Promise(resolve => setTimeout(resolve, 1000));
+
+    const queryResults = await client.send(new GetQueryResultsCommand({ queryId: startQuery.queryId }));
+    const status = queryResults.status ?? 'Unknown';
+
+    if (status === 'Failed' || status === 'Cancelled') {
+      throw new Error(`CloudWatch query ${status.toLowerCase()}`);
+    }
+
+    if (status === 'Complete') {
+      return queryResults.results ?? [];
+    }
+  }
+
+  throw new Error('CloudWatch query timed out after 60 seconds');
+}
+
+/**
+ * Extract parsed @message documents from CloudWatch Insights results.
+ */
+function extractMessages(rows: ResultField[][]): Record<string, unknown>[] {
+  const docs: Record<string, unknown>[] = [];
+  for (const row of rows) {
+    const messageField = row.find(f => f.field === '@message');
+    if (messageField?.value) {
+      try {
+        docs.push(JSON.parse(messageField.value) as Record<string, unknown>);
+      } catch {
+        // Skip non-JSON log lines
+      }
+    }
+  }
+  return docs;
+}
+
+/**
+ * Check if a document is relevant for evaluation:
+ * - Has a supported instrumentation scope, OR
+ * - Is a log record with conversation data (body.input / body.output)
+ */
+function isRelevantForEval(doc: Record<string, unknown>): boolean {
+  const scope = doc.scope as Record<string, unknown> | undefined;
+  const scopeName = scope?.name as string | undefined;
+  if (scopeName && SUPPORTED_SCOPES.has(scopeName)) {
+    return true;
+  }
+
+  const body = doc.body;
+  if (body && typeof body === 'object' && ('input' in body || 'output' in body)) {
+    return true;
+  }
+
+  return false;
+}
+
+export interface SessionSpans {
+  sessionId: string;
+  spans: DocumentType[];
+}
+
+export interface FetchSpansOptions {
+  runtimeId: string;
+  runtimeLogGroup: string;
+  region: string;
+  lookbackDays: number;
+  sessionId?: string;
+  traceId?: string;
+}
+
+/**
+ * Fetch OTel spans from the `aws/spans` log group and runtime logs from the agent's
+ * log group, then group them by session.
+ *
+ * The Evaluate API requires spans from a single session per call.
+ */
+export async function fetchSessionSpans(opts: FetchSpansOptions): Promise<SessionSpans[]> {
+  const { runtimeId, runtimeLogGroup, region, lookbackDays } = opts;
+  const endTimeMs = Date.now();
+  const startTimeMs = endTimeMs - lookbackDays * 24 * 60 * 60 * 1000;
+  const startTimeSec = Math.floor(startTimeMs / 1000);
+  const endTimeSec = Math.floor(endTimeMs / 1000);
+
+  const client = new CloudWatchLogsClient({
+    credentials: getCredentialProvider(),
+    region,
+  });
+
+  // 1. Query proper OTel spans from the aws/spans log group
+  let spanQuery = `fields @message, attributes.session.id as sessionId, traceId
+     | parse resource.attributes.cloud.resource_id "runtime/*/" as parsedAgentId
+     | filter parsedAgentId = '${sanitizeQueryValue(runtimeId)}'
+     | filter ispresent(scope.name)`;
+
+  if (opts.sessionId) {
+    spanQuery += `\n     | filter attributes.session.id = '${sanitizeQueryValue(opts.sessionId)}'`;
+  }
+  if (opts.traceId) {
+    spanQuery += `\n     | filter traceId = '${sanitizeQueryValue(opts.traceId)}'`;
+  }
+
+  spanQuery += `\n     | sort startTimeUnixNano asc\n     | limit 10000`;
+
+  const spanRows = await executeQuery(client, SPANS_LOG_GROUP, spanQuery, startTimeSec, endTimeSec);
+
+  // Group spans by session and collect trace IDs
+  const sessionMap = new Map<string, DocumentType[]>();
+  const traceIds = new Set<string>();
+
+  for (const row of spanRows) {
+    const messageField = row.find(f => f.field === '@message');
+    const sessionField = row.find(f => f.field === 'sessionId');
+    const traceField = row.find(f => f.field === 'traceId');
+
+    if (!messageField?.value) continue;
+
+    let doc: Record<string, unknown>;
+    try {
+      doc = JSON.parse(messageField.value) as Record<string, unknown>;
+    } catch {
+      continue;
+    }
+
+    const sessionId = sessionField?.value ?? 'unknown';
+    if (!sessionMap.has(sessionId)) {
+      sessionMap.set(sessionId, []);
+    }
+    sessionMap.get(sessionId)!.push(doc as DocumentType);
+
+    if (traceField?.value) {
+      traceIds.add(traceField.value);
+    }
+  }
+
+  if (sessionMap.size === 0) {
+    return [];
+  }
+
+  // 2. Query runtime logs from the agent's log group for the trace IDs found
+  if (traceIds.size > 0) {
+    const traceFilter = [...traceIds].map(t => `'${sanitizeQueryValue(t)}'`).join(', ');
+    let logRows: ResultField[][] = [];
+    try {
+      logRows = await executeQuery(
+        client,
+        runtimeLogGroup,
+        `fields @message, traceId
+         | filter traceId in [${traceFilter}]
+         | sort @timestamp asc
+         | limit 10000`,
+        startTimeSec,
+        endTimeSec
+      );
+    } catch {
+      // Runtime log group may not exist yet; continue with spans only
+    }
+
+    const logDocs = extractMessages(logRows);
+
+    // Match runtime logs to sessions via traceId
+    // Build traceId → sessionId mapping from spans
+    const traceToSession = new Map<string, string>();
+    for (const row of spanRows) {
+      const traceField = row.find(f => f.field === 'traceId');
+      const sessionField = row.find(f => f.field === 'sessionId');
+      if (traceField?.value && sessionField?.value) {
+        traceToSession.set(traceField.value, sessionField.value);
+      }
+    }
+
+    for (const logDoc of logDocs) {
+      if (!isRelevantForEval(logDoc)) continue;
+
+      const logTraceId = logDoc.traceId as string | undefined;
+      const sessionId = logTraceId ? (traceToSession.get(logTraceId) ?? 'unknown') : 'unknown';
+      if (!sessionMap.has(sessionId)) {
+        sessionMap.set(sessionId, []);
+      }
+      sessionMap.get(sessionId)!.push(logDoc as DocumentType);
+    }
+  }
+
+  // 3. Build session list — aws/spans docs are already scoped by runtimeId (step 1),
+  //    and runtime log docs were filtered through isRelevantForEval (step 2).
+  //    We keep all docs so the Evaluate API has full trace context for resolving
+  //    template variables like {context} and {assistant_turn}.
+  const sessions: SessionSpans[] = [];
+  for (const [sessionId, docs] of sessionMap) {
+    if (docs.length > 0) {
+      sessions.push({ sessionId, spans: docs });
+    }
+  }
+
+  return sessions;
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
diff --git a/src/cli/operations/eval/shared/types.ts b/src/cli/operations/eval/shared/types.ts
new file mode 100644
index 000000000..671ac97ad
--- /dev/null
+++ b/src/cli/operations/eval/shared/types.ts
@@ -0,0 +1,17 @@
+/**
+ * Shared types for dataset-driven evaluation.
+ */
+
+/** A single turn in a predefined scenario. */
+export interface Turn {
+  input: string;
+  expectedResponse?: string;
+}
+
+/** A predefined evaluation scenario parsed from JSONL. */
+export interface PredefinedScenario {
+  scenario_id: string;
+  turns: Turn[];
+  assertions?: string[];
+  expected_trajectory?: string[];
+}
diff --git a/src/cli/operations/eval/types.ts b/src/cli/operations/eval/types.ts
index 6f06e8364..daf67f645 100644
--- a/src/cli/operations/eval/types.ts
+++ b/src/cli/operations/eval/types.ts
@@ -13,6 +13,7 @@ export interface EvalEvaluatorResult {
 /** Per-session score from an evaluator */
 export interface EvalSessionScore {
   sessionId: string;
+  scenarioId?: string;
   traceId?: string;
   spanId?: string;
   value: number;
@@ -26,7 +27,7 @@ export interface EvalRunResult {
   timestamp: string;
   agent: string;
   evaluators: string[];
-  lookbackDays: number;
+  lookbackDays?: number;
   sessionCount: number;
   results: EvalEvaluatorResult[];
   referenceInputs?: {
@@ -34,6 +35,12 @@ export interface EvalRunResult {
     expectedTrajectory?: string[];
     expectedResponse?: string;
   };
+  /** Present when eval was run against a dataset */
+  source?: 'dataset' | 'traces';
+  /** Dataset name (when source === 'dataset') */
+  datasetName?: string;
+  /** Dataset details (when source === 'dataset') */
+  dataset?: { id: string; version: string };
 }
 
 /** Lightweight session info returned by session discovery */
@@ -71,6 +78,12 @@ export interface RunEvalOptions {
   expectedResponse?: string;
   days: number;
   output?: string;
+  /** Dataset name — invoke agent with dataset scenarios instead of historical traces */
+  dataset?: string;
+  /** Dataset version (omit for local file, or N/DRAFT) */
+  datasetVersion?: string;
+  /** Progress callback for dataset evaluation phases */
+  onProgress?: (phase: string, message: string) => void;
   json?: boolean;
 }
 
diff --git a/src/cli/operations/invoke/__tests__/resolve-agent-context.test.ts b/src/cli/operations/invoke/__tests__/resolve-agent-context.test.ts
new file mode 100644
index 000000000..67f360ee1
--- /dev/null
+++ b/src/cli/operations/invoke/__tests__/resolve-agent-context.test.ts
@@ -0,0 +1,104 @@
+import { resolveAgentContext } from '../resolve-agent-context';
+import { describe, expect, it, vi } from 'vitest';
+
+vi.mock('../../fetch-access', () => ({
+  canFetchRuntimeToken: vi.fn().mockResolvedValue(false),
+  fetchRuntimeToken: vi.fn(),
+}));
+
+const mockProject = {
+  name: 'TestProject',
+  version: 1,
+  managedBy: 'CDK' as const,
+  runtimes: [{ name: 'MyAgent', build: 'CodeZip' as const, entrypoint: 'main.py', codeLocation: 'app/MyAgent/' }],
+  memories: [],
+  credentials: [],
+  evaluators: [],
+  onlineEvalConfigs: [],
+  configBundles: [],
+  datasets: [],
+  policyEngines: [],
+  agentCoreGateways: [],
+  mcpRuntimeTools: [],
+  unassignedTargets: [],
+};
+
+const mockDeployedState = {
+  targets: {
+    default: {
+      stackName: 'TestStack',
+      resources: {
+        runtimes: {
+          MyAgent: {
+            runtimeId: 'runtime-123',
+            runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123456:runtime/TestProject_MyAgent-abc',
+          },
+        },
+      },
+    },
+  },
+};
+
+const mockTargets = [{ name: 'default', account: '123456', region: 'us-east-1' }];
+
+describe('resolveAgentContext', () => {
+  it('resolves agent context with runtimeArn and region', async () => {
+    const ctx = await resolveAgentContext({
+      project: mockProject as any,
+      deployedState: mockDeployedState as any,
+      awsTargets: mockTargets as any,
+      agentName: 'MyAgent',
+    });
+
+    expect(ctx.runtimeArn).toBe('arn:aws:bedrock-agentcore:us-east-1:123456:runtime/TestProject_MyAgent-abc');
+    expect(ctx.runtimeId).toBe('runtime-123');
+    expect(ctx.region).toBe('us-east-1');
+    expect(ctx.agentName).toBe('MyAgent');
+  });
+
+  it('auto-selects single agent when agentName is omitted', async () => {
+    const ctx = await resolveAgentContext({
+      project: mockProject as any,
+      deployedState: mockDeployedState as any,
+      awsTargets: mockTargets as any,
+    });
+
+    expect(ctx.agentName).toBe('MyAgent');
+  });
+
+  it('throws when no deployed targets', async () => {
+    await expect(
+      resolveAgentContext({
+        project: mockProject as any,
+        deployedState: { targets: {} } as any,
+        awsTargets: mockTargets as any,
+      })
+    ).rejects.toThrow('No deployed targets');
+  });
+
+  it('throws when agent not found', async () => {
+    await expect(
+      resolveAgentContext({
+        project: mockProject as any,
+        deployedState: mockDeployedState as any,
+        awsTargets: mockTargets as any,
+        agentName: 'NonExistent',
+      })
+    ).rejects.toThrow('not found');
+  });
+
+  it('throws when agent not deployed', async () => {
+    const stateWithoutRuntime = {
+      targets: { default: { stackName: 'TestStack', resources: { runtimes: {} } } },
+    };
+
+    await expect(
+      resolveAgentContext({
+        project: mockProject as any,
+        deployedState: stateWithoutRuntime as any,
+        awsTargets: mockTargets as any,
+        agentName: 'MyAgent',
+      })
+    ).rejects.toThrow('not deployed');
+  });
+});
diff --git a/src/cli/operations/invoke/resolve-agent-context.ts b/src/cli/operations/invoke/resolve-agent-context.ts
new file mode 100644
index 000000000..2eb9971de
--- /dev/null
+++ b/src/cli/operations/invoke/resolve-agent-context.ts
@@ -0,0 +1,122 @@
+/**
+ * Shared agent resolution logic.
+ *
+ * Resolves a deployed agent to its full invocation context: runtimeArn, region,
+ * config bundle baggage, and bearer token. Called ONCE before invoking —
+ * reused across multiple invocations (e.g., dataset eval scenarios).
+ *
+ * Used by:
+ * - `agentcore invoke` (commands/invoke/action.ts)
+ * - Dataset eval scenario executor (operations/eval/shared/scenario-executor.ts)
+ */
+import type { AgentCoreProjectSpec, AwsDeploymentTargets, DeployedState } from '../../../schema';
+import { canFetchRuntimeToken, fetchRuntimeToken } from '../fetch-access';
+
+export interface AgentContext {
+  runtimeArn: string;
+  runtimeId: string;
+  region: string;
+  endpoint?: string;
+  agentName: string;
+  baggage?: string;
+  bearerToken?: string;
+}
+
+export interface ResolveAgentContextOptions {
+  project: AgentCoreProjectSpec;
+  deployedState: DeployedState;
+  awsTargets: AwsDeploymentTargets;
+  agentName?: string;
+  endpoint?: string;
+  targetName?: string;
+}
+
+/**
+ * Resolve a deployed agent to its invocation context.
+ * Handles: target resolution, agent lookup, config bundle baggage, bearer token.
+ */
+export async function resolveAgentContext(options: ResolveAgentContextOptions): Promise<AgentContext> {
+  const { project, deployedState, awsTargets } = options;
+
+  // Resolve target
+  const targetNames = Object.keys(deployedState.targets);
+  if (targetNames.length === 0) {
+    throw new Error('No deployed targets found. Run `agentcore deploy` first.');
+  }
+
+  const selectedTargetName = options.targetName ?? targetNames[0]!;
+
+  if (options.targetName && !targetNames.includes(options.targetName)) {
+    throw new Error(`Target '${options.targetName}' not found. Available: ${targetNames.join(', ')}`);
+  }
+
+  const targetState = deployedState.targets[selectedTargetName];
+  const targetConfig = awsTargets.find(t => t.name === selectedTargetName);
+
+  if (!targetConfig) {
+    throw new Error(`Target config '${selectedTargetName}' not found in aws-targets`);
+  }
+
+  // Resolve agent
+  if (project.runtimes.length === 0) {
+    throw new Error('No agents defined in configuration');
+  }
+
+  const agentSpec = options.agentName ? project.runtimes.find(a => a.name === options.agentName) : project.runtimes[0];
+
+  if (!agentSpec) {
+    const available = project.runtimes.map(a => a.name).join(', ');
+    throw new Error(`Agent '${options.agentName}' not found. Available: ${available}`);
+  }
+
+  const agentState = targetState?.resources?.runtimes?.[agentSpec.name];
+
+  if (!agentState) {
+    throw new Error(`Agent '${agentSpec.name}' is not deployed to target '${selectedTargetName}'`);
+  }
+
+  // Resolve config bundle baggage
+  let baggage: string | undefined;
+  const bundleSpec = project.configBundles?.find(b => {
+    const keys = Object.keys(b.components ?? {});
+    return keys.some(k => k === `{{runtime:${agentSpec.name}}}`);
+  });
+  if (bundleSpec) {
+    const deployedBundles = targetState?.resources?.configBundles ?? {};
+    const bundleState = deployedBundles[bundleSpec.name];
+    if (bundleState?.bundleArn && bundleState?.versionId) {
+      baggage = `aws.agentcore.configbundle_arn=${encodeURIComponent(bundleState.bundleArn)},aws.agentcore.configbundle_version=${encodeURIComponent(bundleState.versionId)}`;
+    }
+  }
+
+  // Resolve bearer token for CUSTOM_JWT agents
+  let bearerToken: string | undefined;
+  if (agentSpec.authorizerType === 'CUSTOM_JWT') {
+    const canFetch = await canFetchRuntimeToken(agentSpec.name);
+    if (canFetch) {
+      try {
+        const tokenResult = await fetchRuntimeToken(agentSpec.name, { deployTarget: selectedTargetName });
+        bearerToken = tokenResult.token;
+      } catch (err) {
+        throw new Error(
+          `CUSTOM_JWT agent requires a bearer token. Auto-fetch failed: ${err instanceof Error ? err.message : String(err)}`
+        );
+      }
+    } else {
+      throw new Error(
+        `Agent '${agentSpec.name}' is configured for CUSTOM_JWT but no bearer token is available. ` +
+          `Re-add the agent with --client-id and --client-secret to enable auto-fetch.`
+      );
+    }
+  }
+
+  return {
+    runtimeArn: agentState.runtimeArn,
+    runtimeId: agentState.runtimeId,
+    region: targetConfig.region,
+    endpoint: options.endpoint,
+    agentName: agentSpec.name,
+    baggage,
+    bearerToken,
+  };
+}
diff --git a/src/cli/operations/recommendation/__tests__/fetch-session-spans.test.ts b/src/cli/operations/recommendation/__tests__/fetch-session-spans.test.ts
index 4395edd23..4a85dc568 100644
--- a/src/cli/operations/recommendation/__tests__/fetch-session-spans.test.ts
+++ b/src/cli/operations/recommendation/__tests__/fetch-session-spans.test.ts
@@ -5,6 +5,7 @@ const mockSearchLogs = vi.fn();
 
 vi.mock('../../../aws/cloudwatch', () => ({
   searchLogs: (...args: unknown[]) => mockSearchLogs(...args),
+  runtimeLogGroup: (runtimeId: string) => `/aws/bedrock-agentcore/runtimes/${runtimeId}-DEFAULT`,
 }));
 
 /**
diff --git a/src/cli/operations/recommendation/fetch-session-spans.ts b/src/cli/operations/recommendation/fetch-session-spans.ts
index db5e63911..992e936a3 100644
--- a/src/cli/operations/recommendation/fetch-session-spans.ts
+++ b/src/cli/operations/recommendation/fetch-session-spans.ts
@@ -12,7 +12,7 @@
  * Without log records the mapper produces "zero trajectories".
  */
 import type { SessionSpan } from '../../aws/agentcore-recommendation';
-import { searchLogs } from '../../aws/cloudwatch';
+import { runtimeLogGroup, searchLogs } from '../../aws/cloudwatch';
 
 export interface FetchSessionSpansOptions {
   /** AWS region */
@@ -47,7 +47,7 @@ const SPANS_LOG_GROUP = 'aws/spans';
 export async function fetchSessionSpans(options: FetchSessionSpansOptions): Promise<FetchSessionSpansResult> {
   const { region, runtimeId, sessionId, lookbackDays = 7, onProgress } = options;
 
-  const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${runtimeId}-DEFAULT`;
+  const runtimeLogGroupName = runtimeLogGroup(runtimeId);
   const endTimeMs = Date.now();
   const startTimeMs = endTimeMs - lookbackDays * 24 * 60 * 60 * 1000;
 
@@ -62,7 +62,7 @@ export async function fetchSessionSpans(options: FetchSessionSpansOptions): Prom
       filterPattern: `"session.id" "${sessionId}"`,
     }),
     collectLogEvents({
-      logGroupName: runtimeLogGroup,
+      logGroupName: runtimeLogGroupName,
       region,
       startTimeMs,
       endTimeMs,
diff --git a/src/cli/operations/recommendation/run-recommendation.ts b/src/cli/operations/recommendation/run-recommendation.ts
index d277c01cb..42ff863cc 100644
--- a/src/cli/operations/recommendation/run-recommendation.ts
+++ b/src/cli/operations/recommendation/run-recommendation.ts
@@ -15,6 +15,7 @@ import type {
   SessionSpan,
 } from '../../aws/agentcore-recommendation';
 import { getRecommendation, startRecommendation } from '../../aws/agentcore-recommendation';
+import { runtimeLogGroup } from '../../aws/cloudwatch';
 import { arnPrefix } from '../../aws/partition';
 import { detectRegion } from '../../aws/region';
 import { ExecLogger } from '../../logging/exec-logger';
@@ -461,7 +462,7 @@ async function buildRecommendationConfig(opts: BuildConfigOptions): Promise<Reco
     agentTraces = { sessionSpans: allSpans };
   } else {
     // Lookback-based path — use cloudwatchLogs with time range
-    const runtimeLogGroupArn = `${arnPrefix(opts.region)}:logs:${opts.region}:${opts.accountId}:log-group:/aws/bedrock-agentcore/runtimes/${opts.runtimeId}-DEFAULT`;
+    const runtimeLogGroupArn = `${arnPrefix(opts.region)}:logs:${opts.region}:${opts.accountId}:log-group:${runtimeLogGroup(opts.runtimeId)}`;
     const spansLogGroupArn = `${arnPrefix(opts.region)}:logs:${opts.region}:${opts.accountId}:log-group:aws/spans`;
 
     // Derive service name: strip the random hash suffix from runtimeId
diff --git a/src/cli/operations/traces/get-trace.ts b/src/cli/operations/traces/get-trace.ts
index 153e67f5c..2e7e0f01d 100644
--- a/src/cli/operations/traces/get-trace.ts
+++ b/src/cli/operations/traces/get-trace.ts
@@ -1,6 +1,6 @@
 import { ResourceNotFoundError, ValidationError } from '../../../lib';
 import type { Result } from '../../../lib/result';
-import { DEFAULT_ENDPOINT_NAME } from '../../constants';
+import { runtimeLogGroup } from '../../aws/cloudwatch';
 import { runInsightsQuery } from './insights-query';
 import type {
   CloudWatchSpanRecord,
@@ -16,10 +16,6 @@ import path from 'node:path';
 const SPANS_LOG_GROUP = 'aws/spans';
 const TRACE_ID_PATTERN = /^[a-fA-F0-9-]+$/;
 
-function runtimeLogGroup(runtimeId: string): string {
-  return `/aws/bedrock-agentcore/runtimes/${runtimeId}-${DEFAULT_ENDPOINT_NAME}`;
-}
-
 async function fetchSpans(
   region: string,
   traceId: string,
diff --git a/src/cli/operations/traces/list-traces.ts b/src/cli/operations/traces/list-traces.ts
index 865dbb02d..eb35b3f97 100644
--- a/src/cli/operations/traces/list-traces.ts
+++ b/src/cli/operations/traces/list-traces.ts
@@ -1,4 +1,4 @@
-import { DEFAULT_ENDPOINT_NAME } from '../../constants';
+import { runtimeLogGroup } from '../../aws/cloudwatch';
 import { runInsightsQuery } from './insights-query';
 import type { ListTracesOptions, ListTracesResult, TraceEntry } from './types';
 
@@ -11,7 +11,7 @@ import type { ListTracesOptions, ListTracesResult, TraceEntry } from './types';
 export async function listTraces(options: ListTracesOptions): Promise<ListTracesResult> {
   const { region, runtimeId, limit = 20 } = options;
 
-  const logGroupName = `/aws/bedrock-agentcore/runtimes/${runtimeId}-${DEFAULT_ENDPOINT_NAME}`;
+  const logGroupName = runtimeLogGroup(runtimeId);
 
   const result = await runInsightsQuery({
     region,
diff --git a/src/cli/primitives/DatasetPrimitive.ts b/src/cli/primitives/DatasetPrimitive.ts
new file mode 100644
index 000000000..d6a20d968
--- /dev/null
+++ b/src/cli/primitives/DatasetPrimitive.ts
@@ -0,0 +1,232 @@
+import { findConfigRoot } from '../../lib';
+import type { Result } from '../../lib/result';
+import type { DatasetSchemaType } from '../../schema';
+import { DatasetSchema } from '../../schema';
+import type { AddDatasetOptions } from '../commands/add/types';
+import { validateAddDatasetOptions } from '../commands/add/validate';
+import { getErrorMessage } from '../errors';
+import type { RemovalPreview, SchemaChange } from '../operations/remove/types';
+import { runCliCommand } from '../telemetry/cli-command-run.js';
+import { getTemplatePath } from '../templates/templateRoot';
+import { requireTTY } from '../tui/guards/tty';
+import { BasePrimitive } from './BasePrimitive';
+import type { AddResult, AddScreenComponent, RemovableResource } from './types';
+import type { Command } from '@commander-js/extra-typings';
+import { copyFile, mkdir } from 'node:fs/promises';
+import { join } from 'node:path';
+
+const SCHEMA_TYPE_TO_ASSET: Record<string, string> = {
+  AGENTCORE_EVALUATION_PREDEFINED_V1: 'predefined-v1.jsonl',
+  AGENTCORE_EVALUATION_SIMULATED_V1: 'simulated-v1.jsonl',
+};
+
+/**
+ * Represents a dataset that can be removed.
+ */
+export type RemovableDataset = RemovableResource;
+
+/**
+ * DatasetPrimitive handles all dataset add/remove operations.
+ */
+export class DatasetPrimitive extends BasePrimitive<AddDatasetOptions, RemovableDataset> {
+  readonly kind = 'dataset';
+  readonly label = 'Dataset';
+  readonly primitiveSchema = DatasetSchema;
+
+  async add(options: AddDatasetOptions): Promise<AddResult<{ datasetName: string; location: string }>> {
+    try {
+      const project = await this.readProjectSpec();
+      const datasets = project.datasets ?? [];
+
+      this.checkDuplicate(datasets, options.name);
+
+      const location = `datasets/${options.name}.jsonl`;
+      const dataset = {
+        name: options.name,
+        schemaType: options.schemaType,
+        ...(options.description && { description: options.description }),
+        config: {
+          managed: { location },
+        },
+      };
+
+      datasets.push(dataset);
+      project.datasets = datasets;
+      await this.writeProjectSpec(project);
+
+      // Scaffold the starter .jsonl file
+      await this.scaffoldDatasetFile(options.name, options.schemaType, location);
+
+      return { success: true, datasetName: dataset.name, location: `agentcore/${location}` };
+    } catch (err) {
+      return { success: false, error: err instanceof Error ? err : new Error(getErrorMessage(err)) };
+    }
+  }
+
+  async remove(datasetName: string): Promise<Result> {
+    try {
+      const project = await this.readProjectSpec();
+      const datasets = project.datasets ?? [];
+
+      const datasetIndex = datasets.findIndex(d => d.name === datasetName);
+      if (datasetIndex === -1) {
+        return { success: false, error: new Error(`Dataset "${datasetName}" not found.`) };
+      }
+
+      datasets.splice(datasetIndex, 1);
+      project.datasets = datasets;
+      await this.writeProjectSpec(project);
+
+      return { success: true };
+    } catch (err) {
+      return { success: false, error: err instanceof Error ? err : new Error('Unknown error') };
+    }
+  }
+
+  async previewRemove(datasetName: string): Promise<RemovalPreview> {
+    const project = await this.readProjectSpec();
+    const datasets = project.datasets ?? [];
+
+    const dataset = datasets.find(d => d.name === datasetName);
+    if (!dataset) {
+      throw new Error(`Dataset "${datasetName}" not found.`);
+    }
+
+    const summary: string[] = [`Removing dataset: ${datasetName}`];
+    const schemaChanges: SchemaChange[] = [];
+
+    const afterSpec = {
+      ...project,
+      datasets: datasets.filter(d => d.name !== datasetName),
+    };
+
+    schemaChanges.push({
+      file: 'agentcore/agentcore.json',
+      before: project,
+      after: afterSpec,
+    });
+
+    return { summary, directoriesToDelete: [], schemaChanges };
+  }
+
+  async getRemovable(): Promise<RemovableDataset[]> {
+    try {
+      const project = await this.readProjectSpec();
+      return (project.datasets ?? []).map(d => ({ name: d.name }));
+    } catch {
+      return [];
+    }
+  }
+
+  /**
+   * Get list of existing dataset names.
+   */
+  async getAllNames(): Promise<string[]> {
+    try {
+      const project = await this.configIO.readProjectSpec();
+      return (project.datasets ?? []).map(d => d.name);
+    } catch {
+      return [];
+    }
+  }
+
+  registerCommands(addCmd: Command, removeCmd: Command): void {
+    addCmd
+      .command('dataset')
+      .description('Add a dataset to the project')
+      .option('--name <name>', 'Dataset name [non-interactive]')
+      .option(
+        '--schema-type <schemaType>',
+        'Dataset schema type: AGENTCORE_EVALUATION_PREDEFINED_V1 | AGENTCORE_EVALUATION_SIMULATED_V1 [non-interactive]'
+      )
+      .option('--description <description>', 'Dataset description [non-interactive]')
+      .option('--json', 'Output as JSON [non-interactive]')
+      .action(async (cliOptions: { name?: string; schemaType?: string; description?: string; json?: boolean }) => {
+        if (!findConfigRoot()) {
+          console.error('No agentcore project found. Run `agentcore create` first.');
+          process.exit(1);
+        }
+
+        if (cliOptions.name || cliOptions.json) {
+          // CLI mode
+          await runCliCommand('add.dataset', !!cliOptions.json, async () => {
+            const validation = validateAddDatasetOptions({
+              name: cliOptions.name ?? '',
+              schemaType: (cliOptions.schemaType ?? '') as DatasetSchemaType,
+              description: cliOptions.description,
+            });
+
+            if (!validation.valid) {
+              throw new Error(validation.error);
+            }
+
+            const result = await this.add({
+              name: cliOptions.name!,
+              schemaType: cliOptions.schemaType! as DatasetSchemaType,
+              description: cliOptions.description,
+            });
+
+            if (!result.success) {
+              throw result.error;
+            }
+
+            if (cliOptions.json) {
+              console.log(JSON.stringify(result));
+            } else {
+              console.log(`Added dataset '${result.datasetName}'`);
+              console.log(`  File: ${result.location}`);
+            }
+
+            return {};
+          });
+        } else {
+          try {
+            // TUI fallback — dynamic imports to avoid pulling ink (async) into registry
+            requireTTY();
+            const [{ render }, { default: React }, { AddFlow }] = await Promise.all([
+              import('ink'),
+              import('react'),
+              import('../tui/screens/add/AddFlow'),
+            ]);
+            const { unmount } = render(
+              React.createElement(AddFlow, {
+                isInteractive: false,
+                initialResource: 'dataset',
+                onExit: () => {
+                  unmount();
+                  process.exit(0);
+                },
+              })
+            );
+          } catch (error) {
+            console.error(getErrorMessage(error));
+            process.exit(1);
+          }
+        }
+      });
+
+    this.registerRemoveSubcommand(removeCmd);
+  }
+
+  addScreen(): AddScreenComponent {
+    return null;
+  }
+
+  /**
+   * Copy the starter JSONL asset file to the dataset location.
+   */
+  private async scaffoldDatasetFile(name: string, schemaType: string, location: string): Promise<void> {
+    const configRoot = findConfigRoot();
+    if (!configRoot) return;
+
+    const targetPath = join(configRoot, location);
+    const targetDir = join(configRoot, 'datasets');
+    await mkdir(targetDir, { recursive: true });
+
+    const assetFile = SCHEMA_TYPE_TO_ASSET[schemaType];
+    if (!assetFile) return;
+
+    const sourcePath = getTemplatePath('datasets', assetFile);
+    await copyFile(sourcePath, targetPath);
+  }
+}
diff --git a/src/cli/primitives/__tests__/DatasetPrimitive.test.ts b/src/cli/primitives/__tests__/DatasetPrimitive.test.ts
new file mode 100644
index 000000000..453852a3a
--- /dev/null
+++ b/src/cli/primitives/__tests__/DatasetPrimitive.test.ts
@@ -0,0 +1,190 @@
+import { DatasetPrimitive } from '../DatasetPrimitive.js';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+const mockReadProjectSpec = vi.fn();
+const mockWriteProjectSpec = vi.fn();
+const mockCopyFile = vi.fn();
+const mockMkdir = vi.fn();
+
+vi.mock('../../../lib/index.js', () => ({
+  ConfigIO: class {
+    readProjectSpec = mockReadProjectSpec;
+    writeProjectSpec = mockWriteProjectSpec;
+  },
+  findConfigRoot: () => '/fake/root',
+}));
+
+vi.mock('node:fs/promises', () => ({
+  copyFile: (...args: unknown[]) => mockCopyFile(...args),
+  mkdir: (...args: unknown[]) => mockMkdir(...args),
+}));
+
+vi.mock('../../templates/templateRoot', () => ({
+  getTemplatePath: (...segments: string[]) => `/templates/${segments.join('/')}`,
+}));
+
+function makeProject(datasets: { name: string; schemaType?: string }[] = []) {
+  return {
+    name: 'TestProject',
+    version: 1,
+    managedBy: 'CDK' as const,
+    runtimes: [],
+    memories: [],
+    credentials: [],
+    evaluators: [],
+    onlineEvalConfigs: [],
+    datasets: datasets.map(d => ({
+      name: d.name,
+      schemaType: d.schemaType ?? 'AGENTCORE_EVALUATION_PREDEFINED_V1',
+      config: { managed: { location: `datasets/${d.name}.jsonl` } },
+    })),
+  };
+}
+
+const primitive = new DatasetPrimitive();
+
+describe('DatasetPrimitive', () => {
+  afterEach(() => vi.clearAllMocks());
+
+  describe('add', () => {
+    it('adds dataset to spec with description, returns success and scaffolds file', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+      mockMkdir.mockResolvedValue(undefined);
+      mockCopyFile.mockResolvedValue(undefined);
+
+      const result = await primitive.add({
+        name: 'MyDataset',
+        schemaType: 'AGENTCORE_EVALUATION_PREDEFINED_V1',
+        description: 'A test dataset',
+      });
+
+      expect(result.success).toBe(true);
+      if (result.success) {
+        expect(result.datasetName).toBe('MyDataset');
+        expect(result.location).toBe('agentcore/datasets/MyDataset.jsonl');
+      }
+
+      const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0];
+      expect(writtenSpec.datasets).toHaveLength(1);
+      expect(writtenSpec.datasets[0].name).toBe('MyDataset');
+      expect(writtenSpec.datasets[0].description).toBe('A test dataset');
+    });
+
+    it('returns error when name already exists', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'Existing' }]));
+
+      const result = await primitive.add({
+        name: 'Existing',
+        schemaType: 'AGENTCORE_EVALUATION_PREDEFINED_V1',
+      });
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error.message).toContain('already exists');
+      }
+    });
+
+    it('returns error when readProjectSpec rejects', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('disk failure'));
+
+      const result = await primitive.add({
+        name: 'NewDataset',
+        schemaType: 'AGENTCORE_EVALUATION_PREDEFINED_V1',
+      });
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error.message).toBe('disk failure');
+      }
+    });
+  });
+
+  describe('remove', () => {
+    it('removes dataset from spec', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'DatasetA' }, { name: 'DatasetB' }]));
+      mockWriteProjectSpec.mockResolvedValue(undefined);
+
+      const result = await primitive.remove('DatasetA');
+
+      expect(result.success).toBe(true);
+      const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0];
+      expect(writtenSpec.datasets).toHaveLength(1);
+      expect(writtenSpec.datasets[0].name).toBe('DatasetB');
+    });
+
+    it('returns error when dataset not found for removal', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+
+      const result = await primitive.remove('NonExistent');
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error.message).toContain('NonExistent');
+        expect(result.error.message).toContain('not found');
+      }
+    });
+
+    it('returns error when readProjectSpec fails during remove', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('io error'));
+
+      const result = await primitive.remove('Whatever');
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error.message).toBe('io error');
+      }
+    });
+  });
+
+  describe('previewRemove', () => {
+    it('returns summary and schema changes', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'DatasetA' }]));
+
+      const preview = await primitive.previewRemove('DatasetA');
+
+      expect(preview.summary[0]).toContain('Removing dataset: DatasetA');
+      expect(preview.schemaChanges).toHaveLength(1);
+      expect(preview.schemaChanges[0]!.file).toBe('agentcore/agentcore.json');
+      expect((preview.schemaChanges[0]!.after as { datasets: unknown[] }).datasets).toHaveLength(0);
+    });
+
+    it('throws when not found', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject());
+
+      await expect(primitive.previewRemove('Missing')).rejects.toThrow('not found');
+    });
+  });
+
+  describe('getRemovable', () => {
+    it('returns dataset names', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'A' }, { name: 'B' }]));
+
+      const result = await primitive.getRemovable();
+
+      expect(result).toEqual([{ name: 'A' }, { name: 'B' }]);
+    });
+
+    it('returns empty array on error', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('fail'));
+
+      expect(await primitive.getRemovable()).toEqual([]);
+    });
+  });
+
+  describe('getAllNames', () => {
+    it('returns names', async () => {
+      mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'X' }, { name: 'Y' }]));
+
+      const result = await primitive.getAllNames();
+
+      expect(result).toEqual(['X', 'Y']);
+    });
+
+    it('returns empty array on error', async () => {
+      mockReadProjectSpec.mockRejectedValue(new Error('fail'));
+
+      expect(await primitive.getAllNames()).toEqual([]);
+    });
+  });
+});
diff --git a/src/cli/primitives/__tests__/GatewayPrimitive.test.ts b/src/cli/primitives/__tests__/GatewayPrimitive.test.ts
index fb53e095d..0fa1ebac9 100644
--- a/src/cli/primitives/__tests__/GatewayPrimitive.test.ts
+++ b/src/cli/primitives/__tests__/GatewayPrimitive.test.ts
@@ -16,6 +16,7 @@ const defaultProject: AgentCoreProjectSpec = {
   configBundles: [],
   abTests: [],
   httpGateways: [],
+  datasets: [],
 };
 
 const { mockConfigExists, mockReadProjectSpec, mockWriteProjectSpec } = vi.hoisted(() => ({
diff --git a/src/cli/primitives/__tests__/auth-utils.test.ts b/src/cli/primitives/__tests__/auth-utils.test.ts
index 5f0e1a7c9..08b17e0f9 100644
--- a/src/cli/primitives/__tests__/auth-utils.test.ts
+++ b/src/cli/primitives/__tests__/auth-utils.test.ts
@@ -96,6 +96,7 @@ describe('createManagedOAuthCredential', () => {
     configBundles: [],
     abTests: [],
     httpGateways: [],
+    datasets: [],
   };
 
   const jwtConfig: JwtConfigOptions = {
diff --git a/src/cli/primitives/index.ts b/src/cli/primitives/index.ts
index 05d00f869..380db4350 100644
--- a/src/cli/primitives/index.ts
+++ b/src/cli/primitives/index.ts
@@ -1,5 +1,8 @@
 export { ABTestPrimitive } from './ABTestPrimitive';
 export { BasePrimitive } from './BasePrimitive';
+export { DatasetPrimitive } from './DatasetPrimitive';
+export type { AddDatasetOptions } from '../commands/add/types';
+export type { RemovableDataset } from './DatasetPrimitive';
 export { MemoryPrimitive } from './MemoryPrimitive';
 export { CredentialPrimitive } from './CredentialPrimitive';
 export { AgentPrimitive } from './AgentPrimitive';
@@ -13,6 +16,7 @@ export {
   ALL_PRIMITIVES,
   agentPrimitive,
   memoryPrimitive,
+  datasetPrimitive,
   credentialPrimitive,
   evaluatorPrimitive,
   onlineEvalConfigPrimitive,
diff --git a/src/cli/primitives/registry.ts b/src/cli/primitives/registry.ts
index 754b4e182..bf053196f 100644
--- a/src/cli/primitives/registry.ts
+++ b/src/cli/primitives/registry.ts
@@ -3,6 +3,7 @@ import { AgentPrimitive } from './AgentPrimitive';
 import type { BasePrimitive } from './BasePrimitive';
 import { ConfigBundlePrimitive } from './ConfigBundlePrimitive';
 import { CredentialPrimitive } from './CredentialPrimitive';
+import { DatasetPrimitive } from './DatasetPrimitive';
 import { EvaluatorPrimitive } from './EvaluatorPrimitive';
 import { GatewayPrimitive } from './GatewayPrimitive';
 import { GatewayTargetPrimitive } from './GatewayTargetPrimitive';
@@ -18,6 +19,7 @@ import type { RemovableResource } from './types';
  */
 export const agentPrimitive = new AgentPrimitive();
 export const memoryPrimitive = new MemoryPrimitive();
+export const datasetPrimitive = new DatasetPrimitive();
 export const credentialPrimitive = new CredentialPrimitive();
 export const evaluatorPrimitive = new EvaluatorPrimitive();
 export const onlineEvalConfigPrimitive = new OnlineEvalConfigPrimitive();
@@ -35,6 +37,7 @@ export const runtimeEndpointPrimitive = new RuntimeEndpointPrimitive();
 export const ALL_PRIMITIVES: BasePrimitive<unknown, RemovableResource>[] = [
   agentPrimitive,
   memoryPrimitive,
+  datasetPrimitive,
   credentialPrimitive,
   evaluatorPrimitive,
   onlineEvalConfigPrimitive,
diff --git a/src/cli/project.ts b/src/cli/project.ts
index 14ea7be3c..b9f608bd8 100644
--- a/src/cli/project.ts
+++ b/src/cli/project.ts
@@ -21,6 +21,7 @@ export function createDefaultProjectSpec(projectName: string): AgentCoreProjectS
     configBundles: [],
     abTests: [],
     httpGateways: [],
+    datasets: [],
     tags: {
       'agentcore:created-by': 'agentcore-cli',
       'agentcore:project-name': projectName,
diff --git a/src/cli/telemetry/schemas/command-run.ts b/src/cli/telemetry/schemas/command-run.ts
index 7d8f48492..ee88934f9 100644
--- a/src/cli/telemetry/schemas/command-run.ts
+++ b/src/cli/telemetry/schemas/command-run.ts
@@ -154,6 +154,7 @@ export const COMMAND_SCHEMAS = {
   create: CreateAttrs,
   'add.agent': AddAgentAttrs,
   'add.memory': AddMemoryAttrs,
+  'add.dataset': NoAttrs,
   'add.credential': AddCredentialAttrs,
   'add.evaluator': AddEvaluatorAttrs,
   'add.online-eval': AddOnlineEvalAttrs,
@@ -190,6 +191,7 @@ export const COMMAND_SCHEMAS = {
   'remove.all': NoAttrs,
   'remove.agent': NoAttrs,
   'remove.memory': NoAttrs,
+  'remove.dataset': NoAttrs,
   'remove.credential': NoAttrs,
   'remove.evaluator': NoAttrs,
   'remove.online-eval': NoAttrs,
@@ -200,6 +202,9 @@ export const COMMAND_SCHEMAS = {
   'remove.runtime-endpoint': NoAttrs,
   'remove.config-bundle': NoAttrs,
   'remove.ab-test': NoAttrs,
+  'dataset.download': NoAttrs,
+  'dataset.publish-version': NoAttrs,
+  'dataset.remove-version': NoAttrs,
   'telemetry.disable': NoAttrs,
   'telemetry.enable': NoAttrs,
   'telemetry.status': NoAttrs,
diff --git a/src/cli/tui/App.tsx b/src/cli/tui/App.tsx
index 322d0b5a8..62fc7db93 100644
--- a/src/cli/tui/App.tsx
+++ b/src/cli/tui/App.tsx
@@ -9,6 +9,7 @@ import { AddFlow } from './screens/add/AddFlow';
 import { CliOnlyScreen } from './screens/cli-only';
 import { ConfigBundleFlow } from './screens/config-bundle-hub';
 import { CreateScreen } from './screens/create';
+import { DatasetFlow } from './screens/dataset-hub';
 import { DeployScreen } from './screens/deploy/DeployScreen';
 import { EvalHubScreen, EvalScreen } from './screens/eval';
 import { FetchAccessScreen } from './screens/fetch-access';
@@ -56,6 +57,7 @@ type Route =
   | { name: 'package' }
   | { name: 'update' }
   | { name: 'config-bundle' }
+  | { name: 'dataset' }
   | { name: 'import' }
   | { name: 'ab-test' }
   | { name: 'cli-only'; commandId: string };
@@ -141,6 +143,8 @@ function AppContent() {
       setRoute({ name: 'update' });
     } else if (id === 'config-bundle') {
       setRoute({ name: 'config-bundle' });
+    } else if (id === 'dataset') {
+      setRoute({ name: 'dataset' });
     } else if (id === 'ab-test') {
       setRoute({ name: 'ab-test' });
     }
@@ -336,6 +340,10 @@ function AppContent() {
     return <ConfigBundleFlow onExit={() => setRoute({ name: 'help' })} />;
   }
 
+  if (route.name === 'dataset') {
+    return <DatasetFlow onExit={() => setRoute({ name: 'help' })} />;
+  }
+
   if (route.name === 'ab-test') {
     return <ABTestPickerScreen onExit={() => setRoute({ name: 'help' })} />;
   }
diff --git a/src/cli/tui/components/ResourceGraph.tsx b/src/cli/tui/components/ResourceGraph.tsx
index 36504cd62..1295624f7 100644
--- a/src/cli/tui/components/ResourceGraph.tsx
+++ b/src/cli/tui/components/ResourceGraph.tsx
@@ -22,6 +22,7 @@ const ICONS = {
   policy: '▢',
   'config-bundle': '⬡',
   'ab-test': '⚗',
+  dataset: '▤',
   'runtime-endpoint': '◉',
 } as const;
 
@@ -132,6 +133,7 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res
   const unassignedTargets = mcp?.unassignedTargets ?? [];
   const policyEngines = project.policyEngines ?? [];
   const configBundles = project.configBundles ?? [];
+  const datasets = project.datasets ?? [];
   const abTests = project.abTests ?? [];
 
   // Build lookup map and collect pending-removal resources in a single pass
@@ -331,6 +333,27 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res
         </Box>
       )}
 
+      {/* Datasets */}
+      {datasets.length > 0 && (
+        <Box flexDirection="column">
+          <SectionHeader>Datasets</SectionHeader>
+          {datasets.map(ds => {
+            const rsEntry = statusMap.get(`dataset:${ds.name}`);
+            return (
+              <ResourceRow
+                key={ds.name}
+                icon={ICONS.dataset}
+                color="cyan"
+                name={ds.name}
+                detail={rsEntry?.detail ?? ds.schemaType}
+                deploymentState={rsEntry?.deploymentState}
+                identifier={rsEntry?.identifier}
+              />
+            );
+          })}
+        </Box>
+      )}
+
       {/* AB Tests */}
       {abTests.length > 0 && (
         <Box flexDirection="column">
diff --git a/src/cli/tui/hooks/useRemove.ts b/src/cli/tui/hooks/useRemove.ts
index 9400ea2ad..f1a5ab65b 100644
--- a/src/cli/tui/hooks/useRemove.ts
+++ b/src/cli/tui/hooks/useRemove.ts
@@ -11,6 +11,7 @@ import {
   agentPrimitive,
   configBundlePrimitive,
   credentialPrimitive,
+  datasetPrimitive,
   evaluatorPrimitive,
   gatewayPrimitive,
   gatewayTargetPrimitive,
@@ -144,6 +145,11 @@ export function useRemovableEvaluators() {
   return { evaluators, ...rest };
 }
 
+export function useRemovableDatasets() {
+  const { items: datasets, ...rest } = useRemovableResources(() => datasetPrimitive.getRemovable());
+  return { datasets, ...rest };
+}
+
 export function useRemovableOnlineEvalConfigs() {
   const { items: onlineEvalConfigs, ...rest } = useRemovableResources(() => onlineEvalConfigPrimitive.getRemovable());
   return { onlineEvalConfigs, ...rest };
@@ -243,6 +249,10 @@ export function useRemovalPreview() {
     (name: string) => loadPreview(n => evaluatorPrimitive.previewRemove(n), name),
     [loadPreview]
   );
+  const loadDatasetPreview = useCallback(
+    (name: string) => loadPreview(n => datasetPrimitive.previewRemove(n), name),
+    [loadPreview]
+  );
   const loadOnlineEvalPreview = useCallback(
     (name: string) => loadPreview(n => onlineEvalConfigPrimitive.previewRemove(n), name),
     [loadPreview]
@@ -282,6 +292,7 @@ export function useRemovalPreview() {
     loadMemoryPreview,
     loadIdentityPreview,
     loadEvaluatorPreview,
+    loadDatasetPreview,
     loadOnlineEvalPreview,
     loadPolicyEnginePreview,
     loadPolicyPreview,
@@ -351,6 +362,14 @@ export function useRemoveEvaluator() {
   );
 }
 
+export function useRemoveDataset() {
+  return useRemoveResource(
+    (name: string) => datasetPrimitive.remove(name),
+    'dataset',
+    name => name
+  );
+}
+
 export function useRemovePolicyEngine() {
   return useRemoveResource(
     (name: string) => policyEnginePrimitive.remove(name),
diff --git a/src/cli/tui/screens/add/AddFlow.tsx b/src/cli/tui/screens/add/AddFlow.tsx
index eef7f4db2..2079ea214 100644
--- a/src/cli/tui/screens/add/AddFlow.tsx
+++ b/src/cli/tui/screens/add/AddFlow.tsx
@@ -9,6 +9,7 @@ import type { AddAgentConfig } from '../agent/types';
 import { FRAMEWORK_OPTIONS } from '../agent/types';
 import { useAddAgent } from '../agent/useAddAgent';
 import { AddConfigBundleFlow } from '../config-bundle';
+import { AddDatasetFlow } from '../dataset';
 import { AddEvaluatorFlow } from '../evaluator';
 import { AddIdentityFlow } from '../identity';
 import { AddGatewayFlow, AddGatewayTargetFlow } from '../mcp';
@@ -33,6 +34,7 @@ type FlowState =
   | { name: 'evaluator-wizard' }
   | { name: 'online-eval-wizard' }
   | { name: 'policy-wizard' }
+  | { name: 'dataset-wizard' }
   | { name: 'config-bundle-wizard' }
   | { name: 'ab-test-wizard' }
   | { name: 'runtime-endpoint-wizard' }
@@ -187,6 +189,8 @@ function getInitialFlowState(resource?: AddResourceType): FlowState {
       return { name: 'policy-wizard' };
     case 'runtime-endpoint':
       return { name: 'runtime-endpoint-wizard' };
+    case 'dataset':
+      return { name: 'dataset-wizard' };
     case 'config-bundle':
       return { name: 'config-bundle-wizard' };
     case 'ab-test':
@@ -238,6 +242,9 @@ export function AddFlow(props: AddFlowProps) {
       case 'policy':
         setFlow({ name: 'policy-wizard' });
         break;
+      case 'dataset':
+        setFlow({ name: 'dataset-wizard' });
+        break;
       case 'config-bundle':
         setFlow({ name: 'config-bundle-wizard' });
         break;
@@ -480,6 +487,19 @@ export function AddFlow(props: AddFlowProps) {
     );
   }
 
+  // Dataset wizard
+  if (flow.name === 'dataset-wizard') {
+    return (
+      <AddDatasetFlow
+        isInteractive={props.isInteractive}
+        onExit={props.onExit}
+        onBack={() => setFlow({ name: 'select' })}
+        onDev={props.onDev}
+        onDeploy={props.onDeploy}
+      />
+    );
+  }
+
   // Configuration bundle wizard
   if (flow.name === 'config-bundle-wizard') {
     return (
diff --git a/src/cli/tui/screens/add/AddScreen.tsx b/src/cli/tui/screens/add/AddScreen.tsx
index 04dceac97..d90779b71 100644
--- a/src/cli/tui/screens/add/AddScreen.tsx
+++ b/src/cli/tui/screens/add/AddScreen.tsx
@@ -11,6 +11,7 @@ const ADD_RESOURCES = [
   { id: 'gateway-target', title: 'Gateway Target', description: 'Extend agent capabilities' },
   { id: 'runtime-endpoint', title: 'Runtime Endpoint', description: 'Named endpoint for a runtime' },
   { id: 'policy', title: 'Policy', description: 'Cedar policies for gateway tools' },
+  { id: 'dataset', title: 'Dataset', description: 'Evaluation dataset for testing agents' },
   { id: 'config-bundle', title: 'Configuration Bundle [preview]', description: 'Versioned component configurations' },
   { id: 'ab-test', title: 'AB Test [preview]', description: 'Compare agent configurations with traffic splitting' },
 ] as const;
diff --git a/src/cli/tui/screens/dataset-hub/DatasetFlow.tsx b/src/cli/tui/screens/dataset-hub/DatasetFlow.tsx
new file mode 100644
index 000000000..9ae3584fd
--- /dev/null
+++ b/src/cli/tui/screens/dataset-hub/DatasetFlow.tsx
@@ -0,0 +1,475 @@
+/**
+ * Dataset Flow — manages navigation between hub, download, publish-version, and remove-version screens.
+ */
+import { ConfigIO } from '../../../../lib';
+import type { Dataset } from '../../../../schema';
+import { listDatasetVersions } from '../../../aws/agentcore-datasets';
+import type { DatasetVersionSummary } from '../../../aws/agentcore-datasets';
+import { deleteDatasetVersion, publishDataset, pullDataset } from '../../../operations/dataset';
+import type { PullResult } from '../../../operations/dataset';
+import { ErrorPrompt, Screen, WizardSelect } from '../../components';
+import type { SelectableItem } from '../../components';
+import { useListNavigation } from '../../hooks';
+import { Box, Text } from 'ink';
+import React, { useCallback, useEffect, useState } from 'react';
+
+// ============================================================================
+// Types
+// ============================================================================
+
+interface ResolvedDatasetInfo {
+  name: string;
+  datasetId: string;
+  region: string;
+  location: string;
+}
+
+type FlowState =
+  | { name: 'loading' }
+  | { name: 'hub'; datasets: ResolvedDatasetInfo[] }
+  | { name: 'pick-dataset'; action: 'download' | 'publish-version' | 'remove-version'; datasets: ResolvedDatasetInfo[] }
+  | { name: 'pick-version'; dataset: ResolvedDatasetInfo; versions: DatasetVersionSummary[] }
+  | { name: 'pick-delete-version'; dataset: ResolvedDatasetInfo; versions: DatasetVersionSummary[] }
+  | { name: 'confirm-pull'; dataset: ResolvedDatasetInfo; version: string }
+  | { name: 'confirm-delete'; dataset: ResolvedDatasetInfo; version: string }
+  | { name: 'running'; message: string }
+  | { name: 'pull-result'; dataset: ResolvedDatasetInfo; result: PullResult }
+  | { name: 'publish-result'; dataset: ResolvedDatasetInfo; version: string; exampleCount: number }
+  | { name: 'delete-result'; dataset: ResolvedDatasetInfo; version: string }
+  | { name: 'error'; message: string };
+
+const HUB_ACTIONS: SelectableItem[] = [
+  { id: 'download', title: 'Download', description: 'Download service DRAFT/version → local file' },
+  { id: 'publish-version', title: 'Publish Version', description: 'Snapshot DRAFT → immutable version' },
+  { id: 'remove-version', title: 'Remove Version', description: 'Delete a specific published version' },
+];
+
+// ============================================================================
+// Component
+// ============================================================================
+
+interface DatasetFlowProps {
+  onExit: () => void;
+}
+
+export function DatasetFlow({ onExit }: DatasetFlowProps) {
+  const [flow, setFlow] = useState<FlowState>({ name: 'loading' });
+
+  // Load datasets on mount
+  useEffect(() => {
+    void (async () => {
+      try {
+        const configIO = new ConfigIO();
+        const projectSpec = await configIO.readProjectSpec();
+        const datasets: Dataset[] = projectSpec.datasets ?? [];
+
+        if (datasets.length === 0) {
+          setFlow({ name: 'error', message: 'No datasets found. Run `agentcore add dataset` first.' });
+          return;
+        }
+
+        const targets = await configIO.resolveAWSDeploymentTargets();
+        if (targets.length === 0) {
+          setFlow({
+            name: 'error',
+            message: 'No AWS deployment targets configured. Run `agentcore deploy` first to create one.',
+          });
+          return;
+        }
+        const region = targets[0]!.region;
+        const targetName = targets[0]!.name;
+
+        const deployedState = await configIO.readDeployedState().catch(() => undefined);
+        const deployedDatasets = deployedState?.targets?.[targetName]?.resources?.datasets ?? {};
+
+        const resolved: ResolvedDatasetInfo[] = [];
+        for (const ds of datasets) {
+          const state = deployedDatasets[ds.name];
+          if (state) {
+            resolved.push({
+              name: ds.name,
+              datasetId: state.datasetId,
+              region,
+              location: ds.config.managed.location,
+            });
+          }
+        }
+
+        if (resolved.length === 0) {
+          setFlow({ name: 'error', message: 'No deployed datasets found. Run `agentcore deploy` first.' });
+          return;
+        }
+
+        setFlow({ name: 'hub', datasets: resolved });
+      } catch (err) {
+        setFlow({ name: 'error', message: err instanceof Error ? err.message : String(err) });
+      }
+    })();
+  }, []);
+
+  const executeAction = async (action: string, dataset: ResolvedDatasetInfo, version?: string) => {
+    const configIO = new ConfigIO();
+    const configBaseDir = configIO.getConfigRoot();
+
+    setFlow({ name: 'running', message: `Running ${action}...` });
+
+    try {
+      if (action === 'download') {
+        if (!version) {
+          const versions = await listDatasetVersions({ region: dataset.region, datasetId: dataset.datasetId });
+          setFlow({ name: 'pick-version', dataset, versions: versions.versions });
+          return;
+        }
+        const result = await pullDataset({
+          region: dataset.region,
+          datasetId: dataset.datasetId,
+          localFilePath: dataset.location,
+          configBaseDir,
+          version: version === 'DRAFT' ? undefined : version,
+        });
+        setFlow({ name: 'pull-result', dataset, result });
+      } else if (action === 'publish-version') {
+        const result = await publishDataset({
+          region: dataset.region,
+          datasetId: dataset.datasetId,
+        });
+        setFlow({ name: 'publish-result', dataset, version: result.version, exampleCount: result.exampleCount });
+      } else if (action === 'remove-version') {
+        if (!version) {
+          const versions = await listDatasetVersions({ region: dataset.region, datasetId: dataset.datasetId });
+          setFlow({ name: 'pick-delete-version', dataset, versions: versions.versions });
+          return;
+        }
+        setFlow({ name: 'confirm-delete', dataset, version });
+      } else if (action === 'confirm-delete') {
+        await deleteDatasetVersion({
+          region: dataset.region,
+          datasetId: dataset.datasetId,
+          version: version!,
+        });
+        setFlow({ name: 'delete-result', dataset, version: version! });
+      }
+    } catch (err) {
+      setFlow({ name: 'error', message: err instanceof Error ? err.message : String(err) });
+    }
+  };
+
+  const handleAction = useCallback((actionId: string, datasets: ResolvedDatasetInfo[]) => {
+    const action = actionId as 'download' | 'publish-version' | 'remove-version';
+    if (datasets.length === 1) {
+      void executeAction(action, datasets[0]!);
+    } else {
+      setFlow({ name: 'pick-dataset', action, datasets });
+    }
+  }, []);
+
+  // ══════════════════════════════════════════════════════════════════════════
+  // Render states
+  // ══════════════════════════════════════════════════════════════════════════
+
+  if (flow.name === 'loading') {
+    return (
+      <Screen title="Dataset Management" onExit={onExit}>
+        <Text dimColor>Loading datasets...</Text>
+      </Screen>
+    );
+  }
+
+  if (flow.name === 'hub') {
+    return <HubScreen datasets={flow.datasets} onSelect={handleAction} onExit={onExit} />;
+  }
+
+  if (flow.name === 'pick-dataset') {
+    return (
+      <DatasetPickerScreen
+        datasets={flow.datasets}
+        onSelect={dataset => void executeAction(flow.action, dataset)}
+        onExit={() => setFlow({ name: 'hub', datasets: flow.datasets })}
+      />
+    );
+  }
+
+  if (flow.name === 'pick-version') {
+    return (
+      <VersionPickerScreen
+        versions={flow.versions}
+        onSelect={version => setFlow({ name: 'confirm-pull', dataset: flow.dataset, version })}
+        onExit={() => setFlow({ name: 'hub', datasets: [] })}
+      />
+    );
+  }
+
+  if (flow.name === 'confirm-pull') {
+    const versionLabel = flow.version === 'DRAFT' ? 'DRAFT' : `version ${flow.version}`;
+    return (
+      <ConfirmPullScreen
+        location={flow.dataset.location}
+        versionLabel={versionLabel}
+        onConfirm={() => void executeAction('download', flow.dataset, flow.version)}
+        onCancel={() => setFlow({ name: 'hub', datasets: [] })}
+      />
+    );
+  }
+
+  if (flow.name === 'running') {
+    return (
+      <Screen title="Dataset Management" onExit={onExit}>
+        <Text dimColor>{flow.message}</Text>
+      </Screen>
+    );
+  }
+
+  if (flow.name === 'pick-delete-version') {
+    return (
+      <DeleteVersionPickerScreen
+        versions={flow.versions}
+        onSelect={version => setFlow({ name: 'confirm-delete', dataset: flow.dataset, version })}
+        onExit={() => setFlow({ name: 'hub', datasets: [] })}
+      />
+    );
+  }
+
+  if (flow.name === 'confirm-delete') {
+    return (
+      <ConfirmDeleteScreen
+        datasetName={flow.dataset.name}
+        version={flow.version}
+        onConfirm={() => void executeAction('confirm-delete', flow.dataset, flow.version)}
+        onCancel={() => setFlow({ name: 'hub', datasets: [] })}
+      />
+    );
+  }
+
+  if (flow.name === 'delete-result') {
+    return (
+      <Screen title="Dataset Management" onExit={onExit}>
+        <Box flexDirection="column">
+          <Text color="green">
+            ✓ Deleted version {flow.version} of dataset &quot;{flow.dataset.name}&quot;
+          </Text>
+        </Box>
+      </Screen>
+    );
+  }
+
+  if (flow.name === 'pull-result') {
+    return (
+      <Screen title="Dataset Management" onExit={onExit}>
+        <Box flexDirection="column">
+          <Text color="green">
+            ✓ {flow.result.exampleCount} examples written to {flow.dataset.location}
+          </Text>
+          <Text dimColor>
+            {' '}
+            Pulled from: {flow.result.version === 'DRAFT' ? 'DRAFT' : `version ${flow.result.version}`}
+          </Text>
+        </Box>
+      </Screen>
+    );
+  }
+
+  if (flow.name === 'publish-result') {
+    return (
+      <Screen title="Dataset Management" onExit={onExit}>
+        <Box flexDirection="column">
+          <Text color="green">
+            ✓ Published version {flow.version} ({flow.exampleCount} examples)
+          </Text>
+          <Text dimColor> draftStatus: UNMODIFIED</Text>
+        </Box>
+      </Screen>
+    );
+  }
+
+  return <ErrorPrompt message="Dataset error" detail={flow.message} onBack={onExit} onExit={onExit} />;
+}
+
+// ============================================================================
+// Sub-screens
+// ============================================================================
+
+function HubScreen({
+  datasets,
+  onSelect,
+  onExit,
+}: {
+  datasets: ResolvedDatasetInfo[];
+  onSelect: (actionId: string, datasets: ResolvedDatasetInfo[]) => void;
+  onExit: () => void;
+}) {
+  const nav = useListNavigation({
+    items: HUB_ACTIONS,
+    onSelect: (item: SelectableItem) => onSelect(item.id, datasets),
+  });
+
+  return (
+    <Screen title="Dataset Management" onExit={onExit}>
+      <WizardSelect
+        title="What would you like to do?"
+        description={`${datasets.length} dataset(s) deployed`}
+        items={HUB_ACTIONS}
+        selectedIndex={nav.selectedIndex}
+      />
+    </Screen>
+  );
+}
+
+function DatasetPickerScreen({
+  datasets,
+  onSelect,
+  onExit,
+}: {
+  datasets: ResolvedDatasetInfo[];
+  onSelect: (dataset: ResolvedDatasetInfo) => void;
+  onExit: () => void;
+}) {
+  const items: SelectableItem[] = datasets.map(d => ({
+    id: d.name,
+    title: d.name,
+    description: d.datasetId,
+  }));
+
+  const nav = useListNavigation({
+    items,
+    onSelect: (item: SelectableItem) => {
+      const dataset = datasets.find(d => d.name === item.id)!;
+      onSelect(dataset);
+    },
+  });
+
+  return (
+    <Screen title="Select Dataset" onExit={onExit}>
+      <WizardSelect title="Which dataset?" items={items} selectedIndex={nav.selectedIndex} />
+    </Screen>
+  );
+}
+
+function VersionPickerScreen({
+  versions,
+  onSelect,
+  onExit,
+}: {
+  versions: DatasetVersionSummary[];
+  onSelect: (version: string) => void;
+  onExit: () => void;
+}) {
+  const items: SelectableItem[] = [
+    { id: 'DRAFT', title: 'DRAFT', description: 'Current working copy' },
+    ...versions.map((v, i) => ({
+      id: v.datasetVersion,
+      title: `Version ${v.datasetVersion}${i === 0 ? ' (latest)' : ''}`,
+      description: `${v.exampleCount} examples`,
+    })),
+  ];
+
+  const nav = useListNavigation({
+    items,
+    onSelect: (item: SelectableItem) => onSelect(item.id),
+  });
+
+  return (
+    <Screen title="Pull From" onExit={onExit}>
+      <WizardSelect title="Which version to pull?" items={items} selectedIndex={nav.selectedIndex} />
+    </Screen>
+  );
+}
+
+function ConfirmPullScreen({
+  location,
+  versionLabel,
+  onConfirm,
+  onCancel,
+}: {
+  location: string;
+  versionLabel: string;
+  onConfirm: () => void;
+  onCancel: () => void;
+}) {
+  const items: SelectableItem[] = [
+    { id: 'yes', title: 'Yes, overwrite', description: '' },
+    { id: 'no', title: 'Cancel', description: '' },
+  ];
+
+  const nav = useListNavigation({
+    items,
+    onSelect: (item: SelectableItem) => {
+      if (item.id === 'yes') onConfirm();
+      else onCancel();
+    },
+  });
+
+  return (
+    <Screen title="Confirm Pull" onExit={onCancel}>
+      <Box flexDirection="column">
+        <Text color="yellow">⚠ This will overwrite: {location}</Text>
+        <Text dimColor> (pulling {versionLabel})</Text>
+        <Text>{''}</Text>
+        <WizardSelect title="Continue?" items={items} selectedIndex={nav.selectedIndex} />
+      </Box>
+    </Screen>
+  );
+}
+
+function DeleteVersionPickerScreen({
+  versions,
+  onSelect,
+  onExit,
+}: {
+  versions: DatasetVersionSummary[];
+  onSelect: (version: string) => void;
+  onExit: () => void;
+}) {
+  const items: SelectableItem[] = versions.map((v, i) => ({
+    id: v.datasetVersion,
+    title: `Version ${v.datasetVersion}${i === 0 ? ' (latest)' : ''}`,
+    description: `${v.exampleCount} examples`,
+  }));
+
+  const nav = useListNavigation({
+    items,
+    onSelect: (item: SelectableItem) => onSelect(item.id),
+  });
+
+  return (
+    <Screen title="Delete Version" onExit={onExit}>
+      <WizardSelect title="Which version to delete?" items={items} selectedIndex={nav.selectedIndex} />
+    </Screen>
+  );
+}
+
+function ConfirmDeleteScreen({
+  datasetName,
+  version,
+  onConfirm,
+  onCancel,
+}: {
+  datasetName: string;
+  version: string;
+  onConfirm: () => void;
+  onCancel: () => void;
+}) {
+  const items: SelectableItem[] = [
+    { id: 'yes', title: 'Yes, delete', description: '' },
+    { id: 'no', title: 'Cancel', description: '' },
+  ];
+
+  const nav = useListNavigation({
+    items,
+    onSelect: (item: SelectableItem) => {
+      if (item.id === 'yes') onConfirm();
+      else onCancel();
+    },
+  });
+
+  return (
+    <Screen title="Confirm Delete" onExit={onCancel}>
+      <Box flexDirection="column">
+        <Text color="yellow">
+          ⚠ This will permanently delete version {version} of dataset &quot;{datasetName}&quot;
+        </Text>
+        <Text>{''}</Text>
+        <WizardSelect title="Continue?" items={items} selectedIndex={nav.selectedIndex} />
+      </Box>
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/dataset-hub/index.ts b/src/cli/tui/screens/dataset-hub/index.ts
new file mode 100644
index 000000000..7c69f8074
--- /dev/null
+++ b/src/cli/tui/screens/dataset-hub/index.ts
@@ -0,0 +1 @@
+export { DatasetFlow } from './DatasetFlow';
diff --git a/src/cli/tui/screens/dataset/AddDatasetFlow.tsx b/src/cli/tui/screens/dataset/AddDatasetFlow.tsx
new file mode 100644
index 000000000..31b5cde6e
--- /dev/null
+++ b/src/cli/tui/screens/dataset/AddDatasetFlow.tsx
@@ -0,0 +1,105 @@
+import { datasetPrimitive } from '../../../primitives/registry';
+import { ErrorPrompt } from '../../components';
+import { AddSuccessScreen } from '../add/AddSuccessScreen';
+import type { AddDatasetConfig } from './AddDatasetScreen';
+import { AddDatasetScreen } from './AddDatasetScreen';
+import { Box, Text } from 'ink';
+import React, { useCallback, useEffect, useState } from 'react';
+
+type FlowState =
+  | { name: 'create-wizard' }
+  | { name: 'create-success'; datasetName: string; schemaType: string; location: string; description?: string }
+  | { name: 'error'; message: string };
+
+interface AddDatasetFlowProps {
+  isInteractive?: boolean;
+  onExit: () => void;
+  onBack: () => void;
+  onDev?: () => void;
+  onDeploy?: () => void;
+}
+
+export function AddDatasetFlow({ isInteractive = true, onExit, onBack, onDev, onDeploy }: AddDatasetFlowProps) {
+  const [flow, setFlow] = useState<FlowState>({ name: 'create-wizard' });
+  const [existingNames, setExistingNames] = useState<string[]>([]);
+
+  useEffect(() => {
+    void datasetPrimitive.getAllNames().then(setExistingNames);
+  }, []);
+
+  // In non-interactive mode, exit after success
+  useEffect(() => {
+    if (!isInteractive && flow.name === 'create-success') {
+      onExit();
+    }
+  }, [isInteractive, flow.name, onExit]);
+
+  const handleCreateComplete = useCallback((config: AddDatasetConfig) => {
+    void datasetPrimitive
+      .add({ name: config.name, schemaType: config.schemaType, description: config.description })
+      .then(result => {
+        if (result.success) {
+          setFlow({
+            name: 'create-success',
+            datasetName: result.datasetName,
+            schemaType: config.schemaType,
+            location: result.location,
+            description: config.description,
+          });
+          return;
+        }
+        setFlow({ name: 'error', message: result.error.message });
+      });
+  }, []);
+
+  // Create wizard
+  if (flow.name === 'create-wizard') {
+    return <AddDatasetScreen existingDatasetNames={existingNames} onComplete={handleCreateComplete} onExit={onBack} />;
+  }
+
+  // Create success
+  if (flow.name === 'create-success') {
+    return (
+      <AddSuccessScreen
+        isInteractive={isInteractive}
+        message={`Added dataset: ${flow.datasetName}`}
+        detail=""
+        summary={
+          <Box flexDirection="column" marginTop={1}>
+            <Text dimColor> Schema: {flow.schemaType}</Text>
+            <Text dimColor> File: {flow.location}</Text>
+            {flow.description && <Text dimColor> Desc: {flow.description}</Text>}
+            <Box marginTop={1} flexDirection="column">
+              <Text color="yellow">Next steps:</Text>
+              <Text>
+                {' '}
+                1. Please replace sample examples in <Text color="cyan">{flow.location}</Text> with your own dataset
+                examples
+              </Text>
+              <Text>
+                {' '}
+                2. Run <Text color="cyan">agentcore deploy</Text> to create the dataset and sync examples
+              </Text>
+            </Box>
+          </Box>
+        }
+        onAddAnother={onBack}
+        onDev={onDev}
+        onDeploy={onDeploy}
+        onExit={onExit}
+      />
+    );
+  }
+
+  // Error
+  return (
+    <ErrorPrompt
+      message="Failed to add dataset"
+      detail={flow.message}
+      onBack={() => {
+        setFlow({ name: 'create-wizard' });
+      }}
+      onExit={onExit}
+    />
+  );
+}
diff --git a/src/cli/tui/screens/dataset/AddDatasetScreen.tsx b/src/cli/tui/screens/dataset/AddDatasetScreen.tsx
new file mode 100644
index 000000000..fb41b8baa
--- /dev/null
+++ b/src/cli/tui/screens/dataset/AddDatasetScreen.tsx
@@ -0,0 +1,141 @@
+import type { DatasetSchemaType } from '../../../../schema';
+import { DatasetNameSchema } from '../../../../schema';
+import { ConfirmReview, Panel, Screen, StepIndicator, TextInput, WizardSelect } from '../../components';
+import type { SelectableItem } from '../../components';
+import { HELP_TEXT } from '../../constants';
+import { useListNavigation } from '../../hooks';
+import { generateUniqueName } from '../../utils';
+import React, { useMemo, useState } from 'react';
+
+const SCHEMA_TYPE_OPTIONS: SelectableItem[] = [
+  {
+    id: 'AGENTCORE_EVALUATION_PREDEFINED_V1',
+    title: 'Predefined Turns',
+    description: 'Explicit inputs with expected responses',
+  },
+  {
+    id: 'AGENTCORE_EVALUATION_SIMULATED_V1',
+    title: 'Actor Simulator',
+    description: 'Actor profiles for synthetic conversations',
+  },
+];
+
+export interface AddDatasetConfig {
+  name: string;
+  schemaType: DatasetSchemaType;
+  description?: string;
+}
+
+type Step = 'name' | 'schema-type' | 'description' | 'confirm';
+
+const STEP_LABELS: Record<Step, string> = {
+  name: 'Name',
+  'schema-type': 'Schema Type',
+  description: 'Description',
+  confirm: 'Confirm',
+};
+
+const STEPS: Step[] = ['name', 'schema-type', 'description', 'confirm'];
+
+interface AddDatasetScreenProps {
+  onComplete: (config: AddDatasetConfig) => void;
+  onExit: () => void;
+  existingDatasetNames: string[];
+}
+
+export function AddDatasetScreen({ onComplete, onExit, existingDatasetNames }: AddDatasetScreenProps) {
+  const [step, setStep] = useState<Step>('name');
+  const [name, setName] = useState('');
+  const [schemaType, setSchemaType] = useState<DatasetSchemaType>('AGENTCORE_EVALUATION_PREDEFINED_V1');
+  const [description, setDescription] = useState('');
+
+  const isNameStep = step === 'name';
+  const isSchemaTypeStep = step === 'schema-type';
+  const isDescriptionStep = step === 'description';
+  const isConfirmStep = step === 'confirm';
+
+  const schemaTypeNav = useListNavigation({
+    items: SCHEMA_TYPE_OPTIONS,
+    isActive: isSchemaTypeStep,
+    onSelect: (item: SelectableItem) => {
+      setSchemaType(item.id as DatasetSchemaType);
+      setStep('description');
+    },
+    onExit: () => setStep('name'),
+  });
+
+  useListNavigation({
+    items: [{ id: 'confirm', title: 'Confirm' }],
+    onSelect: () => onComplete({ name, schemaType, description: description || undefined }),
+    onExit: () => setStep('description'),
+    isActive: isConfirmStep,
+  });
+
+  const helpText = isSchemaTypeStep
+    ? HELP_TEXT.NAVIGATE_SELECT
+    : isConfirmStep
+      ? HELP_TEXT.CONFIRM_CANCEL
+      : HELP_TEXT.TEXT_INPUT;
+
+  const headerContent = <StepIndicator steps={STEPS} currentStep={step} labels={STEP_LABELS} />;
+
+  const confirmFields = useMemo(
+    () => [
+      { label: 'Name', value: name },
+      { label: 'Schema Type', value: schemaType },
+      ...(description ? [{ label: 'Description', value: description }] : []),
+    ],
+    [name, schemaType, description]
+  );
+
+  return (
+    <Screen
+      title="Add Dataset"
+      onExit={onExit}
+      helpText={helpText}
+      headerContent={headerContent}
+      exitEnabled={isNameStep}
+    >
+      <Panel>
+        {isNameStep && (
+          <TextInput
+            key="name"
+            prompt="Dataset name"
+            initialValue={generateUniqueName('MyDataset', existingDatasetNames)}
+            onSubmit={(value: string) => {
+              setName(value);
+              setStep('schema-type');
+            }}
+            onCancel={onExit}
+            schema={DatasetNameSchema}
+            customValidation={value => !existingDatasetNames.includes(value) || 'Dataset name already exists'}
+          />
+        )}
+
+        {isSchemaTypeStep && (
+          <WizardSelect
+            title="Schema type"
+            description="Choose the structure for your dataset examples"
+            items={SCHEMA_TYPE_OPTIONS}
+            selectedIndex={schemaTypeNav.selectedIndex}
+          />
+        )}
+
+        {isDescriptionStep && (
+          <TextInput
+            key="description"
+            prompt="Description (optional, press Enter to skip)"
+            onSubmit={(value: string) => {
+              setDescription(value);
+              setStep('confirm');
+            }}
+            onCancel={() => setStep('schema-type')}
+            allowEmpty
+          />
+        )}
+
+        {isConfirmStep && <ConfirmReview fields={confirmFields} />}
+      </Panel>
+    </Screen>
+  );
+}
diff --git a/src/cli/tui/screens/dataset/index.ts b/src/cli/tui/screens/dataset/index.ts
new file mode 100644
index 000000000..1795e49f1
--- /dev/null
+++ b/src/cli/tui/screens/dataset/index.ts
@@ -0,0 +1,3 @@
+export { AddDatasetFlow } from './AddDatasetFlow';
+export { AddDatasetScreen } from './AddDatasetScreen';
+export type { AddDatasetConfig } from './AddDatasetScreen';
diff --git a/src/cli/tui/screens/deploy/DeployScreen.tsx b/src/cli/tui/screens/deploy/DeployScreen.tsx
index 319f970ec..828aec38b 100644
--- a/src/cli/tui/screens/deploy/DeployScreen.tsx
+++ b/src/cli/tui/screens/deploy/DeployScreen.tsx
@@ -383,20 +383,6 @@ export function DeployScreen({
             </Box>
           )}
 
-          {allSuccess && postDeployWarnings.length > 0 && (
-            <Box flexDirection="column" marginTop={1}>
-              <Text color="yellow" bold>
-                Post-deploy warnings:
-              </Text>
-              {postDeployWarnings.map((w, i) => (
-                <Text key={i} color="yellow">
-                  {'  '}
-                  {w}
-                </Text>
-              ))}
-            </Box>
-          )}
-
           {allSuccess && deployNotes.length > 0 && (
             <Box flexDirection="column" marginTop={1}>
               {deployNotes.map((note, i) => (
diff --git a/src/cli/tui/screens/deploy/useDeployFlow.ts b/src/cli/tui/screens/deploy/useDeployFlow.ts
index cdeff0915..8b0295744 100644
--- a/src/cli/tui/screens/deploy/useDeployFlow.ts
+++ b/src/cli/tui/screens/deploy/useDeployFlow.ts
@@ -4,6 +4,7 @@ import {
   buildDeployedState,
   getStackOutputs,
   parseAgentOutputs,
+  parseDatasetOutputs,
   parseEvaluatorOutputs,
   parseGatewayOutputs,
   parseMemoryOutputs,
@@ -21,6 +22,7 @@ import {
   resolveConfigBundleComponentKeys,
   setupConfigBundles,
 } from '../../../operations/deploy/post-deploy-config-bundles';
+import { syncDatasets } from '../../../operations/deploy/post-deploy-datasets';
 import { setupHttpGateways } from '../../../operations/deploy/post-deploy-http-gateways';
 import { enableOnlineEvalConfigs } from '../../../operations/deploy/post-deploy-online-evals';
 import { withCommandRunTelemetry } from '../../../telemetry/cli-command-run.js';
@@ -309,6 +311,10 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState
     );
     const policies = parsePolicyOutputs(outputs, policySpecs);
 
+    // Parse dataset outputs
+    const datasetNames = (ctx.projectSpec.datasets ?? []).map((d: { name: string }) => d.name);
+    const datasets = parseDatasetOutputs(outputs, datasetNames);
+
     // Expose outputs to UI
     setStackOutputs(outputs);
 
@@ -326,9 +332,54 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState
       credentials: Object.keys(allCredentials).length > 0 ? allCredentials : undefined,
       policyEngines,
       policies,
+      datasets,
     });
     await configIO.writeDeployedState(deployedState);
 
+    // Post-deploy: Sync dataset examples from local JSONL to service DRAFT.
+    const datasetSpecs = ctx.projectSpec.datasets ?? [];
+    const deployedDatasetsRecord = deployedState.targets?.[target.name]?.resources?.datasets ?? {};
+    if (datasetSpecs.length > 0 && Object.keys(deployedDatasetsRecord).length > 0) {
+      try {
+        const datasetSyncResult = await syncDatasets({
+          region: target.region,
+          datasets: datasetSpecs,
+          deployedDatasets: deployedDatasetsRecord,
+          configBaseDir: configIO.getConfigRoot(),
+        });
+
+        if (datasetSyncResult.results.some(r => r.status === 'synced')) {
+          const updatedState = await configIO.readDeployedState().catch(() => deployedState);
+          const targetResources = updatedState.targets[target.name]?.resources;
+          if (targetResources) {
+            targetResources.datasets = datasetSyncResult.updatedDatasets;
+            await configIO.writeDeployedState(updatedState);
+            deployedState = updatedState;
+          }
+        }
+
+        if (datasetSyncResult.hasErrors) {
+          const errors = datasetSyncResult.results.filter(r => r.status === 'error');
+          for (const err of errors) {
+            logger.log(`Dataset "${err.datasetName}" sync error: ${err.error}`, 'warn');
+          }
+          setPostDeployHasError(true);
+          setPostDeployWarnings(prev => [...prev, ...errors.map(err => `Dataset "${err.datasetName}": ${err.error}`)]);
+        }
+
+        for (const r of datasetSyncResult.results) {
+          if (r.status === 'synced') {
+            logger.log(`Dataset "${r.datasetName}": +${r.added} added, ~${r.updated} updated, -${r.deleted} deleted`);
+          }
+        }
+      } catch (err: unknown) {
+        const message = err instanceof Error ? err.message : String(err);
+        logger.log(`Dataset sync failed: ${message}`, 'warn');
+        setPostDeployHasError(true);
+        setPostDeployWarnings(prev => [...prev, `Dataset sync failed: ${message}`]);
+      }
+    }
+
     // Post-deploy: Enable online eval configs that have enableOnCreate (CFN deploys them as DISABLED).
     // Only enable configs that are newly deployed — skip configs that already existed before this
     // deploy run, so we don't re-enable configs a customer intentionally disabled.
diff --git a/src/cli/tui/screens/eval/EvalScreen.tsx b/src/cli/tui/screens/eval/EvalScreen.tsx
index eb33557fc..bed1a93de 100644
--- a/src/cli/tui/screens/eval/EvalScreen.tsx
+++ b/src/cli/tui/screens/eval/EvalScreen.tsx
@@ -280,14 +280,23 @@ function RunDetailView({ run, onBack, maxHeight }: { run: EvalRunResult; onBack:
           <Text bold>Agent:</Text> {run.agent}
           {'  '}
           <Text bold>Date:</Text> {formatFullDate(run.timestamp)}
-          {'  '}
-          <Text bold>Lookback:</Text> {run.lookbackDays}d
+          {run.source !== 'dataset' && (
+            <>
+              {'  '}
+              <Text bold>Lookback:</Text> {run.lookbackDays}d
+            </>
+          )}
         </Text>
         <Text>
           <Text bold>Sessions:</Text> {run.sessionCount}
           {'  '}
           <Text bold>Evaluators:</Text> {run.evaluators.map(shortEvalName).join(', ')}
         </Text>
+        {run.source === 'dataset' && run.dataset && (
+          <Text>
+            <Text bold>Dataset:</Text> {run.dataset.id} (version: {run.dataset.version})
+          </Text>
+        )}
       </Box>
       <Text color="gray">{'─'.repeat(60)}</Text>
       {run.results.map((result, i) => (
diff --git a/src/cli/tui/screens/remove/RemoveDatasetScreen.tsx b/src/cli/tui/screens/remove/RemoveDatasetScreen.tsx
new file mode 100644
index 000000000..c4872c14a
--- /dev/null
+++ b/src/cli/tui/screens/remove/RemoveDatasetScreen.tsx
@@ -0,0 +1,21 @@
+import type { RemovableDataset } from '../../../primitives/DatasetPrimitive';
+import { SelectScreen } from '../../components';
+import React from 'react';
+
+interface RemoveDatasetScreenProps {
+  datasets: RemovableDataset[];
+  onSelect: (datasetName: string) => void;
+  onExit: () => void;
+}
+
+export function RemoveDatasetScreen({ datasets, onSelect, onExit }: RemoveDatasetScreenProps) {
+  const items = datasets.map(dataset => ({
+    id: dataset.name,
+    title: dataset.name,
+    description: 'Dataset',
+  }));
+
+  return (
+    <SelectScreen title="Select Dataset to Remove" items={items} onSelect={item => onSelect(item.id)} onExit={onExit} />
+  );
+}
diff --git a/src/cli/tui/screens/remove/RemoveFlow.tsx b/src/cli/tui/screens/remove/RemoveFlow.tsx
index 696107486..a20a7eeac 100644
--- a/src/cli/tui/screens/remove/RemoveFlow.tsx
+++ b/src/cli/tui/screens/remove/RemoveFlow.tsx
@@ -4,6 +4,7 @@ import {
   useRemovableABTests,
   useRemovableAgents,
   useRemovableConfigBundles,
+  useRemovableDatasets,
   useRemovableEvaluators,
   useRemovableGatewayTargets,
   useRemovableGateways,
@@ -17,6 +18,7 @@ import {
   useRemoveABTest,
   useRemoveAgent,
   useRemoveConfigBundle,
+  useRemoveDataset,
   useRemoveEvaluator,
   useRemoveGateway,
   useRemoveGatewayTarget,
@@ -32,6 +34,7 @@ import { RemoveAgentScreen } from './RemoveAgentScreen';
 import { RemoveAllScreen } from './RemoveAllScreen';
 import { RemoveConfigBundleScreen } from './RemoveConfigBundleScreen';
 import { RemoveConfirmScreen } from './RemoveConfirmScreen';
+import { RemoveDatasetScreen } from './RemoveDatasetScreen';
 import { RemoveEvaluatorScreen } from './RemoveEvaluatorScreen';
 import { RemoveGatewayScreen } from './RemoveGatewayScreen';
 import { RemoveGatewayTargetScreen } from './RemoveGatewayTargetScreen';
@@ -56,6 +59,7 @@ type FlowState =
   | { name: 'select-memory' }
   | { name: 'select-identity' }
   | { name: 'select-evaluator' }
+  | { name: 'select-dataset' }
   | { name: 'select-online-eval' }
   | { name: 'select-policy-engine' }
   | { name: 'select-policy' }
@@ -68,6 +72,7 @@ type FlowState =
   | { name: 'confirm-memory'; memoryName: string; preview: RemovalPreview }
   | { name: 'confirm-identity'; identityName: string; preview: RemovalPreview }
   | { name: 'confirm-evaluator'; evaluatorName: string; preview: RemovalPreview }
+  | { name: 'confirm-dataset'; datasetName: string; preview: RemovalPreview }
   | { name: 'confirm-online-eval'; configName: string; preview: RemovalPreview }
   | { name: 'confirm-policy-engine'; engineName: string; preview: RemovalPreview }
   | { name: 'confirm-policy'; compositeKey: string; policyName: string; preview: RemovalPreview }
@@ -81,6 +86,7 @@ type FlowState =
   | { name: 'memory-success'; memoryName: string; logFilePath?: string }
   | { name: 'identity-success'; identityName: string; logFilePath?: string }
   | { name: 'evaluator-success'; evaluatorName: string; logFilePath?: string }
+  | { name: 'dataset-success'; datasetName: string; logFilePath?: string }
   | { name: 'online-eval-success'; configName: string; logFilePath?: string }
   | { name: 'policy-engine-success'; engineName: string; logFilePath?: string }
   | { name: 'policy-success'; policyName: string; logFilePath?: string }
@@ -111,7 +117,8 @@ interface RemoveFlowProps {
     | 'policy-engine'
     | 'policy'
     | 'config-bundle'
-    | 'ab-test';
+    | 'ab-test'
+    | 'dataset';
   /** Initial resource name to auto-select (for CLI --name flag) */
   initialResourceName?: string;
 }
@@ -139,6 +146,8 @@ export function RemoveFlow({
         return { name: 'select-identity' };
       case 'evaluator':
         return { name: 'select-evaluator' };
+      case 'dataset':
+        return { name: 'select-dataset' };
       case 'online-eval':
         return { name: 'select-online-eval' };
       case 'policy-engine':
@@ -164,6 +173,7 @@ export function RemoveFlow({
   const { memories, isLoading: isLoadingMemories, refresh: refreshMemories } = useRemovableMemories();
   const { identities, isLoading: isLoadingIdentities, refresh: refreshIdentities } = useRemovableIdentities();
   const { evaluators, isLoading: isLoadingEvaluators, refresh: refreshEvaluators } = useRemovableEvaluators();
+  const { datasets, isLoading: isLoadingDatasets, refresh: refreshDatasets } = useRemovableDatasets();
   const {
     onlineEvalConfigs,
     isLoading: isLoadingOnlineEvals,
@@ -195,6 +205,7 @@ export function RemoveFlow({
     isLoadingMemories ||
     isLoadingIdentities ||
     isLoadingEvaluators ||
+    isLoadingDatasets ||
     isLoadingOnlineEvals ||
     isLoadingPolicyEngines ||
     isLoadingPolicies ||
@@ -209,6 +220,7 @@ export function RemoveFlow({
     loadMemoryPreview,
     loadIdentityPreview,
     loadEvaluatorPreview,
+    loadDatasetPreview,
     loadOnlineEvalPreview,
     loadPolicyEnginePreview,
     loadPolicyPreview,
@@ -225,6 +237,7 @@ export function RemoveFlow({
   const { remove: removeMemoryOp, reset: resetRemoveMemory } = useRemoveMemory();
   const { remove: removeIdentityOp, reset: resetRemoveIdentity } = useRemoveIdentity();
   const { remove: removeEvaluatorOp, reset: resetRemoveEvaluator } = useRemoveEvaluator();
+  const { remove: removeDatasetOp, reset: resetRemoveDataset } = useRemoveDataset();
   const { remove: removeOnlineEvalOp, reset: resetRemoveOnlineEval } = useRemoveOnlineEvalConfig();
   const { remove: removePolicyEngineOp, reset: resetRemovePolicyEngine } = useRemovePolicyEngine();
   const { remove: removePolicyOp, reset: resetRemovePolicy } = useRemovePolicy();
@@ -258,6 +271,7 @@ export function RemoveFlow({
         'memory-success',
         'identity-success',
         'evaluator-success',
+        'dataset-success',
         'online-eval-success',
         'policy-engine-success',
         'policy-success',
@@ -294,6 +308,9 @@ export function RemoveFlow({
       case 'evaluator':
         setFlow({ name: 'select-evaluator' });
         break;
+      case 'dataset':
+        setFlow({ name: 'select-dataset' });
+        break;
       case 'online-eval':
         setFlow({ name: 'select-online-eval' });
         break;
@@ -453,6 +470,28 @@ export function RemoveFlow({
     [loadEvaluatorPreview, force, removeEvaluatorOp]
   );
 
+  const handleSelectDataset = useCallback(
+    async (datasetName: string) => {
+      const result = await loadDatasetPreview(datasetName);
+      if (result.ok) {
+        if (force) {
+          setFlow({ name: 'loading', message: `Removing dataset ${datasetName}...` });
+          const removeResult = await removeDatasetOp(datasetName, result.preview);
+          if (removeResult.success) {
+            setFlow({ name: 'dataset-success', datasetName });
+          } else {
+            setFlow({ name: 'error', message: removeResult.error.message });
+          }
+        } else {
+          setFlow({ name: 'confirm-dataset', datasetName, preview: result.preview });
+        }
+      } else {
+        setFlow({ name: 'error', message: result.error });
+      }
+    },
+    [loadDatasetPreview, force, removeDatasetOp]
+  );
+
   const handleSelectOnlineEval = useCallback(
     async (configName: string) => {
       const result = await loadOnlineEvalPreview(configName);
@@ -633,6 +672,9 @@ export function RemoveFlow({
         case 'runtime-endpoint':
           void handleSelectRuntimeEndpoint(initialResourceName);
           break;
+        case 'dataset':
+          void handleSelectDataset(initialResourceName);
+          break;
       }
     }, 0);
   }, [
@@ -644,6 +686,7 @@ export function RemoveFlow({
     handleSelectMemory,
     handleSelectIdentity,
     handleSelectEvaluator,
+    handleSelectDataset,
     handleSelectOnlineEval,
     handleSelectPolicyEngine,
     handleSelectPolicy,
@@ -749,6 +792,22 @@ export function RemoveFlow({
     [removeEvaluatorOp]
   );
 
+  const handleConfirmDataset = useCallback(
+    async (datasetName: string, preview: RemovalPreview) => {
+      pendingResultRef.current = null;
+      setResultReady(false);
+      setFlow({ name: 'loading', message: `Removing dataset ${datasetName}...` });
+      const result = await removeDatasetOp(datasetName, preview);
+      if (result.success) {
+        pendingResultRef.current = { name: 'dataset-success', datasetName, logFilePath: result.logFilePath };
+      } else {
+        pendingResultRef.current = { name: 'error', message: result.error.message };
+      }
+      setResultReady(true);
+    },
+    [removeDatasetOp]
+  );
+
   const handleConfirmOnlineEval = useCallback(
     async (configName: string, preview: RemovalPreview) => {
       pendingResultRef.current = null;
@@ -853,6 +912,7 @@ export function RemoveFlow({
     resetRemoveMemory();
     resetRemoveIdentity();
     resetRemoveEvaluator();
+    resetRemoveDataset();
     resetRemoveOnlineEval();
     resetRemovePolicyEngine();
     resetRemovePolicy();
@@ -867,6 +927,7 @@ export function RemoveFlow({
     resetRemoveMemory,
     resetRemoveIdentity,
     resetRemoveEvaluator,
+    resetRemoveDataset,
     resetRemoveOnlineEval,
     resetRemovePolicyEngine,
     resetRemovePolicy,
@@ -883,6 +944,7 @@ export function RemoveFlow({
       refreshMemories(),
       refreshIdentities(),
       refreshEvaluators(),
+      refreshDatasets(),
       refreshOnlineEvals(),
       refreshPolicyEngines(),
       refreshPolicies(),
@@ -896,6 +958,7 @@ export function RemoveFlow({
     refreshMemories,
     refreshIdentities,
     refreshEvaluators,
+    refreshDatasets,
     refreshOnlineEvals,
     refreshPolicyEngines,
     refreshPolicies,
@@ -924,6 +987,7 @@ export function RemoveFlow({
         configBundleCount={configBundles.length}
         abTestCount={abTests.length}
         runtimeEndpointCount={runtimeEndpoints.length}
+        datasetCount={datasets.length}
       />
     );
   }
@@ -1019,6 +1083,19 @@ export function RemoveFlow({
     );
   }
 
+  if (flow.name === 'select-dataset') {
+    if (initialResourceName && isLoading) {
+      return null;
+    }
+    return (
+      <RemoveDatasetScreen
+        datasets={datasets}
+        onSelect={(name: string) => void handleSelectDataset(name)}
+        onExit={() => setFlow({ name: 'select' })}
+      />
+    );
+  }
+
   if (flow.name === 'select-online-eval') {
     if (initialResourceName && isLoading) {
       return null;
@@ -1164,6 +1241,17 @@ export function RemoveFlow({
     );
   }
 
+  if (flow.name === 'confirm-dataset') {
+    return (
+      <RemoveConfirmScreen
+        title={`Remove Dataset: ${flow.datasetName}`}
+        preview={flow.preview}
+        onConfirm={() => void handleConfirmDataset(flow.datasetName, flow.preview)}
+        onCancel={() => setFlow({ name: 'select-dataset' })}
+      />
+    );
+  }
+
   if (flow.name === 'confirm-online-eval') {
     return (
       <RemoveConfirmScreen
@@ -1327,6 +1415,22 @@ export function RemoveFlow({
     );
   }
 
+  if (flow.name === 'dataset-success') {
+    return (
+      <RemoveSuccessScreen
+        isInteractive={isInteractive}
+        message={`Removed dataset: ${flow.datasetName}`}
+        detail="Dataset removed from agentcore.json. Deploy with `agentcore deploy` to apply changes. (Local JSONL is left on disk.)"
+        logFilePath={flow.logFilePath}
+        onRemoveAnother={() => {
+          resetAll();
+          void refreshAll().then(() => setFlow({ name: 'select' }));
+        }}
+        onExit={onExit}
+      />
+    );
+  }
+
   if (flow.name === 'online-eval-success') {
     return (
       <RemoveSuccessScreen
diff --git a/src/cli/tui/screens/remove/RemoveScreen.tsx b/src/cli/tui/screens/remove/RemoveScreen.tsx
index b1178e530..81e0ec9ad 100644
--- a/src/cli/tui/screens/remove/RemoveScreen.tsx
+++ b/src/cli/tui/screens/remove/RemoveScreen.tsx
@@ -15,6 +15,7 @@ const REMOVE_RESOURCES = [
   { id: 'config-bundle', title: 'Configuration Bundle [preview]', description: 'Remove a configuration bundle' },
   { id: 'ab-test', title: 'AB Test [preview]', description: 'Remove an A/B test' },
   { id: 'runtime-endpoint', title: 'Runtime Endpoint', description: 'Remove a runtime endpoint' },
+  { id: 'dataset', title: 'Dataset', description: 'Remove a dataset' },
   { id: 'all', title: 'All', description: 'Reset entire agentcore project' },
 ] as const;
 
@@ -47,6 +48,8 @@ interface RemoveScreenProps {
   abTestCount: number;
   /** Number of runtime endpoints available for removal */
   runtimeEndpointCount: number;
+  /** Number of datasets available for removal */
+  datasetCount: number;
 }
 
 export function RemoveScreen({
@@ -64,6 +67,7 @@ export function RemoveScreen({
   configBundleCount,
   abTestCount,
   runtimeEndpointCount,
+  datasetCount,
 }: RemoveScreenProps) {
   const items: SelectableItem[] = useMemo(() => {
     return REMOVE_RESOURCES.map(r => {
@@ -143,6 +147,12 @@ export function RemoveScreen({
             description = 'No runtime endpoints to remove';
           }
           break;
+        case 'dataset':
+          if (datasetCount === 0) {
+            disabled = true;
+            description = 'No datasets to remove';
+          }
+          break;
         case 'all':
           // 'all' is always available
           break;
@@ -163,6 +173,7 @@ export function RemoveScreen({
     configBundleCount,
     abTestCount,
     runtimeEndpointCount,
+    datasetCount,
   ]);
 
   const isDisabled = (item: SelectableItem) => item.disabled ?? false;
diff --git a/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx b/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx
index ccc59e9da..a0933bd32 100644
--- a/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx
+++ b/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx
@@ -24,6 +24,7 @@ describe('RemoveScreen', () => {
         configBundleCount={1}
         abTestCount={0}
         runtimeEndpointCount={1}
+        datasetCount={0}
       />
     );
 
@@ -57,6 +58,7 @@ describe('RemoveScreen', () => {
         configBundleCount={0}
         abTestCount={0}
         runtimeEndpointCount={0}
+        datasetCount={0}
       />
     );
 
@@ -86,6 +88,7 @@ describe('RemoveScreen', () => {
         configBundleCount={0}
         abTestCount={2}
         runtimeEndpointCount={0}
+        datasetCount={0}
       />
     );
 
@@ -113,6 +116,7 @@ describe('RemoveScreen', () => {
         configBundleCount={0}
         abTestCount={0}
         runtimeEndpointCount={0}
+        datasetCount={0}
       />
     );
 
diff --git a/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx b/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx
index a1903f7d0..642759154 100644
--- a/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx
+++ b/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx
@@ -102,6 +102,9 @@ function BatchEvalListView({
                 .join(', ');
             }
 
+            const datasetLabel =
+              rec.source === 'dataset' && rec.dataset ? ` [${rec.dataset.id}@${rec.dataset.version}]` : '';
+
             return (
               <Text key={rec.batchEvaluationId} wrap="truncate-end">
                 <Text color={selected ? 'cyan' : undefined}>{selected ? '>' : ' '} </Text>
@@ -109,6 +112,7 @@ function BatchEvalListView({
                 <Text color={statusColor(rec.status)}>{rec.status.padEnd(12)}</Text>
                 {scoreText && <Text>{scoreText.padEnd(10)}</Text>}
                 <Text dimColor>{rec.name}</Text>
+                {datasetLabel && <Text color="blue">{datasetLabel}</Text>}
               </Text>
             );
           })}
@@ -165,6 +169,11 @@ function BatchEvalDetailView({ record, onBack }: { record: BatchEvalRunRecord; o
         <Text>
           <Text bold>Evaluators:</Text> {record.evaluators.join(', ')}
         </Text>
+        {record.source === 'dataset' && record.dataset && (
+          <Text>
+            <Text bold>Dataset:</Text> {record.dataset.id} (version: {record.dataset.version})
+          </Text>
+        )}
         {record.startedAt && (
           <Text>
             <Text bold>Started:</Text> {new Date(record.startedAt).toLocaleString()}
diff --git a/src/cli/tui/screens/run-eval/RunBatchEvalFlow.tsx b/src/cli/tui/screens/run-eval/RunBatchEvalFlow.tsx
index 659d2034e..b46ad4e36 100644
--- a/src/cli/tui/screens/run-eval/RunBatchEvalFlow.tsx
+++ b/src/cli/tui/screens/run-eval/RunBatchEvalFlow.tsx
@@ -55,6 +55,8 @@ interface BatchEvalConfig {
   groundTruthFile: string;
   sessionMetadata?: SessionMetadataEntry[];
   name: string;
+  dataset?: string;
+  datasetVersion?: string;
 }
 
 const STEP_LABELS: Record<BatchEvalStep, string> = {
@@ -67,9 +69,19 @@ const STEP_LABELS: Record<BatchEvalStep, string> = {
   confirm: 'Confirm',
 };
 
+type EvalSource = 'dataset' | 'traces';
+
 type FlowState =
   | { name: 'loading' }
-  | { name: 'wizard'; agents: AgentItem[]; evaluators: EvaluatorItem[] }
+  | { name: 'source-picker'; agents: AgentItem[]; evaluators: EvaluatorItem[] }
+  | {
+      name: 'wizard';
+      agents: AgentItem[];
+      evaluators: EvaluatorItem[];
+      source: EvalSource;
+      dataset?: string;
+      datasetVersion?: string;
+    }
   | {
       name: 'running';
       config: BatchEvalConfig;
@@ -176,7 +188,7 @@ export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) {
           return;
         }
 
-        setFlow({ name: 'wizard', agents, evaluators });
+        setFlow({ name: 'source-picker', agents, evaluators });
       } catch (err) {
         if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) });
       }
@@ -187,15 +199,30 @@ export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) {
     };
   }, [flow.name]);
 
-  const handleWizardComplete = useCallback((config: BatchEvalConfig) => {
-    stoppingRef.current = false;
-    const initialSteps: Step[] = [
-      { label: 'Starting batch evaluation...', status: 'running' },
-      { label: 'Polling for results', status: 'pending' },
-      { label: 'Fetching scores', status: 'pending' },
-    ];
-    setFlow({ name: 'running', config, steps: initialSteps, elapsed: 0 });
-  }, []);
+  const handleWizardComplete = useCallback(
+    (config: BatchEvalConfig) => {
+      // Inject dataset info from source-picker selection
+      if (flow.name === 'wizard' && flow.source === 'dataset') {
+        config = { ...config, dataset: flow.dataset, datasetVersion: flow.datasetVersion };
+      }
+      stoppingRef.current = false;
+      const isDataset = flow.name === 'wizard' && flow.source === 'dataset';
+      const initialSteps: Step[] = isDataset
+        ? [
+            { label: 'Running dataset scenarios...', status: 'running' },
+            { label: 'Starting batch evaluation', status: 'pending' },
+            { label: 'Polling for results', status: 'pending' },
+            { label: 'Fetching scores', status: 'pending' },
+          ]
+        : [
+            { label: 'Starting batch evaluation...', status: 'running' },
+            { label: 'Polling for results', status: 'pending' },
+            { label: 'Fetching scores', status: 'pending' },
+          ];
+      setFlow({ name: 'running', config, steps: initialSteps, elapsed: 0 });
+    },
+    [flow]
+  );
 
   // Execute batch evaluation
   useEffect(() => {
@@ -223,6 +250,8 @@ export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) {
           sessionIds: config.sessionIds.length > 0 ? config.sessionIds : undefined,
           lookbackDays: config.days,
           sessionMetadata: config.sessionMetadata,
+          dataset: config.dataset,
+          datasetVersion: config.datasetVersion,
           onProgress: (status, _message) => {
             if (cancelled) return;
             setFlow(prev => {
@@ -250,7 +279,13 @@ export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) {
         let savedFilePath: string | undefined;
         if (result.success) {
           try {
-            savedFilePath = saveBatchEvalRun(result);
+            const datasetInfo = config.dataset
+              ? {
+                  source: 'dataset' as const,
+                  dataset: { id: config.dataset, version: config.datasetVersion ?? 'LOCAL' },
+                }
+              : {};
+            savedFilePath = saveBatchEvalRun({ result, ...datasetInfo });
           } catch {
             // Non-fatal
           }
@@ -317,11 +352,37 @@ export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) {
     return <ErrorPrompt message="AWS credentials required" detail={flow.message} onBack={onExit} onExit={onExit} />;
   }
 
+  if (flow.name === 'source-picker') {
+    return (
+      <BatchEvalSourcePicker
+        agents={flow.agents}
+        evaluators={flow.evaluators}
+        onSelect={(source, dataset, datasetVersion) => {
+          if (source === 'traces') {
+            setFlow({ name: 'wizard', agents: flow.agents, evaluators: flow.evaluators, source: 'traces' });
+          } else {
+            setFlow({
+              name: 'wizard',
+              agents: flow.agents,
+              evaluators: flow.evaluators,
+              source: 'dataset',
+              dataset,
+              datasetVersion,
+            });
+          }
+        }}
+        onExit={onExit}
+      />
+    );
+  }
+
   if (flow.name === 'wizard') {
     return (
       <BatchEvalWizard
         agents={flow.agents}
         evaluators={flow.evaluators}
+        source={flow.source}
+        dataset={flow.dataset}
         onComplete={handleWizardComplete}
         onExit={onExit}
       />
@@ -381,19 +442,30 @@ export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) {
 interface BatchEvalWizardProps {
   agents: AgentItem[];
   evaluators: EvaluatorItem[];
+  source?: EvalSource;
+  dataset?: string;
   onComplete: (config: BatchEvalConfig) => void;
   onExit: () => void;
 }
 
-function BatchEvalWizard({ agents, evaluators: rawEvaluators, onComplete, onExit }: BatchEvalWizardProps) {
+function BatchEvalWizard({
+  agents,
+  evaluators: rawEvaluators,
+  source,
+  dataset,
+  onComplete,
+  onExit,
+}: BatchEvalWizardProps) {
   const skipAgent = agents.length <= 1;
-  const allSteps = useMemo<BatchEvalStep[]>(
-    () =>
-      skipAgent
-        ? ['evaluators', 'days', 'sessions', 'ground-truth', 'name', 'confirm']
-        : ['agent', 'evaluators', 'days', 'sessions', 'ground-truth', 'name', 'confirm'],
-    [skipAgent]
-  );
+  const isDatasetMode = source === 'dataset';
+  const allSteps = useMemo<BatchEvalStep[]>(() => {
+    if (isDatasetMode) {
+      return skipAgent ? ['evaluators', 'name', 'confirm'] : ['agent', 'evaluators', 'name', 'confirm'];
+    }
+    return skipAgent
+      ? ['evaluators', 'days', 'sessions', 'ground-truth', 'name', 'confirm']
+      : ['agent', 'evaluators', 'days', 'sessions', 'ground-truth', 'name', 'confirm'];
+  }, [skipAgent, isDatasetMode]);
 
   const [step, setStep] = useState<BatchEvalStep>(allSteps[0]!);
   const [config, setConfig] = useState<BatchEvalConfig>({
@@ -796,14 +868,23 @@ function BatchEvalWizard({ agents, evaluators: rawEvaluators, onComplete, onExit
             fields={[
               { label: 'Agent', value: config.agent },
               { label: 'Evaluators', value: config.evaluatorNames.join(', ') },
-              { label: 'Lookback', value: `${config.days} day${config.days !== 1 ? 's' : ''}` },
-              {
-                label: 'Sessions',
-                value: `${config.sessionIds.length} selected`,
-              },
-              ...(config.sessionMetadata
-                ? [{ label: 'Ground Truth', value: `${config.sessionMetadata.length} session(s) with ground truth` }]
-                : []),
+              ...(isDatasetMode
+                ? [{ label: 'Source', value: `Dataset: ${dataset ?? 'default'}` }]
+                : [
+                    { label: 'Lookback', value: `${config.days} day${config.days !== 1 ? 's' : ''}` },
+                    {
+                      label: 'Sessions',
+                      value: `${config.sessionIds.length} selected`,
+                    },
+                    ...(config.sessionMetadata
+                      ? [
+                          {
+                            label: 'Ground Truth',
+                            value: `${config.sessionMetadata.length} session(s) with ground truth`,
+                          },
+                        ]
+                      : []),
+                  ]),
               ...(config.name ? [{ label: 'Name', value: config.name }] : []),
             ]}
           />
@@ -813,6 +894,217 @@ function BatchEvalWizard({ agents, evaluators: rawEvaluators, onComplete, onExit
   );
 }
 
+// ============================================================================
+// Source Picker
+// ============================================================================
+
+interface BatchEvalSourcePickerProps {
+  agents: AgentItem[];
+  evaluators: EvaluatorItem[];
+  onSelect: (source: EvalSource, dataset?: string, datasetVersion?: string) => void;
+  onExit: () => void;
+}
+
+function BatchEvalSourcePicker({
+  agents: _agents,
+  evaluators: _evaluators,
+  onSelect,
+  onExit,
+}: BatchEvalSourcePickerProps) {
+  const [step, setStep] = useState<'source' | 'dataset' | 'version'>('source');
+  const [datasets, setDatasets] = useState<{ name: string; schemaType: string }[]>([]);
+  const [selectedDataset, setSelectedDataset] = useState<string>('');
+  const [versionItems, setVersionItems] = useState<{ id: string; title: string; description: string }[]>([]);
+  const [loadingVersions, setLoadingVersions] = useState(false);
+
+  // Load dataset names from project config
+  useEffect(() => {
+    void (async () => {
+      try {
+        const { ConfigIO } = await import('../../../../lib');
+        const configIO = new ConfigIO();
+        const spec = await configIO.readProjectSpec();
+        setDatasets(
+          (spec.datasets ?? []).map((d: { name: string; schemaType: string }) => ({
+            name: d.name,
+            schemaType: d.schemaType,
+          }))
+        );
+      } catch {
+        // No datasets available
+      }
+    })();
+  }, []);
+
+  // Load versions when a dataset is selected
+  useEffect(() => {
+    if (step !== 'version' || !selectedDataset) return;
+    let cancelled = false;
+    setLoadingVersions(true);
+
+    void (async () => {
+      try {
+        const { resolveDataset } = await import('../../../operations/dataset/resolve-dataset');
+        const { listDatasetVersions } = await import('../../../aws/agentcore-datasets');
+        const resolved = await resolveDataset(selectedDataset);
+        const result = await listDatasetVersions({ region: resolved.region, datasetId: resolved.datasetId });
+
+        if (cancelled) return;
+
+        const items: { id: string; title: string; description: string }[] = [
+          { id: 'local', title: 'Local file', description: 'fastest iteration, no push required' },
+          { id: 'DRAFT', title: 'DRAFT', description: 'latest pushed content' },
+        ];
+        for (const v of result.versions.sort((a, b) => b.createdAt - a.createdAt)) {
+          const date = new Date(v.createdAt * 1000).toLocaleDateString([], {
+            month: 'short',
+            day: 'numeric',
+            year: 'numeric',
+          });
+          items.push({
+            id: v.datasetVersion,
+            title: `Version ${v.datasetVersion}`,
+            description: `${v.exampleCount} examples · ${date}`,
+          });
+        }
+        setVersionItems(items);
+      } catch {
+        // If versions can't be loaded (not deployed yet), just offer local
+        setVersionItems([{ id: 'local', title: 'Local file', description: 'fastest iteration, no push required' }]);
+      } finally {
+        if (!cancelled) setLoadingVersions(false);
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+    };
+  }, [step, selectedDataset]);
+
+  const sourceItems = [
+    { id: 'dataset', title: 'Dataset', description: 'Invoke agent with dataset scenarios' },
+    { id: 'traces', title: 'Historical traces', description: 'Evaluate existing sessions' },
+  ];
+
+  const SCHEMA_LABELS: Record<string, string> = {
+    AGENTCORE_EVALUATION_PREDEFINED_V1: 'Predefined Turns',
+    AGENTCORE_EVALUATION_SIMULATED_V1: 'Actor Simulator',
+  };
+
+  const datasetItems = datasets.map(d => ({
+    id: d.name,
+    title: d.name,
+    description: SCHEMA_LABELS[d.schemaType] ?? d.schemaType,
+  }));
+
+  const handleDatasetSelected = useCallback(
+    (name: string) => {
+      setSelectedDataset(name);
+      setStep('version');
+    },
+    [setSelectedDataset, setStep]
+  );
+
+  const sourceNav = useListNavigation({
+    items: sourceItems,
+    onSelect: (item: { id: string }) => {
+      if (item.id === 'traces') {
+        onSelect('traces');
+      } else {
+        if (datasets.length === 1) {
+          handleDatasetSelected(datasets[0]!.name);
+        } else if (datasets.length > 1) {
+          setStep('dataset');
+        } else {
+          onSelect('dataset');
+        }
+      }
+    },
+    onExit,
+    isActive: step === 'source',
+  });
+
+  const datasetNav = useListNavigation({
+    items: datasetItems,
+    onSelect: (item: { id: string }) => {
+      handleDatasetSelected(item.id);
+    },
+    onExit: () => setStep('source'),
+    isActive: step === 'dataset',
+  });
+
+  const versionNav = useListNavigation({
+    items: versionItems,
+    onSelect: (item: { id: string }) => {
+      const version = item.id === 'local' ? undefined : item.id;
+      onSelect('dataset', selectedDataset, version);
+    },
+    onExit: () => (datasets.length > 1 ? setStep('dataset') : setStep('source')),
+    isActive: step === 'version' && !loadingVersions,
+  });
+
+  if (step === 'version') {
+    return (
+      <Screen
+        title="Run Batch Evaluation [preview]"
+        onExit={() => (datasets.length > 1 ? setStep('dataset') : setStep('source'))}
+      >
+        <Box flexDirection="column">
+          <Text bold>Select version for {selectedDataset}:</Text>
+          {loadingVersions ? (
+            <GradientText text="Loading versions..." />
+          ) : (
+            <>
+              {versionItems.map((item, i) => (
+                <Text key={item.id}>
+                  {i === versionNav.selectedIndex ? <Text color="cyan">❯ </Text> : '  '}
+                  <Text color={i === versionNav.selectedIndex ? 'cyan' : undefined}>{item.title}</Text>
+                  <Text dimColor> — {item.description}</Text>
+                </Text>
+              ))}
+              <Text dimColor>{'\n'}↑↓ Enter select · Esc back</Text>
+            </>
+          )}
+        </Box>
+      </Screen>
+    );
+  }
+
+  if (step === 'dataset') {
+    return (
+      <Screen title="Run Batch Evaluation [preview]" onExit={() => setStep('source')}>
+        <Box flexDirection="column">
+          <Text bold>Select dataset:</Text>
+          {datasetItems.map((item, i) => (
+            <Text key={item.id}>
+              {i === datasetNav.selectedIndex ? <Text color="cyan">❯ </Text> : '  '}
+              <Text color={i === datasetNav.selectedIndex ? 'cyan' : undefined}>{item.title}</Text>
+              {item.description && <Text dimColor> — {item.description}</Text>}
+            </Text>
+          ))}
+          <Text dimColor>{'\n'}↑↓ Enter select · Esc back</Text>
+        </Box>
+      </Screen>
+    );
+  }
+
+  return (
+    <Screen title="Run Batch Evaluation [preview]" onExit={onExit}>
+      <Box flexDirection="column">
+        <Text bold>Evaluation source:</Text>
+        {sourceItems.map((item, i) => (
+          <Text key={item.id}>
+            {i === sourceNav.selectedIndex ? <Text color="cyan">❯ </Text> : '  '}
+            <Text color={i === sourceNav.selectedIndex ? 'cyan' : undefined}>{item.title}</Text>
+            <Text dimColor> — {item.description}</Text>
+          </Text>
+        ))}
+        <Text dimColor>{'\n'}↑↓ Enter select · Esc back</Text>
+      </Box>
+    </Screen>
+  );
+}
+
 // ============================================================================
 // Results View
 // ============================================================================
diff --git a/src/cli/tui/screens/run-eval/RunEvalFlow.tsx b/src/cli/tui/screens/run-eval/RunEvalFlow.tsx
index 231589b2d..7af3c4eda 100644
--- a/src/cli/tui/screens/run-eval/RunEvalFlow.tsx
+++ b/src/cli/tui/screens/run-eval/RunEvalFlow.tsx
@@ -16,10 +16,13 @@ import type { AgentItem, RunEvalConfig, RunEvalFlowData } from './types';
 import { Box, Text } from 'ink';
 import React, { useCallback, useEffect, useState } from 'react';
 
+type EvalSource = 'dataset' | 'traces';
+
 type FlowState =
   | { name: 'loading' }
-  | { name: 'wizard'; data: RunEvalFlowData }
-  | { name: 'running'; config: RunEvalConfig }
+  | { name: 'source-picker'; data: RunEvalFlowData }
+  | { name: 'wizard'; data: RunEvalFlowData; source: EvalSource; dataset?: string; datasetVersion?: string }
+  | { name: 'running'; config: RunEvalConfig; progressMessage?: string }
   | { name: 'results'; result: RunEvalResult; run: EvalRunResult; filePath: string }
   | { name: 'creds-error'; message: string }
   | { name: 'error'; message: string };
@@ -108,7 +111,7 @@ export function RunEvalFlow({ onExit, onViewRuns }: RunEvalFlowProps) {
           return;
         }
 
-        setFlow({ name: 'wizard', data: { agents, evaluators } });
+        setFlow({ name: 'source-picker', data: { agents, evaluators } });
       } catch (err) {
         if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) });
       }
@@ -119,9 +122,20 @@ export function RunEvalFlow({ onExit, onViewRuns }: RunEvalFlowProps) {
     };
   }, [flow.name]);
 
-  const handleRunComplete = useCallback((config: RunEvalConfig) => {
-    setFlow({ name: 'running', config });
-  }, []);
+  const handleRunComplete = useCallback(
+    (config: RunEvalConfig) => {
+      // Inject dataset info from source-picker selection
+      if (flow.name === 'wizard' && flow.source === 'dataset') {
+        config = { ...config, dataset: flow.dataset, datasetVersion: flow.datasetVersion };
+      }
+      const isDataset = flow.name === 'wizard' && flow.source === 'dataset';
+      const progressMessage = isDataset
+        ? 'Running dataset evaluation: loading scenarios → invoking agent → collecting spans → evaluating...'
+        : undefined;
+      setFlow({ name: 'running', config, progressMessage });
+    },
+    [flow]
+  );
 
   // Execute the eval when we enter 'running' state
   useEffect(() => {
@@ -141,6 +155,14 @@ export function RunEvalFlow({ onExit, onViewRuns }: RunEvalFlowProps) {
           assertions: config.assertions.length > 0 ? config.assertions : undefined,
           expectedTrajectory: config.expectedTrajectory.length > 0 ? config.expectedTrajectory : undefined,
           expectedResponse: config.expectedResponse || undefined,
+          dataset: config.dataset,
+          datasetVersion: config.datasetVersion,
+          onProgress: config.dataset
+            ? (_phase, message) => {
+                if (!cancelled)
+                  setFlow(prev => (prev.name === 'running' ? { ...prev, progressMessage: message } : prev));
+              }
+            : undefined,
         });
 
         if (cancelled) return;
@@ -173,11 +195,28 @@ export function RunEvalFlow({ onExit, onViewRuns }: RunEvalFlowProps) {
     return <ErrorPrompt message="AWS credentials required" detail={flow.message} onBack={onExit} onExit={onExit} />;
   }
 
+  if (flow.name === 'source-picker') {
+    return (
+      <EvalSourcePicker
+        data={flow.data}
+        onSelect={(source, dataset, datasetVersion) => {
+          if (source === 'traces') {
+            setFlow({ name: 'wizard', data: flow.data, source: 'traces' });
+          } else {
+            setFlow({ name: 'wizard', data: flow.data, source: 'dataset', dataset, datasetVersion });
+          }
+        }}
+        onExit={onExit}
+      />
+    );
+  }
+
   if (flow.name === 'wizard') {
     return (
       <RunEvalScreen
         agents={flow.data.agents}
         evaluatorItems={flow.data.evaluators}
+        source={flow.source}
         onComplete={handleRunComplete}
         onExit={onExit}
       />
@@ -185,9 +224,10 @@ export function RunEvalFlow({ onExit, onViewRuns }: RunEvalFlowProps) {
   }
 
   if (flow.name === 'running') {
+    const message = flow.progressMessage ?? 'Running evaluation... this may take a few minutes';
     return (
       <Screen title="Run On-demand Evaluation" onExit={onExit}>
-        <GradientText text="Running evaluation... this may take a few minutes" />
+        <GradientText text={message} />
       </Screen>
     );
   }
@@ -214,6 +254,202 @@ export function RunEvalFlow({ onExit, onViewRuns }: RunEvalFlowProps) {
   );
 }
 
+// ─────────────────────────────────────────────────────────────────────────────
+// Evaluation source picker
+// ─────────────────────────────────────────────────────────────────────────────
+
+interface EvalSourcePickerProps {
+  data: RunEvalFlowData;
+  onSelect: (source: EvalSource, dataset?: string, datasetVersion?: string) => void;
+  onExit: () => void;
+}
+
+function EvalSourcePicker({ data: _data, onSelect, onExit }: EvalSourcePickerProps) {
+  const [step, setStep] = useState<'source' | 'dataset' | 'version'>('source');
+  const [datasets, setDatasets] = useState<string[]>([]);
+  const [selectedDataset, setSelectedDataset] = useState<string>('');
+  const [versionItems, setVersionItems] = useState<{ id: string; title: string; description: string }[]>([]);
+  const [loadingVersions, setLoadingVersions] = useState(false);
+
+  // Load dataset names from project config
+  useEffect(() => {
+    void (async () => {
+      try {
+        const { ConfigIO } = await import('../../../../lib');
+        const configIO = new ConfigIO();
+        const spec = await configIO.readProjectSpec();
+        setDatasets((spec.datasets ?? []).map(d => d.name));
+      } catch {
+        // No datasets available
+      }
+    })();
+  }, []);
+
+  // Load versions when a dataset is selected
+  useEffect(() => {
+    if (step !== 'version' || !selectedDataset) return;
+    let cancelled = false;
+    setLoadingVersions(true);
+
+    void (async () => {
+      try {
+        const { resolveDataset } = await import('../../../operations/dataset/resolve-dataset');
+        const { listDatasetVersions } = await import('../../../aws/agentcore-datasets');
+        const resolved = await resolveDataset(selectedDataset);
+        const result = await listDatasetVersions({ region: resolved.region, datasetId: resolved.datasetId });
+
+        if (cancelled) return;
+
+        const items: { id: string; title: string; description: string }[] = [
+          { id: 'local', title: 'Local file', description: 'fastest iteration, no push required' },
+          { id: 'DRAFT', title: 'DRAFT', description: 'latest pushed content' },
+        ];
+        for (const v of result.versions.sort((a, b) => b.createdAt - a.createdAt)) {
+          const date = new Date(v.createdAt * 1000).toLocaleDateString([], {
+            month: 'short',
+            day: 'numeric',
+            year: 'numeric',
+          });
+          items.push({
+            id: v.datasetVersion,
+            title: `Version ${v.datasetVersion}`,
+            description: `${v.exampleCount} examples · ${date}`,
+          });
+        }
+        setVersionItems(items);
+      } catch {
+        // If versions can't be loaded (not deployed yet), just offer local + DRAFT
+        setVersionItems([
+          { id: 'local', title: 'Local file', description: 'fastest iteration, no push required' },
+          { id: 'DRAFT', title: 'DRAFT', description: 'latest pushed content' },
+        ]);
+      } finally {
+        if (!cancelled) setLoadingVersions(false);
+      }
+    })();
+
+    return () => {
+      cancelled = true;
+    };
+  }, [step, selectedDataset]);
+
+  const sourceItems = [
+    { id: 'dataset', title: 'Dataset', description: 'Invoke agent with dataset scenarios' },
+    { id: 'traces', title: 'Historical traces', description: 'Evaluate existing sessions' },
+  ];
+
+  const datasetItems = datasets.map(name => ({
+    id: name,
+    title: name,
+  }));
+
+  const handleDatasetSelected = useCallback(
+    (name: string) => {
+      setSelectedDataset(name);
+      setStep('version');
+    },
+    [setSelectedDataset, setStep]
+  );
+
+  const sourceNav = useListNavigation({
+    items: sourceItems,
+    onSelect: (item: { id: string }) => {
+      if (item.id === 'traces') {
+        onSelect('traces');
+      } else {
+        if (datasets.length === 1) {
+          handleDatasetSelected(datasets[0]!);
+        } else if (datasets.length > 1) {
+          setStep('dataset');
+        } else {
+          onSelect('dataset');
+        }
+      }
+    },
+    onExit,
+    isActive: step === 'source',
+  });
+
+  const datasetNav = useListNavigation({
+    items: datasetItems,
+    onSelect: (item: { id: string }) => {
+      handleDatasetSelected(item.id);
+    },
+    onExit: () => setStep('source'),
+    isActive: step === 'dataset',
+  });
+
+  const versionNav = useListNavigation({
+    items: versionItems,
+    onSelect: (item: { id: string }) => {
+      const version = item.id === 'local' ? undefined : item.id;
+      onSelect('dataset', selectedDataset, version);
+    },
+    onExit: () => (datasets.length > 1 ? setStep('dataset') : setStep('source')),
+    isActive: step === 'version' && !loadingVersions,
+  });
+
+  if (step === 'version') {
+    return (
+      <Screen
+        title="Run On-demand Evaluation"
+        onExit={() => (datasets.length > 1 ? setStep('dataset') : setStep('source'))}
+      >
+        <Box flexDirection="column">
+          <Text bold>Select version for {selectedDataset}:</Text>
+          {loadingVersions ? (
+            <GradientText text="Loading versions..." />
+          ) : (
+            <>
+              {versionItems.map((item, i) => (
+                <Text key={item.id}>
+                  {i === versionNav.selectedIndex ? <Text color="cyan">❯ </Text> : '  '}
+                  <Text color={i === versionNav.selectedIndex ? 'cyan' : undefined}>{item.title}</Text>
+                  <Text dimColor> — {item.description}</Text>
+                </Text>
+              ))}
+              <Text dimColor>{'\n'}↑↓ Enter select · Esc back</Text>
+            </>
+          )}
+        </Box>
+      </Screen>
+    );
+  }
+
+  if (step === 'dataset') {
+    return (
+      <Screen title="Run On-demand Evaluation" onExit={() => setStep('source')}>
+        <Box flexDirection="column">
+          <Text bold>Select dataset:</Text>
+          {datasetItems.map((item, i) => (
+            <Text key={item.id}>
+              {i === datasetNav.selectedIndex ? <Text color="cyan">❯ </Text> : '  '}
+              <Text color={i === datasetNav.selectedIndex ? 'cyan' : undefined}>{item.title}</Text>
+            </Text>
+          ))}
+          <Text dimColor>{'\n'}↑↓ Enter select · Esc back</Text>
+        </Box>
+      </Screen>
+    );
+  }
+
+  return (
+    <Screen title="Run On-demand Evaluation" onExit={onExit}>
+      <Box flexDirection="column">
+        <Text bold>Evaluation source:</Text>
+        {sourceItems.map((item, i) => (
+          <Text key={item.id}>
+            {i === sourceNav.selectedIndex ? <Text color="cyan">❯ </Text> : '  '}
+            <Text color={i === sourceNav.selectedIndex ? 'cyan' : undefined}>{item.title}</Text>
+            <Text dimColor> — {item.description}</Text>
+          </Text>
+        ))}
+        <Text dimColor>{'\n'}↑↓ Enter select · Esc back</Text>
+      </Box>
+    </Screen>
+  );
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 // Results view
 // ─────────────────────────────────────────────────────────────────────────────
@@ -253,8 +489,18 @@ function ResultsView({ run, filePath, onRunAnother, onViewRuns, onExit }: Result
             <Text bold>Agent:</Text> {run.agent}
             {'  '}
             <Text bold>Sessions:</Text> {run.sessionCount}
-            {'  '}
-            <Text bold>Lookback:</Text> {run.lookbackDays}d
+            {run.lookbackDays != null && (
+              <>
+                {'  '}
+                <Text bold>Lookback:</Text> {run.lookbackDays}d
+              </>
+            )}
+            {run.datasetName && (
+              <>
+                {'  '}
+                <Text bold>Dataset:</Text> {run.datasetName}
+              </>
+            )}
           </Text>
           {run.referenceInputs && (
             <Text dimColor>
diff --git a/src/cli/tui/screens/run-eval/RunEvalScreen.tsx b/src/cli/tui/screens/run-eval/RunEvalScreen.tsx
index d98a7431c..ea5af8de9 100644
--- a/src/cli/tui/screens/run-eval/RunEvalScreen.tsx
+++ b/src/cli/tui/screens/run-eval/RunEvalScreen.tsx
@@ -1,4 +1,4 @@
-import { detectRegion } from '../../../aws/region';
+import { getRegion } from '../../../commands/shared/region-utils';
 import type { SessionInfo } from '../../../operations/eval';
 import { discoverSessions } from '../../../operations/eval';
 import { loadDeployedProjectConfig, resolveAgent } from '../../../operations/resolve-agent';
@@ -26,12 +26,19 @@ import React, { useEffect, useMemo, useRef, useState } from 'react';
 interface RunEvalScreenProps {
   agents: AgentItem[];
   evaluatorItems: EvaluatorItem[];
+  source?: 'dataset' | 'traces';
   onComplete: (config: RunEvalConfig) => void;
   onExit: () => void;
 }
 
-export function RunEvalScreen({ agents, evaluatorItems: rawEvaluatorItems, onComplete, onExit }: RunEvalScreenProps) {
-  const wizard = useRunEvalWizard(agents.length);
+export function RunEvalScreen({
+  agents,
+  evaluatorItems: rawEvaluatorItems,
+  source = 'traces',
+  onComplete,
+  onExit,
+}: RunEvalScreenProps) {
+  const wizard = useRunEvalWizard(agents.length, source);
 
   // Auto-select agent if only one
   const singleAgent = agents.length === 1 ? agents[0]!.name : null;
@@ -81,7 +88,7 @@ export function RunEvalScreen({ agents, evaluatorItems: rawEvaluatorItems, onCom
     void (async () => {
       try {
         const context = await loadDeployedProjectConfig();
-        const { region } = await detectRegion();
+        const region = await getRegion();
         const agentResult = resolveAgent(context, { runtime: wizard.config.agent });
         if (!agentResult.success) {
           if (!cancelled) setSessionResult({ key: fetchKey, phase: 'error', message: agentResult.error });
@@ -157,6 +164,14 @@ export function RunEvalScreen({ agents, evaluatorItems: rawEvaluatorItems, onCom
     requireSelection: true,
   });
 
+  // Handle Esc during session loading/error
+  useListNavigation({
+    items: [{ id: 'back', title: 'Back' }],
+    onSelect: () => wizard.goBack(),
+    onExit: () => wizard.goBack(),
+    isActive: isSessionsStep && sessionPhase !== 'loaded',
+  });
+
   const sessionsNav = useMultiSelectNavigation({
     items: sessionItems,
     getId: item => item.id,
@@ -195,11 +210,12 @@ export function RunEvalScreen({ agents, evaluatorItems: rawEvaluatorItems, onCom
   const confirmFields = [
     { label: 'Agent', value: wizard.config.agent },
     { label: 'Evaluators', value: wizard.config.evaluators.join(', ') },
-    { label: 'Lookback', value: `${wizard.config.days} day${wizard.config.days !== 1 ? 's' : ''}` },
-    {
-      label: 'Sessions',
-      value: `${wizard.config.sessionIds.length} selected`,
-    },
+    ...(source === 'traces'
+      ? [
+          { label: 'Lookback', value: `${wizard.config.days} day${wizard.config.days !== 1 ? 's' : ''}` },
+          { label: 'Sessions', value: `${wizard.config.sessionIds.length} selected` },
+        ]
+      : [{ label: 'Source', value: 'Dataset' }]),
     ...(wizard.config.assertions.length > 0
       ? [{ label: 'Assertions', value: `${wizard.config.assertions.length} assertion(s)` }]
       : []),
diff --git a/src/cli/tui/screens/run-eval/types.ts b/src/cli/tui/screens/run-eval/types.ts
index 346a30ba1..bafef0af3 100644
--- a/src/cli/tui/screens/run-eval/types.ts
+++ b/src/cli/tui/screens/run-eval/types.ts
@@ -10,6 +10,8 @@ export interface RunEvalConfig {
   assertions: string[];
   expectedTrajectory: string[];
   expectedResponse: string;
+  dataset?: string;
+  datasetVersion?: string;
 }
 
 export const RUN_EVAL_STEP_LABELS: Record<RunEvalStep, string> = {
diff --git a/src/cli/tui/screens/run-eval/useRunEvalWizard.ts b/src/cli/tui/screens/run-eval/useRunEvalWizard.ts
index 9f152f405..3667a26bc 100644
--- a/src/cli/tui/screens/run-eval/useRunEvalWizard.ts
+++ b/src/cli/tui/screens/run-eval/useRunEvalWizard.ts
@@ -2,14 +2,19 @@ import type { RunEvalConfig, RunEvalStep } from './types';
 import { DEFAULT_LOOKBACK_DAYS } from './types';
 import { useCallback, useState } from 'react';
 
-function getAllSteps(agentCount: number): RunEvalStep[] {
+export type EvalSourceMode = 'dataset' | 'traces';
+
+function getAllSteps(agentCount: number, source: EvalSourceMode): RunEvalStep[] {
   const steps: RunEvalStep[] = [];
   if (agentCount > 1) {
     steps.push('agent');
   }
-  steps.push('evaluators', 'days', 'sessions');
-  // groundTruth step is always in the array; setSessions skips it when multiple sessions selected
-  steps.push('groundTruth');
+  steps.push('evaluators');
+  if (source === 'traces') {
+    steps.push('days', 'sessions');
+    // groundTruth step is always in the array; setSessions skips it when multiple sessions selected
+    steps.push('groundTruth');
+  }
   steps.push('confirm');
   return steps;
 }
@@ -32,8 +37,8 @@ export interface GroundTruthData {
   expectedResponse: string;
 }
 
-export function useRunEvalWizard(agentCount: number) {
-  const allSteps = getAllSteps(agentCount);
+export function useRunEvalWizard(agentCount: number, source: EvalSourceMode = 'traces') {
+  const allSteps = getAllSteps(agentCount, source);
   const [config, setConfig] = useState<RunEvalConfig>(getDefaultConfig);
   const [step, setStep] = useState<RunEvalStep>(allSteps[0]!);
 
diff --git a/src/schema/llm-compacted/agentcore.ts b/src/schema/llm-compacted/agentcore.ts
index 112b122ee..fcd9d2066 100644
--- a/src/schema/llm-compacted/agentcore.ts
+++ b/src/schema/llm-compacted/agentcore.ts
@@ -28,6 +28,16 @@ interface AgentCoreProjectSpec {
   abTests: ABTest[]; // Unique by name — A/B test experiments
   /** @internal Auto-managed by AB test creation. Do not configure directly. */
   httpGateways: HttpGateway[]; // Unique by name — HTTP gateways bound to a runtime
+  datasets: DatasetSpec[]; // Unique by name — datasets for Dataset Management
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// DATASET
+// ─────────────────────────────────────────────────────────────────────────────
+
+interface DatasetSpec {
+  name: string; // @regex ^[a-zA-Z][a-zA-Z0-9_]{0,47}$ @max 48
+  description?: string;
 }
 
 // ─────────────────────────────────────────────────────────────────────────────
diff --git a/src/schema/schemas/agentcore-project.ts b/src/schema/schemas/agentcore-project.ts
index b3f4d3d6f..a3373eb45 100644
--- a/src/schema/schemas/agentcore-project.ts
+++ b/src/schema/schemas/agentcore-project.ts
@@ -11,6 +11,7 @@ import { AgentEnvSpecSchema } from './agent-env';
 import { AgentCoreGatewaySchema, AgentCoreGatewayTargetSchema, AgentCoreMcpRuntimeToolSchema } from './mcp';
 import { ABTestSchema } from './primitives/ab-test';
 import { ConfigBundleSchema } from './primitives/config-bundle';
+import { DatasetSchema } from './primitives/dataset';
 import {
   EvaluationLevelSchema,
   EvaluatorConfigSchema,
@@ -69,6 +70,9 @@ export type { Policy, PolicyEngine, ValidationMode } from './primitives/policy';
 export { PolicyEngineNameSchema, PolicyNameSchema, PolicySchema, ValidationModeSchema } from './primitives/policy';
 export { TagsSchema };
 export type { Tags } from './primitives/tags';
+export { DatasetSchema };
+export { DatasetNameSchema, DatasetSchemaTypeSchema } from './primitives/dataset';
+export type { Dataset, DatasetSchemaType } from './primitives/dataset';
 export type { ABTestMode, TargetRef, GatewayFilter, PerVariantOnlineEvaluationConfig } from './primitives/ab-test';
 export { ABTestModeSchema, TargetRefSchema, GatewayFilterSchema } from './primitives/ab-test';
 export type { HttpGatewayTarget } from './primitives/http-gateway';
@@ -368,6 +372,20 @@ export const AgentCoreProjectSpecSchema = z
           name => `Duplicate HTTP gateway name: ${name}`
         )
       ),
+
+    datasets: z
+      .array(DatasetSchema)
+      .optional()
+      .superRefine((val, ctx) => {
+        if (!val) return;
+        const seen = new Set<string>();
+        for (const dataset of val) {
+          if (seen.has(dataset.name)) {
+            ctx.addIssue({ code: z.ZodIssueCode.custom, message: `Duplicate dataset name: ${dataset.name}` });
+          }
+          seen.add(dataset.name);
+        }
+      }),
   })
   .strict()
   .superRefine((spec, ctx) => {
diff --git a/src/schema/schemas/deployed-state.ts b/src/schema/schemas/deployed-state.ts
index a37469799..96cd37a59 100644
--- a/src/schema/schemas/deployed-state.ts
+++ b/src/schema/schemas/deployed-state.ts
@@ -174,6 +174,18 @@ export const OnlineEvalDeployedStateSchema = z.object({
 
 export type OnlineEvalDeployedState = z.infer<typeof OnlineEvalDeployedStateSchema>;
 
+// ============================================================================
+// Dataset Deployed State
+// ============================================================================
+
+export const DatasetDeployedStateSchema = z.object({
+  datasetId: z.string().min(1),
+  datasetArn: z.string().min(1),
+  contentHash: z.string().optional(),
+});
+
+export type DatasetDeployedState = z.infer<typeof DatasetDeployedStateSchema>;
+
 // ============================================================================
 // Configuration Bundle Deployed State
 // ============================================================================
@@ -241,6 +253,7 @@ export const DeployedResourceStateSchema = z.object({
   credentials: z.record(z.string(), CredentialDeployedStateSchema).optional(),
   evaluators: z.record(z.string(), EvaluatorDeployedStateSchema).optional(),
   onlineEvalConfigs: z.record(z.string(), OnlineEvalDeployedStateSchema).optional(),
+  datasets: z.record(z.string(), DatasetDeployedStateSchema).optional(),
   configBundles: z.record(z.string(), ConfigBundleDeployedStateSchema).optional(),
   abTests: z.record(z.string(), ABTestDeployedStateSchema).optional(),
   httpGateways: z.record(z.string(), HttpGatewayDeployedStateSchema).optional(),
diff --git a/src/schema/schemas/primitives/__tests__/dataset.test.ts b/src/schema/schemas/primitives/__tests__/dataset.test.ts
new file mode 100644
index 000000000..e279482e6
--- /dev/null
+++ b/src/schema/schemas/primitives/__tests__/dataset.test.ts
@@ -0,0 +1,108 @@
+import { DatasetNameSchema, DatasetSchema } from '../dataset';
+import { describe, expect, it } from 'vitest';
+
+describe('DatasetNameSchema', () => {
+  describe('valid names', () => {
+    it('accepts a simple alphabetic name', () => {
+      expect(DatasetNameSchema.safeParse('MyDataset').success).toBe(true);
+    });
+
+    it('accepts a name with alphanumeric characters', () => {
+      expect(DatasetNameSchema.safeParse('Dataset123').success).toBe(true);
+    });
+
+    it('accepts a name with underscores', () => {
+      expect(DatasetNameSchema.safeParse('my_dataset').success).toBe(true);
+    });
+
+    it('accepts a name at max length (48 chars)', () => {
+      const name = 'A' + 'a'.repeat(47);
+      expect(DatasetNameSchema.safeParse(name).success).toBe(true);
+    });
+  });
+
+  describe('invalid names', () => {
+    it('rejects an empty string', () => {
+      expect(DatasetNameSchema.safeParse('').success).toBe(false);
+    });
+
+    it('rejects a name starting with a digit', () => {
+      expect(DatasetNameSchema.safeParse('1dataset').success).toBe(false);
+    });
+
+    it('rejects a name starting with an underscore', () => {
+      expect(DatasetNameSchema.safeParse('_dataset').success).toBe(false);
+    });
+
+    it('rejects a name with hyphens', () => {
+      expect(DatasetNameSchema.safeParse('my-dataset').success).toBe(false);
+    });
+
+    it('rejects a name exceeding 48 characters', () => {
+      const name = 'A' + 'a'.repeat(48);
+      expect(DatasetNameSchema.safeParse(name).success).toBe(false);
+    });
+  });
+});
+
+describe('DatasetSchema', () => {
+  const validDataset = {
+    name: 'MyDataset',
+    schemaType: 'AGENTCORE_EVALUATION_PREDEFINED_V1',
+    config: { managed: { location: 'datasets/MyDataset.jsonl' } },
+  };
+
+  it('validates a complete dataset', () => {
+    const result = DatasetSchema.safeParse(validDataset);
+    expect(result.success).toBe(true);
+  });
+
+  it('validates a dataset with description', () => {
+    const result = DatasetSchema.safeParse({ ...validDataset, description: 'A test dataset' });
+    expect(result.success).toBe(true);
+  });
+
+  it('validates SIMULATED_V1 schema type', () => {
+    const result = DatasetSchema.safeParse({
+      ...validDataset,
+      schemaType: 'AGENTCORE_EVALUATION_SIMULATED_V1',
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects a dataset without a name', () => {
+    const { name: _, ...noName } = validDataset;
+    const result = DatasetSchema.safeParse(noName);
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects a dataset with an invalid name', () => {
+    const result = DatasetSchema.safeParse({ ...validDataset, name: '1invalid' });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects a dataset without schemaType', () => {
+    const { schemaType: _, ...noSchema } = validDataset;
+    const result = DatasetSchema.safeParse(noSchema);
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects an invalid schemaType', () => {
+    const result = DatasetSchema.safeParse({ ...validDataset, schemaType: 'INVALID_TYPE' });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects a dataset without config', () => {
+    const { config: _, ...noConfig } = validDataset;
+    const result = DatasetSchema.safeParse(noConfig);
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects a dataset with empty managed location', () => {
+    const result = DatasetSchema.safeParse({
+      ...validDataset,
+      config: { managed: { location: '' } },
+    });
+    expect(result.success).toBe(false);
+  });
+});
diff --git a/src/schema/schemas/primitives/dataset.ts b/src/schema/schemas/primitives/dataset.ts
new file mode 100644
index 000000000..272f05011
--- /dev/null
+++ b/src/schema/schemas/primitives/dataset.ts
@@ -0,0 +1,67 @@
+import { z } from 'zod';
+
+// ============================================================================
+// Dataset Types
+// ============================================================================
+
+/**
+ * Dataset name validation.
+ * Pattern: ^[a-zA-Z][a-zA-Z0-9_]{0,47}$
+ */
+export const DatasetNameSchema = z
+  .string()
+  .min(1, 'Dataset name is required')
+  .max(48)
+  .regex(
+    /^[a-zA-Z][a-zA-Z0-9_]{0,47}$/,
+    'Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars)'
+  );
+
+/**
+ * Versioned schema type governing the structure of dataset examples.
+ * Immutable after creation (createOnly CFN property).
+ */
+export const DatasetSchemaTypeSchema = z.enum([
+  'AGENTCORE_EVALUATION_PREDEFINED_V1',
+  'AGENTCORE_EVALUATION_SIMULATED_V1',
+]);
+
+export type DatasetSchemaType = z.infer<typeof DatasetSchemaTypeSchema>;
+
+/**
+ * Managed dataset config — CLI manages the local file and syncs to service.
+ */
+export const DatasetManagedConfigSchema = z.object({
+  location: z.string().min(1),
+});
+
+/**
+ * Dataset configuration.
+ */
+export const DatasetConfigSchema = z.object({
+  managed: DatasetManagedConfigSchema,
+});
+
+/**
+ * Dataset specification in agentcore.json.
+ */
+export const DatasetSchema = z.object({
+  /** Dataset name */
+  name: DatasetNameSchema,
+  /**
+   * Versioned schema type governing dataset structure.
+   * Immutable after creation.
+   */
+  schemaType: DatasetSchemaTypeSchema,
+  /** Optional description (max 200 characters) */
+  description: z.string().max(200).optional(),
+  /** Dataset content management config */
+  config: DatasetConfigSchema,
+  /** Optional KMS key ARN for SSE-KMS encryption. Immutable after creation. */
+  kmsKeyArn: z
+    .string()
+    .regex(/^arn:aws(-[a-z]+)*:kms:[a-zA-Z0-9-]*:[0-9]{12}:key\/[a-zA-Z0-9-]{36}$/, 'Must be a valid KMS key ARN')
+    .optional(),
+});
+
+export type Dataset = z.infer<typeof DatasetSchema>;
diff --git a/src/schema/schemas/primitives/index.ts b/src/schema/schemas/primitives/index.ts
index 38967a181..bd4ce95e6 100644
--- a/src/schema/schemas/primitives/index.ts
+++ b/src/schema/schemas/primitives/index.ts
@@ -6,6 +6,9 @@ export type {
   TrafficAllocationConfig,
   VariantConfiguration,
 } from './ab-test';
+
+export type { Dataset, DatasetSchemaType } from './dataset';
+export { DatasetNameSchema, DatasetSchema, DatasetSchemaTypeSchema } from './dataset';
 export {
   ABTestNameSchema,
   ABTestDescriptionSchema,