# QAAS: Quantum Agent Annealed Swarm

Assigns 1000 intents (tasks) to 48 agents using a constrained quadratic model (CQM) with dependency chains, and compares simulated annealing against a greedy baseline.

## Setup

Install D-Wave Ocean SDK:
```bash
pip install dwave-ocean-sdk
```

Configure your API token:
```bash
dwave config create
```

In [55]:
import random
import time
import numpy as np
from collections import defaultdict
from dimod import ConstrainedQuadraticModel, Binary, cqm_to_bqm
from dwave.system import LeapHybridCQMSampler
import neal

## Define the Swarm

48 agents: 4 cloud model tiers with 10 sessions each (40 agents) plus 8 local models.
Each cloud agent has a capacity of 25 tasks; local agents have small capacities (2–4 tasks each) but are free.

In [56]:
# Registry of every agent in the swarm, keyed by agent name. Insertion order
# is significant: agent_names (and so each agent's index) is derived from it.
agents = {}

# Cloud tiers as (model name, cost per task, quality, supported task types).
# Every tier is expanded into 10 identically configured sessions below.
cloud_models = [
    ('claude',  0.015, 0.95, {'simple', 'moderate', 'complex', 'reasoning', 'code-analysis', 'long-context'}),
    ('gpt5.2',  0.012, 0.92, {'simple', 'moderate', 'complex', 'reasoning', 'code-analysis', 'long-context'}),
    ('gemini',  0.010, 0.88, {'simple', 'moderate', 'complex', 'reasoning', 'code-analysis', 'long-context'}),
    ('kimi2.5', 0.008, 0.85, {'simple', 'moderate', 'complex', 'reasoning', 'long-context'}),
]

for model_name, cost, quality, capabilities in cloud_models:
    agents.update({
        f'{model_name}-{session}': dict(
            cost_per_task=cost,
            quality=quality,
            capabilities=capabilities,  # the tier's set object is shared by its sessions
            is_local=False,
            capacity=25,
            latency=2.0,
        )
        for session in range(10)
    })

# Local models as (name, cost per task, quality, supported task types,
# capacity, latency): free and lower latency than the cloud tiers, but lower
# quality and only a handful of tasks each.
local_models = [
    ('llama3.2-1b',    0, 0.40, {'simple'}, 3, 0.5),
    ('llama3.2-3b',    0, 0.55, {'simple', 'moderate'}, 2, 0.8),
    ('llama3.1-8b',    0, 0.65, {'simple', 'moderate'}, 2, 1.2),
    ('codellama-7b',   0, 0.70, {'simple', 'moderate', 'code-analysis'}, 2, 1.0),
    ('mistral-7b',     0, 0.60, {'simple', 'moderate'}, 2, 1.0),
    ('phi3-mini',      0, 0.45, {'simple'}, 4, 0.3),
    ('qwen2-7b',       0, 0.65, {'simple', 'moderate', 'reasoning'}, 2, 1.1),
    ('deepseek-coder', 0, 0.72, {'simple', 'moderate', 'code-analysis'}, 2, 1.0),
]

for model_name, cost, quality, capabilities, capacity, latency in local_models:
    agents[model_name] = dict(
        cost_per_task=cost,
        quality=quality,
        capabilities=capabilities,
        is_local=True,
        capacity=capacity,
        latency=latency,
    )

# Position in agent_names is the agent index used everywhere downstream.
agent_names = list(agents)
num_agents = len(agent_names)

# Split the pool once, then summarise head count and capacity per group.
cloud_pool = [spec for spec in agents.values() if not spec['is_local']]
local_pool = [spec for spec in agents.values() if spec['is_local']]
cloud_cap = sum(spec['capacity'] for spec in cloud_pool)
local_cap = sum(spec['capacity'] for spec in local_pool)

print(f"Total agents: {num_agents}")
print(f"Cloud: {len(cloud_pool)} agents, {cloud_cap} capacity")
print(f"Local: {len(local_pool)} agents, {local_cap} capacity")
print(f"Total capacity: {cloud_cap + local_cap}")

Total agents: 48
Cloud: 40 agents, 1000 capacity
Local: 8 agents, 19 capacity
Total capacity: 1019


## Define the Intents

1000 tasks with workflow dependency chains. Quality bars force routing to capable agents.

In [57]:
# Task-name templates per task type. Names are reused cyclically when more
# tasks of a type are requested than there are templates.
intent_templates = {
    'simple': [
        'fix-typo', 'fix-lint', 'fix-whitespace', 'fix-indent',
        'rename-var', 'sort-imports', 'remove-unused-import',
        'add-type-hint', 'update-version-bump', 'fix-trailing-comma',
        'add-newline-eof', 'remove-console-log', 'fix-semicolon',
        'update-todo-comment', 'fix-bracket-style', 'swap-quotes',
        'add-missing-return', 'fix-null-check', 'update-env-var', 'fix-off-by-one',
    ],
    'moderate': [
        'implement-helper-function', 'write-unit-test', 'add-input-validation',
        'fix-bug-in-handler', 'add-error-handling', 'refactor-loop',
        'add-api-endpoint', 'update-db-query', 'add-retry-logic',
        'implement-dto', 'add-request-logging', 'fix-async-await',
        'add-rate-limiter', 'update-middleware', 'add-cache-layer',
        'implement-pagination', 'fix-memory-leak', 'add-health-check',
        'update-serializer', 'implement-webhook-handler',
    ],
    'complex': [
        'architect-new-service', 'design-db-schema', 'implement-auth-flow',
        'build-ci-cd-pipeline', 'migrate-to-graphql', 'optimize-query-perf',
        'implement-search-index', 'design-rest-api', 'build-admin-dashboard',
        'implement-job-queue', 'design-caching-strategy', 'build-monitoring',
        'implement-oauth2', 'design-microservice-split', 'build-etl-pipeline',
        'implement-graphql-schema', 'design-event-bus', 'build-deploy-pipeline',
        'implement-rate-limiting', 'design-db-sharding',
    ],
    'reasoning': [
        'review-pr-architecture', 'debug-prod-incident', 'plan-migration-strategy',
        'evaluate-framework-choice', 'design-for-scale', 'analyze-security-surface',
        'plan-tech-debt-paydown', 'review-system-design', 'analyze-perf-bottleneck',
        'plan-rollback-strategy', 'evaluate-buy-vs-build', 'design-disaster-recovery',
    ],
    'code-analysis': [
        'review-pr-for-bugs', 'find-security-vulns', 'analyze-dep-tree',
        'audit-test-coverage', 'find-dead-code', 'measure-cyclomatic-complexity',
        'review-error-handling', 'find-code-duplication',
    ],
}

# Workload mix as (task type, number of tasks, minimum acceptable quality).
distribution = [
    ('simple',        400, 0.50),
    ('moderate',      300, 0.70),
    ('complex',       150, 0.85),
    ('reasoning',     100, 0.90),
    ('code-analysis',  50, 0.80),
]

# Generate the tasks type by type. The numeric suffix of each id is the
# task's position in `intents`, which keeps ids unique across reused names.
intents = []
for complexity, count, min_quality in distribution:
    names_for_type = intent_templates[complexity]
    for seq in range(count):
        intents.append(dict(
            id=f'{names_for_type[seq % len(names_for_type)]}-{len(intents)}',
            complexity=complexity,
            min_quality=min_quality,
            depends=[],  # a fresh list per task; filled in when chains are built
        ))

# Next unused id; equals the number of tasks generated so far.
intent_id = len(intents)

num_intents = len(intents)

# === WORKFLOW DEPENDENCY CHAINS ===
# Collects one (workflow type, [task index, ...]) entry per chain built.
workflow_chains = []

def find_free_task(complexity, used):
    """Return the index of the first task of `complexity` that is still free.

    A task is free when its index is not in `used` and it has no recorded
    dependencies. Returns None when no such task exists.
    """
    free_indices = (
        idx
        for idx, task in enumerate(intents)
        if task['complexity'] == complexity and idx not in used and not task['depends']
    )
    return next(free_indices, None)

# Indices of every task already placed in some chain (including chain heads,
# which have no dependencies of their own).
used_in_chains = set()

# One row per workflow type: (type, chains to build, task type of each step).
#   feature-dev: spec -> impl -> test -> review
#   bug-fix:     triage -> reproduce -> fix
#   infra:       design -> provision -> review
chain_specs = [
    ('feature-dev', 25, ('simple', 'moderate', 'complex', 'reasoning')),
    ('bug-fix',     15, ('simple', 'moderate', 'complex')),
    ('infra',       10, ('moderate', 'complex', 'code-analysis')),
]

for wf_type, wanted, stages in chain_specs:
    for _ in range(wanted):
        steps = [find_free_task(stage, used_in_chains) for stage in stages]
        if None in steps:
            # Not enough free tasks for a full chain: build nothing.
            continue
        # Each step depends on the step immediately before it.
        for upstream, downstream in zip(steps, steps[1:]):
            intents[downstream]['depends'] = [upstream]
        used_in_chains.update(steps)
        workflow_chains.append((wf_type, steps))

num_chained = sum(1 for task in intents if task['depends'])

# Tally chains per workflow type for the summary below.
chains_by_type = defaultdict(int)
for wf_type, _ in workflow_chains:
    chains_by_type[wf_type] += 1

print(f"Total tasks: {num_intents}")
print(f"Workflow chains: {len(workflow_chains)} ({num_chained} dependent tasks)")
print(f"  feature-dev: {chains_by_type['feature-dev']}")
print(f"  bug-fix:     {chains_by_type['bug-fix']}")
print(f"  infra:       {chains_by_type['infra']}")

print("\nBy type:")
for task_type, task_count, quality_bar in distribution:
    print(f"  {task_type:<15} {task_count:>4} tasks  (min quality: {quality_bar})")

Total tasks: 1000
Workflow chains: 50 (125 dependent tasks)
  feature-dev: 25
  bug-fix:     15
  infra:       10

By type:
  simple           400 tasks  (min quality: 0.5)
  moderate         300 tasks  (min quality: 0.7)
  complex          150 tasks  (min quality: 0.85)
  reasoning        100 tasks  (min quality: 0.9)
  code-analysis     50 tasks  (min quality: 0.8)


## Build the CQM

CQM separates objective from constraints. Assignment and capacity are hard constraints;
dependency quality ordering is a soft penalty in the objective.

In [58]:
def can_assign(intent, agent_name):
    """Return True when the named agent may take this task.

    The agent must list the task's complexity among its capabilities, and its
    quality must not be below the task's minimum quality.
    """
    agent = agents[agent_name]
    supports_type = intent['complexity'] in agent['capabilities']
    return supports_type and not (agent['quality'] < intent['min_quality'])

def get_cost(intent, agent_name):
    """Return the objective cost of running this task on the named agent.

    The cost is the sum of three parts: the agent's price per task, an
    overkill charge (quality above the task's minimum, times the price,
    times 2), and a latency charge (agent latency times 0.005).
    """
    agent = agents[agent_name]
    price = agent['cost_per_task']
    surplus = agent['quality'] - intent['min_quality']
    return price + surplus * price * 2 + agent['latency'] * 0.005

print(f"Building CQM: {num_intents} tasks x {num_agents} agents")
start_time = time.time()

cqm = ConstrainedQuadraticModel()

# Decision variables: x[i, j] stands for "task i runs on agent j". Pairs that
# can_assign() rejects get no variable at all.
x = {
    (i, j): Binary(f'x_{i}_{j}')
    for i, intent in enumerate(intents)
    for j, name in enumerate(agent_names)
    if can_assign(intent, name)
}

print(f"Valid assignments: {len(x)}")
print(f"Filtered out: {num_intents * num_agents - len(x)}")

# Index the valid pairs in both directions. x was filled task by task and
# agent by agent, so both kinds of list come out in ascending index order.
agents_for_task = {}
tasks_for_agent = {}
for i, j in x:
    agents_for_task.setdefault(i, []).append(j)
    tasks_for_agent.setdefault(j, []).append(i)
agent_quality = [agents[name]['quality'] for name in agent_names]

# === Objective: minimize cost + dependency quality penalties ===
# Accumulate in place, one term at a time.
objective = 0
for (i, j), var in x.items():
    objective += get_cost(intents[i], agent_names[j]) * var

# Soft penalty: charge DEP_PENALTY for every (task, dependency) agent pairing
# in which the task's agent has lower quality than the dependency's agent.
DEP_PENALTY = 100.0
dep_terms = 0
for i, intent in enumerate(intents):
    for dep_idx in intent.get('depends', []):
        for j in agents_for_task.get(i, ()):
            for k in agents_for_task.get(dep_idx, ()):
                if agent_quality[j] < agent_quality[k]:
                    objective += DEP_PENALTY * x[i, j] * x[dep_idx, k]
                    dep_terms += 1

cqm.set_objective(objective)
print(f"Dependency penalty terms: {dep_terms}")

# === Hard constraints ===
# Every task that has at least one valid agent goes to exactly one of them.
for i in range(num_intents):
    choices = [x[i, j] for j in agents_for_task.get(i, ())]
    if choices:
        cqm.add_constraint(sum(choices) == 1, label=f'assign_{i}')

# No agent takes more tasks than its capacity.
for j, name in enumerate(agent_names):
    cap = agents[name]['capacity']
    workload = [x[i, j] for i in tasks_for_agent.get(j, ())]
    if workload:
        cqm.add_constraint(sum(workload) <= cap, label=f'cap_{name}')

build_time = time.time() - start_time
print(f"Constraints: {len(cqm.constraints)}")
print(f"Build time: {build_time:.2f}s")

Building CQM: 1000 tasks x 48 agents
Valid assignments: 40500
Filtered out: 7500
Dependency penalty terms: 62740
Constraints: 1046
Build time: 2.74s


## Solve with Simulated Annealing

Convert CQM to BQM with automatic slack variables, then run SA locally.

In [59]:
print("Converting CQM to BQM...")
# cqm_to_bqm returns two values; only the BQM is kept here.
# NOTE(review): the constraint weight passed here (10.0) is lower than
# DEP_PENALTY (100.0) used for the soft dependency terms when the CQM was
# built; confirm the hard constraints still dominate as intended.
bqm, _ = cqm_to_bqm(cqm, lagrange_multiplier=10.0)

print(
    f"BQM variables: {len(bqm.variables)} (includes slack)\n"
    f"BQM quadratic terms: {len(bqm.quadratic)}"
)
print(f"\nRunning simulated annealing on {num_intents}-intent problem...")

# The timed region covers sampler construction and the sampling call.
start_time = time.time()
sampler = neal.SimulatedAnnealingSampler()
sampleset = sampler.sample(bqm, num_reads=10, num_sweeps=500)
solve_time = time.time() - start_time

print(f"Solve time: {solve_time:.1f}s")

# Take the sample set's `first` entry as the best read.
best = sampleset.first
print(f"Best energy: {best.energy:.2f}")

Converting CQM to BQM...
BQM variables: 40712 (includes slack)
BQM quadratic terms: 19538946

Running simulated annealing on 1000-intent problem...
Solve time: 110.8s
Best energy: 76.30


In [60]:
# Decode the best sample: each variable named x_<task>_<agent> with value 1 is
# one assignment; every other variable is skipped.
assignments = {}
for var, val in best.sample.items():
    if val != 1 or not var.startswith('x_'):
        continue
    parts = var.split('_')
    assignments[int(parts[1])] = agent_names[int(parts[2])]

# Number of tasks each agent received.
agent_counts = defaultdict(int)
for assigned_agent in assignments.values():
    agent_counts[assigned_agent] += 1

# Price per task only; the overkill and latency parts of get_cost are excluded.
money_spent = sum(agents[a]['cost_per_task'] for a in assignments.values())

quality_met = sum(
    1 for task_idx, a in assignments.items()
    if agents[a]['quality'] >= intents[task_idx]['min_quality']
)
unassigned = [task_idx for task_idx in range(num_intents) if task_idx not in assignments]

# One report line per agent that received more tasks than its capacity.
capacity_violations = [
    f"  {agent_name}: {n_tasks}/{agents[agent_name]['capacity']}"
    for agent_name, n_tasks in agent_counts.items()
    if n_tasks > agents[agent_name]['capacity']
]

# A dependency is violated when a task runs on a lower-quality agent than the
# task it depends on; pairs with an unassigned task are ignored.
anneal_dep_violations = sum(
    1
    for task_idx, task in enumerate(intents)
    for dep_idx in task.get('depends', [])
    if task_idx in assignments and dep_idx in assignments
    and agents[assignments[task_idx]]['quality'] < agents[assignments[dep_idx]]['quality']
)

# Simple tasks that landed on an agent costing more than $0.01 per task.
overkill = [
    f"  {intents[task_idx]['id']} -> {agent_name}"
    for task_idx, agent_name in assignments.items()
    if intents[task_idx]['complexity'] == 'simple' and agents[agent_name]['cost_per_task'] > 0.01
]

def _print_section(title):
    """Print a report section title framed by two horizontal rules."""
    rule = '─' * 60
    print(f"\n{rule}")
    print(f"  {title}")
    print(rule)


print("=" * 60)
print("  FACTORY FLOOR SHIFT REPORT")
print("=" * 60)
print(f"\n  Tasks completed:     {len(assignments)}/{num_intents}")
print(f"  Tasks dropped:       {len(unassigned)}")
print(f"  Money spent:         ${money_spent:.2f}")
print(f"  Quality targets met: {quality_met}/{len(assignments)}")
print(f"  Capacity violations: {len(capacity_violations)}")
print(f"  Dep violations:      {anneal_dep_violations}")

# Workflow chain quality progression: show only the first few chains of each
# workflow type and summarise the rest in a single line.
max_shown_per_type = 3
_print_section("WORKFLOW CHAIN QUALITY PROGRESSION (sample)")

shown = defaultdict(int)
for wf_type, steps in workflow_chains:
    if shown[wf_type] >= max_shown_per_type:
        continue
    shown[wf_type] += 1
    chain_info = []
    for s in steps:
        if s in assignments:
            a = assignments[s]
            q = agents[a]['quality']
            chain_info.append(f"{a}({q})")
        else:
            chain_info.append("UNASSIGNED")
    print(f"  {wf_type}: {' -> '.join(chain_info)}")

remaining = len(workflow_chains) - sum(shown.values())
if remaining > 0:
    print(f"  ... and {remaining} more chains")

_print_section("AGENT DISPATCH")
print(f"  {'Model':<20} {'Tasks':<8} {'Cap':<6} {'$/task':<8} {'Quality'}")
print(f"  {'─'*52}")

# One row per cloud tier. The tier's sessions (named "<model>-<n>") and its
# combined capacity are read from `agents` rather than hard-coded, so the row
# stays correct if the session count or per-session capacity changes.
for model_name, cost, quality, _ in cloud_models:
    sessions = [
        name for name, spec in agents.items()
        if not spec['is_local'] and name.rsplit('-', 1)[0] == model_name
    ]
    total = sum(agent_counts[name] for name in sessions)
    tier_capacity = sum(agents[name]['capacity'] for name in sessions)
    tier_label = f"{model_name} (x{len(sessions)})"
    print(f"  {tier_label:<20} {total:<8} {tier_capacity:<6} ${cost:<7} {quality}")

print(f"  {'─'*52}")

# One row per local model.
for model_name, _, quality, _, capacity, _ in local_models:
    count = agent_counts[model_name]
    print(f"  {model_name:<20} {count:<8} {capacity:<6} $0      {quality}")

cloud_tasks = sum(1 for a in assignments.values() if not agents[a]['is_local'])
local_tasks = len(assignments) - cloud_tasks

_print_section("COST EFFICIENCY")
print(f"  Local (free):  {local_tasks} tasks  - $0.00")
print(f"  Cloud (paid):  {cloud_tasks} tasks  - ${money_spent:.2f}")
# max(..., 1) avoids dividing by zero when nothing was assigned.
print(f"  Avg cost/task: ${money_spent/max(len(assignments),1):.4f}")

if overkill:
    print(f"\n  OVERKILL ({len(overkill)} expensive models on simple tasks)")
    for line in overkill[:5]:
        print(line)
    if len(overkill) > 5:
        print(f"  ... and {len(overkill)-5} more")

if capacity_violations:
    print(f"\n  OVERLOADED AGENTS")
    for line in capacity_violations:
        print(line)

  FACTORY FLOOR SHIFT REPORT

  Tasks completed:     1000/1000
  Tasks dropped:       0
  Money spent:         $11.10
  Quality targets met: 1000/1000
  Capacity violations: 0
  Dep violations:      0

────────────────────────────────────────────────────────────
  WORKFLOW CHAIN QUALITY PROGRESSION (sample)
────────────────────────────────────────────────────────────
  feature-dev: gemini-7(0.88) -> gemini-9(0.88) -> gpt5.2-8(0.92) -> gpt5.2-2(0.92)
  feature-dev: kimi2.5-3(0.85) -> kimi2.5-0(0.85) -> kimi2.5-8(0.85) -> claude-2(0.95)
  feature-dev: kimi2.5-3(0.85) -> gpt5.2-6(0.92) -> claude-8(0.95) -> claude-1(0.95)
  bug-fix: kimi2.5-7(0.85) -> gemini-0(0.88) -> claude-1(0.95)
  bug-fix: kimi2.5-8(0.85) -> kimi2.5-2(0.85) -> kimi2.5-9(0.85)
  bug-fix: kimi2.5-1(0.85) -> kimi2.5-4(0.85) -> claude-9(0.95)
  infra: kimi2.5-3(0.85) -> gemini-0(0.88) -> claude-1(0.95)
  infra: kimi2.5-2(0.85) -> kimi2.5-2(0.85) -> claude-1(0.95)
  infra: kimi2.5-6(0.85) -> gemini-8(0.88) -> gpt5.2-1(0.92)

## Solve with D-Wave (Quantum)

CQM goes directly to the hybrid solver — no manual BQM conversion needed.
Requires a D-Wave Leap API token.

In [61]:
# Uncomment to run on D-Wave's Leap hybrid CQM solver (requires a Leap API token)

# sampler = LeapHybridCQMSampler()
# sampleset = sampler.sample_cqm(cqm, time_limit=60)
#
# feasible = sampleset.filter(lambda s: s.is_feasible)
# if feasible:
#     best = feasible.first
#     print(f"Best feasible energy: {best.energy}")
#     print(f"QPU access time: {sampleset.info.get('qpu_access_time', 'N/A')} us")
# else:
#     print("No feasible solution found")

## Greedy vs Annealing

Greedy picks the cheapest valid agent per task, first-come first-served. No global optimization, no dependency awareness.

In [62]:
def greedy_factory(intent_list, agent_dict):
    """Assign each task, in order, to the cheapest agent that can still take it.

    An agent is eligible for a task when it lists the task's complexity in its
    capabilities, its quality is at least the task's minimum quality, and it
    has spare capacity. Among eligible agents the lowest `cost_per_task` wins;
    ties go to the agent that appears first in `agent_dict`. Tasks with no
    eligible agent are left out of the result.

    Args:
        intent_list: sequence of task dicts with 'complexity' and
            'min_quality' keys.
        agent_dict: mapping of agent name to a dict with 'capabilities',
            'quality', 'capacity' and 'cost_per_task' keys.

    Returns:
        A `(result, cost)` tuple: `result` maps task index to the chosen
        agent name, and `cost` is the summed `cost_per_task` of all
        assignments (0 when nothing was assigned).
    """
    result = {}
    load = dict.fromkeys(agent_dict, 0)
    cost = 0

    for idx, intent in enumerate(intent_list):
        best = None
        best_cost = float('inf')
        for name, a in agent_dict.items():
            if intent['complexity'] not in a['capabilities']:
                continue
            if a['quality'] < intent['min_quality']:
                continue
            if load[name] >= a['capacity']:
                continue
            # Strict comparison keeps the earliest agent on equal cost.
            if a['cost_per_task'] < best_cost:
                best_cost = a['cost_per_task']
                best = name
        # Compare against None explicitly: a falsy agent name (e.g. '') is
        # still a valid choice and must not be treated as "no agent found".
        if best is not None:
            result[idx] = best
            load[best] += 1
            cost += agent_dict[best]['cost_per_task']

    return result, cost

# Baseline: run the greedy allocator over the same tasks and agents.
greedy_assign, greedy_cost = greedy_factory(intents, agents)
greedy_local = sum(1 for a in greedy_assign.values() if agents[a]['is_local'])
greedy_cloud = len(greedy_assign) - greedy_local

# A dependency is violated when a task runs on a lower-quality agent than the
# task it depends on; pairs with an unplaced task are ignored.
greedy_dep_violations = sum(
    1
    for task_idx, task in enumerate(intents)
    for dep_idx in task.get('depends', [])
    if task_idx in greedy_assign and dep_idx in greedy_assign
    and agents[greedy_assign[task_idx]]['quality'] < agents[greedy_assign[dep_idx]]['quality']
)

anneal_local = sum(1 for a in assignments.values() if agents[a]['is_local'])
anneal_cloud = len(assignments) - anneal_local

banner = "=" * 60
print(banner)
print("  HEAD TO HEAD: GREEDY vs ANNEALING")
print(banner)
print(f"\n  {'Metric':<25} {'Greedy':<15} {'Annealing':<15}")
print(f"  {'─'*55}")

# One row per metric, each value already rendered as text so every row can
# share the same column layout.
comparison_rows = [
    ('Tasks shipped', str(len(greedy_assign)), str(len(assignments))),
    ('Tasks dropped', str(num_intents - len(greedy_assign)), str(len(unassigned))),
    ('Money spent', f"${greedy_cost:.2f}", f"${money_spent:.2f}"),
    ('Cloud tasks', str(greedy_cloud), str(anneal_cloud)),
    ('Local tasks (free)', str(greedy_local), str(anneal_local)),
    ('Dep violations', str(greedy_dep_violations), str(anneal_dep_violations)),
]
for metric, greedy_value, anneal_value in comparison_rows:
    print(f"  {metric:<25} {greedy_value:<15} {anneal_value:<15}")

# Headline takeaways: each line is printed only when its comparison holds.
extra_shipped = len(assignments) - len(greedy_assign)
if extra_shipped > 0:
    print(f"\n  -> Annealing shipped {extra_shipped} more tasks")
if money_spent < greedy_cost:
    print(f"  -> Annealing saved ${greedy_cost - money_spent:.2f}")
elif money_spent > greedy_cost:
    print(f"  -> Greedy was ${money_spent - greedy_cost:.2f} cheaper")
if anneal_local > greedy_local:
    print(f"  -> Annealing used {anneal_local - greedy_local} more free local agents")
if greedy_dep_violations > anneal_dep_violations:
    print(f"  -> Annealing had {greedy_dep_violations - anneal_dep_violations} fewer dependency violations")

  HEAD TO HEAD: GREEDY vs ANNEALING

  Metric                    Greedy          Annealing      
  ───────────────────────────────────────────────────────
  Tasks shipped             1000            1000           
  Tasks dropped             0               0              
  Money spent               $11.07          $11.10         
  Cloud tasks               988             988            
  Local tasks (free)        12              12             
  Dep violations            0               0              
  -> Greedy was $0.03 cheaper
