In [1]:
import os
import time
import numpy as np

from braket.jobs import hybrid_job
from braket.jobs.config import InstanceConfig
from braket.jobs.environment_variables import get_job_device_arn

# Container image passed as `image_uri` to the @hybrid_job decorators in this notebook.
image_uri = "537332306153.dkr.ecr.us-west-2.amazonaws.com/braket-cudaq-byoc-job:latest"

# Benchmark size read by the job functions in this notebook.
n_qubits = 20      # number of qubits each kernel allocates
n_terms = 40_000   # term count handed to cudaq.SpinOperator.random
n_shots = 1_000    # shots_count handed to cudaq.observe

In [2]:
@hybrid_job(
    device='local:nvidia/nvidia-mgpu',
    instance_config=InstanceConfig(instanceType='ml.p3.8xlarge', instanceCount=1),
    image_uri=image_uri,
)
def test_cudaq_single_node_single_gpu(sagemaker_mpi_enabled=True):
    """Time ``cudaq.observe`` for a random Hamiltonian on one ml.p3.8xlarge instance.

    The kernel applies H to qubit 0 and then a CX from qubit 0 to every other
    qubit (GHZ-state preparation over ``n_qubits`` qubits). A random spin
    operator with ``n_terms`` terms is then measured with ``n_shots`` shots, and
    the expectation value and elapsed time of the ``observe`` call are printed.

    Args:
        sagemaker_mpi_enabled: Not read inside the body. Presumably forwarded by
            the ``hybrid_job`` decorator as a job hyperparameter -- TODO confirm.

    Returns:
        None from the body itself; all results are printed.

    Raises:
        KeyError: If ``AMZN_BRAKET_JOB_NAME`` is not set in the environment.
    """
    # NOTE(review): the function name says "single GPU" but the requested target
    # is 'nvidia-mgpu' -- confirm this is the intended CUDA-Q target.

    # Imported inside the function rather than at module top; presumably cudaq is
    # only installed in the job container -- verify.
    import cudaq

    print('Run hybrid job:', os.environ['AMZN_BRAKET_JOB_NAME'])
    # The CUDA-Q target name is whatever follows the last '/' in the device string.
    device = get_job_device_arn()
    cudaq.set_target(device.split('/')[-1])
    print(cudaq.get_target())

    kernel = cudaq.make_kernel()
    qubits = kernel.qalloc(n_qubits)
    kernel.h(qubits[0])
    for i in range(1, n_qubits):
        kernel.cx(qubits[0], qubits[i])

    hamiltonian = cudaq.SpinOperator.random(n_qubits, n_terms)

    # perf_counter() is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    t0 = time.perf_counter()
    result = cudaq.observe(kernel, hamiltonian, shots_count=n_shots)
    t1 = time.perf_counter()
    print(f'result: {result.expectation()} | time: {t1-t0}')

# Call the decorated function and keep the object it returns so it can be
# inspected later; plain string literal because there is nothing to interpolate.
print('Single node, single GPU')
single_node_single_gpu_job = test_cudaq_single_node_single_gpu()
print(single_node_single_gpu_job)


Skipping python version validation, make sure versions match between local environment and container.
Single node, single GPU
AwsQuantumJob('arn':'arn:aws:braket:us-west-2:537332306153:job/8211d528-54d3-410a-bee3-50d4605ff417')


In [3]:
@hybrid_job(
    device='local:nvidia/nvidia-mqpu',
    instance_config=InstanceConfig(instanceType='ml.p3.8xlarge', instanceCount=1),
    image_uri=image_uri,
)
def test_cudaq_single_node_multi_gpu(sagemaker_mpi_enabled=True):
    """Time an MPI-parallel ``cudaq.observe`` on one ml.p3.8xlarge instance.

    After initializing CUDA-Q's MPI layer, each rank builds the same kernel
    (H on qubit 0, then CX from qubit 0 to every other qubit), draws a random
    spin operator with ``n_terms`` terms, and calls ``cudaq.observe`` with
    ``execution=cudaq.parallel.mpi``. Each rank prints its rank, the
    expectation value and the elapsed time of the ``observe`` call, then
    finalizes MPI.

    Args:
        sagemaker_mpi_enabled: Not read inside the body. Presumably forwarded by
            the ``hybrid_job`` decorator as a job hyperparameter that turns on
            MPI launching -- TODO confirm.

    Returns:
        None from the body itself; all results are printed.

    Raises:
        KeyError: If ``AMZN_BRAKET_JOB_NAME`` is not set in the environment.
    """
    # Imported inside the function rather than at module top; presumably cudaq is
    # only installed in the job container -- verify.
    import cudaq

    print('Run hybrid job:', os.environ['AMZN_BRAKET_JOB_NAME'])
    # The CUDA-Q target name is whatever follows the last '/' in the device string.
    device = get_job_device_arn()
    cudaq.set_target(device.split('/')[-1])
    print(cudaq.get_target())

    cudaq.mpi.initialize()
    num_ranks = cudaq.mpi.num_ranks()
    rank = cudaq.mpi.rank()
    print(f'rank={rank} | MPI is initialized? {cudaq.mpi.is_initialized()}')
    print(f'rank={rank}, num_ranks={num_ranks}')

    kernel = cudaq.make_kernel()
    qubits = kernel.qalloc(n_qubits)
    kernel.h(qubits[0])
    for i in range(1, n_qubits):
        kernel.cx(qubits[0], qubits[i])

    # NOTE(review): every rank calls random() on its own with no explicit seed.
    # Confirm that all ranks end up with the same Hamiltonian; otherwise the
    # MPI-combined expectation value may not correspond to a single operator.
    hamiltonian = cudaq.SpinOperator.random(n_qubits, n_terms)

    # perf_counter() is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    t0 = time.perf_counter()
    result = cudaq.observe(kernel, hamiltonian, shots_count=n_shots, execution=cudaq.parallel.mpi)
    t1 = time.perf_counter()
    print(f'rank={rank} | result: {result.expectation()} | time: {t1-t0}')

    cudaq.mpi.finalize()


# Call the decorated function and keep the object it returns so it can be
# inspected later; plain string literal because there is nothing to interpolate.
print('Single node, multi GPU')
single_node_multi_gpu_job = test_cudaq_single_node_multi_gpu()
print(single_node_multi_gpu_job)

Skipping python version validation, make sure versions match between local environment and container.
Single node, multi GPU
AwsQuantumJob('arn':'arn:aws:braket:us-west-2:537332306153:job/46282ecf-ae2a-4d14-aba0-a0d6297c2285')


In [5]:
@hybrid_job(
    device='local:nvidia/nvidia-mqpu',
    instance_config=InstanceConfig(instanceType='ml.p3.8xlarge', instanceCount=2),
    image_uri=image_uri,
)
def test_cudaq_multi_node_multi_gpu(sagemaker_mpi_enabled=True):
    """Time an MPI-parallel ``cudaq.observe`` across two ml.p3.8xlarge instances.

    After initializing CUDA-Q's MPI layer, each rank builds the same kernel
    (H on qubit 0, then CX from qubit 0 to every other qubit), draws a random
    spin operator with ``n_terms`` terms, and calls ``cudaq.observe`` with
    ``execution=cudaq.parallel.mpi``. Each rank prints its rank, the
    expectation value and the elapsed time of the ``observe`` call, then
    finalizes MPI.

    Args:
        sagemaker_mpi_enabled: Not read inside the body. Presumably forwarded by
            the ``hybrid_job`` decorator as a job hyperparameter that turns on
            MPI launching -- TODO confirm.

    Returns:
        None from the body itself; all results are printed.

    Raises:
        KeyError: If ``AMZN_BRAKET_JOB_NAME`` is not set in the environment.
    """
    # Imported inside the function rather than at module top; presumably cudaq is
    # only installed in the job container -- verify.
    import cudaq

    print('Run hybrid job:', os.environ['AMZN_BRAKET_JOB_NAME'])
    # The CUDA-Q target name is whatever follows the last '/' in the device string.
    device = get_job_device_arn()
    cudaq.set_target(device.split('/')[-1])
    print(cudaq.get_target())

    cudaq.mpi.initialize()
    num_ranks = cudaq.mpi.num_ranks()
    rank = cudaq.mpi.rank()
    print(f'rank={rank} | MPI is initialized? {cudaq.mpi.is_initialized()}')
    print(f'rank={rank}, num_ranks={num_ranks}')

    kernel = cudaq.make_kernel()
    qubits = kernel.qalloc(n_qubits)
    kernel.h(qubits[0])
    for i in range(1, n_qubits):
        kernel.cx(qubits[0], qubits[i])

    # NOTE(review): every rank calls random() on its own with no explicit seed.
    # Confirm that all ranks end up with the same Hamiltonian; otherwise the
    # MPI-combined expectation value may not correspond to a single operator.
    hamiltonian = cudaq.SpinOperator.random(n_qubits, n_terms)

    # perf_counter() is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    t0 = time.perf_counter()
    result = cudaq.observe(kernel, hamiltonian, shots_count=n_shots, execution=cudaq.parallel.mpi)
    t1 = time.perf_counter()
    print(f'rank={rank} | result: {result.expectation()} | time: {t1-t0}')

    cudaq.mpi.finalize()


# Call the decorated function and keep the object it returns so it can be
# inspected later; plain string literal because there is nothing to interpolate.
print('Multi node, multi GPU')
multi_node_multi_gpu_job = test_cudaq_multi_node_multi_gpu()
print(multi_node_multi_gpu_job)


Skipping python version validation, make sure versions match between local environment and container.
Multi node, multi GPU
AwsQuantumJob('arn':'arn:aws:braket:us-west-2:537332306153:job/dbc0149a-57ad-4904-9ff7-d1483ec17317')
