# Example for execution of multiple circuits in QPUs

### Importing and adding paths to `sys.path`

In [17]:
import os
import sys
import warnings

# Path to access the C++ files, read from the INSTALL_PATH environment variable.
installation_path = os.getenv("INSTALL_PATH")

if installation_path is None:
    # None is not a valid sys.path entry, so report the missing variable here
    # instead of letting a later import fail without an obvious cause.
    warnings.warn("INSTALL_PATH is not set; the installation folder was not added to sys.path.")
elif installation_path not in sys.path:
    # Only append once so that re-running the cell does not add duplicates.
    sys.path.append(installation_path)

### Let's get the QPUs that we q-raised!

In [18]:
from cunqa.qpu import getQPUs

# Retrieve the available QPUs.
qpus = getQPUs()

# Print one summary line per QPU: its id and its backend's name, simulator and version.
for qpu in qpus:
    backend = qpu.backend
    print(f"QPU {qpu.id}, name: {backend.name}, backend: {backend.simulator}, version: {backend.version}.")


[34m	debug: File accessed correctly.[0m
/mnt/netapp1/Store_CESGA/home/cesga/mlosada/api/api-simulator/installation/cunqa/qpu.py:162
[31m[1m	error: No QPUs were found, /mnt/netapp1/Store_CESGA//home/cesga/mlosada/.api_simulator/qpu.json is empty.[0m


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


### Let's create a circuit to run in our QPUs!

In [8]:
from qiskit import QuantumCircuit
from qiskit.circuit.library import QFT

n = 5  # number of qubits

qc = QuantumCircuit(n)

# Apply an X gate to qubit 0 and to the last two qubits.
for flipped_qubit in (0, n - 1, n - 2):
    qc.x(flipped_qubit)

# Append the QFT and then its inverse, both acting on every qubit.
all_qubits = range(n)
qc.append(QFT(n), all_qubits)
qc.append(QFT(n).inverse(), all_qubits)

qc.measure_all()

display(qc.draw())

### Execution time! Let's do it sequentially

In [9]:
counts = []

# QPUs used for the sequential run, each paired with the extra keyword
# arguments passed to its `run` call. Only the last QPU is given an explicit
# initial layout.
sequential_runs = [
    (qpus[0], {}),
    (qpus[2], {}),
    (qpus[4], {}),
    (qpus[5], {"initial_layout": [31, 30, 29, 28, 27]}),
]

for qpu, extra_run_args in sequential_runs:

    print(f"For QPU {qpu.id}, with backend {qpu.backend.name}:")

    qjob = qpu.run(qc, transpile = True, shots = 1000, **extra_run_args)

    result = qjob.result() # blocking call

    # Named `time_taken` so the standard-library `time` module is not shadowed.
    time_taken = qjob.time_taken()

    # Fetch the counts once and reuse them for both storing and printing.
    qpu_counts = result.get_counts()
    counts.append(qpu_counts)

    print(f"Result: \n{qpu_counts}\n Time taken: {time_taken} s.")

For QPU 0, with backend BasicAer:
[34m	debug: Transpilation done.[0m
[34m	debug: A circuit dict was provided.[0m
[34m	debug: Translating to dict for AerSimulator...[0m
[34m	debug: Qjob submitted to QPU 0.[0m
[34m	debug: Results correctly loaded.[0m
Result: 
{'11001': 1000}
 Time taken: 0.005048949 s.
For QPU 2, with backend BasicMunich:
[34m	debug: Transpilation done.[0m
[34m	debug: A circuit dict was provided.[0m
[34m	debug: Translating to QASM2 for MunichSimulator...[0m
[34m	debug: Qjob submitted to QPU 2.[0m
[34m	debug: Results correctly loaded.[0m
Result: 
{'11001': 1000}
 Time taken: 0.001124921953305602 s.
For QPU 4, with backend FakeQmio:
[34m	debug: Transpilation done.[0m
[34m	debug: A circuit dict was provided.[0m
[34m	debug: Translating to dict for AerSimulator...[0m
[34m	debug: Qjob submitted to QPU 4.[0m
[34m	debug: Results correctly loaded.[0m
Result: 
{'00000': 22, '00001': 25, '10000': 28, '10001': 38, '10010': 15, '10011': 19, '10100': 34, 

In [10]:
from qiskit.visualization import plot_histogram
import matplotlib.pyplot as plt

# One legend label per entry of `counts`, in the order the QPUs were run.
legend_labels = ["QPU 0", "QPU 2", "QPU 4", "QPU 5"]

# Draw on an explicitly created figure so that the figure holding the
# histogram is the same one that gets saved and shown, and let
# `plot_histogram` build the legend so each label is tied to its own dataset.
fig, ax = plt.subplots(figsize = (10, 5))
plot_histogram(counts, legend = legend_labels, bar_labels = False, ax = ax)
fig.savefig('counts.png')
plt.show()

### Cool, isn't it? But this circuit is too simple, so let's try a more complex one!

In [11]:
import json

# Read the circuit description from disk and decode the JSON text.
with open("circuits/circuit_15qubits_10layers.json", "r") as circuit_file:
    circuit = json.loads(circuit_file.read())

### This circuit has 15 qubits and 10 intermediate measurements, let's run it in AerSimulator

In [12]:
# Pick the first QPU whose backend is named "BasicAer". Fail with a clear
# message when there is none, instead of using an undefined or stale `qpu0`.
qpu0 = next((qpu for qpu in qpus if qpu.backend.name == "BasicAer"), None)
if qpu0 is None:
    raise RuntimeError('No QPU with a "BasicAer" backend was found.')

qjob = qpu0.run(circuit, transpile = True, shots = 1000)

result = qjob.result() # blocking call

# Named `time_taken` so the standard-library `time` module is not shadowed.
time_taken = qjob.time_taken()

# NOTE(review): this appends to the `counts` list created by the sequential
# run, so re-running this cell makes that list grow; kept for compatibility.
counts.append(result.get_counts())

print(f"Result: Time taken: {time_taken} s.")

[34m	debug: Transpilation done.[0m
[34m	debug: A circuit dict was provided.[0m
[34m	debug: Translating to dict for AerSimulator...[0m
[34m	debug: Qjob submitted to QPU 0.[0m
[34m	debug: Results correctly loaded.[0m
Result: Time taken: 9.002360148 s.


### It takes much longer... let's parallelize 3 executions on 3 different QPUs

### Remember that sending circuits to a given QPU is a non-blocking call, so we can use a loop, keeping the QJob objects in a list.

### Then, we can wait for all the jobs to finish with the gather function. Let's measure time to check that we are parallelizing:

In [15]:
import time
from cunqa.qpu import gather

# `perf_counter` is a monotonic clock intended for measuring elapsed
# intervals, unlike `time.time()`, which can jump if the system clock changes.
tick = time.perf_counter()

# Submit the circuit to each QPU and keep the returned job objects, then
# wait for all of them at once.
qjobs = [
    qpu.run(circuit, transpile = True, shots = 1000)
    for qpu in [qpus[0], qpus[2], qpus[4]]
]

results = gather(qjobs) # this is a blocking call
tack = time.perf_counter()

[34m	debug: Transpilation done.[0m
[34m	debug: A circuit dict was provided.[0m
[34m	debug: Translating to dict for AerSimulator...[0m
[34m	debug: Qjob submitted to QPU 0.[0m
[34m	debug: Transpilation done.[0m
[34m	debug: A circuit dict was provided.[0m
[34m	debug: Translating to QASM2 for MunichSimulator...[0m
[34m	debug: Qjob submitted to QPU 2.[0m
[34m	debug: Transpilation done.[0m
[34m	debug: A circuit dict was provided.[0m
[34m	debug: Translating to dict for AerSimulator...[0m
[34m	debug: Qjob submitted to QPU 4.[0m
[34m	debug: Results correctly loaded.[0m
[34m	debug: Results correctly loaded.[0m
[34m	debug: Results correctly loaded.[0m


In [16]:
# Elapsed time between `tick` and `tack`, followed by the time reported by each result.
elapsed = tack - tick
print(f"Time taken to run 3 circuits in parallel: {elapsed} s.")
print("Time for each execution:")
for job_index, job_result in enumerate(results):
    print(f"For job {job_index}, time taken: {job_result.time_taken} s.")

Time taken to run 3 circuits in parallel: 17.673771858215332 s.
Time for each execution:
For job 0, time taken: 9.334006437 s.
For job 1, time taken: 1.679389238357544 s.
For job 2, time taken: 17.271146456 s.
