In [1]:
from qiskit_gym.envs import LinearFunctionGym, CliffordGym, PermutationGym, LinearFunctionNoisyGym
from qiskit_gym.rl import RLSynthesis, PPOConfig, AlphaZeroConfig, BasicPolicyConfig

from qiskit import QuantumCircuit
from qiskit.transpiler import CouplingMap
import numpy as np

In [2]:
cmap_6_line = CouplingMap.from_line(4, bidirectional=True)
env = LinearFunctionNoisyGym.from_coupling_map(cmap_6_line, basis_gates=["CX"])

rls = RLSynthesis(env, PPOConfig(), BasicPolicyConfig())
rls.learn(num_iterations=10, tb_path="runs/lf_4_line_ppo/")  # This will track progress in Tensorboard


[32m2025-11-04 11:55:39.392[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m176[0m - [1m(1/0) {'successes': {'ppo_deterministic': 0.1899999976158142, 'ppo_10': 0.7900000214576721}, 'rewards': {'ppo_deterministic': -0.22389453649520874, 'ppo_10': 0.6792304515838623}, 'difficulty': 1, 'success': 0.1899999976158142, 'reward': -0.22389453649520874} | {'to_rust': 0.003218833, 'eval_ppo_deterministic': 0.001274458, 'eval_ppo_10': 0.00652275, 'collect': 0.00958825, 'data_to_torch': 0.017729916, 'train': 0.169828208, 'total': 0.208435167}[0m
[32m2025-11-04 11:55:39.518[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m176[0m - [1m(1/1) {'successes': {'ppo_deterministic': 0.1899999976158142, 'ppo_10': 0.8700000047683716}, 'rewards': {'ppo_deterministic': -0.223933607339859, 'ppo_10': 0.7997070550918579}, 'difficulty': 1, 'success': 0.1899999976158142, 'reward': -0.223933607339859} | {'to_rust': 0.003405333, 'eval_ppo_deterministic': 0

In [None]:
qc_lf_input = QuantumCircuit(4)
qc_lf_input.cx(0,2)
qc_lf_input.cx(0,3)
qc_lf_input.draw(fold=-1)

In [6]:
qc_lf_output = rls.synth(qc_lf_input, num_searches=1000, num_mcts_searches=0, deterministic=False)
qc_lf_output.draw(fold=-1)

In [7]:
from qiskit.circuit.library.generalized_gates import LinearFunction
LinearFunction(qc_lf_input) == LinearFunction(qc_lf_output)

np.True_

In [10]:
cmap_3_line = CouplingMap.from_line(3, bidirectional=True)
cmap = CouplingMap.from_grid(3,3,bidirectional=True)
env = LinearFunctionNoisyGym.from_coupling_map(cmap)
print(env.config["gateset"])



some_perm = np.random.permutation(9)

rls = RLSynthesis(env, PPOConfig(), BasicPolicyConfig())

rls.env.config["gateset"]

qc_perm = rls.synth(some_perm, num_searches=1000)
qc_perm.draw(fold=-1)


[('CX', (0, 1)), ('CX', (0, 3)), ('CX', (1, 0)), ('CX', (1, 2)), ('CX', (1, 4)), ('CX', (2, 1)), ('CX', (2, 5)), ('CX', (3, 0)), ('CX', (3, 4)), ('CX', (3, 6)), ('CX', (4, 1)), ('CX', (4, 3)), ('CX', (4, 5)), ('CX', (4, 7)), ('CX', (5, 2)), ('CX', (5, 4)), ('CX', (5, 8)), ('CX', (6, 3)), ('CX', (6, 7)), ('CX', (7, 4)), ('CX', (7, 6)), ('CX', (7, 8)), ('CX', (8, 5)), ('CX', (8, 7)), ('SWAP', (0, 1)), ('SWAP', (0, 3)), ('SWAP', (1, 0)), ('SWAP', (1, 2)), ('SWAP', (1, 4)), ('SWAP', (2, 1)), ('SWAP', (2, 5)), ('SWAP', (3, 0)), ('SWAP', (3, 4)), ('SWAP', (3, 6)), ('SWAP', (4, 1)), ('SWAP', (4, 3)), ('SWAP', (4, 5)), ('SWAP', (4, 7)), ('SWAP', (5, 2)), ('SWAP', (5, 4)), ('SWAP', (5, 8)), ('SWAP', (6, 3)), ('SWAP', (6, 7)), ('SWAP', (7, 4)), ('SWAP', (7, 6)), ('SWAP', (7, 8)), ('SWAP', (8, 5)), ('SWAP', (8, 7))]


AttributeError: 'numpy.ndarray' object has no attribute 'linear'

In [None]:
# You can set a custom state like this (in this case from a circuit)
env.set_state(env.get_state(qc))

env.action_space  # This tells you the number of possible actions (a discrete space of 8 actions)

env.observation_space  # This tells you the type and size of observation space (N by N discrete in this case)

# You can advance the env by providing an action
obs, reward, is_final, _, _ = env.step(2)

# This provides:
# - The observation of the state right after action
# - The reward for that step
# - If we are in a final state

obs, reward, is_final


# One way to do it
env.set_state(env.get_state(qc))
env.render()
for a in [4,2,4]:
    print(f"[{a}] - {env.config['gateset'][a]}")
    obs, reward, is_final, _, _ = env.step(a)
    print(f"[{a}] - Reward: {reward}, Is final: {is_final}")
    env.render()

[[1 0 0]
 [0 1 0]
 [1 0 1]]
[4] - ('SWAP', (0, 1))
[4] - Reward: -0.0062500000931322575, Is final: False
[[0 1 0]
 [1 0 0]
 [1 0 1]]
[2] - ('CX', (1, 2))
[2] - Reward: -0.0054687499068677425, Is final: False
[[0 1 0]
 [1 0 0]
 [0 0 1]]
[4] - ('SWAP', (0, 1))
[4] - Reward: 1.0, Is final: True
[[1 0 0]
 [0 1 0]
 [0 0 1]]


In [None]:
cmap_3x3 = CouplingMap.from_grid(3,3, bidirectional=False)
env = PermutationGym.from_coupling_map(cmap_3x3)
rls = RLSynthesis(env, PPOConfig(), BasicPolicyConfig())

rls.env.config["gateset"]

[('SWAP', (0, 1)),
 ('SWAP', (0, 3)),
 ('SWAP', (1, 2)),
 ('SWAP', (1, 4)),
 ('SWAP', (2, 5)),
 ('SWAP', (3, 4)),
 ('SWAP', (3, 6)),
 ('SWAP', (4, 5)),
 ('SWAP', (4, 7)),
 ('SWAP', (5, 8)),
 ('SWAP', (6, 7)),
 ('SWAP', (7, 8))]

In [None]:
rls.learn(num_iterations=10, tb_path="runs/perm_square_3x3/")

[32m2025-09-19 09:59:05.237[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m176[0m - [1m(1/0) {'successes': {'ppo_deterministic': 0.0, 'ppo_10': 0.5199999809265137}, 'rewards': {'ppo_deterministic': -0.5078125, 'ppo_10': 0.27421873807907104}, 'difficulty': 1, 'success': 0.0, 'reward': -0.5078125} | {'to_rust': 0.00404725, 'eval_ppo_deterministic': 0.001655167, 'eval_ppo_10': 0.008290458, 'collect': 0.014999666, 'data_to_torch': 0.041117209, 'train': 0.17895525, 'total': 0.249390875}[0m
[32m2025-09-19 09:59:05.244[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m196[0m - [1m(1/0) Improved, saved checkpoint![0m
[32m2025-09-19 09:59:05.389[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m176[0m - [1m(1/1) {'successes': {'ppo_deterministic': 0.11999999731779099, 'ppo_10': 0.5699999928474426}, 'rewards': {'ppo_deterministic': -0.32734376192092896, 'ppo_10': 0.34941405057907104}, 'difficulty': 1, 'su

In [None]:
#rls.save("models/perm_square_3x3.json", "models/perm_square_3x3.pt")

rls = RLSynthesis.from_config_json("models/perm_square_3x3.json", "models/perm_square_3x3.pt")

In [None]:
some_perm = np.random.permutation(9)

qc_perm = rls.synth(some_perm, num_searches=1000, num_mcts_searches=0, deterministic=False)
qc_perm.draw(fold=-1)



In [None]:
qc_perm_input = QuantumCircuit(9)
qc_perm_input.swap(0,8)
qc_perm_input.draw(fold=-1)

In [None]:
qc_perm_output = rls.synth(qc_perm_input, num_searches=1000, num_mcts_searches=0, deterministic=False)
qc_perm_output.draw(fold=-1)

cmap = CouplingMap.from_heavy_hex(9, bidirectional=True)
env = LinearFunctionNoisyGym.from_coupling_map(cmap)
env.config["gateset"]

[('CX', (0, 81)),
 ('CX', (0, 121)),
 ('CX', (1, 121)),
 ('CX', (1, 122)),
 ('CX', (2, 122)),
 ('CX', (2, 123)),
 ('CX', (3, 123)),
 ('CX', (3, 124)),
 ('CX', (4, 124)),
 ('CX', (4, 125)),
 ('CX', (5, 125)),
 ('CX', (5, 126)),
 ('CX', (6, 126)),
 ('CX', (6, 127)),
 ('CX', (7, 127)),
 ('CX', (7, 128)),
 ('CX', (8, 128)),
 ('CX', (9, 81)),
 ('CX', (9, 129)),
 ('CX', (10, 129)),
 ('CX', (10, 130)),
 ('CX', (11, 130)),
 ('CX', (11, 131)),
 ('CX', (12, 131)),
 ('CX', (12, 132)),
 ('CX', (13, 132)),
 ('CX', (13, 133)),
 ('CX', (14, 133)),
 ('CX', (14, 134)),
 ('CX', (15, 134)),
 ('CX', (15, 135)),
 ('CX', (16, 135)),
 ('CX', (16, 136)),
 ('CX', (17, 90)),
 ('CX', (17, 136)),
 ('CX', (18, 91)),
 ('CX', (18, 137)),
 ('CX', (19, 137)),
 ('CX', (19, 138)),
 ('CX', (20, 138)),
 ('CX', (20, 139)),
 ('CX', (21, 139)),
 ('CX', (21, 140)),
 ('CX', (22, 140)),
 ('CX', (22, 141)),
 ('CX', (23, 141)),
 ('CX', (23, 142)),
 ('CX', (24, 142)),
 ('CX', (24, 143)),
 ('CX', (25, 143)),
 ('CX', (25, 144)),
 ('

In [None]:
#rls.save("models/lf_5_line.json", "models/lf_5_line.pt")

rls = RLSynthesis.from_config_json("models/lf_5_line.json", "models/lf_5_line.pt")

In [None]:
qc_lf_input = QuantumCircuit(5)
qc_lf_input.cx(0,4)
qc_lf_input.draw(fold=-1)

In [None]:
qc_lf_output = rls.synth(qc_lf_input, num_searches=1000, num_mcts_searches=0, deterministic=False)
qc_lf_output.draw(fold=-1)

In [None]:
from qiskit.circuit.library.generalized_gates import LinearFunction
LinearFunction(qc_lf_input) == LinearFunction(qc_lf_output)

np.True_

In [None]:
# Here we want to do Clifford synthesis but we only allow H and S to be placed on qubit 0
env = CliffordGym(
    num_qubits=3, 
    gateset=[
        ("CX", [0,1]),
        ("CX", [1,0]),
        ("CX", [1,2]),
        ("CX", [2,1]),
        ("SWAP", [0,1]),
        ("SWAP", [1,2]),
        ("H", [0]),
        ("S", [0]),
    ]
)
rls = RLSynthesis(env, PPOConfig(), BasicPolicyConfig())
rls.learn(num_iterations=10, tb_path="runs/clifford_3q_custom/")

[32m2025-09-19 10:50:25.274[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m176[0m - [1m(1/0) {'successes': {'ppo_deterministic': 0.10999999940395355, 'ppo_10': 0.7300000190734863}, 'rewards': {'ppo_deterministic': -0.3423828184604645, 'ppo_10': 0.590039074420929}, 'difficulty': 1, 'success': 0.10999999940395355, 'reward': -0.3423828184604645} | {'to_rust': 0.00384125, 'eval_ppo_deterministic': 0.001390083, 'eval_ppo_10': 0.006526708, 'collect': 0.010808792, 'data_to_torch': 0.009190958, 'train': 0.109077416, 'total': 0.141090083}[0m
[32m2025-09-19 10:50:25.279[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m196[0m - [1m(1/0) Improved, saved checkpoint![0m
[32m2025-09-19 10:50:25.421[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m176[0m - [1m(1/1) {'successes': {'ppo_deterministic': 0.14000000059604645, 'ppo_10': 0.6700000166893005}, 'rewards': {'ppo_deterministic': -0.2972656190395355, 'ppo

In [None]:
#rls.save("models/clifford_3q_custom.json", "models/clifford_3q_custom.pt")

rls = RLSynthesis.from_config_json("models/clifford_3q_custom.json", "models/clifford_3q_custom.pt")

In [None]:
from qiskit.quantum_info import random_clifford, Clifford

In [None]:
qc_clifford_in = QuantumCircuit(3)
qc_clifford_in.h(2)

qc_clifford_out = rls.synth(qc_clifford_in, num_searches=1000, num_mcts_searches=0, deterministic=False)
qc_clifford_out.draw(fold=-1)

In [None]:
some_clifford = random_clifford(3, seed=42)
qc_rand_clifford_out = rls.synth(some_clifford, num_searches=1000, num_mcts_searches=0, deterministic=False)
qc_rand_clifford_out.draw(fold=-1)

In [None]:
# Equivalent up to phase
np.array_equal(some_clifford.tableau[:,:-1], Clifford(qc_rand_clifford_out).tableau[:,:-1])

True