In [26]:
import torch
import numpy
import pennylane as qml
import pennylane.numpy as np
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
import matplotlib.pyplot as plt
from collections import Counter

In [27]:
def Cliff2():
    """
    Random 2-qubit Clifford circuit.

    Arguments:
        -nodes (np.ndarray): 
    
    Returns:
        -null
    """
    
    weights = np.random.randint(2, size=(2, 10))
    
    return qml.matrix(qml.RandomLayers(weights=weights,wires=[0,1])).numpy()

In [28]:
def RandomLayers(N_QUBITS, DEPTH):
    """
    Generates brick wall pattern of random 2 qubit Clifford gates

    Arguments:
        -N_QUBITS (int): Number of qubits
        -DEPTH (int): Depth of the circuit

    Returns:
        -random_layers (np.ndarray): Array of 4x4 unitaries (N_QUBITS, DEPTH, 4, 4)
    
    """

    random_layers = []
    for t in range(DEPTH):
        layer = []
        for x in range(0,N_QUBITS,2):
                layer.append(Cliff2())
        random_layers.append(layer)

    return random_layers


In [29]:
N_QUBITS = 2*3
DEPTH = 2

# random_layers = []
# # for t in range(DEPTH):
# #         layer = []
# #         for x in range(0,N_QUBITS,2):
# #                 layer.append(Cliff2())
# #         random_layers.append(layer)

random_layers = RandomLayers(N_QUBITS,DEPTH)

dev = qml.device("default.qubit", wires=N_QUBITS)

@qml.qnode(dev)
def circuit(theta):
    """
    Quantum circuit with random entangling Clifford layers and disentangling layers.
    
    Arguments:
        -theta (np.ndarray): Binary matrix representing the positions of projections. (N_QUBITS, DEPTH)
    
    Returns:
        -Average Von Neumann entropy (float32): Average of 2-qubit Von Neumann entropies over all neighbors.
    """

    theta = theta.T
    DEPTH,N_QUBITS = np.shape(theta)

    for t in range(DEPTH):
        layer = random_layers[t]
        if t%2==0:
            for x in range(0,N_QUBITS,2):
                brick = layer[int(x/2)]
                qml.QubitUnitary(brick,wires=[x,x+1])
        elif t%2==1:
            for x in range(1,N_QUBITS-2,2):
                brick = layer[int((x-1)/2)]
                qml.QubitUnitary(brick,wires=[x,x+1])
            brick = layer[-1]
            qml.QubitUnitary(brick,wires=[N_QUBITS-1,0])
            
        projections = theta[t]
        for x in range(N_QUBITS):
            if projections[x]==1:
                qml.Projector(state=[0],wires=[x])
            
    entropies = []
    for x in range(N_QUBITS-1):
        entropies.append(qml.vn_entropy(wires=[x,x+1]))
    entropies.append(qml.vn_entropy(wires=[N_QUBITS-1,0]))
        
    return entropies

In [30]:
random_layers[0][1]

array([[-0.10857812+0.16546573j,  0.18615456+0.61394576j,
        -0.43227776+0.44462681j,  0.28318902+0.29068174j],
       [-0.27874883-0.29139345j,  0.54151985-0.38641197j,
        -0.33228934-0.34247542j,  0.2258591 +0.34075363j],
       [-0.13879178-0.50696963j, -0.05325489-0.2053904j ,
         0.04336174+0.61838595j, -0.46627099+0.27750185j],
       [ 0.246998  -0.67891932j, -0.29368105+0.12078603j,
         0.05769507+0.01127702j,  0.60205322-0.10629554j]])

In [31]:
theta = np.random.randint(2, size=(N_QUBITS,DEPTH))
print(circuit(theta))
print(type(circuit(theta)))
drawer = qml.draw(circuit)

print(drawer(theta))

[0.5087581128692267, 1.1102230246251564e-16, 1.1102230246251564e-16, 1.1102230246251564e-16, 0.5087581128692285, 1.4701977598569925e-15]
<class 'list'>
0: ─╭U(M0)─────────╭U(M5)─────────┤ ╭vnentropy                                            
1: ─╰U(M0)─╭U(M3)──│───────|0⟩⟨0|─┤ ╰vnentropy ╭vnentropy                                 
2: ─╭U(M1)─╰U(M3)──│───────|0⟩⟨0|─┤            ╰vnentropy ╭vnentropy                      
3: ─╰U(M1)─╭U(M4)──│───────|0⟩⟨0|─┤                       ╰vnentropy ╭vnentropy           
4: ─╭U(M2)─╰U(M4)──│──────────────┤                                  ╰vnentropy ╭vnentropy
5: ─╰U(M2)──|0⟩⟨0|─╰U(M5)─────────┤                                             ╰vnentropy

  ╭vnentropy
  │         
  │         
  │         
  │         
  ╰vnentropy

M0 = 
[[ 0.15723302-0.4258975j   0.        +0.j         -0.80175132+0.38869516j
   0.        +0.j        ]
 [ 0.        +0.j          0.81143708+0.52527811j  0.        +0.j
  -0.11064153+0.23110871j]
 [-0.73887512-0.497948

In [87]:
class Disentangler(gym.Env):
    """
    Reinforcement learning environment for the disentangler.
    """
    
    def __init__(self, n_qubits, depth):
        super(Disentangler, self).__init__()
        
        self.N_QUBITS = n_qubits
        self.DEPTH = depth

        self.action_space = gym.spaces.Discrete(self.N_QUBITS * self.DEPTH)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(self.N_QUBITS, self.DEPTH), dtype=np.int8)
        self.state = np.zeros((self.N_QUBITS, self.DEPTH), dtype=np.int8)

    def step(self, action):
        # Initialize reward and done
        reward = 0
        done = False
        truncate = False

        # Apply the action
        h = np.zeros(self.N_QUBITS * self.DEPTH, dtype=np.int8)
        h[action] = 1
        h = h.reshape((self.N_QUBITS, self.DEPTH))
        self.state = (self.state + h) % 2

        # Calculate entropy (assumes circuit is a predefined function)
        entropies = circuit(self.state)
        entropy = np.mean(entropies)

        # Check if the state is trivial
        trivial1 = (np.sum(self.state[:, -1]) == self.N_QUBITS)
        trivial2 = (np.sum(self.state[:, -1]) == self.N_QUBITS - 1)
        trivial = trivial1 or trivial2

        # Determine reward and done conditions
        if entropy < 1e-17:
            reward = 100
            done = True
        elif trivial:
            reward = -1000
            truncate = True
        
        # Return the state, reward, done flag, and info
        info = {}
        return self.state, reward, done, truncate, info
    
    def reset(self, seed=None):
        # Seed the random number generator if a seed is provided
        if seed is not None:
            np.random.seed(seed)
        
        # Reset the state to an all-zero matrix
        self.state = np.zeros((self.N_QUBITS, self.DEPTH), dtype=np.int8)

        info = {}
        return self.state, info
    
    def render(self):
        print()

    def close(self):
        # Optional: Implement any cleanup
        pass


In [88]:
env = Disentangler(n_qubits=N_QUBITS,depth=DEPTH)

In [90]:
env = Disentangler(n_qubits=N_QUBITS, depth=DEPTH)
obs = env.reset()
print(f"Initial Observation: \n {obs}")

action = env.action_space.sample()
obs, reward, done, truncate, info = env.step(action)
print(f"Observation: \n {obs}, Reward: {reward}, Done: {done}, Truncate: {truncate}, Info: {info}")

Initial Observation: 
 (tensor([[0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0]], dtype=int8, requires_grad=True), {})
Observation: 
 [[0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]], Reward: 0, Done: False, Truncate: False, Info: {}


In [91]:
env.reset(3)

(tensor([[0, 0],
         [0, 0],
         [0, 0],
         [0, 0],
         [0, 0],
         [0, 0]], dtype=int8, requires_grad=True),
 {})

In [92]:
model = PPO('MlpPolicy', env, verbose=1, learning_rate=0.01)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [93]:
model.learn(total_timesteps=10)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 38.6     |
|    ep_rew_mean     | -834     |
| time/              |          |
|    fps             | 106      |
|    iterations      | 1        |
|    time_elapsed    | 19       |
|    total_timesteps | 2048     |
---------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x1c10d72a640>

In [95]:
obs = env.reset()
for _ in range(100):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, truncate, info = env.step(action)
    env.render()
    if done:
        break

ValueError: You have passed a tuple to the predict() function instead of a Numpy array or a Dict. You are probably mixing Gym API with SB3 VecEnv API: `obs, info = env.reset()` (Gym) vs `obs = vec_env.reset()` (SB3 VecEnv). See related issue https://github.com/DLR-RM/stable-baselines3/issues/1694 and documentation for more information: https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#vecenv-api-vs-gym-api

In [96]:
model.predict(obs,deterministic=True)

ValueError: You have passed a tuple to the predict() function instead of a Numpy array or a Dict. You are probably mixing Gym API with SB3 VecEnv API: `obs, info = env.reset()` (Gym) vs `obs = vec_env.reset()` (SB3 VecEnv). See related issue https://github.com/DLR-RM/stable-baselines3/issues/1694 and documentation for more information: https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#vecenv-api-vs-gym-api