In [1]:
import pennylane as qml
import pennylane.numpy as np
import gymnasium as gym
import stable_baselines3
from stable_baselines3 import PPO
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
import matplotlib.pyplot as plt
!nvidia-smi

Mon Aug 12 22:55:04 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.99                 Driver Version: 555.99         CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   53C    P3             14W /   35W |       0MiB /   8188MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
def Cliff2():
    """
    Random 2-qubit Clifford circuit.

    Arguments:
        -nodes (np.ndarray): 
    
    Returns:
        -null
    """
    
    weights = np.random.randint(2, size=(2, 10))
    
    return qml.matrix(qml.RandomLayers(weights=weights,wires=[0,1])).numpy()

In [3]:
def RandomLayers(N_QUBITS, DEPTH):
    """
    Generates brick wall pattern of random 2 qubit Clifford gates

    Arguments:
        -N_QUBITS (int): Number of qubits
        -DEPTH (int): Depth of the circuit

    Returns:
        -random_layers (np.ndarray): Array of 4x4 unitaries (N_QUBITS, DEPTH, 4, 4)
    
    """

    random_layers = []
    for t in range(DEPTH):
        layer = []
        for x in range(0,N_QUBITS,2):
                layer.append(Cliff2())
        random_layers.append(layer)

    return random_layers


In [4]:
N_QUBITS = 2*3
DEPTH = 2

random_layers = RandomLayers(N_QUBITS,DEPTH)

dev = qml.device("default.qubit", wires=N_QUBITS)

@qml.qnode(dev)
def circuit(theta):
    """
    Quantum circuit with random entangling Clifford layers and disentangling layers.
    
    Arguments:
        -theta (np.ndarray): Binary matrix representing the positions of projections. (N_QUBITS, DEPTH)
    
    Returns:
        -Average Von Neumann entropy (float32): Average of 2-qubit Von Neumann entropies over all neighbors.
    """

    theta = theta.T
    DEPTH,N_QUBITS = np.shape(theta)

    for t in range(DEPTH):
        layer = random_layers[t]
        if t%2==0:
            for x in range(0,N_QUBITS,2):
                brick = layer[int(x/2)]
                qml.QubitUnitary(brick,wires=[x,x+1])
        elif t%2==1:
            for x in range(1,N_QUBITS-2,2):
                brick = layer[int((x-1)/2)]
                qml.QubitUnitary(brick,wires=[x,x+1])
            brick = layer[-1]
            qml.QubitUnitary(brick,wires=[N_QUBITS-1,0])
            
        projections = theta[t]
        for x in range(N_QUBITS):
            if projections[x]==1:
                qml.Projector(state=[0],wires=[x])
            
    entropies = []
    for x in range(N_QUBITS-1):
        entropies.append(qml.vn_entropy(wires=[x,x+1]))
    entropies.append(qml.vn_entropy(wires=[N_QUBITS-1,0]))
        
    return entropies

In [5]:
class Disentangler(gym.Env):
    """
    Reinforcement learning environment for the disentangler.
    """
    
    def __init__(self, n_qubits, depth):
        super(Disentangler, self).__init__()
        
        self.N_QUBITS = n_qubits
        self.DEPTH = depth

        self.action_space = gym.spaces.Discrete(self.N_QUBITS * self.DEPTH)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(self.N_QUBITS, self.DEPTH), dtype=np.int8)
        self.state = np.zeros((self.N_QUBITS, self.DEPTH), dtype=np.int8)

    def step(self, action):
        # Initialize reward and done
        reward = 0
        done = False
        truncate = False

        # Apply the action
        h = np.zeros(self.N_QUBITS * self.DEPTH, dtype=np.int8)
        h[action] = 1
        h = h.reshape((self.N_QUBITS, self.DEPTH))
        self.state = (self.state + h) % 2

        # Calculate entropy (assumes circuit is a predefined function)
        entropies = circuit(self.state)
        entropy = np.mean(entropies)

        # Check if the state is trivial
        trivial1 = (np.sum(self.state[:, -1]) == self.N_QUBITS)
        trivial2 = (np.sum(self.state[:, -1]) == self.N_QUBITS - 1)
        trivial = trivial1 or trivial2

        # Determine reward and done conditions
        if entropy < 1e-17:
            reward = 100
            done = True
        elif trivial:
            reward = -1000
            truncate = True
        
        # Return the state, reward, done flag, and info
        info = {}
        return self.state, reward, done, truncate, info
    
    def reset(self, seed=None):
        # Seed the random number generator if a seed is provided
        if seed is not None:
            np.random.seed(seed)
        
        # Reset the state to an all-zero matrix
        self.state = np.zeros((self.N_QUBITS, self.DEPTH), dtype=np.int8)

        info = {}
        return self.state, info
    
    def render(self):
        print()

    def close(self):
        # Optional: Implement any cleanup
        pass


In [6]:
env = Disentangler(n_qubits=N_QUBITS,depth=DEPTH)
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log="./", learning_rate=0.1, ent_coef=0.01)
model.learn(total_timesteps=10000, tb_log_name="ppo1")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./ppo1_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31.8     |
|    ep_rew_mean     | -725     |
| time/              |          |
|    fps             | 102      |
|    iterations      | 1        |
|    time_elapsed    | 19       |
|    total_timesteps | 2048     |
---------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 31.8     |
|    ep_rew_mean          | -725     |
| time/                   |          |
|    fps                  | 98       |
|    iterations           | 2        |
|    time_elapsed         | 41       |
|    total_timesteps      | 4096     |
| train/                  |          |
|    approx_kl            | 40.21439 |
|    clip_fraction        | 0.954    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.337   |

<stable_baselines3.ppo.ppo.PPO at 0x21522b02f40>

In [286]:
vec_env = model.get_env()
obs = vec_env.reset()

frames = []
rewards = []

for _ in range(100):
    action, _states = model.predict(obs)
    vec_env.state = obs
    obs, reward, done, info = vec_env.step(action)
    
    frames.append(obs)
    rewards.append(reward)

In [290]:
done

array([False])