In [1]:
import pennylane as qml
import pennylane.numpy as np
import gymnasium as gym
import stable_baselines3
from stable_baselines3 import PPO
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
import matplotlib.pyplot as plt
!nvidia-smi

Thu Aug 15 23:20:40 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.99                 Driver Version: 555.99         CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   48C    P3             12W /   35W |       0MiB /   8188MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
def Cliff2():
    """
    Random 2-qubit Clifford circuit.

    Arguments:
        -nodes (np.ndarray): 
    
    Returns:
        -null
    """
    
    weights = np.random.randint(2, size=(2, 10))
    
    return qml.matrix(qml.RandomLayers(weights=weights,wires=[0,1])).numpy()

In [3]:
def RandomLayers(N_QUBITS, DEPTH):
    """
    Generates brick wall pattern of random 2 qubit Clifford gates

    Arguments:
        -N_QUBITS (int): Number of qubits
        -DEPTH (int): Depth of the circuit

    Returns:
        -random_layers (np.ndarray): Array of 4x4 unitaries (N_QUBITS, DEPTH, 4, 4)
    
    """

    random_layers = []
    for t in range(DEPTH):
        layer = []
        for x in range(0,N_QUBITS,2):
                layer.append(Cliff2())
        random_layers.append(layer)

    return random_layers


In [4]:
N_QUBITS = 8
DEPTH = 8

dev = qml.device("default.qubit", wires=N_QUBITS)

@qml.qnode(dev)
def circuit(theta, random_layers):
    """
    Quantum circuit with random entangling Clifford layers and disentangling layers.
    
    Arguments:
        -theta (np.ndarray): Binary matrix representing the positions of projections. (N_QUBITS, DEPTH)
    
    Returns:
        -Average Von Neumann entropy (float32): Average of 2-qubit Von Neumann entropies over all neighbors.
    """

    theta = theta.T
    DEPTH,N_QUBITS = np.shape(theta)

    for t in range(DEPTH):
        layer = random_layers[t]
        if t%2==0:
            for x in range(0,N_QUBITS,2):
                brick = layer[int(x/2)]
                qml.QubitUnitary(brick,wires=[x,x+1])
        elif t%2==1:
            for x in range(1,N_QUBITS-2,2):
                brick = layer[int((x-1)/2)]
                qml.QubitUnitary(brick,wires=[x,x+1])
            brick = layer[-1]
            qml.QubitUnitary(brick,wires=[N_QUBITS-1,0])
            
        projections = theta[t]
        for x in range(N_QUBITS):
            if projections[x]==1:
                qml.Projector(state=[0],wires=[x])
            
    entropies = []
    for x in range(N_QUBITS-1):
        entropies.append(qml.vn_entropy(wires=[x,x+1]))
    entropies.append(qml.vn_entropy(wires=[N_QUBITS-1,0]))
        
    return entropies

In [5]:
class Disentangler(gym.Env):
    """
    Reinforcement learning environment for the disentangler.
    """
    
    def __init__(self, n_qubits, depth, random_layers):
        super(Disentangler, self).__init__()
        
        self.N_QUBITS = n_qubits
        self.DEPTH = depth
        self.random_layers = random_layers

        self.action_space = gym.spaces.Discrete(self.N_QUBITS * self.DEPTH)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(self.N_QUBITS, self.DEPTH), dtype=np.int8)
        self.state = np.zeros((self.N_QUBITS, self.DEPTH), dtype=np.int8)

    def step(self, action):
        # Initialize reward and done
        reward = 0
        done = False
        truncate = False

        # Apply the action
        h = np.zeros(self.N_QUBITS * self.DEPTH, dtype=np.int8)
        h[action] = 1
        h = h.reshape((self.N_QUBITS, self.DEPTH))
        self.state = (self.state + h) % 2

        # Calculate entropy (assumes circuit is a predefined function)
        entropies = circuit(self.state, self.random_layers)
        entropy = np.mean(entropies)

        # Check if the state is trivial
        non_trivial = any(self.state[:,-1][i] == 0 and self.state[:,-1][(i + 1) % self.N_QUBITS] == 0 for i in range(self.N_QUBITS))

        # Determine reward and done conditions
        if entropy < 1e-15 and non_trivial:
            reward = 100
            done = True
        
        # Return the state, reward, done flag, truncate flag, and info
        info = {}
        return self.state, reward, done, truncate, info
    
    def reset(self, seed=None):
        # Seed the random number generator if a seed is provided
        if seed is not None:
            np.random.seed(seed)
        
        # Reset the state to an all-zero matrix
        self.state = np.zeros((self.N_QUBITS, self.DEPTH), dtype=np.int8)

        info = {}
        return self.state, info
    
    def render(self):
        print()

    def close(self):
        # Optional: Implement any cleanup
        pass


In [6]:
ts = 1e4
lr = 0.001
ec = 0.01

env = Disentangler(n_qubits=N_QUBITS, depth=DEPTH, random_layers=RandomLayers(N_QUBITS, DEPTH))
env.reset()
model = PPO('MlpPolicy', env, verbose=0, tensorboard_log="./tensorboard_files", learning_rate=lr, ent_coef=ec)

In [7]:
model.learn(total_timesteps=ts, tb_log_name="{N_QUBITS}x{DEPTH}_tb_{ts}_{lr}_{ec}".format(N_QUBITS=N_QUBITS, DEPTH=DEPTH, ts=ts, lr=lr, ec=ec))
model_path = "./models/{N_QUBITS}x{DEPTH}_PPO_{ts}_{lr}_{ec}".format(N_QUBITS=N_QUBITS, DEPTH=DEPTH, ts=ts, lr=lr, ec=ec)
model.save(model_path)

In [8]:
model = PPO.load(model_path)

episodes = 25
optimal_measurements = []
for _ in range(episodes):
    obs, _ = env.reset()
    done = False
    
    while not done:
        action, _states = model.predict(obs)
        obs, rewards, done, truncate, info = env.step(action)

    optimal_measurements.append(obs)

In [9]:
theta = optimal_measurements[1]

entropies_test = []
for _ in range(25):
    random_layers = RandomLayers(N_QUBITS,DEPTH)
    entropies_test.append(np.mean(circuit(theta, random_layers)))

entropies_test

[0.06144348595217766,
 0.1428470817752088,
 0.0530068066089596,
 0.0250226326181361,
 0.13477340929984982,
 0.053006806608959275,
 0.07159594788837442,
 1.1769091305122758e-15,
 0.13477340929984993,
 0.0670651102931926,
 0.13477340929984877,
 0.053740798912814816,
 0.098521322445006,
 0.09840355533148731,
 0.06248423874668095,
 9.654903270690545e-16,
 0.08141205701444057,
 9.378034913264303e-16,
 0.05300680660896026,
 0.1347734092998497,
 1.8030015088340912e-15,
 1.2825112473919477e-15,
 6.739528968447817e-16,
 0.08473653881174412,
 0.10433693231642568]