In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pyclifford as pc
import gymnasium as gym
import stable_baselines3
from stable_baselines3 import PPO
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy

In [2]:
def one_layer_brick_wall(circ,even=True):
    for ii in range(int(circ.N//2)):
        if even:
            circ.gate(2*ii,2*ii+1)
        else:
            circ.gate((2*ii+1),(2*ii+2)%circ.N)
    return circ

In [3]:
#construction of perfect entangler
#parameterize the U
#test if the function D==0. The U is the perfect entangler.
#Once the perfect entagnler is constrcuted, use it as two-qubit gates everywhere in the ciruicts
#fix the last section

In [4]:
#def one_layer_brick_wall_PE(circ,even=True):#perfect entangler
#    for ii in range(int(circ.N//2)):
#        if even:
#            circ."gate"(2*ii,2*ii+1)
#        else:
#            circ."gate"((2*ii+1),(2*ii+2)%circ.N)
#    return circ

In [5]:
def one_layer_measurement(circ,the,layer):
    """
    Input:
        
    """
     
    positions=[]
    for i in range(0,circ.N):
        if the[layer,i]==int(1):
            positions.append(int(i))
        else:
            continue
        
    if positions!=[]:
        circ.measure(*positions)

        
        
    return circ

In [6]:
def create_circuit(N_QUBITS,DEPTH,theta):
    circ = pc.circuit.Circuit(N_QUBITS)
    for i in range(int(DEPTH/2)):
        circ = one_layer_brick_wall(circ,even=True)
        circ = one_layer_measurement(circ,theta,int(DEPTH-1-2*i))
        circ = one_layer_brick_wall(circ,even=False)
        circ = one_layer_measurement(circ,theta,int(DEPTH-1-2*i-1))
    return circ

In [None]:
def brickwall(N_QUBIT,DEPTH):
    brickwall = []
    for _ in range(DEPTH):
        brickwall.append(one_layer_brickwall(circ))

In [1]:
def create_circuit_fixed(brickwall, theta):
    circ = pc.circuit.Circuit(N_QUBITS)
    for layer in brickwall:
        circ = layer
        circ = one_layer_measurement(circ,even=True)
    
    return circ

In [7]:
def averaged_EE(state_final):
    EE_positions=[]
    for i in range(0,state_final.N):
        EE_positions.append([int(i),int(i+1)%state_final.N])
        
    EE_list=[]
    for i in range(0,len(EE_positions)):
        EE_list.append(state_final.entropy(EE_positions[i]))
    
    averaged_EE=np.mean(EE_list)
        
    return averaged_EE #, EE_positions, EE_list

In [21]:
class Disentangler(gym.Env):
    """
    Reinforcement learning environment for the disentangler.
    """
    
    def __init__(self, n_qubits, depth, penalty_weights, rc, rs):
        super(Disentangler, self).__init__()
        
        self.N_QUBITS = n_qubits
        self.DEPTH = depth
        self.stored_entropy = [0,0]
        self.stored_theta = [0,0]
        self.penalty_weights = penalty_weights
        self.rc, self.rs = rc, rs
        
        self.action_space = gym.spaces.Discrete(self.N_QUBITS * self.DEPTH)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(self.DEPTH, self.N_QUBITS), dtype=np.int8)
        self.theta = np.zeros((self.DEPTH, self.N_QUBITS), dtype=np.int8)
        self.circ = create_circuit(self.N_QUBITS,self.DEPTH,self.theta)

    def step(self, action):
        # Initialize reward and done
        reward = 0
        done = False
        truncate = False

        # Apply the action
        theta_prev = self.theta
        h = np.zeros(self.N_QUBITS * self.DEPTH, dtype=np.int8)
        h[action] = 1
        h = h.reshape((self.DEPTH, self.N_QUBITS))
        self.theta = (self.theta + h) % 2
        theta_new = self.theta

        # Calculate entropy (assumes circuit is a predefined function)
        circ = create_circuit(self.N_QUBITS,self.DEPTH,self.theta)

        state_initial = pc.stabilizer.zero_state(self.N_QUBITS)
        state_final = circ.forward(state_initial)

        entropy = averaged_EE(state_final)
        m = [sum(self.theta[i]) for i in range(self.DEPTH)]
        self.stored_entropy = [self.stored_entropy[1], entropy]
        self.stored_theta = [self.stored_theta[1], np.dot(self.penalty_weights,m)]

        # Check if the state is trivial
        # non_trivial = any(self.theta[:,-1][i] == 0 and self.theta[:,-1][(i + 1) % self.N_QUBITS] == 0 for i in range(self.N_QUBITS))

        # Determine reward and done conditions
        
        #if self.stored_theta[0]==0 or self.stored_entropy[0]==0:
        #    reward = rc*(-self.stored_theta[1]) + rs*(1-self.stored_entropy[1]/self.stored_entropy[0])
        #else: 
        #    reward = rc*(1-self.stored_theta[1]/self.stored_theta[0]) + rs*(1-self.stored_entropy[1]/self.stored_entropy[0])
        
        
     
            
        if m[0] > self.N_QUBITS/2:
            reward += -100
            #done = True
        
        if entropy ==0:
            reward += 100
            done = True
        
        
        
        if self.stored_theta[0]*self.stored_entropy[0]==0:
            reward =0
        else:
            reward += ((rc*(self.stored_theta[0]-self.stored_theta[1])*self.stored_entropy[0] 
                      +rs*(self.stored_entropy[0]-self.stored_entropy[1])*self.stored_theta[0])
                      /self.stored_theta[0]*self.stored_entropy[0])
                      

        #if ((self.stored_theta[0]-self.stored_theta[1])*self.stored_entropy[0] 
         #              + (self.stored_entropy[0]-self.stored_entropy[1])*self.stored_theta[0]) ==0:
         #   reward = 0
        #else:
         #   reward =((rc*(self.stored_theta[0]-self.stored_theta[1])*self.stored_entropy[0] 
          #            + rs*(self.stored_entropy[0]-self.stored_entropy[1])*self.stored_theta[0])
           #          /((self.stored_theta[0]-self.stored_theta[1])*self.stored_entropy[0] 
            #           + (self.stored_entropy[0]-self.stored_entropy[1])*self.stored_theta[0]))
            
            
        #if entropy < 1e-17:
           
            
        
        
        # Return the state, reward, done flag, truncate flag, and info
        info = {}
        return self.theta, reward, done, truncate, info
    
    def reset(self, seed=None):
        # Seed the random number generator if a seed is provided
        if seed is not None:
            np.random.seed(seed)
        
        # Reset the state to an all-zero matrix
        self.theta = np.zeros((self.DEPTH, self.N_QUBITS), dtype=np.int8)

        info = {}
        return self.theta, info
    
    def render(self):
        print()

    def close(self):
        # Optional: Implement any cleanup
        pass


In [22]:
# ppo parameters
ts = 1e6
lr = 0.001
ec = 0.01

# circuit parameters
N_QUBITS=10
DEPTH=10
penalty_weights = [(DEPTH-i)/DEPTH for i in range(DEPTH)]
rc, rs = 0.5, 0.5

env = Disentangler(N_QUBITS,DEPTH,penalty_weights,rc,rs)

env.reset()
model = PPO('MlpPolicy', env, verbose=0, tensorboard_log="./tensorboard_files", learning_rate=lr, ent_coef=ec)

In [23]:
model.learn(total_timesteps=ts, tb_log_name="{N_QUBITS}x{DEPTH}_tb_{ts}_{lr}_{ec}".format(N_QUBITS=N_QUBITS, DEPTH=DEPTH, ts=ts, lr=lr, ec=ec))
model_path = "./models/{N_QUBITS}x{DEPTH}_PPO_{ts}_{lr}_{ec}".format(N_QUBITS=N_QUBITS, DEPTH=DEPTH, ts=ts, lr=lr, ec=ec)
model.save(model_path)

In [24]:
model = PPO.load(model_path)

episodes = 25
optimal_measurements = []
for _ in range(episodes):
    obs, _ = env.reset()
    done = False
    
    while not done:
        action, _states = model.predict(obs)
        obs, rewards, done, truncate, info = env.step(action)

    optimal_measurements.append(obs)

In [25]:
optimal_measurements

[array([[1, 1, 0, 1, 0, 0, 0, 1, 0, 1],
        [0, 1, 1, 0, 0, 1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int8),
 array([[1, 1, 0, 1, 0, 0, 0, 0, 1, 1],
        [0, 1, 1, 0, 1, 1, 1, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 1, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int8),
 array([[1, 1, 0, 1, 0, 0, 0, 0, 1, 1],
        [0, 1, 1, 0, 0, 1, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 

In [34]:
the_star = optimal_measurements[4]

In [35]:
#this code cannot check. it could be 0 if the disentangler is universal
circ=create_circuit(N_QUBITS,DEPTH,the_star)
stateinitial =pc.stabilizer.zero_state(N_QUBITS)  #Initial stabilizer state
statefinal=circ.forward(stateinitial) #Final stabilizer state after the circuit applied
a= averaged_EE(statefinal)
a

0.2

In [28]:
DEPTH=6
penalty_weights = [(DEPTH-i)/DEPTH for i in range(DEPTH)]
m1 = [0,0,0,0,0,1]
m2 = [0,0,0,0,0,2]

In [29]:
np.dot(penalty_weights,m1)

0.16666666666666666

In [30]:
np.dot(penalty_weights,m2)

0.3333333333333333

In [31]:
1-np.dot(penalty_weights,m2)/np.dot(penalty_weights,m1)

-1.0

In [32]:
np.dot(penalty_weights,m1)-np.dot(penalty_weights,m2)

-0.16666666666666666

In [33]:
6/3

2.0