# RL agent for unitary gate building

We want to try and implement a RL agent (following various algorithms) that learn how to build a target unitary gate in a spin chain with control on the single qubits. The agent builds a parametrized circuit of N layers (with N initially fixed) and with constant structure (but variable parameters).

See also: https://github.com/ManuelGuatto/RL_4_Robust_QC/blob/main/PPO_Nominal_Damp.ipynb

In [1]:
import sys
print("Python version:", sys.version)
import torch

Python version: 3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]


In [2]:
from functools import partial
import pennylane as qml
import matplotlib.pyplot as plt
import numpy as np
import timeit
import gymnasium as gym
# import stable_baselines3
from gymnasium import spaces
import qutip as q

# Superoperator Environment

In [47]:
from itertools import starmap, product
import numpy as np
import functools as ft
import scipy as sp
# Initialization
Id = np.eye(2)
X = np.matrix([[0,1],[1,0]])
Y = np.matrix([[0,-1j],[1j,0]])
Z = np.matrix([[1,0],[0,-1]])


Sigmam = (X+1j*Y)/2
Sigmap = (X-1j*Y)/2
# Multi-kroneker and matrix product functions
# Multi-kronecker (tensor) product. Takes a list as argument and perform the tensor product of the elements
def multikron(list):
    a = ft.reduce(np.kron, list)
    return a


# Multi-matrix product. Takes a list and perform the dot product
def multidot(list):
    a = ft.reduce(np.dot, list)
    return a
#Define Puali basis for a generic number of qubits
# Multi-qubit Pauli operator basis
def pauli_basis_mine(nq):
    sol = []
    lst = list(product((Id,X,Y,Z),repeat=nq))
    for i in range(len(lst)):
        op = multikron(lst[i])
        sol.append(op)
    return sol
# Transfer matrix, Puali transfer matrix, Choi matrix, and chi matrix superoperator representations for channles given their Krauss operators 
# Pauli transfer matrix for unitary operators: implement eq.75 in https://arxiv.org/abs/2408.12064 
# "Use PTMprocess with a list with one element to get the same result"
def PTM(unitary):
    unitary = np.matrix(unitary)
    nq = np.log2(len(unitary)).astype(np.int64)
    B = np.zeros((4 ** nq, 4 ** nq), dtype=complex)

    P = pauli_basis_mine(nq)
    for i, opi in enumerate(P):
        for j, opj in enumerate(P):
            B[i][j] = np.trace(multidot([opi,unitary,opj,unitary.H]))/(2 ** nq)
    return B


# Compute the PTM for a process given in terms of Krauss operators, eq. 75 in https://arxiv.org/abs/2408.12064
# "Takes as input a list of Krauss operators"
def PTMprocess(krauss):
    for k in range(len(krauss)):
        krauss[k] = np.matrix(krauss[k])
    nq = np.log2(len(krauss[0])).astype(np.int64)
    B = np.zeros((4 ** nq, 4 ** nq), dtype=complex)

    P = pauli_basis_mine(nq)
    for i, opi in enumerate(P):
        for j, opj in enumerate(P):
            for k in range(len(krauss)):
                B[i][j] += np.trace(multidot([opi,krauss[k],opj,krauss[k].H]))/(2 ** nq)
    return B

# Compute the state in the Pauli representation, eq.76 in https://arxiv.org/abs/2408.12064
def PTMstate(rho):
    rho = np.matrix(rho)
    nq = np.log2(len(rho)).astype(np.int64)
    B = np.zeros((4 ** nq, 1), dtype=complex)

    P = pauli_basis_mine(nq)
    for i, opi in enumerate(P):
        B[i]= np.trace(multidot([opi,rho]))/(2 ** nq)
    return B

# Convert the state from the Pauli represenation back to the density matrix
def PTMstate_to_rho(state):
    nq = (0.5*np.log2(len(state))).astype(np.int64)
    B = np.zeros((2 ** nq, 2 ** nq), dtype=complex)

    P = pauli_basis_mine(nq)
    for i, opi in enumerate(P):
        B += state[i][0]*opi
    return B

# Compute the transition matrix in the unit basis, eq.73 in https://arxiv.org/abs/2408.12064
def TMprocess(krauss):
    for k in range(len(krauss)):
        krauss[k] = np.matrix(krauss[k])
    nq = np.log2(len(krauss[0])).astype(np.int64)
    B = np.zeros((4 ** nq, 4 ** nq), dtype=complex)

    
    for k in range(len(krauss)):
        B += np.kron(krauss[k].conjugate(),krauss[k])
    return B

# Perfrom the vectorization of a density matrix
def TMstate(rho):
    nq = np.log2(len(rho)).astype(np.int64)
    B = np.reshape(rho,(4**nq,))
    return B

# Compute the process matrix of a quantum channel. Eq.88 in https://arxiv.org/abs/2408.12064
def Chiprocess(krauss):
    for k in range(len(krauss)):
        krauss[k] = np.matrix(krauss[k])
    nq = np.log2(len(krauss[0])).astype(np.int64)
    B = np.zeros((4 ** nq, 4 ** nq), dtype=complex)

    P = pauli_basis_mine(nq)
    for i, opi in enumerate(P):
        for j, opj in enumerate(P):
            for m, opm in enumerate(P):
                for k in range(len(krauss)):
                    B[i][j] += np.trace(multidot([opm,opi,krauss[k],opm,krauss[k].H,opj]))/(8 ** nq)
    return B

# Vectorization of an operator
def vec(rho):
    nq = np.log2(len(rho)).astype(np.int64)
    B = np.reshape(rho,(4**nq,))
    return B

# Compute the Choi matrix of a quantum channel. Eq.112 in https://arxiv.org/abs/2408.12064
def Choi(krauss):
    for k in range(len(krauss)):
        krauss[k] = np.matrix(krauss[k])
    nq = np.log2(len(krauss[0])).astype(np.int64)
    
    B = np.zeros((4 ** nq, 4 ** nq), dtype=complex)

    for k in range(len(krauss)):
        B += np.outer(vec(krauss[k]),vec(krauss[k]).conjugate())
    return B
# Compute the process fidelity between two unitaries, eq.237 in https://arxiv.org/abs/2408.12064
def process_fid(un1,un2):
    nq = np.log2(len(un1)).astype(np.int64)
    fid = np.trace(np.dot(PTM(un1),np.linalg.inv(PTM(un2))))/(4**nq)
    return fid

# Generate an amplitude-phase damping superoperator channel applied to the qubit in position [qubit] in a system with num_qubit qubits
# Ref. 10.1109/ACCESS.2020.3025619 "Approximating Decoherence Processes for the Design and Simulation of Quantum Error
#Correction Codes on Classical Computers" https://ieeexplore.ieee.org/document/9201447

def TM_APD(t,T1,T2, qubit, num_qubit):
    #Karuss operators
    gamma = 1-np.exp(-t/T1)
    lam = 1-np.exp(t/T1-2*t/T2) 

    E0 = ((1+np.sqrt(1-gamma-(1-gamma)*lam))/2)*Id+((1-np.sqrt(1-gamma-(1-gamma)*lam))/2)*Z
    E1 = (np.sqrt(gamma)/2)*X+ 1j*(np.sqrt(gamma)/2)*Y
    E2 = (np.sqrt((1-gamma)*lam)/2)*Id-(np.sqrt((1-gamma)*lam)/2)*Z

    list = [Id]*num_qubit

    list[qubit] = E0
    K0 = multikron(list)
    list[qubit] = E1
    K1 = multikron(list)
    list[qubit] = E2
    K2 = multikron(list)
    

    Eapd = [K0,K1,K2]
    sol = TMprocess(Eapd)
    return sol
# Linblad superoperator from jump operators in the master equation:
#See  https://journals.aps.org/prresearch/pdf/10.1103/PhysRevResearch.4.023216 (eq.3) and also     
#https://doi.org/10.1063/1.1518555 
# Generate the Linblad superoperator for a system of num_qubit qubits, with Hmiltonian h and list of jum operators L
def Linbladian(h,L,num_qubit):
    Ide = multikron([Id]*num_qubit)
    miH = -1j*(multikron([Ide,h])-multikron([h.conjugate(),Ide]))
    
    mF = np.zeros((4 ** num_qubit, 4 ** num_qubit), dtype=complex)
    for k in range(len(L)):
        mF += multikron([L[k].conjugate(),L[k]])-0.5* multikron([Ide,multidot([L[k].conjugate().T,L[k]])])-0.5* multikron([multidot([L[k].T,L[k].conjugate()]),Ide])
    return miH+mF

# Exponentiate the Linblad superoperator to obatin the quantum channel generated by it
def LinbladianExp(t,h,L,num_qubit):
    sol = sp.linalg.expm(t*Linbladian(h,L,num_qubit))

    return sol
# Generate the amplitude-phase damping jump operators for the qubit in position [qubit] in a system with num_qubit qubits
def Linb_APD(T1,T2, qubit, num_qubit):
    #Karuss operators
    gammaR = 1/T1
    gammaD = (-1/T1+2/T2)/2 

    
    L1 = np.sqrt(gammaD/2)*Z
    L2 = np.sqrt(gammaR)*Sigmam
    
    print(L1, L2)
    list = [Id]*num_qubit

    list[qubit] = L1
    L1m = multikron(list)
    list[qubit] = L2
    L2m = multikron(list)
    
    Eapd = [L1m,L2m]

    return Eapd

# Visualization aid for matrices
#from qiskit.visualization import array_to_latex


# Generate the Linblad superoperator for a system of num_qubit qubits, with Hmiltonian h and Linblad generator super operator L
def LinbladianV2(h,L,num_qubit):
    Ide = multikron([Id]*num_qubit)
    miH = -1j*(multikron([Ide,h])-multikron([h.conjugate(),Ide]))
    
    mF = L
    return miH+mF

# Exponentiate the Linblad superoperator to obatin the quantum channel generated by it
def LinbladianExpV2(t,h,L,num_qubit):
    sol = sp.linalg.expm(t*LinbladianV2(h,L,num_qubit))

    return sol

In [48]:
num_wires = 2

Xp, Yp, Zp = qml.PauliX, qml.PauliY, qml.PauliZ

proj_0 = 1/2*(Zp(wires= [0]) + qml.Identity(wires=[0]))
proj_1 = 1/2*(-Zp(wires= [0]) + qml.Identity(wires=[0]))


Idp = qml.Identity(wires=[0,1]).matrix()
#param0 = -0.158
#param1 = 0.110
#param2 = -(0.158+0.152)

ops =  [proj_0 @ (Zp(1)/2),proj_1 @ (Zp(1)/2),proj_1 @ (Xp(1)/2)]

couplings = [-2*np.pi*0.158,-2*np.pi*(0.158-0.152),-2*np.pi*0.110]

Ham = qml.dot(couplings,ops).matrix()

Ham2 = qml.dot(couplings,ops)



def profit(op_mat,target_mat):
    """Compute the fidelity function for given parameters."""
    #op_mat = param_circ(Nfree,params)
    # Compute the fidelity between the target and the gate
    return np.abs(np.trace(target_mat.conj().T @ ( op_mat))) / 2**num_wires
#np.abs(np.trace(target_mat.conj().T @ op_mat)) / np.abs(np.trace(target_mat.conj().T @ target_mat)) #

# Version without Super - Operators for observation space
def Ufree(T):
    evo = qml.exp(Ham2, -1j * T)
    return evo.matrix()

def UpulseV2(params):
    # parameters are: [time interval duration, phase]
    
    rabi_f = 0.5*2*np.pi
    coeff=[rabi_f*np.cos(params[1]),rabi_f*np.sin(params[1])]

    op = [Xp(0) @ qml.Identity(1),Yp(0) @ qml.Identity(1)]
    Hmw = qml.dot(coeff,op)
    
    Htot = Ham2+Hmw
    
    deltaT = np.abs(params[0])

    evo = qml.exp(Htot, -1j * deltaT)
    
    return evo.matrix()


# We want to add some amplitude-phase damping on the electron qubit with characteristic times T1 and T2 to both the free evolution and the evolution with pulses
def UfreeSuper(T):
    h = Ham
    T1 = T1env # 6e3 microseconds
    T2 = T2env # Pure dephasing time in microseconds as in Suter Grover algorithm implementation
    num_qubit = num_wires
    L = Linb_APD(T1,T2, 0, num_qubit)  
    evo = LinbladianExp(T,h,L,num_qubit)
    #evo = qml.exp(H, -1j * T)
    return evo

def UpulseSuper(params):
    # parameters are: [time interval duration, phase]
    
    rabi_f = 0.5*2*np.pi
    coeff=[rabi_f*np.cos(params[1])/2,rabi_f*np.sin(params[1])/2]

    op = [Xp(0) @ qml.Identity(1),Yp(0) @ qml.Identity(1)]
    Hmw = qml.dot(coeff,op).matrix()
    
    Htot = Ham+Hmw

    T1 = T1env # 6e3 microseconds
    T2 = T2env # Pure dephasing time in microseconds as in Suter Grover algorithm implementation
    num_qubit = num_wires
    L = Linb_APD(T1,T2, 0, num_qubit)
    
    deltaT = params[0]

    evo = LinbladianExp(deltaT,Htot,L,num_qubit)

    #evo = qml.exp(Htot, -1j * deltaT)
    
    return evo
#16*16

Ham

array([[-0.49637164+0.j,  0.        +0.j,  0.        +0.j,
         0.        +0.j],
       [ 0.        +0.j,  0.49637164+0.j,  0.        +0.j,
         0.        +0.j],
       [ 0.        +0.j,  0.        +0.j, -0.01884956+0.j,
        -0.34557519+0.j],
       [ 0.        +0.j,  0.        +0.j, -0.34557519+0.j,
         0.01884956+0.j]])

In [49]:
params_suter_CNOT = [3.78,1.88,0.0 , 2.11,3.96 ,np.pi/5 ,2.15,1.9 ,np.pi/2 ,0.63]
target2 = (proj_0 @ qml.Identity(1)-1j*proj_1 @ Xp(1)).matrix()
target3 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, -1j], [0, 0, -1j, 0]])

#profit(suter_CNOT(params_suter_CNOT,3),target3)

In [52]:


def suter_layer(params):
    # Here we can define a layer as an evolution without pulse control follow by a pulse control with fixed Rabi frequency
    U = UpulseV2([params[1],params[2]]) @ Ufree(params[0])
    return U

def suter_layer_Super(params):
    # Here we can define a layer as an evolution without pulse control follow by a pulse control with fixed Rabi frequency
    U = UpulseSuper([params[1],params[2]]) @ UfreeSuper(params[0])
    return U

def suter_hadamard_Super(params,N):
    U = multikron([Id]*(2*num_wires))
    for i in range(N):
        para = params[0+3*i:3+3*i]
        U = suter_layer_Super(para) @ U
    U = UfreeSuper(params[-1]) @ U
    return U

def profitSuper(op_mat,target_mat):
    """Compute the fidelity function for given parameters."""
    #op_mat = param_circ(Nfree,params)
    # Compute the fidelity between the target and the gate
    return np.abs(np.trace(target_mat.conj().T @ ( op_mat))) / np.abs(np.trace(target_mat.conj().T @ target_mat))

####

# We want to add some amplitude-phase damping on the electron qubit with characteristic times T1 and T2 to both the free evolution and the evolution with pulses
def UfreeSuper_e(T,T1,T2):
    h = Ham
    #T1 = 6000 # 6e3 microseconds
    #T2 = 35 # Pure dephasing time in microseconds as in Suter Grover algorithm implementation
    num_qubit = num_wires
    L = Linb_APD(T1,T2, 0, num_qubit)
    evo = LinbladianExp(T,h,L,num_qubit)
    #evo = qml.exp(H, -1j * T)
    return evo


###to compare with suter parameters#### Definition with _e at the end!
def UpulseSuper_e(params,T1,T2):
    # parameters are: [time interval duration, phase]
    
    rabi_f = 0.5*2*np.pi
    coeff=[rabi_f*np.cos(params[1])/2,rabi_f*np.sin(params[1])/2]

    op = [Xp(0) @ qml.Identity(1),Yp(0) @ qml.Identity(1)]
    Hmw = qml.dot(coeff,op).matrix()
    
    Htot = Ham+Hmw
    #T1 = 6000 # 6e3 microseconds
    #T2 = 35 # Pure dephasing time in microseconds as in Suter Grover algorithm implementation
    num_qubit = num_wires
    L = Linb_APD(T1,T2, 0, num_qubit)
    
    deltaT = params[0]
    evo = LinbladianExp(deltaT,Htot,L,num_qubit)

    #evo = qml.exp(Htot, -1j * deltaT)
    
    return evo

def suter_layer_Super_e(params,T1,T2):
    # Here we can define a layer as an evolution without pulse control follow by a pulse control with fixed Rabi frequency
    U = UpulseSuper_e([params[1],params[2]],T1,T2) @ UfreeSuper_e(params[0],T1,T2)
    return U
    
def suter_circ_Super(params,N,T1,T2):
    U = multikron([Id]*(2*num_wires))
    for i in range(N):
        para = params[0+3*i:3+3*i]
        U = suter_layer_Super_e(para,T1,T2) @ U
    U = UfreeSuper_e(params[-1],T1,T2) @ U
    return U

In [7]:
def suter_CNOT_Super2(params,N):
    U = UfreeSuper(0)
    for i in range(N):
        para = params[0+3*i:3+3*i]
        U = suter_layer_Super(para) @ U
    U = UfreeSuper(params[-1]) @ U
    return U

#suter_layer([1.1,1.1,np.pi])

In [23]:
T1env = 6000
T2env = 35

params_suter_CNOT = [3.78,1.88,0.0 , 2.11,3.96 ,np.pi/5 ,2.15,1.9 ,np.pi/2 ,0.63]
params_suter_Hadamard = [0.74,0.23,3*np.pi/2 , 0.22,1.26 ,3*np.pi/2 ,0.43,1.5 ,np.pi/2 ,0.89]

target3  = (qml.Identity(0) @ qml.Hadamard(wires=[1])).matrix()

target2SuperCNOT = TMprocess([target2])
target3SuperHad = TMprocess([target3])
profitSuper(suter_circ_Super(params_suter_Hadamard,3,T1env,T2env),target3SuperHad)
#profitSuper(suter_layer_Super([1.1,1.1,np.pi]),TMprocess([Idp]))
#target2Super

np.float64(0.8763900930360687)

In [61]:
class Parent1:
    def __init__(self):
        print("Initializing Parent1")

class Parent2:
    def __init__(self):
        print("Initializing Parent2")

class Child(Parent1, Parent2):
    def __init__(self):
        Parent1.__init__(self)  # Explicit call
        Parent2.__init__(self)  # Explicit call
        print("Initializing Child")
    def func(self):
        Parent1.__init__(self)  # Explicit call
        Parent2.__init__(self)  # Explicit call
        print("Initializing Child")

child = Child().func()


Initializing Parent1
Initializing Parent2
Initializing Child
Initializing Parent1
Initializing Parent2
Initializing Child


In [10]:
class Parametric_envL(gym.Env):
    """
    Custom Environment that follows gym interface.
    """

    #metadata = {"render_modes": ["console"]}

    MAX_STEPS = 2
    INFIDELITY_THRESHOLD = 0.8


    def __init__(self, env_conf):
        #super(Parametric_env, self).__init__()
        #self.render_mode = render_mode
        #target2Super = TMprocess([target2])

        self.target = env_conf["Target"] #qml.Toffoli([0, 1, 2]).matrix()
        self.alpha = env_conf["Lagrange_time"]

        self.U = UfreeSuper(0)
        self.Utest = Ufree(0)

        #Define the action and observation spaces
        #self.action_space = spaces.Box(low=0, high=np.array([0.02, 
        #                                                     2*np.pi, 2*np.pi, 2*np.pi, 2*np.pi, 
        #                                                     2*np.pi, 2*np.pi, 2*np.pi, 2*np.pi, 
        #                                                     2*np.pi]), dtype=np.float32)
        #Here we normalize the action space
        self.action_space = spaces.Box(low=-1, high= 1, shape=(3,), dtype=np.float32)
        
        #self.action_space = spaces.Dict(
        #    {
        #        "angle_Z": spaces.Discrete(5, start=-2, seed=42),
        #        "angle_X": spaces.Discrete(5, start=-2, seed=42),
        #    }
        #)
        #self.action_space = spaces.MultiDiscrete([5,5],start=[-2,-2])
        #self.action_space = spaces.MultiDiscrete([10,9,9,9])
      #  self.observation_space = spaces.Box(low=-1, high=1, shape=(512,), dtype=np.float64)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(32,), dtype=np.float64)

        #self.observation_space = spaces.Dict(
        #    {
        #        "real_part": spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float64),
        #        "imaginary_part": spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float64),
        #    }
        #)

    # Function that gets the state and gives the observation in the correct format
    def _get_obs(self):
        #return {"real_part": np.real(self.U), "imaginary_part": np.imag(self.U)}
       # obs = np.concatenate([np.real(self.U.reshape(256,)),np.imag(self.U.reshape(256,))])
        obs = np.concatenate([np.real(self.Utest.reshape(16,)),np.imag(self.Utest.reshape(16,))])
        return obs
    #def _get_obs(self):
    #    #return {"real_part": np.real(self.U), "imaginary_part": np.imag(self.U)}
    #    return {"real_part": np.real(self.U.reshape(4,)), "imaginary_part": np.imag(self.U.reshape(4,))}
    
    def reset(self,seed=None, options=None):
        super().reset(seed=seed)
        self.U = UfreeSuper(0)
        self.Utest = Ufree(0)
       # self.Uobs = 
        #self.rho_hat = self.rho
        #self.true_fidelity = 0
        self.fidelity = profitSuper(self.U,self.target)#0
        self.count = 0
        self.reward = 0
        self.done = False
        self.duration = 0
        observation = self._get_obs()
        self.info = {}

        return observation,{}
    
    # Function to convert the action sampled from action_space into angles for the rotations in the layer
    def _get_angle(self,para):
        az = 2*np.pi*para      
        return az
    
    # Function to renormlize the times in between [0,10]
    def _get_times(self,para):
        az = 2.0*np.abs(para) + np.abs(0.1)  #TIMES##########################################
        
        return az
    # Function to renormlize the times in between [0,10]
    def _get_times2(self,para):
        az = 2.2*np.abs(para) + np.abs(0.3)  #TIMES##########################################
        
        return az



    def step(self, action):
        #Check the state of the episode
        if self.done:
            print("EPISODE DONE!!!")
        elif (self.count == self.MAX_STEPS):
            self.done = True;
            #truncated = self.done; 
        else:
            assert self.action_space.contains(action)
            self.count += 1
            #self.duration += 4*np.pi*np.abs(action[0])
            
            
        #Define the effect of the unitary layer
        self.params = [self._get_times(action[0]),self._get_times(action[1]),self._get_angle(action[2])] 
        #print(self.params)
        op =  suter_layer_Super(self.params)  ###CHANGE: HERE
        opobs = suter_layer(self.params)
       # print(q.Qobj(op))
       # Upre = self.U
        Uobs = self.Utest

        self.U = op @ self.U                  ##CHANGE: HERE
        self.Utest = opobs @ self.Utest###!
        #print("unitary=", self.Utest)

        #Compute the reward
        #fid0 = self.fidelity
        #fid = profit(self.U,self.target)
        #fid = profit2(self.U,self.target,self.alpha,self.duration)
        self.fidelity = profitSuper(self.U,self.target)  ###CHANGE: HERE
        #self.fidelity = profit(self.Utest, target3)  ###CHANGE: HERE
       # print(self.fidelity)
        #if self.done:
        #    self.reward = self.fidelity
        #else:
        #    self.reward = 0
        #self.reward = fid#-fid0#self.fidelity
        self.info = {"Fidelity": self.fidelity,"Pre":Uobs,"Next":self.Utest}

        if 1-self.fidelity < self.INFIDELITY_THRESHOLD:
            self.done = True

        # We give reward only at the end of the episode
        if self.done:
            self.reward =   -np.log(1-self.fidelity) #-0.05*self.count
        else:
            self.reward = 0

        #Rewavrds at each step
        #self.reward = self.fidelity - fid0
        observation = self._get_obs()

        
        #try:
        #    assert self.observation_space.contains(self.rho)
        #except AssertionError:
        #    print("INVALID STATE", self.rho)
        #terminated = self.done

       # print(profit2)
        return (observation, self.reward, self.done,self.done, self.info)

   

        
    def close(self):
        pass

#test = q.rand_unitary(16, density=0.75, dims=None)
#test

In [11]:
#test = q.rand_unitary(16, density=0.75, dims=None)
#profitSuper(test.full(),target2SuperCNOT)

In [54]:
#https://optuna.github.io/optuna-dashboard/
#https://optuna-dashboard.readthedocs.io/en/stable/getting-started.html


from stable_baselines3.common.env_checker import check_env

rho_target = UfreeSuper(0)
#target2Super = TMprocess([target2])
alpha = 1

env = Parametric_envL(env_conf={"Target":rho_target,"Lagrange_time":alpha})
# If the environment don't follow the interface, an error will be thrown
check_env(env, warn=True)

[[ 0.11934843  0.        ]
 [ 0.         -0.11934843]] [[0.        +0.j 0.01290994+0.j]
 [0.        +0.j 0.        +0.j]]
[[ 0.11934843  0.        ]
 [ 0.         -0.11934843]] [[0.        +0.j 0.01290994+0.j]
 [0.        +0.j 0.        +0.j]]
[[ 0.11934843  0.        ]
 [ 0.         -0.11934843]] [[0.        +0.j 0.01290994+0.j]
 [0.        +0.j 0.        +0.j]]
[[ 0.11934843  0.        ]
 [ 0.         -0.11934843]] [[0.        +0.j 0.01290994+0.j]
 [0.        +0.j 0.        +0.j]]
[[ 0.11934843  0.        ]
 [ 0.         -0.11934843]] [[0.        +0.j 0.01290994+0.j]
 [0.        +0.j 0.        +0.j]]
[[ 0.11934843  0.        ]
 [ 0.         -0.11934843]] [[0.        +0.j 0.01290994+0.j]
 [0.        +0.j 0.        +0.j]]
[[ 0.11934843  0.        ]
 [ 0.         -0.11934843]] [[0.        +0.j 0.01290994+0.j]
 [0.        +0.j 0.        +0.j]]
[[ 0.11934843  0.        ]
 [ 0.         -0.11934843]] [[0.        +0.j 0.01290994+0.j]
 [0.        +0.j 0.        +0.j]]
[[ 0.11934843  0.       

In [55]:
from stable_baselines3 import PPO, A2C, DQN, TD3, DDPG
from stable_baselines3.common.env_util import make_vec_env

#target2Super = TMprocess([target3])
# Instantiate the env
#env_conf=dict(Target=Id,Lagrange_time=alpha)

vec_env = make_vec_env(lambda:Parametric_envL(env_conf={"Target": target3SuperHad,"Lagrange_time":alpha})
, n_envs=1)

#vec_env = make_vec_env(Parametric_env, n_envs=1, env_kwargs=dict(Target=Id,Lagrange_time=alpha))

[[ 0.11934843  0.        ]
 [ 0.         -0.11934843]] [[0.        +0.j 0.01290994+0.j]
 [0.        +0.j 0.        +0.j]]


In [14]:
%%time

numberofpulses = 0
t1 = 2
t2 = 16
T1env = 6000
T2env = 35

CPU times: total: 0 ns
Wall time: 0 ns


In [20]:
env = Parametric_envL(env_conf={"Target": target3SuperHad,"Lagrange_time":alpha})



#    n_steps: 32
 #   gamma: 0.999
#    learning_rate: 0.0011773587234305126
#    ent_coef: 5.272637170414138e-07
#    clip_range: 0.2
#    n_epochs: 10
#    gae_lambda: 0.9
#    max_grad_norm: 0.7
#    vf_coef: 0.8595041935037702
#    net_arch: small
#    activation_fn: tanh


#{'batch_size': 512, 'n_steps': 16, 'gamma': 0.9999,
#'learning_rate': 0.0002994563320507711, 
#'ent_coef': 0.0032639297015691062, 
#'clip_range': 0.3, 'n_epochs': 5,
#'gae_lambda': 0.95, 'max_grad_norm': 1,
# 'vf_coef': 0.866843416840177,
#'net_arch': 'medium', 
#'activation_fn': 'relu'}. Best is trial 3 with value: 2.626774.

#policy, env, 
#learning_rate=0.0003, 
#n_steps=2048, 
#batch_size=64, 
#n_epochs=10, 
#gamma=0.99, 
#gae_lambda=0.95, 
#clip_range=0.2, 
#clip_range_vf=None, 
#normalize_advantage=True, 
#ent_coef=0.0, 
#vf_coef=0.5, 
#max_grad_norm=0.5, 
#use_sde=False, 
#sde_sample_freq=-1, 
#rollout_buffer_class=None, 
#rollout_buffer_kwargs=None, 
#target_kl=None, 
#stats_window_size=100, 
#tensorboard_log=None, 
#policy_kwargs=None, 
#verbose=0, 
#seed=None, 
#device='auto',
#_init_setup_model=True)

# Best so far: model = PPO("MlpPolicy", env, gamma=0.9998,n_steps=1024,batch_size=64,clip_range=0.1, n_epochs=10,ent_coef=0.00, verbose=1).learn(4000_000)
# with parameters: t = 7# number = 5




#{'batch_size': 512, 'n_steps': 16, 'gamma': 0.9999,
#'learning_rate': 0.0002994563320507711, 
#'ent_coef': 0.0032639297015691062, 
#'clip_range': 0.3, 'n_epochs': 5,
#'gae_lambda': 0.95, 'max_grad_norm': 1,
# 'vf_coef': 0.866843416840177,
#'net_arch': 'medium', 
#'activation_fn': 'relu'}. Best is trial 3 with value: 2.626774.

# Train the agent
#MlpPolicy
#model = PPO("MlpPolicy", env,  
#            learning_rate=0.0003,gamma=0.99,
 #           n_steps=2048,batch_size=64,clip_range=0.2,
 #           n_epochs=20,ent_coef = 0.001,
 #           verbose=1,tensorboard_log="./ppo_cartpole_tensorboard/").learn(2000_000)

model = PPO("MlpPolicy", env, learning_rate=0.00005,
            gamma=0.99,n_steps=1024,batch_size=256,
            clip_range=0.3, n_epochs=10,ent_coef=0.003, 
            verbose = 1,tensorboard_log="./ppo_cartpole_tensorboard/diss3/").learn(30)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./ppo_cartpole_tensorboard/diss3/PPO_3
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.7      |
|    ep_rew_mean     | 0.477    |
| time/              |          |
|    fps             | 199      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 1024     |
---------------------------------


In [21]:
obs = vec_env.reset()
n_steps = 17
for step in range(n_steps):
    action, _ = model.predict(obs, deterministic= True)
    print(f"Step {step + 1}")
    print("Action: ", [action[0][i] for i in range(3)])
    obs, reward, done, info = vec_env.step(action)
    #print("obs=", obs, "reward=", reward, "done=", done)
    print("reward=", reward,"fidelity=",info[0]["Fidelity"], "done=", done)
    #vec_env.render()
    if done:
        # Note that the VecEnv resets automatically
        # when a done signal is encountered
        if reward > 0.99:
            print("Goal reached", "Fidelity=", info[0])
        else:
            print(" Max number of layers", "Fidelity=", info)
        break

Step 1
Action:  [np.float32(0.020665297), np.float32(0.028943848), np.float32(0.023901125)]
reward= [0.] fidelity= 0.007715618526455793 done= [False]
Step 2
Action:  [np.float32(0.011491087), np.float32(0.026448838), np.float32(0.02234408)]
reward= [0.] fidelity= 0.023192025107939305 done= [False]
Step 3
Action:  [np.float32(0.001166885), np.float32(0.017090622), np.float32(0.016121058)]
reward= [0.0343832] fidelity= 0.0337988143035623 done= [ True]
 Max number of layers Fidelity= [{'Fidelity': np.float64(0.0337988143035623), 'Pre': array([[ 0.53122157+0.18918657j, -0.00152893-0.02470436j,
        -0.02453987-0.81882003j,  0.10144593-0.00630192j],
       [ 0.00194088-0.02467499j,  0.53057092-0.19341683j,
         0.09902556-0.02263498j, -0.21112929-0.79093478j],
       [ 0.2643788 -0.77950741j,  0.06218855+0.00097028j,
         0.54737128-0.02598741j,  0.00151639+0.13531716j],
       [ 0.05976888+0.01729718j, -0.0298281 -0.82201807j,
        -0.0019526 +0.1353465j ,  0.54798658+0.03022

In [22]:
!tensorboard --logdir ./ppo_cartpole_tensorboard/diss3

^C
