In [159]:
#Import everything
import numpy as np
# from gym.utils import seeding
# from gym.spaces import Discrete, Tuple, Box
# import gym
from qiskit.quantum_info import state_fidelity
from qiskit import *
from numpy.linalg import matrix_power
import pandas as pd

In [177]:
#Set globally used variables
n = 10**6
k = 30

GATES = {
    0: np.array([[1, 1], [1, -1]]) * 1/np.sqrt(2), # H
    1: np.array([[1, 0], [0, np.exp(1j * np.pi / 4)]]), # T
    # 2: np.array([[0, 1], [1, 0]]), # X
    2: np.array([[1, 0], [0, 1]]) # I
}

thetas = np.array(pd.cut(np.linspace(0, np.pi, k), k, precision=10, include_lowest=True))
thetas[0] = pd.Interval(0, thetas[0].right, closed='both')
phis = np.array(pd.cut(np.linspace(0, 2*np.pi, 2*k), 2*k,  precision=10, include_lowest=True))
phis[0] = pd.Interval(0, phis[0].right, closed='both')

states = [(i, j) for i in range(len(thetas)) for j in range(len(phis))]
values = np.zeros(len(thetas) * len(phis))

print(len(states))

1800


In [170]:
def generate_target_circuit(n):
    s = np.array([1, 0])
    ht = GATES[0] @ GATES[1]
    return matrix_power(ht, n) @ s

def statevector_to_angles(state):
    svp = [abs(state[0])*np.exp(1j * np.angle(state[0])), abs(state[1])*np.exp(1j * np.angle(state[1]))]
    svp /= np.exp(1j * np.angle(state[0]))
    theta = 2 * np.arccos(abs(svp[0]))
    phi = np.angle(svp[1])
    if (phi < 0): phi += 2*np.pi
    return theta, phi
    # return np.cos(theta / 2) * np.array([1,0]) + np.exp(1j * phi) * np.sin(theta / 2) * np.array([0, 1])

def statevector_to_bloch_reg(state):
    theta, phi = statevector_to_angles(state)

    # take into consideration the poles
    for i in range(len(thetas)):
        if (theta in thetas[i]):
            theta_reg = i
    for i in range(len(phis)):
        if (phi in phis[i]):
            phi_reg = i

    if (theta_reg == 0):
        theta_reg = phi_reg = 0
    if (theta_reg == len(thetas)-1):
        theta_reg = len(thetas)-1
        phi_reg = len(phis)-1
    return (theta_reg, phi_reg)

def random_state_in_reg(reg):
    if (reg[0] == 0 or reg[0] == len(thetas)-1):
        phi = np.random.uniform(0, 2*np.pi)
    else:
        phi = np.random.uniform(phis[reg[1]].left, phis[reg[1]].right)
    theta = np.random.uniform(thetas[reg[0]].left, thetas[reg[0]].right)
    return np.cos(theta / 2) * np.array([1,0]) + np.exp(1j * phi) * np.sin(theta / 2) * np.array([0, 1])

def statevector_to_bloch_point(state):
    svp = [abs(state[0])*np.exp(1j * np.angle(state[0])), abs(state[1])*np.exp(1j * np.angle(state[1]))]
    svp /= np.exp(1j * np.angle(svp[0]))
    theta = 2 * np.arccos(abs(svp[0]))
    phi = np.angle(svp[1])
    return np.sin(theta)*np.cos(phi), np.sin(theta)*np.sin(phi), np.cos(theta)

def random_unitary(dim):
  # follows the algorithm in https://arxiv.org/pdf/math-ph/0609050.pdf
  Z = np.array([np.random.normal(0, 1) + np.random.normal(0, 1) * 1j for _ in range(dim ** 2)]).reshape(dim, dim)
  Q, R = np.linalg.qr(Z)
  diag = np.diagonal(R)
  lamb = np.diag(diag) / np.absolute(diag)
  unitary = np.matmul(Q, lamb)
  assert np.allclose(unitary.conj().T @ unitary, np.eye(dim))
  return unitary

In [171]:
goal = generate_target_circuit(n=n)
goal_region = statevector_to_bloch_reg(goal)


In [239]:
#This defines the reward function. Reward to network iff it is in the goal bloch region
def R(state, action):
    if (state == goal_reg):
        return 1
        # if (action <= len(GATES) - 2):
        #     return 0
        # else:
        #     return 0.1 # to encourage using identity
    else:
        return -0.1 

In [237]:
#This defines the reward function. Reward to network iff it is in the goal bloch region
def R(state, count):
    if (state == goal_region):
        return 1
        # if (action <= len(GATES) - 2):
        #     return 0
        # else:
        #     return 0.1 # to encourage using identity
    else:
        return -0.1

In [248]:
def q_learning(inital_state, terminal, eplison=0.01, gamma = 0.99,alpha = 1, episode_count=10000):
    # Check random values 
    Q = np.ones([len(states), len(GATES)])
    print("Terminal State : {0} Terminal State Index : {1}".format(terminal, states.index(terminal)))
    Q[states.index(terminal)] = np.zeros(len(GATES))

    for _ in range(episode_count):
        S = inital_state #Let this be the coordinates, (0,0), and not the index
        count = 0
        while S != terminal and count < 50: #Look until we reach our terminal state. Might go on forever 
            S_index = states.index(S) # Let S_index represent the states index in the Q array

            # Initally H
            A = np.argmax(Q[S_index]) # Get the argMax(S, a) = a of the current state 

            S_1 = statevector_to_bloch_reg(GATES[A] @ random_state_in_reg(S)) # Apply action to get S_t+1
            S_1_index = states.index(S_1) # Let S_1_index represent the index of S_1 in Q
            r = R(S_1, count) # Find the reward of the S_t+1
            Q[S_index][A] = Q[S_index][A] + (alpha * (r + (gamma*Q[S_1_index].max()) - Q[S_index][A]))

            S = S_1

            count += 1
    return Q



In [249]:
n = 10**6
goal = generate_target_circuit(n=n)
goal_reg = statevector_to_bloch_reg(goal)

#Generate and train policy
policy = q_learning((0,0), goal_reg, alpha=0.8,episode_count=1000)
for i in policy:
    print(i)


Terminal State : (15, 53) Terminal State Index : 953
[[-0.18152186 -0.24858213 -0.24858213]
 [ 1.          1.          1.        ]
 [ 1.          1.          1.        ]
 ...
 [ 1.          1.          1.        ]
 [ 1.          1.          1.        ]
 [-0.06097365 -0.090667   -0.090667  ]]


In [250]:
n = 10**6
goal = generate_target_circuit(n=n)
goal_reg = statevector_to_bloch_reg(goal)

#Generate and train policy
policy = q_learning((0,0), goal_reg, alpha=0.8,episode_count=2000)


print("Policy has finished")

#Noahs
optimal_programs = []
for i in range(10):
    converged = False
    while not converged:
        s = random_state_in_reg((0, 0))
        prog = []
        counter = 0
        while counter < 30:
            action = np.argmax(policy[states.index(statevector_to_bloch_reg(s))])
            next_s = GATES[action] @ s
            prog.append(action)
            # next_s = random_state_in_reg(statevector_to_bloch_reg(next_s))
            s = next_s
            counter += 1
            if (statevector_to_bloch_reg(s) == goal_reg):
                print('converged')
                converged = True
                break
        
    optimal_programs.append(prog)
optimal_programs



Terminal State : (15, 53) Terminal State Index : 953
[[-0.73303694 -0.85547514 -0.85547514]
 [ 1.          1.          1.        ]
 [ 1.          1.          1.        ]
 ...
 [ 1.          1.          1.        ]
 [ 1.          1.          1.        ]
 [-0.46379335 -0.55690289 -0.55690289]]
Policy has finished


KeyboardInterrupt: 

In [245]:
fidelities = []
prog = [0, 1, 1, 0, 1]
for i in range(1):
    s = np.array([1, 0])
    s = random_state_in_reg((0,0))
    for a in prog:
        s = GATES[a] @ s
    f = state_fidelity(s, goal)
    fidelities.append(f)
    break
    # print(goal, s)
print(np.average(fidelities))

0.9974210798618732
