This notebook takes several generic single-qubit input states psi and attempts to find the gates required to encode each of them. A reward is given when the Z1Z2 and Z2Z3 measurements give a definite outcome and the state recovered after correction and decoding is the same as the input psi. We can then check whether the gates found are the same for every psi.

Works if the initial state isn't +, -, 0 or 1.

TODO: compare the results against a random search.

In [1]:
from numpy import *
import cmath
from random import randrange
import pandas as pd
import time
from IPython.display import clear_output
import scipy
import matplotlib.pyplot as plt

In [2]:
# Single-qubit computational basis states as complex column vectors.
zero = array([[1], [0]], dtype=complex)
one = array([[0], [1]], dtype=complex)

# Equal superpositions |+> and |->.
plus = (zero + one) * (1/sqrt(2))
minus = (zero - one) * (1/sqrt(2))

# Generic real-amplitude inputs cos(pi/d)|0> + sin(pi/d)|1> for each divisor d.
psilist = [
    cos(pi/d)*zero + sin(pi/d)*one
    for d in (-3, -5, -7, -9, 3, 5, 7, 9)
]

In [3]:
# Display the list of single-qubit input states.
psilist

[array([[ 0.5      +0.j],
        [-0.8660254+0.j]]), array([[ 0.80901699+0.j],
        [-0.58778525+0.j]]), array([[ 0.90096887+0.j],
        [-0.43388374+0.j]]), array([[ 0.93969262+0.j],
        [-0.34202014+0.j]]), array([[0.5      +0.j],
        [0.8660254+0.j]]), array([[0.80901699+0.j],
        [0.58778525+0.j]]), array([[0.90096887+0.j],
        [0.43388374+0.j]]), array([[0.93969262+0.j],
        [0.34202014+0.j]])]

In [4]:
def norm(state):
    """Return the Euclidean norm sqrt(<state|state>) of a state vector.

    Parameters
    ----------
    state : array_like
        State amplitudes, e.g. a column vector of shape (n, 1).

    Returns
    -------
    scalar
        A plain Python scalar. For complex input it is a ``complex`` whose
        imaginary part is zero, as the original implementation returned.

    Notes
    -----
    The earlier version relied on ``scipy.sqrt`` and ``numpy.asscalar``, both
    of which have been removed from current SciPy/NumPy releases; ``vdot``
    (which conjugates its first argument) and ``.item()`` replace them.
    """
    return sqrt(vdot(state, state)).item()

In [5]:
def fidelityp(guess, right):
    """Return the pure-state fidelity |<guess|right>|^2.

    Parameters
    ----------
    guess, right : array_like
        State vectors with the same number of amplitudes (column vectors in
        this notebook).

    Returns
    -------
    float
        The squared magnitude of the overlap between the two states.

    Notes
    -----
    ``numpy.asscalar``, used by the earlier version, has been removed from
    NumPy; the overlap is now taken with ``vdot``, which conjugates its first
    argument.
    """
    overlap = vdot(guess, right)
    return float(abs(overlap) ** 2)

In [6]:
# Single-qubit gates as 2x2 matrices: identity, Hadamard, phase (S) and T.
I = array([[1, 0],
           [0, 1]])
H = array([[1, 1],
           [1, -1]]) * (1/sqrt(2))
S = array([[1, 0],
           [0, 1j]])
T = array([[1, 0],
           [0, exp((1j*pi)/4)]])

In [7]:
# Single-qubit Pauli matrices.
X = array([[0, 1],
           [1, 0]])
Y = array([[0, -1j],
           [1j, 0]])
Z = array([[1, 0],
           [0, -1]])

# Each Pauli lifted to the three-qubit space; the suffix is the qubit it acts on.
X1 = kron(X, kron(I, I))
X2 = kron(I, kron(X, I))
X3 = kron(I, kron(I, X))

Y1 = kron(Y, kron(I, I))
Y2 = kron(I, kron(Y, I))
Y3 = kron(I, kron(I, Y))

Z1 = kron(Z, kron(I, I))
Z2 = kron(I, kron(Z, I))
Z3 = kron(I, kron(I, Z))

In [8]:
# Outer products of the basis column vectors with their own transposes.
zerop = zero.dot(zero.T)
onep = one.dot(one.T)


In [9]:
# Three-qubit identity.
I3 = kron(I, kron(I, I))

# CNOTab: qubit a is the control, qubit b the target.
# Built as (projector onto control=0) (x) identity + (projector onto control=1) (x) X on the target.
CNOT12 = kron(zerop, kron(I, I)) + kron(onep, kron(X, I))
CNOT13 = kron(zerop, kron(I, I)) + kron(onep, kron(I, X))
CNOT21 = kron(I, kron(zerop, I)) + kron(X, kron(onep, I))
CNOT23 = kron(I, kron(zerop, I)) + kron(I, kron(onep, X))
CNOT31 = kron(I, kron(I, zerop)) + kron(X, kron(I, onep))
CNOT32 = kron(I, kron(I, zerop)) + kron(I, kron(X, onep))

# Hadamard, T and S lifted to the three-qubit space; the suffix is the qubit acted on.
H1 = kron(H, kron(I, I))
H2 = kron(I, kron(H, I))
H3 = kron(I, kron(I, H))

T1 = kron(T, kron(I, I))
T2 = kron(I, kron(T, I))
T3 = kron(I, kron(I, T))

S1 = kron(S, kron(I, I))
S2 = kron(I, kron(S, I))
S3 = kron(I, kron(I, S))


In [10]:
# Action set: a gate is referred to everywhere else by its index in this list.
glist = [
    I3,                      # 0
    CNOT12, CNOT21,          # 1, 2
    CNOT13, CNOT31,          # 3, 4
    CNOT23, CNOT32,          # 5, 6
    H1, H2, H3,              # 7, 8, 9
    T1, T2, T3,              # 10, 11, 12
    S1, S2, S3,              # 13, 14, 15
]

In [11]:
# The eight three-qubit computational basis states; in each name z = |0>, o = |1>,
# read from qubit 1 to qubit 3.
zero3 = kron(zero, kron(zero, zero))
zzo = kron(zero, kron(zero, one))
zoz = kron(zero, kron(one, zero))
zoo = kron(zero, kron(one, one))
ozz = kron(one, kron(zero, zero))
ozo = kron(one, kron(zero, one))
ooz = kron(one, kron(one, zero))
one3 = kron(one, kron(one, one))


In [12]:
# Two-qubit parity operators Z1Z2 and Z2Z3 on the three-qubit space.
Z12 = kron(Z, kron(Z, I))
Z23 = kron(I, kron(Z, Z))

In [13]:
# Pad every single-qubit input with two |0> ancillas: psi (x) |0> (x) |0>.
psi3list = [kron(kron(psi, zero), zero) for psi in psilist]
psi3list

[array([[ 0.5      +0.j],
        [ 0.       +0.j],
        [ 0.       +0.j],
        [ 0.       +0.j],
        [-0.8660254+0.j],
        [-0.       +0.j],
        [-0.       +0.j],
        [-0.       +0.j]]), array([[ 0.80901699+0.j],
        [ 0.        +0.j],
        [ 0.        +0.j],
        [ 0.        +0.j],
        [-0.58778525+0.j],
        [-0.        +0.j],
        [-0.        +0.j],
        [-0.        +0.j]]), array([[ 0.90096887+0.j],
        [ 0.        +0.j],
        [ 0.        +0.j],
        [ 0.        +0.j],
        [-0.43388374+0.j],
        [-0.        +0.j],
        [-0.        +0.j],
        [-0.        +0.j]]), array([[ 0.93969262+0.j],
        [ 0.        +0.j],
        [ 0.        +0.j],
        [ 0.        +0.j],
        [-0.34202014+0.j],
        [-0.        +0.j],
        [-0.        +0.j],
        [-0.        +0.j]]), array([[0.5      +0.j],
        [0.       +0.j],
        [0.       +0.j],
        [0.       +0.j],
        [0.8660254+0.j],
        [0.    

In [15]:
# Seed the Q-table with one row per gate for the first input state.
# Columns: 'state' (string form of the state), 'gate' (index into glist),
# 'new' (string form of the state after the gate) and 'Q' (initialised to 0).
#
# The rows are collected first and the frame is built once: DataFrame.append
# was removed in pandas 2.0, and appending row by row copies the whole frame on
# every iteration.
state = psi3list[0]
seed_rows = [
    {'state': str(state), 'gate': m, 'new': str(glist[m].dot(state)), 'Q': 0}
    for m in range(len(glist))
]
Q_table = pd.DataFrame(seed_rows, columns=['state', 'gate', 'new', 'Q'])

In [16]:
# Let string columns show up to 100 characters so the stringified states are readable.
pd.set_option('display.max_colwidth', 100)
# Display the seeded Q-table.
Q_table

Unnamed: 0,state,gate,new,Q
0,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,0,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,0
1,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,1,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,0
2,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,2,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,0
3,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,3,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,0
4,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,4,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,0
5,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,5,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,0
6,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,6,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,0
7,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,7,[[-0.25881905+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0.96592583+0....,0
8,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,8,[[ 0.35355339+0.j]\n [ 0. +0.j]\n [ 0.35355339+0.j]\n [ 0. +0.j]\n [-0.61237244+0....,0
9,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,9,[[ 0.35355339+0.j]\n [ 0.35355339+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.61237244+0....,0


In [21]:
# Hyper-parameters for the search.
# NOTE(review): alpha, gamma and num_iter are defined here but are not read by
# any cell visible in this notebook -- confirm whether a Q-update step and a
# multi-agent loop were removed from the training cell.
alpha = 0.5 # learning rate: how much Q is updated for every new action
gamma = 0.9 # discount factor

num_iter = len(psi3list) # intended number of agents, one per input state, each learning Q values from scratch
num_repeats = 600 # number of episodes one agent runs
penalty = 0.1 # amount added to the running `reward` counter on every step
steps = 5 # maximum number of steps in one episode

In [37]:
# Random-exploration search for an encoding circuit.
#
# Each episode starts from the un-encoded input state and runs up to `steps`
# steps. On every step:
#   1. a random single-qubit X error is applied to the current state,
#   2. Z1Z2 and Z2Z3 are evaluated and the matching X correction is applied,
#   3. the gates chosen so far are undone and the result is compared with the
#      input state,
#   4. if the input was recovered exactly the episode stops ("goal reached"),
#      otherwise a random gate is applied and recorded.
#
# No statement in this cell writes to the 'Q' column, so the "top Q" read-out at
# the end ranks rows by whatever Q values `Q_table` already held.
# NOTE(review): `alpha`, `gamma` and the running `reward` are never consumed
# here -- presumably a Q-update step is missing; confirm against the intended
# algorithm.
#
# Fixes relative to the previous version of this cell:
#   * expectation values use the conjugate transpose (<psi|Z|psi>), which matters
#     once T or S gates have produced complex amplitudes;
#   * decoding applies the inverse (conjugate transpose) of each gate in reverse
#     order, instead of re-applying the gate (T and S are not self-inverse);
#   * when a new state is inserted into the table, all of its rows are kept as
#     the candidate actions (previously only the last inserted row survived, so
#     a newly seen state could only ever choose gate 15);
#   * the per-step bonus no longer overwrites the module-level `plus` state, and
#     the agent counter no longer shadows the builtin `iter`;
#   * DataFrame.append (removed in pandas 2.0) is replaced by pd.concat;
#   * the greedy read-out stops cleanly when a successor state has no rows.


def _expectation(operator, state):
    """Return the real part of <state|operator|state> for a column-vector state."""
    return vdot(state, operator.dot(state)).real


def _parity(expectation, tol=0.99):
    """Return +1 or -1 when the expectation value is beyond +/-tol, otherwise 0."""
    if expectation > tol:
        return 1
    if expectation < -tol:
        return -1
    return 0


BIT_FLIPS = (X1, X2, X3)  # the three possible single-qubit errors
# (Z1Z2, Z2Z3) outcome -> correction; any other outcome means "apply nothing".
CORRECTIONS = {(-1, 1): X1, (-1, -1): X2, (1, -1): X3}

agent_tables = []  # full table of every agent
agent_paths = []   # greedy state/action chain of every agent

# One agent, started from the first input state. psi3list is indexed by the
# agent number so that widening this range gives each agent its own input.
for agent in range(0, 1):
    table = Q_table.copy()  # starting table (empty Q values or precalculated)
    print(str(agent) + ' has begun')
    psinit = psi3list[agent]

    for repeats in range(0, num_repeats):
        gates = []      # indices of the gates applied so far in this episode
        statelist = []  # states visited so far in this episode
        psiold = psinit.copy()
        reward = 0

        for step in range(0, steps):
            # Introduce a random bit-flip error on one of the three qubits.
            psir = BIT_FLIPS[randrange(3)].dot(psiold)

            # Evaluate both parity checks; 0 means the outcome is not definite.
            val1 = _parity(_expectation(Z12, psir))
            val2 = _parity(_expectation(Z23, psir))
            uncertain = (val1 == 0) or (val2 == 0)

            # Apply the correction selected by the syndrome, if any.
            correction = CORRECTIONS.get((val1, val2))
            psif = psir if correction is None else correction.dot(psir)

            # Decode: undo the recorded gates, last applied first.
            for kk in reversed(gates):
                psif = glist[kk].conj().T.dot(psif)

            # Bonus from the fidelity of the decoded state with the input.
            fid = fidelityp(psinit, psif)
            if uncertain:
                bonus = 0
            elif fid > 0.9999999:
                bonus = 5
            elif fid > 0.85:
                # NOTE(review): this expression reduces algebraically to 1;
                # presumably a ramp between 0.85 and 1 was intended -- confirm.
                bonus = fid*(4/(0.15)) + (1 - fid*(4/(0.15)))
            else:
                bonus = 0
            reward += penalty  # running per-step counter (not read anywhere in this cell)

            # Rows of the table for the current state; insert them if unseen.
            state_key = str(psiold)
            sec = table[table['state'] == state_key]
            if len(sec) == 0:
                sec = pd.DataFrame([
                    {'state': state_key, 'gate': s,
                     'new': str(glist[s].dot(psiold)), 'Q': 0}
                    for s in range(0, len(glist))
                ])
                table = pd.concat([table, sec], ignore_index=True)

            if bonus == 5:  # the input was recovered exactly: stop this episode
                print(psiold)
                print(gates)
                print(statelist)
                print('goal reached')
                break

            # Otherwise pick one of the available gates uniformly at random.
            actg = [int(g) for g in sec['gate']]
            action = actg[randrange(len(actg))]
            psiold = glist[action].dot(psiold)
            gates.append(action)
            statelist.append(psiold)

        print(repeats)

    agent_tables.append(table)

    # Greedy read-out: start from the input state and repeatedly follow the
    # highest-Q action, for at most five further steps.
    start_rows = table.loc[table.state == str(psinit)]
    top = start_rows.sort_values("Q", ascending=False).head(1)
    for n in range(5):
        if n >= len(top):
            break
        next_rows = table.loc[table.state == top['new'].values[n]]
        if next_rows.empty:  # successor state was never visited
            break
        top = pd.concat([top, next_rows.sort_values("Q", ascending=False).head(1)])
    agent_paths.append(top)

# All rows of all agents, and the greedy chains of all agents.
tots1 = pd.concat(agent_tables) if agent_tables else pd.DataFrame()
final = (pd.concat(agent_paths).reset_index(drop=True)
         if agent_paths else pd.DataFrame())

0 has begun
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
[[ 0.5      +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [-0.8660254+0.j]]
[0, 3, 5, 1]
[array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [-0.8660254+0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j]]), array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [-0.8660254+0.j],
       [ 0.       +0.j],
       [ 0.       +0.j]]), array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [-0.8660254+0.j],
       [ 0.       +0.j],
       [ 0.       +0.j]]), array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.

263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
[[ 0.5      +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [-0.8660254+0.j]]
[1, 15, 4, 5]
[array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [-0.8660254+0.j],
       [ 0.       +0.j]]), array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [-0.8660254+0.j],
       [ 0.       +0.j]]), array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [-0.8660254+0.j],
       [ 0.       +0.j]]), array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
     

440
441
442
443
[[ 0.5      +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [-0.8660254+0.j]]
[1, 3]
[array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [-0.8660254+0.j],
       [ 0.       +0.j]]), array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [-0.8660254+0.j]])]
goal reached
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
[[ 0.5      +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [ 0.       +0.j]
 [-0.8660254+0.j]]
[12, 3, 1]
[array([[ 0.5      +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [-0.8660254+0.j],
       [ 0.       +0.j],
       [ 0.       +0.j],
       [ 0

In [191]:
# Let string columns show up to 100 characters so the stringified states are readable.
pd.set_option('display.max_colwidth', 100)
# Show at most the first 60 rows of the per-agent greedy chains.
# NOTE(review): the saved output contains non-zero Q values and more than one
# input state, which the training cell as shown (one agent, no Q update) would
# not produce -- presumably it comes from an earlier version of that cell.
reads = final.head(60)
reads

Unnamed: 0,state,gate,new,Q
0,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.8660254+0.j]\n ...,1,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,35.882372
1,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,3,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,40.085368
2,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,0,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,45.01246
3,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,0,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,45.01246
4,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,0,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,45.01246
5,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,0,[[ 0.5 +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n ...,45.01246
6,[[ 0.80901699+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [-0.58778525+0....,1,[[ 0.80901699+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0....,34.052175
7,[[ 0.80901699+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0....,3,[[ 0.80901699+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0....,38.145935
8,[[ 0.80901699+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0....,0,[[ 0.80901699+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0....,42.940426
9,[[ 0.80901699+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0....,0,[[ 0.80901699+0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0.j]\n [ 0. +0....,42.940426


In [146]:
# Average Q over all agents for every distinct (state, gate, new, fidelity) row.
# NOTE(review): the tables built in the cells visible here only have the columns
# state/gate/new/Q, so the 'fidelity' key presumably comes from a version of the
# training cell that is not shown -- confirm before re-running.
together = (
    tots1
    .groupby(['state', 'gate', 'new', 'fidelity'])['Q']
    .mean()
    .reset_index()
)

In [147]:
# Keep only the rows whose fidelity column equals 1, then display them.
take = together.loc[together['fidelity'] == 1]
take

Unnamed: 0,state,gate,new,fidelity,Q
3648,[[ 5.72061403e-01+0.j ]\n [ 5.72061403e-01+0.j ]\n [ 0.00000000e+00+0.j ]\n...,0,[[ 5.72061403e-01+0.j ]\n [ 5.72061403e-01+0.j ]\n [ 0.00000000e+00+0.j ]\n...,1.0,-0.250000
3649,[[ 5.72061403e-01+0.j ]\n [ 5.72061403e-01+0.j ]\n [ 0.00000000e+00+0.j ]\n...,1,[[ 5.72061403e-01+0.j ]\n [ 5.72061403e-01+0.j ]\n [ 0.00000000e+00+0.j ]\n...,1.0,-0.250000
3650,[[ 5.72061403e-01+0.j ]\n [ 5.72061403e-01+0.j ]\n [ 0.00000000e+00+0.j ]\n...,2,[[ 5.72061403e-01+0.j ]\n [ 5.72061403e-01+0.j ]\n [ 0.00000000e+00+0.j ]\n...,1.0,-0.250000
3651,[[ 5.72061403e-01+0.j ]\n [ 5.72061403e-01+0.j ]\n [ 0.00000000e+00+0.j ]\n...,3,[[ 5.72061403e-01+0.j ]\n [ 5.72061403e-01+0.j ]\n [ 0.00000000e+00+0.j ]\n...,1.0,-0.250000
3652,[[ 5.72061403e-01+0.j ]\n [ 5.72061403e-01+0.j ]\n [ 0.00000000e+00+0.j ]\n...,4,[[ 5.72061403e-01+0.j ]\n [-9.80469750e-19+0.41562694j]\n [ 0.00000000e+00+0.j ]\n...,1.0,-0.250000
...,...,...,...,...,...
6923,[[0.80901699+0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n ...,11,[[0.80901699+0.j ]\n [0. +0.j ]\n [0. +0.j ]\n [0. +0....,1.0,6.014905
6924,[[0.80901699+0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n ...,12,[[0.80901699+0.j ]\n [0. +0.j ]\n [0. +0.j ]\n [0. +0....,1.0,6.014905
6925,[[0.80901699+0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n ...,13,[[0.80901699+0.j ]\n [0. +0.j ]\n [0. +0.j ]\n [0. +0....,1.0,35.173966
6926,[[0.80901699+0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n ...,14,[[0.80901699+0.j ]\n [0. +0.j ]\n [0. +0.j ]\n [0. +0....,1.0,35.173966


In [53]:
# Greedy read-out from the averaged table: starting at `ask`, repeatedly follow
# the highest-Q action, for at most nine further steps or until an action leaves
# the state unchanged.
#
# Changes relative to the previous version of this cell:
#   * DataFrame.append (removed in pandas 2.0) is replaced by pd.concat;
#   * a start or successor state with no rows no longer ends in an IndexError;
#   * `final` and `tots1` are no longer reset to empty frames here -- nothing in
#     this cell reads them, and wiping them made the earlier cells that display
#     or aggregate them non-repeatable.
ask = one3
table = together.copy()

start_rows = table.loc[table.state == str(ask)]  # rows for the requested start state
if start_rows.empty:
    raise LookupError('the requested start state has no rows in the table')
top = start_rows.sort_values("Q", ascending=False).head(1)  # best first action

for n in range(9):
    # Rows for the state reached by the most recently chosen action.
    next_rows = table.loc[table.state == top['new'].values[n]]
    if next_rows.empty:  # that state was never visited: the chain ends here
        break
    top = pd.concat([top, next_rows.sort_values("Q", ascending=False).head(1)])
    # Stop once the chosen action maps the state onto itself.
    if top.iloc[n + 1]['new'] == top.iloc[n + 1]['state']:
        break

top = top.reset_index(drop=True)

reads = top.head(30)

first = reads.iloc[0]['state']
last = reads.iloc[len(reads) - 1]['state']
print('to get from ')
print(first)
print(' to ')
print(last)
print(' use actions: ')
for ii in range(0, len(reads)):
    print(str(reads.iloc[ii]['gate']))
reads

to get from 
[[0.+0.j]
 [0.+0.j]
 [0.+0.j]
 [0.+0.j]
 [0.+0.j]
 [0.+0.j]
 [0.+0.j]
 [1.+0.j]]
 to 
[[0.        +0.j]
 [0.        +0.j]
 [0.        +0.j]
 [0.        +0.j]
 [0.        +0.j]
 [0.        +0.j]
 [0.70710678+0.j]
 [0.70710678+0.j]]
 use actions: 
3
9
5


Unnamed: 0,state,gate,new,fidelity,Q
0,[[0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [1.+0.j]],3,[[0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [1.+0.j]\n [0.+0.j]],0.5,35.427975
1,[[0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [0.+0.j]\n [1.+0.j]\n [0.+0.j]],9,[[0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n ...,0.5,39.492871
2,[[0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n ...,5,[[0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n [0. +0.j]\n ...,1.0,44.734424
