In [96]:
from qiskit import QuantumCircuit, QuantumRegister, ClassicalRegister, assemble, Aer
from qiskit.visualization import plot_histogram
from math import pi
import numpy as np


def circuitMaker(n_qubits, encodings, params, measurement, n_cbits=1):
    # create quantum circuit
    qr = QuantumRegister(n_qubits, 'qubit')
    cr = ClassicalRegister(n_cbits, 'cr')
    qc = QuantumCircuit()
    qc.add_register(qr)
    qc.add_register(cr)
    
    # encoding block
    encoder0, encoder1 = encoder(encodings)
    # print(encodings, encoder1)
    # qc.rx(pi, encoder0)
    # qc.rz(pi, encoder0)
    if encoder1:
        qc.rx(pi, encoder1)
        qc.rz(pi, encoder1)
        qc.barrier(qr)
    
    for i in range(n_qubits):
        # entangle block
        if i < n_qubits - 1:
            qc.cnot(i, i+1)
        else:
            qc.barrier(qr)

    for i in range(n_qubits):
        # variational block
        qc.u3(params[0+i], params[1+i], params[2+i], i)

    # measurement
    for i, j in enumerate(measurement):
        qc.measure(qr[j], cr[i])
    
    return qc

def encoder(encodings):
    return [i for i, b in enumerate(encodings) if b == '0'], [i for i, b in enumerate(encodings) if b == '1']

qc = circuitMaker(4, '1011', [1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0], [0])
qc.draw()


In [97]:
import numpy as np
circuit_size = 4
n_actions = 4
parameters = [1,1,1,1,1,1,1,1,1,1,1,1]
bias = [0,0,0,0]
shots = 1000

# def findAction(statistic):
#     distribution = [0]*circuit_size
#     for k, v in statistic.items():
#         # attention: reserved statistic
#         for i, c in enumerate(k[::-1]):
#             if c == '1':
#                 distribution[i] += v
#     print(distribution)
#     return distribution

def getAction_single_measure(state, params):
    b_state = f'{state:0{circuit_size}b}'
    # tasks = [circuitMaker(circuit_size, b_state, parameters, [i]) for i in range(n_actions)]
    aer_sim = Aer.get_backend('aer_simulator')
    param_current, param_bias = params[:-n_actions], params[-n_actions:]
    rlt = [aer_sim.run(assemble(circuitMaker(circuit_size, b_state, param_current, [i]), shots=shots)).result().get_counts() for i in range(n_actions)]
    # qobj = assemble(qc, shots=shots)
    # job = aer_sim.run(qobj)
    # rlt = job.result().get_counts()
    # print(rlt)
    q_exps = [(i['0']-i['1'])/shots if len(i) > 1 else (-i['1']/shots if '1' in i.keys() else i['0']/shots) for i in rlt]

    return [a + b for a, b in zip(q_exps, param_bias)]
print(getAction_single_measure(0, np.concatenate((parameters, bias), axis=0)))

[0.55, 0.52, 0.526, 0.55]


In [98]:
# GLOBAL
sampled_vs = []
q_target = np.array([0.0,0.0,0.0,0.0])

def cost(params):
    global sampled_vs, q_target
    v_targets = [(s[2] if s[4] else s[2] + GAMMA*np.max(q_target)) for s in sampled_vs]
    v_predics = [getAction_single_measure(s[0], params)[s[1]] for s in sampled_vs]
    cost = sum([(tar - pred)**2 for tar, pred in zip(v_targets, v_predics)])/MINIBATCH # MSE
    # print('cost: ', cost)
    return cost

In [111]:
from qiskit.aqua.components.optimizers import ADAM
LEARNING_RATE = 0.1
opt = ADAM(maxiter=100,lr=LEARNING_RATE)

param_current = 0.01 * np.random.normal(size=(circuit_size * 3))
print('init param current: ', param_current)
param_bias = np.array([0.0, 0.0, 0.0, 0.0])
print('init param bias: ', param_bias)
param_target = np.copy(param_current)
print('param target', param_target)


init param current:  [ 0.00041793 -0.00514914  0.00381446  0.00574458 -0.00417114  0.01812999
 -0.0040242   0.0110605  -0.00718603  0.01342259  0.02998413 -0.00359647]
init param bias:  [0. 0. 0. 0.]
param target [ 0.00041793 -0.00514914  0.00381446  0.00574458 -0.00417114  0.01812999
 -0.0040242   0.0110605  -0.00718603  0.01342259  0.02998413 -0.00359647]


In [87]:
print('para current before opt: ', param_current)
params = np.concatenate((param_current, param_bias))
print(params)
new_params = opt.optimize(num_vars=len(params), objective_function=cost, initial_point=params)[0]
param_current, param_bias = np.array(new_params[:-n_actions]), np.array(new_params[-n_actions:])
print('para current after opt: ', param_current)

para current before opt:  [-0.01658838  0.00806475  0.01502274  0.00330557 -0.01153581 -0.00154375
 -0.01450986 -0.00742094 -0.01148889 -0.00401333 -0.00456615  0.01274378]
[-0.01658838  0.00806475  0.01502274  0.00330557 -0.01153581 -0.00154375
 -0.01450986 -0.00742094 -0.01148889 -0.00401333 -0.00456615  0.01274378
  0.          0.          0.          0.        ]
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
cost:  0.0
para current after opt:  []


  param_current, param_bias = np.array(new_params[:-n_actions]), np.array(new_params[-n_actions:])


In [112]:
import gym
import random
import csv

MAXEPISODE = 1
MAXSTEPS = 5
MINIBATCH = 5
GAMMA = 0.9
db = {}

env = gym.make('FrozenLake-v1', map_name = '4x4')
env.reset()
env.render()

counter_target = 0
TARGET_EPI = 8


for i in range(MAXEPISODE):
    state = env.reset()
    total_reward = 0
    done = False
    
    for t in range(MAXSTEPS):
        print(f'Episode: {i}, Steps: {t}')
        # get action for next step from target network
        params = np.concatenate((param_current, param_bias))
        print(len(params))
        q_target = getAction_single_measure(state, params)
        action = np.argmax(q_target)
        # new position, reward, condition and probabilities
        new_state, reward, done, info = env.step(action=action)
        # print(f'act: {action}, i:{type(i)}, new state:{new_state}, reward:{reward}, done:{done}, info:{info}')
        # save data in database
        db[i, t] = (state, action, reward, new_state, done)
        # sample from database
        if len(db) >= MINIBATCH:
            print('Doing optimize')
            # sampled_ks = random.sample(len(db), MINIBATCH)
            print('start sample')
            sampled_vs = [db[k] for k in random.sample(list(db), MINIBATCH)]
            print('sample done')
            # gradient-descent optimization
            print('para current before opt: ', param_current)
            print('start optimize')
            new_params = opt.optimize(num_vars=len(params), objective_function=cost, initial_point=params)[0]
            print('optimize done')
            param_current, param_bias = np.array(new_params[:-n_actions]), np.array(new_params[-n_actions:])
            print('para current after opt: ', param_current)
            counter_target +=1
            print('optimize over')
        # update parameters in target circuit
        if counter_target == TARGET_EPI:
            param_target = np.copy(param_current)
            counter_target = 0
        
        state = new_state

        if done:
            break

print('parameter target: ', param_target, 'parameter bias: ', param_bias)

# with open('target parameters', 'r', encoding='utf-8') as f:
#     write = csv.writer(f) 
#     write.writerow(param_target) 
#     write.writerows(param_bias) 




[41mS[0mFFF
FHFH
FFFH
HFFG
Episode: 0, Steps: 0
16
Episode: 0, Steps: 1
16
Episode: 0, Steps: 2
16
Episode: 0, Steps: 3
16
Episode: 0, Steps: 4
16
Doing optimize
start sample
sample done
para current before opt:  [ 0.00041793 -0.00514914  0.00381446  0.00574458 -0.00417114  0.01812999
 -0.0040242   0.0110605  -0.00718603  0.01342259  0.02998413 -0.00359647]
start optimize
optimize done
para current after opt:  [ 4.17933518e-04 -5.14913511e-03  3.81445531e-03  5.74458360e-03
 -4.17113660e-03  1.81299869e-02 -4.02419781e-03  1.10604996e-02
 -7.18602794e-03  1.34225854e-02  2.99841341e-02  6.37044519e-01]
optimize over
parameter target:  [ 0.00041793 -0.00514914  0.00381446  0.00574458 -0.00417114  0.01812999
 -0.0040242   0.0110605  -0.00718603  0.01342259  0.02998413 -0.00359647] parameter bias:  [0.30068331 0.         0.         0.        ]


In [45]:
parameters = [[1,1,1],[1,1,1],[1,1,1],[1,1,1]]
circuit_size = 4
b_state = '1011'

qc = circuitMaker(circuit_size, circuit_size, b_state, parameters)
qc.draw()

0100
