In [1]:
from qiskit import *
from qiskit.circuit.library import GroverOperator
from qiskit.quantum_info import Statevector

In [2]:
import numpy as np
from math import ceil

In [None]:
class GroverLearner:
    '''
    Skeleton of a quantum reinforcement learning agent that combines Grover
    amplitude enhancement with the Q-learning algorithm.

    Status:
    Every method below is an empty placeholder. Nothing is stored on the
    instance yet, so the attribute list describes the intended design rather
    than what the current code does.

    Assumption:
    The state space and the action space both have finite dimension.

    Planned attributes (none are assigned by this skeleton yet):
    env: the environment to solve. The intended default is the OpenAI gym
         "FrozenLake" environment; NOTE(review): the constructor signature
         currently declares no default for ``env``.
    state (int): current state
    action (int): current action
    state_dimension (int): dimension of the state space
    action_dimension (int): dimension of the action space
    action_qregister_size (int): number of qubits in the quantum register
                                 that stores the action wavefunction
    max_grover_length (int): maximum length of a Grover iteration
    Q_values (2D np array): Q values of all (state, action) combinations;
                            shape = (state_dimension, action_dimension)
    grover_lengths (2D np array): lengths of the Grover iterations of all
                                  (state, action) combinations;
                                  shape = (state_dimension, action_dimension)
    grover_operators (1D np array): Grover operators for all actions
    action_circuits (1D np array): action quantum circuits for all actions
    hyperparameters (dict): hyperparameters of learning;
                            {
                                'k': prefactor of max grover length,
                                'alpha': learning rate,
                                'gamma': discount,
                                'epsilonr': tolerance of the Q values,
                                'max_epochs': max number of epochs for training,
                                'max_steps': max number of steps in every epoch
                            }
    QSIM: qiskit simulator
    '''

    def __init__(self, env) -> None:
        '''
        Constructor.

        Placeholder: the body is empty, so ``env`` is accepted but not stored
        and no attribute is initialised.
        '''

    def set_hyperparameters(self, params):
        '''
        Hyperparameter setter.

        Placeholder: ``params`` is currently ignored and None is returned.
        Presumably ``params`` is a dict using the keys listed under
        ``hyperparameters`` in the class docstring -- TODO confirm once
        implemented.
        '''

    def _init_action_circuits(self):
        '''
        Initialize the quantum circuits of the actions.

        Placeholder: does nothing and returns None.
        '''

    def _init_grover_operators(self):
        '''
        Initialize the Grover operators of the actions.

        Placeholder: does nothing and returns None.
        '''

    def _run_grover_iterations(self):
        '''
        Run the Grover iterations within one learning step.

        Placeholder: does nothing and returns None.
        '''

    def _update_Q_values(self):
        '''
        Update the Q value for one round of Grover iterations.

        Placeholder: does nothing and returns None.
        '''

    def train(self):
        '''
        Train for the maximum number of epochs.

        Placeholder: does nothing and returns None.
        '''

    