# VQCPG Package

## Imports and utils

### Imports

In [None]:
import numpy as np

import pennylane as qml

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

from functools import partial
from collections import deque

import os
import json
import warnings
import time
from datetime import datetime
import ray

import gym

import matplotlib.pyplot as plt
# Default size (width, height) applied to every matplotlib figure created later.
plt.rcParams['figure.figsize'] = (10, 5)

# Name -> version string for everything imported above. Standard-library
# modules carry no version of their own and are tagged "built-in".
# NOTE(review): the "datetime" entry stores the current timestamp rather than
# a version string -- confirm this is intended (e.g. as a run marker).
libraries = dict(
    numpy=np.__version__,
    pennylane=qml.__version__,
    torch=torch.__version__,
    functools="built-in",
    collections="built-in",
    os="built-in",
    json="built-in",
    warnings="built-in",
    time="built-in",
    datetime=datetime.now().isoformat(),
    ray=ray.__version__,
    gym=gym.__version__,
    matplotlib=plt.matplotlib.__version__,
)

# Print one "<name>: <version>" line per entry, with a fallback text for
# entries whose version string is empty/None.
for lib, version in libraries.items():
    print(f"{lib}: {version}" if version else f"{lib}: version information not available")

### Utils

In [None]:
def create_directory(path):
    """
    Create ``path`` (including missing parent directories) and return it.

    An already existing directory is not an error, so the call can be
    repeated safely.
    """
    os.makedirs(name=path, exist_ok=True)
    return path

def tensor_to_list(tensor):
    """
    Recursively convert tensors / numpy values into plain Python objects.

    Conversion rules:
    - ``torch.Tensor`` and ``np.ndarray`` -> (nested) list via ``tolist()``
    - numpy scalars (``np.float32``, ``np.int64``, ...) -> Python scalar
    - ``list`` / ``tuple`` / ``dict`` -> same container type, with every
      element (or dict value) converted recursively
    - anything else is returned unchanged

    Numpy scalars and tuples are handled explicitly because they otherwise
    reach the caller unconverted (e.g. ``np.float32`` is not accepted by
    ``json.dump``).
    """
    if isinstance(tensor, (np.ndarray, torch.Tensor)):
        return tensor.tolist()
    if isinstance(tensor, np.generic):
        # numpy scalar -> native Python scalar
        return tensor.item()
    if isinstance(tensor, list):
        return [tensor_to_list(t) for t in tensor]
    if isinstance(tensor, tuple):
        # Keep the tuple type so plain tuples compare equal to their input.
        return tuple(tensor_to_list(t) for t in tensor)
    if isinstance(tensor, dict):
        return {key: tensor_to_list(value) for key, value in tensor.items()}
    return tensor
    
def create_optimizer_with_lr(params, lr_list, use_amsgrad=False):
    """
    Build an Adam optimizer with one parameter group per (param, lr) pair.

    ``params`` and ``lr_list`` are paired positionally with ``zip``; if their
    lengths differ, the surplus entries of the longer one are ignored.
    ``use_amsgrad`` is forwarded to ``torch.optim.Adam`` as ``amsgrad``.
    """
    param_groups = []
    for group_params, group_lr in zip(params, lr_list):
        param_groups.append({'params': group_params, 'lr': group_lr})
    return torch.optim.Adam(param_groups, amsgrad=use_amsgrad)

def _qualified_callable_name(func):
    """
    Return ``"<module>.<name>"`` for a callable.

    Callables without ``__name__`` (e.g. instances of a class defining
    ``__call__``) fall back to the name/module of their type instead of
    raising ``AttributeError``.
    """
    func_type = type(func)
    module = getattr(func, "__module__", func_type.__module__)
    name = getattr(func, "__name__", func_type.__name__)
    return f"{module}.{name}"


def get_function_representation(func):
    """
    Returns the full name of a function or partial function with arguments.

    - ``functools.partial``: ``"module.name(arg1, ..., key=value, ...)"`` built
      from the wrapped callable and the bound positional/keyword arguments.
    - any other callable: ``"module.name"``.
    - non-callables: ``str(func)``.
    """
    if isinstance(func, partial):
        bound = [str(arg) for arg in func.args]
        bound.extend(f"{k}={v}" for k, v in func.keywords.items())
        return f"{_qualified_callable_name(func.func)}({', '.join(bound)})"
    if callable(func):
        return _qualified_callable_name(func)
    return str(func)

def get_instance_variables(instance):
    """
    Snapshot the attributes stored on ``instance`` as a dictionary.

    Callable attribute values are replaced by the string produced by
    ``get_function_representation``; every other value is kept as is.
    """
    return {
        name: get_function_representation(value) if callable(value) else value
        for name, value in vars(instance).items()
    }

### Observables

In [None]:
def measure_probs(qubits):
    '''
    Returns the ``qml.probs`` measurement over wires ``0 .. qubits - 1``,
    i.e. the probability of each computational basis state of those wires.
    '''
    measured_wires = range(qubits)
    return qml.probs(wires=measured_wires)

def two_measure_expval(qubits):
    '''
    Returns two expectation-value measurements built from one observable.

    The observable is the tensor product of PauliZ over wires
    ``0 .. qubits - 1`` (just ``PauliZ(0)`` when ``qubits`` is 1 or less).
    The returned list holds, in order:
    - the expectation value of that observable
    - the expectation value of its negative
    '''
    parity = qml.PauliZ(0)
    wire = 1
    while wire < qubits:
        parity = parity @ qml.PauliZ(wire)
        wire += 1

    return [qml.expval(parity), qml.expval(-parity)]

def three_measure_expval(qubits):
    '''
    Computes and returns the expectation values of three observables based on the number of qubits.

    The three observables depend on the number of qubits:
    - For 1 qubit: PauliZ, PauliX, and negative PauliZ.
    - For 2 qubits: PauliZ on the first qubit, tensor product of PauliZ on both qubits, and PauliZ on the second qubit.
    - For 3 or more qubits: PauliZ on the first qubit, a chain of PauliZ on the intermediate qubits
      (wires 1 .. qubits - 2), and PauliZ on the last qubit.

    Returns a list ``[first, middle, last]`` of ``qml.expval`` measurements.

    Raises ValueError if ``qubits`` is smaller than 1.
    '''
    if qubits < 1:
        raise ValueError("Unsupported number of qubits: at least 1 qubit is required")

    if qubits == 1:
        first_observable = qml.PauliZ(0)
        middle_observable = qml.PauliX(0)
        last_observable = -qml.PauliZ(0)
    elif qubits == 2:
        first_observable = qml.PauliZ(0)
        middle_observable = qml.PauliZ(0) @ qml.PauliZ(1)
        last_observable = qml.PauliZ(1)
    else:
        # Covers 3 qubits too: the loop below is then empty and the middle
        # observable is simply PauliZ(1).
        first_observable = qml.PauliZ(0)
        middle_observable = qml.PauliZ(1)
        for i in range(2, qubits - 1):
            middle_observable = middle_observable @ qml.PauliZ(i)
        last_observable = qml.PauliZ(qubits - 1)

    return [
        qml.expval(first_observable),
        qml.expval(middle_observable),
        qml.expval(last_observable),
    ]

## Circuits

### Jerbi Circuit

In [None]:
class JerbiModel(nn.Module):
    '''
    Parametrized quantum circuit wrapped as a torch module.

    Circuit layout (see generate_circuit): a Hadamard on every wire, then
    ``n_layers`` blocks of [RZ/RY variational rotations -> optional
    entangling gates -> RY/RZ input encoding], a final RZ/RY variational
    layer, and finally the user-supplied measurement.

    For detailed information about the parameters, call the info() method.
    '''
    def __init__(self, 
                n_qubits,
                n_layers, 
                device,
                shots,
                diff_method, 
                entanglement,
                entanglement_pattern, 
                entanglement_gate, 
                input_scaling, 
                input_init, 
                weight_init, 
                measure):
        super(JerbiModel, self).__init__()

        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.device = device
        self.shots = shots
        self.diff_method = diff_method
        self.entanglement = entanglement
        self.entanglement_pattern = entanglement_pattern
        self.entanglement_gate = entanglement_gate
        self.input_scaling = input_scaling
        self.input_init = input_init
        self.weight_init = weight_init
        self.measure = measure

        # Built last: generate_circuit reads the attributes assigned above.
        self.circuit = self.generate_circuit()
    
    def generate_circuit(self):
        '''
        Validates the configuration, creates the PennyLane device and QNode,
        and returns the QNode wrapped in a qml.qnn.TorchLayer.

        Side effects: sets self.weight_shapes, self.init_method and self.qnode.
        '''
        # Call the error handling function
        self.handle_errors_and_warnings()

        # Initialize the device (the shots argument is only passed when set)
        if self.shots is None:
            dev = qml.device(self.device, wires=self.n_qubits)
        else:
            dev = qml.device(self.device, wires=self.n_qubits, shots=self.shots)

        # Weight initialization.
        # "params" has n_layers + 1 entries: one per layer plus the final layer.
        # "input_params" is always registered, but only used by the circuit
        # when input_scaling is enabled.
        self.weight_shapes = {
            "input_params": (self.n_layers, self.n_qubits, 2),
            "params": (self.n_layers + 1, self.n_qubits, 2)
        }
        
        self.init_method = {
            "input_params": self.input_init,
            "params": self.weight_init
        }

        @qml.qnode(dev, interface='torch', diff_method=self.diff_method)
        def qnode(inputs, params, input_params):

            # Apply Hadamard gates to all qubits
            # NOTE(review): qml.broadcast is deprecated in recent PennyLane
            # releases -- confirm against the pinned PennyLane version.
            qml.broadcast(qml.Hadamard, wires=range(self.n_qubits), pattern="single")

            # Apply layers and entanglement
            for layer in range(self.n_layers):
                for wire in range(self.n_qubits):
                    qml.RZ(params[layer][wire][0], wires=wire)
                    qml.RY(params[layer][wire][1], wires=wire)

                if self.entanglement:
                    qml.broadcast(self.entanglement_gate, wires=range(self.n_qubits), pattern=self.entanglement_pattern)

                # Input encoding, optionally scaled by trainable input_params.
                # Truthiness check (not "is True") so values such as 1 or
                # numpy booleans enable scaling, matching TfqTutorial.
                if self.input_scaling:
                    for wire in range(self.n_qubits):
                        qml.RY(input_params[layer][wire][0] * inputs[wire], wires=wire)
                        qml.RZ(input_params[layer][wire][1] * inputs[wire], wires=wire)
                else:
                    for wire in range(self.n_qubits):
                        qml.RY(inputs[wire], wires=wire)
                        qml.RZ(inputs[wire], wires=wire)

            # Final layer
            for wire in range(self.n_qubits):
                qml.RZ(params[-1][wire][0], wires=wire)
                qml.RY(params[-1][wire][1], wires=wire)

            return self.measure(self.n_qubits)

        # Keep the raw QNode so visualize_circuit can draw it.
        self.qnode = qnode
        model = qml.qnn.TorchLayer(qnode, weight_shapes=self.weight_shapes, init_method=self.init_method)

        return model

    def forward(self, inputs):
        ''' 
        Gives inputs to the circuit and outputs the respective output
        '''
        return self.circuit(inputs)
     
    def visualize_circuit(self):
        '''
        Draws the circuit with qml.draw_mpl, using dummy inputs
        (0.1 * wire index) and freshly initialized parameters rather than
        the parameters held by self.circuit.
        '''
        inputs = torch.tensor([0.1 * i for i in range(self.n_qubits)], dtype=torch.float32)
        
        initialized_params = {}
        for key, shape in self.weight_shapes.items():
            initialized_params[key] = self.init_method[key](torch.empty(shape))

        # Draw the circuit (argument order follows the qnode signature)
        qml.draw_mpl(self.qnode)(inputs, 
                                initialized_params["params"], 
                                initialized_params["input_params"])

    def handle_errors_and_warnings(self):
        ''' 
        Handles the errors and warnings.

        Raises ValueError if n_layers is smaller than 1.
        '''
        # Check if the number of layers is valid
        if self.n_layers < 1:
            raise ValueError("Number of layers must be at least 1.") 

    def get_parameters(self):
        '''
        Returns the model configuration as a dictionary suited for JSON
        serialization (callables are replaced by their string representation).
        '''
        return {
            "Number of Qubits": self.n_qubits,
            "Number of Layers": self.n_layers,
            "Device": str(self.device),  # Convert to string representation
            "Shots": self.shots,
            "Differentiation Method": self.diff_method,
            "Entanglement": self.entanglement,
            "Entanglement Pattern": self.entanglement_pattern,
            "Entanglement Gate": get_function_representation(self.entanglement_gate),  # Use the helper function to represent the gate
            "Input Scaling": self.input_scaling,
            "Input Initialization": get_function_representation(self.input_init),  # Use the helper function for the initializer
            "Parameters Initialization": get_function_representation(self.weight_init),  # Use the helper function for the initializer
            "Measurement Function": get_function_representation(self.measure)  # Use the helper function for the measurement function
        }
    
    @classmethod
    def info(cls):
        '''
        Provides a summary of the JerbiModel class, including its parameters and methods.
        '''
        info_text = """
        
        Creates a parametrized quantum circuit based on the 'Parametrized quantum policies for reinforcement learning' paper by Sofiene Jerbi.

        Parameters:
        ----------
        n_qubits (int): 
            Number of qubits used in the quantum circuit.
        
        n_layers (int): 
            Number of layers in the quantum circuit. Each layer typically consists of parameterized rotations followed by entanglement gates.
        
        device (str): 
            The quantum device used for simulation or execution (e.g., 'default.qubit', 'lightning.qubit', 'lightning.gpu').

        shots (int, optional): 
            Number of times the circuit gets executed (repeated measurements). If None, the circuit is executed with analytic calculations (no shot noise).
        
        diff_method (str): 
            Differentiation method used for training the model. Common options are 'best', 'parameter-shift', 'backprop', etc.

        entanglement (bool):
            If True, entanglement between qubits is implemented. The entanglement pattern and gate are defined by `entanglement_pattern` and `entanglement_gate`, respectively.
        
        entanglement_pattern (str): 
            Entanglement pattern used in the circuit, such as 'chain', 'ring', 'all_to_all', etc., as defined by qml.broadcast patterns.
        
        entanglement_gate (function): 
            Quantum gate used for entanglement, such as qml.CZ or qml.CNOT. This gate is applied between qubits according to the specified entanglement pattern.
        
        input_scaling (bool): 
            If True, input parameters are scaled by additional learnable parameters (input_params). The input is multiplied by these parameters before being applied to the qubits.
        
        input_init (function): 
            Function to initialize the input scaling parameters, such as torch.nn.init.uniform_, torch.nn.init.ones_, or any function defined by the user.
        
        weight_init (function): 
            Function to initialize the weights of the quantum circuit, such as torch.nn.init.uniform_, torch.nn.init.normal_, or any function defined by the user.
        
        measure (function): 
            Measurement function that takes the number of qubits as an argument and returns the measurement result. Common choices are `measure_probs`, `two_measure_expval`, or any user-defined measurement function.

        Methods:
        --------
        generate_circuit(self): 
            Generates and initializes the quantum circuit based on the parameters.
        
        forward(self, inputs): 
            Takes inputs and passes them through the quantum circuit to get the output.

        visualize_circuit(self): 
            Visualizes the generated quantum circuit for the given number of qubits using the initial parameters. Useful for debugging or analyzing the circuit design.

        handle_errors_and_warnings(self): 
            Handles common errors and warnings, such as invalid parameter values, unsupported devices, and incompatible differentiation methods.
            
        """
        return info_text

### TFQ Circuit

In [None]:
class TfqTutorial(nn.Module):
    '''
    Parametrized quantum circuit wrapped as a torch module.

    Circuit layout (see generate_circuit): ``n_layers`` blocks of
    [RX/RY/RZ variational rotations -> optional entangling gates -> RX input
    encoding], a final RX/RY/RZ variational layer, and finally the
    user-supplied measurement.

    For detailed information about the parameters, call the info() method.
    '''
    def __init__(self, 
                n_qubits,
                n_layers, 
                device,
                shots, 
                diff_method, 
                entanglement,
                entanglement_pattern, 
                entanglement_gate, 
                input_scaling, 
                input_init, 
                weight_init, 
                measure):
        super(TfqTutorial, self).__init__()

        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.device = device
        self.shots = shots
        self.diff_method = diff_method
        self.entanglement = entanglement
        self.entanglement_pattern = entanglement_pattern
        self.entanglement_gate = entanglement_gate
        self.input_scaling = input_scaling
        self.input_init = input_init
        self.weight_init = weight_init
        self.measure = measure

        # Built last: generate_circuit reads the attributes assigned above.
        self.circuit = self.generate_circuit()
    
    def generate_circuit(self):
        '''
        Validates the configuration, creates the PennyLane device and QNode,
        and returns the QNode wrapped in a qml.qnn.TorchLayer.

        Side effects: sets self.weight_shapes, self.init_method and self.qnode.
        '''
        # Call the error handling function
        self.handle_errors_and_warnings()

        # Initialize the device (the shots argument is only passed when set)
        if self.shots is None:
            dev = qml.device(self.device, wires=self.n_qubits)
        else:
            dev = qml.device(self.device, wires=self.n_qubits, shots=self.shots)
        
        # Weight initialization.
        # "params" has n_layers + 1 entries: one per layer plus the final layer.
        # "input_params" is always registered, but only used by the circuit
        # when input_scaling is enabled.
        self.weight_shapes = {
            "input_params": (self.n_layers, self.n_qubits, 1),
            "params": (self.n_layers + 1, self.n_qubits, 3)
        }
        
        self.init_method = {
            "input_params": self.input_init,
            "params": self.weight_init,
        }
        
        @qml.qnode(dev, interface='torch', diff_method=self.diff_method)
        def qnode(inputs, params, input_params):
            
            # Apply layers and entanglement
            for layer in range(self.n_layers):
                for wire in range(self.n_qubits):
                    qml.RX(params[layer][wire][0], wires=wire)
                    qml.RY(params[layer][wire][1], wires=wire)
                    qml.RZ(params[layer][wire][2], wires=wire)

                if self.entanglement:
                    # NOTE(review): qml.broadcast is deprecated in recent
                    # PennyLane releases -- confirm against the pinned version.
                    qml.broadcast(self.entanglement_gate, wires=range(self.n_qubits), pattern=self.entanglement_pattern)

                # Input encoding, optionally scaled by trainable input_params
                if self.input_scaling:
                    for wire in range(self.n_qubits):
                        qml.RX(input_params[layer][wire][0] * inputs[wire], wires=wire)
                else:
                    for wire in range(self.n_qubits):
                        qml.RX(inputs[wire], wires=wire)
                
            # Final layer
            for wire in range(self.n_qubits):
                qml.RX(params[-1][wire][0], wires=wire)
                qml.RY(params[-1][wire][1], wires=wire)
                qml.RZ(params[-1][wire][2], wires=wire)

            return self.measure(self.n_qubits)

        # Keep the raw QNode so visualize_circuit can draw it.
        self.qnode = qnode

        model = qml.qnn.TorchLayer(qnode, weight_shapes=self.weight_shapes, init_method=self.init_method)  
        
        return model
    
    def forward(self, inputs):
        ''' 
        Gives inputs to the circuit and outputs the respective output
        '''
        return self.circuit(inputs)
    
    def visualize_circuit(self):
        '''
        Draws the circuit with qml.draw_mpl, using dummy inputs
        (0.1 * wire index) and freshly initialized parameters rather than
        the parameters held by self.circuit.
        '''
        inputs = torch.tensor([0.1 * i for i in range(self.n_qubits)], dtype=torch.float32)
        
        initialized_params = {}
        for key, shape in self.weight_shapes.items():
            initialized_params[key] = self.init_method[key](torch.empty(shape))

        # Draw the circuit (argument order follows the qnode signature)
        qml.draw_mpl(self.qnode)(inputs, 
                                initialized_params["params"], 
                                initialized_params["input_params"])

    def handle_errors_and_warnings(self):
        ''' 
        Handles the errors and warnings.

        Raises ValueError if n_layers is smaller than 1.
        '''
        # Check if the number of layers is valid
        if self.n_layers < 1:
            raise ValueError("Number of layers must be at least 1.")

    def get_parameters(self):
        '''
        Returns the model configuration as a dictionary suited for JSON
        serialization (callables are replaced by their string representation).
        '''
        return {
            "Number of Qubits": self.n_qubits,
            "Number of Layers": self.n_layers,
            "Device": str(self.device),  # Convert to string representation
            "Shots": self.shots,
            "Differentiation Method": self.diff_method,
            "Entanglement": self.entanglement,
            "Entanglement Pattern": self.entanglement_pattern,
            "Entanglement Gate": get_function_representation(self.entanglement_gate),  # Use the helper function to represent the gate
            "Input Scaling": self.input_scaling,
            "Input Initialization": get_function_representation(self.input_init),  # Use the helper function for the initializer
            "Parameters Initialization": get_function_representation(self.weight_init),  # Use the helper function for the initializer
            "Measurement Function": get_function_representation(self.measure)  # Use the helper function for the measurement function
        }        
    
    @classmethod
    def info(cls):
        '''
        Provides a summary of the TFQ class, including its parameters and methods.
        '''
        info_text = """
        
        Creates a parameterized quantum circuit based on the TensorFlow Quantum tutorial in https://www.tensorflow.org/quantum/tutorials/quantum_reinforcement_learning

        Parameters:
        ----------
        n_qubits (int): 
            Number of qubits used in the quantum circuit.
        
        n_layers (int): 
            Number of layers in the quantum circuit. Each layer typically consists of parameterized rotations followed by entanglement gates.
        
        device (str): 
            The quantum device used for simulation or execution (e.g., 'default.qubit', 'lightning.qubit').

        shots (int, optional): 
            Number of times the circuit gets executed (repeated measurements). If None, the circuit is executed with analytic calculations (no shot noise).
        
        diff_method (str): 
            Differentiation method used for training the model. Common options are 'best', 'parameter-shift', 'backprop', etc.

        entanglement (bool):
            If True, entanglement between qubits is implemented. The entanglement pattern and gate are defined by `entanglement_pattern` and `entanglement_gate`, respectively.
        
        entanglement_pattern (str): 
            Entanglement pattern used in the circuit, such as 'chain', 'ring', 'all_to_all', etc., as defined by qml.broadcast patterns.
        
        entanglement_gate (function): 
            Quantum gate used for entanglement, such as qml.CZ or qml.CNOT. This gate is applied between qubits according to the specified entanglement pattern.
        
        input_scaling (bool): 
            If True, input parameters are scaled by additional learnable parameters (input_params). The input is multiplied by these parameters before being applied to the qubits.
        
        input_init (function): 
            Function to initialize the input scaling parameters, such as torch.nn.init.uniform_, torch.nn.init.ones_, or any function defined by the user.
        
        weight_init (function): 
            Function to initialize the weights of the quantum circuit, such as torch.nn.init.uniform_, torch.nn.init.normal_, or any function defined by the user.
        
        measure (function): 
            Measurement function that takes the number of qubits as an argument and returns the measurement result. Common choices are `measure_probs`, `two_measure_expval`, `three_measure_expval`, or any user-defined measurement function.

        Methods:
        --------
        generate_circuit(self): 
            Generates and initializes the quantum circuit based on the parameters.
        
        forward(self, inputs): 
            Takes inputs and passes them through the quantum circuit to get the output.

        visualize_circuit(self): 
            Visualizes the generated quantum circuit for the given number of qubits using the initial parameters. Useful for debugging or analyzing the circuit design.

        handle_errors_and_warnings(self): 
            Handles common errors and warnings, such as invalid parameter values, unsupported devices, and incompatible differentiation methods.
            
        """
        return info_text

### UQC

In [None]:
class UQC(nn.Module):
    '''
    Data re-uploading quantum circuit wrapped as a torch module.

    Circuit layout (see generate_circuit): for every layer and wire, an RZ
    rotation by ``dot(x, input_params[layer][wire]) + bias[layer][wire]``
    followed by an RY rotation by ``params[layer][wire][0]``, then optional
    entangling gates; finally the user-supplied measurement. ``x`` is the
    whole input for 'full' encoding, or the wire's slice of the input for
    'partial' encoding.

    For detailed information about the parameters, call the info() method.
    '''
    def __init__(self, 
                n_qubits, 
                n_layers, 
                state_dim,
                device,
                shots, 
                diff_method,
                encoding_type,
                entanglement,
                entanglement_pattern, 
                entanglement_gate, 
                input_init,
                weight_init,
                bias_init,
                measure):
        super(UQC, self).__init__()

        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.state_dim = state_dim
        self.device = device
        self.shots = shots
        self.diff_method = diff_method
        self.encoding_type = encoding_type
        self.entanglement = entanglement
        self.entanglement_pattern = entanglement_pattern
        self.entanglement_gate = entanglement_gate
        self.input_init = input_init
        self.weight_init = weight_init
        self.bias_init = bias_init
        self.measure = measure
        self.input_scaling = None

        # Built last: generate_circuit reads the attributes assigned above.
        self.circuit = self.generate_circuit()
    
    def generate_circuit(self):
        '''
        Validates the configuration, creates the PennyLane device and QNode,
        and returns the QNode wrapped in a qml.qnn.TorchLayer.

        Side effects: sets self.weight_shapes, self.init_method and self.qnode.
        '''
        # Fail early with a clear message instead of an AttributeError /
        # shape mismatch further down.
        self.handle_errors_and_warnings()

        # Initialize the device (the shots argument is only passed when set)
        if self.shots is None:
            dev = qml.device(self.device, wires=self.n_qubits)
        else:
            dev = qml.device(self.device, wires=self.n_qubits, shots=self.shots)
        
        # 'full': every wire sees the whole input vector.
        # 'partial': the input is split evenly across the wires.
        if self.encoding_type == 'full':
            inputs_per_wire = self.state_dim
        else:
            inputs_per_wire = self.state_dim // self.n_qubits

        self.weight_shapes = {
            "input_params": (self.n_layers, self.n_qubits, inputs_per_wire),
            "params": (self.n_layers, self.n_qubits, 1),
            "bias": (self.n_layers, self.n_qubits)
        }
        
        self.init_method = {
            "input_params": self.input_init,
            "params": self.weight_init,
            "bias": self.bias_init
        }
        
        @qml.qnode(dev, interface='torch', diff_method=self.diff_method)
        def qnode(inputs, input_params, params, bias):

            if self.encoding_type == 'partial':
                # One contiguous chunk of the input per wire; the split does
                # not depend on the layer, so it is computed once.
                separate_inputs = torch.tensor_split(inputs, self.n_qubits)

            for layer in range(self.n_layers):
                for wire in range(self.n_qubits):
                    if self.encoding_type == 'full':
                        hadamard_product = torch.dot(inputs.clone().detach(), input_params[layer][wire])
                    else:
                        hadamard_product = torch.dot(separate_inputs[wire], input_params[layer][wire])
                    angle = hadamard_product + bias[layer][wire]

                    qml.RZ(angle, wires=wire)
                    
                    qml.RY(params[layer][wire][0], wires=wire)
                    
                if self.entanglement:
                    # NOTE(review): qml.broadcast is deprecated in recent
                    # PennyLane releases -- confirm against the pinned version.
                    qml.broadcast(self.entanglement_gate, wires=range(self.n_qubits), pattern=self.entanglement_pattern)

            return self.measure(self.n_qubits)

        # Keep the raw QNode so visualize_circuit can draw it.
        self.qnode = qnode

        model = qml.qnn.TorchLayer(self.qnode, weight_shapes=self.weight_shapes, init_method=self.init_method)
        
        return model

    def forward(self, inputs):
        ''' 
        Gives inputs to the circuit and outputs the respective output
        '''
        return self.circuit(inputs)
    
    def visualize_circuit(self):
        '''
        Draws the circuit with qml.draw_mpl, using dummy inputs
        (0.1 * index, length state_dim) and freshly initialized parameters
        rather than the parameters held by self.circuit.
        '''
        inputs = torch.tensor([0.1 * i for i in range(self.state_dim)], dtype=torch.float32)
        
        # Initialize all parameters using the provided initialization methods
        initialized_params = {}
        for key, shape in self.weight_shapes.items():
            initialized_params[key] = self.init_method[key](torch.empty(shape))

        # Draw the circuit. Keys and argument order follow weight_shapes and
        # the qnode signature (inputs, input_params, params, bias).
        qml.draw_mpl(self.qnode)(inputs, 
                                initialized_params["input_params"], 
                                initialized_params["params"], 
                                initialized_params["bias"])

    def handle_errors_and_warnings(self):
        ''' 
        Handles the errors and warnings.

        Raises ValueError if:
        - n_layers is smaller than 1
        - encoding_type is neither 'full' nor 'partial'
        - encoding_type is 'partial' and state_dim is not divisible by n_qubits
        '''
        # Check if the number of layers is valid
        if self.n_layers < 1:
            raise ValueError("Number of layers must be at least 1.")

        # Check if the encoding type is one the circuit knows how to build
        if self.encoding_type not in ('full', 'partial'):
            raise ValueError(
                f"Invalid encoding_type {self.encoding_type!r}: expected 'full' or 'partial'."
            )

        # Partial encoding gives each wire an equally sized slice of the input
        if self.encoding_type == 'partial' and self.state_dim % self.n_qubits != 0:
            raise ValueError(
                "For 'partial' encoding, state_dim must be divisible by n_qubits "
                f"(got state_dim={self.state_dim}, n_qubits={self.n_qubits})."
            )
        
    def get_parameters(self):
        '''
        Returns the model configuration as a dictionary suited for JSON
        serialization (callables are replaced by their string representation).
        '''
        return {
            "Number of Qubits": self.n_qubits,
            "Number of Layers": self.n_layers,
            "State Dimension": self.state_dim,
            "Device": str(self.device),  # Convert to string representation
            "Shots": self.shots,
            "Differentiation Method": self.diff_method,
            "Encoding Type": self.encoding_type,
            "Entanglement": self.entanglement,
            "Entanglement Pattern": self.entanglement_pattern,
            "Entanglement Gate": get_function_representation(self.entanglement_gate),  # Use the helper function to represent the gate
            "Input Initialization": get_function_representation(self.input_init),  # Use the helper function for the initializer
            "Parameters Initialization": get_function_representation(self.weight_init),  # Use the helper function for the initializer
            "Bias Initialization": get_function_representation(self.bias_init),  # Use the helper function for the initializer
            "Measurement Function": get_function_representation(self.measure)  # Use the helper function for the measurement function
        }
    
    @classmethod
    def info(cls):
        '''
        Provides a summary of the UQC class and its parameters/methods.
        '''
        info_text = """
        
        Creates a parameterized quantum circuit based on the 'Data re-uploading for a universal quantum classifier' paper by Adrián Pérez-Salinas.

        Parameters:
        ----------
        n_qubits (int): 
            Number of qubits used in the quantum circuit.

        n_layers (int): 
            Number of layers in the quantum circuit. Each layer consists of parameterized rotations and entanglement gates.

        state_dim (int): 
            Dimensionality of the state space, determining the size of the weights associated with each qubit.

        device (str): 
            The quantum device to be used for execution, such as 'default.qubit', 'lightning.qubit', etc.

        shots (int, optional): 
            Number of times the circuit gets executed (repeated measurements). If None, the circuit is executed with analytic calculations (no shot noise).

        diff_method (str): 
            Differentiation method used for training the model. Common options include 'best', 'parameter-shift', 'adjoint', and 'backprop'.

        encoding_type (str): 
            Type of encoding used for the input data. Can be 'full' for complete encoding or 'partial' for partial encoding, which changes the shape of weights.

        entanglement (bool): 
            If True, entanglement between qubits is implemented. The entanglement pattern and gate are defined in entanglement_pattern and entanglement_gate, respectively.

        entanglement_pattern (str): 
            Entanglement pattern used in the circuit, such as 'chain', 'ring', 'all_to_all', etc., as defined by qml.broadcast patterns.

        entanglement_gate (function): 
            Quantum gate used for entanglement, such as qml.CZ or qml.CNOT. This gate is applied between qubits according to the specified entanglement pattern.

        input_init (function): 
            Function to initialize the weights of the quantum circuit, such as torch.nn.init.uniform_, torch.nn.init.normal_, or a user-defined function.

        weight_init (function): 
            Function to initialize the parameters of the quantum circuit, similar to input_init.

        bias_init (function): 
            Function to initialize the bias terms in the quantum circuit, such as torch.nn.init.uniform_, torch.nn.init.zeros_, or a user-defined function.

        measure (function): 
            Measurement function that takes the number of qubits as an argument and returns the measurement result. Common choices are measure_probs, two_measure_expval, or any user-defined measurement function.
        
        Methods:
        --------
        generate_circuit(self): 
            Generates and initializes the quantum circuit based on the parameters.
        
        forward(self, inputs): 
            Takes inputs and passes them through the quantum circuit to get the output.

        visualize_circuit(self): 
            Visualizes the generated quantum circuit for the given number of qubits using the initial parameters. Useful for debugging or analyzing the circuit design.

        handle_errors_and_warnings(self): 
            Handles common errors and warnings, such as invalid parameter values, unsupported devices, and incompatible differentiation methods.
        """
        return info_text

## Policy

In [None]:
class PolicyPostProcessing(nn.Module):
    '''
    Turns the raw output of a variational quantum circuit into a probability
    distribution over actions.

    Three policies are available through policy_type: 'raw_contiguous',
    'raw_parity' (both operate on basis-state probabilities) and 'softmax'
    (operates on expectation values).

    For detailed information about the parameters, call the info() method.
    '''
    def __init__(self,
                 n_qubits,
                 n_actions,
                 policy_type, 
                 beta_scheduling, 
                 beta,
                 increase_rate, 
                 output_scaling,
                 output_init):
        super().__init__()

        self.n_qubits = n_qubits
        self.n_actions = n_actions
        self.policy_type = policy_type
        self.beta_scheduling = beta_scheduling
        self.beta = beta
        self.increase_rate = increase_rate
        self.output_scaling = output_scaling
        self.output_init = output_init

        if self.output_scaling:
            # One trainable scaling factor per action, filled in place by output_init
            self.output_params = nn.Parameter(torch.empty(self.n_actions), requires_grad=True)
            self.output_init(self.output_params)
        else:
            # Register the placeholder under the name the rest of the class uses,
            # so self.output_params exists (as None) when scaling is disabled
            self.register_parameter('output_params', None)

    def forward(self, probs):
        ''' 
        Takes the VQC output and applies the selected policy.

        Raises ValueError when policy_type is not one of the implemented policies.
        '''
        if self.policy_type == 'raw_contiguous':
            policy = self.raw_contiguous(probs)
        elif self.policy_type == 'raw_parity':
            policy = self.raw_parity(probs)
        elif self.policy_type == 'softmax':
            policy = self.softmax(probs)
        else:
            raise ValueError("Invalid post-processing method specified.")
        return policy

    def raw_contiguous(self, probs):
        ''' 
        Applies the Contiguous partition to the probabilities of the basis states (designed to work with qml.probs).

        The probability vector is cut into n_actions consecutive chunks and each
        chunk is summed into the probability of one action.
        '''
        # Compare against the number of basis states directly: truncating log2(n_actions)
        # would let e.g. 3 actions slip through on a single qubit
        if self.n_actions > 2 ** self.n_qubits:
            raise ValueError('Number of actions exceeds the number of basis states!')

        # Split the probabilities in a contiguous manner
        chunks = torch.chunk(probs, self.n_actions)
        return torch.stack([torch.sum(chunk) for chunk in chunks])

    def raw_parity(self, probs):
        ''' 
        Applies the Parity partition to the probabilities of the basis states (designed to work with qml.probs).

        The probability vector is cut into n_actions // 2 consecutive chunks; inside
        each chunk the even-indexed and odd-indexed entries are summed separately,
        producing two action probabilities per chunk.

        Raises NotImplementedError when n_actions is not a power of two (>= 2) and
        ValueError when it exceeds the number of basis states.
        '''
        # The check must run on the un-truncated logarithm: after int() the value
        # is always integral and a non-power-of-two would never be detected
        log_n_actions = float(np.log2(self.n_actions))
        if log_n_actions < 1.0 or not log_n_actions.is_integer():
            raise NotImplementedError('Number of actions needs to be a power of two!')
        log_n_actions = int(log_n_actions)

        # Ensure the number of actions does not exceed the number of basis states
        if log_n_actions > self.n_qubits:
            raise ValueError('Number of actions exceeds number of basis states!')

        # With two actions there is a single chunk (the whole vector), so the
        # same loop covers both the two-action and the general case
        summed_tensors = []
        for chunk in torch.chunk(probs, self.n_actions // 2):
            summed_tensors.append(torch.sum(chunk[::2]))   # even indexed elements
            summed_tensors.append(torch.sum(chunk[1::2]))  # odd indexed elements

        return torch.stack(summed_tensors)
    
    def softmax(self, probs):
        ''' 
        Applies a softmax to the expected values of some observable.

        The values are optionally multiplied by the trainable output parameters,
        then by the inverse temperature beta, before the softmax over dim 0.
        '''
        # Out-of-place multiply: an in-place `*=` would overwrite the tensor
        # handed in by the caller (the circuit output)
        if self.output_scaling:
            probs = probs * self.output_params

        scaled_output = probs * self.beta
        return F.softmax(scaled_output, dim=0)
    
    def beta_schedule(self):
        '''
        Increases the inverse temperature parameter by 'increase_rate'.

        Has no effect unless beta_scheduling is enabled and the policy is 'softmax'.
        '''
        if self.beta_scheduling and self.policy_type == 'softmax':
            self.beta += self.increase_rate

    def get_parameters(self):
        '''
        Returns the post-processing settings as a dictionary for JSON serialization.
        '''
        return {
            "Policy Type": self.policy_type,
            "Beta Scheduling": self.beta_scheduling,
            "Beta": self.beta,
            "Increase Rate": self.increase_rate,
            "Output Scaling": self.output_scaling,
            "Output Initialization": get_function_representation(self.output_init),
        }
    
    @classmethod
    def info(cls):
        '''
        Provides a summary of the PolicyPostProcessing class and its parameters/methods.
        '''
        info_text = """

        Processes the output of the circuit into one of the implemented policies (Born Contiguous, Born Parity, Softmax)

        Parameters:
        ----------
        n_qubits (int): 
            Number of qubits of the circuit; the raw policies require n_actions to be at most 2**n_qubits.
        
        n_actions (int): 
            Number of actions available for the agent to choose from.
        
        policy_type (str): 
            Type of policy applied to the probability distribution:
            - 'raw_contiguous': Applies the Born Contiguous-like policy.
            - 'raw_parity': Applies the Born Parity-like policy.
            - 'softmax': Applies the softmax policy to the expectation values.

        beta_scheduling (bool): 
            If True, updates the inverse temperature parameter (beta) after each episode. Used only for the softmax policy.
        
        beta (float): 
            Inverse temperature parameter used for scaling probabilities in the softmax policy.
        
        increase_rate (float): 
            Amount added to beta at the end of each episode, if beta_scheduling is True.
        
        output_scaling (bool): 
            If True, scales the output probabilities by learnable parameters.
        
        output_init (function): 
            Initialization function for output parameters, such as torch.nn.init.uniform_, torch.nn.init.ones_, etc.
        
        Methods:
        -------
        forward(self, probs):
            Selects an action based on the chosen post_processing method.
        
        raw_contiguous(self, probs):
            Sums up contiguous chunks of probabilities and returns the probability of each action.
        
        raw_parity(self, probs):
            Sums up probabilities based on parity and returns the probability of each action.
        
        softmax(self, probs):
            Applies a softmax function to the scaled probabilities and returns the probability of each action.
        
        beta_schedule(self):
            Updates the beta parameter if beta_scheduling is True. Only applicable for the softmax method.
        """
        return info_text

## Quantum Policy (Circuit + Policy)

In [None]:
class QuantumPolicy(nn.Module):
    '''
    Chains a quantum circuit with a post-processing module to form a policy.

    For detailed information about the parameters, call the info() method.
    '''
    def __init__(self, circuit, post_processing):
        super().__init__()
        # Registration order (circuit first) fixes the order of self.parameters()
        self.circuit = circuit
        self.post_processing = post_processing

    def forward(self, inputs):
        '''
        Runs the input state through the circuit and hands the circuit output to the post processing
        '''
        return self.post_processing.forward(self.circuit.forward(inputs))

    def sample(self, inputs):
        '''
        Draws one action from the action probability distribution.

        Returns the action as a Python number, its log-probability and the full distribution.
        '''
        action_probs = self.forward(inputs)
        categorical = torch.distributions.Categorical(action_probs)
        chosen = categorical.sample()
        chosen_log_prob = categorical.log_prob(chosen)
        return chosen.item(), chosen_log_prob, action_probs

    @classmethod
    def info(cls):
        '''
        Returns a text summary of the QuantumPolicy class, its parameters and methods.
        '''
        info_text = """

        Combines a quantum circuit for generating action probabilities with a post-processing step to create a valid probability distribution for action selection.

        Parameters:
        ----------
        circuit (object): 
            A quantum circuit instance that processes input states and generates raw probabilities.
        
        post_processing (object): 
            A post-processing instance that transforms raw probabilities into a valid probability distribution.

        Methods:
        -------
        sample(inputs):
            Samples an action based on the computed probability distribution for a given input.
        
        forward(inputs):
            Computes the forward pass of the policy network by processing the inputs through the circuit 
            and the post-processing module.
        """
        return info_text

## REINFORCE Agent

In [None]:
class ReinforceAgent:
    '''
    REINFORCE (Monte-Carlo policy gradient) agent that trains a policy in a Gym environment.

    Episodes are collected one at a time; every batch_size episodes the
    accumulated log-probabilities and rewards are turned into a loss and one
    optimizer step is taken, until the environment counts as solved.

    For detailed information about the parameters, call the info() method.
    '''
    def __init__(self, 
                policy, 
                policy_optimizer, 
                env_name, 
                n_episodes, 
                max_t, 
                gamma, 
                baseline, 
                batch_size, 
                normalize,
                print_every, 
                verbose):
        
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.policy = policy.to(self.device)
        self.policy_optimizer = policy_optimizer
        self.env = gym.make(env_name)
        self.env_name = env_name
        self.n_episodes = n_episodes
        self.max_t = max_t
        self.gamma = gamma
        self.baseline = baseline
        self.batch_size = batch_size
        self.normalize = normalize
        self.print_every = print_every
        self.verbose = verbose

        self.solved = False
        self.scores = deque(maxlen=100)   # rolling window of the most recent episode rewards
        self.batch_log_probs = []         # one list of log-probs per episode in the current batch
        self.batch_rewards = []           # one list of rewards per episode in the current batch
        self.loss = torch.tensor(0.0)

    def train(self, run_count=None, rundate=None, path=None, tensorboard=False):
        '''
        Runs the training loop for n_episodes episodes.

        Parameters:
            run_count: index of the run; data is written to disk only when both
                run_count and path are given.
            rundate: label appended to the experiment folder name.
            path: root directory under which the 'data' folder is created.
            tensorboard (bool): if True, logs each episode to a TensorBoard session.
        '''
        # run_path stays None when no data saving was requested, so every later
        # use of it can be guarded instead of hitting an undefined name
        run_path = None
        if run_count is not None and path is not None:
            data_path = create_directory(os.path.join(path, 'data'))
            env_folder = create_directory(os.path.join(data_path, self.env_name))
            experiment_folder_name = f"{self.policy.circuit.__class__.__name__}_{self.policy.circuit.n_qubits}qubits_{self.policy.circuit.n_layers}layer_{rundate}"
            experiment_path = create_directory(os.path.join(env_folder, experiment_folder_name))
            run_path = create_directory(os.path.join(experiment_path, f'run_{str(run_count)}'))
            self.save_agent_data(experiment_path)

        # Create TensorBoard session if specified; with log_dir=None the writer
        # uses its own default log directory
        writer = SummaryWriter(log_dir=run_path) if tensorboard else None

        try:
            for i in range(1, self.n_episodes + 1):
                start_time = time.time()

                # Get episode
                self.get_trajectory()

                # Check if environment is solved
                self.env_solved_verification()

                # Update parameters if the batch is full and environment is not solved
                if i > 1 and i % self.batch_size == 0 and not self.solved:
                    self.update_policy()
                    self.policy.post_processing.beta_schedule()

                end_time = time.time()

                # Calculate the runtime
                self.runtime = end_time - start_time

                # Write in the TensorBoard session
                if writer is not None:
                    self.writer_function(writer, i)

                # Save the episode data
                if run_path is not None:
                    self.save_data(run_path, i)

                # Print out episode data
                if self.verbose == 1:
                    print('Episode {} reward: {:.2f}\t Solved: {}'.format(i, self.scores[-1], self.solved))
                if self.verbose >= 2:
                    print('Episode {} reward: {:.2f}\t Solved: {}\t Runtime: {:.2f}\t Loss: {:.2f}'.format(i, self.scores[-1], self.solved, self.runtime, self.loss))
                if i % self.print_every == 0 and i > 1:
                    print('Last {} Episodes average reward: {:.2f}\t'.format(len(self.scores), np.mean(self.scores)))

            # Save the final weights next to the run data; without a run folder
            # there is nowhere to write them, so the step is skipped
            if run_path is not None:
                self.save_final_weights(run_path)
        finally:
            # Close TensorBoard session even if training was interrupted
            if writer is not None:
                writer.close()

    def get_trajectory(self):
        '''
        Gets a trajectory based on the running policy until the episode ends or max_t steps were taken.

        The episode's log-probabilities and rewards are appended to the current batch
        and its total reward to self.scores.
        '''
        self.saved_log_probs = []
        self.rewards = []
        state = self.env.reset()[0]
        for _ in range(self.max_t):
            state_tensor = torch.tensor(self.normalize_state(state)).float().to(self.device)
            action, log_prob, _ = self.policy.sample(state_tensor)
            state, reward, terminated, truncated, _ = self.env.step(action)
            
            self.saved_log_probs.append(log_prob)
            self.rewards.append(reward)

            # Stop on truncation as well as termination: the environment must not
            # be stepped again once it reported either flag
            if terminated or truncated:
                break

        # Save the episode reward
        self.scores.append(sum(self.rewards))

        # Save data from the episode to the batch
        self.batch_log_probs.append(self.saved_log_probs)
        self.batch_rewards.append(self.rewards)

        # Clear data in case the agent already solved the environment
        if self.solved is True:
            self._reset_batch()

    def _reset_batch(self):
        '''
        Empties the per-batch buffers of log-probabilities and rewards.
        '''
        self.batch_log_probs = []
        self.batch_rewards = []
      
    def update_policy(self):
        '''
        Computes the loss and gradients and updates the policy via gradient methods.

        Does nothing (besides clearing the batch) when the batch holds no steps.
        '''
        if not self.batch_rewards:
            return

        # Discounted returns, computed backwards through each episode
        all_returns = []
        for episode_rewards in self.batch_rewards:
            R = 0
            ep_return = []
            for r in reversed(episode_rewards):
                R = r + self.gamma * R
                ep_return.append(R)
            ep_return.reverse()
            ep_return = torch.tensor(ep_return, dtype=torch.float32).to(self.device)

            # Standardization of the discounted returns. The sample standard
            # deviation of a single value is NaN, which would poison the whole
            # loss, so one-step episodes are only centred
            if ep_return.numel() > 1:
                ep_return = (ep_return - ep_return.mean()) / (ep_return.std() + 1e-8)
            else:
                ep_return = ep_return - ep_return.mean()

            all_returns.append(ep_return)

        # Calculate the policy loss
        if self.baseline:
            # Computed with torch so it also works when the returns live on the GPU.
            # NOTE(review): each episode's returns were centred above, so their sums
            # (and therefore this baseline) are numerically close to zero - confirm
            # this is the intended baseline.
            baseline = torch.stack([ep_returns.sum() for ep_returns in all_returns]).mean()
        else:
            baseline = None

        policy_loss = []
        for log_probs, ep_returns in zip(self.batch_log_probs, all_returns):
            for log_prob, ret in zip(log_probs, ep_returns):
                weight = ret if baseline is None else ret - baseline
                policy_loss.append(-log_prob * weight)

        if not policy_loss:
            self._reset_batch()
            return

        self.loss = torch.stack(policy_loss).mean()

        # Compute the gradients 
        self.policy_optimizer.zero_grad()
        self.loss.backward()
        self.policy_optimizer.step()

        # Clear old data
        self._reset_batch()

    def normalize_state(self, state):
        '''
        Processes the input state by reducing its dimensionality and normalizing it
        '''
        # State-space reduction for the Acrobot: the two (cos, sin) pairs are
        # replaced by the arccos of the cosines, the last two entries are kept
        if self.env_name in ('Acrobot-v0', 'Acrobot-v1'):
            theta1 = np.arccos(state[0])
            theta2 = np.arccos(state[2])
            state = [theta1,theta2,state[4],state[5]]

        # Normalize each feature by the maximum absolute value at each step
        if self.normalize:
            max_abs_value = max(abs(value) for value in state)
            if max_abs_value > 0:
                state = np.array([value / max_abs_value for value in state])
            else:
                # An all-zero state has nothing to scale; avoid dividing by zero
                state = np.array(state)
        
        return state
    
    def env_solved_verification(self):
        '''
        Checks if the environment is solved, based on the mean of self.scores
        '''
        # Acrobot-v1 (plain equality: `in ('Acrobot-v1')` would be a substring test)
        if self.env_name == 'Acrobot-v1':
            if np.mean(self.scores) > -125:
                self.solved = True
        
        # CartPole-v0 and CartPole-v1
        elif self.env_name in ('CartPole-v0','CartPole-v1'):
            if np.mean(self.scores) > self.env.spec.reward_threshold:
                self.solved = True
        
        else:              
            warnings.warn(f"No reward threshold defined for environment {self.env_name}. "
                          "Consider specifying a solved condition explicitly.",
                          UserWarning
            )

    def save_agent_data(self, main_path):
        '''
        Stores the most relevant model parameters into a .json file.
        '''
        # Use the get_parameters method to get Circuit Parameters, Policy Parameters and Agent Parameters
        agent_variables = {
            "Circuit Parameters": self.policy.circuit.get_parameters(),
            "Policy Parameters": self.policy.post_processing.get_parameters(),
            "Agent Parameters": self.get_parameters()
        }

        # Sets are not JSON serializable; convert them (recursively) to lists
        def convert_sets_to_lists(obj):
            if isinstance(obj, set):
                return list(obj)
            elif isinstance(obj, dict):
                return {key: convert_sets_to_lists(value) for key, value in obj.items()}
            elif isinstance(obj, list):
                return [convert_sets_to_lists(item) for item in obj]
            else:
                return obj

        agent_variables = convert_sets_to_lists(agent_variables)

        # Save as JSON
        with open(os.path.join(main_path, "agent_characteristics.json"), "w") as f:
            json.dump(agent_variables, f, indent=4)

    @staticmethod
    def _concat_or_empty(arrays):
        '''
        Concatenates a list of 1-D arrays, returning an empty array for an empty list.
        '''
        return np.concatenate(arrays) if arrays else np.empty(0)

    def save_data(self, run_path, iteration):
        '''
        Saves the data into a .npz file for each episode.

        The file is re-read and rewritten on every call; loss, parameters and
        gradients are appended only on episodes where a policy update happened.
        '''
        data_file = os.path.join(run_path, "run_data.npz")
        
        # Load existing data if the file exists; the context manager closes the
        # archive again. allow_pickle is needed for the object arrays written below.
        if os.path.exists(data_file):
            with np.load(data_file, allow_pickle=True) as data:
                old_episode_reward = data['episode_reward'].tolist()
                old_loss = data['loss'].tolist()
                old_runtime = data['runtime'].tolist()
                old_gradients = data['gradients'].tolist()
                old_params = data['params'].tolist()
        else:
            old_episode_reward = []
            old_loss = []
            old_runtime = []
            old_gradients = []
            old_params = []

        # Add episode reward and runtime
        old_episode_reward.append(self.scores[-1])
        old_runtime.append(self.runtime)

        # Stores the loss and parameter gradients when batch is full
        if iteration % self.batch_size == 0 and iteration > 1 and self.solved is False:
            old_loss.append(self.loss.item())
            current_episode_params = []
            current_episode_gradients = []
            for _, param in self.policy.circuit.named_parameters():
                current_episode_params.append(param.cpu().detach().numpy().flatten())

                # Parameters that never received a gradient are skipped
                if param.grad is not None:
                    current_episode_gradients.append(param.grad.cpu().numpy().flatten())

            # One flat array per update; an empty list (e.g. no gradients yet)
            # becomes an empty array instead of making np.concatenate raise
            old_params.append(self._concat_or_empty(current_episode_params))
            old_gradients.append(self._concat_or_empty(current_episode_gradients))
            
        # Save data to .npz file
        np.savez_compressed(data_file,
                            episode_reward=np.array(old_episode_reward),
                            loss=np.array(old_loss),
                            runtime=np.array(old_runtime),
                            gradients=np.array(old_gradients, dtype=object),
                            params=np.array(old_params,dtype=object))

    def save_final_weights(self, run_path):
        '''
        Saves the final model weights into the .npz file at the end of training.
        '''
        data_file = os.path.join(run_path, "run_data.npz")

        # Load existing data if the file exists (arrays are materialized before
        # the archive is closed)
        if os.path.exists(data_file):
            with np.load(data_file, allow_pickle=True) as stored:
                data = dict(stored)
        else:
            data = {}

        # Extract and save final weights
        final_weights = {name: param.detach().cpu().numpy() for name, param in self.policy.named_parameters()}
        data['final_weights'] = final_weights

        # Save updated data with final weights to .npz file
        np.savez_compressed(data_file, **data)
    
    def writer_function(self, writer, iteration):
        '''
        Stores data into a tensorboard session
        '''
        writer.add_scalar("Episode Reward", self.scores[-1], global_step=iteration)
        writer.add_scalar("Runtime", self.runtime, global_step=iteration)
        writer.add_scalar("Loss", self.loss.item(), global_step=iteration)

        # Iterate the circuit module directly: names yielded by
        # self.policy.named_parameters() carry a 'circuit.' prefix and would
        # never equal the bare names compared against here
        gradients = [
            param.grad.view(-1)
            for name, param in self.policy.circuit.named_parameters()
            if param.grad is not None and name in ('input_params', 'params')
        ]

        # Concatenate all collected gradients into a single tensor and calculate L2 norm of the combined gradients
        if gradients:
            combined_grad_norm = torch.norm(torch.cat(gradients)).item()
            
            # Log the combined gradient norm
            writer.add_scalar("Gradient Norm/Combined", combined_grad_norm, global_step=iteration)

    def get_parameters(self):
        '''
        Returns the agent settings as a dictionary for JSON serialization.
        '''
        return {
            "Environment": self.env_name,
            "Gamma (discounting factor)": self.gamma,
            "Baseline": self.baseline,
            "Batch Size": self.batch_size,
            "Normalize": self.normalize,
        }
    
    @classmethod
    def info(cls):
        '''
        Provides a summary of the ReinforceAgent class and its parameters/methods.
        '''
        info_text = """

        Implements a Reinforcement Learning agent using the REINFORCE algorithm.

        Parameters:
        ----------
        policy (PolicyType): 
            An instance of the policy class that defines the action selection process based on the outputs of the VQC.
        
        policy_optimizer (torch.optim.Optimizer): 
            Optimizer used for updating the policy parameters during training.
        
        env_name (str): 
            Name of the environment to interact with, typically defined in OpenAI Gym.
        
        n_episodes (int): 
            Total number of episodes for training the agent.
        
        max_t (int): 
            Maximum number of time steps per episode.
        
        gamma (float): 
            Discount factor for future rewards, where 0 < gamma < 1.
        
        baseline (bool): 
            If True, applies a baseline to reduce variance in the policy gradient estimates.
        
        batch_size (int): 
            Number of episodes after which the policy parameters will be updated.
        
        normalize (bool): 
            If True, normalizes the state input for the agent.
        
        print_every (int): 
            Number of episodes after which to print the average reward.
        
        verbose (int): 
            Level of verbosity for outputting training details (0: none, 1: basic, 2: detailed).

        Methods:
        -------
        train(self, run_count=None, rundate=None, path=None, tensorboard=False):
            Trains the agent by collecting episodes and updating the policy using the REINFORCE algorithm.

        get_trajectory(self):
            Gathers a trajectory from the environment using the current policy until the episode ends.

        update_policy(self):
            Computes the loss, applies gradients, and updates the policy parameters.

        normalize_state(self, state):
            Normalizes and processes the input state to reduce dimensionality.

        env_solved_verification(self):
            Checks if the environment has been solved based on the average score.

        save_agent_data(self, main_path):
            Saves the agent's parameters and model characteristics to a JSON file.

        save_data(self, run_path, iteration):
            Saves episode data, including rewards, loss, and gradients, to a .npz file.

        save_final_weights(self, run_path):
            Saves the final model weights at the end of training to a .npz file.

        writer_function(self, writer, iteration):
            Logs episode data to TensorBoard for visualization.

        get_parameters(self):
            Returns a dictionary of important agent parameters for JSON serialization.
        """
        return info_text

# Single Runs

## Jerbi

### Visualize Circuit

In [None]:
# Circuit settings for the JerbiModel used only to draw the circuit
n_qubits = 4
n_layers = 5
device = 'lightning.qubit'
shots = None
diff_method = 'adjoint' 
entanglement = True
entanglement_pattern = 'all_to_all'
entanglement_gate = qml.CZ
input_scaling = True
input_init = partial(torch.nn.init.normal_, mean=0.0, std=0.01)
# Draw the weights uniformly from [-pi, pi], as the other circuit cells do.
# normal_(-np.pi, np.pi) would instead sample with mean=-pi and std=pi.
weight_init = lambda shape, dtype=torch.float: torch.FloatTensor(shape).uniform_(-np.pi, np.pi)
policy_circuit_measure = two_measure_expval
policy_circuit = JerbiModel(n_qubits, 
                            n_layers, 
                            device, 
                            shots, 
                            diff_method, 
                            entanglement, 
                            entanglement_pattern, 
                            entanglement_gate, 
                            input_scaling, 
                            input_init, 
                            weight_init, 
                            policy_circuit_measure)

policy_circuit.visualize_circuit()

### Train Circuit

In [None]:
# --- Circuit settings ---
n_qubits = 4
n_layers = 5
device = 'lightning.qubit'
shots = None
diff_method = 'adjoint' 
entanglement = True
entanglement_pattern = 'all_to_all'
entanglement_gate = qml.CZ
input_scaling = True
input_init = partial(torch.nn.init.normal_, mean=0.0, std=0.01)
# Draw the weights uniformly from [-pi, pi], as the parallel-run Jerbi cell does.
# normal_(-np.pi, np.pi) would instead sample with mean=-pi and std=pi.
weight_init = lambda shape, dtype=torch.float: torch.FloatTensor(shape).uniform_(-np.pi, np.pi)
policy_circuit_measure = two_measure_expval
policy_circuit = JerbiModel(n_qubits, 
                            n_layers, 
                            device, 
                            shots, 
                            diff_method, 
                            entanglement, 
                            entanglement_pattern, 
                            entanglement_gate, 
                            input_scaling, 
                            input_init, 
                            weight_init, 
                            policy_circuit_measure)


# --- Post-processing settings ---
n_actions = 2
post_processing = 'softmax'
beta_scheduling = False
beta = 1
increase_rate = 0.0005
output_scaling = True
output_init = torch.nn.init.ones_
policy_post_process = PolicyPostProcessing(n_qubits, n_actions, 
                         post_processing, 
                         beta_scheduling, 
                         beta, increase_rate, 
                         output_scaling, 
                         output_init)

# --- Circuit + post-processing ---
policy = QuantumPolicy(policy_circuit,policy_post_process)

# --- Optimizer: one learning rate per parameter tensor, paired by position ---
# NOTE(review): the parallel-run Jerbi cell labels the same values as
# [input_weights, weight, output_weights] - confirm which label matches the
# order in which JerbiModel registers its parameters.
policy_lr_list= [0.1, 0.01, 0.1]  # [weights, input_weights, output_weights]
policy_params = list(policy_circuit.parameters()) + list(policy_post_process.parameters())
policy_optimizer= create_optimizer_with_lr(policy_params, policy_lr_list, use_amsgrad=True)

# --- Agent and environment settings ---
env_name = 'CartPole-v1'
n_episodes = 1000
max_t = 500
gamma = 0.98
baseline = True
batch_size = 10
normalize = True
print_every = 100
verbose = 1
reinforce_update = ReinforceAgent(policy, 
                                  policy_optimizer, 
                                  env_name, 
                                  n_episodes, 
                                  max_t, 
                                  gamma, 
                                  baseline, 
                                  batch_size,
                                  normalize,
                                  print_every, 
                                  verbose)
reinforce_update.train()

## TFQ

### Visualize Circuit

In [None]:
# Circuit settings for the TfqTutorial model used only to draw the circuit
n_qubits = 4
n_layers = 5
shots = None
diff_method = 'adjoint' 
entanglement = True
entanglement_pattern = "all_to_all"
entanglement_gate = qml.CZ
input_scaling = True
input_init = partial(torch.nn.init.normal_, mean=0.0, std=0.01)
# Draw the weights uniformly from [-pi, pi], as the TFQ training cell does.
# normal_(-np.pi, np.pi) would instead sample with mean=-pi and std=pi.
weight_init = lambda shape, dtype=torch.float: torch.FloatTensor(shape).uniform_(-np.pi, np.pi)
policy_circuit_measure = two_measure_expval
policy_circuit = TfqTutorial(n_qubits,
                            n_layers, 
                            shots, 
                            diff_method, 
                            entanglement, 
                            entanglement_pattern, 
                            entanglement_gate, 
                            input_scaling, 
                            input_init, 
                            weight_init, 
                            policy_circuit_measure)

policy_circuit.visualize_circuit()

### Train Circuit

In [None]:
# --- Circuit settings ---
n_qubits = 4
n_layers = 5
shots = None
diff_method = 'adjoint' 
entanglement = True
entanglement_pattern = 'all_to_all'
entanglement_gate = qml.CZ
input_scaling = True
input_init = partial(torch.nn.init.normal_, mean=0.0, std=0.01)
weight_init = lambda shape, dtype=torch.float: torch.FloatTensor(shape).uniform_(-np.pi, np.pi)
policy_circuit_measure = two_measure_expval
policy_circuit = TfqTutorial(n_qubits, n_layers, shots, diff_method, 
                     entanglement, entanglement_pattern, entanglement_gate, 
                     input_scaling, input_init, weight_init, policy_circuit_measure)


# --- Post-processing settings ---
n_actions = 2
post_processing = 'softmax'
beta_scheduling = False
beta = 1
increase_rate = 0.0005
output_scaling = True
output_init = torch.nn.init.ones_
# n_qubits is the first positional argument of PolicyPostProcessing; leaving it
# out shifts every argument by one and the constructor call fails
policy_post_process = PolicyPostProcessing(n_qubits, n_actions, 
                         post_processing, 
                         beta_scheduling, 
                         beta, increase_rate, 
                         output_scaling, 
                         output_init)

# --- Circuit + post-processing ---
policy = QuantumPolicy(policy_circuit,policy_post_process)

# --- Optimizer: one learning rate per parameter tensor, paired by position ---
policy_lr_list= [0.01, 0.1, 0.1]  # [weights, input_weights, output_weights]
policy_params = list(policy_circuit.parameters()) + list(policy_post_process.parameters())
policy_optimizer= create_optimizer_with_lr(policy_params, policy_lr_list, use_amsgrad=True)

# --- Agent and environment settings ---
env_name = 'CartPole-v1'
n_episodes = 1000
max_t = 500
gamma = 0.98
baseline = True
batch_size = 10
normalize = True
print_every = 100
verbose = 1
reinforce_update = ReinforceAgent(policy, 
                                  policy_optimizer, 
                                  env_name, 
                                  n_episodes, 
                                  max_t, 
                                  gamma, 
                                  baseline, 
                                  batch_size,
                                  normalize,
                                  print_every, 
                                  verbose)
reinforce_update.train()

## UQC

### Visualize Circuit

In [None]:
# --- Circuit size and simulator ---
n_qubits = 5
n_layers = 5
state_dim = 4
device = 'lightning.qubit'
shots = None
diff_method = 'adjoint'

# --- Encoding and entanglement ---
encoding = 'full'
entanglement = True
entanglement_pattern = "all_to_all"
entanglement_gate = qml.CZ

# --- Initializers and measurement ---
# NOTE(review): input_init is defined here but not handed to UQC below, where
# weight_init is passed twice - confirm this is intended.
input_init = partial(torch.nn.init.normal_, mean=0.0, std=0.01)
weight_init = lambda shape, dtype=torch.float: torch.FloatTensor(shape).uniform_(-np.pi, np.pi)
bias_init = torch.nn.init.zeros_
policy_circuit_measure = two_measure_expval

# Build the circuit and draw it
policy_circuit = UQC(
    n_qubits, n_layers, state_dim,
    device, shots, diff_method,
    encoding,
    entanglement, entanglement_pattern, entanglement_gate,
    weight_init, weight_init, bias_init,
    policy_circuit_measure,
)

policy_circuit.visualize_circuit()

### Train Circuit

In [None]:
# --- Circuit size and simulator ---
n_qubits = 4
n_layers = 5
state_dim = 4
device = 'lightning.qubit'
shots = None
diff_method = 'adjoint'

# --- Encoding and entanglement ---
encoding = 'full'
entanglement = True
entanglement_pattern = "all_to_all"
entanglement_gate = qml.CZ

# --- Initializers and measurement ---
# NOTE(review): input_init is defined here but not handed to UQC below, where
# weight_init is passed twice - confirm this is intended.
input_init = partial(torch.nn.init.normal_, mean=0.0, std=0.01)
weight_init = lambda shape, dtype=torch.float: torch.FloatTensor(shape).uniform_(-np.pi, np.pi)
bias_init = torch.nn.init.zeros_
policy_circuit_measure = two_measure_expval

policy_circuit = UQC(
    n_qubits, n_layers, state_dim,
    device, shots, diff_method,
    encoding,
    entanglement, entanglement_pattern, entanglement_gate,
    weight_init, weight_init, bias_init,
    policy_circuit_measure,
)

# --- Post-processing settings ---
n_actions = 2
post_processing = 'softmax'
beta_scheduling = False
beta = 1
increase_rate = 0.0005
output_scaling = True
output_init = torch.nn.init.ones_
policy_post_process = PolicyPostProcessing(
    n_qubits, n_actions,
    post_processing,
    beta_scheduling, beta, increase_rate,
    output_scaling, output_init,
)

# --- Circuit + post-processing ---
policy = QuantumPolicy(policy_circuit, policy_post_process)

# --- Optimizer: one learning rate per parameter tensor, paired by position ---
policy_lr_list = [0.1, 0.01, 0.1, 0.1]  # [input_weights, weights, bias, output_weights]
policy_params = list(policy_circuit.parameters()) + list(policy_post_process.parameters())
policy_optimizer = create_optimizer_with_lr(policy_params, policy_lr_list, use_amsgrad=True)

# --- Agent and environment settings ---
env_name = 'CartPole-v1'
n_episodes = 1000
max_t = 500
gamma = 0.98
baseline = True
batch_size = 10
normalize = True
print_every = 100
verbose = 1
reinforce_update = ReinforceAgent(
    policy, policy_optimizer,
    env_name, n_episodes, max_t,
    gamma, baseline, batch_size, normalize,
    print_every, verbose,
)
reinforce_update.train()

# Parallel Runs

## Jerbi

In [None]:
@ray.remote
def train_agents(file_name, rundate):
    """
    Ray task: build a JerbiModel policy and train it with ReinforceAgent.

    Args:
        file_name: Identifier of this agent; forwarded to
            ``ReinforceAgent.train`` and echoed in the returned string.
        rundate: Run label forwarded to ``ReinforceAgent.train``.

    Returns:
        str: ``'Agent <file_name>: <solved>'``, where ``<solved>`` is the
        agent's ``solved`` attribute read after training.
    """
    # Path settings: directory two levels above the current working directory.
    base_dir = os.path.abspath(os.path.join(os.getcwd(), "../../"))

    # VQC settings
    n_qubits, n_layers = 4, 5
    backend = 'lightning.qubit'
    shots = None
    diff_method = 'adjoint'
    entanglement = True
    entanglement_pattern = "all_to_all"
    entanglement_gate = qml.CZ
    input_scaling = True
    input_init = partial(torch.nn.init.normal_, mean=0.0, std=0.01)
    weight_init = lambda shape, dtype=torch.float: torch.FloatTensor(shape).uniform_(-np.pi, np.pi)
    measure = two_measure_expval
    circuit = JerbiModel(
        n_qubits, n_layers, backend, shots, diff_method,
        entanglement, entanglement_pattern, entanglement_gate,
        input_scaling, input_init, weight_init, measure,
    )

    # Post processing settings
    n_actions = 2
    post_processing = 'softmax'
    beta_scheduling = False
    beta = 1
    increase_rate = 0.003
    output_scaling = True
    output_init = torch.nn.init.ones_
    post_process = PolicyPostProcessing(
        n_qubits, n_actions, post_processing,
        beta_scheduling, beta, increase_rate,
        output_scaling, output_init,
    )

    # Circuit + Post processing
    policy = QuantumPolicy(circuit, post_process)

    # Gradient learning rates, paired positionally with the parameters below.
    learning_rates = [0.1, 0.01, 0.1]  # [input_weights, weight, output_weights]
    trainable_params = [*circuit.parameters(), *post_process.parameters()]
    optimizer = create_optimizer_with_lr(trainable_params, learning_rates, use_amsgrad=True)

    # Agent and environment settings
    env_name = 'CartPole-v1'
    n_episodes = 1000
    max_t = 500
    gamma = 0.98
    baseline = True
    batch_size = 10
    normalize = True
    print_every = 100
    verbose = 1
    agent = ReinforceAgent(
        policy, optimizer, env_name, n_episodes, max_t, gamma,
        baseline, batch_size, normalize, print_every, verbose,
    )
    agent.train(file_name, rundate, base_dir, True)

    return f"Agent {file_name!s}: {agent.solved!s}"

if __name__ == "__main__":
    # Launch the Jerbi agents in sequential batches. Each batch submits
    # `num_agents` Ray tasks at once and blocks until all of them finish.
    # NOTE: despite its name, `agents_per_run` bounds the outer loop, i.e. it
    # is the number of batches; `num_agents` is the number of agents per batch.
    # Total agents trained = agents_per_run * num_agents.

    all_results = []
    # One timestamp shared by every agent of this launch.
    rundate = datetime.now().strftime('%Y-%m-%d_%H.%M.%S')
    agents_per_run = 2
    num_agents = 4

    try:
        for run_index in range(agents_per_run):
            # Offset so agent ids are unique across batches (0..3, 4..7, ...).
            start_agent_index = run_index * num_agents

            results = [
                train_agents.remote(str(start_agent_index + i), rundate)
                for i in range(num_agents)
            ]

            # Blocks until every task of the batch has returned; re-raises
            # the task's exception if one of them failed.
            completed_results = ray.get(results)
            all_results.extend(completed_results)
            print(f"Results for run {run_index}: {completed_results}")
    finally:
        # Shut Ray down even when a task raised, so a failed batch does not
        # leave Ray (and its still-running sibling tasks) alive in the session.
        ray.shutdown()


## TFQ


In [None]:
@ray.remote
def train_agents(file_name, rundate):
    """
    Ray task: build a TfqTutorial policy and train it with ReinforceAgent.

    Args:
        file_name: Identifier of this agent; forwarded to
            ``ReinforceAgent.train`` and echoed in the returned string.
        rundate: Run label forwarded to ``ReinforceAgent.train``.

    Returns:
        str: ``'Agent <file_name>: <solved>'``, where ``<solved>`` is the
        agent's ``solved`` attribute read after training.
    """
    # Path settings: directory two levels above the current working directory.
    base_dir = os.path.abspath(os.path.join(os.getcwd(), "../../"))

    # VQC settings
    n_qubits, n_layers = 4, 5
    backend = 'lightning.qubit'
    shots = None
    diff_method = 'adjoint'
    entanglement = True
    entanglement_pattern = "all_to_all"
    entanglement_gate = qml.CZ
    input_scaling = True
    input_init = partial(torch.nn.init.normal_, mean=0.0, std=0.01)
    weight_init = lambda shape, dtype=torch.float: torch.FloatTensor(shape).uniform_(-np.pi, np.pi)
    measure = two_measure_expval
    circuit = TfqTutorial(
        n_qubits, n_layers, backend, shots, diff_method,
        entanglement, entanglement_pattern, entanglement_gate,
        input_scaling, input_init, weight_init, measure,
    )

    # Post processing settings
    n_actions = 2
    post_processing = 'softmax'
    beta_scheduling = False
    beta = 1
    increase_rate = 0.003
    output_scaling = True
    output_init = torch.nn.init.ones_
    post_process = PolicyPostProcessing(
        n_qubits, n_actions, post_processing,
        beta_scheduling, beta, increase_rate,
        output_scaling, output_init,
    )

    # Circuit + Post processing
    policy = QuantumPolicy(circuit, post_process)

    # Gradient learning rates, paired positionally with the parameters below.
    learning_rates = [0.1, 0.01, 0.1]  # [input_weights, weight, output_weights]
    trainable_params = [*circuit.parameters(), *post_process.parameters()]
    optimizer = create_optimizer_with_lr(trainable_params, learning_rates, use_amsgrad=True)

    # Agent and environment settings
    env_name = 'CartPole-v1'
    n_episodes = 1000
    max_t = 500
    gamma = 0.98
    baseline = True
    batch_size = 10
    normalize = True
    print_every = 100
    verbose = 1
    agent = ReinforceAgent(
        policy, optimizer, env_name, n_episodes, max_t, gamma,
        baseline, batch_size, normalize, print_every, verbose,
    )
    agent.train(file_name, rundate, base_dir, True)

    return f"Agent {file_name!s}: {agent.solved!s}"

if __name__ == "__main__":
    # Launch the TFQ agents in sequential batches. Each batch submits
    # `num_agents` Ray tasks at once and blocks until all of them finish.
    # NOTE: despite its name, `agents_per_run` bounds the outer loop, i.e. it
    # is the number of batches; `num_agents` is the number of agents per batch.
    # Total agents trained = agents_per_run * num_agents.

    all_results = []
    # One timestamp shared by every agent of this launch.
    rundate = datetime.now().strftime('%Y-%m-%d_%H.%M.%S')
    agents_per_run = 2
    num_agents = 4

    try:
        for run_index in range(agents_per_run):
            # Offset so agent ids are unique across batches (0..3, 4..7, ...).
            start_agent_index = run_index * num_agents

            results = [
                train_agents.remote(str(start_agent_index + i), rundate)
                for i in range(num_agents)
            ]

            # Blocks until every task of the batch has returned; re-raises
            # the task's exception if one of them failed.
            completed_results = ray.get(results)
            all_results.extend(completed_results)
            print(f"Results for run {run_index}: {completed_results}")
    finally:
        # Shut Ray down even when a task raised, so a failed batch does not
        # leave Ray (and its still-running sibling tasks) alive in the session.
        ray.shutdown()


## UQC

In [None]:
@ray.remote
def train_agents(file_name, rundate):
    """
    Ray task: build a UQC policy and train it with ReinforceAgent.

    Args:
        file_name: Identifier of this agent; forwarded to
            ``ReinforceAgent.train`` and echoed in the returned string.
        rundate: Run label forwarded to ``ReinforceAgent.train``.

    Returns:
        str: ``'Agent <file_name>: <solved>'``, where ``<solved>`` is the
        agent's ``solved`` attribute read after training.
    """
    # Path settings: directory two levels above the current working directory.
    base_dir = os.path.abspath(os.path.join(os.getcwd(), "../../"))

    # VQC settings
    n_qubits, n_layers = 4, 5
    state_dim = 4
    backend = 'lightning.qubit'
    shots = None
    diff_method = 'adjoint'
    encoding = 'full'
    entanglement = True
    entanglement_pattern = "all_to_all"
    entanglement_gate = qml.CZ
    input_init = partial(torch.nn.init.normal_, mean=0.0, std=0.01)
    weight_init = lambda shape, dtype=torch.float: torch.FloatTensor(shape).uniform_(-np.pi, np.pi)
    bias_init = torch.nn.init.zeros_
    measure = two_measure_expval
    circuit = UQC(
        n_qubits, n_layers, state_dim,
        backend, shots, diff_method,
        encoding,
        entanglement, entanglement_pattern, entanglement_gate,
        input_init, weight_init, bias_init,
        measure,
    )

    # Post processing settings
    n_actions = 2
    post_processing = 'softmax'
    beta_scheduling = False
    beta = 1
    increase_rate = 0.003
    output_scaling = True
    output_init = torch.nn.init.ones_
    post_process = PolicyPostProcessing(
        n_qubits, n_actions, post_processing,
        beta_scheduling, beta, increase_rate,
        output_scaling, output_init,
    )

    # Circuit + Post processing
    policy = QuantumPolicy(circuit, post_process)

    # Gradient learning rates, paired positionally with the parameters below.
    learning_rates = [0.1, 0.01, 0.1, 0.1]  # [weights, params, bias, output_weights]
    trainable_params = [*circuit.parameters(), *post_process.parameters()]
    optimizer = create_optimizer_with_lr(trainable_params, learning_rates, use_amsgrad=True)

    # Agent and environment settings
    env_name = 'CartPole-v1'
    n_episodes = 1000
    max_t = 500
    gamma = 0.98
    baseline = True
    batch_size = 10
    normalize = True
    print_every = 100
    verbose = 1
    agent = ReinforceAgent(
        policy, optimizer, env_name, n_episodes, max_t, gamma,
        baseline, batch_size, normalize, print_every, verbose,
    )
    agent.train(file_name, rundate, base_dir, True)

    return f"Agent {file_name!s}: {agent.solved!s}"

if __name__ == "__main__":
    # Launch the UQC agents in sequential batches. Each batch submits
    # `num_agents` Ray tasks at once and blocks until all of them finish.
    # NOTE: despite its name, `agents_per_run` bounds the outer loop, i.e. it
    # is the number of batches; `num_agents` is the number of agents per batch.
    # Total agents trained = agents_per_run * num_agents.

    all_results = []
    # One timestamp shared by every agent of this launch.
    rundate = datetime.now().strftime('%Y-%m-%d_%H.%M.%S')
    agents_per_run = 2
    num_agents = 4

    try:
        for run_index in range(agents_per_run):
            # Offset so agent ids are unique across batches (0..3, 4..7, ...).
            start_agent_index = run_index * num_agents

            results = [
                train_agents.remote(str(start_agent_index + i), rundate)
                for i in range(num_agents)
            ]

            # Blocks until every task of the batch has returned; re-raises
            # the task's exception if one of them failed.
            completed_results = ray.get(results)
            all_results.extend(completed_results)
            print(f"Results for run {run_index}: {completed_results}")
    finally:
        # Shut Ray down even when a task raised, so a failed batch does not
        # leave Ray (and its still-running sibling tasks) alive in the session.
        ray.shutdown()