In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from qiskit import QuantumCircuit, QuantumRegister
from qiskit_aer import Aer, AerSimulator
from qiskit.quantum_info import state_fidelity, partial_trace, DensityMatrix, Statevector, Operator
from vff_ansatz_8q_up import get_full_vff_quantum_circuit
from typing import List, Tuple, Dict, Optional
import datetime
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import multiprocessing
from functools import lru_cache
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from qiskit.exceptions import QiskitError

from CustomOperations import *

# Configuration, constants

In [3]:
class Config:
    """Central container for simulation, ansatz, optimizer and file-naming settings.

    Everything is a class attribute, so values are read as ``Config.NAME``
    without instantiating the class. Derived quantities (grid spacings, potential
    offsets, ...) are computed once, while the class body executes, from the
    attributes defined above them; reassigning a base attribute afterwards does
    NOT recompute the derived ones.
    """
    # System parameters

    trial = 4.0                 # Run label; embedded in every output file name
    QUBITS_NUM = 8             # Number of Position Register qubits
    timestep = 10.0             # Trotter step size (smaller step = longer runtime, scales linearly)
    diabaticity = 0.0         # Diabaticity parameter for potential energy
    
    # Physical constants
    mass = 1818.18              # Mass term (in appropriate units)
    box_size = 2               # Simulation box size
    half_box = box_size / 2     # Half box size (box defined from -d to d)
    hbar = 1.0                  # Reduced Planck constant (atomic units)
    
    # State space parameters
    state_count = 2**QUBITS_NUM         # Number of discrete states in position basis
    position_spacing = box_size / state_count  # Position grid spacing
    momentum_spacing = 2*np.pi / (state_count * position_spacing)  # Momentum grid spacing
    Nyquist = np.pi / position_spacing  # Nyquist frequency (max/min momentum)
    
    # Initial wave packet parameters
    base_offset = 0       # Horizontal offset of initialized Gaussian wavepacket
    momentum_init = -1.0         # Initial momentum of Gaussian wavepacket
    wavepacket_width = 1.0/3.0  # Width of initialized Gaussian wavepacket
    
    # Potential parameters
    potential_center = 0        # Relative center for simulation box
    V1_strength = 0.015 # Strength of first quadratic potential (1/2 m*\omega^2)
    V2_strength = 0.015 # Strength of second quadratic potential (1/2 m*\omega^2)
    V1_offset = potential_center + 1.5  # Offset for first potential
    V2_offset = potential_center - 1.5  # Offset for second potential
    
    # VFF optimization parameters
    NUM_LAYERS_EIGENVECTOR = 2  # Number of eigenvector ansatz layers
    NUM_LAYERS_DIAGONAL = 1    # Number of diagonal ansatz layers
    MAX_ITERATIONS = 200       # Maximum optimization iterations
    LEARNING_RATE = 5.0         # Initial learning rate
    ADAPTIVE_LR = True          # Whether to use adaptive learning rate
    LR_PATIENCE = 5             # Patience counter for learning rate adaptation
    MIN_LR = 0.01             # Minimum learning rate
    MAX_LR = 20.0               # Maximum learning rate
    LR_FACTOR = 1.50             # Factor to increase/decrease learning rate
    
    # Evaluation parameters
    FAST_FORWARD_N_VALUES = [1, 2, 4, 8, 16, 32, 64, 128]  # Values of N to evaluate
    
    # Cached simulators for reuse
    # NOTE: these backend lookups run when the class body executes, i.e. as soon
    # as this cell is run.
    STATEVECTOR_SIMULATOR = Aer.get_backend('statevector_simulator')
    UNITARY_SIMULATOR = Aer.get_backend('unitary_simulator')
    
    # File naming methods
    @classmethod
    def get_base_filename(cls):
        """Return the common file-name prefix for this run.

        The prefix embeds today's date (evaluated at call time) together with
        the trial label, qubit count, timestep, initial offset, initial
        momentum and diabaticity.
        """
        current_date = datetime.datetime.now().strftime("%Y-%m-%d")
        return f"Trial{cls.trial}_{current_date}_{cls.QUBITS_NUM}q_vff_kinetic_timestep{cls.timestep}_x0{cls.base_offset}_p0{cls.momentum_init}_alpha{cls.diabaticity}"
    
    @classmethod
    def get_optimizer_filename(cls):
        """Return the name of the per-iteration optimization log file."""
        return f"{cls.get_base_filename()}_parameter_optimization.txt"
    
    @classmethod
    def get_params_filename(cls):
        """Return the name of the file holding the optimized parameters."""
        return f"{cls.get_base_filename()}_optimized.txt"
    
    @classmethod
    def get_evaluation_filename(cls):
        """Return the name of the fast-forwarding evaluation table file."""
        return f"{cls.get_base_filename()}_fast_forwarding_evaluation.txt"
    
    @classmethod
    def get_plot_filename(cls):
        """Return the name of the fidelity plot image file."""
        return f"{cls.get_base_filename()}_fidelity_plot.png"
    
    @classmethod
    def calculate_parameter_counts(cls):
        """Return ``(total_w_params, total_d_params)`` for the configured ansatz.

        Each eigenvector (W) layer contributes
        ``QUBITS_NUM + QUBITS_NUM//2 + (QUBITS_NUM-1)//2`` parameters and each
        diagonal (D) layer contributes ``QUBITS_NUM`` parameters.
        """
        w_params_per_layer = cls.QUBITS_NUM + (cls.QUBITS_NUM//2) + ((cls.QUBITS_NUM-1)//2)
        total_w_params = (cls.NUM_LAYERS_EIGENVECTOR * w_params_per_layer)
        d_params_per_layer = cls.QUBITS_NUM
        total_d_params = cls.NUM_LAYERS_DIAGONAL * d_params_per_layer
        return total_w_params, total_d_params

# Quantum Circuit Operations

In [4]:
def apply_zeroth_order_operations(angle, circuit, target):
    """Append the constant-phase gate sequence P, X, P, X to ``circuit``.

    The phase gate is applied once to each computational state of ``target``
    (the X gates swap them in between), so both states pick up the same
    phase ``angle``.

    Args:
        angle: Phase angle passed to each ``circuit.p`` call.
        circuit: Object exposing ``p(angle, qubit)`` and ``x(qubit)``.
        target: Qubit the four gates act on.
    """
    # Two identical (phase, flip) rounds: the second flip restores the qubit.
    for _ in range(2):
        circuit.p(angle, target)
        circuit.x(target)

def apply_first_order_operations(angle, circuit, position_register):
    """Append the phase gates that implement a phase linear in the register value.

    Qubit ``k`` of ``position_register`` receives a phase gate of angle
    ``angle * 2**k``, so a basis state with integer value ``x`` accumulates
    the phase ``angle * x``.

    Args:
        angle: Phase per unit of the register's integer value.
        circuit: Circuit the gates are appended to.
        position_register: Indexable register with at least
            ``Config.QUBITS_NUM`` qubits.
    """
    for bit_index in range(Config.QUBITS_NUM):
        # (1 << bit_index) is the binary weight 2**bit_index of this qubit.
        circuit.p(angle * (1 << bit_index), position_register[bit_index])

def apply_second_order_operations(angle, circuit, position_register):
    """Append the gates that implement a phase quadratic in the register value.

    Expanding ``x**2`` for ``x = sum_k 2**k * b_k`` gives a diagonal part
    (``4**k`` per qubit, applied with single-qubit phase gates) and a cross
    part (``2**(j+k)`` for every ordered pair ``j != k``, applied with
    controlled-phase gates). Each unordered pair is visited twice, once per
    ordering, which supplies the factor 2 of the cross terms.

    Args:
        angle: Phase per unit of the squared register value.
        circuit: Circuit the gates are appended to.
        position_register: Indexable register with at least
            ``Config.QUBITS_NUM`` qubits.
    """
    qubit_count = Config.QUBITS_NUM
    for ctrl in range(qubit_count):
        ctrl_qubit = position_register[ctrl]
        # Diagonal term: b_k**2 == b_k, weight 2**(2k).
        circuit.p(angle * 2**(2 * ctrl), ctrl_qubit)
        # Cross terms with every other qubit.
        for tgt in (q for q in range(qubit_count) if q != ctrl):
            circuit.cp(angle * 2**(ctrl + tgt), ctrl_qubit, position_register[tgt])

def apply_kinetic_term(circuit, position_register, timestep):
    """Append the kinetic-energy phase evolution for one step to ``circuit``.

    The three phase angles are the coefficients obtained by expanding
    ``-timestep * gamma * (k + beta)**2`` in powers of the register value
    ``k``, where ``beta`` is the momentum origin (shifted by the initial
    momentum) in grid units and ``gamma = dp**2 / (2 * mass * hbar)``.

    Args:
        circuit: Circuit the gates are appended to (modified in place).
        position_register: Register the phases act on.
        timestep: Duration of the evolution step.

    Returns:
        The same ``circuit`` object, for chaining.
    """
    grid_origin = (-Config.Nyquist - Config.momentum_init) / Config.momentum_spacing
    energy_scale = (Config.momentum_spacing)**2 / (2*Config.mass*Config.hbar)

    constant_phase = -(timestep * energy_scale * grid_origin**2)
    linear_phase = -2 * timestep * energy_scale * grid_origin
    quadratic_phase = -timestep * energy_scale

    # k**0 term: the same phase on every basis state.
    apply_zeroth_order_operations(constant_phase, circuit, position_register[0])

    # k**1 (momentum shift) and k**2 (kinetic energy) terms share a call shape.
    register_wide_terms = (
        (linear_phase, apply_first_order_operations),
        (quadratic_phase, apply_second_order_operations),
    )
    for phase, apply_term in register_wide_terms:
        apply_term(phase, circuit, position_register)

    return circuit

def apply_harmonic_potential(circuit, position_register, timestep, potential_num):
    """Append the phase evolution under one of the two harmonic potentials.

    The potential is ``strength * (x - horiz_offset)**2 + vert_offset`` sampled
    at the grid-cell centres ``x_k = -half_box + dx/2 + k*dx``. The three phase
    angles are the coefficients of that polynomial in ``k``, multiplied by
    ``-timestep`` so that the accumulated phase corresponds to an evolution of
    duration ``timestep`` (previously ``timestep`` was accepted but ignored,
    which silently evolved the potential for a unit step regardless of the
    requested step size).

    Args:
        circuit: Circuit the gates are appended to (modified in place).
        position_register: Register the phases act on.
        timestep: Duration of the evolution step.
        potential_num: ``1`` selects the V1 parameters (vertical offset
            ``Config.diabaticity``), ``2`` selects the V2 parameters
            (no vertical offset).

    Returns:
        The same ``circuit`` object, for chaining.

    Raises:
        ValueError: If ``potential_num`` is neither 1 nor 2.
    """
    if potential_num == 1:
        vert_offset = Config.diabaticity
        horiz_offset = Config.V1_offset
        strength = Config.V1_strength
    elif potential_num == 2:
        vert_offset = 0.0
        horiz_offset = Config.V2_offset
        strength = Config.V2_strength
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(f"potential_num must be 1 or 2, got {potential_num!r}")

    # Position of grid point k relative to the potential minimum, in grid units,
    # is (k + beta).
    beta = (-Config.half_box - horiz_offset + (Config.position_spacing)/2) / Config.position_spacing
    gamma = strength * (Config.position_spacing)**2 / Config.hbar

    # Same structure as apply_kinetic_term: every angle carries the timestep.
    theta_1 = -timestep * (gamma * beta**2 + vert_offset)
    theta_2 = -2 * timestep * gamma * beta
    theta_3 = -timestep * gamma

    apply_zeroth_order_operations(theta_1, circuit, position_register[0])
    apply_first_order_operations(theta_2, circuit, position_register)
    apply_second_order_operations(theta_3, circuit, position_register)

    return circuit

# Circuit Building

In [5]:
def build_target_unitary():
    """Build the circuit whose unitary the VFF ansatz is trained to reproduce.

    Only the kinetic phase evolution for one ``Config.timestep`` is applied.
    The QFT wrapping (``cqft`` / ``ciqft``) and the harmonic potential step are
    currently disabled, so the kinetic phases act directly on the register's
    computational basis.

    Returns:
        A circuit on a ``Config.QUBITS_NUM``-qubit register named "position".
    """
    register = QuantumRegister(Config.QUBITS_NUM, name="position")
    target_circuit = QuantumCircuit(register)

    apply_kinetic_term(
        circuit=target_circuit,
        position_register=register,
        timestep=Config.timestep,
    )

    return target_circuit

def build_vff_ansatz(params):
    """Build the VFF ansatz circuit for ``params`` using the configured layout.

    Thin wrapper around ``get_full_vff_quantum_circuit`` that fills in the
    layer counts and qubit count from ``Config``.

    Args:
        params: Flat parameter vector, W (eigenvector) parameters first and
            D (diagonal) parameters after them.

    Returns:
        Whatever ``get_full_vff_quantum_circuit`` returns for these settings.
    """
    # The parameter counts previously computed here were never used.
    return get_full_vff_quantum_circuit(
        params,
        Config.NUM_LAYERS_EIGENVECTOR,
        Config.NUM_LAYERS_DIAGONAL,
        Config.QUBITS_NUM,
    )

# Fidelity, Cost Calculations

In [6]:
def calculate_lhst_cost(target_unitary, v_circuit):
    """Return the local cost ``1 - mean_j F_j`` between two circuits.

    For every qubit index ``j`` a 2n-qubit state is prepared in which basis
    bits ``j`` and ``j + n`` are perfectly correlated with equal amplitudes.
    ``U^dagger`` is applied to one n-qubit half of one copy and ``V^dagger`` to
    the other half of a second copy; both are reduced to qubits ``j`` and
    ``j + n`` and ``F_j`` is the state fidelity of the two reduced states.

    The returned value is numerically the same as before, but:

    * the qubit count is derived from the operators instead of being fixed at 8;
    * ``kron(U^dagger, I)`` and ``kron(I, V^dagger)`` are never materialised
      (for 8 qubits each is a 65536 x 65536 complex matrix); the operators are
      applied by reshaping the state into a ``dim x dim`` matrix;
    * the reduced states are obtained from a ``Statevector`` so the full
      65536 x 65536 density matrix is never built either.

    NOTE(review): in the prepared state the remaining ``2n - 2`` qubits are in
    a uniform superposition rather than in pairwise Bell states, and the
    fidelity compares two reduced states rather than projecting on a Bell
    state. Confirm this matches the intended definition (equations S1/S3).

    Args:
        target_unitary: Circuit (or anything ``Operator`` accepts) for U.
        v_circuit: Circuit (or anything ``Operator`` accepts) for V.

    Returns:
        ``1 - (1/n) * sum_j F_j``.

    Raises:
        ValueError: If the two operators differ in shape or act on no qubits.
    """
    target_matrix = Operator(target_unitary).data
    v_matrix = Operator(v_circuit).data

    if target_matrix.shape != v_matrix.shape:
        raise ValueError(
            f"Operator shapes differ: {target_matrix.shape} vs {v_matrix.shape}"
        )

    dim = int(target_matrix.shape[0])
    n_qubits = dim.bit_length() - 1
    if n_qubits < 1:
        raise ValueError("The circuits must act on at least one qubit")

    # Loop-invariant pieces.
    u_dag = target_matrix.conj().T
    v_conj = v_matrix.conj()          # equals (V^dagger)^T
    basis_index = np.arange(dim * dim)
    bell_amplitude = 1 / np.sqrt(2)

    fidelities = []
    for j in range(n_qubits):
        # Non-zero amplitude exactly where bit j and bit j+n agree.
        low_bit = (basis_index >> j) & 1
        high_bit = (basis_index >> (j + n_qubits)) & 1
        full_state = np.where(low_bit == high_bit, bell_amplitude, 0.0).astype(complex)
        full_state = full_state / np.linalg.norm(full_state)

        # Row index = bits n..2n-1, column index = bits 0..n-1.
        state_matrix = full_state.reshape(dim, dim)
        # Same vector as np.kron(u_dag, I) @ full_state.
        state_u = (u_dag @ state_matrix).reshape(-1)
        # Same vector as np.kron(I, v_dag) @ full_state.
        state_v = (state_matrix @ v_conj).reshape(-1)

        # Keep only qubits j and j+n.
        qubits_to_trace = [
            q for q in range(n_qubits * 2) if q != j and q != j + n_qubits
        ]
        dm_u = partial_trace(Statevector(state_u), qubits_to_trace)
        dm_v = partial_trace(Statevector(state_v), qubits_to_trace)

        fidelities.append(state_fidelity(dm_u, dm_v))

    cost = 1.0 - (1.0 / n_qubits) * np.sum(fidelities)

    return cost

# Gradient Calculation

In [7]:
def calculate_w_gradient(params, param_idx, target_unitary):
    """Return the parameter-shift gradient of the cost for one W parameter.

    Four costs are combined: two with ``params[param_idx]`` shifted by
    ``+pi/2`` and ``-pi/2``, and two with the unshifted parameters where the
    circuit builder is asked for the ``'O+'`` / ``'O-'`` shift variants of the
    same parameter.

    Args:
        params: Flat parameter vector supporting ``.copy()`` and item assignment.
        param_idx: Index of a W parameter (must be below the W parameter count).
        target_unitary: Target circuit passed to ``calculate_lhst_cost``.

    Returns:
        ``0.5 * ((c1 - c2) + (c3 - c4))``.
    """
    w_param_count, _ = Config.calculate_parameter_counts()

    # Ensure we're calculating W gradient
    assert param_idx < w_param_count

    def cost_for(circuit_params, **shift_options):
        """Build the ansatz for ``circuit_params`` and evaluate its cost."""
        ansatz = get_full_vff_quantum_circuit(
            circuit_params,
            Config.NUM_LAYERS_EIGENVECTOR,
            Config.NUM_LAYERS_DIAGONAL,
            Config.QUBITS_NUM,
            **shift_options,
        )
        return calculate_lhst_cost(target_unitary, ansatz)

    # Terms 1 and 2: parameter value shifted by +/- pi/2.
    shifted_up = params.copy()
    shifted_up[param_idx] += np.pi/2
    cost_up = cost_for(shifted_up)

    shifted_down = params.copy()
    shifted_down[param_idx] -= np.pi/2
    cost_down = cost_for(shifted_down)

    # Terms 3 and 4: builder-side 'O+' / 'O-' shift of the same parameter.
    cost_o_plus = cost_for(params, w_shift_type='O+', shift_param_idx=param_idx)
    cost_o_minus = cost_for(params, w_shift_type='O-', shift_param_idx=param_idx)

    # Calculate gradient according to equation 27
    return 0.5 * ((cost_up - cost_down) + (cost_o_plus - cost_o_minus))

def calculate_d_gradient(params, param_idx, target_unitary):
    """Return the parameter-shift gradient of the cost for one D parameter.

    Args:
        params: Flat parameter vector supporting ``.copy()`` and item assignment.
        param_idx: Index of a D parameter (must not be below the W parameter count).
        target_unitary: Target circuit passed to ``calculate_lhst_cost``.

    Returns:
        Half the difference between the costs at ``+pi/2`` and ``-pi/2`` shifts.
    """
    w_param_count, _ = Config.calculate_parameter_counts()
    assert param_idx >= w_param_count

    def cost_for(circuit_params):
        """Build the ansatz for ``circuit_params`` and evaluate its cost."""
        ansatz = get_full_vff_quantum_circuit(
            circuit_params,
            Config.NUM_LAYERS_EIGENVECTOR,
            Config.NUM_LAYERS_DIAGONAL,
            Config.QUBITS_NUM,
        )
        return calculate_lhst_cost(target_unitary, ansatz)

    # Term 1: parameter shifted up by pi/2.
    shifted_up = params.copy()
    shifted_up[param_idx] += np.pi/2
    cost_up = cost_for(shifted_up)

    # Term 2: parameter shifted down by pi/2.
    shifted_down = params.copy()
    shifted_down[param_idx] -= np.pi/2
    cost_down = cost_for(shifted_down)

    # Calculate gradient according to equation 29
    return 0.5 * (cost_up - cost_down)



In [8]:
def calculate_gradient_wrapper(args):
    """Compute one gradient entry from a packed argument tuple.

    Intended as a worker for ``map``-style executors, which pass a single
    argument.

    Args:
        args: ``(param_index, params, target_unitary)``.

    Returns:
        ``(param_index, gradient)``. If the computation raises, the error and
        traceback are printed and ``(param_index, 0.0)`` is returned instead.
    """
    param_index, params, target_unitary = args
    try:
        w_param_count = Config.calculate_parameter_counts()[0]

        # W parameters come first in the flat vector, D parameters after them.
        if param_index < w_param_count:
            gradient_fn = calculate_w_gradient
        else:
            gradient_fn = calculate_d_gradient

        return (param_index, gradient_fn(params, param_index, target_unitary))
    except Exception as e:
        print(f"Error calculating gradient for parameter {param_index}: {e}")
        import traceback
        traceback.print_exc()
        return (param_index, 0.0)

In [9]:
def calculate_gradients_parallel(params, target_unitary, param_indices):
    """Return the gradient vector for the requested parameter indices.

    Despite the name, the entries are computed sequentially in this process.

    Args:
        params: Flat parameter vector (W parameters first, then D parameters).
        target_unitary: Target circuit passed on to the gradient routines.
        param_indices: Iterable of indices to differentiate with respect to.

    Returns:
        Float array shaped like ``params``; entries whose index is not in
        ``param_indices`` are left at zero.
    """
    total_w_params, _ = Config.calculate_parameter_counts()

    # Force a float buffer: zeros_like would otherwise inherit an integer dtype
    # from ``params`` and truncate every gradient on assignment.
    gradients = np.zeros_like(params, dtype=float)
    for param_index in param_indices:
        if param_index < total_w_params:
            gradients[param_index] = calculate_w_gradient(params, param_index, target_unitary)
        else:
            gradients[param_index] = calculate_d_gradient(params, param_index, target_unitary)
    return gradients

# Optimization

In [10]:
def verify_gradient_calculation(params, param_index, target_unitary):
    """Compare the parameter-shift gradient with a central finite difference.

    Args:
        params: Flat parameter vector supporting ``.copy()`` and item assignment.
        param_index: Index of the parameter to check.
        target_unitary: Target circuit passed to the cost function.

    Returns:
        ``(analytical_gradient, numerical_gradient)``. Both values and their
        absolute difference are also printed.
    """
    total_w_params, _ = Config.calculate_parameter_counts()

    # Calculate analytical gradient
    if param_index < total_w_params:
        analytical_gradient = calculate_w_gradient(params, param_index, target_unitary)
    else:
        analytical_gradient = calculate_d_gradient(params, param_index, target_unitary)

    # Calculate numerical gradient (finite difference)
    epsilon = 0.01

    # The unperturbed cost is not needed for a central difference, so it is no
    # longer evaluated (each cost evaluation is expensive).
    params_plus = params.copy()
    params_plus[param_index] += epsilon
    cost_plus = calculate_lhst_cost(target_unitary, build_vff_ansatz(params_plus))

    params_minus = params.copy()
    params_minus[param_index] -= epsilon
    cost_minus = calculate_lhst_cost(target_unitary, build_vff_ansatz(params_minus))

    # Central difference
    numerical_gradient = (cost_plus - cost_minus) / (2 * epsilon)

    print(f"Parameter {param_index}:")
    print(f"  Analytical gradient: {analytical_gradient}")
    print(f"  Numerical gradient: {numerical_gradient}")
    print(f"  Difference: {abs(analytical_gradient - numerical_gradient)}")

    return analytical_gradient, numerical_gradient

# Fast-Forwarding Evaluation

In [11]:
def fast_forward(optimized_params, target_unitary, n):
    """Return the fidelity (``1 - cost``) of fast-forwarding by ``n`` steps.

    The ansatz is rebuilt with every diagonal (D) parameter multiplied by
    ``n`` and compared against the target circuit repeated ``n`` times.

    Args:
        optimized_params: Flat parameter vector (W first, then D).
        target_unitary: Single-step target circuit.
        n: Number of steps to fast-forward.

    Returns:
        ``1.0 - calculate_lhst_cost(target**n, ansatz_with_scaled_D)``.
    """
    w_count, d_count = Config.calculate_parameter_counts()

    # Only the diagonal block scales with n; the W block is left untouched.
    scaled_params = optimized_params.copy()
    for d_index in range(w_count, w_count + d_count):
        scaled_params[d_index] = optimized_params[d_index] * n

    scaled_ansatz = build_vff_ansatz(scaled_params)

    # Target raised to the n-th power, built by appending n copies.
    repeated_target = QuantumCircuit(Config.QUBITS_NUM)
    for _ in range(n):
        repeated_target.compose(target_unitary, inplace=True)

    return 1.0 - calculate_lhst_cost(repeated_target, scaled_ansatz)

def parallel_fast_forward(args):
    """Unpack ``(n, optimized_params, target_unitary)`` and return ``(n, fidelity)``.

    Single-argument adapter around ``fast_forward`` for ``executor.map``.
    """
    step_count, params, target = args
    return step_count, fast_forward(params, target, step_count)

def evaluate_fast_forwarding(optimized_params, target_unitary, n_values=None):
    """Evaluate fast-forwarding fidelity for several step counts in parallel.

    Results are written to the evaluation file (tab-separated, one row per
    step count) as they arrive and returned sorted by step count.

    Args:
        optimized_params: Flat parameter vector (W first, then D).
        target_unitary: Single-step target circuit.
        n_values: Step counts to evaluate; defaults to
            ``Config.FAST_FORWARD_N_VALUES``.

    Returns:
        List of ``(n, fidelity)`` tuples sorted by ``n``; empty if there is
        nothing to evaluate.
    """
    if n_values is None:
        n_values = Config.FAST_FORWARD_N_VALUES

    arg_tuples = [(n, optimized_params, target_unitary) for n in n_values]

    results = []
    evaluation_filename = Config.get_evaluation_filename()

    # Create empty evaluation file
    with open(evaluation_filename, 'w') as f:
        f.write("N\tFidelity\n")

    # ProcessPoolExecutor rejects max_workers=0, so stop early when empty.
    if not arg_tuples:
        return results

    # os.cpu_count() may return None when the count cannot be determined.
    num_workers = min(os.cpu_count() or 1, len(arg_tuples))

    # Run evaluations in parallel.
    # NOTE(review): worker functions defined in a notebook cell may not be
    # importable by child processes on platforms that spawn rather than fork —
    # confirm on the target platform.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for n, fidelity in tqdm(
            executor.map(parallel_fast_forward, arg_tuples),
            total=len(arg_tuples),
            desc="Evaluating fast-forwarding"
        ):
            results.append((n, fidelity))

            # Append immediately so partial results survive an interruption.
            with open(evaluation_filename, 'a') as f:
                f.write(f"{n}\t{fidelity}\n")

    # Sort results by n for consistency
    results.sort(key=lambda x: x[0])

    return results

def plot_fast_forwarding_results(results):
    """Plot fidelity against step count on a log x-axis and save it to disk.

    Args:
        results: Sequence of entries whose item 0 is the step count and whose
            item 1 is the fidelity.

    Returns:
        The file name the plot was saved under.
    """
    step_counts = []
    fidelity_values = []
    for entry in results:
        step_counts.append(entry[0])
        fidelity_values.append(entry[1])

    plt.figure(figsize=(10, 6))
    plt.plot(step_counts, fidelity_values, 'o-', linewidth=2)
    plt.xscale('log')
    plt.xlabel('Number of Time Steps (N)')
    plt.ylabel('Fidelity')
    plt.title(f'VFF Fast-Forwarding Performance ({Config.QUBITS_NUM} qubits)')
    plt.grid(True)

    output_path = Config.get_plot_filename()
    plt.savefig(output_path)
    plt.close()
    print(f"Fidelity plot saved to {output_path}")

    return output_path

# Main Execution Functions

In [12]:
def print_circuit_visualization(params=None):
    """Print a summary and text drawing of the ansatz and save an image of it.

    Args:
        params: Flat parameter vector for the ansatz. When ``None``, uniformly
            random angles in ``[0, 2*pi)`` are used.

    The image is written to ``<base filename>_circuit.png``. If the matplotlib
    drawer cannot be imported, a message is printed and no image is written.
    """
    # Get parameter counts
    total_w_params, total_d_params = Config.calculate_parameter_counts()
    total_params = total_w_params + total_d_params

    # Generate random parameters if none provided
    if params is None:
        params = np.random.random(total_params) * 2 * np.pi
        print("Using random parameters for circuit visualization")

    # Build the VFF ansatz circuit
    circuit = build_vff_ansatz(params)

    # Print circuit information
    print(f"VFF Ansatz Circuit Details:")
    print(f"- Number of qubits: {Config.QUBITS_NUM}")
    print(f"- Eigenvector layers: {Config.NUM_LAYERS_EIGENVECTOR}")
    print(f"- Diagonal layers: {Config.NUM_LAYERS_DIAGONAL}")
    print(f"- Total parameters: {total_params} ({total_w_params} W params, {total_d_params} D params)")

    print("\n Circuit Representation:")
    print(circuit.draw(output='text', fold=120))

    try:
        from matplotlib import pyplot as plt

        # Draw onto an axes we own. Previously the circuit was drawn on a figure
        # created by the drawer while the title/savefig calls went through
        # pyplot's "current figure", which is not guaranteed to be the same one
        # (and the pre-created empty figure was never closed).
        fig, ax = plt.subplots(figsize=(14, 10))
        try:
            circuit.draw(output='mpl', ax=ax)
            ax.set_title(f"VFF Ansatz ({Config.QUBITS_NUM} qubits, {Config.NUM_LAYERS_EIGENVECTOR}W/{Config.NUM_LAYERS_DIAGONAL}D layers)")
            fig.tight_layout()

            # Save the circuit drawing to a file
            circuit_filename = f"{Config.get_base_filename()}_circuit.png"
            fig.savefig(circuit_filename)
        finally:
            # Always release the figure, even if drawing or saving failed.
            plt.close(fig)
        print(f"\nCircuit visualization saved to {circuit_filename}")
    except ImportError:
        print("\nMatplotlib not available for visual circuit rendering")

In [13]:
def analyze_gradients(cached_u_states, params, param_indices):
    """Print gradient statistics and cross-check a few entries numerically.

    Args:
        cached_u_states: Legacy parameter name. The value is forwarded as the
            ``target_unitary`` argument of ``calculate_gradients_parallel`` and
            ``verify_gradient_calculation``, so it must be the target circuit.
        params: Flat parameter vector (W parameters first, then D parameters).
        param_indices: Indices whose gradients are computed; other entries of
            the returned vector are zero.

    Returns:
        The gradient vector returned by ``calculate_gradients_parallel``.
    """
    print("\nGradient Analysis:")

    # calculate_gradients_parallel takes (params, target_unitary, indices) and
    # already returns the final gradient array (the cost averages over qubits
    # internally), so no per-qubit recombination is needed. The previous code
    # passed the arguments in the wrong order and indexed the array like a
    # dict keyed by (qubit, parameter).
    gradients = calculate_gradients_parallel(params, cached_u_states, param_indices)

    # Gradient statistics
    grad_norm = np.linalg.norm(gradients)
    grad_mean = np.mean(gradients)
    grad_std = np.std(gradients)
    grad_min = np.min(gradients)
    grad_max = np.max(gradients)
    grad_nonzero = np.count_nonzero(gradients)

    print(f"Gradient norm: {grad_norm:.6f}")
    print(f"Gradient mean: {grad_mean:.6f}")
    print(f"Gradient std dev: {grad_std:.6f}")
    print(f"Gradient min/max: {grad_min:.6f}/{grad_max:.6f}")
    print(f"Non-zero gradients: {grad_nonzero}/{len(gradients)} ({grad_nonzero/len(gradients)*100:.1f}%)")

    # Check for vanishing/exploding gradients
    if grad_norm < 1e-4:
        print("Possible vanishing gradients detected")
    if grad_norm > 10:
        print("Possible exploding gradients detected")

    # Parameter-wise gradient statistics
    total_w_params, total_d_params = Config.calculate_parameter_counts()

    w_grads = gradients[:total_w_params]
    d_grads = gradients[total_w_params:]

    print(f"\nW-parameter gradients: mean={np.mean(w_grads):.6f}, std={np.std(w_grads):.6f}, norm={np.linalg.norm(w_grads):.6f}")
    print(f"D-parameter gradients: mean={np.mean(d_grads):.6f}, std={np.std(d_grads):.6f}, norm={np.linalg.norm(d_grads):.6f}")

    def spread_indices(start, count):
        """Return roughly three evenly spaced indices from ``count`` starting at ``start``."""
        if count <= 0:
            # Avoids the division by zero the inline expression had for count == 0.
            return []
        step = max(1, count // min(3, count))
        return list(range(start, start + count, step))

    # Verify a few gradients with numerical method
    print("\nVerifying selected gradients numerically:")
    w_indices = spread_indices(0, total_w_params)
    d_indices = spread_indices(total_w_params, total_d_params)

    for idx in w_indices + d_indices:
        if idx < len(params):
            # Signature is (params, param_index, target_unitary).
            analytical, numerical = verify_gradient_calculation(params, idx, cached_u_states)
            if abs(numerical) > 1e-6:  # Avoid division by zero
                ratio = analytical / numerical
                print(f"Parameter {idx}: analytical={analytical:.6f}, numerical={numerical:.6f}, ratio={ratio:.6f}")
            else:
                print(f"Parameter {idx}: analytical={analytical:.6f}, numerical={numerical:.6f} (ratio undefined)")

    return gradients

In [None]:
def run_vff_optimization():
    """Run gradient descent on the VFF ansatz from a random starting point.

    Each iteration evaluates the cost, records the best parameters seen so
    far, takes one gradient step (parameters wrapped into ``[0, 2*pi)``) and
    appends a row to the optimization log. The learning rate starts at
    ``Config.LEARNING_RATE`` and is multiplied by 0.95 on every tenth
    iteration (including iteration 0).

    Returns:
        The parameter vector with the lowest cost encountered, which is also
        saved with ``np.savetxt`` to the parameters file.
    """
    total_w_params, total_d_params = Config.calculate_parameter_counts()
    total_params = total_w_params + total_d_params

    # Build target unitary (kinetic evolution)
    target_unitary = build_target_unitary()

    # Initialize parameters
    params = np.random.random(total_params) * 2 * np.pi

    # Read the initial learning rate from the configuration instead of
    # duplicating its value here.
    learning_rate = Config.LEARNING_RATE
    best_cost = float('inf')
    best_params = params.copy()

    # Resolve file names once: they embed the current date, so recomputing them
    # per iteration would split the log if the run crosses midnight.
    log_filename = Config.get_optimizer_filename()
    params_filename = Config.get_params_filename()

    # Create optimization log file
    with open(log_filename, 'w') as f:
        f.write("Iteration\tCost\tLearning_Rate\n")

    param_indices = list(range(total_params))

    for iteration in range(Config.MAX_ITERATIONS):
        # Calculate current cost
        v_circuit = build_vff_ansatz(params)
        cost = calculate_lhst_cost(target_unitary, v_circuit)

        # Update best parameters
        if cost < best_cost:
            best_cost = cost
            best_params = params.copy()
            print(f"Iteration {iteration}: New best cost = {best_cost:.8f}")

        # Calculate gradients
        gradients = calculate_gradients_parallel(params, target_unitary, param_indices)

        # Update parameters according to equations 31-32
        params = params - learning_rate * gradients
        params = np.mod(params, 2*np.pi)  # Keep in [0, 2π)

        # Log progress
        with open(log_filename, 'a') as f:
            f.write(f"{iteration}\t{cost:.8f}\t{learning_rate:.6f}\n")

        # Adaptive learning rate
        if iteration % 10 == 0:
            learning_rate *= 0.95  # Gradually reduce learning rate

    # The parameters produced by the last update were never evaluated inside
    # the loop; score them so the final step can still become the best result.
    final_cost = calculate_lhst_cost(target_unitary, build_vff_ansatz(params))
    if final_cost < best_cost:
        best_cost = final_cost
        best_params = params.copy()
        print(f"Final parameters: New best cost = {best_cost:.8f}")
    with open(log_filename, 'a') as f:
        f.write(f"{Config.MAX_ITERATIONS}\t{final_cost:.8f}\t{learning_rate:.6f}\n")

    # Save best parameters
    np.savetxt(params_filename, best_params)

    return best_params

def quasi_random_parameter_search(target_unitary, num_samples=1000, top_k=5):
    """Score randomly sampled parameter vectors and keep the ``top_k`` cheapest.

    Samples come from a scrambled Sobol sequence when ``scipy.stats.qmc`` can
    be imported, otherwise from ``np.random.random``; both are scaled to
    ``[0, 2*pi)``. Every sample's cost is appended to a
    ``<base filename>_random_search.txt`` log.

    Args:
        target_unitary: Target circuit passed to ``calculate_lhst_cost``.
        num_samples: Number of parameter vectors to evaluate.
        top_k: Number of best candidates to retain.

    Returns:
        List of ``(params, cost)`` tuples sorted by ascending cost.
    """
    print(f"Starting quasi-random search with {num_samples} samples...")

    # Dimension of the search space = all W plus all D parameters.
    w_count, d_count = Config.calculate_parameter_counts()
    dimension = w_count + d_count

    # Create search log file
    log_path = f"{Config.get_base_filename()}_random_search.txt"
    with open(log_path, 'w') as log_file:
        log_file.write("Sample\tCost\n")

    # Use Sobol sequence for quasi-random sampling if available
    try:
        from scipy.stats import qmc
        sobol = qmc.Sobol(d=dimension, scramble=True)
        samples = sobol.random(num_samples) * 2 * np.pi
        print("Using Sobol sequence for quasi-random sampling")
    except ImportError:
        samples = np.random.random((num_samples, dimension)) * 2 * np.pi
        print("Using standard random sampling")

    def cost_of(entry):
        """Sort key: the cost stored in a (params, cost) entry."""
        return entry[1]

    leaderboard = []

    for sample_index, candidate in enumerate(tqdm(samples, desc="Evaluating random parameters")):
        cost = calculate_lhst_cost(target_unitary, build_vff_ansatz(candidate))

        # Log this sample
        with open(log_path, 'a') as log_file:
            log_file.write(f"{sample_index}\t{cost:.8f}\n")

        # Fill the leaderboard first; afterwards replace its worst (last) entry
        # whenever a cheaper candidate shows up.
        changed = True
        if len(leaderboard) < top_k:
            leaderboard.append((candidate, cost))
        elif cost < leaderboard[-1][1]:
            leaderboard[-1] = (candidate, cost)
        else:
            changed = False
        if changed:
            leaderboard.sort(key=cost_of)

        evaluated = sample_index + 1
        if evaluated % 100 == 0:
            print(f"Evaluated {evaluated}/{num_samples}, current best cost: {leaderboard[0][1]:.8f}")

    print(f"\nTop {top_k} parameter sets:")
    for rank, (_, cost) in enumerate(leaderboard, start=1):
        print(f"Set {rank}: Cost = {cost:.8f}")

    return leaderboard

def optimize_multiple_parameter_sets(target_unitary, initial_param_sets):
    """Run gradient descent from several starting points and rank the outcomes.

    For each ``(params, initial_cost)`` pair the routine performs up to
    ``Config.MAX_ITERATIONS`` gradient steps. The learning rate is halved
    (down to a floor) after ``patience`` consecutive iterations without a new
    best cost. Once the floor is reached the stagnation counter keeps running,
    and the run stops after ``2 * patience`` stagnant iterations. Previously the
    counter was reset every ``patience`` iterations, so that stop condition
    could never trigger.

    Args:
        target_unitary: Target circuit passed to the cost and gradient routines.
        initial_param_sets: Iterable of ``(params, initial_cost)`` pairs.

    Returns:
        List of ``(best_params, best_cost)`` tuples sorted by ascending cost.
        Each ``best_params`` is also saved to
        ``<base filename>_set<i>_optimized.txt``.
    """
    results = []

    for i, (params, initial_cost) in enumerate(initial_param_sets):
        print(f"\n{'='*80}")
        print(f"Optimizing parameter set {i+1}/{len(initial_param_sets)}")
        print(f"Initial cost: {initial_cost:.8f}")
        print(f"{'='*80}")

        # Create set-specific filenames
        set_params_filename = f"{Config.get_base_filename()}_set{i+1}_optimized.txt"

        # Custom optimization with adaptive learning rate
        current_params = params.copy()
        best_cost = initial_cost
        best_params = current_params.copy()

        # Optimization parameters
        learning_rate = 5.0
        min_lr = 0.01
        patience = 10
        no_improvement_count = 0

        total_w_params, total_d_params = Config.calculate_parameter_counts()
        total_params = total_w_params + total_d_params

        # True when current_params has been changed since its cost was last computed.
        pending_evaluation = False

        for iteration in range(Config.MAX_ITERATIONS):
            # Calculate current cost
            v_circuit = build_vff_ansatz(current_params)
            cost = calculate_lhst_cost(target_unitary, v_circuit)
            pending_evaluation = False

            # Update best parameters
            if cost < best_cost:
                improvement = best_cost - cost
                best_cost = cost
                best_params = current_params.copy()
                print(f"Iteration {iteration}: New best cost = {best_cost:.8f} (improvement: {improvement:.8f})")
                no_improvement_count = 0
            else:
                no_improvement_count += 1

            # Calculate gradients
            gradients = np.zeros_like(current_params)

            # W parameters
            for idx in range(total_w_params):
                gradients[idx] = calculate_w_gradient(current_params, idx, target_unitary)

            # D parameters
            for idx in range(total_w_params, total_params):
                gradients[idx] = calculate_d_gradient(current_params, idx, target_unitary)

            # Check gradient norm
            grad_norm = np.linalg.norm(gradients)
            if grad_norm < 1e-8:
                print(f"Small gradient norm ({grad_norm:.2e}), optimization likely converged")
                break

            # Update parameters
            current_params = current_params - learning_rate * gradients
            current_params = np.mod(current_params, 2*np.pi)
            pending_evaluation = True

            # Adaptive learning rate
            if no_improvement_count >= patience:
                if learning_rate > min_lr:
                    learning_rate = max(learning_rate * 0.5, min_lr)
                    print(f"Reducing learning rate to {learning_rate:.6f}")
                    no_improvement_count = 0
                elif no_improvement_count >= patience * 2:
                    # Already at the floor: keep counting instead of resetting,
                    # so this branch is actually reachable.
                    print("Learning rate at minimum and no improvement, stopping")
                    break

        # The parameters from the last update were never scored inside the
        # loop; evaluate them so the final step can still be selected.
        if pending_evaluation:
            final_cost = calculate_lhst_cost(target_unitary, build_vff_ansatz(current_params))
            if final_cost < best_cost:
                best_cost = final_cost
                best_params = current_params.copy()

        # Save best parameters for this set
        np.savetxt(set_params_filename, best_params)
        results.append((best_params, best_cost))
        print(f"Set {i+1} final cost: {best_cost:.8f}")

    # Sort results by final cost
    results.sort(key=lambda x: x[1])

    return results

def run_fast_forwarding_evaluation(optimized_params, target_unitary):
    """Evaluate fast-forwarding fidelity sequentially, log it and plot it.

    One fidelity is computed for every entry of
    ``Config.FAST_FORWARD_N_VALUES``. Each is appended to the evaluation file
    and printed as soon as it is available; the full series is plotted at
    the end.

    Args:
        optimized_params: Flat parameter vector (W first, then D).
        target_unitary: Single-step target circuit.

    Returns:
        List of ``(n, fidelity)`` tuples in evaluation order.
    """
    step_counts = Config.FAST_FORWARD_N_VALUES
    print(f"Evaluating fast-forwarding for N={step_counts}")

    table_path = Config.get_evaluation_filename()

    # Start a fresh table containing only the header row.
    with open(table_path, 'w') as table:
        table.write("N\tFidelity\n")

    collected = []
    for step_count in step_counts:
        fidelity = fast_forward(optimized_params, target_unitary, step_count)
        collected.append((step_count, fidelity))

        # Append right away so finished rows survive an interruption.
        with open(table_path, 'a') as table:
            table.write(f"{step_count}\t{fidelity:.8f}\n")

        print(f"N={step_count}: Fidelity={fidelity:.8f}")

    plot_fast_forwarding_results(collected)

    return collected

def run_multi_init_optimization(seed: Optional[int] = None):
    """Run the complete VFF workflow starting from one random parameter set.

    Steps: build the target unitary, draw random initial angles in
    [0, 2*pi), report the initial cost, optimise with
    ``run_vff_optimization_simple``, re-evaluate the cost of the returned
    parameters, run the fast-forwarding evaluation and print a summary.

    Note: despite the name, only a single initialisation is used; the
    quasi-random multi-start search is skipped.

    Args:
        seed: Optional seed for the initial parameter draw. When ``None``
            (the default) the global NumPy random state is used, exactly as
            before; when given, a dedicated ``np.random.default_rng(seed)``
            generator is used so the starting point is reproducible.

    Returns:
        dict: ``best_params`` (parameters returned by the optimiser),
        ``final_cost`` (cost of those parameters), ``ff_results`` (list of
        ``(N, fidelity)`` pairs) and ``total_time`` (wall-clock seconds).
    """
    total_start = datetime.datetime.now()
    print(f"Starting VFF workflow with {Config.QUBITS_NUM} qubits and timestep={Config.timestep}")

    # Build the target unitary
    print("Building target unitary...")
    target_unitary = build_target_unitary()

    # Skip quasi-random search and go directly to optimization with random initial parameters
    print("Skipping quasi-random search, starting optimization with random parameters...")

    # Initialize a single set of random parameters, uniformly in [0, 2*pi)
    total_w_params, total_d_params = Config.calculate_parameter_counts()
    total_params = total_w_params + total_d_params
    if seed is None:
        # Preserve the historical behaviour: draw from the global NumPy RNG.
        unit_samples = np.random.random(total_params)
    else:
        # A private generator keeps the draw reproducible without touching
        # the global random state.
        unit_samples = np.random.default_rng(seed).random(total_params)
    initial_params = unit_samples * 2 * np.pi

    # Calculate initial cost
    vff_ansatz = build_vff_ansatz(initial_params)
    initial_cost = calculate_lhst_cost(target_unitary, vff_ansatz)
    print(f"Initial cost: {initial_cost:.8f}")

    # Optimize with gradient descent
    print("\nStarting gradient optimization...")
    best_params = run_vff_optimization_simple(target_unitary, initial_params)

    # Evaluate final cost (the optimiser returns parameters only, not their cost)
    final_circuit = build_vff_ansatz(best_params)
    final_cost = calculate_lhst_cost(target_unitary, final_circuit)
    print(f"Final cost: {final_cost:.8f}")

    # Evaluate fast-forwarding with best parameters
    print("\nEvaluating fast-forwarding performance...")
    ff_results = run_fast_forwarding_evaluation(best_params, target_unitary)

    total_end = datetime.datetime.now()
    total_time = (total_end - total_start).total_seconds()

    # Print final summary
    print("\n" + "="*80)
    print("VFF WORKFLOW SUMMARY")
    print("="*80)
    print(f"Total runtime: {total_time:.2f} seconds")
    print("Configuration:")
    print(f"- Qubits: {Config.QUBITS_NUM}")
    print(f"- Timestep: {Config.timestep}")
    print(f"- Layers: {Config.NUM_LAYERS_EIGENVECTOR} eigenvector, {Config.NUM_LAYERS_DIAGONAL} diagonal")
    print(f"\nFinal Cost: {final_cost:.8f}")

    print("\nFast-forwarding results:")
    for n, fidelity in ff_results:
        print(f"N={n}: Fidelity={fidelity:.8f}")
    print("="*80)

    return {
        'best_params': best_params,
        'final_cost': final_cost,
        'ff_results': ff_results,
        'total_time': total_time
    }

def run_vff_optimization_simple(target_unitary, initial_params):
    """Minimise the LHST cost by gradient descent with a slowly decaying step.

    Every pass evaluates the cost of the current parameters, appends it to
    the optimiser log, keeps track of the best parameters seen so far, and
    then moves along the negative gradient. After each step the parameters
    are wrapped into [0, 2*pi). The loop stops early when the gradient norm
    drops below 1e-8. After ``Config.MAX_ITERATIONS`` gradient steps one
    extra evaluation-only pass scores the result of the last step, so that
    step is not silently discarded.

    Args:
        target_unitary: Target forwarded unchanged to ``calculate_lhst_cost``
            and ``calculate_gradients_parallel``.
        initial_params: Starting angles; copied, never modified in place.
            Gradients are requested for indices
            ``0 .. total_w_params + total_d_params - 1`` as reported by
            ``Config.calculate_parameter_counts()``.

    Returns:
        numpy.ndarray: The parameters with the lowest evaluated cost. The
        same array is written to ``Config.get_params_filename()``.
    """
    total_w_params, total_d_params = Config.calculate_parameter_counts()
    # The index list does not change between iterations, so build it once.
    param_indices = list(range(total_w_params + total_d_params))

    # Optimization parameters
    params = np.array(initial_params, dtype=float)  # private copy; also accepts lists
    learning_rate = 5.0
    grad_tolerance = 1e-8  # same convergence threshold as the multi-start optimiser
    best_cost = float('inf')
    best_params = params.copy()

    # Resolve the log path once so every row lands in the same file.
    log_filename = Config.get_optimizer_filename()
    with open(log_filename, 'w') as f:
        f.write("Iteration\tCost\tLearning_Rate\n")

    print("Starting optimization loop...")

    max_iterations = Config.MAX_ITERATIONS
    # One extra pass (index == max_iterations) only scores the last update.
    for iteration in range(max_iterations + 1):
        # Calculate current cost
        v_circuit = build_vff_ansatz(params)
        cost = calculate_lhst_cost(target_unitary, v_circuit)

        # Log progress
        with open(log_filename, 'a') as f:
            f.write(f"{iteration}\t{cost:.8f}\t{learning_rate:.6f}\n")

        # Update best parameters
        if cost < best_cost:
            improvement = best_cost - cost
            best_cost = cost
            best_params = params.copy()
            print(f"Iteration {iteration}: New best cost = {best_cost:.8f} (improvement: {improvement:.8f})")
        else:
            print(f"Iteration {iteration}: Cost = {cost:.8f}")

        if iteration == max_iterations:
            # Step budget exhausted; this pass existed only to evaluate the
            # parameters produced by the final gradient step.
            break

        # Calculate gradients
        gradients = calculate_gradients_parallel(params, target_unitary, param_indices)

        # Check gradient norm
        grad_norm = np.linalg.norm(gradients)
        print(f"  Gradient norm: {grad_norm:.6f}")
        if grad_norm < grad_tolerance:
            # A vanishing gradient means further steps would not move the
            # parameters; the current point has already been scored above.
            print(f"Small gradient norm ({grad_norm:.2e}), optimization likely converged")
            break

        # Update parameters according to equations 31-32
        params = params - learning_rate * gradients
        params = np.mod(params, 2*np.pi)  # Keep in [0, 2π)

        # Adaptive learning rate: shrink by 5% every 10 iterations
        if iteration % 10 == 0 and iteration > 0:
            learning_rate *= 0.95  # Gradually reduce learning rate
            print(f"  Reducing learning rate to {learning_rate:.6f}")

    # Save best parameters
    np.savetxt(Config.get_params_filename(), best_params)

    return best_params

# Script entry point: run the full VFF workflow once and persist the best parameters.
if __name__ == "__main__":
    # NOTE(review): this assignment runs after the Config class body has been
    # executed, so class attributes computed there from QUBITS_NUM
    # (state_count, position_spacing, momentum_spacing, Nyquist) are not
    # recomputed. It is only safe while it matches the value declared in
    # Config itself — confirm, or change the value inside Config instead.
    Config.QUBITS_NUM = 8
    
    # Run the optimization workflow
    results = run_multi_init_optimization()
    
    # Save final results
    # NOTE(review): run_vff_optimization_simple already writes its best
    # parameters to Config.get_params_filename(); this appears to write the
    # same array a second time — confirm it is intentional.
    print("\nSaving final results...")
    np.savetxt(Config.get_params_filename(), results['best_params'])
    print(f"Best parameters saved to {Config.get_params_filename()}")
    
    print("\nOptimization complete!")

Starting VFF workflow with 3 qubits and timestep=10.0
Building target unitary...
Skipping quasi-random search, starting optimization with random parameters...
Initial cost: 0.86062196

Starting gradient optimization...
Starting optimization loop...
Iteration 0: New best cost = 0.86062196 (improvement: inf)
  Gradient norm: 0.165484
Iteration 1: New best cost = 0.72083929 (improvement: 0.13978267)
  Gradient norm: 0.159697
Iteration 2: New best cost = 0.61263362 (improvement: 0.10820567)
  Gradient norm: 0.133455
Iteration 3: New best cost = 0.51151493 (improvement: 0.10111869)
  Gradient norm: 0.172790
Iteration 4: New best cost = 0.35928762 (improvement: 0.15222730)
  Gradient norm: 0.172022
Iteration 5: New best cost = 0.22240651 (improvement: 0.13688112)
  Gradient norm: 0.163537
Iteration 6: New best cost = 0.09267157 (improvement: 0.12973494)
  Gradient norm: 0.141569
Iteration 7: New best cost = 0.01995906 (improvement: 0.07271251)
  Gradient norm: 0.055332
Iteration 8: New best 