In [None]:
# Cell 1: Imports and Configuration
# Imports PennyLane for quantum circuit simulation and NumPy/Matplotlib for numerical operations and plotting.
import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import NesterovMomentumOptimizer
import matplotlib.pyplot as plt
import random

In [None]:
# Cell 2: Quantum Device Initialization
# Sets up the quantum device. Uses 'lightning.gpu' for acceleration if available, otherwise falls back to CPU.
# Prefer the GPU-accelerated Lightning simulator. If that backend cannot be
# created (plugin not installed, no usable GPU, ...), fall back to the CPU
# reference simulator so the rest of the notebook still runs.
try:
    dev = qml.device("lightning.gpu", wires=2)
except Exception as gpu_error:  # the error type raised on device creation varies by PennyLane version
    print(f"lightning.gpu unavailable ({gpu_error}); falling back to default.qubit")
    dev = qml.device("default.qubit", wires=2)

In [None]:
# Cell 3: Parameter Counting Utility
# Counts the number of rotation gates ('Rot') to determine the dimension of the trainable parameter vector.
def check_np(list):
    """Return the number of ``"Rot"`` entries in a gate sequence.

    Each element of ``list`` is a gate description whose first item is the gate
    kind. Every ``"Rot"`` gate consumes one trainable parameter, so the result
    is the length required for the weight vector.
    """
    # NOTE: the parameter name shadows the builtin ``list``; it is kept so that
    # the function's signature stays unchanged for existing callers.
    return sum(1 for entry in list if entry[0] == "Rot")

In [None]:
# Cell 4: Ansatz Construction
# Dynamically constructs the variational quantum circuit from the gate sequence provided by the RL agent.
def ansatz(W, gatestream):
    """Queue the gates described by ``gatestream``, in order.

    Args:
        W: indexable collection of rotation angles; the i-th ``"Rot"`` gate in
            the stream uses ``W[i]``.
        gatestream: sequence of gate descriptions, either
            ``["Rot", pauli_word, wire]`` or ``["CNOT", control, target]``.
            Entries of any other kind are skipped.
    """
    param_index = 0
    for gate in gatestream:
        kind = gate[0]
        if kind == "Rot":
            # Each rotation consumes the next unused weight.
            qml.PauliRot(W[param_index], gate[1], wires=gate[2])
            param_index += 1
            continue
        if kind == "CNOT":
            qml.CNOT(wires=[gate[1], gate[2]])

In [None]:
# Cell 5: Data Encoding (Feature Map)
# Converts classical input features (x) into rotation angles for state preparation (Amplitude/Angle Encoding).
def get_angles(x):
    """Convert a 4-component feature vector into five rotation angles.

    Args:
        x: indexable with at least four numeric entries ``x[0]..x[3]``.

    Returns:
        Array ``[beta2, -beta1/2, beta1/2, -beta0/2, beta0/2]``.

    The ``1e-12`` term keeps the ``beta0``/``beta1`` denominators non-zero when
    a pair of components vanishes. The ``beta2`` denominator has no such guard,
    so an all-zero ``x`` yields a 0/0 division there.
    """
    # Squared components, computed once and reused below.
    sq = [x[i] ** 2 for i in range(4)]

    ratio0 = np.sqrt(sq[1]) / np.sqrt(sq[0] + sq[1] + 1e-12)
    ratio1 = np.sqrt(sq[3]) / np.sqrt(sq[2] + sq[3] + 1e-12)
    ratio2 = np.sqrt(sq[2] + sq[3]) / np.sqrt(sq[0] + sq[1] + sq[2] + sq[3])

    beta0 = 2 * np.arcsin(ratio0)
    beta1 = 2 * np.arcsin(ratio1)
    beta2 = 2 * np.arcsin(ratio2)

    return np.array([beta2, -beta1 / 2, beta1 / 2, -beta0 / 2, beta0 / 2])

In [None]:
# Cell 6: State Preparation
# Encodes the calculated angles into the quantum state using RY rotations and CNOTs.
def statepreparation(a):
    """Queue the fixed state-preparation circuit on wires 0 and 1.

    Args:
        a: indexable with five rotation angles ``a[0]..a[4]``.

    The circuit is an RY on wire 0 followed by two structurally identical
    blocks. Each block applies CNOT, RY, CNOT, RY on wire 1 (controlled by
    wire 0) and then a PauliX on wire 0.
    """
    qml.RY(a[0], wires=0)

    # The first block uses a[1], a[2]; the second uses a[3], a[4].
    for first, second in ((1, 2), (3, 4)):
        qml.CNOT(wires=[0, 1])
        qml.RY(a[first], wires=1)
        qml.CNOT(wires=[0, 1])
        qml.RY(a[second], wires=1)
        qml.PauliX(wires=0)

In [None]:
# Cell 7: Quantum Circuit Definition (QNode)
# Combines state preparation and the ansatz to measure the expectation value.
@qml.qnode(dev)
def circuit(weights, angles, gatestream):
    """QNode: state preparation, then the gate-stream ansatz, then <Z> on wire 0.

    Args:
        weights: rotation angles consumed by the ``"Rot"`` gates of the ansatz.
        angles: the five state-preparation angles.
        gatestream: gate descriptions passed through to ``ansatz``.

    Returns:
        Expectation value of PauliZ on wire 0.
    """
    statepreparation(angles)
    ansatz(weights, gatestream)
    observable = qml.PauliZ(0)  # measured on the first qubit
    return qml.expval(observable)

In [None]:
# Cell 8: Variational Classifier
# Adds a classical bias term to the quantum circuit output.
def variational_classifier(weights, bias, angles, gatestream):
    """Return the circuit output shifted by a classical bias.

    Args:
        weights: trainable rotation angles for the ansatz.
        bias: scalar added to the circuit's expectation value.
        angles: state-preparation angles for one sample.
        gatestream: gate descriptions defining the ansatz.
    """
    expectation = circuit(weights, angles, gatestream)
    return expectation + bias

In [None]:
# Cell 9: Loss Function (Square Loss)
# Computes the Mean Squared Error (MSE) for training.
def square_loss(labels, predictions):
    """Return the sum of squared label/prediction differences divided by ``len(labels)``.

    Pairs are formed with ``zip``, so extra items in the longer argument are
    ignored in the sum, while the divisor is always ``len(labels)``.
    """
    squared_errors = ((label - pred) ** 2 for label, pred in zip(labels, predictions))
    return sum(squared_errors) / len(labels)

In [None]:
# Cell 10: Accuracy Metric
# Calculates classification accuracy as the fraction of predictions that lie within 1e-5 of their labels.
def accuracy(labels, predictions):
    """Return the fraction of predictions that match their label.

    A prediction counts as correct when it differs from the label by less than
    ``1e-5``. The count is divided by ``len(labels)``.
    """
    correct = sum(
        1 for label, pred in zip(labels, predictions) if abs(label - pred) < 1e-5
    )
    return correct / len(labels)

In [None]:
# Cell 11: Cost Function
# The objective function optimized during the inner loop (hybrid training).
def cost(weights, bias, features, labels, gatestream):
    """Square loss of the classifier over a set of samples.

    Args:
        weights: trainable rotation angles for the ansatz.
        bias: trainable classical bias.
        features: iterable of per-sample state-preparation angles.
        labels: target values, one per sample.
        gatestream: gate descriptions defining the ansatz.
    """
    predictions = []
    for sample in features:
        predictions.append(variational_classifier(weights, bias, sample, gatestream))
    return square_loss(labels, predictions)

In [None]:
# Cell 12: Hybrid Optimization Routine (Inner Loop)
# Loads Iris data, pre-processes it, and trains the circuit parameters using Nesterov Momentum.
def opt_classifier(
    gatestream,
    data_path="iris_classes1and2_scaled.txt",
    num_iterations=60,
    batch_size=5,
    stepsize=0.01,
    train_fraction=0.75,
    seed=0,
    cost_threshold=0.325348,
):
    """Train the classifier defined by ``gatestream`` and report its learning curve.

    Every keyword argument defaults to the value that used to be hard-coded, so
    ``opt_classifier(gatestream)`` behaves exactly as before.

    Args:
        gatestream: gate descriptions defining the ansatz.
        data_path: whitespace-delimited text file read with ``np.loadtxt``; the
            first two columns are used as features and the last column as the
            label.
        num_iterations: maximum number of optimizer steps.
        batch_size: number of training samples drawn (with replacement) per step.
        stepsize: step size passed to ``NesterovMomentumOptimizer``.
        train_fraction: fraction of the samples used for training; the rest is
            used for validation.
        seed: value passed to ``np.random.seed`` before the split; ``None``
            leaves the global random state untouched.
        cost_threshold: early stopping requires the full-data cost to be below
            this value (in addition to perfect train and validation accuracy).

    Returns:
        A 3-tuple ``(out_list, draw_p, figset)`` where
        ``out_list`` holds one ``[iteration, cost, acc_train, acc_val]`` row per
        completed step, ``draw_p`` is the ``qml.draw`` rendering of the trained
        circuit with all-zero state-preparation angles, and ``figset`` is
        ``[weights, bias, X_train, Y_train, X_val, Y_val]``.
    """
    # 1. Data Loading & Preprocessing
    data = np.loadtxt(data_path)
    X = data[:, 0:2]  # Use first two features

    # Pad each sample to four components (constant 0.3, then 0) and normalize
    # it to unit length before converting it to angles.
    padding = 0.3 * np.ones((len(X), 1))
    X_pad = np.c_[np.c_[X, padding], np.zeros((len(X), 1))]
    normalization = np.sqrt(np.sum(X_pad ** 2, -1))
    X_norm = (X_pad.T / normalization).T

    # Convert features to angles (not trainable)
    features = np.array([get_angles(x) for x in X_norm], requires_grad=False)
    Y = data[:, -1]
    # NOTE(review): predictions are passed through np.sign below, so labels are
    # presumably +1/-1 - confirm against the data file.

    # 2. Train/Validation Split
    # Seeding here makes the split, the initial weights and the mini-batch
    # sequence identical on every call.
    if seed is not None:
        np.random.seed(seed)
    num_data = len(Y)
    num_train = int(train_fraction * num_data)
    index = np.random.permutation(range(num_data))
    feats_train = features[index[:num_train]]
    Y_train = Y[index[:num_train]]
    feats_val = features[index[num_train:]]
    Y_val = Y[index[num_train:]]

    # Raw (un-encoded) features, returned for plotting.
    X_train = X[index[:num_train]]
    X_val = X[index[num_train:]]

    # 3. Initialization: one weight per "Rot" gate, plus a scalar bias.
    weights = 0.01 * np.random.randn(check_np(gatestream), requires_grad=True)
    bias = np.array(0.0, requires_grad=True)

    opt = NesterovMomentumOptimizer(stepsize)

    out_list = []

    # 4. Training Loop
    for it in range(num_iterations):
        # Mini-batch sampling
        batch_index = np.random.randint(0, num_train, (batch_size,))
        feats_train_batch = feats_train[batch_index]
        Y_train_batch = Y_train[batch_index]

        # Optimization Step: opt.step returns one value per argument of `cost`;
        # only the updated weights and bias are kept.
        weights, bias, _, _, _ = opt.step(cost, weights, bias, feats_train_batch, Y_train_batch, gatestream)

        # Evaluation on the full train/validation sets
        predictions_train = [np.sign(variational_classifier(weights, bias, f, gatestream)) for f in feats_train]
        predictions_val = [np.sign(variational_classifier(weights, bias, f, gatestream)) for f in feats_val]

        acc_train = accuracy(Y_train, predictions_train)
        acc_val = accuracy(Y_val, predictions_val)
        # Cost is measured on all samples (train and validation together).
        cost_gs = cost(weights, bias, features, Y, gatestream)

        out_list.append([it + 1, float(cost_gs), float(acc_train), float(acc_val)])

        # Early Stopping Condition
        if acc_train == 1 and acc_val == 1 and cost_gs < cost_threshold:
            break

    # 5. Visualization Preparation
    draw_p = qml.draw(circuit)(weights, [0, 0, 0, 0, 0], gatestream)

    return out_list, draw_p, [weights, bias, X_train, Y_train, X_val, Y_val]

In [None]:
# Cell 13: Gate Encoding (Symbolic to Numerical)
# Converts gate strings to numerical vectors for the RL agent.
def gate_to_obs(gate):
    """Encode a gate description as a 4-element integer observation.

    Layout of the result ``[is_rot, is_cnot, a, b]``:
      * ``["Rot", axis, wire]``        -> ``[1, 0, axis_code, wire]`` with
        ``axis_code`` 1/2/3 for ``'X'``/``'Y'``/``'Z'`` (0 for any other axis).
      * ``["CNOT", control, target]``  -> ``[0, 1, control, target]``.
      * any other kind                 -> ``[0, 0, 0, 0]``.
    """
    kind = gate[0]
    if kind == 'Rot':
        axes = ('X', 'Y', 'Z')
        axis_code = axes.index(gate[1]) + 1 if gate[1] in axes else 0
        return [1, 0, axis_code, gate[2]]
    if kind == 'CNOT':
        return [0, 1, gate[1], gate[2]]
    return [0, 0, 0, 0]

In [None]:
# Cell 14: State Update Mechanism
# Updates the environment state based on the selected action.
def update_obs(act, step, obs, gatestream, gates):
    """Record the gate chosen by action ``act`` and advance the step counter.

    ``gatestream`` and ``obs`` are modified in place: the selected gate is
    appended to ``gatestream`` and its encoding is written to ``obs[step]``.

    Returns:
        ``(step + 1, obs, gatestream)``; the last two are the same objects that
        were passed in.
    """
    chosen = gates[act]
    gatestream.append(chosen)
    obs[step] = gate_to_obs(chosen)
    return step + 1, obs, gatestream

In [None]:
# Cell 15: Reward Function Definition
# Calculates the reward based on accuracy (train/val), cost, and structural constraints.
def cal_reward(steps, obs, outs):
    """Compute the reward for the most recently added gate.

    Args:
        steps: number of gates placed so far; ``obs[steps - 1]`` is the latest.
        obs: list of 4-element gate encodings ``[is_rot, is_cnot, a, b]``.
        outs: training history rows ``[iteration, cost, acc_train, acc_val]``.

    Returns:
        ``(components, total)`` where ``components`` is
        ``[acc_m, cost_m, gate_r, rot_r, cnot_r, steps_r, pop_r, dup_r]`` and
        ``total`` is their weighted sum.
    """
    latest = obs[steps - 1]

    def rows_on_wire(wire):
        # Encoded gates acting on `wire`: CNOT rows match on either operand,
        # rotation rows match on their wire entry.
        return [
            row for row in obs
            if ((row[2] == wire or row[3] == wire) if row[1] == 1
                else (row[0] == 1 and row[3] == wire))
        ]

    def ends_with_repeat(rows):
        # True when the two most recent rows are identical.
        return len(rows) > 1 and rows[-1] == rows[-2]

    ## Accuracy Reward (Average of Train & Validation)
    train_accs = [row[2] for row in outs]
    val_accs = [row[3] for row in outs]
    mean_train = sum(train_accs) / len(train_accs)
    mean_val = sum(val_accs) / len(val_accs)
    acc_m = (mean_train + mean_val) / 2

    ## Cost Reward (Inverse of Mean Cost)
    costs = [row[1] for row in outs]
    cost_m = 1 / (sum(costs) / len(costs))

    ## Variance Reward (Uniform Gate Distribution)
    usage = [0, 0]  # 2 qubits
    for row in obs:
        if row[1] == 1:
            touched = (row[2], row[3])
        elif row[0] == 1:
            touched = (row[3],)
        else:
            touched = ()
        for wire in touched:
            usage[wire] += 1
    pop_r = (2 - np.var(usage)) / 2

    ## Duplicate Gate Penalty
    dup_r = 0
    if latest[0] == 1:
        # Rotation: penalize when the previous gate on the same wire is identical.
        if ends_with_repeat(rows_on_wire(latest[3])):
            dup_r = -10
    elif latest[1] == 1:
        # CNOT: penalize only when it repeats the previous gate on both of its wires.
        on_control = rows_on_wire(latest[2])
        on_target = rows_on_wire(latest[3])
        if ends_with_repeat(on_control) and ends_with_repeat(on_target):
            dup_r = -10

    ## Gate Type Incentive (Favor Rotations)
    gate_r = rot_r = 1 if latest[0] == 1 else 0

    ## CNOT Distance (Locality)
    cnot_r = 1 / abs(latest[2] - latest[3]) if latest[1] == 1 else 0

    ## Circuit Depth Penalty
    steps_r = (42 - steps) / 42

    # Weighted Sum of Rewards
    components = [acc_m, cost_m, gate_r, rot_r, cnot_r, steps_r, pop_r, dup_r]
    total = (acc_m - 0.5)*2 * 15 + cost_m * 2 + gate_r * 3 + rot_r + cnot_r + steps_r * 5 + pop_r * 3 + dup_r
    return components, total

In [None]:
# Cell 16: Quantum Circuit Environment Class
# Defines the RL environment for the Iris classification problem.
class qc:
    """RL environment that builds a 2-qubit classifier circuit one gate at a time.

    Each call to ``step`` appends the gate selected by the action, retrains the
    classifier with ``opt_classifier`` and stores the resulting reward.
    """

    def __init__(self):
        # Action Space: Rotations (X, Y, Z) for 2 qubits and CNOTs
        self.gates = [['Rot','X', 0], ['Rot','X', 1],
                      ['Rot','Y', 0], ['Rot','Y', 1],
                      ['Rot','Z', 0], ['Rot','Z', 1],
                      ['CNOT', 0, 1],  ['CNOT', 1, 0]]
        self.len_qc = 42  # Max circuit depth (e.g., 2-HE ansatz (7 gates) * 6 layers)
        self.act_space = len(self.gates)
        # Initialise the episode state so that step() works on a fresh
        # instance even if the caller has not called reset() yet.
        self.reset()

    def reset(self):
        """Start a new episode: empty circuit, zeroed observation, cleared flags."""
        self.steps = 0
        self.obs = [[0] * 4 for _ in range(self.len_qc)]
        self.gatestream = []
        self.reward = -1
        self.term = -1
        self.done = 0
        return

    def step(self, act):
        """Append gate ``act`` to the circuit, retrain, and update the reward.

        Returns:
            ``1`` on success; ``0`` (after printing a message) when ``act`` is
            outside the action space or the circuit is already at full length.
            Results are stored on the instance (``outs``, ``draw``, ``figset``,
            ``rlist``, ``reward``, ``term``, ``done``).
        """
        if act > self.act_space-1 or act < 0:
            print("out of action space")
            return 0
        if self.steps > self.len_qc-1:
            print("out of qc length")
            return 0

        # Update State
        self.steps, self.obs, self.gatestream = update_obs(act, self.steps, self.obs, self.gatestream, self.gates)

        # Inner Optimization Loop
        self.outs, self.draw, self.figset = opt_classifier(self.gatestream)

        # Calculate Reward
        self.rlist, self.reward = cal_reward(self.steps, self.obs, self.outs)

        # term is 1 once the circuit has reached its maximum length.
        if self.steps == self.len_qc: self.term = 1
        else: self.term = 0

        # Terminate if perfect accuracy is achieved
        # (best train and best validation accuracy over the history are both 1).
        if max([row[2] for row in self.outs]) == 1 and max([row[3] for row in self.outs]) == 1:
            self.done = 1

        return 1

    def gs_step(self, gs):
        """Train and evaluate an explicit gate sequence ``gs``.

        Only ``outs``, ``draw`` and ``figset`` are updated; the episode state
        (``steps``, ``obs``, ``gatestream``, reward) is left untouched.
        """
        # Helper to evaluate a specific gate sequence
        self.outs, self.draw, self.figset = opt_classifier(gs)
        return 1

    def sample(self):
        """Return a uniformly random valid action index."""
        return random.randint(0, self.act_space-1)

    def showdb(self, figset, gatestream):
        """Plot the decision boundary for a trained classifier.

        Args:
            figset: ``[weights, bias, X_train, Y_train, X_val, Y_val]`` as
                returned in the third element of ``opt_classifier``'s result.
            gatestream: gate descriptions of the circuit being visualized.
        """
        # Visualization of Decision Boundary
        weights = figset[0]
        bias = figset[1]
        X_train = figset[2]
        Y_train = figset[3]
        X_val = figset[4]
        Y_val = figset[5]

        plt.figure()
        cm = plt.cm.RdBu

        # ... (Visualization Code Omitted for Brevity, standard Matplotlib plotting)
        # Draws contour plots and scatter plots for train/validation data

        # ...

        plt.legend()
        plt.show()