In [30]:
!pip install pennylane
!pip install pennylane pennylane-lightning[gpu]



### Import Libraries

In [31]:
import numpy as np
import matplotlib.pyplot as plt
import pennylane as qml
from pennylane.templates import QFT
from sklearn.svm import SVC
from sklearn.datasets import fetch_openml, load_digits
from sklearn.preprocessing import MinMaxScaler, normalize
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from pennylane import numpy as pnp
from skimage.transform import resize
from keras.datasets import mnist

### Step 1:  Dataset Preparation


First, we load the MNIST dataset from openML.
- X is the pixel data
- y is the labels
- converting everything to `uint8` here to ensure all values are integers in [0, 255]

In [32]:
# loading mnist from openML
mnist = fetch_openml('mnist_784', version=1, cache=True)
X = mnist['data'].astype(np.uint8) # better to convert for binerization
y = mnist['target'].astype(np.uint8)
y = y.to_numpy()

Next, we focus on 2 classes, i.e. binary classification.
Here, I've been experimenting with different classes, and I stopped on 4 vs 9, cause they have more subtle difference in pixels, they are similar looking.

In [33]:
# focus on binary classification
mask = (y == 4) | (y == 9)
X, y = X[mask], y[mask]
X.shape

(13782, 784)

- I take only the first `n_samples`.
- I convert X to a NumPy array, and shuffling the data randomly

In [34]:
n_samples = 100 # restricting to 6000 samples for now

X = X.values if hasattr(X, "values") else X # safer conversion
perm = np.random.permutation(len(X))
X, y = X[perm], y[perm]

X = X[:n_samples]
y = y[:n_samples]

Now, I normalise pixel intensities.
- [0, 255] -> [0, 1]
- reshaping images back to 2D for resizing, i.e to 28x28 array with float values between 0 and 1.

In [35]:
X = X / 255.0
X = X.reshape(-1, 28, 28)

print(X.shape)
print("Pixel range:", X.min(), X.max())

(100, 28, 28)
Pixel range: 0.0 1.0


And now I reduce images to 8x8 + flattening to (, 64)
- resize -> flatten -> normalize

In [36]:
# convert each 28x28 binarised image to 8x8, then flatten to length 64
def to_8x8_vector(img_row):
    img_8x8 = resize(
        img_row,
        (8, 8),
        anti_aliasing=False,
        preserve_range=True,
        order=1 # controlling interpolation
    )
    img_8x8 = img_8x8.flatten()
    s = np.sum(img_8x8)

    if s > 0:
        img_8x8 = np.sqrt(img_8x8 / s)
    else:
        img_8x8 = np.zeros_like(img_8x8)
        img_8x8[0] = 1.0
      # should be shape (64,)
    return img_8x8

# apply transformation to all images
X_8x8 = np.array([to_8x8_vector(x) for x in X], dtype=float)
X_8x8.shape

(100, 64)

In [37]:
# sanity check, make sure no NaNs exist and all vectors are normalised, i.e. norm is around 1
print("Any NaNs?", np.isnan(X_8x8).any())
print("Norm check:", np.min(np.linalg.norm(X_8x8, axis=1)), np.max(np.linalg.norm(X_8x8, axis=1)))

Any NaNs? False
Norm check: 0.9999999999999998 1.0000000000000002


I'm gonna do the splitting here, and carry both representations consistently.
- qek inputs: (64,) flattened and normalized vectors, for quantum kernel embedding
- qjpeg: 28x28 images

In [38]:
idx = np.arange(n_samples)

idx_train, idx_test, y_train, y_test = train_test_split(
    idx, y, test_size=0.2, random_state=42, stratify=y
)

# QEK inputs (8x8 -> 64 -> normed)
X_train_qek = X_8x8[idx_train]
X_test_qek  = X_8x8[idx_test]

# QJPEG inputs (28x28 binary images)
X_train_img = X[idx_train]
X_test_img  = X[idx_test]

print("QEK train/test:", X_train_qek.shape, X_test_qek.shape)
print("IMG train/test:", X_train_img.shape, X_test_img.shape)
print("Labels train/test:", y_train.shape, y_test.shape)

QEK train/test: (80, 64) (20, 64)
IMG train/test: (80, 28, 28) (20, 28, 28)
Labels train/test: (80,) (20,)


Data preparation is done.

### Step 2: Quantum Embedding & Kernel Training

Define number of qubits and device.

In [39]:
device = "lightning.qubit"
n_qubits = 6
n_layers = 2          # number of trainable layers
batch_size = 8       # bigger batch for stability
n_steps = 50          # training steps
stepsize = 2e-4       # smaller learning rate
eps = 1e-8      # small epsilon to prevent division by zero
wires = range(n_qubits)

In [40]:
dev = qml.device(device, wires=n_qubits, shots=None)

In [41]:
def qek_layer_amplitude(x, theta_l):
    # Data re-uploading via phase gates (breaks kernel symmetry)
    for q in range(n_qubits):
        qml.RZ(np.pi * x[q], wires=q)

    # Trainable block
    for q in range(n_qubits):
        qml.RX(theta_l[q, 0], wires=q)
        qml.RZ(theta_l[q, 1], wires=q)

    # Entanglement (non-commuting)
    for q in range(n_qubits - 1):
        qml.CNOT(wires=[q, q + 1])

def qek_embedding_amplitude(x, theta):
    qml.AmplitudeEmbedding(
        x,
        wires=range(n_qubits),
        pad_with=0.0,
        normalize=False
    )

    for l in range(theta.shape[0]):
        qek_layer_amplitude(x, theta[l])

# quantum circuit
@qml.qnode(dev)
def qek_kernel_circuit(x1, x2, theta):
    qek_embedding_amplitude(x1, theta)
    qml.adjoint(qek_embedding_amplitude)(x2, theta)
    return qml.probs(wires=range(n_qubits))

def qek_kernel(x1, x2, theta):
    # Fidelity = probability of |0...0>
    return qek_kernel_circuit(x1, x2, theta)[0]

# compute kernel matrix
def square_kernel_matrix(X, theta):
    n = len(X)
    K = pnp.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            val = qek_kernel(X[i], X[j], theta)
            K[i, j] = val
            K[j, i] = val
    return K

def rectangular_kernel_matrix(X1, X2, theta):
    K = np.zeros((len(X1), len(X2)))
    for i in range(len(X1)):
        for j in range(len(X2)):
            K[i, j] = qek_kernel(X1[i], X2[j], theta)
    return K

def centered_kernel_alignment(K, y):
    y = y.astype(float)
    y = 2 * y - 1          # {0,1} → {-1,+1}
    yyT = np.outer(y, y)

    Kc = K - K.mean(axis=0) - K.mean(axis=1)[:, None] + K.mean()
    return -np.sum(Kc * yyT) / (np.linalg.norm(Kc) + 1e-8)

def kernel_alignment_loss(theta, X, y, batch_size=10):
    idx = np.random.choice(len(X), batch_size, replace=False)
    Xb = X[idx]
    yb = y[idx]

    K = square_kernel_matrix(Xb, theta)
    return centered_kernel_alignment(K, yb)

def feature_map(x, theta):
    # Amplitude encoding (always normalized)
    qml.AmplitudeEmbedding(x, wires=wires, pad_with=0.0, normalize=False)

    # Variational re-uploading layers
    for l in range(theta.shape[0]):
        for i in range(n_qubits):
            qml.RX(theta[l, i, 0], wires=i)
            qml.RY(theta[l, i, 1], wires=i)
            qml.RZ(theta[l, i, 2], wires=i)

        # Entangling layer (ring or chain)
        for i in range(n_qubits - 1):
            qml.CNOT(wires=[i, i + 1])
        qml.CNOT(wires=[n_qubits - 1, 0])

@qml.qnode(dev, interface="autograd")
def kernel_qnode(x1, x2, theta):
    feature_map(x1, theta)
    qml.adjoint(feature_map)(x2, theta)
    return qml.expval(qml.Projector([0]*n_qubits, wires=wires))

In [42]:
def kernel_matrix(X1, X2, theta):
    return qml.math.stack([
        qml.math.stack([
            kernel_qnode(x1, x2, theta)
            for x2 in X2
        ])
        for x1 in X1
    ])

def kernel_alignment_loss_batch(theta, X, y, batch_size):
    idx = np.random.choice(len(X), batch_size, replace=False)
    Xb = X[idx]
    yb = y[idx]

    # Kernel
    K = kernel_matrix(Xb, Xb, theta)
    K = K / pnp.trace(K)
    K /= pnp.linalg.norm(K)

    # Labels: {0,1} → {−1,+1}
    y_pm = 2 * yb - 1
    yy = qml.math.outer(y_pm, y_pm)

    # Center kernel
    Kc = center_kernel(K)
    Kc = Kc / pnp.trace(Kc)

    # Flatten
    Kf = qml.math.reshape(Kc, (-1,))
    yyf = qml.math.reshape(yy, (-1,))

    # Kernel Target Alignment
    numerator = qml.math.dot(Kf, yyf)
    denominator = qml.math.sqrt(
        qml.math.dot(Kf, Kf) * qml.math.dot(yyf, yyf)
    )

    return -numerator / denominator

def kernel_alignment_loss_minibatch(theta, X, y, batch_pairs=20):
    idx = np.random.choice(len(X), size=batch_pairs, replace=True)

    K_vals = []
    y_vals = []

    for i in idx:
        for j in idx:
            K_vals.append(qek_kernel(X[i], X[j], theta))
            y_vals.append(y[i] * y[j])

    K_vec = pnp.array(K_vals)
    y_vec = pnp.array(y_vals)

    return - pnp.dot(K_vec, y_vec) / (
        pnp.linalg.norm(K_vec) * pnp.linalg.norm(y_vec)
    )

def normalized_kernel_alignment(K, y):
    y = y.reshape(-1, 1)
    Ky = y @ y.T

    K_norm = K / qml.numpy.linalg.norm(K)
    Ky_norm = Ky / qml.numpy.linalg.norm(Ky)

    return 1 - qml.numpy.sum(K_norm * Ky_norm)

def kernel_alignment_loss(theta, X, y):
    K = square_kernel_matrix(X, theta)
    return normalized_kernel_alignment(K, y)

def center_kernel(K):
    n = K.shape[0]
    H = qml.numpy.eye(n) - qml.numpy.ones((n, n)) / n
    return H @ K @ H

In [43]:
print(f"\nTraining expressive circuit with {n_layers} layers")
# Initialize theta with the correct shape (L, n_qubits, 3) for the feature_map
theta = 0.01 * pnp.random.randn(n_layers, n_qubits, 3)

opt = qml.AdamOptimizer(stepsize=stepsize)

ema = None
alpha = 0.9

for step in range(n_steps):
    # Use kernel_alignment_loss_batch, which is compatible with Autograd
    theta, loss = opt.step_and_cost(
        lambda t: kernel_alignment_loss_batch(t, Xq, y_train, batch_size=batch_size)
,
        theta
    )

    ema = loss if ema is None else alpha * ema + (1 - alpha) * loss

    print(f"Step {step:02d} | loss = {loss:.4f} | ema={ema:.3f}")


Training expressive circuit with 2 layers
Step 00 | loss = 12.0482 | ema=12.048
Step 01 | loss = 11.4655 | ema=11.990
Step 02 | loss = 15.3187 | ema=12.323
Step 03 | loss = 9.7508 | ema=12.066
Step 04 | loss = 14.7736 | ema=12.336
Step 05 | loss = 13.5919 | ema=12.462
Step 06 | loss = 6.1797 | ema=11.834
Step 07 | loss = 14.5709 | ema=12.107
Step 08 | loss = 16.4403 | ema=12.541
Step 09 | loss = 4.4337 | ema=11.730
Step 10 | loss = 11.0721 | ema=11.664
Step 11 | loss = 5.2990 | ema=11.028
Step 12 | loss = 9.2488 | ema=10.850
Step 13 | loss = 19.9987 | ema=11.765
Step 14 | loss = 17.9321 | ema=12.381
Step 15 | loss = 14.7475 | ema=12.618
Step 16 | loss = 10.3891 | ema=12.395
Step 17 | loss = 12.7569 | ema=12.431
Step 18 | loss = 11.4109 | ema=12.329
Step 19 | loss = 13.1050 | ema=12.407
Step 20 | loss = 7.5146 | ema=11.918
Step 21 | loss = 15.1804 | ema=12.244
Step 22 | loss = 13.2127 | ema=12.341
Step 23 | loss = 9.3768 | ema=12.044
Step 24 | loss = 17.0651 | ema=12.546
Step 25 | loss

### Step 3: QJPEG Compression

In [44]:
def vectorization(img, Cr, Cc, renorm=False):
    "Vectorize the image into amplitude-encoding patches suitable for quantum circuits"
    # splitting the original image (Mr, Mc) into S equal-size patches of shape (Cr, Cc)
    Mr, Mc = img.shape
    assert Mr % Cr == 0 and Mc % Cc == 0
    patches = (img.reshape(Mc//Cr, Cr, -1, Cc).swapaxes(1, 2).reshape(-1, Cr, Cc))
    # 64 patches, (64, 64, 64) shape; S=64

    # vectorize each patch and collect all in a (N, Cr*Cc) array
    vect_patches = np.reshape(patches,  (patches.shape[0], Cr*Cc)) # (64, 4096)

    # normalize each (Cr*Cc) vector to the intensity of the corresponding (Cr, Cc) patch
    states = np.zeros((patches.shape[0], Cr*Cc)) # (64, 4096)
    norm = np.zeros(patches.shape[0])

    for idx in range(patches.shape[0]): # for each patch
        # compute the sum of pixels intensities
        norm[idx] = vect_patches[idx].sum()
        if norm[idx] == 0:
            # empty patch -> encode |0...0>
            states[idx, 0] = 1.0
            norm[idx] = 1.0
            continue

        # normalize the patch vector so that its entries sum is 1
        tmp = vect_patches[idx] / norm[idx]
        # take the element-wise square root of the normalized vector
        states[idx] = np.sqrt(tmp)
    if renorm == False:
        norm = np.ones(patches.shape[0])
    print(states[:10])

    return states, norm # amplitudes, pixel intensities' sums

In [45]:
def qft_swaps(wires):
    n = len(wires)
    # apply QFT to all qubits
    qml.QFT(wires=wires)
    # add swaps to reverse qubit order!
    for i in range(n // 2):
        qml.SWAP(wires=[wires[i], wires[n - i - 1]])


def iqft_swaps(wires):
    n = len(wires)
    # swaps again - BEFORE iqft
    for i in reversed(range(n // 2)):
        qml.SWAP(wires=[wires[i], wires[n-i-1]])
    qml.adjoint(QFT)(wires=wires)

In [46]:
def circuit_builder(states, n0, n2, shots):
    ntilde = (n0 - n2) // 2
    n1 = n0 - ntilde

    qnodes = []

    # define device with n0 qubits
    dev = qml.device(device, wires=n0, shots=shots)

    for idx in range(states.shape[0]):
        # qnode to capture current input state
        @qml.qnode(dev)
        def circuit():
            # print("State norm:", np.linalg.norm(states[idx]))
            # initializing the state (using AmplitudeEmbedding here, but I'm wondering if something else could work faster)
            qml.AmplitudeEmbedding(states[idx], wires=range(n0), pad_with=0.0, normalize=True)

            # Hadamard on all n0 qubits
            for w in range(n0):
                qml.Hadamard(wires=w)

            # apply QFT on all qubits
            qft_swaps(wires=range(n0))

            # apply IQFT on first n1 qubits
            iqft_swaps(wires=range(n1))

            # setting boundaries - Rule 2
            discard_start = n0 // 2 - ntilde
            discard_end = n0 // 2 - 1
            discarded_qubits = set(range(discard_start, discard_end + 1))

            # keep exactly n2 qubits for output
            measured_qubits = list(range(n2))


            # Hadamard on remaining qubits
            for q in measured_qubits:
                qml.Hadamard(wires=q)

            # print(f'Measured qubits: {measured_qubits}')

            return qml.probs(wires=measured_qubits)
        qnodes.append(circuit)

    return qnodes



In [47]:
def reconstruction(qnodes, n2, norm):
    out_freq = np.zeros((len(qnodes), 2**n2))
    for idx, qnode in enumerate(qnodes):
        probs = qnode()
        out_freq[idx] = qnode() * norm[idx]

    return out_freq

In [48]:
def devectorization(out_freq):
    S = out_freq.shape[0]
    nrow = int(np.sqrt(out_freq.shape[1])) # rows per patch
    ncol = nrow

    decoded_patches = np.reshape(out_freq,\
                      (out_freq.shape[0], nrow, ncol)) # (S, nrow, ncol)

    im_h, im_w = nrow*int(np.sqrt(S)), ncol*int(np.sqrt(S)) # final shape

    # initialization
    decoded_img = np.zeros((im_w, im_h))

    idx = 0
    for row in np.arange(im_h - nrow + 1, step=nrow):
        for col in np.arange(im_w - ncol + 1, step=ncol):
            decoded_img[row:row+nrow, col:col+ncol] = decoded_patches[idx]
            idx += 1

    return decoded_img

In [49]:
def qjpeg_feature_map_quantum(img_28x28):
    """
    True QJPEG-inspired feature map:
    - probabilities sum to 1
    - amplitudes = sqrt(probabilities)
    - output dimension = 64 (6 qubits)
    """

    img = img_28x28.astype(float)
    img = img / img.sum()              # probabilities
    amps = np.sqrt(img.flatten())      # amplitudes

    # reduce to 64 amplitudes (simple truncation for now)
    amps = amps[:64]

    # safety
    if np.linalg.norm(amps) == 0:
        amps[0] = 1.0
    else:
        amps /= np.linalg.norm(amps)

    return amps


### Step 4: Inference without retraining

In [50]:
### Step 4: Inference without retraining (robust version)

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

C = 100.0
layers_list = [1, 2, 4, 6, 8, 10, 12]

# --- Device and QNode ---
dev = qml.device(device, wires=n_qubits, shots=None)

# QNode: returns full QEK state vector
@qml.qnode(dev)
def expressive_state_qnode(x, current_theta):
    feature_map(x, current_theta)
    return qml.state()

# --- Helper functions ---

def compute_kernel(states_a, states_b=None):
    """Compute kernel matrix from state vectors or classical vectors."""
    if states_b is None:
        states_b = states_a
    K = np.zeros((len(states_a), len(states_b)))
    for i, a in enumerate(states_a):
        for j, b in enumerate(states_b):
            K[i, j] = np.abs(np.vdot(a, b))**2
    return K

def combine_qek_qjpeg(x_qek, x_qjpeg, alpha=0.8):
    """Alpha controls the contribution of QEK vs QJPEG."""
    v = np.concatenate([alpha * x_qek, (1 - alpha) * x_qjpeg])
    return v / np.linalg.norm(v)

def qjpeg_to_32(img_28x28):
    """Return 32-dimensional QJPEG vector from 28x28 image."""
    amps = qjpeg_feature_map_quantum(img_28x28)  # get 64 amplitudes
    amps_32 = amps[:32]  # truncate to 32
    norm = np.linalg.norm(amps_32)
    if norm == 0:
        amps_32[0] = 1.0
        norm = 1.0
    return amps_32 / norm

# --- Prepare QJPEG 32-dimensional vectors ---
X_train_qjpeg = np.array([qjpeg_to_32(img.reshape(28,28)) for img in X_train_img])
X_test_qjpeg  = np.array([qjpeg_to_32(img.reshape(28,28)) for img in X_test_img])

# --- Evaluate function for a given number of layers ---
results = []

def print_accuracies(layers_count):
    print("\n" + "="*40)
    print(f"Evaluating expressive circuit with {layers_count} layers")
    print("="*40)

    # Initialize a new parameter array for the current evaluation with 'layers_count' layers.
    # This array will have the full desired shape (layers_count, n_qubits, 3).
    current_theta_params = pnp.zeros((layers_count, n_qubits, 3))

    # Copy the learned parameters from the global 'theta' for the layers that were trained.
    # 'theta' here refers to the globally trained theta from cell 0W5UFMQMuQtT (shape (n_layers, n_qubits, 3)).
    # n_layers is 2.
    num_layers_to_copy = min(layers_count, n_layers)
    current_theta_params[:num_layers_to_copy] = theta[:num_layers_to_copy]

    # Add a small perturbation to all layers of current_theta_params.
    # This will perturb the copied trained layers and also the (initially zero)
    # additional layers if layers_count > n_layers.
    current_theta_params += 0.005 * pnp.random.randn(layers_count, n_qubits, 3)

    # --- QEK only ---
    states_train = np.array([expressive_state_qnode(x, current_theta_params) for x in X_train_qek])
    states_test  = np.array([expressive_state_qnode(x, current_theta_params) for x in X_test_qek])

    # Kernel computation
    K_train = compute_kernel(states_train)
    K_test  = compute_kernel(states_test, states_train)

    # Normalize
    K_train /= np.linalg.norm(K_train)
    K_test  /= np.linalg.norm(K_train)

    # Train SVM
    clf = SVC(kernel="precomputed", C=C)
    clf.fit(K_train, y_train)
    y_pred = clf.predict(K_test)
    acc_qek = accuracy_score(y_test, y_pred)
    print(f"QEK accuracy:         {acc_qek:.4f}")

    # --- QEK + QJPEG (classical) ---
    X_train_combined = np.array([combine_qek_qjpeg(a, b) for a, b in zip(states_train, X_train_qjpeg)])
    X_test_combined  = np.array([combine_qek_qjpeg(a, b) for a, b in zip(states_test, X_test_qjpeg)])

    # Classical kernel
    K_train_comb = compute_kernel(X_train_combined)
    K_test_comb  = compute_kernel(X_test_combined, X_train_combined)

    # Normalize
    K_train_comb /= np.linalg.norm(K_train_comb)
    K_test_comb  /= np.linalg.norm(K_train_comb)

    # Train SVM
    clf_comb = SVC(kernel="precomputed", C=C)
    clf_comb.fit(K_train_comb, y_train)
    y_pred_comb = clf_comb.predict(K_test_comb)
    acc_combined = accuracy_score(y_test, y_pred_comb)
    print(f"QEK + QJPEG accuracy: {acc_combined:.4f}")

    results.append((layers_count, acc_qek, acc_combined))

# --- Evaluate for all layer counts ---
for l in layers_list:
    print_accuracies(l)


Evaluating expressive circuit with 1 layers
QEK accuracy:         0.8500
QEK + QJPEG accuracy: 0.8500

Evaluating expressive circuit with 2 layers
QEK accuracy:         0.8500
QEK + QJPEG accuracy: 0.8500

Evaluating expressive circuit with 4 layers
QEK accuracy:         0.8500
QEK + QJPEG accuracy: 0.8500

Evaluating expressive circuit with 6 layers
QEK accuracy:         0.8500
QEK + QJPEG accuracy: 0.8500

Evaluating expressive circuit with 8 layers
QEK accuracy:         0.8500
QEK + QJPEG accuracy: 0.8500

Evaluating expressive circuit with 10 layers
QEK accuracy:         0.8500
QEK + QJPEG accuracy: 0.8500

Evaluating expressive circuit with 12 layers
QEK accuracy:         0.8500
QEK + QJPEG accuracy: 0.8500


#### Experiment with the number of qubits


In [51]:
qubits_list = [4, 5, 6, 7, 8]
layers = 2

def truncate_and_normalize(x, n_qubits):
    dim = 2 ** n_qubits
    v = x[:dim].copy()

    norm = np.linalg.norm(v)
    if norm == 0:
        v[0] = 1.0
        norm = 1.0

    return v / norm

def prepare_features(X, n_qubits):
    return np.array([truncate_and_normalize(x, n_qubits) for x in X])

In [52]:
results = []

for n_qubits in qubits_list:
    print(f"\n{'='*50}")
    print(f"Training with {n_qubits} qubits")
    print(f"{'='*50}")

    wires = range(n_qubits)

    # --- device ---
    dev = qml.device(device, wires=n_qubits, shots=None)

    # --- data ---
    Xq = prepare_features(X_train_qek, n_qubits)

    # --- parameters (NOTE: wrapped!) ---
    theta = 0.01 * pnp.random.randn(layers, n_qubits, 3)

    # --- redefine QNode ---
    @qml.qnode(dev, interface="autograd")
    def kernel_qnode(x1, x2, theta):
        feature_map(x1, theta)
        qml.adjoint(feature_map)(x2, theta)
        return qml.expval(
            qml.Projector([0] * n_qubits, wires=wires)
        )

    # --- kernel + loss stay unchanged ---
    opt = qml.AdamOptimizer(stepsize=stepsize)

    ema = None
    for step in range(n_steps):
        # Use kernel_alignment_loss_batch, which is compatible with Autograd
        theta, loss = opt.step_and_cost(
            lambda t: kernel_alignment_loss_batch(t, Xq, y_train, batch_size=batch_size),
            theta
        )

        ema = loss if ema is None else alpha * ema + (1 - alpha) * loss

    print(f"Final EMA loss: {ema:.4f}")
    results.append((n_qubits, ema))



Training with 4 qubits
Final EMA loss: 20.8903

Training with 5 qubits
Final EMA loss: 14.1380

Training with 6 qubits
Final EMA loss: 16.1554

Training with 7 qubits
Final EMA loss: 13.8977

Training with 8 qubits
Final EMA loss: 15.1698
