In [13]:
import numpy as np
import pandas as pd
import pennylane as qml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

def make_staircase_dataset(n_samples=1000, random_state=42):
    rng = np.random.default_rng(random_state)

    # Create three uncorrelated signals uniformly in [-1, 1]
    s0 = rng.uniform(low=-1.0, high=1.0, size=n_samples)
    s1 = rng.uniform(low=-1.0, high=1.0, size=n_samples)
    s2 = rng.uniform(low=-1.0, high=1.0, size=n_samples)

    # Generate label from a thresholded linear combination
    raw_score = 1.0 * s0 + 0.8 * s1 + 0.5 * s2
    y = (raw_score > 0).astype(int)

    # Add small noise
    noise_scale = 0.05
    f0 = s0 #+ rng.normal(0, noise_scale, n_samples)
    f1 = s1 #+ rng.normal(0, noise_scale, n_samples)
    f2 = s2 #+ rng.normal(0, noise_scale, n_samples)

    # 3 pure noise features
    f3_5 = rng.uniform(low=-1.0, high=1.0, size=(n_samples, 3))

    X = np.column_stack([f0, f1, f2, f3_5])
    df = pd.DataFrame(X, columns=[f"f{i}" for i in range(6)])
    df["target"] = y
    return df

In [14]:
class SubsetMajorityVQC:
    def __init__(self, coeffs):
        """
        coeffs: list of feature coefficients, e.g., [1.0, 0.8, 0.5, 0.0, 0.0, 0.0]
        Nonzero coeffs indicate which features are included in the majority subset.
        """
        self.coeffs = np.array(coeffs, dtype=float)
        self.num_qubits = len(self.coeffs)
        wires = list(range(self.num_qubits + 1))  # +1 for readout qubit
        self.dev = qml.device("default.qubit", wires=wires)

        self.beta = 0.9 * np.pi / self.num_qubits  # as suggested in the theory

        self._initialize_circuit()

    def state_preparation(self, x):
        """Prepare the input via RX rotations."""
        for j in range(self.num_qubits):
            qml.RX(np.pi * x[j], wires=j)
        qml.PauliX(wires=self.num_qubits)  # Set readout qubit to |1>

    def apply_subset_majority_unitary(self):
        """Implements U_MS = exp(i (β/2) sum_j a_j Z_j X_readout)."""
        for j in range(self.num_qubits):
            if self.coeffs[j] != 0:
                qml.CNOT(wires=[j, self.num_qubits])
                qml.RX(self.beta, wires=self.num_qubits)
                qml.CNOT(wires=[j, self.num_qubits])

    def _initialize_circuit(self):
        @qml.qnode(self.dev, interface="autograd")
        def circuit(x):
            self.state_preparation(x)
            self.apply_subset_majority_unitary()
            return qml.expval(qml.PauliY(wires=self.num_qubits))

        self.circuit = circuit

    def predict(self, X):
        """Predicts class labels (0 or 1)."""
        X = np.array(X, dtype=np.float64)
        preds = np.array([
            1 if self.circuit(x) > 0 else 0
            for x in X
        ])
        return preds


In [15]:
def evaluate_model_hardcoded(model, df, features, label):
    """Evaluate a non-trainable VQC model (like SubsetMajorityVQC)."""
    X = df[[f"f{i}" for i in features]].values
    y = df["target"].values

    # No scaling needed; assuming features are already in [-1,1]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # No training step!

    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    acc_train = accuracy_score(y_train, y_pred_train)
    acc_test = accuracy_score(y_test, y_pred_test)
    print(f"{label:<20} | Train: {acc_train:.4f} | Test: {acc_test:.4f}")


In [16]:
def run_sanity_check():
    print("\n🔍 Running sanity checks on staircase_dataset...\n")
    df = make_staircase_dataset()

    # extend signal coeffs [1.0, 0.8, 0.5] to match 6 features by zeroing noise
    base_coeffs = [1.0, 0.8, 0.5] + [0.0] * 3  # now length == 6

    feature_sets = [
        ([0],       "f0"),
        ([0, 1],    "f0 + f1"),
        ([0, 2],    "f0 + f2"),
        ([0, 3],    "f0 + f3 (noise)"),
        ([0, 4],    "f0 + f4 (noise)"),
        ([0, 5],    "f0 + f5 (noise)"),
        ([0, 1, 2], "f0 + f1 + f2"),
    ]

    print("=== Angle‐Encoded VQC ===")
    for features, label in feature_sets:
        coeffs = [base_coeffs[i] for i in features]
        model = SubsetMajorityVQC(coeffs)
        evaluate_model_hardcoded(model, df, features, label)


if __name__ == "__main__":
    run_sanity_check()


🔍 Running sanity checks on staircase_dataset...

=== Angle‐Encoded VQC ===
f0                   | Train: 0.5000 | Test: 0.5450
f0 + f1              | Train: 0.5000 | Test: 0.5450
f0 + f2              | Train: 0.5000 | Test: 0.5450
f0 + f3 (noise)      | Train: 0.5000 | Test: 0.5450
f0 + f4 (noise)      | Train: 0.5000 | Test: 0.5450
f0 + f5 (noise)      | Train: 0.5000 | Test: 0.5450
f0 + f1 + f2         | Train: 0.5000 | Test: 0.5450
