In [7]:
import numpy as np
import pandas as pd
import pennylane as qml
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
import time

# === Load and Preprocess the Adult Dataset ===
df_adult = pd.read_csv('Datasets/adult/adult_test_int.csv')
df_adult = df_adult.drop(columns=["Unnamed: 0"])

# Select numerical features for the Quantum Model
selected_features = ["age", "capital.gain", "capital.loss", "hours.per.week"]

# Standardize numerical features
scaler = StandardScaler()
df_adult[selected_features] = scaler.fit_transform(df_adult[selected_features])

# For the quantum circuit, we need a binary encoding.
# Here we simply threshold at 0: if a feature is >=0, we encode a 1, otherwise 0.
X_quantum = 2 * (df_adult[selected_features].values >= 0).astype(float) - 1

# Define Target Variable and convert to {-1, +1}
y = df_adult["over50K"].values
y_quantum = y * 2 - 1  # converts 0/1 to -1/1

# Split dataset into training and testing sets
X_train_q, X_test_q, y_train_q, y_test_q = train_test_split(
    X_quantum, y_quantum, test_size=0.10, random_state=42, stratify=y
)

# === Quantum Model Setup (Farhi & Neven Ansatz) ===
num_data_qubits = 4                  # number of features
total_wires = num_data_qubits + 1      # add one readout qubit
dev = qml.device("default.qubit", wires=total_wires)

def state_preparation(x):
    """
    Prepare the state |x,1>:
      - Data qubits: encode binary features (0 -> |0>, 1 -> |1>)
      - Readout qubit (last qubit): prepare in state |1>
    """
    # For each data qubit, if x[i]==1, flip from |0> to |1>
    for i in range(num_data_qubits):
        if x[i] == 1:
            qml.PauliX(wires=i)
    # Prepare the readout qubit in |1>
    qml.PauliX(wires=num_data_qubits)

@qml.qnode(dev)
def circuit(theta, x):
    """
    Implements the Farhi–Neven ansatz:
      U(theta) = exp(i π/4 X_readout) * ∏_j exp(-i theta_j Z_j ⊗ X_readout)
    
    The controlled two-qubit term exp(-i theta_j Z⊗X) is decomposed using:
      exp(-i theta_j Z⊗X) = (I ⊗ RZ(-π/2))
         • CNOT(control=j, target=readout)
         • RZ(-2 theta_j) on readout
         • CNOT(control=j, target=readout)
         • (I ⊗ RZ(π/2))
    """
    # State preparation: prepare |x, 1>
    state_preparation(x)
    
    # Apply the controlled rotations for each data qubit
    for j in range(num_data_qubits):
        qml.RZ(-np.pi/2, wires=total_wires - 1)
        qml.CNOT(wires=[j, total_wires - 1])
        qml.RZ(-2 * theta[j], wires=total_wires - 1)
        qml.CNOT(wires=[j, total_wires - 1])
        qml.RZ(np.pi/2, wires=total_wires - 1)
    
    # Apply the final rotation: exp(i π/4 X) on the readout qubit.
    qml.RX(-np.pi/2, wires=total_wires - 1)
    
    # Measure the expectation value of Y on the readout qubit.
    return qml.expval(qml.PauliY(wires=total_wires - 1))

def variational_classifier(theta, x):
    """
    Ensures that the expectation value output is converted to a float.
    """
    return float(circuit(theta, x))

def square_loss(labels, predictions):
    return np.mean((labels - predictions) ** 2)

def cost(theta, X, Y):
    # Convert the QNode output (ExpectationMP) to a numeric scalar
    predictions = np.array([variational_classifier(theta, x) for x in X])
    return square_loss(Y, predictions)

# === Initialize Quantum Model Parameters ===
np.random.seed(0)
theta_init = np.random.randn(num_data_qubits) * 0.01  # one parameter per data qubit

# Use PennyLane's gradient descent optimizer
opt = qml.GradientDescentOptimizer(stepsize=0.05)
num_it = 10
batch_size = min(48, len(X_train_q))  # Ensure batch_size does not exceed dataset size

# === Debugging Helpers ===
def debug_parameters(theta, iteration):
    """Print parameter statistics to check if theta is updating."""
    print(f"\nIteration {iteration}: Parameter Check")
    print(f"Theta Mean: {np.mean(theta):.6f}, Std: {np.std(theta):.6f}")

def debug_predictions(predictions, iteration):
    """Print unique predictions to check for constant outputs."""
    unique_values = np.unique(predictions)
    print(f"Iteration {iteration}: Unique Prediction Values: {unique_values}")
    if len(unique_values) == 1:
        print("WARNING: Model is outputting the same value for all inputs!")

raw_outputs = np.array([variational_classifier(theta_init, x) for x in X_train_q[:10]])
print("Raw circuit outputs before training:", raw_outputs)

# === Train Quantum Model (Debugging Version) ===
start_time = time.time()
theta = theta_init.copy()

for it in range(num_it):
    batch_idx = np.random.choice(len(X_train_q), batch_size, replace=False)
    X_batch = X_train_q[batch_idx]
    Y_batch = y_train_q[batch_idx]

    # Debug parameter values before update
    debug_parameters(theta, it)

    # Optimization step
    theta = opt.step(lambda th: cost(th, X_batch, Y_batch), theta)

    # Compute training accuracy
    predictions = np.array([np.sign(variational_classifier(theta, x)) for x in X_train_q])
    acc = np.mean(predictions == y_train_q)

    # Debug if predictions are constant
    debug_predictions(predictions, it)

    print(f"Iter: {it+1:5d} | Cost: {cost(theta, X_train_q, y_train_q):0.7f} | Accuracy: {acc:0.7f}")

print(f"Total training time: {time.time() - start_time:.2f} seconds")

# === Quantum Model Evaluation (Debugging Version) ===
predictions_q = np.array([np.sign(variational_classifier(theta, x)) for x in X_test_q])

debug_predictions(predictions_q, "Final Test Set")

print("\nQuantum Model Performance:")
print(f"Accuracy: {accuracy_score(y_test_q, predictions_q):.4f}")
print(f"Precision: {precision_score(y_test_q, predictions_q, zero_division=1):.4f}")
print(f"Recall: {recall_score(y_test_q, predictions_q, zero_division=1):.4f}")
print(f"F1 Score: {f1_score(y_test_q, predictions_q, average='macro'):.4f}")



Raw circuit outputs before training: [-1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]

Iteration 0: Parameter Check
Theta Mean: 0.013460, Std: 0.007080
Iteration 0: Unique Prediction Values: [-1.]
Iter:     1 | Cost: 1.0282440 | Accuracy: 0.7429390

Iteration 1: Parameter Check
Theta Mean: 0.013460, Std: 0.007080




Iteration 1: Unique Prediction Values: [-1.]
Iter:     2 | Cost: 1.0282440 | Accuracy: 0.7429390

Iteration 2: Parameter Check
Theta Mean: 0.013460, Std: 0.007080
Iteration 2: Unique Prediction Values: [-1.]
Iter:     3 | Cost: 1.0282440 | Accuracy: 0.7429390

Iteration 3: Parameter Check
Theta Mean: 0.013460, Std: 0.007080
Iteration 3: Unique Prediction Values: [-1.]
Iter:     4 | Cost: 1.0282440 | Accuracy: 0.7429390

Iteration 4: Parameter Check
Theta Mean: 0.013460, Std: 0.007080
Iteration 4: Unique Prediction Values: [-1.]
Iter:     5 | Cost: 1.0282440 | Accuracy: 0.7429390

Iteration 5: Parameter Check
Theta Mean: 0.013460, Std: 0.007080
Iteration 5: Unique Prediction Values: [-1.]
Iter:     6 | Cost: 1.0282440 | Accuracy: 0.7429390

Iteration 6: Parameter Check
Theta Mean: 0.013460, Std: 0.007080
Iteration 6: Unique Prediction Values: [-1.]
Iter:     7 | Cost: 1.0282440 | Accuracy: 0.7429390

Iteration 7: Parameter Check
Theta Mean: 0.013460, Std: 0.007080
Iteration 7: Unique Pr

In [24]:
# Select same features for Classical Model
selected_features = ["age", "capital.gain", "capital.loss", "hours.per.week"]
X_classical = df_adult[selected_features].values  # Now same as Quantum model

# Update Train/Test Split
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_classical, y, test_size=0.10, random_state=42, stratify=y)

# Define Classical ANN Model
class ClassicalANN(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(layers[i], layers[i+1], dtype=torch.float64) for i in range(len(layers) - 1)])
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = x.to(torch.float64)
        for layer in self.layers[:-1]:
            x = torch.relu(layer(x))
        return self.sigmoid(self.layers[-1](x))

# Train Classical Model with the same 4 features
classical_model = ClassicalANN([4, 5, 1])  # Input layer now has 4 neurons
optimizer_classical = optim.Adam(classical_model.parameters(), lr=0.01)

def train_classical(model, optimizer, X_train, y_train, epochs=50):
    y_train = torch.tensor(y_train.tolist(), dtype=torch.float64).reshape(-1, 1)
    for epoch in range(epochs):
        optimizer.zero_grad()
        y_pred = model(torch.tensor(X_train.tolist(), dtype=torch.float64)).reshape(-1, 1)
        loss = nn.BCELoss()(y_pred, y_train)
        loss.backward()
        optimizer.step()

train_classical(classical_model, optimizer_classical, X_train_c, y_train_c, epochs=50)

# Evaluate Classical Model
with torch.no_grad():
    X_test_c_numeric = np.array(X_test_c, dtype=np.float64)
    y_pred_classical = classical_model(torch.tensor(X_test_c_numeric, dtype=torch.float64)).reshape(-1, 1)
    y_pred_classical = (y_pred_classical.numpy().flatten() > 0.5).astype(int)

print("\nClassical Model Performance:")
print(f"Accuracy: {accuracy_score(y_test_c, y_pred_classical):.4f}")
print(f"Precision: {precision_score(y_test_c, y_pred_classical):.4f}")
print(f"Recall: {recall_score(y_test_c, y_pred_classical):.4f}")
print(f"F1 Score: {f1_score(y_test_c, y_pred_classical, average='macro'):.4f}")



Classical Model Performance:
Accuracy: 0.7978
Precision: 0.7586
Recall: 0.3143
F1 Score: 0.6604
