In [None]:
# NOTEBOOK FOR 5 FEATURES

In [None]:
# load data
import csv
import numpy as np
import pandas as pd

filename = "datasets/Thyroid_Diff.csv"
with open(filename, 'r') as f:
    reader = csv.reader(f)
    header = next(reader)
    data = [row for row in reader]

data = np.array(data, dtype=object)

# split features from labels
X = data[:, :-1]    # features
y = data[:, -1]     # labels

: 

In [2]:
X[0]

array(['27', 'F', 'No', 'No', 'No', 'Euthyroid',
       'Single nodular goiter-left', 'No', 'Micropapillary', 'Uni-Focal',
       'Low', 'T1a', 'N0', 'M0', 'I', 'Indeterminate'], dtype=object)

In [3]:
# converts string values into integers 
from sklearn.preprocessing import LabelEncoder

# encode X
for i in range(X.shape[1]):
    le = LabelEncoder()
    X[:, i] = le.fit_transform(X[:, i])

X = X.astype(float)

# encode y
y_le = LabelEncoder()
y = y_le.fit_transform(y)

In [None]:
# convert to pandas so correlation works
X_df = pd.DataFrame(X)
y_s = pd.Series(y)

# compute correlation
correlations = X_df.apply(lambda col: col.corr(y_s))
correlations_abs = correlations.abs().sort_values(ascending=False)

print("\nPEARSON CORRELATIONS (ABS SORTED)")
print(correlations_abs)

# select top-k features
top2 = correlations_abs.head(2).index.tolist()
top5 = correlations_abs.head(5).index.tolist()
top10 = correlations_abs.head(10).index.tolist()

print("\nTop 2 features:", top2)
print("Top 5 features:", top5)
print("Top 10 features:", top10)



 PEARSON CORRELATIONS (ABS SORTED)
10    0.733376
15    0.708957
12    0.632323
11    0.556201
14    0.449137
9     0.383776
13    0.354360
2     0.333243
1     0.328189
0     0.257659
7     0.182530
4     0.174407
3     0.136073
6     0.131801
5     0.067758
8     0.003272
dtype: float64

Top 2 features: [10, 15]
Top 5 features: [10, 15, 12, 11, 14]
Top 10 features: [10, 15, 12, 11, 14, 9, 13, 2, 1, 0]


In [5]:
from sklearn.model_selection import train_test_split

# how many features to use
X = X[:, top5]     # <-- CHANGE THIS (top2, top5, top10)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [6]:
# basic printing and shape checking to confirm dataset size
print(X[0])
print("Shape X:",X.shape)
print("Shape y:", y.shape)

X_new = X[:][0:2]
print("Shape X:",X_new.shape)


[2. 2. 0. 0. 0.]
Shape X: (383, 5)
Shape y: (383,)
Shape X: (2, 5)


In [7]:
# MODEL 1: QSVC + QuantumKernel (main baseline model)
from sklearn.utils import resample
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import FidelityStatevectorKernel
from qiskit_machine_learning.algorithms import QSVC

# reduce for speed
X_small, y_small = resample(X_train, y_train, n_samples=300, random_state=42)
X_test_small = X_test[:100]
y_test_small = y_test[:100]

# quantum feature map
feature_map = ZZFeatureMap(feature_dimension=X_small.shape[1], reps=1)

quantum_kernel = FidelityStatevectorKernel(feature_map=feature_map)

qsvc = QSVC(quantum_kernel=quantum_kernel)
qsvc.fit(X_small, y_small)

score_qsvc = qsvc.score(X_test_small, y_test_small)
print("\nQSVC accuracy:", score_qsvc)




QSVC accuracy: 0.961038961038961


In [8]:
# this measures how long it takes to train the QSVC model
# DOES NOT AFFECT ACCURACY

import time
start = time.time()
qsvc.fit(X_small, y_small)
print("Training took", time.time() - start, "seconds")



Training took 0.6025853157043457 seconds


In [9]:
# MODEL 2: Classical SVC (RBF baseline)
# this is the normal, classical SVC
# this is a good baseline to compare how good the quantum methods are
from sklearn.svm import SVC

svc_rbf = SVC(kernel="rbf")
svc_rbf.fit(X_small, y_small)

score_rbf = svc_rbf.score(X_test_small, y_test_small)
print("Classical SVC (RBF) accuracy:", score_rbf)



Classical SVC (RBF) accuracy: 0.961038961038961


In [None]:
# MODEL 3: Classical SVC + Quantum Kernel as a callable
# the classical SVC calls the quantum kernel during training

svc_callable = SVC(kernel=quantum_kernel.evaluate)
svc_callable.fit(X_small, y_small)

score_callable = svc_callable.score(X_test_small, y_test_small)
print("Callable SVC + Quantum Kernel accuracy:", score_callable)



Callable SVC + Quantum Kernel accuracy: 0.961038961038961


In [11]:
# MODEL 4: Classical SVC using precomputed Quantum Kernel
# SVC + QuantumTrainableKernel (Precomputed)
# the quantum kernel matrix is computed BEFORE training

# this is the quantum kernel matrix calculation
K_train = quantum_kernel.evaluate(X_small)
K_test  = quantum_kernel.evaluate(X_test_small, X_small)

svc_pre = SVC(kernel="precomputed")
svc_pre.fit(K_train, y_small)

score_pre = svc_pre.score(K_test, y_test_small)
print("Precomputed SVC + Quantum Kernel accuracy:", score_pre)

Precomputed SVC + Quantum Kernel accuracy: 0.961038961038961


In [None]:
# MODEL 5: Trainable Quantum Kernel

# external imports 
from qiskit import QuantumCircuit
from qiskit.circuit import ParameterVector
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import TrainableFidelityQuantumKernel
from qiskit_machine_learning.kernels.algorithms import QuantumKernelTrainer
from qiskit_machine_learning.optimizers import SPSA
from qiskit_machine_learning.algorithms import QSVC
from sklearn.svm import SVC
from sklearn import metrics
from qiskit.visualization import circuit_drawer
import numpy as np

class QKTCallback:
    def __init__(self):
        self._data = [[] for _ in range(5)]

    def callback(self, x0, x1=None, x2=None, x3=None, x4=None):
        self._data[0].append(x0)
        self._data[1].append(x1)
        self._data[2].append(x2)
        self._data[3].append(x3)
        self._data[4].append(x4)

    def get_callback_data(self):
        return self._data

    def clear_callback_data(self):
        self._data = [[] for _ in range(5)]




In [None]:
# ----------------------------------------
# SECTION 1 — Feature Map + Trainable Kernel + SPSA + Trainer
# ----------------------------------------

num_features = X_train.shape[1]  # should be 5

# Trainable rotation layer
fm0 = QuantumCircuit(num_features)
training_params = ParameterVector("θ", 1)

for q in range(num_features):
    fm0.ry(training_params[0], q)

# Static ZZFeatureMap
fm1 = ZZFeatureMap(num_features, reps=1, entanglement='linear')

# Compose trainable + fixed map
fm = fm0.compose(fm1)

print(circuit_drawer(fm))
print(f"Trainable parameters: {training_params}")

# Trainable kernel
from qiskit.primitives import Sampler
sampler = Sampler()

quant_kernel = TrainableFidelityQuantumKernel(
    feature_map=fm,
    training_parameters=training_params,
    sampler=sampler,
    cache=True
)

# SPSA
cb_qkt = QKTCallback()
spsa_opt = SPSA(
    maxiter=1,
    callback=cb_qkt.callback,
    learning_rate=0.05,
    perturbation=0.05
)

# Kernel trainer
qkt = QuantumKernelTrainer(
    quantum_kernel=quant_kernel,
    loss="svc_loss",
    optimizer=spsa_opt,
    initial_point=[np.pi / 2]
)


     ┌──────────┐┌─────────────────────────────────────────┐
q_0: ┤ Ry(θ[0]) ├┤0                                        ├
     ├──────────┤│                                         │
q_1: ┤ Ry(θ[0]) ├┤1                                        ├
     ├──────────┤│                                         │
q_2: ┤ Ry(θ[0]) ├┤2 ZZFeatureMap(x[0],x[1],x[2],x[3],x[4]) ├
     ├──────────┤│                                         │
q_3: ┤ Ry(θ[0]) ├┤3                                        ├
     ├──────────┤│                                         │
q_4: ┤ Ry(θ[0]) ├┤4                                        ├
     └──────────┘└─────────────────────────────────────────┘
Trainable parameters: θ, ['θ[0]']
Training trainable quantum kernel on subset...


In [None]:
# ----------------------------------------
# SECTION 2 — Subset + Training Trainable Kernel
# ----------------------------------------

# Use a small subset of training samples to reduce runtime
subset_idx = np.random.choice(len(X_small), size=100, replace=False)
X_subset = X_small[subset_idx]
y_subset = y_small[subset_idx]

print("Training trainable quantum kernel on subset...")
qka_results = qkt.fit(X_subset, y_subset)

optimized_kernel = qka_results.quantum_kernel
print(qka_results)



     ┌──────────┐┌─────────────────────────────────────────┐
q_0: ┤ Ry(θ[0]) ├┤0                                        ├
     ├──────────┤│                                         │
q_1: ┤ Ry(θ[0]) ├┤1                                        ├
     ├──────────┤│                                         │
q_2: ┤ Ry(θ[0]) ├┤2 ZZFeatureMap(x[0],x[1],x[2],x[3],x[4]) ├
     ├──────────┤│                                         │
q_3: ┤ Ry(θ[0]) ├┤3                                        ├
     ├──────────┤│                                         │
q_4: ┤ Ry(θ[0]) ├┤4                                        ├
     └──────────┘└─────────────────────────────────────────┘
Trainable parameters: θ, ['θ[0]']


TypeError: TrainableFidelityQuantumKernel.__init__() got an unexpected keyword argument 'cache'

In [None]:
# ----------------------------------------
# SECTION 3 — QSVC + Classical SVC Using the Optimized Kernel
# ----------------------------------------

# --- Quantum SVC ---
qsvc = QSVC(quantum_kernel=optimized_kernel)
qsvc.fit(X_small, y_small)

labels_test = qsvc.predict(X_test_small)

accuracy_test = metrics.balanced_accuracy_score(y_true=y_test_small, y_pred=labels_test)
print(f"Trainable kernel QSVC test accuracy: {accuracy_test:.3f}")

# ----- Hybrid SVC + Optimal Quantum Kernel (your missing part) -----
adhoc_matrix_train = optimized_kernel.evaluate(X_small)
adhoc_matrix_test  = optimized_kernel.evaluate(X_test_small, X_small)

adhoc_svc = SVC(kernel="precomputed")
adhoc_svc.fit(adhoc_matrix_train, y_small)

adhoc_score_precomputed_kernel = adhoc_svc.score(
    adhoc_matrix_test, 
    y_test_small
)
print(f"Hybrid SVC (precomputed optimal kernel) test score: {adhoc_score_precomputed_kernel:.3f}")

# --- Classical SVC (precomputed kernel) ---
adhoc_matrix_train = optimized_kernel.evaluate(X_small)
adhoc_matrix_test  = optimized_kernel.evaluate(X_test_small, X_small)

adhoc_svc = SVC(kernel="precomputed")
adhoc_svc.fit(adhoc_matrix_train, y_small)

adhoc_score_precomputed_kernel = adhoc_svc.score(adhoc_matrix_test, y_test_small)
print(f"Precomputed kernel classification test score: {adhoc_score_precomputed_kernel:.3f}")

