Read the file and split in X_train, X_test, y_train, y_test

In [10]:
import csv
import numpy as np

# to read the file into a python list
filename = "datasets/Thyroid_Diff.csv"
with open(filename, 'r') as f:
    reader = csv.reader(f)
    header = next(reader)
    data = [row for row in reader]

# this converts the list to an array of objects bc some columns are actually strings
data = np.array(data, dtype=object)  

# split features X from  labels y
X = data[:, :-1]  # all columns except last
y = data[:, -1]   # last column


In [11]:
X[0]

array(['27', 'F', 'No', 'No', 'No', 'Euthyroid',
       'Single nodular goiter-left', 'No', 'Micropapillary', 'Uni-Focal',
       'Low', 'T1a', 'N0', 'M0', 'I', 'Indeterminate'], dtype=object)

In [12]:
# converts string values into integers 
from sklearn.preprocessing import LabelEncoder

# encode X
for i in range(X.shape[1]):
    le = LabelEncoder()
    X[:, i] = le.fit_transform(X[:, i])

# convert to float to make sure can be used, 
# all values must be numbers before QSVC
X = X.astype(float)

# encode y
y_enc = LabelEncoder()
y = y_enc.fit_transform(y)


In [13]:
print(X[0])
print("Shape X:",X.shape)
print("Shape y:", y.shape)

X_new = X[:][0:2]
print("Shape X:",X_new.shape)


[11.  0.  0.  0.  0.  2.  3.  3.  2.  1.  2.  0.  0.  0.  0.  2.]
Shape X: (383, 16)
Shape y: (383,)
Shape X: (2, 16)


In [14]:
# split into X_train, X_test, y_test, y_train
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


Apply any of the tecniques from notebook 04_quantum_kernel in your dataset (QSVC, QClustering, QPCA)

In [15]:
# safe QSVC test block
from sklearn.utils import resample
from qiskit.circuit.library import ZZFeatureMap
from qiskit.primitives import StatevectorSampler
from qiskit_machine_learning.kernels import FidelityStatevectorKernel
from qiskit_machine_learning.algorithms import QSVC

# reduce training set for speed
X_small, y_small = resample(X_train, y_train, n_samples=400, random_state=42)

# reduce test set
X_test_small = X_test[:100]
y_test_small = y_test[:100]

# use only first two features (2 qubits)
X_small = X_small[:, :6]
X_test_small = X_test_small[:, :6]

# simple feature map (fast)
feature_map = ZZFeatureMap(feature_dimension=X_small.shape[1], reps=1, entanglement="linear")

# build the kernel (no sampler argument in v0.8.4)
quantum_kernel = FidelityStatevectorKernel(feature_map=feature_map)

# train and evaluate QSVC
qsvc = QSVC(quantum_kernel=quantum_kernel)
qsvc.fit(X_small, y_small)

score = qsvc.score(X_test_small, y_test_small)
print(f"QSVC test accuracy (100 train / 20 test, 6 features): {score:.3f}")





QSVC test accuracy (100 train / 20 test, 6 features): 0.779


In [16]:
import time
start = time.time()
qsvc.fit(X_small, y_small)
print("Training took", time.time() - start, "seconds")


Training took 1.1977260112762451 seconds
