Read the file and split in X_train, X_test, y_train, y_test

In [2]:
import csv
import numpy as np

# to read the file into a python list
filename = "datasets/Thyroid_Diff.csv"
with open(filename, 'r') as f:
    reader = csv.reader(f)
    header = next(reader)
    data = [row for row in reader]

# this converts the list to an array of objects bc some columns are actually strings
data = np.array(data, dtype=object)  

# split features X from  labels y
X = data[:, :-1]  # all columns except last
y = data[:, -1]   # last column


In [3]:
X[0]

array(['27', 'F', 'No', 'No', 'No', 'Euthyroid',
       'Single nodular goiter-left', 'No', 'Micropapillary', 'Uni-Focal',
       'Low', 'T1a', 'N0', 'M0', 'I', 'Indeterminate'], dtype=object)

In [4]:
# converts string values into integers 
from sklearn.preprocessing import LabelEncoder

# encode X
for i in range(X.shape[1]):
    le = LabelEncoder()
    X[:, i] = le.fit_transform(X[:, i])

# convert to float to make sure can be used, 
# all values must be numbers before QSVC
X = X.astype(float)

# encode y
y_enc = LabelEncoder()
y = y_enc.fit_transform(y)


In [10]:
print(X[0])
print("Shape X:",X.shape)
print("Shape y:", y.shape)

X_new = X[:][0:2]
print("Shape X:",X_new.shape)


[11.  0.  0.  0.  0.  2.  3.  3.  2.  1.  2.  0.  0.  0.  0.  2.]
Shape X: (383, 16)
Shape y: (383,)
Shape X: (2, 16)


In [6]:
# split into X_train, X_test, y_test, y_train
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


Apply any of the tecniques from notebook 04_quantum_kernel in your dataset (QSVC, QClustering, QPCA)

In [None]:
# applying QSVC
from qiskit.circuit.library import ZZFeatureMap
from qiskit.primitives import Sampler
from qiskit_machine_learning.state_fidelities import ComputeUncompute
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.algorithms import QSVC

# quantum kernel since QSVC uses kernel-based method
feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], reps=2, entanglement="linear")
sampler = Sampler()  # classical simulator
fidelity = ComputeUncompute(sampler=sampler)
quantum_kernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=feature_map)

# training QSVC
qsvc = QSVC(quantum_kernel=quantum_kernel)
qsvc.fit(X_train, y_train)

# QSVC predicts labels on test data for new test samples using the trained kernel
score = qsvc.score(X_test, y_test)
print(f"QSVC classification test score: {score:.2f}")