# Image data classification (using QSVM)

The goal is to build a model that classifies handwritten digits. <br />
The data is taken from [MNIST](https://yann.lecun.com/exdb/mnist/) (as used in the lecture: 28x28 pixel images of handwritten digits).

In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from qiskit.circuit.library import ZFeatureMap, ZZFeatureMap, PauliFeatureMap
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split


import os
import gzip
import numpy as np
import matplotlib.pyplot as plt
from pylab import cm
import warnings

# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the imported
# libraries; consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# scikit-learn imports
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Qiskit imports
from qiskit.providers.basic_provider import BasicSimulator
from qiskit.circuit import QuantumCircuit, Parameter, ParameterVector
from qiskit.circuit.library import PauliFeatureMap, ZFeatureMap, ZZFeatureMap
from qiskit.circuit.library import TwoLocal, NLocal, RealAmplitudes, EfficientSU2
from qiskit.circuit.library import HGate, RXGate, RYGate, RZGate, CXGate, CRXGate, CRZGate
from qiskit_machine_learning.kernels import FidelityQuantumKernel

## Digits classification 4 and 9 (binary classification)

### 1. Preprocessing

In [11]:
# Location of the pre-packaged dataset archive.
DATA_PATH = './data/mnist.npz'

# np.load on an .npz archive returns a lazily-read NpzFile that keeps the file
# open; the context manager closes it once the arrays have been pulled out.
with np.load(DATA_PATH) as data:
    sample_train = data['sample_train']
    labels_train = data['labels_train']
    sample_test = data['sample_test']

# Hold out 20% of the training samples as a validation set (fixed seed for
# reproducibility).
sample_train, sample_val, labels_train, labels_val = train_test_split(
    sample_train, labels_train, test_size=0.2, random_state=42)

# NOTE(review): LABELS is not used anywhere in this cell, and the section
# heading talks about digits 4 and 9 - confirm the intended value.
LABELS = [4, 5]

# Step 1: standardise the features. Every transformer below is fitted on the
# training split only and then applied unchanged to validation and test data,
# so no information leaks from the held-out splits.
ss = StandardScaler()
sample_train = ss.fit_transform(sample_train)
sample_val = ss.transform(sample_val)
sample_test = ss.transform(sample_test)

# Step 2: reduce to N_DIM principal components. N_DIM is also used as the
# feature dimension of the feature maps in the model cells.
N_DIM = 5
pca = PCA(n_components=N_DIM)
sample_train = pca.fit_transform(sample_train)
sample_val = pca.transform(sample_val)
sample_test = pca.transform(sample_test)

# Step 3: rescale the training data into [-1, 1]. Validation/test values may
# fall slightly outside that range because the scaler is fitted on train only.
mms = MinMaxScaler((-1, 1))
sample_train = mms.fit_transform(sample_train)
sample_val = mms.transform(sample_val)
sample_test = mms.transform(sample_test)

### 2. Model (training and evaluation)

In [12]:
def linear_encoding(x):
    """Identity pre-encoding: pass the features on unchanged.

    Parameters
    ----------
    x : the feature data to encode.

    Returns
    -------
    The very same object that was passed in (no copy is made).
    """
    return x
def arcsine_encoding(x):
    """Arcsine pre-encoding: return ``arcsin(x / (2 * pi))`` element-wise.

    The input is divided by 2*pi before the arcsine is taken, so any value
    with ``|x| <= 2*pi`` stays inside the domain of ``arcsin``.

    Parameters
    ----------
    x : scalar or NumPy array of feature values.

    Returns
    -------
    The encoded value(s), as produced by ``np.arcsin``.
    """
    scaled = x / (2 * np.pi)
    return np.arcsin(scaled)

# Feature-map repetition counts (circuit depths) to sweep over.
repetitions = [1, 3, 5, 7, 9, 11, 13, 15]

# Set to True to plot the training/validation kernel matrices for every
# repetition count (off by default to keep the cell output short).
SHOW_KERNEL_MATRICES = False

# Note: the feature map already applies its own data-mapping functions
# (psi(x) for single features and psi(x, y) for pairs of features).
# psi(x) is just the identity, so passing the raw features is equivalent to
# linear encoding.
# In this experiment the data is additionally pre-encoded with an arcsine
# function (next to the plain linear encoding) before it is fed into the psi
# functions of the feature map.
# ZFeatureMap has no psi(x, y) term and therefore only uses linear encoding,
# so the last cell gives the best comparison between linear and arcsine
# encoding.

feature_map = PauliFeatureMap(feature_dimension=N_DIM, paulis=['Z'], reps=1)
print("Encoding with PauliFeatureMap")

# The classical pre-encodings do not depend on the repetition count, so they
# are computed once up front. No manual re-uploading is needed either: the
# feature map repeats the encoding `reps` times by itself.
X_train_reupload_linear = linear_encoding(sample_train)
X_train_reupload_arcsine = arcsine_encoding(sample_train)
X_test_reupload_linear = linear_encoding(sample_val)
X_test_reupload_arcsine = arcsine_encoding(sample_val)

for r in repetitions:
    print(f"Training with repetition rate: {r}")

    # Re-use the same feature-map object and only change its depth.
    feature_map.reps = r
    quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

    # Train matrices are (n_train x n_train); validation matrices hold the
    # kernel between every validation sample (rows) and training sample (cols),
    # which is the layout SVC(kernel='precomputed') expects for scoring.
    matrix_train_linear = quantum_kernel.evaluate(x_vec=X_train_reupload_linear)
    matrix_val_linear = quantum_kernel.evaluate(x_vec=X_test_reupload_linear, y_vec=X_train_reupload_linear)
    matrix_train_arcsine = quantum_kernel.evaluate(x_vec=X_train_reupload_arcsine)
    matrix_val_arcsine = quantum_kernel.evaluate(x_vec=X_test_reupload_arcsine, y_vec=X_train_reupload_arcsine)

    if SHOW_KERNEL_MATRICES:
        for encoding_name, kernel_train, kernel_val in (
            ("linear", matrix_train_linear, matrix_val_linear),
            ("arcsine", matrix_train_arcsine, matrix_val_arcsine),
        ):
            fig, axs = plt.subplots(1, 2, figsize=(10, 5))
            axs[0].imshow(np.asarray(kernel_train),
                          interpolation='nearest', origin='upper', cmap='Blues')
            axs[0].set_title(f"training kernel matrix {encoding_name}")
            axs[1].imshow(np.asarray(kernel_val),
                          interpolation='nearest', origin='upper', cmap='Reds')
            axs[1].set_title(f"validation kernel matrix {encoding_name}")
            plt.show()
            plt.close(fig)  # avoid piling up open figures across iterations

    # One SVM instance is refitted for each encoding; fit() discards the
    # previous model, so the two scores are independent.
    svm = SVC(kernel='precomputed')
    svm.fit(matrix_train_linear, labels_train)
    score_linear = svm.score(matrix_val_linear, labels_val)
    svm.fit(matrix_train_arcsine, labels_train)
    score_arcsine = svm.score(matrix_val_arcsine, labels_val)

    # The printed "test score" is the accuracy on the validation split.
    print(f"Precomputed kernel classification test score (using linear): {score_linear:.4f}")
    print(f"Precomputed kernel classification test score (using arcsine): {score_arcsine:.4f}")
    

Encoding: Serial
Training with repetition rate: 1
Precomputed kernel classification test score (using linear): 0.8000
Precomputed kernel classification test score (using arcsine): 0.5500
Training with repetition rate: 3
Precomputed kernel classification test score (using linear): 0.6500
Precomputed kernel classification test score (using arcsine): 0.7000
Training with repetition rate: 5
Precomputed kernel classification test score (using linear): 0.7000
Precomputed kernel classification test score (using arcsine): 0.7500
Training with repetition rate: 7
Precomputed kernel classification test score (using linear): 0.7000
Precomputed kernel classification test score (using arcsine): 0.7000
Training with repetition rate: 9
Precomputed kernel classification test score (using linear): 0.5000
Precomputed kernel classification test score (using arcsine): 0.7500
Training with repetition rate: 11
Precomputed kernel classification test score (using linear): 0.6500
Precomputed kernel classificati

In [15]:
def linear_encoding(x):
    """Linear (identity) pre-encoding of the features.

    Parameters
    ----------
    x : the feature data to encode.

    Returns
    -------
    ``x`` itself, untouched and uncopied.
    """
    return x
def arcsine_encoding(x):
    """Pre-encode features as ``arcsin(x / (2 * pi))``, element-wise.

    Dividing by 2*pi first keeps every input with ``|x| <= 2*pi`` inside the
    domain of ``arcsin``.

    Parameters
    ----------
    x : scalar or NumPy array of feature values.

    Returns
    -------
    The encoded value(s), as produced by ``np.arcsin``.
    """
    two_pi = 2 * np.pi
    return np.arcsin(x / two_pi)

# Feature-map repetition counts (circuit depths) to sweep over.
repetitions = [1, 3, 5, 7, 9, 11, 13, 15]

# Set to True to plot the training/validation kernel matrices for every
# repetition count (off by default to keep the cell output short).
SHOW_KERNEL_MATRICES = False

feature_map = ZZFeatureMap(feature_dimension=N_DIM, entanglement='linear', reps=1)
print("Encoding with ZZFeatureMap")

# The classical pre-encodings do not depend on the repetition count, so they
# are computed once up front. No manual re-uploading is needed either: the
# feature map repeats the encoding `reps` times by itself.
X_train_reupload_linear = linear_encoding(sample_train)
X_train_reupload_arcsine = arcsine_encoding(sample_train)
X_test_reupload_linear = linear_encoding(sample_val)
X_test_reupload_arcsine = arcsine_encoding(sample_val)

for r in repetitions:
    print(f"Training with repetition rate: {r}")

    # Re-use the same feature-map object and only change its depth.
    feature_map.reps = r
    quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

    # Train matrices are (n_train x n_train); validation matrices hold the
    # kernel between every validation sample (rows) and training sample (cols),
    # which is the layout SVC(kernel='precomputed') expects for scoring.
    matrix_train_linear = quantum_kernel.evaluate(x_vec=X_train_reupload_linear)
    matrix_val_linear = quantum_kernel.evaluate(x_vec=X_test_reupload_linear, y_vec=X_train_reupload_linear)
    matrix_train_arcsine = quantum_kernel.evaluate(x_vec=X_train_reupload_arcsine)
    matrix_val_arcsine = quantum_kernel.evaluate(x_vec=X_test_reupload_arcsine, y_vec=X_train_reupload_arcsine)

    if SHOW_KERNEL_MATRICES:
        for encoding_name, kernel_train, kernel_val in (
            ("linear", matrix_train_linear, matrix_val_linear),
            ("arcsine", matrix_train_arcsine, matrix_val_arcsine),
        ):
            fig, axs = plt.subplots(1, 2, figsize=(10, 5))
            axs[0].imshow(np.asarray(kernel_train),
                          interpolation='nearest', origin='upper', cmap='Blues')
            axs[0].set_title(f"training kernel matrix {encoding_name}")
            axs[1].imshow(np.asarray(kernel_val),
                          interpolation='nearest', origin='upper', cmap='Reds')
            axs[1].set_title(f"validation kernel matrix {encoding_name}")
            plt.show()
            plt.close(fig)  # avoid piling up open figures across iterations

    # One SVM instance is refitted for each encoding; fit() discards the
    # previous model, so the two scores are independent.
    svm = SVC(kernel='precomputed')
    svm.fit(matrix_train_linear, labels_train)
    score_linear = svm.score(matrix_val_linear, labels_val)
    svm.fit(matrix_train_arcsine, labels_train)
    score_arcsine = svm.score(matrix_val_arcsine, labels_val)

    # The printed "test score" is the accuracy on the validation split.
    print(f"Precomputed kernel classification test score (using linear): {score_linear:.4f}")
    print(f"Precomputed kernel classification test score (using arcsine): {score_arcsine:.4f}")
    

Encoding with ZZFeatureMap
Training with repetition rate: 1
Precomputed kernel classification test score (using linear): 0.5000
Precomputed kernel classification test score (using arcsine): 0.7000
Training with repetition rate: 3
Precomputed kernel classification test score (using linear): 0.6000
Precomputed kernel classification test score (using arcsine): 0.7500
Training with repetition rate: 5
Precomputed kernel classification test score (using linear): 0.7000
Precomputed kernel classification test score (using arcsine): 0.7500
Training with repetition rate: 7
Precomputed kernel classification test score (using linear): 0.5000
Precomputed kernel classification test score (using arcsine): 0.7000
Training with repetition rate: 9
Precomputed kernel classification test score (using linear): 0.5500
Precomputed kernel classification test score (using arcsine): 0.7000
Training with repetition rate: 11
Precomputed kernel classification test score (using linear): 0.6000
Precomputed kernel cl

KeyboardInterrupt: 

In [None]:
def linear_encoding(x):
    """Return the features as-is (identity / linear pre-encoding).

    Parameters
    ----------
    x : the feature data to encode.

    Returns
    -------
    The input object itself; nothing is copied or modified.
    """
    return x
def arcsine_encoding(x):
    """Element-wise arcsine pre-encoding, ``arcsin(x / (2 * pi))``.

    The division by 2*pi happens before the arcsine, so inputs with
    ``|x| <= 2*pi`` remain inside the domain of ``arcsin``.

    Parameters
    ----------
    x : scalar or NumPy array of feature values.

    Returns
    -------
    The encoded value(s), as produced by ``np.arcsin``.
    """
    ratio = x / (2 * np.pi)
    encoded = np.arcsin(ratio)
    return encoded

# Feature-map repetition counts (circuit depths) to sweep over.
repetitions = [1, 3, 5, 7, 9, 11, 13, 15]

# Set to True to plot the training/validation kernel matrices for every
# repetition count (off by default to keep the cell output short).
SHOW_KERNEL_MATRICES = False

feature_map = ZFeatureMap(feature_dimension=N_DIM, reps=1)
print("Encoding with ZFeatureMap")

# The classical pre-encodings do not depend on the repetition count, so they
# are computed once up front. No manual re-uploading is needed either: the
# feature map repeats the encoding `reps` times by itself.
X_train_reupload_linear = linear_encoding(sample_train)
X_train_reupload_arcsine = arcsine_encoding(sample_train)
X_test_reupload_linear = linear_encoding(sample_val)
X_test_reupload_arcsine = arcsine_encoding(sample_val)

for r in repetitions:
    print(f"Training with repetition rate: {r}")

    # Re-use the same feature-map object and only change its depth.
    feature_map.reps = r
    quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

    # Train matrices are (n_train x n_train); validation matrices hold the
    # kernel between every validation sample (rows) and training sample (cols),
    # which is the layout SVC(kernel='precomputed') expects for scoring.
    matrix_train_linear = quantum_kernel.evaluate(x_vec=X_train_reupload_linear)
    matrix_val_linear = quantum_kernel.evaluate(x_vec=X_test_reupload_linear, y_vec=X_train_reupload_linear)
    matrix_train_arcsine = quantum_kernel.evaluate(x_vec=X_train_reupload_arcsine)
    matrix_val_arcsine = quantum_kernel.evaluate(x_vec=X_test_reupload_arcsine, y_vec=X_train_reupload_arcsine)

    if SHOW_KERNEL_MATRICES:
        for encoding_name, kernel_train, kernel_val in (
            ("linear", matrix_train_linear, matrix_val_linear),
            ("arcsine", matrix_train_arcsine, matrix_val_arcsine),
        ):
            fig, axs = plt.subplots(1, 2, figsize=(10, 5))
            axs[0].imshow(np.asarray(kernel_train),
                          interpolation='nearest', origin='upper', cmap='Blues')
            axs[0].set_title(f"training kernel matrix {encoding_name}")
            axs[1].imshow(np.asarray(kernel_val),
                          interpolation='nearest', origin='upper', cmap='Reds')
            axs[1].set_title(f"validation kernel matrix {encoding_name}")
            plt.show()
            plt.close(fig)  # avoid piling up open figures across iterations

    # One SVM instance is refitted for each encoding; fit() discards the
    # previous model, so the two scores are independent.
    svm = SVC(kernel='precomputed')
    svm.fit(matrix_train_linear, labels_train)
    score_linear = svm.score(matrix_val_linear, labels_val)
    svm.fit(matrix_train_arcsine, labels_train)
    score_arcsine = svm.score(matrix_val_arcsine, labels_val)

    # The printed "test score" is the accuracy on the validation split.
    print(f"Precomputed kernel classification test score (using linear): {score_linear:.4f}")
    print(f"Precomputed kernel classification test score (using arcsine): {score_arcsine:.4f}")
    

Encoding with ZFeatureMap
Training with repetition rate: 1
Precomputed kernel classification test score (using linear): 0.8000
Precomputed kernel classification test score (using arcsine): 0.5500
Training with repetition rate: 3
Precomputed kernel classification test score (using linear): 0.6500
Precomputed kernel classification test score (using arcsine): 0.7000
Training with repetition rate: 5
