In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model
from sklearn.model_selection import train_test_split
import gc
from sklearn.preprocessing import MinMaxScaler

from qiskit.circuit.library import z_feature_map, zz_feature_map, pauli_feature_map
from qiskit.circuit.library import unitary_overlap
from qiskit import transpile
from qiskit.quantum_info import Statevector, state_fidelity
from qiskit_aer import AerSimulator

In [2]:
# Column positions in the header-less CSV: TARGET is the column used as the
# classification label, RELEVANT_FEATURES are the columns fed to the model.
TARGET = 0
RELEVANT_FEATURES = list(range(1, 9))

In [3]:
# Load the first 100,000 rows of the header-less file and drop every row that
# is missing the label or any of the selected feature columns.
data = (
    pd.read_csv("SUSY.csv.gz", header=None, nrows=100_000)
    .dropna(subset=[TARGET] + list(RELEVANT_FEATURES))
)

In [None]:
# Number of trailing rows held out for evaluation.
N_TEST = 1000

# Fit the scaler on the non-held-out rows only, so that the min/max of the
# test rows cannot leak into the preprocessing. The fitted scaler is then
# applied to every row; held-out values may therefore land slightly outside
# [-pi/2, pi/2] when they exceed the range seen during fitting.
feature_frame = data[RELEVANT_FEATURES]
scaler = MinMaxScaler(feature_range=(-np.pi/2, np.pi/2))
scaler.fit(feature_frame.iloc[:-N_TEST])

data_features = scaler.transform(feature_frame)  # ndarray, one row per sample
data_target = data[TARGET]

# Hold-out set: the last N_TEST rows (positional, independent of the index
# labels left behind by dropna).
x_test = data_features[-N_TEST:]
y_test = data_target.iloc[-N_TEST:]

In [None]:
from qiskit import transpile
from qiskit_aer import AerSimulator
from qiskit.circuit.library import ZFeatureMap, ZZFeatureMap
from tqdm.notebook import tqdm
from sklearn import svm
from sklearn import metrics

class QCalculator:
    """Support-vector classifier driven by a precomputed quantum fidelity kernel.

    Each sample is bound to the parameters of the feature-map circuit ``fm``,
    the resulting state vector is obtained with ``Statevector.from_instruction``
    and the kernel entry for two samples is ``|<psi(a)|psi(b)>|**2``.

    Typical call order: ``calculate_kernel`` -> ``svm`` -> ``predict``.

    Attributes:
        fm: parameterized feature-map circuit.
        calculated: cache mapping ``tuple(sample)`` to its state-vector array.
        kernel: training Gram matrix, or ``None`` before ``calculate_kernel``.
        x_train: samples passed to the last ``calculate_kernel`` call.
        svm_linear: the fitted ``svm.SVC`` (precomputed kernel), or ``None``.
        predictions: result of the last ``predict`` call, or ``None``.
    """

    def __init__(self, fm):
        self.fm = fm
        self.calculated = {}
        self.kernel = None
        # Initialised here so that out-of-order calls fail with a clear error
        # instead of an AttributeError.
        self.x_train = None
        self.svm_linear = None
        self.predictions = None

    def psi(self, x):
        """Return the ``Statevector`` of the feature map with parameters ``x``."""
        bound = self.fm.assign_parameters(x)
        return Statevector.from_instruction(bound)

    def resolve_psi(self, mat):
        """Return the state vectors of all rows of ``mat`` as matrix columns.

        Rows already seen (same values, compared as a tuple) are served from
        ``self.calculated`` instead of being simulated again.

        Raises:
            ValueError: if ``mat`` contains no rows.
        """
        if len(mat) == 0:
            raise ValueError("resolve_psi needs at least one sample")

        columns = []
        for row in tqdm(mat, leave=False):
            key = tuple(row)
            state = self.calculated.get(key)
            if state is None:
                state = self.psi(row).data
                self.calculated[key] = state
            columns.append(state)
        return np.column_stack(columns)

    @staticmethod
    def _fidelity(psi_a, psi_b):
        """Squared overlap magnitude between every column of ``psi_a`` and ``psi_b``."""
        return np.abs(psi_a.conj().T @ psi_b) ** 2

    def quantum_kernel(self, A, B):
        """Return the kernel matrix with shape ``(len(A), len(B))``."""
        return self._fidelity(self.resolve_psi(A), self.resolve_psi(B))

    def calculate_kernel(self, x_train):
        """Store ``x_train`` and compute its Gram matrix into ``self.kernel``."""
        self.x_train = x_train
        # Both sides of the Gram matrix are the same samples: resolve them once.
        psi_train = self.resolve_psi(x_train)
        self.kernel = self._fidelity(psi_train, psi_train)

    def svm(self, c, y_train):
        """Fit an SVC with regularisation ``c`` on the stored training kernel.

        Raises:
            RuntimeError: if ``calculate_kernel`` has not been called yet.
        """
        if self.kernel is None:
            raise RuntimeError("call calculate_kernel() before svm()")
        self.svm_linear = svm.SVC(kernel='precomputed', C=c)
        self.svm_linear.fit(self.kernel, y_train)

    def predict(self, x_test, y_test=None):
        """Predict labels for ``x_test`` with the fitted classifier.

        Args:
            x_test: samples to classify.
            y_test: unused; accepted only so existing two-argument calls keep
                working.

        Returns:
            The predicted labels, also stored in ``self.predictions``.

        Raises:
            RuntimeError: if ``svm`` has not been called yet.
        """
        if self.svm_linear is None or self.x_train is None:
            raise RuntimeError("call calculate_kernel() and svm() before predict()")
        # A precomputed-kernel SVC expects one row per test sample and one
        # column per training sample.
        test_kernel = self.quantum_kernel(x_test, self.x_train)
        self.predictions = self.svm_linear.predict(test_kernel)
        return self.predictions
                    

In [6]:
# Learning curve: fit the kernel SVM (C=1) on growing prefixes of the data and
# score it on the fixed hold-out set.
# Printed columns: training size, accuracy, precision, recall, F1.
TRAIN_SIZES = [100, 200, 400, 800, 1600, 3200, 6400, 12800, 25600]

# Loop-invariant pieces are built once.
feature_map = z_feature_map(feature_dimension=8, reps=2)
x_test_real = x_test * 0.5  # same halving as applied to the training rows

# State vectors depend only on the sample values, and every training prefix
# contains the previous one, so one cache shared by all runs avoids
# re-simulating the test rows and the already-seen training rows.
psi_cache = {}

toprints = []
for num in TRAIN_SIZES:
    x_train = data_features[:num]
    y_train = data_target[:num]
    x_train_real = x_train * 0.5

    qc = QCalculator(feature_map)
    qc.calculated = psi_cache
    qc.calculate_kernel(x_train_real)
    qc.svm(1, y_train)
    predictions = qc.predict(x_test_real, y_test)

    met_ac = metrics.accuracy_score(y_true=y_test, y_pred=predictions)
    met_pr = metrics.precision_score(y_true=y_test, y_pred=predictions)
    met_rec = metrics.recall_score(y_true=y_test, y_pred=predictions)
    met_f1 = metrics.f1_score(y_true=y_test, y_pred=predictions)
    toprints.append(f"{num}\t\t{met_ac:.4f}\t\t{met_pr:.4f}\t\t{met_rec:.4f}\t\t{met_f1:.4f}")

for line in toprints:
    print(line)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/400 [00:00<?, ?it/s]

  0%|          | 0/400 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/400 [00:00<?, ?it/s]

  0%|          | 0/800 [00:00<?, ?it/s]

  0%|          | 0/800 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/800 [00:00<?, ?it/s]

  0%|          | 0/1600 [00:00<?, ?it/s]

  0%|          | 0/1600 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1600 [00:00<?, ?it/s]

  0%|          | 0/3200 [00:00<?, ?it/s]

  0%|          | 0/3200 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/3200 [00:00<?, ?it/s]

  0%|          | 0/6400 [00:00<?, ?it/s]

  0%|          | 0/6400 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/6400 [00:00<?, ?it/s]

  0%|          | 0/12800 [00:00<?, ?it/s]

  0%|          | 0/12800 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/12800 [00:00<?, ?it/s]

  0%|          | 0/25600 [00:00<?, ?it/s]

  0%|          | 0/25600 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/25600 [00:00<?, ?it/s]

100		0.5900		0.6495		0.2692		0.3807
200		0.6490		0.7647		0.3611		0.4906
400		0.6930		0.7361		0.5363		0.6205
800		0.7210		0.8649		0.4786		0.6162
1600		0.7300		0.8322		0.5299		0.6475
3200		0.7400		0.8210		0.5684		0.6717
6400		0.7420		0.8088		0.5876		0.6807
12800		0.7470		0.7994		0.6132		0.6941
25600		0.7510		0.7984		0.6261		0.7018


Results vary with the number of test samples; another run gave (columns: training size, accuracy, precision, recall, F1):

100		0.6430		0.7342		0.3718		0.4936
200		0.6750		0.7544		0.4530		0.5661
400		0.7240		0.7637		0.5940		0.6683
800		0.7420		0.8125		0.5833		0.6791
1600		0.7440		0.8081		0.5940		0.6847
3200		0.7520		0.8125		0.6111		0.6976
6400		0.7520		0.8039		0.6218		0.7012
12800		0.7550		0.8038		0.6303		0.7066
25600		0.7560		0.8011		0.6368		0.7095

In [7]:
# Regularisation sweep: same 1000 training rows and same hold-out set for
# every value of C.
# Printed columns: C, accuracy, precision, recall, F1.
C_wide = np.array([0.5,1,2,3,4,5])

x_train = data_features[:1000]
y_train = data_target[:1000]
x_train_real = x_train * 0.5
x_test_real = x_test * 0.5

# The training kernel does not depend on C, so it is computed once; only the
# SVM fit and the prediction are repeated inside the loop.
qc = QCalculator(z_feature_map(feature_dimension=8, reps=2))
qc.calculate_kernel(x_train_real)

toprints = []
for c in C_wide:
    qc.svm(c, y_train)
    predictions = qc.predict(x_test_real, y_test)

    met_ac = metrics.accuracy_score(y_true=y_test, y_pred=predictions)
    met_pr = metrics.precision_score(y_true=y_test, y_pred=predictions)
    met_rec = metrics.recall_score(y_true=y_test, y_pred=predictions)
    met_f1 = metrics.f1_score(y_true=y_test, y_pred=predictions)
    toprints.append(f"{c}\t\t{met_ac:.4f}\t\t{met_pr:.4f}\t\t{met_rec:.4f}\t\t{met_f1:.4f}")

for line in toprints:
    print(line)

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

0.5		0.7130		0.8694		0.4551		0.5975
1.0		0.7300		0.8438		0.5192		0.6429
2.0		0.7370		0.8339		0.5470		0.6606
3.0		0.7380		0.8179		0.5662		0.6692
4.0		0.7420		0.8070		0.5897		0.6815
5.0		0.7450		0.8160		0.5876		0.6832


0.001		0.5320		0.0000		0.0000		0.0000
0.01		0.5320		0.0000		0.0000		0.0000
0.1		    0.7000		0.8415		0.4423		0.5798
1.0		    0.7430		0.8112		0.5876		0.6815
10.0		0.7320		0.7475		0.6453		0.6927
100.0		0.7270		0.7273		0.6667		0.6957
1000.0		0.7050		0.6860		0.6816		0.6838

0.5		0.7310		0.7988		0.5684		0.6642
1.0		0.7430		0.8112		0.5876		0.6815
2.0		0.7500		0.7995		0.6218		0.6995
3.0		0.7520		0.7957		0.6325		0.7048 // sweet spot
4.0		0.7450		0.7825		0.6303		0.6982
5.0		0.7420		0.7778		0.6282		0.6950