### The experiments use the handwritten digits dataset provided by sklearn, which is essentially a smaller, preprocessed counterpart of the MNIST dataset. This preprocessing reduces dimensionality and gives invariance to small distortions.

For more details visit - http://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits

In [None]:
import numpy as np
from sklearn.datasets import load_digits

# Load the digits dataset as a (samples x features) matrix plus a label vector.
data, labels = load_digits(return_X_y=True)

# Derive the dataset dimensions reported in the summary line below.
n_samples, n_features = data.shape
n_digits = np.unique(labels).size

print(f"# digits: {n_digits}; # samples: {n_samples}; # features {n_features}")

# digits: 10; # samples: 1797; # features 64


### Here we extract only the samples labelled as digit 4 or digit 5, since we are currently working with a binary classifier. We also relabel 4 and 5 as 0 and 1 respectively, so that the outputs can be handled in quantum circuits later on.

In [None]:
# Binary relabelling: only digits 4 and 5 are kept, encoded as 0 and 1.
digit_to_binary = {4: 0, 5: 1}

data_01 = []
labels_01 = []

# Walk samples and labels in lockstep instead of indexing by position.
for sample, digit in zip(data, labels):
    binary_label = digit_to_binary.get(digit)
    if binary_label is None:
        continue  # digit is neither 4 nor 5 — drop the sample
    data_01.append(sample)
    labels_01.append(binary_label)

In [None]:
# Number of samples retained after keeping only digits 4 and 5.
len(labels_01)

363

### Out of these, we randomly select 128 samples for our experiments: 64 training samples and 64 testing samples.

In [None]:
import random

x = data_01
y = labels_01
m = 128  # total number of (sample, label) pairs to draw

# Draw from a dedicated, seeded generator so that re-running the notebook
# selects the same m samples every time. The unseeded module-level
# `random.sample` picked a different subset on each run. The seed value
# itself is arbitrary; using a private generator leaves the global
# `random` state untouched.
SAMPLE_SEED = 22
rng = random.Random(SAMPLE_SEED)

# Sample features and labels as pairs so every sample keeps its own label,
# then unzip the chosen pairs back into two parallel tuples.
X, y = zip(*rng.sample(list(zip(x, y)), m))

In [None]:
# Inspect the randomly selected feature vectors.
X

(array([ 0.,  0.,  0., 13.,  8.,  0.,  0.,  0.,  0.,  0.,  2., 15.,  1.,
         0.,  0.,  0.,  0.,  0., 11., 10.,  0.,  8.,  2.,  0.,  0.,  4.,
        16.,  5., 11., 16.,  8.,  0.,  0.,  7., 16., 16., 16., 16.,  3.,
         0.,  0.,  2., 13.,  9., 16., 12.,  0.,  0.,  0.,  0.,  0.,  7.,
        16.,  6.,  0.,  0.,  0.,  0.,  0., 13., 15.,  1.,  0.,  0.]),
 array([ 0.,  1.,  8.,  8., 11., 15., 10.,  0.,  0.,  4., 16., 16., 11.,
        12.,  6.,  0.,  0.,  4., 16.,  4.,  0.,  0.,  0.,  0.,  0.,  1.,
        16., 15.,  8.,  0.,  0.,  0.,  0.,  0.,  4., 10., 16.,  6.,  0.,
         0.,  0.,  0.,  0.,  0., 12., 12.,  0.,  0.,  0.,  6., 15.,  9.,
        13., 10.,  0.,  0.,  0.,  1., 13., 16., 13.,  4.,  0.,  0.]),
 array([ 0.,  8., 16., 12., 15., 16.,  7.,  0.,  0., 13., 16., 14.,  6.,
         4.,  1.,  0.,  0., 12., 10.,  0.,  0.,  0.,  0.,  0.,  0.,  3.,
        16., 10.,  0.,  0.,  0.,  0.,  0.,  0.,  6., 15.,  9.,  0.,  0.,
         0.,  0.,  0.,  0.,  4., 16.,  2.,  0.,  0.,  0.,

In [None]:
# Inspect the binary labels paired with the selected samples.
y

(0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1)

In [None]:
import sklearn
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn import datasets
from sklearn import metrics

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler

### We use PCA to reduce the number of features per sample from 64 to 5, in order to reduce computation time.

In [None]:
# Randomly split the selected samples into equal training and testing halves
# (the fixed random_state keeps the split itself repeatable).
sample_train, sample_test, label_train, label_test = train_test_split(
    X, y, test_size=0.5, random_state=22
)

# Dimensionality reduction: the PCA projection is learned from the training
# half only and then applied to both halves.
n_dim = 5
pca = PCA(n_components=n_dim)
pca.fit(sample_train)
sample_train = pca.transform(sample_train)
sample_test = pca.transform(sample_test)

# Standardise each component using statistics of the training half.
std_scale = StandardScaler()
std_scale.fit(sample_train)
sample_train = std_scale.transform(sample_train)
sample_test = std_scale.transform(sample_test)

# Rescale every component into [-1, 1]; the range is fitted on the stacked
# train + test rows.
# NOTE(review): fitting on both halves lets test-set extremes influence the
# training features — confirm this is intended.
samples = np.concatenate((sample_train, sample_test), axis=0)
minmax_scale = MinMaxScaler(feature_range=(-1, 1))
minmax_scale.fit(samples)
sample_train = minmax_scale.transform(sample_train)
sample_test = minmax_scale.transform(sample_test)

# Final names used from here on: training data ...
X, y = sample_train, label_train

# ... and testing data.
X_test, y_test = sample_test, label_test

# Value of Q (sample complexity), to be used later in the Quantum Boosting
# Algorithms.
no_of_Q = 4


### **In order to maintain consistency across all experiments, the same data samples printed below have been copied and used as inputs in all our experiments**

In [None]:
# Training features after PCA, standardisation and min-max scaling.
X

array([[-0.80589907, -0.59042291, -0.05324325,  0.10774176, -0.04505932],
       [ 0.06256734, -0.37244006,  0.0939138 , -0.2033949 , -0.94811908],
       [-0.84338317, -0.6326198 , -0.43462352,  0.24523402, -0.52480534],
       [ 0.13286258,  0.37155658, -0.87001548,  0.70985865,  0.6677744 ],
       [-0.75234354, -0.21787303,  0.12046462, -0.59419401, -0.56680623],
       [ 0.28907508,  0.9888873 , -0.36947398, -0.3852527 ,  0.12494092],
       [-0.94193876, -0.07222975, -0.51910519, -0.34613681, -0.50121778],
       [-0.63461962, -0.66262997, -0.01359157, -0.20885209,  0.32881507],
       [ 0.26177535,  0.73210506,  0.14869942, -0.08739544, -0.41974897],
       [-0.87627756, -0.05459176, -0.79168413, -0.44217547, -0.76591014],
       [-0.31582251, -0.65735629,  0.80118443, -0.14383058,  0.05621122],
       [-0.56914731,  0.02827011, -0.70470652, -0.62345748,  0.07444681],
       [ 0.35079842,  0.84817601, -0.38494076, -0.29560828, -0.07015869],
       [ 0.60451149, -0.77078297, -0.0

In [None]:
# Training labels.
y

[0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0]

In [None]:
# Testing features, transformed with the same fitted PCA and scalers as the training set.
X_test

array([[-0.68707325, -0.50735238, -0.20700664, -0.62336223,  0.39806279],
       [ 0.46234745,  0.81775949, -0.44946658,  0.1140975 , -0.17951633],
       [ 0.61516826,  0.29117629, -0.20364858,  0.46090612,  0.00305875],
       [-0.50432232, -0.64050608,  0.13880843, -0.42755397,  0.47996396],
       [-0.58363044, -0.59302057, -0.36068798, -0.20546695,  0.42447254],
       [-0.77781706, -0.52822617,  0.1248982 , -0.39869912, -0.65804837],
       [-0.31182115, -0.16652907, -0.02315063,  0.05638778, -0.47035166],
       [-0.35714232, -0.56180839,  0.02707521, -0.3134225 ,  0.7454719 ],
       [ 0.40935228,  1.        , -0.52272328,  0.10199332,  0.13573632],
       [-0.5209226 , -0.19721416, -0.90344255, -0.43913667, -0.10011288],
       [-0.40235592, -0.6796702 , -0.49861898, -0.07365373,  0.85654452],
       [ 0.81742342, -0.54015171, -0.13383637, -0.1039951 , -0.93044641],
       [ 0.3254841 , -0.50934448,  0.04402021,  0.36110195, -0.37572541],
       [-0.09288116, -0.13640449,  0.0

In [None]:
# Testing labels.
y_test

[0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0]