# Imports and utilities

In [None]:
import json
import math
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

from qiskit import ClassicalRegister, QuantumCircuit, QuantumRegister
from qiskit.circuit.library import PauliFeatureMap, ZFeatureMap, ZZFeatureMap

from qiskit.opflow import X, Y, Z, I, CircuitStateFn
from qiskit.opflow.state_fns import StateFn
from qiskit.opflow.expectations import PauliExpectation, MatrixExpectation
from qiskit.opflow.converters import CircuitSampler
from qiskit.providers.aer import QasmSimulator
from qiskit.quantum_info import Statevector, state_fidelity

from sklearn import datasets
from sklearn.cluster import DBSCAN, AgglomerativeClustering, KMeans
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn import metrics

import matplotlib.cm as cm


import numpy as np
import math
import matplotlib.pyplot as plt
import json


from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from skimage import transform


from qiskit import QuantumRegister, ClassicalRegister, QuantumCircuit, transpile,assemble
from qiskit import Aer, execute
from qiskit.extensions import Initialize
from qiskit.tools.visualization import plot_histogram, plot_bloch_multivector, array_to_latex
from qiskit.quantum_info import partial_trace, Statevector, random_statevector, Operator, SparsePauliOp
from qiskit_textbook.tools import simon_oracle

from qiskit.circuit import QuantumCircuit, Parameter, ParameterVector
from qiskit.circuit.library import PauliFeatureMap, ZFeatureMap, ZZFeatureMap
from qiskit.circuit.library import TwoLocal, NLocal, RealAmplitudes, EfficientSU2
from qiskit.circuit.library import HGate, RXGate, RYGate, RZGate, CXGate, CRXGate, CRZGate
from qiskit.circuit.library import RealAmplitudes

from qiskit_machine_learning.kernels import QuantumKernel
from qiskit_machine_learning.algorithms.classifiers import VQC, NeuralNetworkClassifier
from qiskit_machine_learning.neural_networks import EstimatorQNN

from qiskit.algorithms.optimizers import COBYLA, GradientDescent, ADAM
from qiskit.primitives import Sampler
from qiskit.utils import algorithm_globals


from IPython.display import clear_output

%matplotlib inline

In [None]:
def even(x):
    """Return whether ``x`` leaves no remainder when divided by 2."""
    remainder = x % 2
    return remainder == 0


def generate_pauli(n):
    """Build the list of ``n + 1`` Pauli-string observables used by the EM distances.

    The first entry combines ``n`` copies of ``Z``. Entry ``k + 1`` (for
    ``k`` in ``0 .. n-1``) combines, in order, ``k`` copies of ``X``, one
    pivot operator (``X`` when ``k + 1`` is even, otherwise ``Y``) and
    ``n - k - 1`` copies of ``Z``.

    Args:
        n: Number of single-qubit factors in every observable.

    Returns:
        A list with ``n + 1`` operators.
    """

    def combine(factors):
        # Fold the factors with ``^`` starting from 1, exactly as the
        # operators are chained one by one (``^`` is the tensor product for
        # the opflow operators X, Y, Z).
        operator = 1
        for factor in factors:
            operator = operator ^ factor
        return operator

    observables = [combine([Z] * n)]

    for k in range(n):
        pivot = X if even(k + 1) else Y
        observables.append(combine([X] * k + [pivot] + [Z] * (n - k - 1)))

    return observables


def purity_score(y_true, y_pred):
    """Return the clustering purity of ``y_pred`` with respect to ``y_true``.

    For every predicted cluster (a column of the contingency matrix) the
    largest count is taken; the purity is the sum of those counts divided by
    the total number of samples in the matrix.
    """
    table = metrics.cluster.contingency_matrix(y_true, y_pred)
    dominant_counts = np.amax(table, axis=0)
    return np.sum(dominant_counts) / np.sum(table)

# Encodings

In [None]:
def amplitude_encoding(x):
    """Build a circuit that initializes its qubits with ``x`` as amplitudes.

    The input is flattened, rescaled to unit Euclidean norm and zero-padded
    to the next power-of-two length before being passed to
    ``QuantumCircuit.initialize``.

    Args:
        x: Array-like of numbers (any shape); it is flattened before use.

    Returns:
        A ``QuantumCircuit`` on ``max(1, ceil(log2(size)))`` qubits, where
        ``size`` is the number of values in ``x``.

    Raises:
        ValueError: If ``x`` is empty, or its norm is zero or not finite, so
            it cannot be rescaled to a valid state.
    """
    amplitudes = np.asarray(x).ravel()
    if amplitudes.size == 0:
        raise ValueError("amplitude_encoding requires at least one value")

    norm = np.linalg.norm(amplitudes)
    if norm == 0 or not np.isfinite(norm):
        raise ValueError(
            "amplitude_encoding requires a vector with a finite, non-zero norm"
        )

    # log2(1) == 0 would request a zero-qubit circuit; a single value is
    # therefore placed on one qubit and padded like any other short input.
    n_qubits = max(1, math.ceil(math.log2(amplitudes.size)))
    padding = 2 ** n_qubits - amplitudes.size
    state_preparation = np.pad(amplitudes / norm, (0, padding))

    qc = QuantumCircuit(n_qubits)
    qc.initialize(state_preparation, range(n_qubits))

    return qc


def angles_encoding(x, rotation='y'):
    """Build a circuit with one rotation gate per feature of ``x``.

    Qubit ``i`` receives a single rotation whose angle is ``x[i]``.

    Args:
        x: Sequence of rotation angles; its length sets the qubit count.
        rotation: ``'y'`` selects ``ry`` and ``'x'`` selects ``rx``; any
            other value selects ``rz``.

    Returns:
        The ``QuantumCircuit`` holding the rotations.
    """
    circuit = QuantumCircuit(len(x))

    # Z rotations are the fallback for every value other than 'x' and 'y'.
    apply_rotation = circuit.rz
    if rotation == 'y':
        apply_rotation = circuit.ry
    elif rotation == 'x':
        apply_rotation = circuit.rx

    for qubit, angle in enumerate(x):
        apply_rotation(angle, qubit)

    return circuit


# Distance functions

In [None]:
def fidelity(x, y, encoding):
    """Return the state fidelity between the encodings of ``x`` and ``y``.

    Args:
        x: First sample, passed to ``encoding``.
        y: Second sample, passed to ``encoding``.
        encoding: Callable turning a sample into a circuit/instruction that
            ``Statevector.from_instruction`` accepts.

    Returns:
        The value computed by ``state_fidelity`` for the two statevectors.
    """
    encoded_states = [
        Statevector.from_instruction(encoding(sample)) for sample in (x, y)
    ]
    return state_fidelity(*encoded_states)


def fidelity_matrix(dataset, encoding):
    """Return the symmetric matrix of pairwise fidelities of ``dataset``.

    Args:
        dataset: Indexable collection of samples (one per row).
        encoding: Callable forwarded to ``fidelity`` to encode each sample.

    Returns:
        An ``(n, n)`` float array where entry ``(i, j)`` is the fidelity
        between samples ``i`` and ``j``. The diagonal is 1.
    """
    n_samples = len(dataset)

    # A state has fidelity 1 with itself, so start from the identity instead
    # of zeros. Leaving the diagonal at 0 would make ``1 - fidelity_matrix``
    # report every sample as maximally dissimilar from itself.
    # Assumes ``encoding`` yields normalized pure states.
    mat = np.eye(n_samples)

    # Fidelity is symmetric: compute the lower triangle once and mirror it.
    for i in range(n_samples):
        for j in range(i):
            value = fidelity(dataset[i], dataset[j], encoding)
            mat[i, j] = value
            mat[j, i] = value

    return mat


def em_distance_approximate(data_1, data_2, encode_map, observables, simulator):
    """Estimate the EM distance between two samples by sampling expectations.

    For every observable the expectation value is evaluated on both encoded
    states through ``PauliExpectation`` and ``CircuitSampler``; the distance
    is the largest absolute difference between the two expectation values.

    Args:
        data_1: First sample.
        data_2: Second sample.
        encode_map: Either a parameterized circuit exposing
            ``bind_parameters`` or a callable that maps a sample to an
            already-bound circuit (as ``em_dissimilarity_matrix`` passes).
        observables: Iterable of operators to measure.
        simulator: Backend handed to ``CircuitSampler``.

    Returns:
        The largest absolute expectation difference, or 0 when
        ``observables`` is empty.
    """

    def encoded_state(data):
        # Keep supporting parameterized circuits, but also accept the
        # callable encoders used by the rest of this file, which have no
        # ``bind_parameters`` attribute.
        if hasattr(encode_map, "bind_parameters"):
            circuit = encode_map.bind_parameters(data)
        else:
            circuit = encode_map(data)
        return CircuitStateFn(circuit)

    def sampled_expectation(observable, state):
        measurable = StateFn(observable, is_measurement=True).compose(state)
        expectation = PauliExpectation().convert(measurable)
        return CircuitSampler(simulator).convert(expectation).eval().real

    x = encoded_state(data_1)
    y = encoded_state(data_2)

    em = 0
    for h in observables:
        gap = abs(sampled_expectation(h, x) - sampled_expectation(h, y))
        em = max(em, gap)

    return em

def em_distance_exact(data_1, data_2, encode_map, observables):
    """Compute the EM distance between two samples from exact expectations.

    Args:
        data_1: First sample.
        data_2: Second sample.
        encode_map: Callable mapping a sample to its encoding circuit.
        observables: Iterable of operators whose expectations are compared.

    Returns:
        The largest absolute difference between the expectation values of
        the two encoded states, or 0 when ``observables`` is empty.
    """
    state_a = CircuitStateFn(encode_map(data_1))
    state_b = CircuitStateFn(encode_map(data_2))

    def expectation(state, observable):
        # <state| observable |state>, evaluated directly.
        sandwich = state.adjoint().compose(observable).compose(state)
        return sandwich.eval().real

    largest_gap = 0
    for observable in observables:
        gap = abs(expectation(state_a, observable) - expectation(state_b, observable))
        largest_gap = max(largest_gap, gap)

    return largest_gap


def em_dissimilarity_matrix(dataset, approximate=False, encode_map=None):
    """Return the symmetric matrix of pairwise EM distances of ``dataset``.

    Args:
        dataset: Indexable collection of samples (one per row).
        approximate: When true, distances come from
            ``em_distance_approximate`` with a ``QasmSimulator``; otherwise
            from ``em_distance_exact``.
        encode_map: Callable mapping a sample to its encoding circuit. It is
            required despite the ``None`` default.

    Returns:
        An ``(n, n)`` float array with zeros on the diagonal.

    Raises:
        TypeError: If ``encode_map`` is not callable.
    """
    if not callable(encode_map):
        raise TypeError(
            "encode_map must be a callable mapping a sample to a circuit"
        )

    n_samples = len(dataset)
    mat = np.zeros((n_samples, n_samples))
    if n_samples == 0:
        # Nothing to encode; avoid indexing dataset[0] below.
        return mat

    # The first sample fixes the qubit count and thus the observables.
    n = encode_map(dataset[0]).num_qubits
    observables = generate_pauli(n)

    if approximate:
        simulator = QasmSimulator()

        def distance(sample_a, sample_b):
            return em_distance_approximate(
                sample_a, sample_b, encode_map, observables, simulator
            )
    else:

        def distance(sample_a, sample_b):
            return em_distance_exact(sample_a, sample_b, encode_map, observables)

    # Compute the lower triangle once and mirror it.
    for i in range(n_samples):
        for j in range(i):
            dis = distance(dataset[i], dataset[j])
            mat[i, j] = dis
            mat[j, i] = dis

    return mat

def euclidean_matrix(dataset):
    """Return the symmetric matrix of pairwise Euclidean distances.

    Args:
        dataset: Array-like whose first axis indexes the samples; any
            further axes are flattened into one feature vector per sample.

    Returns:
        An ``(n, n)`` float array where entry ``(i, j)`` is the Euclidean
        distance between samples ``i`` and ``j`` (zero on the diagonal).
    """
    points = np.asarray(dataset)
    n_samples = points.shape[0]
    if n_samples == 0:
        return np.zeros((0, 0))

    flat = points.reshape(n_samples, -1)

    # Broadcasting yields every pairwise difference at once, replacing the
    # Python-level double loop. This materializes an (n, n, features) array.
    deltas = flat[:, np.newaxis, :] - flat[np.newaxis, :, :]
    return np.sqrt(np.sum(deltas ** 2, axis=-1)).astype(float)


def kernel_matrix(dataset):
    """Evaluate the ZZ-feature-map quantum kernel on ``dataset`` against itself.

    Args:
        dataset: 2-D array; ``dataset.shape[1]`` sets the feature dimension
            of the ``ZZFeatureMap``.

    Returns:
        The result of ``QuantumKernel.evaluate(x_vec=dataset)``.
    """
    feature_map = ZZFeatureMap(
        feature_dimension=dataset.shape[1],
        reps=1,
        entanglement='linear',
        insert_barriers=True,
    )
    backend = Aer.get_backend('statevector_simulator')
    kernel = QuantumKernel(feature_map=feature_map, quantum_instance=backend)
    return kernel.evaluate(x_vec=dataset)


def zz_map(data):
    """Return a one-repetition linear ``ZZFeatureMap`` bound to ``data``.

    Args:
        data: Sequence of feature values; its length sets the feature
            dimension and its values are bound to the map's parameters.
    """
    feature_map = ZZFeatureMap(
        feature_dimension=len(data),
        reps=1,
        entanglement='linear',
        insert_barriers=True,
    )
    return feature_map.bind_parameters(data)
    

# Dataset

In [None]:
TRAIN_SIZE = 50

# Two 2-D Gaussian blobs with a fixed seed, rescaled feature-wise to [-1, 1].
dataset, classes = datasets.make_blobs(
    n_samples=TRAIN_SIZE, random_state=10, centers=2, n_features=2
)
scaler = MinMaxScaler(feature_range=(-1, 1))
dataset = scaler.fit_transform(np.array(dataset))

# Scatter of the rescaled samples, colored by their true class.
plt.scatter(dataset[:, 0], dataset[:, 1], c=classes, cmap="Accent")

In [None]:
plot_data = dataset

# EM dissimilarities for the two circuit encodings, plus the kernel turned
# into a dissimilarity (1 - kernel value).
emd_zz_matrix = em_dissimilarity_matrix(plot_data, encode_map=zz_map)
emd_an_matrix = em_dissimilarity_matrix(plot_data, encode_map=angles_encoding)
ker_zz_matrix = 1 - kernel_matrix(plot_data)

# Pairwise fidelities (similarities) for each encoding.
fid_zz_matrix = fidelity_matrix(plot_data, encoding=zz_map)
fid_an_matrix = fidelity_matrix(plot_data, encoding=angles_encoding)
fid_ag_matrix = fidelity_matrix(plot_data, encoding=amplitude_encoding)

fig, axs = plt.subplots(3, 3)
fig.set_size_inches(10, 12)

# (axes, title, matrix) for every populated panel. The fidelity titles name
# the encoding actually plotted: amplitude encoding has no rotation axis,
# while angles_encoding defaults to Y rotations.
panels = [
    (axs[0, 0], "EM + ZZ Encoding", emd_zz_matrix),
    (axs[0, 1], "EM + Angles Y Encoding", emd_an_matrix),
    (axs[0, 2], "Kernel + ZZ Encoding", ker_zz_matrix),
    (axs[1, 0], "Fidelity + ZZ Encoding", fid_zz_matrix),
    (axs[1, 1], "Fidelity + Amplitude Encoding", fid_ag_matrix),
    (axs[1, 2], "Fidelity + Angles Y Encoding", fid_an_matrix),
    (axs[2, 1], "Euclidean", euclidean_matrix(plot_data)),
]
for panel_ax, panel_title, panel_matrix in panels:
    panel_ax.set_title(panel_title)
    panel_ax.matshow(panel_matrix, cmap='RdPu')

# Only the middle panel of the last row is used.
fig.delaxes(axs[2, 0])
fig.delaxes(axs[2, 2])
plt.tight_layout()


# Clustering

In [None]:
from sklearn.metrics.cluster import homogeneity_score

def ag_clustering(dataset, classes, dissimilarity_matrix, linkage="complete", title=""):
    """Run agglomerative clustering on a precomputed matrix and plot the result.

    The number of clusters equals the number of distinct values in
    ``classes``. A figure with two panels is drawn: the contingency matrix
    of true classes versus predicted labels, and a scatter of the first two
    columns of ``dataset`` colored by predicted label.

    Args:
        dataset: 2-D array of samples; columns 0 and 1 are plotted.
        classes: True class label of every sample.
        dissimilarity_matrix: Precomputed pairwise dissimilarities.
        linkage: Linkage criterion forwarded to ``AgglomerativeClustering``.
        title: Figure super-title.

    Returns:
        Tuple ``(purity, homogeneity)`` of the predicted labels.
    """
    n_clusters = np.unique(classes).size
    model = AgglomerativeClustering(
        affinity='precomputed', n_clusters=n_clusters, linkage=linkage
    )
    labels = model.fit(dissimilarity_matrix).labels_

    cmap = plt.cm.get_cmap("Accent").copy()
    cmap.set_under('red')
    contingency_matrix = metrics.cluster.contingency_matrix(classes, labels)

    fig, (matrix_ax, scatter_ax) = plt.subplots(1, 2)
    fig.suptitle(title)
    fig.set_size_inches(15, 6)

    matrix_ax.set_title("Contingency Matrix")
    matrix_ax.matshow(contingency_matrix, cmap='RdPu')

    scatter_ax.set_title("Predicted Clusters")
    scatter_ax.scatter(dataset[:, 0], dataset[:, 1], c=labels, cmap=cmap)

    purity = purity_score(classes, labels)
    homogeneity = homogeneity_score(classes, labels)
    return purity, homogeneity


def dbscan_clustering(dataset, classes, dissimilarity_matrix, eps=0.5, min_samples=5, title=""):
    """Run DBSCAN on a precomputed matrix and plot the result.

    A figure with two panels is drawn: the contingency matrix of true
    classes versus predicted labels, and a scatter of the first two columns
    of ``dataset`` colored by predicted label, with noise samples in red.

    Args:
        dataset: 2-D array of samples; columns 0 and 1 are plotted.
        classes: True class label of every sample.
        dissimilarity_matrix: Precomputed pairwise dissimilarities.
        eps: Neighborhood radius forwarded to ``DBSCAN``.
        min_samples: Core-point threshold forwarded to ``DBSCAN``.
        title: Figure super-title.

    Returns:
        Tuple ``(purity, homogeneity)`` of the predicted labels.
    """
    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed').fit(dissimilarity_matrix)

    labels = clustering.labels_
    cmap = plt.cm.get_cmap("Accent").copy()
    cmap.set_under('red')
    contingency_matrix = metrics.cluster.contingency_matrix(classes, labels)

    fig, axs = plt.subplots(1, 2)
    fig.suptitle(title)
    fig.set_size_inches(15, 6)

    axs[0].set_title("Contingency Matrix")
    axs[0].matshow(contingency_matrix, cmap='RdPu')
    axs[1].set_title("Predicted Clusters")
    # DBSCAN labels noise samples -1 (scikit-learn convention). The "under"
    # color only applies to values below vmin, and an autoscaled vmin would
    # equal the smallest label (-1), so pin vmin to 0. vmax is kept above
    # vmin even when at most one cluster is found.
    axs[1].scatter(
        dataset[:, 0],
        dataset[:, 1],
        c=labels,
        cmap=cmap,
        vmin=0,
        vmax=max(int(labels.max()), 1),
    )

    return purity_score(classes, labels), homogeneity_score(classes, labels)

In [None]:


# Agglomerative clustering on every dissimilarity matrix. Fidelity is a
# similarity, so each fidelity matrix is converted with ``1 - fidelity``
# before being used as a precomputed dissimilarity; titles name the matrix
# actually used.
ag_runs = [
    (emd_zz_matrix, "Agglomerative + EM + ZZ"),
    (emd_an_matrix, "Agglomerative + EM + Angles"),
    (ker_zz_matrix, "Agglomerative + Kernel + ZZ"),
    (1 - fid_zz_matrix, "Agglomerative + Fidelity + ZZ"),
    (1 - fid_an_matrix, "Agglomerative + Fidelity + Angles"),
    (1 - fid_ag_matrix, "Agglomerative + Fidelity + Amplitude"),
]
for ag_matrix, ag_title in ag_runs:
    print(ag_clustering(dataset, classes, ag_matrix, linkage="complete", title=ag_title))

In [None]:
# DBSCAN on every dissimilarity matrix. Fidelity is a similarity, so each
# fidelity matrix is converted with ``1 - fidelity`` before being used as a
# precomputed dissimilarity; titles name the algorithm and matrix actually
# used.
dbscan_runs = [
    (emd_zz_matrix, "DBSCAN + EM + ZZ"),
    (emd_an_matrix, "DBSCAN + EM + Angles"),
    (ker_zz_matrix, "DBSCAN + Kernel + ZZ"),
    (1 - fid_zz_matrix, "DBSCAN + Fidelity + ZZ"),
    (1 - fid_an_matrix, "DBSCAN + Fidelity + Angles"),
    (1 - fid_ag_matrix, "DBSCAN + Fidelity + Amplitude"),
]
for dbscan_matrix, dbscan_title in dbscan_runs:
    print(dbscan_clustering(dataset, classes, dbscan_matrix, eps=0.2, min_samples=10, title=dbscan_title))

In [None]:
TRAIN_SIZE = 50

# Two noisy interleaved half-moons, rescaled feature-wise to [-1, 1].
# NOTE(review): no random_state is given, so every run draws new samples.
dataset, classes = datasets.make_moons(n_samples=TRAIN_SIZE, noise=0.2)
dataset = np.array(dataset)
scaler = MinMaxScaler(feature_range=(-1, 1))
dataset = scaler.fit_transform(dataset)

plt.scatter(dataset[:, 0], dataset[:, 1], c=classes, cmap="Accent")

# Fidelity under angles encoding, converted from similarity to dissimilarity.
encoded_matrix = 1 - fidelity_matrix(dataset, encoding=angles_encoding)

# Titles name the algorithm and the encoding actually used (angles).
print(ag_clustering(dataset, classes, encoded_matrix, linkage="complete", title="Agglomerative + Fidelity + Angles"))
print(dbscan_clustering(dataset, classes, encoded_matrix, eps=0.5, min_samples=10, title="DBSCAN + Fidelity + Angles"))