In [None]:


import json
import math
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

from qiskit import ClassicalRegister, QuantumCircuit, QuantumRegister

from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics

from qiskit import Aer, execute


%matplotlib inline

In [None]:
def amplitude_encoding(x):
    """Build a circuit that loads ``x`` into the amplitudes of a quantum state.

    The input is flattened, L2-normalised and zero-padded up to the next
    power of two, then handed to ``QuantumCircuit.initialize``.

    Args:
        x: Array-like of numbers (any shape) with at least one element and a
            non-zero norm.

    Returns:
        A ``QuantumCircuit`` named ``'Encoding'`` acting on
        ``max(1, ceil(log2(x.size)))`` qubits.

    Raises:
        ValueError: If ``x`` is empty or has zero norm, since such a vector
            cannot be normalised into a valid state.
    """
    amplitudes = np.asarray(x).flatten()
    if amplitudes.size == 0:
        raise ValueError("amplitude_encoding requires at least one value")

    norm = np.linalg.norm(amplitudes)
    if norm == 0:
        # Dividing by a zero norm would silently produce a NaN state vector.
        raise ValueError("amplitude_encoding cannot normalise an all-zero vector")

    # A single value gives log2(1) == 0; a circuit needs at least one qubit.
    n_qubits = max(1, math.ceil(math.log2(amplitudes.size)))
    padding = np.zeros(2**n_qubits - amplitudes.size)
    state_preparation = np.concatenate([amplitudes / norm, padding])

    qc = QuantumCircuit(n_qubits, name='Encoding')
    qc.initialize(state_preparation, range(n_qubits))

    return qc

def angles_encoding(x, rotation='y'):
    """Encode each value of ``x`` as a single-qubit rotation angle.

    Args:
        x: Sequence of angles; one qubit is allocated per element.
        rotation: ``'y'`` selects RY, ``'x'`` selects RX, and any other
            value selects RZ.

    Returns:
        A ``QuantumCircuit`` named ``'Encoding'`` with ``len(x)`` qubits,
        where qubit ``i`` carries the chosen rotation by ``x[i]``.
    """
    qc = QuantumCircuit(len(x), name='Encoding')

    # Resolve the gate method once; anything other than 'y' / 'x' means RZ.
    gate_name = 'ry' if rotation == 'y' else 'rx' if rotation == 'x' else 'rz'
    apply_gate = getattr(qc, gate_name)

    for qubit, angle in enumerate(x):
        apply_gate(angle, qubit)

    return qc

In [None]:
# Experiment configuration.
TRAIN_SIZE = 100
N_CLUSTER = 4
N_FEATURES = 4
SEED = 10  # shared by the blob generator and the centroid initialisation

# Synthetic blobs, rescaled feature-wise to [-1, 1].
dataset, classes = datasets.make_blobs(
    n_samples=TRAIN_SIZE,
    n_features=N_FEATURES,
    centers=N_CLUSTER,
    random_state=SEED,
)
scaler = MinMaxScaler(feature_range=(-1, 1))
dataset = scaler.fit_transform(dataset)

# Initial centroids: Gaussian noise scaled by the per-feature std and shifted
# by the per-feature mean. A seeded generator is used so that a fresh
# "Restart & Run All" yields the same starting centroids every time.
mean = np.mean(dataset, axis=0)
std = np.std(dataset, axis=0)
rng = np.random.default_rng(SEED)
centers = rng.standard_normal((N_CLUSTER, N_FEATURES)) * std + mean

# First two features only: samples coloured by true class, centroids as red stars.
plt.scatter(dataset[:, 0], dataset[:, 1], c=classes, cmap="Accent")
plt.scatter(centers[:, 0], centers[:, 1], marker='*', c='r', s=150)

In [None]:
# Encode the first sample together with the first centroid, then draw the
# circuit after six successive rounds of decomposition.
circ = angles_encoding(np.append(dataset[0], centers[0]))
expanded = circ
for _round in range(6):
    expanded = expanded.decompose()
expanded.draw(output='mpl')

In [None]:

def point_centroid_distance(point, center, encoding, shots=5000):
    """Score how far ``point`` is from ``center`` using a controlled-swap circuit.

    ``point`` and ``center`` are concatenated and passed to ``encoding``. The
    first half of the encoded qubits is then swapped with the second half,
    controlled on an ancilla qubit, and the ancilla is measured.

    Args:
        point: Feature vector of the sample.
        center: Feature vector of the centroid.
        encoding: Callable that receives the concatenated vector and returns
            a ``QuantumCircuit`` (for example ``angles_encoding``).
        shots: Number of simulator shots to run. Defaults to 5000.

    Returns:
        The number of shots in which the ancilla was measured as ``'1'``
        (0 if that outcome never occurred). ``closest_centroid`` takes the
        argmin of this value, i.e. a smaller count is treated as "closer".
    """
    encoded_data = encoding(np.append(point, center))
    n_data = encoded_data.num_qubits
    half = n_data // 2

    data = QuantumRegister(n_data, 'data')
    ancilla = QuantumRegister(1, 'ancilla')
    classical = ClassicalRegister(1, 'output')
    qc = QuantumCircuit(data, ancilla, classical)

    # Hadamard on every qubit (data and ancilla) before the encoding is appended.
    # NOTE(review): a textbook swap test only needs H on the ancilla; confirm
    # that the H on the data qubits is intended.
    qc.h(range(qc.num_qubits))

    qc.append(encoded_data.decompose().to_instruction(), data)

    qc.barrier()

    # Integer indices address the data register, which was added to qc first:
    # qubit i belongs to the first half, qubit i + half to the second half.
    for i in range(half):
        qc.cswap(ancilla, i, i + half)

    qc.barrier()

    qc.h(ancilla)
    qc.measure(ancilla, classical)

    backend = Aer.get_backend('qasm_simulator')
    job = execute(qc, backend=backend, shots=shots)
    counts = job.result().get_counts(qc)

    return counts.get('1', 0)


def closest_centroid(points, centroids, encoding):
    """Assign every point to the centroid with the lowest distance score.

    Args:
        points: Iterable of feature vectors.
        centroids: Iterable of centroid feature vectors.
        encoding: Encoding callable forwarded to ``point_centroid_distance``.

    Returns:
        A list with one entry per point: the index (as returned by
        ``np.argmin``) of the centroid whose ``point_centroid_distance``
        value is smallest.
    """
    return [
        np.argmin([
            point_centroid_distance(point, centroid, encoding)
            for centroid in centroids
        ])
        for point in points
    ]

    

In [None]:
# Assign every sample to one of the initial centroids and plot the first two
# features, colouring samples and centroid stars by cluster index.
values = closest_centroid(dataset, centers, angles_encoding)
plt.scatter(x=dataset[:, 0], y=dataset[:, 1], c=values, cmap="Accent")
plt.scatter(
    x=centers[:, 0],
    y=centers[:, 1],
    c=np.arange(N_CLUSTER),
    cmap="Accent",
    marker='*',
    s=150,
)


In [None]:
from copy import deepcopy

ERROR_TRESHOLD = 0.01
MAX_ITERATIONS = 50  # safety cap: shot noise may keep the shift from settling

current_error = math.inf
old_error = 0
centers_new = centers
iteration = 1  # not called `iter`, so the builtin of that name stays usable

# The loop ends once the centroid shift changes by no more than
# ERROR_TRESHOLD between two consecutive iterations, or when the cap is hit.
while abs(old_error - current_error) > ERROR_TRESHOLD and iteration <= MAX_ITERATIONS:

    clusters = closest_centroid(dataset, centers_new, angles_encoding)

    centers_old = deepcopy(centers_new)

    # Move each centroid to the mean of its members. A cluster that received
    # no samples keeps its previous position: the mean of an empty selection
    # is NaN, which would poison the centroids and end the loop silently.
    labels = np.asarray(clusters)
    centers_new = np.array([
        dataset[labels == i].mean(axis=0) if np.any(labels == i) else centers_old[i]
        for i in range(N_CLUSTER)
    ])

    # Samples coloured by cluster; old centroids in cluster colour, new ones in red.
    plt.title(iteration)
    plt.scatter(dataset[:,0], dataset[:,1], c=clusters, cmap="Accent")
    plt.scatter(centers_old[:,0], centers_old[:,1], marker='*', c=np.array(range(N_CLUSTER)), s=150, cmap="Accent")
    plt.scatter(centers_new[:,0], centers_new[:,1], marker='*', c='r', s=150)
    plt.show()

    iteration += 1

    old_error = current_error
    current_error = np.linalg.norm(centers_new - centers_old)

In [None]:

# Final assignment of every sample using the centroids left by the training loop.
prediction = closest_centroid(dataset, centers_new, angles_encoding)

plt.title("Final model")
plt.scatter(x=dataset[:, 0], y=dataset[:, 1], c=prediction, cmap="Accent")
plt.scatter(
    x=centers_new[:, 0],
    y=centers_new[:, 1],
    c=np.arange(N_CLUSTER),
    cmap="Accent",
    marker='*',
    s=150,
)
plt.show()
 

In [None]:
def purity_score(y_true, y_pred):
    """Compute the purity of a clustering against reference labels.

    Builds the contingency matrix of ``y_true`` versus ``y_pred``, takes the
    maximum along axis 0 (one value per column), and divides the sum of
    those maxima by the total number of counted samples.

    Args:
        y_true: Reference labels.
        y_pred: Predicted cluster labels.

    Returns:
        The ratio described above.
    """
    table = metrics.cluster.contingency_matrix(y_true, y_pred)
    column_maxima = table.max(axis=0)
    return column_maxima.sum() / table.sum()

# Purity of the final assignment (`prediction`) measured against the labels
# returned by make_blobs (`classes`); shown as the cell output.
purity_score(classes, prediction)
