In [57]:
import pickle
import numpy as np
import random

from bqskit.ir import Circuit

# Number of clusters requested from every clustering step further down.
k = 100

# Pickle file holding the list of circuits to analyse.
file_path = "small_circs.pkl"

# SECURITY: unpickling executes arbitrary code embedded in the file, so only
# point file_path at pickles that come from a trusted source.
with open(file_path, "rb") as handle:
    data: list[Circuit] = pickle.load(handle)

# One unitary per circuit, in the same order as `data`.
unitaries = [circuit.get_unitary() for circuit in data]

# Sanity check: how many circuits were loaded.
print(len(data))

1000


In [58]:
from bqskit.qis import UnitaryMatrix
import time

def euclidean_distance(a, b):
    """Return the squared Euclidean distance between ``a`` and ``b``.

    Despite the name, no square root is taken: the result is
    ``sum(|a - b| ** 2)`` over every element.

    Args:
        a: Array-like of numbers (real or complex).
        b: Array-like broadcastable against ``a``.

    Returns:
        A non-negative real scalar.
    """
    diff = np.asarray(a) - np.asarray(b)
    # Square the magnitude, not the raw value: for real input this is the same
    # number, for complex input it keeps the result real and non-negative.
    return np.sum(np.abs(diff) ** 2)

def get_distances(ensemble):
    """Return the mean ``get_frobenius_distance`` over all unordered pairs.

    Each item is wrapped in ``UnitaryMatrix`` once, then every pair ``(i, j)``
    with ``i < j`` contributes one distance to the average.

    Args:
        ensemble: Sized, indexable collection of objects accepted by the
            ``UnitaryMatrix`` constructor.

    Returns:
        The average pairwise distance, or ``0.0`` when ``ensemble`` has fewer
        than two items (there are no pairs to average).
    """
    n = len(ensemble)
    if n < 2:
        # Avoid dividing by a zero pair count.
        return 0.0

    # Wrap once up front instead of re-wrapping both operands for every pair.
    mats = [UnitaryMatrix(item) for item in ensemble]

    total = 0.0
    for i in range(n - 1):
        first = mats[i]
        for j in range(i + 1, n):
            total += first.get_frobenius_distance(mats[j])

    # Number of unordered pairs among n items.
    return total / (n * (n - 1) // 2)

def get_euc_distances(ensemble):
    """Return the mean ``euclidean_distance`` over all unordered pairs.

    Every pair ``(i, j)`` with ``i < j`` contributes one call to
    ``euclidean_distance(ensemble[i], ensemble[j])``.

    Args:
        ensemble: Sized, indexable collection of vectors.

    Returns:
        The average pairwise value, or ``0.0`` when ``ensemble`` has fewer
        than two items (there are no pairs to average).
    """
    n = len(ensemble)
    if n < 2:
        # Avoid dividing by a zero pair count.
        return 0.0

    total = 0.0
    for i in range(n - 1):
        first = ensemble[i]
        for j in range(i + 1, n):
            total += euclidean_distance(first, ensemble[j])

    # Number of unordered pairs among n items.
    return total / (n * (n - 1) // 2)

# Stack each unitary's get_flat_vector() result into one array, keeping the
# order of `unitaries`.
vec_unitaries = np.array(
    [unitary.get_flat_vector() for unitary in unitaries]
)



In [59]:
# Time the UnitaryMatrix-based average against the flat-vector one.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted mid-run.
start = time.perf_counter()

dist_1 = get_distances(unitaries)

t1 = time.perf_counter() - start

start = time.perf_counter()

dist_2 = get_euc_distances(vec_unitaries)

t2 = time.perf_counter() - start

# Each line: average distance, elapsed seconds.
print(dist_1, t1)
print(dist_2, t2)


1.2668105756471378e-05 15.056024312973022
1.2668105756471378e-05 3.3388831615448


In [60]:
from sklearn.cluster import KMeans

# n_init is pinned to 10, the value the unset default resolved to when this
# cell was run (see the warning it emitted). Setting it explicitly silences
# that FutureWarning and keeps results stable across scikit-learn versions.
k_means = KMeans(n_clusters=k, random_state=0, n_init=10).fit(vec_unitaries)

# print(k_means.labels_)

  super()._check_params_vs_input(X, default_n_init=10)


In [61]:
unitaries = np.array(unitaries)


def print_distances(cluster_ids, seed=None):
    """Print average pairwise distances among one random member per cluster.

    For every label in ``range(k)`` that has at least one member, a single
    member is drawn at random. Two lines are then printed for the drawn
    representatives: the result of ``get_euc_distances`` on their rows of
    ``vec_unitaries`` and the result of ``get_distances`` on their entries of
    ``unitaries``.

    Reads the module-level ``k``, ``vec_unitaries`` and ``unitaries``.

    Args:
        cluster_ids: One cluster label per row of ``vec_unitaries``.
        seed: Optional seed for the random draw. ``None`` (the default) uses
            NumPy's global random state, as before; any other value makes the
            selection repeatable.
    """
    cluster_ids = np.asarray(cluster_ids)
    # Both the np.random module and a RandomState instance expose randint().
    rng = np.random if seed is None else np.random.RandomState(seed)

    small_vecs = []
    small_unitaries = []
    for cluster_id in range(k):
        # Compute the membership mask once and reuse it for both arrays.
        in_cluster = cluster_ids == cluster_id
        if not in_cluster.any():
            continue

        all_vecs = vec_unitaries[in_cluster]
        all_unitaries = unitaries[in_cluster]

        pick = rng.randint(0, len(all_vecs))
        small_vecs.append(all_vecs[pick])
        small_unitaries.append(all_unitaries[pick])

    print(get_euc_distances(small_vecs))
    print(get_distances(small_unitaries))


print_distances(np.array(k_means.labels_))

1.4925295064265732e-05
1.4925295064265732e-05


In [62]:
# PCA and then K-means
from sklearn.decomposition import PCA
# NOTE: `pca` holds the projected data (fit_transform output), not the fitted
# PCA estimator. random_state is fixed so the projection is repeatable even
# if a randomized SVD solver is selected.
pca = PCA(n_components=90, random_state=0).fit_transform(vec_unitaries)

# n_init is set explicitly to 10 (the value the unset default resolved to
# here) to silence the FutureWarning and keep results stable across versions.
k_means_pca = KMeans(n_clusters=k, random_state=0, n_init=10).fit(pca)

print_distances(np.array(k_means_pca.labels_))

  super()._check_params_vs_input(X, default_n_init=10)


1.4417993837844208e-05
1.4417993837844208e-05


In [63]:
# PCA, then TSNE, then K-means
from sklearn.manifold import TSNE

# NOTE: `tsne` holds the embedded data (fit_transform output), not the fitted
# estimator. t-SNE is stochastic, so random_state is fixed to make the
# embedding, and everything clustered on it, repeatable.
tsne = TSNE(n_components=32, method='exact', random_state=0).fit_transform(pca)

# n_init is set explicitly to 10 (the value the unset default resolved to
# here) to silence the FutureWarning and keep results stable across versions.
k_means_tsne = KMeans(n_clusters=k, random_state=0, n_init=10).fit(tsne)

print_distances(np.array(k_means_tsne.labels_))

  super()._check_params_vs_input(X, default_n_init=10)


1.280944874177726e-05
1.2809448741777261e-05


In [64]:
# Now try agglomerative clustering for all sets

from sklearn.cluster import AgglomerativeClustering

def _fit_agglomerative(features):
    """Fit a k-cluster AgglomerativeClustering model on ``features``."""
    return AgglomerativeClustering(n_clusters=k).fit(features)


# Same three feature sets as the K-means runs: raw vectors, PCA, t-SNE.
agglo = _fit_agglomerative(vec_unitaries)
agglo_pca = _fit_agglomerative(pca)
agglo_tsne = _fit_agglomerative(tsne)

# Report distances for each clustering, in the order they were fitted.
for fitted in (agglo, agglo_pca, agglo_tsne):
    print_distances(np.array(fitted.labels_))

1.3658888731566029e-05
1.3658888731566029e-05
1.3159159306635826e-05
1.315915930663582e-05
1.2589790849800638e-05
1.2589790849800641e-05
