In [3]:
import os
import numpy as np
import dimod

In [4]:
class QuantumClustering:
    def __init__(self, n_clusters):
        self.n_clusters = n_clusters
    
    def build_qubo_matrix(self, embeddings, medoid_indices, qubo_matrix_path):
        """Constructs and saves the QUBO matrix for k-medoids clustering using cosine similarity."""
        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
        cosine_matrix = (embeddings @ embeddings.T) / (norms @ norms.T)
        np.fill_diagonal(cosine_matrix, 0)

        mapped_indices = np.arange(len(medoid_indices))

        for idx in mapped_indices:
            cosine_matrix[idx, idx] += 2  

        np.save(qubo_matrix_path, cosine_matrix)
        print(f"QUBO matrix saved at: {qubo_matrix_path}")
    
    def solve_qubo(self, qubo_matrix_path):
        """Loads and solves the QUBO problem using a Simulated Annealing solver."""
        qubo_matrix = np.load(qubo_matrix_path)
        sampler = dimod.SimulatedAnnealingSampler()
        bqm = dimod.BinaryQuadraticModel.from_qubo(qubo_matrix)
        response = sampler.sample(bqm, num_reads=100)
        
        best_sample = response.first.sample
        assignments = sorted(best_sample.keys(), key=lambda x: best_sample[x], reverse=True)[:self.n_clusters]
        return assignments
    
    def save_results(self, cluster_assignments, save_path):
        """Saves optimized cluster assignments."""
        np.save(save_path, cluster_assignments)
        print(f"Final quantum cluster assignments saved at: {save_path}")

In [7]:
script_dir = os.getcwd()
data_dir = os.path.abspath(os.path.join(script_dir, "..", "..", "..", "data", "test_data"))
os.makedirs(data_dir, exist_ok=True)

In [8]:
np.random.seed(42)
doc_embeddings = np.random.rand(10, 5)
medoid_indices = np.array([0, 3, 7])

In [9]:
qubo_matrix_path = os.path.join(data_dir, "test_qubo_matrix.npy")
quantum_clustering = QuantumClustering(n_clusters=len(medoid_indices))
quantum_clustering.build_qubo_matrix(doc_embeddings, medoid_indices, qubo_matrix_path)

QUBO matrix saved at: c:\Users\karishma\OneDrive\Projects\qCLEF\data\test_data\test_qubo_matrix.npy


In [10]:
clustered_output_path = os.path.join(data_dir, "test_quantum_cluster_labels.npy")
cluster_labels = quantum_clustering.solve_qubo(qubo_matrix_path)
quantum_clustering.save_results(cluster_labels, clustered_output_path)

Final quantum cluster assignments saved at: c:\Users\karishma\OneDrive\Projects\qCLEF\data\test_data\test_quantum_cluster_labels.npy


In [11]:
print("Test execution completed.")

Test execution completed.


In [12]:
script_dir = os.getcwd()
data_dir = os.path.abspath(os.path.join(script_dir, "..", "..", "..", "data", "test_data"))

files = os.listdir(data_dir)
print("Files in test_data:", files)

Files in test_data: ['test_quantum_cluster_labels.npy', 'test_qubo_matrix.npy']


In [13]:
clustered_output_path = os.path.join(data_dir, "test_quantum_cluster_labels.npy")

if os.path.exists(clustered_output_path):
    cluster_labels = np.load(clustered_output_path)
    print("Cluster Labels Shape:", cluster_labels.shape)
    print("Cluster Labels Sample:", cluster_labels[:10])
else:
    print("Cluster Labels file not found.")

Cluster Labels Shape: (3,)
Cluster Labels Sample: [0 1 2]


In [14]:
qubo_matrix_path = os.path.join(data_dir, "test_qubo_matrix.npy")

if os.path.exists(qubo_matrix_path):
    qubo_matrix = np.load(qubo_matrix_path)
    print("QUBO Matrix Shape:", qubo_matrix.shape)
    print("QUBO Matrix Sample:\n", qubo_matrix[:5, :5])
else:
    print("QUBO Matrix file not found.")

QUBO Matrix Shape: (10, 10)
QUBO Matrix Sample:
 [[2.         0.67876099 0.92441938 0.91021874 0.68213929]
 [0.67876099 2.         0.61868866 0.91996955 0.77506265]
 [0.92441938 0.61868866 2.         0.82013025 0.46529034]
 [0.91021874 0.91996955 0.82013025 0.         0.80570999]
 [0.68213929 0.77506265 0.46529034 0.80570999 0.        ]]
