In [1]:
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
import numpy as np

In [2]:
# Generisanje slučajnog skupa podataka
X, _ = make_blobs(n_samples=100, centers=3, random_state=0)

In [4]:
# Funkcija za izračunavanje CLARA klasterovanja
def clara(X, k, num_replicas):
    n_samples = X.shape[0]
    sample_indices = np.random.choice(range(n_samples), size=min(40, n_samples), replace=False)

    best_cost = float('inf')
    best_labels = None

    for i in range(num_replicas):
        indices = np.random.choice(sample_indices, size=min(5, len(sample_indices)), replace=False)
        representative_samples = X[indices]
        clustering = AgglomerativeClustering(n_clusters=k).fit(X)

        labels = clustering.labels_
        cost = calculate_cost(X, labels, representative_samples)

        if cost < best_cost:
            best_cost = cost
            best_labels = labels

    return best_labels

In [6]:
# Funkcija za izračunavanje troška CLARA klasterovanja
def calculate_cost(X, labels, representative_samples):
    cost = 0
    for i, sample in enumerate(X):
        min_distance = float('inf')
        for representative in representative_samples:
            distance = np.linalg.norm(sample - representative)
            if distance < min_distance:
                min_distance = distance
        cost += min_distance
    return cost

In [7]:
# Primjena CLARA algoritma na generisani skup podataka
k = 3
num_replicas = 5
labels = clara(X, k, num_replicas)

In [8]:
# Ispis rezultata
print("Klasteri:")
for i in range(k):
    cluster_points = X[labels == i]
    print(f"Cluster {i+1}:")
    for point in cluster_points:
        print(point)
    print()

Klasteri:
Cluster 1:
[2.63185834 0.6893649 ]
[3.00251949 0.74265357]
[-0.07228289  2.88376939]
[2.72756228 1.3051255 ]
[ 2.52092996 -0.63858003]
[3.92282648 1.80370832]
[0.1631238  2.57750473]
[0.08848433 2.32299086]
[0.9845149  1.95211539]
[2.18217961 1.29965302]
[1.28535145 1.43691285]
[0.89011768 1.79849015]
[3.54351972 2.79355284]
[1.64164854 0.15020885]
[1.38093486 0.92949422]
[2.41163392 1.60423683]
[2.45760916 0.21285357]
[2.3535057  2.22404956]
[1.18447037 0.318814  ]
[1.36069966 0.74802912]
[1.41942144 1.57409695]
[2.2635425 1.8743027]
[3.23404709 0.71773882]
[2.06576754 2.68353415]
[ 3.93841822 -0.4500954 ]
[0.78478252 1.86706037]
[1.65209057 2.12010873]
[1.74371499 0.953829  ]
[3.2460247  2.84942165]
[3.97820955 2.37817845]
[1.19404184 2.80772861]
[2.11567076 3.06896151]
[ 2.43169305 -0.20173713]
[1.78726415 1.70012006]
[0.88214412 2.84128485]
[0.46546494 3.12315514]
[2.66934689 1.81987033]
[1.62011397 2.74692739]
[ 2.45127423 -0.19539785]
[1.28933778 3.44969159]
[1.84070628