In [5]:
import numpy as np
from scipy.spatial import ConvexHull

def projective_clustering_coreset(P, j):
    C = set()
    u = np.mean(P, axis=0)
    U, S, Vt = np.linalg.svd(P - u, full_matrices=False)
    W = Vt[:j].T
    Q = (P - u) @ W
    hull = ConvexHull(Q)
    G = Q[hull.vertices]
    c = np.mean(G, axis=0)
    S = np.vstack([c + (1/j) * (v - c) for v in G])
    for s in np.vstack([S, c]):
        convex_hull_points = find_convex_hull_points(Q, s, j+1)
        C.update(map(tuple, convex_hull_points))
    return np.array(list(C))

def find_convex_hull_points(Q, point, max_points):
    hull = ConvexHull(Q)
    vertices = Q[hull.vertices]
    distances = np.linalg.norm(vertices - point, axis=1)
    sorted_indices = np.argsort(distances)
    return vertices[sorted_indices[:max_points]]

def sensitivity_sampling(Q, sensitivities, epsilon):
    total_sensitivity = np.sum(sensitivities)
    probabilities = sensitivities / total_sensitivity
    sample_size = int(np.ceil(total_sensitivity / epsilon**2))
    sampled_indices = np.random.choice(len(Q), size=sample_size, p=probabilities)
    coreset = Q[sampled_indices]
    return coreset

def l2_coreset_projective_clustering(P, j, k, epsilon):
    linf_coreset = projective_clustering_coreset(P, j)
    sensitivities = np.ones(len(linf_coreset))
    l2_coreset = sensitivity_sampling(linf_coreset, sensitivities, epsilon)
    return l2_coreset

P = np.random.rand(100, 5)
j = 2
k = 3
epsilon = 0.1
C = l2_coreset_projective_clustering(P, j, k, epsilon)
print("L2 Coreset:", C)
print("Size of L2 Coreset:", len(C))


L2 Coreset: [[ 0.44772924  0.56565219]
 [-0.49138553  0.39975164]
 [-0.49138553  0.39975164]
 ...
 [ 0.44772924  0.56565219]
 [ 0.49810663  0.5420031 ]
 [-0.45708211 -0.52787426]]
Size of L2 Coreset: 900


In [6]:
import numpy as np
from scipy.spatial import ConvexHull
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

def projective_clustering_coreset(P, j):
    C = set()
    u = np.mean(P, axis=0)
    U, S, Vt = np.linalg.svd(P - u, full_matrices=False)
    W = Vt[:j].T
    Q = (P - u) @ W
    hull = ConvexHull(Q)
    G = Q[hull.vertices]
    c = np.mean(G, axis=0)
    S = np.vstack([c + (1/j) * (v - c) for v in G])
    for s in np.vstack([S, c]):
        convex_hull_points = find_convex_hull_points(Q, s, j+1)
        C.update(map(tuple, convex_hull_points))
    return np.array(list(C))

def find_convex_hull_points(Q, point, max_points):
    hull = ConvexHull(Q)
    vertices = Q[hull.vertices]
    distances = np.linalg.norm(vertices - point, axis=1)
    sorted_indices = np.argsort(distances)
    return vertices[sorted_indices[:max_points]]

def sensitivity_sampling(Q, sensitivities, epsilon):
    total_sensitivity = np.sum(sensitivities)
    probabilities = sensitivities / total_sensitivity
    sample_size = int(np.ceil(total_sensitivity / epsilon**2))
    sampled_indices = np.random.choice(len(Q), size=sample_size, p=probabilities)
    coreset = Q[sampled_indices]
    return coreset

def l2_coreset_projective_clustering(P, j, k, epsilon):
    linf_coreset = projective_clustering_coreset(P, j)
    sensitivities = np.ones(len(linf_coreset))
    l2_coreset = sensitivity_sampling(linf_coreset, sensitivities, epsilon)
    return l2_coreset

def evaluate_clustering(data, n_clusters):
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels = kmeans.fit_predict(data)
    inertia = kmeans.inertia_
    silhouette_avg = silhouette_score(data, labels)
    return inertia, silhouette_avg

P = np.random.rand(100, 5)
j = 2
k = 3
epsilon = 0.01

linf_coreset = projective_clustering_coreset(P, j)
l2_coreset = l2_coreset_projective_clustering(P, j, k, epsilon)

original_inertia, original_silhouette = evaluate_clustering(P, k)

linf_inertia, linf_silhouette = evaluate_clustering(linf_coreset, k)

l2_inertia, l2_silhouette = evaluate_clustering(l2_coreset, k)

print("Original Data: Inertia =", original_inertia, ", Silhouette Score =", original_silhouette)
print("L∞ Coreset: Inertia =", linf_inertia, ", Silhouette Score =", linf_silhouette)
print("L2 (1+ε)-Coreset: Inertia =", l2_inertia, ", Silhouette Score =", l2_silhouette)

Original Data: Inertia = 28.6292001997833 , Silhouette Score = 0.1623577720924801
L∞ Coreset: Inertia = 0.958119553516255 , Silhouette Score = 0.25437571000675135
L2 (1+ε)-Coreset: Inertia = 8708.087717436842 , Silhouette Score = 0.6272830865389569
