In [18]:
import numpy as np
from scipy.spatial import ConvexHull

def projective_clustering_coreset(P, j):
    """Pick extreme points of ``P`` as seen in its top-``j`` principal subspace.

    The points are centred on their mean and projected onto the first ``j``
    right singular vectors. The convex hull of the projection is computed
    once. Anchor points are formed by moving every hull vertex towards the
    centroid of the hull vertices (keeping ``1 / j`` of its offset), plus the
    centroid itself. For each anchor the ``j + 1`` nearest hull vertices are
    collected, and the distinct collected vertices are returned.

    Args:
        P: Array-like of shape ``(n_points, n_features)``.
        j: Dimension of the subspace the points are projected onto.
            NOTE(review): the hull step presumably needs ``j >= 2`` and
            enough non-degenerate points -- confirm against the
            ``scipy.spatial.ConvexHull`` requirements.

    Returns:
        A 2-D array whose rows are the distinct selected hull vertices,
        expressed in the ``j``-dimensional projected coordinates (not in the
        original feature space). Rows come out of a ``set``, so their order
        carries no meaning.
    """
    centre = np.mean(P, axis=0)
    centred = P - centre

    # Only the right singular vectors are needed for the projection.
    _, _, Vt = np.linalg.svd(centred, full_matrices=False)
    Q = centred @ Vt[:j].T

    # The hull depends only on Q, so build it once here instead of once per
    # anchor point.
    G = Q[ConvexHull(Q).vertices]
    c = np.mean(G, axis=0)

    # Shrunken copies of the hull vertices, followed by the centroid itself.
    anchors = np.vstack([c + (1 / j) * (G - c), c])

    selected = set()
    for anchor in anchors:
        nearest_first = np.argsort(np.linalg.norm(G - anchor, axis=1))
        selected.update(map(tuple, G[nearest_first[:j + 1]]))

    return np.array(list(selected))

def find_convex_hull_points(Q, point, max_points):
    """Return the convex-hull vertices of ``Q`` that lie closest to ``point``.

    Args:
        Q: 2-D numpy array of points, one per row.
        point: Reference location, broadcastable against a row of ``Q``.
        max_points: Upper bound on the number of vertices returned.

    Returns:
        At most ``max_points`` hull vertices of ``Q``, ordered by increasing
        Euclidean distance to ``point``.
    """
    hull_vertices = Q[ConvexHull(Q).vertices]
    offsets = hull_vertices - point
    nearest_first = np.argsort(np.linalg.norm(offsets, axis=1))
    keep = nearest_first[:max_points]
    return hull_vertices[keep]

# Demo: draw 100 random points in 5 dimensions and print the coreset built
# from their 2-dimensional projection.
j = 2
P = np.random.rand(100, 5)
C = projective_clustering_coreset(P, j=j)
print("Coreset:", C)


Coreset: [[-0.77823865  0.01718491]
 [ 0.30459764 -0.73450114]
 [ 0.54350318  0.05773647]
 [-0.35747693  0.51900686]
 [ 0.59531353 -0.03265919]
 [ 0.12409499  0.47994896]
 [ 0.32717147  0.39844567]
 [-0.07961886  0.5470665 ]
 [ 0.34429826 -0.68988589]
 [-0.60844161 -0.31955072]
 [-0.42315631 -0.40517253]]
