In [61]:
import typing
import numpy
import numpy.random

T = typing.TypeVar('T')


class KMeans:
    """Single-pass (sequential) k-means clustering.

    ``fit`` shuffles the objects, uses the first ``k`` of them as the initial
    members of the ``k`` clusters, then assigns every remaining object to the
    cluster whose current centroid minimises ``f_similarity``.  Each centroid
    is the running mean of the objects assigned so far, so it moves as
    objects are added.  There is no second pass over the data.

    Objects may be scalars (``objs`` is 1-D) or fixed-shape arrays (``objs``
    is N-D and the first axis indexes the objects).
    """

    def __init__(self, k: int) -> None:
        # Number of clusters
        self.k = k
        # Start from an empty, well-defined state; fit() re-allocates the
        # accumulator once the shape of a single object is known.
        self.__reset(())

    def __reset(self, item_shape: typing.Tuple[int, ...]) -> None:
        """Drop any previous clustering and allocate empty per-cluster state."""
        # Sum of the elements in each cluster, shape (k, *item_shape)
        self.__cluster_acc = numpy.zeros(
            shape=(self.k,) + tuple(item_shape), dtype=float
        )
        # Number of elements in each cluster
        self.__cluster_length = numpy.zeros(shape=self.k)
        # Indices (into the fitted ``objs``) of the elements in each cluster
        self.__cluster_indices = {i: [] for i in range(self.k)}

    def __assign(self, cluster_idx: int, obj_idx: int, obj: T) -> None:
        """Add the object ``obj`` (located at ``obj_idx``) to one cluster."""
        self.__cluster_acc[cluster_idx] += obj
        self.__cluster_indices[cluster_idx].append(obj_idx)
        self.__cluster_length[cluster_idx] += 1

    def cluster(self, i: int) -> numpy.ndarray:
        """Return the indices of the objects assigned to cluster ``i``.

        Indices refer to positions in the ``objs`` array passed to ``fit`` and
        are listed in assignment order.  Raises ``KeyError`` when ``i`` is not
        a valid cluster number.
        """
        # dtype=int keeps the result an integer array even for an empty cluster.
        return numpy.array(self.__cluster_indices[i], dtype=int)

    def centroid(self, i: int) -> T:
        """Return the mean of the objects currently in cluster ``i``.

        An empty cluster (only possible before ``fit``) yields NaN.
        """
        return self.__cluster_acc[i] / self.__cluster_length[i]

    def centroids(self) -> numpy.ndarray:
        """Return all centroids stacked along the first axis, shape (k, ...).

        Empty clusters (only possible before ``fit``) yield NaN.
        """
        # Give the per-cluster counts trailing singleton axes so the division
        # broadcasts over objects of any shape; for scalar objects this is a
        # plain element-wise division of two (k,) arrays.
        trailing = (1,) * (self.__cluster_acc.ndim - 1)
        counts = self.__cluster_length.reshape(
            self.__cluster_length.shape + trailing
        )
        return self.__cluster_acc / counts

    def fit(self,
        objs: numpy.ndarray,
        f_similarity: typing.Callable[[T, T], float]
    ) -> None:
        """Cluster ``objs`` in a single randomised pass.

        Args:
            objs: Array whose first axis indexes the objects to cluster.
            f_similarity: Called as ``f_similarity(obj, centroids)`` where
                ``centroids`` is the array returned by ``centroids()``; it
                must return one value per centroid.  Despite the name, the
                cluster with the *smallest* value receives the object.

        Raises:
            ValueError: If ``objs`` has no axis to index objects along, or
                holds fewer than ``k`` objects.

        The visiting order comes from ``numpy.random.shuffle`` (numpy's global
        random state), so call ``numpy.random.seed`` beforehand to get
        reproducible clusters.  Any previous fit is discarded.
        """
        objs = numpy.asarray(objs)
        if objs.ndim == 0:
            raise ValueError('objs must have at least one dimension')

        n_objs = objs.shape[0]
        if n_objs < self.k:
            raise ValueError(
                'cannot build {} clusters from {} objects'.format(self.k, n_objs)
            )

        # Validate first, then reset: a failed call leaves the old state
        # intact and a successful one never mixes in results of an earlier fit.
        self.__reset(objs.shape[1:])

        order = numpy.arange(n_objs)
        numpy.random.shuffle(order)

        # Initialize centroids: the first k shuffled objects seed one cluster each.
        for cluster_idx, obj_idx in enumerate(order[:self.k]):
            self.__assign(cluster_idx, obj_idx, objs[obj_idx])

        # Classify the remaining elements into clusters.
        for obj_idx in order[self.k:]:
            current = objs[obj_idx]
            cluster_idx = self.argmin_cluster(current, f_similarity)
            self.__assign(cluster_idx, obj_idx, current)

    def argmin_cluster(self,
        obj: T,
        f_similarity: typing.Callable[[T, typing.Union[T, numpy.ndarray]], float]
    ) -> int:
        """Return the number of the cluster whose centroid minimises ``f_similarity``.

        ``f_similarity`` is called once with ``obj`` and the full centroid
        array; ties go to the lowest cluster number (``numpy.argmin``).
        """
        return int(numpy.argmin(f_similarity(obj, self.centroids())))

In [62]:
def f_similarity(a, b) -> float:
    """Absolute difference between ``a`` and ``b``; smaller means closer."""
    return abs(a - b)


# Toy data: the integers 0..19, split into two clusters.
objs = numpy.arange(0, 20)

kmeans = KMeans(k=2)
kmeans.fit(objs, f_similarity)

# Show which object indices ended up in each cluster.
for i in range(kmeans.k):
    print('cluster {} : {}'.format(i, kmeans.cluster(i)))

cluster 0 : [1 2 5 4 6 9 0 3 7 8]
cluster 1 : [10 19 18 13 11 17 14 12 16 15]


In [48]:
# Scratch check mirroring the `objs[idx].copy()` call in KMeans.fit:
# copying a single element of a 1-D integer array gives back a scalar
# (displayed as 0 below), not an array.
# NOTE: this rebinds the global `objs` that the demo cell above also uses.
objs = numpy.array([0, 1])
objs[0].copy()

0

In [7]:
# Scratch experiment: an object-dtype array whose slots hold whole ndarrays.
# Slot 0 receives an independent copy of `b`; slot 1 is never assigned and
# shows up as None in the output below.
a = numpy.empty(shape=2, dtype=numpy.ndarray)
b = numpy.zeros(shape=2)
a[0] = b.copy()
a, b

(array([array([0., 0.]), None], dtype=object), array([0., 0.]))

In [8]:
# In-place add on the array stored in slot 0: a[0] changes while `b` stays
# at zeros (see output below), because slot 0 was filled with b.copy().
# Depends on `a` and `b` from the previous cell and is not idempotent:
# every re-run adds 1 again.
a[0] += 1
a, b

(array([array([1., 1.]), None], dtype=object), array([0., 0.]))

In [9]:
# Scratch check: numpy.copy on a plain Python int returns a 0-d array
# (displayed as array(1) below), not a Python scalar.
numpy.copy(1)

array(1)

In [23]:
# Scratch check: numpy.full builds an array of the given shape with every
# entry set to fill_value. Despite the name `lists`, the result is a plain
# integer ndarray (see output below), not an array of Python lists.
lists = numpy.full(shape=3, fill_value=1)
lists

array([1, 1, 1])