In [28]:
from typing import Iterable,Tuple,Sequence,Dict,List
from collections import defaultdict
from math import fsum,sqrt
from pprint import pprint
from random import sample
from functools import partial

In [21]:
# A point is a tuple of coordinates; the `...` leaves the number of dimensions open.
Point=Tuple[int,...]
# Centroids reuse the Point alias: they are tuples of coordinates as well.
# NOTE(review): centroids built by compute_centroid contain floats (mean returns
# a float), so the `int` element type is looser in practice than the alias says.
Centroid=Point

In [22]:
# Sample data set: six 3-dimensional integer points used to try out k_means below.
points=[(10,41,23),(22,30,29),(11,42,5),(20,32,4),(12,40,12),(21,36,23)]

In [23]:
def mean(data: Iterable[float]) -> float:
    """Return the arithmetic mean of ``data``.

    The values are added with ``math.fsum`` so that floating-point rounding
    errors do not accumulate across the summation.

    Args:
        data: Any iterable of numbers; one-shot iterators are accepted because
            the values are copied into a list before being summed and counted.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``data`` yields no values.
    """
    values = [*data]
    total = fsum(values)
    count = len(values)
    return total / count

In [24]:
def dist(p: Point, q: Point, fsum=fsum, sqrt=sqrt, zip=zip) -> float:
    """Return the Euclidean distance between points ``p`` and ``q``.

    Args:
        p: First point, a sequence of coordinates.
        q: Second point, a sequence of coordinates.
        fsum, sqrt, zip: Bound as default arguments to the functions of the
            same name; callers normally leave them alone.

    Returns:
        The square root of the sum of squared coordinate differences.

    Note:
        Coordinates are paired with ``zip``, so if the points differ in length
        the extra coordinates of the longer one are ignored.
    """
    squared_gaps = []
    for a, b in zip(p, q):
        squared_gaps.append((a - b) ** 2)
    return sqrt(fsum(squared_gaps))

In [34]:
def assign_data(centroids: Sequence[Centroid], data: Iterable[Point]) -> Dict[Centroid, List[Point]]:
    """Partition ``data`` by nearest centroid.

    Args:
        centroids: Candidate centroids; each must be hashable because it is
            used as a dictionary key.
        data: The points to distribute among the centroids.

    Returns:
        A plain dict mapping a centroid to the list of points that are closer
        to it (by ``dist``) than to any other centroid. Centroids that attract
        no point do not appear as keys. When two centroids are equally close,
        the one that comes first in ``centroids`` wins.

    Raises:
        ValueError: If ``centroids`` is empty while ``data`` is not.
    """
    clusters = {}
    for pt in data:
        nearest = min(centroids, key=lambda candidate: dist(pt, candidate))
        clusters.setdefault(nearest, []).append(pt)
    return clusters

In [27]:
def compute_centroid(groups: Iterable[Sequence[Point]]) -> List[Centroid]:
    """Return the centroid of every group of points.

    Args:
        groups: An iterable of point collections, one collection per cluster.

    Returns:
        One tuple per group, in the same order as ``groups``. Each tuple holds
        the ``mean`` of the group's points taken coordinate by coordinate.
    """
    centers = []
    for members in groups:
        # zip(*members) regroups the points' coordinates by axis.
        per_axis = zip(*members)
        centers.append(tuple(mean(axis) for axis in per_axis))
    return centers

In [35]:
def k_means(data, k=2, iterations=50, seed=None):
    """Cluster ``data`` with Lloyd's k-means algorithm and return the centroids.

    The initial centroids are ``k`` points drawn at random from ``data``. Each
    round assigns every point to its nearest centroid (``assign_data``) and
    then replaces the centroids by the per-group means (``compute_centroid``).

    Args:
        data: Iterable of points (tuples of numbers). It is copied into a list,
            so a one-shot iterator is fine.
        k: Number of initial centroids to draw from ``data``.
        iterations: Maximum number of assign/update rounds.
        seed: Optional seed for the initial draw. When given, a private
            ``random.Random(seed)`` is used so the result is reproducible and
            the global random state is left untouched. When ``None`` (the
            default) the module-level ``sample`` is used, as before.

    Returns:
        A list of centroids. With ``iterations=0`` this is simply the initial
        random draw. A centroid that attracts no points gets no group from
        ``assign_data`` and therefore disappears, so the list can end up
        shorter than ``k``.

    Raises:
        ValueError: From ``sample`` if ``k`` is negative or larger than the
            number of points.
    """
    from random import Random  # local import: only needed for the seeded draw

    data = list(data)
    draw = sample if seed is None else Random(seed).sample
    centroids = draw(data, k)
    for _ in range(iterations):
        labeled = assign_data(centroids, data)
        previous, centroids = centroids, compute_centroid(labeled.values())
        # Converged: identical centroids give identical assignments, so every
        # remaining round would recompute exactly this same list.
        if centroids == previous:
            break
    return centroids

In [38]:
# k equals the number of (distinct) points, so the random draw selects every
# point as an initial centroid; each point is then its own cluster and the
# result is just the input points with float coordinates.
k_means(points,k=6)

[(10.0, 41.0, 23.0),
 (22.0, 30.0, 29.0),
 (11.0, 42.0, 5.0),
 (20.0, 32.0, 4.0),
 (12.0, 40.0, 12.0),
 (21.0, 36.0, 23.0)]