In [6]:
import numpy as np 

In [7]:
def k_means(data, k, num_iter, initial_centroids):
    """
    Run k-means (Lloyd's algorithm) from the given starting centroids.

    Each iteration assigns every point to its nearest centroid (Euclidean
    distance, ties going to the lowest centroid index) and then moves each
    centroid to the mean of the points assigned to it. A centroid that
    attracts no points keeps its previous position.

    Parameters:
        data: a sequence of n points, either scalars (shape (n,)) or
            equal-length vectors (shape (n, d))
        k: number of clusters; the first k entries of initial_centroids
            are the ones that take part in the clustering
        num_iter: maximum number of assign/update iterations
        initial_centroids: a sequence of at least k starting centroids with
            the same dimensionality as the points; it is never modified

    Returns:
        final_centroids: a list of floats for scalar data, or a list of
            lists of floats for vector data

    Raises:
        ValueError: if k is smaller than 1 or larger than the number of
            initial centroids, or if the points and the centroids do not
            have matching dimensionality
    """
    # np.array (not asarray) always copies, so the caller's centroids are
    # left untouched no matter how many updates happen below.
    centroids = np.array(initial_centroids, dtype=float)
    points = np.asarray(data, dtype=float)

    if centroids.ndim not in (1, 2):
        raise ValueError("initial_centroids must be a sequence of scalars or of vectors")
    if not 1 <= k <= centroids.shape[0]:
        raise ValueError("k must be between 1 and the number of initial centroids")
    if points.size == 0:
        # Nothing to cluster: every centroid stays where it started.
        return centroids.tolist()
    if points.ndim != centroids.ndim:
        raise ValueError("data and initial_centroids must have the same dimensionality")

    # Treat scalar points as 1-component vectors so one code path serves both.
    scalar_points = points.ndim == 1
    if scalar_points:
        points = points[:, np.newaxis]
        centroids = centroids[:, np.newaxis]
    elif points.shape[1] != centroids.shape[1]:
        raise ValueError("data and initial_centroids must have the same dimensionality")

    for _ in range(num_iter):
        # Distance from every point to each of the k active centroids: (n, k).
        offsets = points[:, np.newaxis, :] - centroids[np.newaxis, :k, :]
        nearest = np.linalg.norm(offsets, axis=2).argmin(axis=1)

        updated = centroids.copy()
        for j in range(k):
            members = points[nearest == j]
            # An empty cluster keeps its centroid instead of becoming NaN.
            if len(members) > 0:
                updated[j] = members.mean(axis=0)

        # Once no centroid moves, further iterations cannot change anything.
        if np.array_equal(updated, centroids):
            break
        centroids = updated

    if scalar_points:
        return centroids[:, 0].tolist()
    return centroids.tolist()

In [8]:
# Toy example: seven scalar observations to be grouped into three clusters.
data = [3, 6, 7, 9, 10, 11, 14]
k = 3
num_iter = 100
initial_centroids = [4, 7, 14]

# Cluster the observations and show the centroids that come out.
print(
    k_means(
        data=data,
        k=k,
        num_iter=num_iter,
        initial_centroids=initial_centroids,
    )
)

[3.0, 8.0, 12.5]
