In [15]:
import numpy as np

In [16]:
class KMeans():
    def __init__(self, n_clusters):
        """
        Args:
            n_clusters (int): Number of clusters to cluster the given data into.
        """
        
        self.n_clusters = n_clusters
        self.means = None
        self.labels = None
        self.assignments_change = None
    def fit(self, features):
        """
        Fit KMeans to the given data using `self.n_clusters` number of clusters.
        Features can have greater than 2 dimensions.

        Args:
            features (np.ndarray): array containing inputs of size
                (n_samples, n_features).
        Returns:
            None (saves model - means - internally)
        """
        self.means = []
        self.labels = [0 for i in range(len(features))]
        for i in range(self.n_clusters):
            self.means.append(features[i])
        self.assignments_change = True
        time = 0
        self.means = list(self.means)

        while self.assignments_change == True:
            self.update_assignments(features)
            self.update_means(features)
            time += 1
            if time == 10:
                break

    def update_assignments(self, features):
        new_labels = []
        for i in range(len(features)):
            distances = []
            for j in range(len(self.means)):
                distances.append(np.sum((features[i] - self.means[j]) ** 2))
            new_labels.append(np.argmin(distances))
        if new_labels == self.labels:
            self.assignments_change = False
        else:
            self.labels = new_labels

    def update_means(self, features):
        self.means = []
        for i in range(self.n_clusters):
            new_mean = []
            for j in range(len(self.labels)):
                if self.labels[j] == i:
                    new_mean.append(features[j])
            self.means.append(np.mean(np.array(new_mean), axis=0))

    def predict(self, features):
        """
        Given features, an np.ndarray of size (n_samples, n_features), predict cluster
        membership labels.

        Args:
            features (np.ndarray): array containing inputs of size
                (n_samples, n_features).
        Returns:
            predictions (np.ndarray): predicted cluster membership for each features,
                of size (n_samples,). Each element of the array is the index of the
                cluster the sample belongs to.
        """
        predict = []
        for i in range(len(features)):
            distances = []
            for j in range(len(self.means)):
                distances.append(np.sum((features[i] - self.means[j]) ** 2))
            predict.append(np.argmin(distances))

        return predict
                

In [17]:
kmean = KMeans(3)

In [19]:
a=np.random.random((10,10))

In [20]:
a

array([[0.15192444, 0.63138776, 0.30129794, 0.96498439, 0.09694661,
        0.20838533, 0.79105204, 0.44708058, 0.4424925 , 0.94162354],
       [0.93432911, 0.79182538, 0.21257301, 0.75108243, 0.93470286,
        0.93631129, 0.77239441, 0.53562628, 0.15973785, 0.75857229],
       [0.36572489, 0.83589865, 0.16557974, 0.99412162, 0.51452886,
        0.29169248, 0.58383211, 0.24824584, 0.13000273, 0.61916447],
       [0.98379366, 0.53506944, 0.69333891, 0.13523703, 0.42159851,
        0.34189126, 0.74463992, 0.29100152, 0.78800413, 0.26067651],
       [0.99242071, 0.73384425, 0.41044478, 0.03141162, 0.3397715 ,
        0.08985014, 0.31419869, 0.29872631, 0.75530335, 0.93879798],
       [0.96393321, 0.75341797, 0.75456611, 0.21880317, 0.57331251,
        0.27328443, 0.64036211, 0.20157068, 0.05196727, 0.33247168],
       [0.82019292, 0.42873431, 0.12617901, 0.82815865, 0.36559459,
        0.52915693, 0.87264132, 0.12663032, 0.74077913, 0.61009514],
       [0.56151282, 0.21184024, 0.6574831

In [21]:
kmean.fit(a)


In [22]:
print(kmean.labels)

[0, 1, 2, 1, 0, 2, 1, 2, 2, 2]


In [23]:
b = np.random.random((10,10))

In [25]:
kmean.predict(b)

[1, 2, 2, 0, 0, 1, 2, 2, 2, 1]