In [2]:
import pandas as pd 
import numpy as np

In [6]:
class Custom_KMeans:
    '''
    K-Means clustering (Lloyd's algorithm) using Euclidean distance.

    Parameters:
    K: int, default 2, number of clusters
    random_state: int, default 42, seed used to draw the initial centroids
    max_iter: int, default 300, upper bound on assignment/update rounds

    Attributes set by fit():
    centroids: array of shape (K, n_features), current cluster centres
    initial_centroids: copy of the K samples drawn as starting centroids
    labels: cluster index of every training sample after the last round
    prev_labels: labels from the round before the last one
    '''

    def __init__(self, K=2, random_state=42, max_iter=300):
        self.K = K
        self.random_state = random_state
        self.max_iter = max_iter

    def __is_pandas(self, X):
        # True when X is a pandas DataFrame and has to be unwrapped to NumPy.
        return isinstance(X, pd.DataFrame)

    def __to_numpy(self, X):
        # DataFrames are unwrapped via .values; anything else goes through
        # np.asarray so plain nested lists are accepted as well.
        return X.values if self.__is_pandas(X) else np.asarray(X)

    def fit(self, X, y=None):
        '''
        Cluster the rows of X and store the resulting centroids and labels.

        X: 2-D array-like or DataFrame, one sample per row.
        y: ignored, accepted only for API symmetry with estimators.

        Returns self so calls can be chained.
        Raises ValueError when K is not in [1, n_samples] or max_iter < 1.
        '''
        X = self.__to_numpy(X)
        n_samples = len(X)

        if not 1 <= self.K <= n_samples:
            raise ValueError(
                f"K must be between 1 and the number of samples ({n_samples}), got {self.K}"
            )
        if self.max_iter < 1:
            raise ValueError(f"max_iter must be at least 1, got {self.max_iter}")

        # A private legacy generator yields the same draw as seeding the global
        # NumPy RNG with this value, without clobbering the caller's RNG state.
        rng = np.random.RandomState(self.random_state)

        self.centroids = X[rng.choice(n_samples, self.K, replace=False)]
        self.initial_centroids = self.centroids.copy()
        self.prev_labels, self.labels = None, np.zeros(n_samples, dtype=int)

        for iteration in range(self.max_iter):
            self.prev_labels = self.labels
            self.labels = self.predict(X)
            self.__update_centroid(X)
            # The labels held before the first round are only a placeholder, so
            # convergence is judged from the second round onwards.
            if iteration > 0 and np.array_equal(self.labels, self.prev_labels):
                break
        return self

    def predict(self, X):
        '''
        Return, for every row of X, the index of the nearest centroid.

        Squared Euclidean distance is used; it selects the same nearest
        centroid as the plain distance because the square root is monotonic.
        '''
        X = self.__to_numpy(X)
        # One column of squared distances per centroid -> shape (n_samples, K).
        squared_dist = np.stack(
            [((X - centre) ** 2).sum(axis=1) for centre in self.centroids],
            axis=1,
        )
        return np.argmin(squared_dist, axis=1)

    def __update_centroid(self, X):
        # Move every centroid to the mean of its members. A cluster without
        # members keeps its previous position: averaging an empty slice would
        # yield NaN and poison every later distance computation.
        updated = np.array(self.centroids, dtype=float)
        for k in range(self.K):
            members = X[self.labels == k]
            if len(members):
                updated[k] = members.mean(axis=0)
        self.centroids = updated


In [7]:
# Toy dataset: six samples with three features each.
X_toy = np.array([
    [0.1, 0.2, 0.3],
    [0.7, 0.5, 0.2],
    [0.1, 0.2, 0.2],
    [0.9, 0.7, 3.5],
    [0.2, 0.4, 1.4],
    [0.4, 0.1, 0.5],
])

In [8]:
# Fit the custom K-Means (2 clusters, seed 42) on the toy data.
# fit() returns the estimator itself, so the fitted object is kept directly.
custom_kmean = Custom_KMeans(K=2, random_state=42).fit(X_toy)

In [9]:
# Label every toy sample with the index of its nearest fitted centroid.
pred = custom_kmean.predict(X_toy)

In [10]:
# Insert the predicted labels as a new column at index 3 so they can be viewed
# next to the features.
# NOTE(review): this rebinds X_toy, so re-running the cell inserts another
# column; a later cell removes the column again with np.delete.
X_toy = np.insert(X_toy, 3, pred, axis=1)

In [11]:
# Show the samples together with the label column inserted at index 3.
X_toy

array([[0.1, 0.2, 0.3, 0. ],
       [0.7, 0.5, 0.2, 0. ],
       [0.1, 0.2, 0.2, 0. ],
       [0.9, 0.7, 3.5, 1. ],
       [0.2, 0.4, 1.4, 1. ],
       [0.4, 0.1, 0.5, 0. ]])

In [12]:
# Remove the column at index 3 (the inserted labels), leaving the three feature columns.
X_toy = np.delete(X_toy, 3, axis=1)

In [13]:
# Confirm that only the three feature columns remain.
X_toy

array([[0.1, 0.2, 0.3],
       [0.7, 0.5, 0.2],
       [0.1, 0.2, 0.2],
       [0.9, 0.7, 3.5],
       [0.2, 0.4, 1.4],
       [0.4, 0.1, 0.5]])

In [14]:
# Seed for the manual step-by-step walkthrough below (same value that was
# passed to Custom_KMeans earlier).
random_state = 42

In [15]:
# Number of clusters used in the manual walkthrough.
K = 2

In [16]:
# Seed NumPy's global random generator so the centroid draw that follows is reproducible.
np.random.seed(random_state)

In [17]:
# Draw K distinct row indices (sampling without replacement) and use those
# rows of X_toy as the starting centroids.
centroids = X_toy[np.random.choice(len(X_toy), K, replace=False)]

In [18]:
# Show the starting centroids.
centroids

array([[0.1, 0.2, 0.3],
       [0.7, 0.5, 0.2]])

In [19]:
# Keep a handle on the starting centroids. This binds the same array object
# (no copy); the later cells rebind `centroids` to new arrays rather than
# modifying it in place, so this reference keeps the starting values.
initial_centroids = centroids
initial_centroids

array([[0.1, 0.2, 0.3],
       [0.7, 0.5, 0.2]])

In [20]:
# No previous assignment exists yet; every sample starts with the placeholder label 0.
prev_labels, labels = None, np.zeros(len(X_toy), dtype=int)

In [21]:
# Inspect the previous assignment (still None at this point).
print(prev_labels)

None


In [22]:
# Inspect the placeholder labels.
print(labels)

[0 0 0 0 0 0]


In [23]:
# Convergence test used as the loop condition: True only when every label
# equals the previous assignment. prev_labels is None here.
print(np.all(labels == prev_labels))

False


In [25]:
# Remember the current assignment before it is recomputed.
prev_labels = labels

In [26]:
# Assignment step: each sample gets the index of the centroid with the
# smallest Euclidean distance.
labels = np.array([
    np.sqrt(((centroids - sample) ** 2).sum(axis=1)).argmin()
    for sample in X_toy
])

In [27]:
# Show the labels produced by the first assignment step.
labels

array([0, 1, 0, 1, 0, 0])

In [28]:
# Samples currently assigned to cluster 0.
X_toy[labels==0]

array([[0.1, 0.2, 0.3],
       [0.1, 0.2, 0.2],
       [0.2, 0.4, 1.4],
       [0.4, 0.1, 0.5]])

In [29]:
# Samples currently assigned to cluster 1.
X_toy[labels==1]

array([[0.7, 0.5, 0.2],
       [0.9, 0.7, 3.5]])

In [30]:
# Update step: every centroid moves to the feature-wise mean of its members.
centroids = np.vstack(
    [X_toy[labels == cluster].mean(axis=0) for cluster in range(K)]
)
centroids

array([[0.2  , 0.225, 0.6  ],
       [0.8  , 0.6  , 1.85 ]])

In [31]:
# Assignment from before the first assignment step (the placeholder labels).
print(prev_labels)

[0 0 0 0 0 0]


In [32]:
# Assignment produced by the first assignment step.
print(labels)

[0 1 0 1 0 0]


In [33]:
# Convergence test: are the new labels identical to the previous ones?
print(np.all(labels ==prev_labels))

False


In [41]:
# Next round: store the current assignment before recomputing it.
prev_labels = labels

In [42]:
# Assignment step against the updated centroids: nearest centroid by
# Euclidean distance for every sample.
labels = np.array([
    np.sqrt(((centroids - sample) ** 2).sum(axis=1)).argmin()
    for sample in X_toy
])

In [43]:
# Update step: recompute every centroid as the mean of its current members.
centroids = np.vstack(
    [X_toy[labels == cluster].mean(axis=0) for cluster in range(K)]
)

In [44]:
# Convergence test: True means the assignment did not change in this round.
print(np.all(labels ==prev_labels))

True


In [45]:
# Current assignment.
print(labels)

[0 0 0 1 1 0]


In [46]:
# Previous assignment, shown for comparison with the current one.
print(prev_labels)

[0 0 0 1 1 0]


In [47]:
# Display the samples with the manually computed labels inserted as a column
# at index 3. The result is only shown; X_toy itself is not rebound.
np.insert(X_toy, 3, labels, axis=1)

array([[0.1, 0.2, 0.3, 0. ],
       [0.7, 0.5, 0.2, 0. ],
       [0.1, 0.2, 0.2, 0. ],
       [0.9, 0.7, 3.5, 1. ],
       [0.2, 0.4, 1.4, 1. ],
       [0.4, 0.1, 0.5, 0. ]])