In [131]:
import numpy as np

In [132]:
class ScratchKMeans():
    """
    K-means scratch implementation

    Parameters
    ----------
    n_clusters : int
      Number of clusters
    n_init : int
      How many times to restart the calculation from a new random set of
      initial center points
    max_iter : int
      Maximum number of iterations in one calculation
    tol : float
      Margin of error between the center point and the center of gravity,
      which is the reference for ending the iteration: a run stops as soon
      as the center points move by no more than this amount
    verbose : bool
      True to output the learning process

    Attributes
    ----------
    mu_ : ndarray, shape (n_clusters, n_features), or None
      Center points of the run with the lowest total SSE (None before fit)
    sse_ : float or None
      Total SSE of that run (None before fit)
    """

    def __init__(self, n_clusters, n_init, max_iter, tol, verbose=False):
        # Record hyperparameters as attributes
        self.n_clusters = n_clusters
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        # Results of fit(); stay None until fit() has completed a run.
        self.mu_ = None
        self.sse_ = None

    def _init_mu_k(self, X):
        """
        Pick n_clusters distinct rows of X as the initial center points.
        """
        # replace=False guarantees that no sample is chosen twice.
        rows = np.random.choice(X.shape[0], size=self.n_clusters, replace=False)
        return X[rows, :]

    def _compute_SSE(self, X, r, mu):
        '''
        Calculate SSE
        Parameters
        ----------
        X : The following forms of ndarray, shape (n_samples, n_features)
          Features of training data
        r : shape (n_samples, self.n_clusters)
          cluster assignment
        mu : shape (self.n_clusters, n_features)
          center points
        Return
        ----------
        SSE: shape (n_features, )
          SSE, accumulated separately for every feature
        '''
        SSE = 0.0
        for k in range(self.n_clusters):
            # r[:, k] is the membership indicator of cluster k, so the
            # product sums the squared deviations of its members only.
            SSE += r[:, k] @ ((X - mu[k, :]) ** 2)
        return SSE

    def _allocate_r(self, X, mu):
        """
        Allocate data points X to the nearest center point.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
          Data points
        mu : ndarray, shape (self.n_clusters, n_features)
          Center points

        Return
        ----------
        r : ndarray of int, shape (n_samples, self.n_clusters)
          One-hot assignment; exactly one 1 per row. When a point is
          equally close to several centers the lowest cluster index wins.
        """
        n_samples = X.shape[0]
        distance_matrix = np.zeros((n_samples, self.n_clusters))
        for k in range(self.n_clusters):
            distance_matrix[:, k] = np.linalg.norm(X - mu[k, :], axis=1)
        # Build the one-hot matrix straight from the argmin index instead of
        # comparing against the row minima: that avoids broadcasting a flat
        # vector against the wrong axis and cannot mark two clusters on ties.
        r = np.zeros((n_samples, self.n_clusters), dtype=int)
        r[np.arange(n_samples), np.argmin(distance_matrix, axis=1)] = 1
        return r

    def _move_mu(self, X, r, mu=None):
        """
        Moves mu to the mean (center of gravity)

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
          Data points
        r : ndarray, shape (n_samples, self.n_clusters)
          cluster assignment
        mu : ndarray, shape (self.n_clusters, n_features), optional
          Current center points. A cluster without members keeps its
          current center when this is given; otherwise its center is NaN.

        Return
        ----------
        new_mu : ndarray, shape (self.n_clusters, n_features)
          Updated center points
        """
        new_mu = np.zeros((self.n_clusters, X.shape[1]))
        for k in range(self.n_clusters):
            members = X[r[:, k] == 1]
            if len(members) > 0:
                new_mu[k, :] = members.mean(axis=0)
            elif mu is not None:
                # Empty cluster: leave the center where it was.
                new_mu[k, :] = mu[k, :]
            else:
                # The mean of nothing is undefined.
                new_mu[k, :] = np.nan
        return new_mu

    def fit(self, X):
        """
        Calculate clustering by K-means

        Runs the algorithm n_init times from random initial center points
        and stores the center points of the run with the lowest total SSE
        in self.mu_ (and that SSE in self.sse_).

        Parameters
        ----------
        X : The following forms of ndarray, shape (n_samples, n_features)
            Features of training data
        """
        X = np.asarray(X, dtype=float)
        best_mu = None
        best_sse = None
        for init in range(self.n_init):
            mu = self._init_mu_k(X)
            for _ in range(self.max_iter):
                r = self._allocate_r(X, mu)
                new_mu = self._move_mu(X, r, mu)
                shift = np.linalg.norm(new_mu - mu)
                mu = new_mu
                if shift <= self.tol:
                    break
            # Re-allocate so that the SSE matches the final center points.
            r = self._allocate_r(X, mu)
            sse = float(np.sum(self._compute_SSE(X, r, mu)))
            if self.verbose:
                print("init {}: SSE = {}".format(init, sse))
            if best_sse is None or sse < best_sse:
                best_mu = mu
                best_sse = sse
        self.mu_ = best_mu
        self.sse_ = best_sse

    def predict(self, X):
        """
        Calculate which cluster the input data belongs to

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
          Data points

        Return
        ----------
        labels : ndarray of int, shape (n_samples,)
          Index of the nearest fitted center point for every sample

        Raises
        ------
        RuntimeError
          If no center points are available because fit has not been run
        """
        mu = getattr(self, "mu_", None)
        if mu is None:
            raise RuntimeError("fit must be called before predict")
        X = np.asarray(X, dtype=float)
        # The assignment is one-hot, so argmax recovers the cluster index.
        return np.argmax(self._allocate_r(X, mu), axis=1)

In [133]:
# Row-wise Euclidean distance between the point `a` and every row of `b`.
a = np.array([0, 0])
b = np.array([
    [3, 4],
    [6, 5],
])
np.linalg.norm(b - a, axis=1)

array([5.        , 7.81024968])

In [134]:
# Display the 2x2 array `b` before the next cells modify it in place.
b

array([[3, 4],
       [6, 5]])

In [135]:
# Column index of the smallest entry in each row of `b`.
b.argmin(axis=1)

array([0, 1])

In [136]:
# Overwrite the smallest entry of every row of `b` with 1 (in place),
# then display the modified array.
b[np.arange(b.shape[0]), b.argmin(axis=1)] = 1
b

array([[1, 4],
       [6, 1]])

In [138]:
# Mark each row's minimum with -1 and everything else with 0.
# The row minima are kept as a column vector (keepdims=True) so the
# comparison broadcasts row by row; a flat vector of minima would be
# matched against the columns instead and only works by coincidence on a
# square array.
np.where(b == b.min(axis=1, keepdims=True), -1, 0)

array([[-1,  0],
       [ 0, -1]])

In [90]:
# Average only the rows of `c` whose flag in `q` equals 1.
q = np.array([1, 0, 1])
c = np.array([
    [1, 2, 3],
    [2, 3, 4],
    [4, 5, 6],
])
np.mean(c[q == 1], axis=0)

array([2.5, 3.5, 4.5])