In [None]:
import numpy as np
import random
class Kmeans:
    """Minimal k-means clustering (Lloyd's algorithm) on NumPy arrays.

    Parameters
    ----------
    n_clusters : int, default 2
        Number of centroids to fit.
    max_iter : int, default 100
        Upper bound on the number of assign/move iterations.

    Attributes
    ----------
    centroids : numpy.ndarray or None
        Current centroids, one per row; ``None`` until ``fit_predict`` runs.
    """

    def __init__(self, n_clusters = 2, max_iter = 100):
        # Hyperparameters are stored as given; no validation happens here.
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.centroids = None

    def fit_predict(self, X):
        """Fit the centroids to ``X`` and return a cluster label per sample.

        Initial centroids are ``n_clusters`` distinct rows of ``X`` chosen
        with ``random.sample``, so seeding the ``random`` module makes the
        run reproducible.

        Parameters
        ----------
        X : array-like
            Samples, one per row (a 1-D input is treated as scalar samples).

        Returns
        -------
        numpy.ndarray
            Integer labels indexing into ``self.centroids``.

        Raises
        ------
        ValueError
            Raised by ``random.sample`` when ``n_clusters`` exceeds the
            number of samples (or is negative).
        """
        X = np.asarray(X)

        # Pick distinct sample rows as the starting centroids.
        random_index = random.sample(range(X.shape[0]), self.n_clusters)
        self.centroids = X[random_index]

        for _ in range(self.max_iter):
            cluster_group = self.assign_cluster(X)

            # move_centroids reads the current centroids as a fallback for
            # empty clusters, so only overwrite them after it returns.
            new_centroids = self.move_centroids(X, cluster_group)
            converged = np.array_equal(new_centroids, self.centroids)
            self.centroids = new_centroids

            if converged:
                # Centroids did not move, so these labels already match them.
                return cluster_group

        # Iteration budget exhausted (or max_iter == 0): label the samples
        # against the final centroids so the result is consistent with
        # self.centroids and is always defined.
        return self.assign_cluster(X)

    def assign_cluster(self, X):
        """Return, for each sample, the index of its nearest centroid.

        Distances are Euclidean; ties go to the lowest centroid index.

        Parameters
        ----------
        X : array-like
            Samples, one per row.

        Returns
        -------
        numpy.ndarray
            One integer label per sample.
        """
        X = np.asarray(X)
        # Float centroids force the subtraction below into floating point,
        # so unsigned-integer samples cannot wrap around.
        centroids = np.asarray(self.centroids, dtype=float)

        # Treat 1-D inputs as single-feature data.
        if X.ndim == 1:
            X = X[:, np.newaxis]
        if centroids.ndim == 1:
            centroids = centroids[:, np.newaxis]

        # (n_samples, n_centroids, n_features) pairwise differences.
        diffs = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]

        # The square root is monotonic, so the squared distance is enough
        # to find the nearest centroid.
        squared_distance = np.square(diffs).sum(axis=2)
        return squared_distance.argmin(axis=1)

    def move_centroids(self, X, cluster_group):
        """Return new centroids as the mean of the samples in each cluster.

        A cluster with no assigned samples keeps its current centroid, so
        the number and order of centroids stay fixed and labels keep
        pointing at the right rows.

        Parameters
        ----------
        X : array-like
            Samples, one per row.
        cluster_group : array-like
            Cluster label for each sample.

        Returns
        -------
        numpy.ndarray
            Updated centroids, one per row.
        """
        X = np.asarray(X)
        cluster_group = np.asarray(cluster_group)

        if self.centroids is None:
            # No existing centroids to fall back on: produce one centroid
            # per label that actually occurs.
            return np.array([
                X[cluster_group == label].mean(axis=0)
                for label in np.unique(cluster_group)
            ])

        new_centroids = []
        for label, previous in enumerate(np.asarray(self.centroids)):
            members = X[cluster_group == label]
            # Taking the mean of an empty selection would give NaN.
            new_centroids.append(members.mean(axis=0) if len(members) else previous)

        return np.array(new_centroids)
        