**1a) K-Means algorithm**

Input: 
n: Number of clusters
max_iter: Maximum iterations


In [3]:
import numpy as np

class KMeans:
    def __init__(self, k, max_iter):
        self._k = k
        self._max_iter = max_iter
        
    def __normalize(self, X):
        (m, n) = X.shape      # m samples with n features
        std = np.std(X, axis=0)
        means = np.mean(X, axis=0)
        return (X - means)/std
    
    def __centroid_index(self, X, centroids):
        (m, n) = X.shape
        distances = np.ndarray((m,self._k))
        
        for i in range(m):
            for j in range(self._k):
                distances[i,j] = np.linalg.norm(X[i,:] - centroids[j, :])

        minimum_distances_indexes = np.argmin(distances,axis=1)    # This is the centroid index for all samples due to minimization. Should be of dimension m
        return minimum_distances_indexes
        
    def fit_predict(self, X):
        
        X_norm = self.__normalize(X)
        
        (m, n) = X.shape      # m samples with n features
        centroids = np.random.rand(self._k,n)     # k centroids with n features
        
        S_old = np.zeros(m,)
        for _ in range(self._max_iter):
            
            # Step 1) Find which centroid each sample belongs to due to minimum distance 
            S = self.__centroid_index(X_norm, centroids)    # Will be a array of dimension m with a content ranging from 0..k-1 (cluster number) in each element
            if np.all(S_old == S) :
                return S, centroids
            
            
            # Step 2) Based on each collection of points, find the new centroid center.
            # new_centroids = np.array(self._k,n)
            for i in range(self._k):
                cluster_points_i = X[S == i,:]
                centroids[i] = np.mean(cluster_points_i, axis=0)  # Update centroid position
                print(centroids[i])
            
            S_old = S

np.random.seed(42)
X = np.random.rand(10, 2)
kmeans = KMeans(3, 10)
kmeans.fit_predict(X)



            
            
        
        
        
    
    

[0.64937415 0.45257483]
[0.17504237 0.4325829 ]
[0.19756231 0.96031208]
[0.72185053 0.50635672]
[0.26850772 0.28884615]
[0.15106941 0.92893343]


(array([2, 0, 1, 2, 0, 2, 0, 1, 1, 1], dtype=int64),
 array([[0.72185053, 0.50635672],
        [0.26850772, 0.28884615],
        [0.15106941, 0.92893343]]))