### Step-by-step implementation

In [1]:
import numpy as np
import pandas as pd

# Read the 1-D iris sample from disk; the bare name on the last line makes
# the notebook render the resulting frame as the cell output.
data = pd.read_csv(filepath_or_buffer='iris_1D.csv')
data

Unnamed: 0,Petal_Length,Label
0,1.4,0.0
1,1.0,0.0
2,1.5,0.0
3,3.1,1.0
4,3.8,1.0
5,4.1,1.0


In [2]:
# 1. data
# Pull the petal-length column out of the frame and convert it to a
# NumPy array, which is what the clustering code below operates on.

petal_lengths = data['Petal_Length']
X = petal_lengths.to_numpy()
print(X)

[1.4 1.  1.5 3.1 3.8 4.1]


In [4]:
# K-means on the 1-D array X, written out step by step:
#   1. pick initial centroids, 2. assign every point to its nearest centroid,
#   3. move each centroid to the mean of its points, 4. repeat until the
#   centroids stop moving or max_iters rounds have been run.
k = 2            # number of clusters
max_iters = 2    # upper bound on assignment/update rounds

# initialize centroids with the first k samples (tied to k rather than a
# literal 2); copy so the centroids are an independent array, not a view of X
centroids = X[:k].copy()
print('Init centroids: ', centroids)

for _ in range(max_iters):
    # assign points to nearest centroid: broadcasting (n, 1) against (k,)
    # gives an (n, k) distance matrix. For 1-D data the Euclidean distance
    # sqrt((x - c)**2) is simply |x - c|.
    distances = np.abs(X[:, np.newaxis] - centroids)
    labels = np.argmin(distances, axis=1)
    print('Distances: ', distances)
    print('labels: ', labels)

    # update centroids; a cluster that received no points keeps its previous
    # centroid instead of turning into NaN (the mean of an empty slice)
    new_centroids = np.array([
        X[labels == i].mean() if np.any(labels == i) else centroids[i]
        for i in range(k)
    ])

    # check for convergence with a floating-point tolerance rather than
    # exact equality
    if np.allclose(centroids, new_centroids):
        break

    centroids = new_centroids

    # print
    print("Labels:", labels)
    print("Centroids:", centroids)
    print()

# calculate WCSS: sum of squared distances from each point to the centroid
# of the cluster it is assigned to
wcss = np.sum([np.sum(np.square(X[labels == i] - centroids[i])) for i in range(k)])
print("WCSS:", wcss)

Init centroids:  [1.4 1. ]
Distances:  [[0.  0.4]
 [0.4 0. ]
 [0.1 0.5]
 [1.7 2.1]
 [2.4 2.8]
 [2.7 3.1]]
labels:  [0 1 0 0 0 0]
Labels: [0 1 0 0 0 0]
Centroids: [2.78 1.  ]

Distances:  [[1.38 0.4 ]
 [1.78 0.  ]
 [1.28 0.5 ]
 [0.32 2.1 ]
 [1.02 2.8 ]
 [1.32 3.1 ]]
labels:  [1 1 1 0 0 0]
Labels: [1 1 1 0 0 0]
Centroids: [3.66666667 1.3       ]

WCSS: 0.6666666666666662


### Using sklearn

In [1]:
import numpy as np
import pandas as pd

# Reload the CSV for the scikit-learn section and extract the petal-length
# column as a NumPy array.
data = pd.read_csv(filepath_or_buffer='iris_1D.csv')
X = data.loc[:, 'Petal_Length'].to_numpy()

In [2]:
from sklearn.cluster import KMeans

# Fit scikit-learn's KMeans on the same data.
# random_state pins the random centroid initialisation so repeated runs give
# the same clustering and the same cluster numbering; n_init is set explicitly
# because its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)

# X is 1-D here; reshape(-1, 1) turns it into the 2-D
# (n_samples, n_features) layout that fit() is given.
kmeans.fit(X.reshape(-1, 1))
labels = kmeans.labels_
print(labels)
print(kmeans.inertia_)  # sum of squared distances of samples to their closest centre

# list every sample next to the cluster it was assigned to
for x, label in zip(X, labels):
    print(f"Cluster {label}: {x}")

[0 0 0 1 1 1]
0.6666666666666662
Cluster 0: 1.4
Cluster 0: 1.0
Cluster 0: 1.5
Cluster 1: 3.1
Cluster 1: 3.8
Cluster 1: 4.1
