In [2]:
using Clustering

n_clusters = 20

# make a random dataset with 100 random 5-dimensional points (one point per column)
X = rand(5, 100)

# cluster X into `n_clusters` clusters using K-means
R = kmeans(X, n_clusters; maxiter=200)

# compare against `n_clusters` rather than a literal so the check stays valid
# when the requested number of clusters is changed above
@assert nclusters(R) == n_clusters # verify the number of clusters

a = assignments(R) # get the assignments of points to clusters
c = counts(R) # get the cluster sizes
M = R.centers # get the cluster centers (one centroid per column)

println("number of centroids ", size(M,2))

number of centroids 20


We can find cluster assignments for new data using the following code:

In [5]:
using Distances 

"""
    get_cluster_assignments(X, R::KmeansResult, distance=SqEuclidean())

Assign every column of `X` to the nearest centroid stored in `R.centers`.

# Arguments
- `X`: data matrix with one point per column; it must have as many rows as `R.centers`.
- `R`: k-means result whose `centers` field holds one centroid per column.
- `distance`: `SemiMetric` called as `distance(point, centroid)`.

# Returns
A `Vector{Int}` with one entry per column of `X`, holding the column index in
`R.centers` of the closest centroid. Ties go to the lowest index. An entry stays `0`
when no centroid compares strictly closer than the initial bound (for example when
every distance is `NaN`).

# Throws
- `DimensionMismatch` if `X` and `R.centers` have a different number of rows.
"""
function get_cluster_assignments(
    X::AbstractMatrix{T},
    R::KmeansResult,
    distance::SemiMetric=SqEuclidean(),
) where {T}
    centers = R.centers

    # Validate up front: an error raised inside `Threads.@threads` reaches the
    # caller wrapped in a task failure, which hides the real cause.
    if size(X, 1) != size(centers, 1)
        throw(DimensionMismatch(
            "X has $(size(X, 1)) rows but the cluster centers have $(size(centers, 1))",
        ))
    end

    # Seed the running minimum from the promoted element type so it does not
    # switch type when `X` and the centers use different element types.
    D = promote_type(T, eltype(centers))
    cluster_assignments = zeros(Int, size(X, 2))

    # Each iteration writes only its own slot of `cluster_assignments`.
    Threads.@threads for n in axes(X, 2)
        point = view(X, :, n)
        min_dist = typemax(D)
        cluster_assignment = 0

        for k in axes(centers, 2)
            dist = distance(point, view(centers, :, k))
            if dist < min_dist
                min_dist = dist
                cluster_assignment = k
            end
        end
        cluster_assignments[n] = cluster_assignment
    end

    return cluster_assignments
end

get_cluster_assignments (generic function with 2 methods)

In [7]:
# Assign each column of X to a centroid of R, using the default distance.
clusters = get_cluster_assignments(X, R);

In [8]:
# The recomputed labels should match the assignments stored in the k-means result.
clusters == assignments(R)

true

In [None]:
using Distances

"""
    get_cluster_assignments(X, centers::AbstractMatrix, distance=SqEuclidean())

Assign every column of `X` to the nearest column of `centers`.

# Arguments
- `X`: data matrix with one point per column; it must have as many rows as `centers`.
- `centers`: matrix with one centroid per column.
- `distance`: `SemiMetric` called as `distance(point, centroid)`.

# Returns
A `Vector{Int}` with one entry per column of `X`, holding the column index in
`centers` of the closest centroid. Ties go to the lowest index. An entry stays `0`
when no centroid compares strictly closer than the initial bound (for example when
`centers` has no columns or every distance is `NaN`).

# Throws
- `DimensionMismatch` if `X` and `centers` have a different number of rows.
"""
function get_cluster_assignments(
    X::AbstractMatrix{T},
    centers::AbstractMatrix{S},
    distance::SemiMetric=SqEuclidean(),
) where {T,S}
    # Validate up front: an error raised inside `Threads.@threads` reaches the
    # caller wrapped in a task failure, which hides the real cause.
    if size(X, 1) != size(centers, 1)
        throw(DimensionMismatch(
            "X has $(size(X, 1)) rows but the cluster centers have $(size(centers, 1))",
        ))
    end

    # `X` and `centers` may use different element types; seed the running
    # minimum from their promoted type so it does not switch type mid-loop.
    D = promote_type(T, S)
    cluster_assignments = zeros(Int, size(X, 2))

    # Each iteration writes only its own slot of `cluster_assignments`.
    Threads.@threads for n in axes(X, 2)
        point = view(X, :, n)
        min_dist = typemax(D)
        cluster_assignment = 0
        for k in axes(centers, 2)
            dist = distance(point, view(centers, :, k))
            if dist < min_dist
                min_dist = dist
                cluster_assignment = k
            end
        end
        cluster_assignments[n] = cluster_assignment
    end
    return cluster_assignments
end


In [None]:
# Pass the centroid matrix itself so this cell exercises the method that takes
# `centers` directly instead of the one that takes a `KmeansResult`.
clusters = get_cluster_assignments(X, R.centers);

In [None]:
# The recomputed labels should match the assignments stored in the k-means result.
clusters == assignments(R)

In [None]:
using RDatasets, Clustering, Plots
# Load the iris table from the "datasets" collection.
iris = dataset("datasets", "iris");

# Take the first four columns and transpose them so that each observation
# becomes one column of the feature matrix.
features = permutedims(Matrix(iris[:, 1:4]));

# Run K-means with 3 clusters.
result = kmeans(features, 3);

# Scatter petal length against petal width, colouring each point by the
# index of the cluster it was assigned to.
scatter(
    iris.PetalLength,
    iris.PetalWidth;
    marker_z=result.assignments,
    color=:lightrainbow,
    legend=false,
)