# Weekly project 6
Today we will continue the work from Monday.
We will follow the same style as last week.

Weekly project:
- You will need to implement your own k-means algorithm (so you are not allowed to use the one in *sklearn*).
- It should be able to cluster each of the different figures.
- Extend your k-means so that it finds the optimal number of clusters.
Challenge:
- Implement the mean shift clustering algorithm


In [None]:
import numpy as np
import open3d as o3d
import copy
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans, k_means
%matplotlib notebook

def draw_labels_on_model(pcl, labels):
    """Display a copy of ``pcl`` with each point coloured by its label.

    Labels are divided by the largest label (or by 1 when that label is not
    positive) and looked up in matplotlib's "tab20" colormap. Points whose
    label is negative are painted black. The point cloud passed in is
    deep-copied, so its own colours are not modified.
    """
    highest = labels.max()
    scale = highest if highest > 0 else 1
    rgba = plt.get_cmap("tab20")(labels / scale)
    # Negative labels get an all-zero colour row, i.e. black.
    rgba[labels < 0] = 0

    coloured = copy.deepcopy(pcl)
    # Only the RGB channels are handed to open3d; alpha is dropped.
    coloured.colors = o3d.utility.Vector3dVector(rgba[:, :3])
    o3d.visualization.draw_geometries([coloured])

# Spacing between neighbouring shapes along the x and y axes.
d = 4
# First row (y = 0): tetrahedron, octahedron and icosahedron.
mesh = o3d.geometry.TriangleMesh.create_tetrahedron().translate((-d, 0, 0))
mesh += o3d.geometry.TriangleMesh.create_octahedron().translate((0, 0, 0))
mesh += o3d.geometry.TriangleMesh.create_icosahedron().translate((d, 0, 0))
# Second row (y = -d): torus and two Moebius strips (1 and 2 twists).
mesh += o3d.geometry.TriangleMesh.create_torus().translate((-d, -d, 0))
mesh += o3d.geometry.TriangleMesh.create_moebius(twists=1).translate((0, -d, 0))
mesh += o3d.geometry.TriangleMesh.create_moebius(twists=2).translate((d, -d, 0))

# Sample 100 000 points from the combined mesh; xyz holds their coordinates
# as a numpy array and is the data that k-means is applied to.
point_cloud = mesh.sample_points_uniformly(int(1e5))
xyz = np.asarray(point_cloud.points)

In [None]:
def RandomCentroids(points, k):
    """Pick ``k`` rows of ``points`` at random to use as initial centroids.

    Rows are drawn without replacement so that two centroids never start on
    the same sample; identical starting centroids leave one of them with an
    empty cluster on the first assignment. Only when ``k`` exceeds the number
    of samples (distinct rows are then impossible) does the draw fall back to
    sampling with replacement.

    Args:
        points: 2-D array-like of shape (n_samples, n_features).
        k: number of centroids to select.

    Returns:
        Float array of shape (k, n_features) holding copies of the chosen rows.
    """
    points = np.asarray(points)
    n_samples, n_features = points.shape
    chosen = np.random.choice(n_samples, size=k, replace=k > n_samples)
    # Copy into a fresh float array so callers never alias the input data.
    centroids = np.zeros((k, n_features))
    centroids[:] = points[chosen]
    return centroids

def EuclidianDistance(point, centroids):
    """Return the Euclidean distance from ``point`` to every centroid.

    Args:
        point: a single sample; it is flattened to one row of features.
        centroids: array whose first axis indexes the centroids; each
            centroid is flattened to one row of features.

    Returns:
        1-D array with one entry per centroid, entry ``i`` being the
        Euclidean (L2) distance between ``point`` and ``centroids[i]``.
    """
    point = np.asarray(point).reshape(1, -1)
    centroids = np.asarray(centroids)
    centroids = centroids.reshape(centroids.shape[0], -1)
    # Broadcasting subtracts the single point row from every centroid row,
    # so no explicit tiling of the point is needed.
    return np.linalg.norm(point - centroids, axis=1)

def ClosestCentroid(point, centroids):
    """Return the index of the centroid with the smallest distance to ``point``.

    Distances come from ``EuclidianDistance``; on a tie ``np.argmin`` yields
    the lowest index.
    """
    return np.argmin(EuclidianDistance(point, centroids))
    
def CreateCluster(points, centroids, k):
    """Assign every sample to its nearest centroid.

    The assignment is computed for all samples at once with broadcasting
    instead of one Python-level call per sample. This builds a temporary
    array of shape (n_samples, n_centroids, n_features).

    Args:
        points: array whose first axis indexes the samples.
        centroids: array whose first axis indexes the centroids.
        k: number of clusters to return.

    Returns:
        A list of ``k`` lists; list ``i`` holds, in increasing order, the
        indices of the samples whose nearest centroid is ``i``. Ties go to
        the lowest centroid index. Clusters that attract no sample are empty.

    Raises:
        IndexError: if a sample is nearest to a centroid whose index is
            not below ``k``.
    """
    points = np.asarray(points)
    centroids = np.asarray(centroids)
    n_samples = points.shape[0]
    if n_samples == 0:
        return [[] for _ in range(k)]

    flat_points = points.reshape(n_samples, -1)
    flat_centroids = centroids.reshape(centroids.shape[0], -1)
    # Squared distances are enough here: the square root is monotonic, so
    # it does not change which centroid is the nearest.
    deltas = flat_points[:, np.newaxis, :] - flat_centroids[np.newaxis, :, :]
    squared = np.square(deltas).sum(axis=2)
    nearest = squared.argmin(axis=1)

    if nearest.max() >= k:
        raise IndexError("a sample was assigned to a centroid index >= k")
    return [np.flatnonzero(nearest == i).tolist() for i in range(k)]

def UpdateCentroids(clusters, points, k):
    """Recompute each centroid as the mean of the samples in its cluster.

    A cluster with no members has no mean (``np.mean`` of an empty slice is
    NaN), so its centroid is re-seeded on a randomly chosen sample instead.
    This keeps NaN out of the centroid array.

    Args:
        clusters: sequence of index collections, one per centroid; entry
            ``i`` lists the rows of ``points`` assigned to centroid ``i``.
        points: 2-D array-like of shape (n_samples, n_features).
        k: number of centroids.

    Returns:
        Float array of shape (k, n_features) with the updated centroids.
    """
    points = np.asarray(points)
    n_samples, n_features = points.shape
    centroids = np.zeros((k, n_features))
    for i, members in enumerate(clusters):
        if len(members) > 0:
            centroids[i] = points[members].mean(axis=0)
        elif n_samples > 0:
            # Empty cluster: restart this centroid on a random sample.
            centroids[i] = points[np.random.randint(n_samples)]
    return centroids

def GetClusterLabels(clusters, points):
    """Turn per-cluster index lists into one label per sample.

    Every sample listed in ``clusters[i]`` receives the label ``i``, so
    samples in the same cluster share the same value. Samples that appear
    in no cluster keep the initial label 0.

    Returns:
        Float array with one entry per row of ``points``.
    """
    n_samples = np.shape(points)[0]
    y_pred = np.zeros(n_samples)
    for label, members in enumerate(clusters):
        # One indexed assignment labels the whole cluster at once.
        y_pred[list(members)] = label
    return y_pred

def Predict(points, k, max_iterations, tolerance):
    """Run k-means on ``points`` and return one cluster label per sample.

    Starting from randomly chosen centroids, the loop alternates between
    assigning samples to their nearest centroid and moving each centroid to
    its cluster. It stops once no centroid coordinate moves by more than
    ``tolerance`` or after ``max_iterations`` rounds.

    Args:
        points: array of samples, one row per sample.
        k: number of clusters.
        max_iterations: upper bound on the number of update rounds.
        tolerance: largest per-coordinate centroid movement that still
            counts as converged.

    Returns:
        The array produced by ``GetClusterLabels`` for the last assignment.
    """
    # Start from randomly selected centroids.
    centroids = RandomCentroids(points, k)
    clusters = None

    for _ in range(max_iterations):
        # Assign every sample to its nearest centroid.
        clusters = CreateCluster(points, centroids, k)
        former_centroids = centroids
        # Move each centroid to its cluster.
        centroids = UpdateCentroids(clusters, points, k)
        # Converged when the largest coordinate change is within tolerance.
        # (Comparing ``diff.any()`` with the tolerance would compare a bool,
        # so the tolerance value would never matter.)
        shift = np.abs(centroids - former_centroids).max(initial=0.0)
        if shift <= tolerance:
            break

    if clusters is None:
        # max_iterations <= 0: still label against the initial centroids
        # instead of failing on an unassigned variable.
        clusters = CreateCluster(points, centroids, k)

    return GetClusterLabels(clusters, points)

In [None]:
# Cluster the sampled points into six groups and display the coloured result.
labels = Predict(xyz, k=6, max_iterations=1500, tolerance=1e-6)
draw_labels_on_model(point_cloud, labels)