In [6]:
import pandas as pd
import numpy as np
from sklearn_extra.cluster import KMedoids
from sklearn.datasets import make_blobs
from IPython.display import display_html

def toy_dataset():
    """Generate a 2-D blob dataset and optionally preview it.

    750 samples are drawn with ``make_blobs`` around three fixed centers
    (standard deviation 0.4, ``random_state=0``). The user is then asked on
    stdin whether the first ten rows should be displayed; only the answer
    ``yes`` (case-insensitive) triggers the preview.

    Returns:
        A ``(data, X)`` tuple: ``data`` is a DataFrame with columns ``x1``
        and ``x2`` built from ``X``, and ``X`` is the sample array returned
        by ``make_blobs``.
    """
    blob_centers = [[1, 1], [-1, -1], [1, -1]]
    # The ground-truth labels from make_blobs are not used by this function.
    X, _ = make_blobs(
        n_samples=750,
        centers=blob_centers,
        cluster_std=0.4,
        random_state=0,
    )
    data = pd.DataFrame(X, columns=['x1', 'x2'])

    print("Do you want to view the first 10 data elements?")
    wants_preview = input().lower() == 'yes'
    if wants_preview:
        display_html(data.head(10))

    return data, X

def K_medoids(data):
    """Fit a three-cluster KMedoids model and display its cluster centers.

    Args:
        data: DataFrame of samples to cluster; its column names are reused
            as the headers of the displayed cluster-center table.

    Returns:
        A ``(model, labels)`` tuple: the fitted ``KMedoids`` estimator and
        its ``labels_`` attribute for the rows of ``data``.
    """
    model = KMedoids(n_clusters=3, max_iter=50)
    model.fit(data)

    print("Learned cluster centroids for three clusters:")
    center_table = pd.DataFrame(model.cluster_centers_, columns=data.columns)
    display_html(center_table)

    return model, model.labels_

def cluster_new_data(k_medoids):
    """Assign ten hard-coded 2-D points to the learned clusters and display them.

    Args:
        k_medoids: A fitted clustering model exposing ``predict``; it is
            called once with the ``(10, 2)`` array of new points.

    The displayed table has the feature columns ``x1`` and ``x2`` plus an
    ``Assigned Cluster`` column holding the predicted label of each point.
    """
    testData = np.array([[0.81, 1.12], [-1.145, -1.194], [0.676, 0.7133], [0.4442, -1.3245], [1.23623, 1.34634],
                         [-0.93423, -0.0332], [-1.00234, -1.546], [0.946, -0.4674], [1.534, 0.4789], [1.23523, 1.0547]])
    newdata_cluster = pd.DataFrame(testData, columns=['x1', 'x2'])
    # Assign the labels as their own column rather than concatenating them
    # onto the float feature array: concatenation upcasts the labels to float,
    # so cluster ids were shown as 2.0 / 1.0 / 0.0 instead of 2 / 1 / 0.
    newdata_cluster['Assigned Cluster'] = k_medoids.predict(testData)
    display_html(newdata_cluster)

def main():
    """Run the interactive demo: build the data, cluster it, optionally label new points.

    After the model is fitted the user is asked on stdin whether the
    hard-coded new points should be clustered as well; only the answer
    ``yes`` (case-insensitive) triggers that step.
    """
    # The raw sample array and the training labels are not needed here.
    data, _ = toy_dataset()
    k_medoids, _ = K_medoids(data)

    print("Do you want clustering for new data based on learned clusters?")
    wants_new_clustering = input().lower() == 'yes'
    if wants_new_clustering:
        cluster_new_data(k_medoids)

# Run the interactive demo only when executed directly (script or notebook
# cell), so importing these functions elsewhere does not block on input().
if __name__ == "__main__":
    main()


Do you want to view the first 10 data elements?


 yes


Unnamed: 0,x1,x2
0,0.84022,1.148022
1,-1.154748,-1.204117
2,0.678636,0.72418
3,0.450783,-1.427097
4,1.492116,1.480952
5,-0.981946,-0.06415
6,-1.023435,-1.127017
7,0.935113,-0.692228
8,1.450654,0.568027
9,1.379768,1.03502


Learned cluster centroids for three clusters:


Unnamed: 0,x1,x2
0,1.086872,-0.968716
1,-1.022453,-1.000554
2,0.947957,1.037581


Do you want clustering for new data based on learned clusters?


 yes


Unnamed: 0,x1,x2,Assigned Cluster
0,0.81,1.12,2.0
1,-1.145,-1.194,1.0
2,0.676,0.7133,2.0
3,0.4442,-1.3245,0.0
4,1.23623,1.34634,2.0
5,-0.93423,-0.0332,1.0
6,-1.00234,-1.546,1.0
7,0.946,-0.4674,0.0
8,1.534,0.4789,2.0
9,1.23523,1.0547,2.0
