In [9]:
import os
# Cap OpenMP at a single thread. This is set before the numeric libraries are
# imported below so the value is already in the environment when they load.
# NOTE(review): presumably this silences scikit-learn's KMeans threading /
# memory-leak warning on some platforms — confirm.
os.environ["OMP_NUM_THREADS"] = "1"

import pandas as pd
import numpy as np
from IPython.display import display_html
from sklearn import cluster

def toy_dataset():
    """Build, display and return the six-user toy ratings table.

    Returns:
        A DataFrame with a 'user' column followed by the four integer
        rating columns 'Id1', 'Id2', 'Num1' and 'Num2', one row per user.
    """
    # Column-oriented definition; each list is aligned by user position.
    table = {
        'user': ['Finn', 'Jane', 'El', 'Maya', 'Sadie', 'Dustin'],
        'Id1': [5, 4, 4, 2, 1, 2],
        'Id2': [5, 5, 4, 2, 2, 1],
        'Num1': [2, 3, 4, 4, 3, 5],
        'Num2': [1, 2, 3, 5, 4, 5],
    }
    movies = pd.DataFrame(table)
    display_html(movies)
    return movies

def k_means_learn(k, movies):
    """Fit k-means on the rating columns of *movies* and display the centroids.

    Args:
        k: Number of clusters to learn.
        movies: DataFrame containing a 'user' column plus the feature
            columns; 'user' is dropped before fitting.

    Returns:
        The fitted ``cluster.KMeans`` model. The training-set assignments
        are not displayed here; they stay available on the model's
        ``labels_`` attribute.
    """
    data = movies.drop('user', axis=1)
    # A fixed random_state keeps the learned centroids reproducible.
    k_means = cluster.KMeans(n_clusters=k, max_iter=50, random_state=1, n_init=10)
    k_means.fit(data)
    print("Learned cluster centroids for", k, "clusters:")
    centroids = k_means.cluster_centers_
    # Label the centroid coordinates with the feature column names.
    display_html(pd.DataFrame(centroids, columns=data.columns))
    print("Now use cluster centroids to assign other users to their clusters.")
    return k_means

def cluster_new_data(k_means, movies):
    """Assign five hard-coded new users to clusters and display the result.

    Args:
        k_means: Fitted model exposing ``predict``; it is called on the
            raw rating array of the new users.
        movies: Training DataFrame. Only its column names are used: the
            first names the user column, the rest name the rating columns.

    Two tables are displayed: the new users alone, then the same users with
    an extra 'Assigned Cluster' column.
    """
    test_data = np.array([[4, 5, 1, 2], [3, 2, 4, 4], [2, 3, 4, 1], [3, 2, 3, 3], [5, 4, 1, 4]])
    usernames = ['Mike', 'Lucas', 'Max', 'Jim', 'Steve']
    labels = k_means.predict(test_data)

    user_col, *rating_cols = movies.columns.tolist()
    # Build the frame column-wise rather than concatenating names and ratings
    # into one NumPy array: a mixed string/int array is promoted to strings,
    # which would turn every rating and cluster label into text.
    newusers = pd.DataFrame(test_data, columns=rating_cols)
    newusers.insert(0, user_col, usernames)

    newusers_cluster = newusers.copy()
    newusers_cluster['Assigned Cluster'] = labels

    print("New users (test data) are:")
    display_html(newusers)
    print("New users with their assigned cluster:")
    display_html(newusers_cluster)

def main():
    """Run the demo: build the toy data, learn two clusters, classify new users."""
    ratings = toy_dataset()
    model = k_means_learn(2, ratings)
    cluster_new_data(model, ratings)

# Run the whole demo when this cell/script is executed.
main()


Unnamed: 0,user,Id1,Id2,Num1,Num2
0,Finn,5,5,2,1
1,Jane,4,5,3,2
2,El,4,4,4,3
3,Maya,2,2,4,5
4,Sadie,1,2,3,4
5,Dustin,2,1,5,5




Learned cluster centroids for 2 clusters:


Unnamed: 0,Id1,Id2,Num1,Num2
0,4.333333,4.666667,3.0,2.0
1,1.666667,1.666667,4.0,4.666667


Now use cluster centroids to assign other users to their clusters.
New users (test data) are:




Unnamed: 0,user,Id1,Id2,Num1,Num2
0,Mike,4,5,1,2
1,Lucas,3,2,4,4
2,Max,2,3,4,1
3,Jim,3,2,3,3
4,Steve,5,4,1,4


New users with their assigned cluster:


Unnamed: 0,user,Id1,Id2,Num1,Num2,Assigned Cluster
0,Mike,4,5,1,2,0
1,Lucas,3,2,4,4,1
2,Max,2,3,4,1,0
3,Jim,3,2,3,3,1
4,Steve,5,4,1,4,0
