In [None]:
#! python
import numpy; 
import matplotlib.pyplot as plt 
import pandas as pd;
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.manifold import MDS, TSNE

# Load the user profile table; the input file is expected in the working directory.
profiles = pd.read_csv('user_profiles_normed.csv')

# Columns of `profiles` that are used as clustering features.
feature_columns = [
    "rounds",
    "mean_game_score",
    "mean_total_score",
    "mean_NoTasks",
    "mean_challenge_level",
    "mean_OLM_use",
    "mean_speed",
    "mean_diff",
    "mean_sfun",
    "mean_sdiff",
    "mean_sperf",
    "mean_srules",
]

# create training dataset (feature columns only, in the order listed above)
training_data = profiles[feature_columns]

input_data = training_data

# Two-cluster k-means with 50 k-means++ restarts.
# The fit is seeded so that the cluster indices (and therefore the saved labels
# and the plot colours derived from them) are reproducible between runs; the
# seed value mirrors the one already used for the MDS embedding.
model = KMeans(
    n_clusters=2,
    init="k-means++",
    n_init=50,
    random_state=100,
).fit(input_data)

# Grouped bar chart comparing the two k-means centroids feature by feature.

# set width of bars
barWidth = 0.25

# set heights of bars: one centroid coordinate per training feature
bars1 = model.cluster_centers_[0]
bars2 = model.cluster_centers_[1]

# Take the tick labels from the frame the model was fitted on instead of a
# second hand-written list, so labels and centroid coordinates cannot drift
# out of sync.
tlabels = list(training_data.columns)

# Set position of bar on X axis; the second series sits one bar width to the right
r1 = numpy.arange(len(bars1))
r2 = r1 + barWidth

# Make the plot
plt.bar(r1, bars1, color='b', width=barWidth, edgecolor='white', label='cluster1')
plt.bar(r2, bars2, color='r', width=barWidth, edgecolor='white', label='cluster2')

# Add xticks on the middle of the group bars.
# The feature names are long, so rotate them to keep neighbouring labels
# from overlapping.
plt.xlabel('cluster data (centroids)', fontweight='bold')
plt.xticks(r1 + 0.5 * barWidth, tlabels, rotation=45, ha='right')

# Create legend & Show graphic
plt.legend()
plt.tight_layout()  # make room for the rotated tick labels
plt.show()

# multidimensional scaling: embed the users in 2-D from their pairwise
# Euclidean distances and colour each point by its k-means cluster.

# Shift every value by the single overall mean of the feature table.
# Euclidean distances are unchanged by a constant shift, so this has no
# influence on `similarities`; it is kept only to preserve the original steps.
M = training_data - numpy.asarray(training_data).mean()
similarities = euclidean_distances(M)  # despite the name, these are distances

mds = MDS(n_components=2, max_iter=1000, random_state=100, eps=1e-16, dissimilarity="precomputed")
pos = mds.fit(similarities).embedding_

# Colour by cluster index using the same mapping as the centroid bar chart
# (index 0 -> blue 'cluster1', index 1 -> red 'cluster2') so both figures agree.
colors = ["b", "r"]
cluster_ids = numpy.asarray(model.labels_)

# One plot call per cluster instead of one per point: same markers, far fewer
# artists, and each cluster gets a legend entry.
for cluster_index, cluster_color in enumerate(colors):
    members = pos[cluster_ids == cluster_index]
    plt.plot(
        members[:, 0],
        members[:, 1],
        'o',
        markersize=9,
        color=cluster_color,
        label='cluster%d' % (cluster_index + 1),
    )

plt.legend()
plt.show()

# Attach each row's k-means cluster index to the full profile table
# (labels are in the same row order as the data the model was fitted on) ...
profiles['cluster'] = model.labels_

# ... and write the augmented table, with a header row and without the
# DataFrame index, next to the input file.
profiles.to_csv(
    r'user_profiles_clusters.csv',
    header=True,
    index=False,
)

 

