In [111]:
import pandas as pd

# Read the CSV and use its first column as the row index, then preview the top rows.
mobile_csv = 'data/mobile.csv'
df = pd.read_csv(mobile_csv, index_col=0)
df.head(n=5)

Unnamed: 0,battery_power,clock_speed,four_g,int_memory,n_cores,pc,ram,talk_time,touch_screen,wifi,price_range
0,1142,1.4,0,6,8,8,663,5,1,1,0
1,728,1.7,1,5,2,20,834,5,1,0,0
2,1868,0.5,1,40,8,17,298,17,1,0,0
3,890,2.2,0,44,8,13,751,3,0,0,0
4,1433,1.6,0,4,8,11,258,4,1,1,0


In [45]:
# Restrict the frame to the four feature columns that the clustering cells work on.
feature_names = ['battery_power', 'pc', 'int_memory', 'ram']
chosen_data = df.loc[:, feature_names]
chosen_data.head(n=5)

Unnamed: 0,battery_power,pc,int_memory,ram
0,1142,8,6,663
1,728,20,5,834
2,1868,17,40,298
3,890,13,44,751
4,1433,11,4,258


In [113]:
import numpy as np

# Normalise the selected features: subtract each column's mean and divide by
# that column's range (max - min). X keeps the raw values, Y the scaled ones.
X = chosen_data.to_numpy(copy=True)
me = X.mean(axis=0)      # per-column mean of the raw data
ra = np.ptp(X, axis=0)   # per-column range (max - min) of the raw data
Y = (X - me) / ra

# Per-column mean of X once more, stored under a second name.
me2 = X.mean(axis=0)

# 5 clusters

In [173]:
from sklearn.cluster import KMeans
np.set_printoptions(precision=4)

n_clusters = 5
min_inertia = np.inf
random_states = np.arange(20)

# Fit K-means once per seed on the scaled data Y and remember the partition
# belonging to the fit with the lowest inertia encountered so far.
for seed in random_states:
    kmeans = KMeans(n_clusters=n_clusters, random_state=seed)
    kmeans.fit(Y)
    preds = kmeans.labels_
    if kmeans.inertia_ < min_inertia:
        min_inertia = kmeans.inertia_
        # Cluster membership comes from Y, but the summary uses the raw rows of X.
        best_partition = [X[preds == label] for label in range(n_clusters)]
        best_means = [members.mean(axis=0) for members in best_partition]
        # Percent deviation of each cluster mean from the overall mean `me`.
        best_deltas = [100 * ((centre - me) / me) for centre in best_means]

# Report the winning partition: per-cluster deltas and cluster sizes.
for label, (delta, members) in enumerate(zip(best_deltas, best_partition)):
    print("delta {}".format(label), delta, "#el:", members.shape[0])
print("min inertia", min_inertia)
    

delta 0 [19.2013 -4.2259 52.2376 48.4992] #el: 82
delta 1 [-25.8201 -41.1796 -46.782    8.6553] #el: 92
delta 2 [  8.3991 -47.1756  50.5577 -45.0334] #el: 76
delta 3 [ 37.2706  43.0807 -50.063  -14.6005] #el: 71
delta 4 [-31.4381  59.0084  -3.3854  -3.9753] #el: 79
min inertia 69.52784105786077


In [174]:
# One row per cluster: the per-feature deltas plus the number of members.
cluster_sizes = list(map(len, best_partition))
result = pd.DataFrame(best_deltas, columns=chosen_data.columns).assign(elements=cluster_sizes)
print(min_inertia)
result

69.52784105786077


Unnamed: 0,battery_power,pc,int_memory,ram,elements
0,19.201307,-4.225855,52.237574,48.499206,82
1,-25.820097,-41.179593,-46.782048,8.655309,92
2,8.399145,-47.175631,50.5577,-45.033362,76
3,37.270614,43.080706,-50.062989,-14.600546,71
4,-31.438061,59.008362,-3.385387,-3.975305,79


# 9 clusters

In [181]:
from sklearn.cluster import KMeans
np.set_printoptions(precision=4)

n_clusters = 9
min_inertia = np.inf
random_states = np.arange(100, 120)

# Try every seed in turn; whenever a fit beats the best inertia so far,
# rebuild the per-cluster summaries from that fit's labels.
for seed in random_states:
    kmeans = KMeans(n_clusters=n_clusters, random_state=seed)
    kmeans.fit(Y)
    preds = kmeans.labels_
    if kmeans.inertia_ < min_inertia:
        min_inertia = kmeans.inertia_
        # Labels are assigned on the scaled data Y; rows are taken from the raw X.
        best_partition = [X[preds == label] for label in range(n_clusters)]
        best_means = [members.mean(axis=0) for members in best_partition]
        # Relative difference (in percent) between cluster mean and overall mean `me`.
        best_deltas = [100 * ((centre - me) / me) for centre in best_means]

# Print the deltas and the size of every cluster of the best partition.
for label, (delta, members) in enumerate(zip(best_deltas, best_partition)):
    print("delta {}".format(label), delta, "#el:", members.shape[0])
print("min inertia", min_inertia)
    

delta 0 [ 36.9663  56.2882 -50.8037 -48.2236] #el: 38
delta 1 [28.8713 44.726  55.4375 40.8964] #el: 49
delta 2 [-36.1125  -6.4765 -48.7716  41.2977] #el: 47
delta 3 [ 32.4908   4.5177 -51.249   44.0179] #el: 40
delta 4 [-27.473   14.8284  62.378  -37.3916] #el: 45
delta 5 [-28.7903  67.8094 -45.0402 -14.5972] #el: 39
delta 6 [-12.4156 -53.8414  47.1931  47.9858] #el: 51
delta 7 [-27.7694 -51.3982 -43.3973 -47.5977] #el: 41
delta 8 [ 34.1819 -53.3089  37.5665 -42.3399] #el: 50
min inertia 46.50203608897823


In [183]:
# Assemble the per-cluster deltas into a table and append each cluster's size.
cluster_sizes = [members.shape[0] for members in best_partition]
result = pd.DataFrame(best_deltas, columns=chosen_data.columns).assign(elements=cluster_sizes)
print(min_inertia)
result

46.50203608897823


Unnamed: 0,battery_power,pc,int_memory,ram,elements
0,36.966343,56.288156,-50.803681,-48.223585,38
1,28.87133,44.726022,55.437517,40.896381,49
2,-36.112529,-6.476502,-48.771581,41.297725,47
3,32.490776,4.517705,-51.248993,44.017891,40
4,-27.47303,14.828381,62.378011,-37.391566,45
5,-28.790319,67.809399,-45.040187,-14.597199,39
6,-12.415609,-53.841366,47.193124,47.985786,51
7,-27.769439,-51.398195,-43.397339,-47.597673,41
8,34.18187,-53.308913,37.566479,-42.339889,50


# 4 clusters

In [219]:
from sklearn.cluster import KMeans
np.set_printoptions(precision=4)

n_clusters = 4
# Fixed starting centroids: four rows of the scaled data, one per cluster.
centers = np.array([Y[0, :], Y[100, :], Y[200, :], Y[300, :]])
assert centers.shape[0] == n_clusters, "need exactly one initial centre per cluster"

# With an explicit `init` array there is no random initialisation, so refitting
# in a loop only repeats the same computation. Fit once, and pass n_init=1
# explicitly so scikit-learn does not warn that it is ignoring its default
# number of restarts.
kmeans = KMeans(n_clusters=n_clusters, init=centers, n_init=1)
kmeans.fit(Y)
preds = kmeans.labels_
min_inertia = kmeans.inertia_

# Summarise each cluster on the raw data X: its rows, its mean, and the percent
# deviation of that mean from the overall mean `me`.
best_partition, best_deltas, best_means = [], [], []
for j in range(n_clusters):
    cluster = X[preds == j]
    cluster_means = np.mean(cluster, axis=0)
    cur_delta = 100 * ((cluster_means - me) / me)
    best_partition.append(cluster)
    best_means.append(cluster_means)
    best_deltas.append(cur_delta)

for i in range(n_clusters):
    print("delta {}".format(i), best_deltas[i], "#el:", best_partition[i].shape[0])
print("min inertia", min_inertia)
    

delta 0 [-33.6525  15.0161 -28.9152 -25.2096] #el: 111
delta 1 [ 18.9995 -48.5838  42.2816 -33.9822] #el: 91
delta 2 [ 34.4316  52.0328 -22.7388   5.9965] #el: 101
delta 3 [-15.1662 -25.7833  17.0988  54.4845] #el: 97
min inertia 80.23166287663187


  return_n_iter=True)


In [220]:
# Table of per-cluster deltas (one row per cluster) with the cluster sizes appended.
cluster_sizes = [len(members) for members in best_partition]
result = pd.DataFrame(best_deltas, columns=chosen_data.columns).assign(elements=cluster_sizes)
print(min_inertia)
result

80.23166287663187


Unnamed: 0,battery_power,pc,int_memory,ram,elements
0,-33.652469,15.016115,-28.915217,-25.209569,111
1,18.999495,-48.583773,42.28157,-33.982231,91
2,34.431618,52.032786,-22.738769,5.996486,101
3,-15.16622,-25.783266,17.098781,54.484537,97
