## Clustering Using K-Means

### Problem
You want to group observations into k groups.

In [4]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [5]:
# Load the iris dataset that ships with scikit-learn and keep its feature
# matrix. Only the features are clustered; iris.target is used later purely
# for comparison with the cluster assignments.
iris = datasets.load_iris()
feature = iris.data

In [6]:
# Standardize the features. k-means is distance based, so features measured
# on larger scales would otherwise dominate the cluster assignment.
# The fitted scaler is kept in `scaler` so the same transformation can be
# reapplied to other data.
scaler = StandardScaler()
feature_std = scaler.fit_transform(feature)

In [7]:
# Create the k-means estimator.
# - n_clusters=3: number of groups to find (iris.target has three classes).
# - random_state=0: fixes the centroid initialization so results are reproducible.
# - n_init=10: pinned explicitly because the library default changed to "auto"
#   in scikit-learn 1.4; 10 is the default that older releases used.
# n_jobs is intentionally not passed: KMeans deprecated it in scikit-learn 0.23
# and removed it in 1.0, where supplying it raises a TypeError.
cluster = KMeans(n_clusters=3, random_state=0, n_init=10)


In [8]:
# Fit k-means on the standardized features.
# fit() returns the fitted estimator, so `model` and `cluster` refer to the
# same object.
model = cluster.fit(feature_std)

In [9]:
# View the cluster label assigned to each training observation
# (one entry per row of feature_std).
model.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2,
       0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,
       2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2,
       2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0])

In [10]:
# View the true class of each observation for comparison.
# Cluster ids are arbitrary, so a cluster's number need not equal the class
# code it mostly corresponds to; compare the groupings, not the raw integers.
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [11]:
# Create a new observation with one value per feature (four in total).
# NOTE(review): the model was fit on standardized features, and this
# observation is not passed through `scaler`. Presumably the values are
# already in standardized units; if they are raw measurements, apply
# scaler.transform(new_observation) before predicting -- confirm intent.
new_observation = [[0.8, 0.8, 0.8, 0.8]]

In [12]:
# Predict which cluster the new observation belongs to, i.e. the index of
# the nearest learned cluster center.
model.predict(new_observation)

array([2])

In [13]:
# View the learned cluster centers: one row per cluster, one column per
# feature. The model was fit on feature_std, so the coordinates are in
# standardized units rather than the original measurement units.
model.cluster_centers_

array([[-0.05021989, -0.88337647,  0.34773781,  0.2815273 ],
       [-1.01457897,  0.85326268, -1.30498732, -1.25489349],
       [ 1.13597027,  0.08842168,  0.99615451,  1.01752612]])

## Speeding Up K-Means Clustering

### Problem
You want to group observations into k groups, but k-means takes too long.

In [16]:
#Load Libraries
from sklearn.cluster import MiniBatchKMeans

In [18]:
# Load the iris dataset and keep its feature matrix.
# This repeats the loading step of the k-means recipe and rebinds
# `iris` and `feature`.
iris = datasets.load_iris()
feature = iris.data

In [19]:
# Standardize the features so that each one contributes comparably to the
# distance computations; rebinds `scaler` and `feature_std`.
scaler = StandardScaler()
feature_std = scaler.fit_transform(feature)

In [20]:
# Create the mini-batch k-means estimator
cluster = MiniBatchKMeans(
    n_clusters=3,    # number of clusters to form
    random_state=0,  # fixed seed for reproducible results
    batch_size=100,  # size of each mini-batch
)

In [21]:
# Fit the mini-batch k-means model on the standardized features.
# fit() returns the fitted estimator, so `model` and `cluster` refer to the
# same object.
model = cluster.fit(feature_std)

## Clustering Using Mean Shift

### Problem
You want to group observations without assuming the number of clusters or their shape.

In [24]:
from sklearn.cluster import MeanShift

In [25]:
# Create the mean shift estimator. No number of clusters is specified;
# n_jobs=-1 asks scikit-learn to use all available processors.
cluster = MeanShift(n_jobs=-1)

In [26]:
# Fit mean shift on the standardized features (feature_std comes from the
# standardization cell earlier in the notebook).
model = cluster.fit(feature_std)