In [None]:
%matplotlib inline
import matplotlib as mpl
mpl.style.use('ggplot')
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from sklearn import datasets, cluster
from sklearn import mixture

In [None]:
# Which synthetic 2-D dataset the rest of the notebook clusters:
#   "gaussians" - two Gaussian blobs with full (correlated) covariance matrices
#   "blobs"     - four spherical blobs from make_blobs
#   "circles"   - two concentric rings (requires non-linear cluster boundaries)
DATASET = "circles"

# Fixed seed so that Restart Kernel -> Run All reproduces the same points.
SEED = 4


def _covariance(std1, std2, corr):
    """Return the 2x2 covariance matrix for two standard deviations and a correlation.

    The diagonal holds the variances (std1**2, std2**2); both off-diagonal
    entries hold the covariance corr * std1 * std2.
    """
    cov = corr * std1 * std2
    return np.array([[std1**2, cov], [cov, std2**2]])


if DATASET == "gaussians":
    # Local generator: reproducible without touching numpy's global random state.
    rng = np.random.RandomState(SEED)
    # (mean, std1, std2, corr) for each of the two components
    components = [
        ((-2, -2), 1.0, 2.0, 0.5),
        ((2, 2), 2.0, 2.0, -0.8),
    ]
    samples = []
    for mu, std1, std2, corr in components:
        sigma = _covariance(std1, std2, corr)
        samples.append(rng.multivariate_normal(np.array(mu), sigma, size=500))
        print(sigma)
    # 500 points per component, stacked into one array of points
    xx = np.concatenate(samples, axis=0)
elif DATASET == "blobs":
    xx, yy = datasets.make_blobs(n_samples=500, centers=4, random_state=4)
elif DATASET == "circles":
    xx, yy = datasets.make_circles(n_samples=500, factor=0.5, noise=0.05, random_state=SEED)
else:
    raise ValueError("unknown DATASET: %r" % (DATASET,))

plt.figure(figsize=(7, 7))
plt.scatter(xx[:, 0], xx[:, 1])
# Optional fixed viewing window (left disabled, as in the original cell):
#plt.xlim((-8, 8)); plt.ylim((-8, 8))

In [None]:
# Baseline: k-means with two clusters on the points in `xx`.
# random_state pins the centroid initialisation so the clustering (and which
# cluster gets which colour) is the same on every run.
model = cluster.KMeans(n_clusters=2, random_state=0)
zz = model.fit_predict(xx)

# Points coloured by their assigned cluster label.
plt.scatter(xx[:, 0], xx[:, 1], c=zz)

# Fitted cluster centres drawn as large markers, coloured by centre index.
centers = model.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], s=200, c=np.arange(len(centers)))

In [None]:
model = mixture.GMM(n_components=2, covariance_type='full')
zz = model.fit_predict(xx)
plt.scatter(xx[:, 0], xx[:, 1], c=zz)

In [None]:
model.covars_[0]

In [None]:
model.covars_[1]

In [None]:
# Spectral clustering with the default RBF affinity exp(-gamma * ||x - y||^2).
# Setting gamma = 1 / (2 * std^2) makes that affinity a Gaussian of width `std`.
std = 0.1
gamma = 1 / (2 * std**2)
# random_state pins the stochastic parts of the estimator for reproducible labels.
model = cluster.SpectralClustering(n_clusters=2, gamma=gamma, random_state=0)
zz = model.fit_predict(xx)
plt.scatter(xx[:, 0], xx[:, 1], c=zz)

# Visual aid: a normal density with scale `std`, centred at x = -0.5, drawn over
# the data so the kernel width can be compared with the spacing of the points.
# (`norm` comes from the notebook's import cell.)
a = np.linspace(-2, 2, 100)
plt.plot(a, norm.pdf(a, loc=-0.5, scale=std))
plt.xlim((-2, 2))
plt.ylim((-2, 2));

In [None]:
# Mean shift: no cluster count is passed; only the kernel bandwidth is set.
std = 0.1
# NOTE(review): sqrt(2 * std) is not a unit-consistent conversion from a standard
# deviation; sqrt(2) * std may have been intended. Left unchanged because it
# changes the clustering result - confirm before editing.
bandwidth = np.sqrt(2 * std)
model = cluster.MeanShift(bandwidth=bandwidth)
zz = model.fit_predict(xx)

# Points coloured by assigned label, then the fitted centres as large markers
# coloured by centre index.
plt.scatter(xx[:, 0], xx[:, 1], c=zz)
centers = model.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], s=200, c=np.arange(len(centers)))

# Visual aid: a normal density with scale `std`, centred at x = -0.5, drawn over
# the data for a sense of scale. (`norm` comes from the notebook's import cell.)
a = np.linspace(-2, 2, 100)
plt.plot(a, norm.pdf(a, loc=-0.5, scale=std))
plt.xlim((-2, 2))
plt.ylim((-2, 2));