In [153]:
import itertools

import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
import matplotlib as mpl

from sklearn import mixture

In [2]:
# Pin NumPy's global random state so the random draws in later cells repeat.
np.random.seed(0)

# 2x2 matrix that the first synthetic cluster's samples are multiplied by.
C = np.array([
    [0.0, -0.1],
    [1.7, 0.4],
])

In [3]:
# Show the 2x2 array C as the cell output.
C

array([[ 0. , -0.1],
       [ 1.7,  0.4]])

In [4]:
# Number of samples per component: each of the two synthetic clusters
# generated for X draws this many 2-D points.
n_samples = 500

In [36]:
# Stack two synthetic 2-D clusters row-wise (first draw first, so the random
# stream is consumed in the same order as a single np.r_ expression would):
X = np.vstack((
    # cluster 1: standard-normal samples multiplied by C
    np.dot(np.random.randn(n_samples, 2), C),
    # cluster 2: standard-normal samples scaled by 0.7 and shifted to (-6, 3)
    .7 * np.random.randn(n_samples, 2) + np.array([-6, 3]),
))

In [171]:
# Show the current value of X.
# NOTE(review): the output recorded for this cell is a 5-element float64
# Series, so X had been rebound by another cell when this was run — the
# output does not reflect the stacked array built above.
X

50    0.876169
60    0.892447
70    0.848522
80   -0.112841
90   -1.001516
dtype: float64

In [170]:
# Fit a Dirichlet process mixture of Gaussians with n_components=20,
# full covariance matrices and alpha=100.
# NOTE(review): the ValueError recorded for this cell (n_samples=1) suggests X
# was not a 2-D (n_samples, n_features) array when it ran — presumably the
# 5-element Series assigned in another cell; confirm X before fitting.
dpgmm = mixture.DPGMM(n_components=20, covariance_type='full', alpha=100.)
dpgmm.fit(X)

ValueError: n_samples=1 should be >= n_clusters=20

In [156]:
# For every (model, title) pair: draw the samples coloured by their predicted
# component and overlay one translucent ellipse per component in use.
for i, (clf, title) in enumerate([(dpgmm, 'Dirichlet Process GMM')]):
    splot = plt.subplot(2, 1, 1 + i)
    Y_ = clf.predict(X)

    # Start a fresh colour cycle for each model. This keeps the cell
    # self-contained (no dependency on a cell further down) and makes
    # re-running it produce the same colours instead of continuing a
    # partially consumed shared iterator.
    color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm', 'y'])

    # `k` is the component index; a distinct name avoids shadowing the
    # subplot index `i` of the outer loop.
    for k, (mean, covar, color) in enumerate(zip(
            clf.means_, clf._get_covars(), color_iter)):
        # as the DP will not use every component it has access to
        # unless it needs it, we shouldn't plot the redundant
        # components.
        member = Y_ == k
        if not np.any(member):
            continue
        plt.scatter(X[member, 0], X[member, 1], 5, color=color)

        # eigh returns the eigenvalues in `v` and the matching unit
        # eigenvectors as the *columns* of `w`, so the axis that belongs to
        # v[0] is w[:, 0] (not the row w[0]).
        v, w = linalg.eigh(covar)
        u = w[:, 0] / linalg.norm(w[:, 0])

        # Plot an ellipse to show the Gaussian component
        # arctan2 avoids a division by zero for a vertical axis; it can differ
        # from arctan(u[1] / u[0]) by 180 degrees, which leaves an ellipse
        # unchanged.
        angle = np.degrees(np.arctan2(u[1], u[0]))
        # `angle` is passed by keyword because newer matplotlib releases no
        # longer accept it positionally.
        # NOTE(review): v holds eigenvalues and is used directly as the
        # ellipse width/height, as before; confirm whether a sqrt-based
        # scaling was intended.
        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180 + angle,
                                  color=color)
        ell.set_clip_box(splot.bbox)
        ell.set_alpha(0.5)
        splot.add_artist(ell)

    # Fixed view window and no tick marks for the panel.
    plt.xlim(0, 210)
    plt.ylim(0, 70)
    plt.xticks(())
    plt.yticks(())
    plt.title(title)

In [157]:
# Endless iterator over the single-letter matplotlib colour codes
# r, g, b, c, m, y (iterating the string yields one character at a time).
color_iter = itertools.cycle('rgbcmy')

In [158]:
# Display the currently open matplotlib figure(s).
plt.show()

In [140]:
# Show the repr of `clf`, the model variable bound by the plotting loop.
clf

DPGMM(alpha=100.0, covariance_type='full', init_params='wmc', min_covar=None,
   n_components=20, n_iter=10, params='wmc', random_state=None,
   thresh=None, tol=0.001, verbose=False)

In [15]:
# NOTE(review): this evaluates to the bound method object itself (see the
# recorded output); if the parameter values were wanted, it needs to be
# called: dpgmm.get_params()
dpgmm.get_params

<bound method DPGMM.get_params of DPGMM(alpha=1.0, covariance_type='full', init_params='wmc', min_covar=None,
   n_components=5, n_iter=10, params='wmc', random_state=None, thresh=None,
   tol=0.001, verbose=False)>

In [154]:
# Hand-made data set: 21 two-dimensional points whose second coordinate is
# always 60; only the first coordinate varies.
_x_values = [
    50, 51, 52, 53,
    70, 71, 72, 73,
    76, 78, 79, 80,
    100, 104, 105, 106, 107,
    200, 205, 206, 207,
]
X = np.column_stack((_x_values, [60] * len(_x_values)))

In [160]:
# Rebind X to a 2x4 integer array (two rows of four values).
X = np.array([
    [50, 60, 51, 60],
    [100, 100, 100, 100],
])

In [164]:
from pandas import Series,DataFrame

In [166]:
from numpy import random

In [167]:
# Rebind X to a pandas Series of five standard-normal draws, labelled with
# the integer index 50, 60, 70, 80, 90.
_labels = [50, 60, 70, 80, 90]
X = Series(data=random.randn(len(_labels)), index=_labels)

In [168]:
# Number of entries in X (5 for the Series assigned above).
len(X)

5

In [126]:
# Means of the fitted mixture: one row per component.
dpgmm.means_

array([[-0.00852317,  0.16289887],
       [ 0.0250477 ,  0.10374071],
       [-0.01391796,  0.15181522],
       [ 0.00098144,  0.00058052],
       [-0.00397202,  0.15736018],
       [-0.02416514,  0.15450284],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052],
       [ 0.00098144,  0.00058052]])

In [145]:
# Bayesian information criterion of the fitted model evaluated on X.
dpgmm.bic(X)

552.34481131152017

In [146]:
# Mixture weight of each component of the fitted model.
dpgmm.weights_

array([ 0.10760189,  0.11124724,  0.11478104,  0.02465071,  0.14147306,
        0.12438381,  0.0268473 ,  0.0268473 ,  0.0268473 ,  0.0268473 ,
        0.0268473 ,  0.0268473 ,  0.0268473 ,  0.0268473 ,  0.0268473 ,
        0.0268473 ,  0.02684731,  0.02684731,  0.02684731,  0.02684731])

In [147]:
# Predicted component index for every sample in X.
dpgmm.predict(X)

array([5, 5, 5, 5, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4, 4, 0, 0, 0, 0])

In [148]:
# Number of predictions returned for X (one per sample).
len(dpgmm.predict(X))

21

In [150]:
# Class of the object the `bic` method is bound to. `__self__` is available
# on bound methods in both Python 2.6+ and Python 3, whereas `im_class`
# exists only on Python 2 and raises AttributeError on Python 3.
type(dpgmm.bic.__self__)

sklearn.mixture.dpgmm.DPGMM

In [None]:
# NOTE(review): unfinished, never-executed cell — no name `dp` is defined in
# the visible notebook, so running it as-is would raise NameError.
dp