In [13]:
import numpy as np
from hmmlearn import hmm
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.metrics.pairwise import pairwise_distances_argmin
import warnings
from pprint import pprint
%matplotlib auto

Using matplotlib backend: Qt5Agg


In [3]:
def expand(a, b):
    """Widen the interval ``[a, b]`` by 5% of its length on each side.

    Args:
        a: Lower bound of the interval.
        b: Upper bound of the interval.

    Returns:
        A ``(lower, upper)`` tuple where ``lower = a - pad`` and
        ``upper = b + pad`` with ``pad = (b - a) * 0.05``.
    """
    pad = (b - a) * 0.05
    lower = a - pad
    upper = b + pad
    return lower, upper

In [5]:
# Fix the global NumPy seed so every random draw below is reproducible.
np.random.seed(0)
n = 5             # number of hidden states
n_samples = 1000  # number of observations to sample later
# Initial state distribution. randn() can return negative values, which would
# make the normalised vector an invalid probability distribution, so take the
# absolute value before normalising (the seed-0 draws are all positive, so
# they are left unchanged).
pi = np.abs(np.random.randn(n))
pi /= pi.sum()
print('init proba: ', pi)

init proba:  [0.24327063 0.05518345 0.1349723  0.3090291  0.25754452]


In [6]:
# Unnormalised random transition weights, one row per source state.
A = np.random.rand(n, n)
# Boolean mask of forbidden transitions. The builtin `bool` is used because the
# `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24.
mask = np.zeros((n, n), dtype=bool)
# Each state i is barred from jumping to i-1 and i+1 (mod 5); the index pairs
# are hard-coded for n == 5.
mask[0, 1] = mask[0, 4] = True
mask[1, 0] = mask[1, 2] = True
mask[2, 1] = mask[2, 3] = True
mask[3, 2] = mask[3, 4] = True
mask[4, 0] = mask[4, 3] = True

In [26]:
# Forbidden transitions get zero weight, then every row is rescaled so that it
# sums to 1 (each `row` is a view, so the in-place division writes into A).
A[mask] = 0
for row in A:
    row /= row.sum()

In [27]:
# Display the transition matrix A; masked entries should show as exact zeros.
pprint(A)

array([[0.24518927, 0.        , 0.53996042, 0.21485031, 0.        ],
       [0.        , 0.78220435, 0.        , 0.09781753, 0.11997812],
       [0.01137787, 0.        , 0.43790636, 0.        , 0.55071577],
       [0.57955719, 0.3346691 , 0.        , 0.08577371, 0.        ],
       [0.        , 0.54571298, 0.30145948, 0.        , 0.15282754]])


In [28]:
# Emission means: one 2-D centre per hidden state (row i belongs to state i).
means = np.array([
    [30, 30],
    [0, 50],
    [-25, 30],
    [-15, 0],
    [15, 0],
])
pprint(means)

array([[ 30,  30],
       [  0,  50],
       [-25,  30],
       [-15,   0],
       [ 15,   0]])


In [29]:
# Scratch check of the scaling expression that the covariance cell applies to
# its diagonals: two uniform draws, offset by 0.001, scaled by 10.
# NOTE(review): this draws two values from NumPy's global RNG, so deleting or
# re-running this cell shifts every random draw made after it.
(np.random.rand(2)+0.001)*10

array([4.39601513, 9.89373838])

In [30]:
# One diagonal 2x2 covariance matrix per state. The matrices are generated in
# state order so the global RNG is consumed in the same sequence as a plain loop.
covars = np.stack([
    np.diag(np.random.rand(2) + 0.001) * 10
    for _ in range(n)
])
print(covars)

[[[1.03044811 0.        ]
  [0.         2.09876756]]

 [[1.62309518 0.        ]
  [0.         6.54108325]]

 [[2.54291603 0.        ]
  [0.         4.67310773]]

 [[2.45425592 0.        ]
  [0.         1.59969584]]

 [[1.11375141 0.        ]
  [0.         6.57329589]]]


In [32]:
# Generator model: a Gaussian HMM whose parameters are set by hand rather than
# learned, used only to draw synthetic data.
model = hmm.GaussianHMM(n_components=n, covariance_type='full')
model.startprob_ = pi   # initial state distribution
model.transmat_ = A     # row-normalised transition matrix
model.means_ = means    # per-state emission means
model.covars_ = covars  # per-state full covariance matrices
# Draw the observations together with the hidden states that produced them.
sample, labels = model.sample(n_samples, random_state=0)

In [33]:
# Fit a fresh Gaussian HMM (n_iter=10) to the sampled observations, then decode
# a hidden-state label for every observation.
# NOTE: `model` is rebound here; the hand-built generator model is discarded.
model = hmm.GaussianHMM(n_components=n, covariance_type='full', n_iter=10).fit(sample)
y = model.predict(sample)

# Print small floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
for caption, estimate in (
    ('eval proba: \n', model.startprob_),
    ('eval transmat : \n', model.transmat_),
    ('eval means :\n', model.means_),
    ('eval covars : \n', model.covars_),
):
    print(caption, estimate)

eval proba: 
 [0. 0. 0. 0. 1.]
eval transmat : 
 [[0.75686275 0.14705882 0.         0.         0.09607843]
 [0.54871795 0.15897436 0.29230769 0.         0.        ]
 [0.         0.51724138 0.47126437 0.01149425 0.        ]
 [0.         0.         0.61403509 0.21052632 0.1754386 ]
 [0.26984127 0.         0.         0.68253968 0.04761905]]
eval means :
 [[ -0.09542759  50.07669686]
 [ 15.10633317  -0.03600881]
 [-25.14629099  30.0068524 ]
 [ 29.88355298  30.30651981]
 [-15.10499204  -0.29595076]]
eval covars : 
 [[[ 1.55026253  0.17411848]
  [ 0.17411848  6.2190854 ]]

 [[ 1.04296652 -0.19284186]
  [-0.19284186  6.14173831]]

 [[ 2.28182722 -0.27008127]
  [-0.27008127  3.45758667]]

 [[ 0.93755933 -0.00995598]
  [-0.00995598  1.46585161]]

 [[ 2.14840116  0.19993933]
  [ 0.19993933  1.59973287]]]


In [34]:
# For each true centre, find the index of the closest fitted mean; order[i] is
# the fitted state that corresponds to true state i.
# NOTE(review): this is a nearest-neighbour match, so it is only a permutation
# when every true centre has a distinct nearest fitted mean.
order = pairwise_distances_argmin(X=means, Y=model.means_, metric='euclidean')
order

array([3, 0, 2, 4, 1], dtype=int64)

In [36]:
# Re-index the fitted parameters so that position i refers to true state i.
pi_hat = model.startprob_[order]
A_hat = model.transmat_[order][:, order]  # permute rows, then columns
means_hat = model.means_[order]
covars_hat = model.covars_[order]

# Relabel the decoded states the same way. All masks are built from the
# original labels before any write, so an early relabel cannot be overwritten
# by a later one. The broadcast comparison already yields a boolean
# (n, n_samples) array, which avoids the `np.bool` alias removed in NumPy 1.24.
# NOTE: `y` is modified in place, so this cell must not be re-run without
# first recomputing `y`.
change = y == order[:, None]
for i in range(n):
    y[change[i]] = i

print('eval proba: \n', pi_hat)
print('eval transmat : \n', A_hat)
print('eval means :\n', means_hat)
print('eval covars : \n', covars_hat)


eval proba: 
 [0. 0. 0. 1. 0.]
eval transmat : 
 [[0.21052632 0.         0.61403509 0.1754386  0.        ]
 [0.         0.75686275 0.         0.09607843 0.14705882]
 [0.01149425 0.         0.47126437 0.         0.51724138]
 [0.68253968 0.26984127 0.         0.04761905 0.        ]
 [0.         0.54871795 0.29230769 0.         0.15897436]]
eval means :
 [[ 29.88355298  30.30651981]
 [ -0.09542759  50.07669686]
 [-25.14629099  30.0068524 ]
 [-15.10499204  -0.29595076]
 [ 15.10633317  -0.03600881]]
eval covars : 
 [[[ 0.93755933 -0.00995598]
  [-0.00995598  1.46585161]]

 [[ 1.55026253  0.17411848]
  [ 0.17411848  6.2190854 ]]

 [[ 2.28182722 -0.27008127]
  [-0.27008127  3.45758667]]

 [[ 2.14840116  0.19993933]
  [ 0.19993933  1.59973287]]

 [[ 1.04296652 -0.19284186]
  [-0.19284186  6.14173831]]]


In [38]:
# Sanity check on the sampled observations; expected to be (n_samples, 2).
sample.shape

(1000, 2)

In [42]:
# Observations coloured by their true hidden state (legend label: "observations").
obs_x, obs_y = sample[:, 0], sample[:, 1]
plt.scatter(obs_x, obs_y, s=50, c=labels, cmap=plt.cm.Spectral, marker='o',
            label=u'观测值', linewidths=0.5, zorder=20)
# Red line joining consecutive observations, drawn beneath the markers.
plt.plot(obs_x, obs_y, 'r-', zorder=10)
# True state centres as diamonds on top, with random colours (legend label: "centres").
plt.scatter(means[:, 0], means[:, 1], s=100, c=np.random.rand(n), marker='D',
            label=u'中心', alpha=0.8, zorder=30)

<matplotlib.collections.PathCollection object at 0x00000000173BDA20>