以”三球模型”为例，介绍了观测状态是离散状态的情况下的HMM模型

In [1]:
import numpy as np
from hmmlearn import hmm

In [5]:
# 定义状态集合
states = ["box 1", "box 2", "box3"]
n_states = len(states)

# 定义观测集合
observations = ["red", "white"]
n_observations = len(observations)

# 状态初始值
start_probability = np.array([0.2, 0.4, 0.4])

# 状态转移矩阵
transition_probability = np.array([
  [0.5, 0.2, 0.3],
  [0.3, 0.5, 0.2],
  [0.2, 0.3, 0.5]
])

# 观测生成概率（发射概率）
emission_probability = np.array([
  [0.5, 0.5],
  [0.4, 0.6],
  [0.7, 0.3]
])

model = hmm.MultinomialHMM(n_components=n_states)
model.startprob_=start_probability    # 初始状态概率
model.transmat_=transition_probability    # 状态转移矩阵
model.emissionprob_=emission_probability    # 发射概率矩阵

#### 1. 评估问题

In [6]:
# 已知模型参数和观测序列，求解概率
seen = np.array([[0,1,0]]).T
print(model.score(seen))

-2.038545309915233


In [12]:
# 上述求解的是以指数为底的log概率，需要进行转换
print("序列seen发生的概率为:{}".format(np.exp(model.score(seen))))

序列seen发生的概率为:0.13021800000000003


#### 2. 解码问题

In [17]:
# 已知模型参数和观测序列，求解最可能的隐藏序列
seen = np.array([[0,1,0]]).T
logprob, box = model.decode(seen, algorithm="viterbi")

In [36]:
print("观测到的球颜色的序列为:", list(map(lambda x: observations[x], seen.ravel())))
print("隐藏的球编号的序列为:", list(map(lambda x: states[x], box)))

观测到的球颜色的序列为: ['red', 'white', 'red']
隐藏的球编号的序列为: ['box3', 'box3', 'box3']


In [37]:
# 还可以得到隐藏序列的概率
print("隐藏序列发生的概率为:{}".format(np.exp(logprob)))

隐藏发生的概率为:0.014699999999999996


In [39]:
# 也可用predict函数进行解码
box2 = model.predict(seen)
print("观测到的球颜色的序列为:", list(map(lambda x: observations[x], seen.ravel())))
print("隐藏的球编号的序列为:", list(map(lambda x: states[x], box2)))

观测到的球颜色的序列为: ['red', 'white', 'red']
隐藏的球编号的序列为: ['box3', 'box3', 'box3']


#### 3. 学习问题

只知道隐藏状态、观察结果的集合和观测序列，求解未知的初始状态pi,状态转移概率矩阵A和发射概率矩阵B

In [40]:
# 定义隐藏状态集合
states = ["box 1", "box 2", "box3"]
n_states = len(states)

# 定义观测结果集合
observations = ["red", "white"]
n_observations = len(observations)

In [41]:
model2 = hmm.MultinomialHMM(n_components=n_states, n_iter=20, tol=0.01)
X2 = np.array([[0,1,0,1],[0,0,0,1],[1,0,1,1]])     # 若干组观测序列
model2.fit(X2)

MultinomialHMM(algorithm='viterbi', init_params='ste', n_components=3,
        n_iter=20, params='ste',
        random_state=<mtrand.RandomState object at 0x10504d750>,
        startprob_prior=1.0, tol=0.01, transmat_prior=1.0, verbose=False)

In [42]:
# 由于求解学习问题采用的鲍姆-韦尔奇算法是基于EM算法的近似算法，所以需要多跑几次
model2.fit(X2)
print(model2.startprob_)
print(model2.transmat_)
print(model2.emissionprob_)
print(model2.score(X2))

[0.31404741 0.4491969  0.23675569]
[[0.33382519 0.32850166 0.33767316]
 [0.33428472 0.32387754 0.34183774]
 [0.33349289 0.33170954 0.33479757]]
[[0.49221094 0.50778906]
 [0.56626827 0.43373173]
 [0.43990012 0.56009988]]
-8.289138877942124


In [44]:
model2.fit(X2)
print(model2.startprob_)
print(model2.transmat_)
print(model2.emissionprob_)
print(model2.score(X2))

[9.99747788e-01 2.52212174e-04 1.90113973e-25]
[[0.00091685 0.08394974 0.9151334 ]
 [0.21569715 0.48525561 0.29904724]
 [0.41008868 0.25167919 0.33823213]]
[[0.99667229 0.00332771]
 [0.76707089 0.23292911]
 [0.05727461 0.94272539]]
-6.732298269772565


In [46]:
model2.fit(X2)
print(model2.startprob_)
print(model2.transmat_)
print(model2.emissionprob_)
print(model2.score(X2))

[3.50614852e-16 1.76226777e-01 8.23773223e-01]
[[0.36958333 0.28124618 0.34917048]
 [0.76902791 0.15407799 0.0768941 ]
 [0.94363894 0.04171766 0.0146434 ]]
[[0.14317431 0.85682569]
 [0.90301081 0.09698919]
 [0.98014948 0.01985052]]
-6.881313144243788


最终选择分数最高的模型参数（score为log分数）