## 数据准备

In [1]:
import numpy as np
state = np.array(['认真复习', '简单复习', '没有复习'])
grade = np.array(['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-'])
n_state = len(state)
m_grade = len(grade)
pi = np.ones(n_state)/n_state
t = np.array([
    [0.4, 0.3, 0.3],
    [0.3, 0.4, 0.3],
    [0.3, 0.3, 0.4]
])
e = np.zeros([3,9])
e[0, :9]=1/9
e[1, 3:9]=1/6
e[2, 5:9]=1/4

In [2]:
print("初始概率矩阵：\n",pi)
print("转移矩阵：\n",t)
print("发射矩阵：\n",e)

初始概率矩阵：
 [0.33333333 0.33333333 0.33333333]
转移矩阵：
 [[0.4 0.3 0.3]
 [0.3 0.4 0.3]
 [0.3 0.3 0.4]]
发射矩阵：
 [[0.11111111 0.11111111 0.11111111 0.11111111 0.11111111 0.11111111
  0.11111111 0.11111111 0.11111111]
 [0.         0.         0.         0.16666667 0.16666667 0.16666667
  0.16666667 0.16666667 0.16666667]
 [0.         0.         0.         0.         0.         0.25
  0.25       0.25       0.25      ]]


## hmmlearn

In [3]:
pip install hmmlearn

Looking in indexes: http://mirrors.tencentyun.com/pypi/simple
Note: you may need to restart the kernel to use updated packages.


In [4]:
from hmmlearn.hmm import CategoricalHMM
hmm = CategoricalHMM(n_state)

In [5]:
# 设置模型的初始状态概率向量
hmm.startprob_ = pi  # pi 是长度为 n_state 的一维数组，表示每个状态的初始概率

# 设置模型的状态转移概率矩阵
hmm.transmat_ = t  # t 是形状为 (n_state, n_state) 的二维数组，表示状态之间的转移概率

# 设置模型的观测概率矩阵
hmm.emissionprob_ = e  # e 是形状为 (n_state, n_features) 的二维数组，表示每个状态生成观测值的概率

# 设置模型的观测值特征数量
hmm.n_features = 9  # 9 表示观测值的特征

In [6]:
# 对应 a+ b a- c+ a
datas = np.array([0, 4, 2, 6, 1])
 # 将数据转换为列向量形式，HMM模型要求每个样本是一个独立的观测序列
datas = np.expand_dims(datas, axis=1)
states = hmm.predict(datas)

In [7]:
#结果对应[s1,s1,s1,s3,s0]
states

array([0, 0, 0, 2, 0])

In [8]:
# 计算给定观测序列的对数概率值
# 这行代码调用了隐马尔可夫模型的 score 方法，用来计算给定观测序列 datas 的对数概率值。对数概率值越高，表示观测序列在当前模型下出现的可能性越大。
prob = hmm.score(datas)
#计算出来的 值是log的概率
prob

-14.003674820375014

In [9]:
print(np.exp(prob))

8.284786081615825e-07


In [10]:
#生成三要素
datas , states = hmm.sample(10000)

In [11]:
# 生成状态转移矩阵

t_2 = np.zeros([3,3])
for i in range(3):
    current = np.where(states == i)[0]
    next_index = current+1
    next_index = next_index[:-1]

    tmp = states[next_index]
    for j in range(3):
        t_2[i][j] = np.where(tmp==j)[0].shape[0]/np.shape(tmp)[0]
print(t_2)

[[0.41121495 0.29333735 0.29544769]
 [0.28884285 0.40988458 0.30127257]
 [0.29627386 0.30930021 0.39442593]]


In [12]:
#生成 发射概率矩阵
e_2 = np.zeros([3,9])
for i in range(3):
    current = np.where(states == i)[0]
    next_index = current+1
    next_index = next_index[:-1]
    tmp = datas[current]
    for j in range(9):
        e_2[i][j] = np.where(tmp==j)[0].shape[0]/np.shape(tmp)[0]
print(e_2)

[[0.10518385 0.10066305 0.11030741 0.11603376 0.11000603 0.1106088
  0.12115732 0.12085594 0.10518385]
 [0.         0.         0.         0.1760355  0.15591716 0.16242604
  0.17071006 0.16952663 0.16538462]
 [0.         0.         0.         0.         0.         0.24863719
  0.25741975 0.24500303 0.24894004]]
