In [38]:
from hmmlearn import hmm  # https://www.lfd.uci.edu/~gohlke/pythonlibs/#hmmlearn for windows
import numpy as np

**TODO:**
- Load real highway data from the US Department of Transportation.
- https://github.com/hmmlearn/hmmlearn/issues/70
- https://gist.github.com/dougalsutherland/1329976
- https://drive.google.com/file/d/0B51S7y4fFTS2bnNLcnllc1dpdVE/view

In [39]:
# One seed for the whole notebook: it seeds NumPy's global RNG here and is
# reused wherever a reproducible random_state is needed.
seed = 93
np.random.seed(seed=seed)

In [29]:
# Human-readable labels; position i in each list names integer code i.
states = [
    "left_lane",   # hidden state 0
    "right_lane",  # hidden state 1
]
observations = [
    "low_speed",   # observation symbol 0
    "high_speed",  # observation symbol 1
]

In [30]:
# Two-state HMM over discrete symbols whose parameters are written down by hand
# below instead of being learned from data.
discrete_model = hmm.MultinomialHMM(
    n_components=2,
    algorithm='viterbi',  # decoder algorithm; 'map' is the other option
    random_state=seed,
    n_iter=10,
    tol=0.01,  # EM convergence threshold (gain in log-likelihood)
)

# Initial state distribution.
discrete_model.startprob_ = np.array([1/3, 2/3])

# Transition matrix: row i is P(state_t+1 | state_t = state_i).
discrete_model.transmat_ = np.array([
    [0.6, 0.4],
    [0.2, 0.8],
])

# Emission matrix: row i is P(obs | state_i).
discrete_model.emissionprob_ = np.array([
    [0.4, 0.6],
    [0.8, 0.2],
])

### Sampling
Generate samples from the HMM 

In [31]:
from collections import Counter

# Draw one long trajectory from the hand-specified model. With this many draws
# the empirical state frequencies estimate the stationary state distribution
# (1/3 vs 2/3).
# NOTE(review): sample() presumably returns observations shaped
# (n_samples, n_features) -- confirm; both arrays are flattened for counting.
Obss, States = discrete_model.sample(100000)
States, Obss = States.flatten(), Obss.flatten()

# Frequency tables: hidden states first, then observed symbols.
for labels in (States, Obss):
    print(Counter(labels))

Counter({1: 66134, 0: 33866})
Counter({0: 66520, 1: 33480})


In [37]:
# The stationary distribution is proportional to the left-eigenvector of the
# transition matrix associated with its largest eigenvalue (i.e., 1). Left
# eigenvectors of transmat_ are obtained as ordinary eigenvectors of its transpose.
# (A direct discrete_model.get_stationary_distribution() call was left disabled here.)
transposed = discrete_model.transmat_.T
eigvals, eigvecs = np.linalg.eig(transposed)
dominant = np.argmax(eigvals)  # column index of the largest eigenvalue
eigvec = np.real_if_close(eigvecs[:, dominant])
stationary = eigvec / eigvec.sum()  # rescale so the entries sum to 1

# Printed in order: transposed matrix, all eigenvectors, the selected eigenvector,
# the eigenvalues, the normalised stationary distribution, and sqrt(2)/2 as a
# reference value for the entries of the other eigenvector column.
for shown in (transposed, eigvecs, eigvec, eigvals, stationary, np.sqrt(2)/2):
    print(shown)

[[0.6 0.2]
 [0.4 0.8]]
[[-0.70710678 -0.4472136 ]
 [ 0.70710678 -0.89442719]]
[-0.4472136  -0.89442719]
[0.4 1. ]
[0.33333333 0.66666667]
0.7071067811865476


In [36]:
# Scratch check: 0.4472136 and 0.70710678 are entry magnitudes taken from the two
# eigenvector columns printed by the previous cell; this evaluates their ratio.
0.4472136 /0.70710678

0.6324555394589767

### Decoding
Find most likely state sequence corresponding to the observation sequence

In [16]:
# Observed symbols as a column vector, one row per time step.
obs_sequence = np.array([[0, 1, 0]]).T
# decode() yields the log probability of the produced state sequence together
# with the state sequence itself.
logprob, state_sequence = discrete_model.decode(obs_sequence)

# One line per time step: decoded hidden state -> symbol observed at that step.
for symbol, state in zip(obs_sequence[:, 0], state_sequence):
    lane = states[int(state)]
    speed = observations[int(symbol)]
    print("{} -> {}".format(lane, speed))

prob = np.exp(logprob)
print("\nprob = {}\nlog_prob = {}".format(prob, logprob))

right_lane -> low_speed
right_lane -> high_speed
right_lane -> low_speed

prob = 0.05461333333333335
log_prob = -2.9074772257991035


### Learning
Estimate HMM model parameters based on observation data

In [19]:
# Observed symbols as a column vector, one row per time step.
obs_sequence = np.array([[0, 1, 0]]).T

# fit() trains the estimator in place and returns that same object, so
# `new_model = discrete_model.fit(...)` would only alias discrete_model and
# overwrite its hand-specified parameters, changing what the sampling and
# decoding cells produce on a re-run. Fit a separate model with the same
# hyper-parameters instead, leaving discrete_model untouched.
# NOTE(review): with hmmlearn's default init_params, fit() re-initialises the
# start/transition/emission parameters before EM, so the learned result is
# expected to match the previous output -- confirm on the installed version.
new_model = hmm.MultinomialHMM(
    n_components=2,
    algorithm='viterbi',
    random_state=seed,
    n_iter=10,
    tol=0.01,  # EM convergence threshold (gain in log-likelihood)
)
new_model.fit(obs_sequence)
print(new_model.transmat_)

[[9.26752917e-09 9.99999991e-01]
 [1.00000000e+00 5.77188052e-39]]


In [127]:
# Summary of the HMM API used in this notebook:
# - sample(): draw samples from the model.
# - fit(): train an HMM on observation data.
# - predict(): obtain the inferred optimal hidden states;
#     the Viterbi algorithm ("viterbi") and maximum a posteriori estimation ("map") are supported.
# - score(): compute the score of the model.

In [None]:
eigenvalue of the matrix