In [2]:
import numpy as np
import matplotlib.pyplot as plt  
from hmmlearn import hmm

Some useful references on hidden Markov models and the `hmmlearn` library used in this notebook:

- https://hmmlearn.readthedocs.io/en/latest/auto_examples/plot_variational_inference.html
- https://hmmlearn.readthedocs.io/en/latest/auto_examples/plot_casino.html
- https://www.cs.sjsu.edu/~stamp/RUA/HMM.pdf

In [7]:
# Generative model with two hidden states: index 0 = low suspense, index 1 = high suspense.

# Every round starts in the low-suspense state with certainty.
start_probs = np.array([1.0, 0.0])

# Transition probabilities: row = current hidden state, column = next hidden state.
# Part of the experiment is to see whether people are keen to reach states of high
# suspense, so the low-suspense row is initialised with a 25% chance of staying put
# and a 75% chance of moving up to high suspense.
transition_probs = np.array([
    [0.25, 0.75],
    [0.8, 0.2],
])

# Emission probabilities: row = hidden state, column = observed symbol, i.e. each entry
# is P(observation | hidden state) and each row sums to 1.
# This toy setup has 4 observable symbols (the real data has 21+ of them).
# The first two symbols are the ones most typical of low suspense, the last two of
# high suspense.
emission_probs = np.array([
    [0.4, 0.3, 0.2, 0.1],  # low-suspense state: probability of emitting each symbol
    [0.1, 0.2, 0.3, 0.4],  # high-suspense state: probability of emitting each symbol
])

gen_model = hmm.CategoricalHMM(n_components=2, random_state=99)
gen_model.startprob_ = start_probs
gen_model.transmat_ = transition_probs
gen_model.emissionprob_ = emission_probs

In [38]:
# Draw a sequence of observations from the generative model, together with the
# hidden states that produced them.
n_samples = 3000
rolls, gen_states = gen_model.sample(n_samples=n_samples)

In [39]:
# Parameter recovery: fit fresh models to data sampled from gen_model and keep the
# one that scores best on held-out data.

# First half of the sampled sequence is used for fitting, second half for validation.
split_at = len(rolls) // 2
x_train, x_validate = rolls[:split_at], rolls[split_at:]

# Score of the held-out half under the true generating model; this is the
# reference the fitted models are compared against.
gen_score = gen_model.score(x_validate)

n_fits = 50
# NOTE(review): every model below is given its own random_state, so this global
# seed presumably does not influence the fits. Kept so the global RNG state seen
# by later cells is unchanged; confirm before removing.
np.random.seed(13)

best_model, best_score = None, None
for ix in range(n_fits):
    # init_params='ste' lets hmmlearn initialise start, transition and emission
    # probabilities itself; only the seed differs between fits.
    model = hmm.CategoricalHMM(
        n_components=2, n_features=4, random_state=ix, init_params='ste'
    ).fit(x_train)
    score = model.score(x_validate)
    print(f'Model #{ix}\tScore: {score}')

    improved = best_score is None or score > best_score
    if improved:
        best_model, best_score = model, score

print(f'Generated score: {gen_score}\nBest score: {best_score}')

Model #0	Score: -2079.2023916242306
Model #1	Score: -2085.7473541887784
Model #2	Score: -2076.5715494280234
Model #3	Score: -2081.0994465932536
Model #4	Score: -2079.1353917697265
Model #5	Score: -2074.358454441661
Model #6	Score: -2074.236949701335
Model #7	Score: -2071.8984451128363
Model #8	Score: -2080.968518959379
Model #9	Score: -2080.6361359573903
Model #10	Score: -2081.8742622767954
Model #11	Score: -2076.69108513878
Model #12	Score: -2084.6860628696963
Model #13	Score: -2080.373920441045
Model #14	Score: -2078.118810856695
Model #15	Score: -2081.0933643611447
Model #16	Score: -2080.0528072430097
Model #17	Score: -2079.8051915973524
Model #18	Score: -2078.305514372171
Model #19	Score: -2080.1039744034247
Model #20	Score: -2076.204594353516
Model #21	Score: -2078.0701733149967
Model #22	Score: -2079.4196688870416
Model #23	Score: -2078.9957554343227
Model #24	Score: -2072.616659093793
Model #25	Score: -2075.338893905001
Model #26	Score: -2079.955695350764
Model #27	Score: -2078.

In [40]:
# Hidden-state sequence the best recovered model assigns to the full sampled
# sequence (training and validation halves together).
states = best_model.predict(X=rolls)

In [42]:
# Show the transition matrix used to generate the data next to the one recovered
# by the best fit, both rounded to 3 decimal places.
# NOTE(review): "Transmission" in the labels is presumably meant to be
# "Transition"; left unchanged here so the saved output still matches.
# NOTE(review): hidden-state labels of a fitted HMM may be permuted relative to
# the generating model; confirm the state order before comparing entry by entry.
gen_transmat = gen_model.transmat_.round(3)
recovered_transmat = best_model.transmat_.round(3)
print(
    f'Transmission Matrix Generated:\n{gen_transmat}\n\n'
    f'Transmission Matrix Recovered:\n{recovered_transmat}\n\n'
)

Transmission Matrix Generated:
[[0.25 0.75]
 [0.8  0.2 ]]

Transmission Matrix Recovered:
[[0.187 0.813]
 [0.636 0.364]]




In [44]:
# Decode a short hand-written observation sequence with the recovered model.
# CategoricalHMM expects observations as a column of shape (n_samples, 1); a
# single row of shape (1, n_samples) is only accepted through a deprecated
# fallback, so the sequence is reshaped into a column here. The decoded states
# are the same either way.
test_rolls = np.array([0, 0, 1, 1, 2, 2, 3]).reshape(-1, 1)
best_model.predict(test_rolls)

array([0, 1, 0, 1, 0, 1, 0], dtype=int64)