Generating Hidden States

Hidden States - Sunny and Cloudy
Observable States - Happy and Sad

Transition Probabilities

Sunny -- 0.8 --> Sunny

Sunny -- 0.2 --> Cloudy

Cloudy -- 0.4 --> Sunny

Cloudy -- 0.6 --> Cloudy


Emission Probabilities

Sunny -- 0.7 --> Happy

Sunny -- 0.3 --> Sad

Cloudy -- 0.4 --> Happy

Cloudy -- 0.6 --> Sad

In [1]:
import numpy as np
import pandas as pd
from hmmlearn import hmm

In [3]:
sample_size = 500
hidden_floats = np.random.uniform(0, 1, sample_size)
hidden_states = ['Sunny', 'Cloudy']
hidden_seq = []

if hidden_floats[0] < 0.5:
    hidden_seq.append(hidden_states[0])
else:
    hidden_seq.append(hidden_states[1])
    
for i in range(1, sample_size):
    
    if hidden_seq[i - 1] == hidden_states[0]:
        if hidden_floats[i] < 0.8:
            hidden_seq.append(hidden_states[0])
        else:
            hidden_seq.append(hidden_states[1])
            
    elif hidden_seq[i - 1] == hidden_states[1]:
        if hidden_floats[i] < 0.4:
            hidden_seq.append(hidden_states[0])
        else:
            hidden_seq.append(hidden_states[1])

In [4]:
obs_floats = np.random.uniform(0, 1, sample_size)
obs_states = ['Happy', 'Sad']
obs_seq = []

for i in range(sample_size):
    if hidden_seq[i] == hidden_states[0]:
        if obs_floats[i] < 0.7:
            obs_seq.append(obs_states[0])
        else:
            obs_seq.append(obs_states[1])
    
    elif hidden_seq[i] == hidden_states[1]:
        if obs_floats[i] < 0.4:
            obs_seq.append(obs_states[0])
        else:
            obs_seq.append(obs_states[1])

In [5]:
pd.DataFrame({'Hidden States': hidden_seq,
             'Observations': obs_seq})

Unnamed: 0,Hidden States,Observations
0,Sunny,Happy
1,Cloudy,Happy
2,Sunny,Happy
3,Cloudy,Sad
4,Cloudy,Sad
...,...,...
495,Sunny,Happy
496,Sunny,Happy
497,Sunny,Happy
498,Sunny,Happy


In [6]:
with open('hmm_sim.txt', 'w') as f:
    for state in obs_seq:
        f.write("%s\n" % state)

In [7]:
with open('hmm_hidden_sim.txt', 'w') as f:
    for state in hidden_seq:
        f.write("%s\n" % state)

Learning HMM

In [3]:
with open('left_right.txt', 'r') as f:
    line = f.readline()
    obs = []
    while line != '':
        obs += list(''.join(x for x in line if x.isalpha()))
        line = f.readline()

In [15]:
X = []
for x in obs:
    if x == 'L':
        X.append([1])
    else:
        X.append([0])

In [37]:
remodel = hmm.GaussianHMM(n_components=2, covariance_type="full", n_iter=1000)
remodel.fit(X)
pred = remodel.predict(X)

In [22]:
zero_after_one = 0
zero_after_zero = 0
one_after_one = 0
one_after_zero = 0
for i in range(1, len(pred)):
    if pred[i] == 0 and pred[i - 1] == 1:
        zero_after_one += 1
    elif pred[i] == 0 and pred[i - 1] == 0:
        zero_after_zero += 1
    elif pred[i] == 1 and pred[i - 1] == 1:
        one_after_one += 1
    elif pred[i] == 1 and pred[i - 1] == 0:
        one_after_zero += 1

after_one = zero_after_one + one_after_one
after_zero = zero_after_zero + one_after_zero

trans_probs = [zero_after_one / after_one,
              one_after_one / after_one,
              zero_after_zero / after_zero,
              one_after_zero / after_zero]

trans_probs

[0.398406374501992, 0.601593625498008, 0.5967741935483871, 0.4032258064516129]

In [23]:
left_after_one = 0
left_after_zero = 0
right_after_one = 0
right_after_zero = 0

for i in range(len(pred)):
    if pred[i] == 1 and obs[i] == 'L':
        left_after_one += 1
    elif pred[i] == 0 and obs[i] == 'L':
        left_after_zero += 1
    elif pred[i] == 1 and obs[i] == 'R':
        right_after_one += 1
    elif pred[i] == 0 and obs[i] == 'R':
        right_after_zero += 1
        
emission_probs = [left_after_one / after_one,
                  right_after_one / after_one,
                  left_after_zero / after_zero,
                  right_after_zero / after_zero
                 ]

emission_probs     

[0.0, 1.0, 1.0040322580645162, 0.0]

In [45]:
pd.DataFrame({'hidden': pred,
             'obs': obs})

Unnamed: 0,hidden,obs
0,1,L
1,0,R
2,1,L
3,1,L
4,0,R
...,...,...
495,0,R
496,0,R
497,1,L
498,1,L


In [25]:
with open('heads_tails.txt', 'r') as f:
    line = f.readline()
    hidden = []
    while line != '':
        hidden += list(''.join(x for x in line if x.isalpha()))
        line = f.readline()

y = []
for hid in hidden:
    if hid == 'H':
        y.append(1)
    else:
        y.append(0)

In [82]:
model = hmm.GaussianHMM(n_components=2, covariance_type="full", n_iter=10000)

model.transmat = np.array([[0.65, 0.35],
                           [0.35, 0.65]])

model.startprob = np.array([0.5, 0.5])

ones = np.sum(np.squeeze(X))
one_emis = ones / len(X)
zero_emis = (len(X) - ones) / len(X)

model.means = np.array([[one_emis], [zero_emis]])

model.fit(X)

pred = model.predict(X)

In [83]:
np.mean(pred == y)

0.13

In [84]:
pred == y

array([ True, False, False, False,  True, False, False, False, False,
        True, False, False, False, False, False,  True, False,  True,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False,  True, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False,  True, False, False,
       False, False,  True, False, False, False, False,  True, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False,  True, False,  True, False,
       False, False, False, False, False, False, False, False, False,
       False, False,