In [27]:
"""
Gaussian HMM of stock data
"""

from __future__ import print_function

import datetime

import numpy as np
import pandas as pd
from matplotlib import cm, pyplot as plt
from matplotlib.dates import MonthLocator, YearLocator
import hmmlearn

from hmmlearn.hmm import GaussianHMM


print(__doc__)

###############################################################################
# Load and prepare data

# Number of (differenced) observations used to train the model.
N_SAMPLES = 100

# ``parse_dates`` converts the Date column to datetime values so that the
# date tick locators used when plotting can interpret the x-axis.
data = pd.read_csv('data/Google.csv', parse_dates=['Date'])

# ``Series.as_matrix()`` was removed in pandas 1.0; ``.values`` returns the
# same NumPy array on both old and new pandas versions.
close_all = data['Close'].values

# Take diff of close value. Note that this makes
# ``len(diff) = len(close_all) - 1``, therefore every other quantity is
# shifted by 1 so that row i of each array refers to the same observation.
# All arrays (including ``close_v``, which is indexed with the hidden-state
# mask when plotting) are then truncated to the same N_SAMPLES rows.
diff = np.diff(close_all)[:N_SAMPLES]
close_v = close_all[1:][:N_SAMPLES]
volume = data['Volume'].values[1:][:N_SAMPLES]
dates = data['Date'].values[1:][:N_SAMPLES]

# Pack diff and volume for training.
X = np.column_stack([diff, volume])
print(X)
###############################################################################
# Run Gaussian HMM
print("fitting to HMM and decoding ...", end="")

# Make an HMM instance and execute fit.
# A fixed ``random_state`` makes the randomised parameter initialisation
# repeatable, so the fitted parameters and the numbering of the hidden states
# are the same on every Restart & Run All.
model = GaussianHMM(n_components=3, covariance_type="full", n_iter=100,
                    random_state=42)
model.fit(X)

# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print("done")

###############################################################################
# Report the fitted model parameters
print("Transition matrix")
print(model.transmat_)
print()

print("Means and vars of each hidden state")
# Walk the per-state mean vectors and covariance matrices in lockstep.
for state, (state_mean, state_cov) in enumerate(zip(model.means_,
                                                    model.covars_)):
    heading = "{0}th hidden state".format(state)
    print(heading)
    print("mean = ", state_mean)
    # The diagonal of a state's covariance matrix holds the per-feature
    # variances.
    state_vars = np.diag(state_cov)
    print("var = ", state_vars)
    print()

# One panel per hidden state. ``squeeze=False`` makes ``plt.subplots`` always
# return a 2-D array of axes, so the loop below also works when the model has
# a single component (otherwise a bare Axes object would be returned).
fig, axs = plt.subplots(model.n_components, sharex=True, sharey=True,
                        squeeze=False)
colours = cm.rainbow(np.linspace(0, 1, model.n_components))

# Normalise the dates to ``datetime`` objects once, so the x-axis is a real
# date axis (required by the Year/Month locators) whether ``dates`` holds
# strings or datetime64 values.
plot_dates = pd.to_datetime(dates).to_pydatetime()

for i, (ax, colour) in enumerate(zip(axs.ravel(), colours)):
    # Use fancy indexing to plot data in each state.
    mask = hidden_states == i
    # ``Axes.plot_date`` is deprecated in recent matplotlib; ``plot`` handles
    # datetime x-values directly.
    ax.plot(plot_dates[mask], close_v[mask], ".-", c=colour)
    ax.set_title("{0}th hidden state".format(i))

    # Format the ticks.
    ax.xaxis.set_major_locator(YearLocator())
    ax.xaxis.set_minor_locator(MonthLocator())

    ax.grid(True)

plt.show()


Gaussian HMM of stock data

[[  7.97500000e+00   2.28343000e+07]
 [  1.09000000e+00   1.82561000e+07]
 [ -4.53000000e+00   1.52473000e+07]
 [  1.13000000e+00   9.18860000e+06]
 [  1.91000000e+00   7.09480000e+06]
 [ -1.76000000e+00   6.21170000e+06]
 [ -4.14000000e+00   5.19670000e+06]
 [  3.60000000e-01   4.91780000e+06]
 [ -2.12000000e+00   9.13820000e+06]
 [  1.26000000e+00   1.51186000e+07]
 [ -1.50000000e+00   5.15240000e+06]
 [  1.57000000e+00   5.84750000e+06]
 [  7.20000000e-01   4.98560000e+06]
 [  1.00000000e-02   4.06170000e+06]
 [  3.02000000e+00   8.69880000e+06]
 [  2.17000000e+00   7.84410000e+06]
 [  3.99000000e+00   1.08289000e+07]
 [  5.10000000e-01   1.07130000e+07]
 [  1.97000000e+00   9.26630000e+06]
 [  3.52000000e+00   9.47250000e+06]
 [  1.87000000e+00   1.06287000e+07]
 [ -1.52000000e+00   7.22870000e+06]
 [  5.40000000e-01   7.58120000e+06]
 [  2.44000000e+00   8.53560000e+06]
 [ -9.90000000e-01   9.12340000e+06]
 [ -1.57000000e+00   7.06610000e+06]
 [  8.600

IndexError: tuple index out of range