In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils.extmath import randomized_svd

from spectral_dagger.spectral.hankel import single_obs_basis, top_k_basis
from spectral_dagger.utils.math import normalize
from spectral_dagger.hmm import HMM
from spectral_dagger.spectral import SpectralPSR
from spectral_dagger.spectral import hankel

In [43]:
model_seed = np.random.randint(10000)
data_seed = np.random.randint(10000)
np.random.seed(model_seed)

n_states = 1
n_obs = 5

observations = range(n_obs)
states = range(n_states)

O = np.random.binomial(1, 0.5, (n_states, n_obs))
for row in O:
    if sum(row) == 0:
        row[:] = 1.0

O = normalize(O, ord=1, conservative=True)
print(O)

T = np.random.binomial(1, 0.5, (n_states, n_states))
for row in T:
    if sum(row) == 0:
        row[:] = 1.0

T = normalize(T, ord=1, conservative=True)
print(T)

init_dist = normalize(np.ones(n_states), ord=1, conservative=True)

hmm = HMM(observations, states, T, O, init_dist)

np.random.seed(data_seed)
print(hmm.sample_trajectory(10))

horizon = 3
n_samples = 100

[[ 0.49999976  0.          0.          0.49999976  0.        ]]
[[ 0.99999905]]
[0, 3, 3, 3, 0, 3, 3, 0, 0, 0]


In [46]:
def scan_n_states(training_data, testing_data, dimension_seq, e):
    results = []
    
    for m in dimension_seq:
        if e == 'single':
            basis = single_obs_basis(hmm.observations, True)
            e = 'substring'
        else:
            basis = top_k_basis(training_data, np.inf, e)

        psr = SpectralPSR(hmm.observations)
        psr.fit(training_data, m, e, basis=basis)

        llh = psr.get_log_likelihood(testing_data, base=2)
        perplexity = 2**(-llh)
        
        results.append((psr, m, perplexity))
    
    return min(results, key=lambda x: x[2])

def scan_n_states_true(training_data, testing_data, dimension_seq, e):
    results = []
    
    for m in dimension_seq:
        if e == 'single':
            basis = single_obs_basis(hmm.observations, True)
            e = 'substring'
        else:
            basis = top_k_basis(training_data, np.inf, e)

        psr = SpectralPSR(hmm.observations)
        
        true_hankel = hankel.true_hankel_for_hmm(hmm, basis, horizon, e)

        n_oversamples = 10
        n_iter = 5
        max_dim = 80

        svd = randomized_svd(true_hankel, max_dim, n_oversamples, n_iter)
        psr.fit(training_data, m, e, basis=basis, svd=svd)

        llh = psr.get_log_likelihood(testing_data, base=2)
        perplexity = 2**(-llh)
        
        results.append((psr, m, perplexity))
    
    return min(results, key=lambda x: x[2])


In [47]:
training_samples = [hmm.sample_trajectory(horizon) for i in range(n_samples)]
testing_samples = [hmm.sample_trajectory(horizon) for i in range(10)]

dimension_seq = range(n_states, 2*n_states+1)

# Do vanilla spectral learning
for e in ['string', 'prefix', 'substring', 'single']:
    model, n, perplexity = scan_n_states(training_samples, testing_samples, dimension_seq, e)
    
    print "^" * 40
    print "Estimated SVD with estimator %s" % e
    print "Dimension: ", n
    print "Perplexity: ", perplexity
    print "^" * 40
    
# Do spectral learning with true U, S, V
for e in ['string', 'prefix', 'substring', 'single']:
    model, n, perplexity = scan_n_states_true(training_samples, testing_samples, dimension_seq, e)
    
    print "@" * 40
    print "True SVD with estimator %s" % e
    print "Dimension: ", n
    print "Perplexity: ", perplexity
    print "@" * 40

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Estimated SVD with estimator string
Dimension:  1
Perplexity:  4.65785958405e+44
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Estimated SVD with estimator prefix
Dimension:  1
Perplexity:  11.0044343935
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Estimated SVD with estimator substring
Dimension:  2
Perplexity:  12.2071948741
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Estimated SVD with estimator single
Dimension:  2
Perplexity:  12.2071948741
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
True SVD with estimator string
Dimension:  2
Perplexity:  3.19273084812e+16
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
True SVD with estimator prefix
Dimension:  1
Perplexity:  10.8317123547
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
True