In [1]:
%load_ext autoreload
%autoreload 2
import sys, os
from os.path import expanduser
# ACTION REQUIRED: change the two folder locations below to match your machine.
# Repository root; it is appended to sys.path (presumably so that the `regime`
# package imported later in this notebook resolves -- confirm).
path_repo = expanduser("~/Documents/G3_2/regime-identification")
sys.path.append(path_repo)

# Root folder of the simulation files.
path_file = expanduser("~/data/G3_2/regime-identification/simulation")

# Map each sub-folder name to its full path under path_file.
path = {}
for folder in ("data", "estimation", "score", "figure", "latex"):
    path[folder] = f"{path_file}/{folder}"

In [8]:
# Both values are passed to simulate_data_estimate_true_model further down. In its
# recorded output the saved arrays have 1024 rows, and the first saved Xs for each
# length is len + 2 * 20 long (e.g. 290 for 250).
n_t = 1024
n_buffer = 20

# Sequence lengths to simulate. The three rows are zipped, in order, against the
# daily / weekly / monthly keys when n_s_dict_3_state is built.
len_list = [
    [250, 500, 1000, 2000],
    [50, 100, 250, 500, 1000],
    [60, 120, 250, 500],
]

In [30]:
import numpy as np
from numpy.random import RandomState
# Seeded legacy NumPy generator shared by the exploratory cells of this notebook.
random_state = RandomState(seed=0)

In [38]:
from hmmlearn.hmm import GaussianHMM

In [52]:
class HMM_for_sample(BaseHMM):
    """
    A base class for HMMs that are only used for fast sampling.

    Values assigned to ``means_`` / ``covars_`` are converted to arrays and squeezed
    (every length-1 axis is dropped), but the stored array is always kept at least
    1-D. A plain ``squeeze`` would turn a single-state input such as shape ``(1, 1)``
    into a 0-d array, and per-state indexing like ``self._means_[state]`` would then
    raise ``IndexError``.

    ``_check`` does nothing. Subclasses supply ``_generate_sample_from_state``.

    Parameters
    ----------
    n_components : number of hidden states, forwarded to ``BaseHMM``.
    random_state : random state, forwarded to ``BaseHMM``.
    """
    def __init__(self, n_components, random_state):
        super().__init__(n_components=n_components, random_state=random_state,)

    @staticmethod
    def _squeeze_keep_1d(values):
        """Return ``values`` as an array with length-1 axes removed, never below 1-D."""
        # np.asarray lets lists/tuples through; the original `.squeeze()` call
        # required an object that already had that method.
        return np.atleast_1d(np.squeeze(np.asarray(values)))

    @property
    def means_(self):
        return self._means_

    @means_.setter
    def means_(self, means_):
        self._means_ = self._squeeze_keep_1d(means_)

    @property
    def covars_(self):
        return self._covars_

    @covars_.setter
    def covars_(self, covars_):
        self._covars_ = self._squeeze_keep_1d(covars_)

    def _check(self):
        # Deliberate no-op: parameters are taken as given.
        return

In [61]:
class tHMM_1d_for_sample(HMM_for_sample):
    """
    Sampling-only HMM with one-dimensional Student-t emissions.

    The constructor does not set ``dof_`` (degrees of freedom); it has to be assigned
    on the instance before any sample is drawn.
    """
    def __init__(self, n_components, random_state):
        super().__init__(n_components, random_state)

    def _generate_sample_from_state(self, state, random_state):
        """Draw one observation for ``state`` and return it as a one-element list."""
        location = self._means_[state]
        # A standard t draw has variance dof / (dof - 2) (for dof > 2), so this
        # scale makes the emission variance equal to covars_[state].
        scale = np.sqrt(self._covars_[state] * (self.dof_-2)/self.dof_)
        t_draw = random_state.standard_t(self.dof_)
        return [location + scale * t_draw]

In [62]:
class GaussianHMM_1d_for_sample(HMM_for_sample):
    """
    Sampling-only HMM with one-dimensional Gaussian emissions.

    It draws with a single scalar ``standard_normal`` call per observation.
    """
    def __init__(self, n_components, random_state):
        super().__init__(n_components, random_state)

    def _generate_sample_from_state(self, state, random_state):
        """Draw one observation for ``state`` and return it as a one-element list."""
        location = self._means_[state]
        std = np.sqrt(self._covars_[state])
        return [location + std * random_state.standard_normal()]

In [None]:
def get_HMM_instance_for_sampling(means_, covars_, transmat_, startprob_=None, emission = "Gaussian", dof_ = None, random_state=None):
    """
    Build a sampling-only 1d HMM with the given parameters.

    Parameters
    ----------
    means_, covars_ : array-like, per-state means and variances. They are converted
        to arrays here and squeezed by the model's setters.
    transmat_ : transition matrix; its length gives the number of states.
    startprob_ : initial state distribution. When None it is computed with
        ``invariant_dist_transmat_(transmat_)``.
    emission : "Gaussian" or "t".
    dof_ : degrees of freedom, required when ``emission == "t"``. It must be greater
        than 2 because the t sampler rescales draws by ``sqrt((dof - 2) / dof)``.
    random_state : passed through ``check_random_state``.

    Returns
    -------
    A ``GaussianHMM_1d_for_sample`` or ``tHMM_1d_for_sample`` instance.

    Raises
    ------
    NotImplementedError : if ``emission`` is neither "Gaussian" nor "t".
    ValueError : if ``emission == "t"`` and ``dof_`` is missing or not greater than 2.
    """
    # Validate up front so that bad arguments fail here, not later inside sampling.
    if emission not in ("Gaussian", "t"):
        raise NotImplementedError(f"emission must be 'Gaussian' or 't', got {emission!r}.")
    if emission == "t" and (dof_ is None or dof_ <= 2):
        raise ValueError(f"emission='t' requires dof_ > 2, got {dof_!r}.")

    n_c = len(transmat_)
    random_state = check_random_state(random_state)
    if startprob_ is None:
        startprob_ = invariant_dist_transmat_(transmat_)

    if emission == "Gaussian":
        model = GaussianHMM_1d_for_sample(n_c, random_state)
    else:
        model = tHMM_1d_for_sample(n_c, random_state)
        model.dof_ = dof_

    # The setters squeeze their input; np.asarray lets plain lists through as well.
    model.means_ = np.asarray(means_)
    model.covars_ = np.asarray(covars_)
    model.transmat_ = transmat_
    model.startprob_ = startprob_
    return model

In [64]:
def generate_key_data(data="all", **kwargs):
    """
    Expand a data specification into the flat list of data-set keys it stands for.

    Parameters
    ----------
    data : one of 2, 3, "t", "Onat", "all", or a list/tuple of those.
        - 2 or 3  -> ``"{data}-state-{scale}"`` for each scale (daily, weekly, monthly).
        - "t"     -> ``"t-{dof}-{scale}"`` for each scale; needs the kwarg ``dof``.
        - "Onat"  -> ``["Onat-1", "Onat-2"]``.
        - "all"   -> same as ``[2, 3, "t", "Onat"]``.
        - a list/tuple -> the keys of every element, concatenated in order.
    dof : (keyword, only used for "t") an int or a list/tuple of ints.

    Returns
    -------
    list of str

    Raises
    ------
    KeyError : if "t" keys are requested without the ``dof`` keyword.
    NotImplementedError : if ``data`` is not a recognised specification.
    """
    # Local import: no cell of this notebook imports `chain` explicitly, so the
    # function should not rely on it arriving through a star import.
    from itertools import chain

    scale_list = ["daily", "weekly", "monthly"]
    if isinstance(data, (list, tuple)):
        return list(chain.from_iterable(generate_key_data(key, **kwargs) for key in data))
    if data == "all":
        return generate_key_data([2, 3, "t", "Onat"], **kwargs)
    if data == 2 or data == 3:
        return [f"{data}-state-{scale}" for scale in scale_list]
    if data == "t":
        if "dof" not in kwargs:
            raise KeyError("generate_key_data('t') requires the keyword argument 'dof'")
        dof = kwargs["dof"]
        if isinstance(dof, (list, tuple)):
            return list(chain.from_iterable(generate_key_data("t", dof=dof_) for dof_ in dof))
        return [f"t-{dof}-{scale}" for scale in scale_list]
    if data == 'Onat':
        return [f"Onat-{i}" for i in range(1, 3)]

    raise NotImplementedError(f"unknown data key: {data!r}")

In [68]:
# Example: a list spec is flattened in order, and each dof yields its own set of "t" keys.
generate_key_data(["t", 2], dof=[3, 5])

['t-3-daily',
 't-3-weekly',
 't-3-monthly',
 't-5-daily',
 't-5-weekly',
 't-5-monthly',
 '2-state-daily',
 '2-state-weekly',
 '2-state-monthly']

In [None]:
# NOTE(review): scratch call with no arguments. get_GaussianHMM_model_sample (defined
# later in this notebook) has required parameters, so this raises TypeError if run.
get_GaussianHMM_model_sample()

In [None]:
def get_GaussianHMM_model(means_, covars_, transmat_, startprob_=None, random_state=None):
    """
    Build a GaussianHMM whose parameters are fixed to the given values.

    The model is created with ``params=""`` and ``init_params=""``; the intent is that
    calling ``fit`` on the returned instance won't update any parameter.

    Parameters
    ----------
    means_ : 2-D array, unpacked as (number of states, number of features).
    covars_ : covariances, assigned to the model as given.
    transmat_ : transition matrix.
    startprob_ : initial state distribution. When None it is computed with
        ``invariant_dist_transmat_(transmat_)``.
    random_state : passed through ``check_random_state``.
    """
    n_states, n_features = means_.shape
    rng = check_random_state(random_state)
    initial_dist = invariant_dist_transmat_(transmat_) if startprob_ is None else startprob_

    model = GaussianHMM(n_components=n_states, random_state=rng, params="", init_params="")
    model.n_features = n_features
    model.means_ = means_
    model.covars_ = covars_
    model.transmat_ = transmat_
    model.startprob_ = initial_dist
    return model

In [None]:
# NOTE(review): scratch copy of the Gaussian sampling expression. `self` and `state`
# are not defined at cell level in this notebook, so it cannot run on its own.
[self.means_[state, 0] + np.sqrt(self.covars_[state, 0]) * random_state.standard_normal()]

In [54]:
# Quick check of the sampler class: a 2-state t-HMM built with the notebook's random_state.
model = tHMM_1d_for_sample(2, random_state)

In [57]:
# Inspect the random state stored on the model.
model.random_state

RandomState(MT19937) at 0x14660EB40

In [59]:
# Exercise the means_ setter with a (1, 3, 1) array to see how it is stored.
# NOTE(review): three values are assigned to a model created with 2 states.
model.means_ = random_state.randn(3).reshape((1, 3, 1))

In [60]:
# Stored value after the setter ran: the length-1 axes are gone (see the 1-D output).
model._means_

array([-0.16989499,  0.15985927, -1.09584447])

In [34]:
%%timeit
# Baseline: one draw through the general multivariate sampler (about 22.6 µs per call in the recorded run).
random_state.multivariate_normal([0.], [[1.]])

22.6 µs ± 125 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [33]:
%%timeit
# Scalar standard-normal draw (about 161 ns per call in the recorded run).
random_state.standard_normal()

161 ns ± 0.65 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [37]:
%%timeit
# Scalar Student-t draw with 5 degrees of freedom (about 298 ns per call in the recorded run).
random_state.standard_t(5)

298 ns ± 0.635 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [39]:
# Default-constructed GaussianHMM, used below to look at how assigned parameters are stored.
model = GaussianHMM()

In [40]:
# Fixed attribute name: every other cell in this notebook sets the state means through
# `means_`. The original assigned to `means`, which only creates an unrelated attribute.
model.means_ = [[0], [1.], [2.]]

In [41]:
# Assign a (3, 1) array of per-state variances through the covars_ setter.
model.covars_ = np.array([[1], [4], [9.]])

In [43]:
# Stored value: GaussianHMM keeps the (3, 1) shape (see output), i.e. it is not squeezed.
model._covars_

array([[1.],
       [4.],
       [9.]])

In [None]:
# Scratch: a single scalar draw from the notebook's random_state.
random_state.randn()

In [2]:
from hmmlearn.base import BaseHMM

In [3]:
from regime.simulation_helper import *

In [23]:
class GaussianHMM_sample(BaseHMM):
    """
    Sampling-only Gaussian HMM that reads its parameters as ``means_[state, 0]`` and
    ``covars_[state, 0]``. ``_check`` is a no-op.
    """
    def __init__(self, n_components, random_state):
        super().__init__(n_components=n_components, random_state=random_state)

    def _check(self):
        # Deliberate no-op: parameters are taken as given.
        return None

    def _generate_sample_from_state(self, state, random_state):
        """Draw one observation for ``state`` and return it as a one-element list."""
        location = self.means_[state, 0]
        std = np.sqrt(self.covars_[state, 0])
        return [location + std * random_state.standard_normal()]

In [24]:
def get_GaussianHMM_model_sample(means_, covars_, transmat_, startprob_=None, random_state=None):
    """
    Build a GaussianHMM_sample instance carrying the given parameters.

    Parameters
    ----------
    means_ : 2-D array; its first dimension is used as the number of states.
    covars_ : array; it is stored with one extra trailing axis appended.
    transmat_ : transition matrix.
    startprob_ : initial state distribution. When None it is computed with
        ``invariant_dist_transmat_(transmat_)``.
    random_state : passed through ``check_random_state``.
    """
    # Unpacking into two names also requires means_ to be 2-D.
    n_states, _ = means_.shape
    rng = check_random_state(random_state)
    initial_dist = startprob_ if startprob_ is not None else invariant_dist_transmat_(transmat_)

    sampler = GaussianHMM_sample(n_components=n_states, random_state=rng)
    sampler.means_ = means_
    sampler.covars_ = covars_[..., np.newaxis]
    sampler.transmat_ = transmat_
    sampler.startprob_ = initial_dist
    return sampler

In [25]:
# Three-state setup: one data-generating model per key returned by generate_key_data(3).
n_c = 3
# NOTE(review): this rebinds the notebook-level `random_state` (RandomState(0) earlier)
# to None, so every model below is built with random_state=None. Presumably that means
# unseeded sampling -- confirm this is intended.
random_state = None
key_data_list_3_state = generate_key_data(n_c)

# The last "-"-separated piece of each key is handed to load_hardy_params together with n_c.
DGP_dict_3_state = {
    key_data: get_GaussianHMM_model_sample(
        *load_hardy_params(key_data.split("-")[-1], n_c),
        random_state=random_state,
    )
    for key_data in key_data_list_3_state
}

# Pair each key with the row of len_list at the same position.
n_s_dict_3_state = {
    key_data: lengths for key_data, lengths in zip(key_data_list_3_state, len_list)
}

In [26]:
# Run the simulation for every model in DGP_dict_3_state, with the lengths in n_s_dict_3_state.
# NOTE(review): the recorded run below ended in a KeyboardInterrupt with the progress bar
# at 1/3, so the saved outputs may be incomplete.
simulate_data_estimate_true_model(DGP_dict_3_state, n_s_dict_3_state, n_t, n_buffer, False, path=path)

  0%|                                                                                    | 0/3 [00:00<?, ?it/s]

shape of the saved Xs: (1024, 290, 1, 1).
shape of the saved Xs: (1024, 250, 1, 1).
shape of the saved Zs: (1024, 250).
shape of the saved Xs: (1024, 540, 1, 1).
shape of the saved Xs: (1024, 500, 1, 1).
shape of the saved Zs: (1024, 500).
shape of the saved Xs: (1024, 1040, 1, 1).
shape of the saved Xs: (1024, 1000, 1, 1).
shape of the saved Zs: (1024, 1000).


 33%|█████████████████████████▎                                                  | 1/3 [00:12<00:25, 12.66s/it]

shape of the saved Xs: (1024, 2040, 1, 1).
shape of the saved Xs: (1024, 2000, 1, 1).
shape of the saved Zs: (1024, 2000).
shape of the saved Xs: (1024, 90, 1, 1).
shape of the saved Xs: (1024, 50, 1, 1).
shape of the saved Zs: (1024, 50).
shape of the saved Xs: (1024, 140, 1, 1).
shape of the saved Xs: (1024, 100, 1, 1).
shape of the saved Zs: (1024, 100).
shape of the saved Xs: (1024, 290, 1, 1).
shape of the saved Xs: (1024, 250, 1, 1).
shape of the saved Zs: (1024, 250).
shape of the saved Xs: (1024, 540, 1, 1).
shape of the saved Xs: (1024, 500, 1, 1).
shape of the saved Zs: (1024, 500).


 33%|█████████████████████████▎                                                  | 1/3 [00:17<00:35, 17.71s/it]


KeyboardInterrupt: 