In [1]:
# timelimit = 00:10:00
%load_ext autoreload
%autoreload 2
import sys, os
from os.path import expanduser
# ACTION REQUIRED: point the two locations below at your own checkout and data folder.
# The repository root is appended to the import path so that `regime` can be imported.
path_repo = expanduser("~/Documents/G3_2/regime-identification")
sys.path.append(path_repo)
# Root folder of the simulation study; `path` maps each sub-folder name to its location.
path_file = expanduser("~/data/G3_2/regime-identification/simulation")
path = {folder: f"{path_file}/{folder}" for folder in ("data", "estimation")}

In [2]:
import numpy as np
from numpy.random import RandomState

In [3]:
from regime.simulation_helper import *

In [4]:
# Number of independent sequences simulated for every (scale, length) combination.
n_t = 1024
# Extra periods simulated at each end of a sequence; feature engineering needs them.
n_buffer = 20
# Sequence lengths to simulate, one list per time scale (daily, weekly, monthly order,
# matching the recorded output of the simulation cells).
len_list = [
    [250, 500, 1000, 2000],
    [50, 100, 250, 500, 1000],
    [60, 120, 250, 500],
]

In [5]:
# Registries filled in by each section below, all keyed by the data-family name
# (e.g. "2-state"): the data keys, the sampling models, and the sequence lengths.
key_data_dict, DGP_dict, n_s_dict = {}, {}, {}

# 0-generate-data

In this notebook we systematically generate the simulation data. Estimation under the true model is postponed to a later notebook, so that all estimation is kept in one place.


# 2-state models

- scale: We use the parameters estimated in Hardy's classic paper and convert them to three time scales: **daily, weekly, monthly**, with decreasing persistence.
- length: We simulate sequences of different lengths.

For each combination of scale and length, we simulate `n_t=1024` sequences. The data for each combination are saved as one batch of shape `(n_t, n_s, n_f)`, i.e. (number of sequences, sequence length, number of features). Since we need to do feature engineering afterwards, every raw sequence is simulated 20 periods longer at both the beginning and the end.

In [6]:
# Two-regime models with Gaussian emissions: one sampling model per data key,
# all drawing from the same seeded random stream.
n_c = 2
data_name = f"{n_c}-state"
random_state = RandomState(0)  # fixed seed for this family
key_data_dict[data_name] = generate_key_data(n_c)
# The text after the last "-" of each key is passed to load_hardy_params together
# with the number of states (presumably the scale name, e.g. "daily" -- confirm in the helper).
DGP_dict[data_name] = {
    key_data: get_HMM_instance_for_sampling(
        *load_hardy_params(key_data.rsplit("-", 1)[-1], n_c),
        emission="Gaussian",
        random_state=random_state,
    )
    for key_data in key_data_dict[data_name]
}
# Pair the data keys, in order, with the per-scale lists of sequence lengths.
n_s_dict[data_name] = {
    key_data: lengths
    for key_data, lengths in zip(key_data_dict[data_name], len_list)
}

In [7]:
# Simulate and save the batches for the current family. Per the recorded output, each
# (key, length) pair yields a buffered X of shape (n_t, n_s + 2 * n_buffer, 1), a
# trimmed X of shape (n_t, n_s, 1) and state labels Z of shape (n_t, n_s).
simulate_data(
    DGP_dict[data_name],
    n_s_dict[data_name],
    n_t,
    n_buffer,
    path=path,
)

  0%|                                                                                    | 0/3 [00:00<?, ?it/s]

created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/data/2-state-daily.
shape of the saved Xs: (1024, 290, 1).
shape of the saved Xs: (1024, 250, 1).
shape of the saved Zs: (1024, 250).
shape of the saved Xs: (1024, 540, 1).
shape of the saved Xs: (1024, 500, 1).
shape of the saved Zs: (1024, 500).
shape of the saved Xs: (1024, 1040, 1).
shape of the saved Xs: (1024, 1000, 1).
shape of the saved Zs: (1024, 1000).


 33%|█████████████████████████▎                                                  | 1/3 [00:08<00:17,  8.78s/it]

shape of the saved Xs: (1024, 2040, 1).
shape of the saved Xs: (1024, 2000, 1).
shape of the saved Zs: (1024, 2000).
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/data/2-state-weekly.
shape of the saved Xs: (1024, 90, 1).
shape of the saved Xs: (1024, 50, 1).
shape of the saved Zs: (1024, 50).
shape of the saved Xs: (1024, 140, 1).
shape of the saved Xs: (1024, 100, 1).
shape of the saved Zs: (1024, 100).
shape of the saved Xs: (1024, 290, 1).
shape of the saved Xs: (1024, 250, 1).
shape of the saved Zs: (1024, 250).
shape of the saved Xs: (1024, 540, 1).
shape of the saved Xs: (1024, 500, 1).
shape of the saved Zs: (1024, 500).


 67%|██████████████████████████████████████████████████▋                         | 2/3 [00:13<00:06,  6.51s/it]

shape of the saved Xs: (1024, 1040, 1).
shape of the saved Xs: (1024, 1000, 1).
shape of the saved Zs: (1024, 1000).
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/data/2-state-monthly.
shape of the saved Xs: (1024, 100, 1).
shape of the saved Xs: (1024, 60, 1).
shape of the saved Zs: (1024, 60).
shape of the saved Xs: (1024, 160, 1).
shape of the saved Xs: (1024, 120, 1).
shape of the saved Zs: (1024, 120).
shape of the saved Xs: (1024, 290, 1).
shape of the saved Xs: (1024, 250, 1).
shape of the saved Zs: (1024, 250).


100%|████████████████████████████████████████████████████████████████████████████| 3/3 [00:16<00:00,  5.50s/it]

shape of the saved Xs: (1024, 540, 1).
shape of the saved Xs: (1024, 500, 1).
shape of the saved Zs: (1024, 500).





# t-components

In [8]:
# Two-regime models whose emissions use the "t" option with `dof_` degrees of freedom;
# one sampling model per data key, all sharing one seeded random stream.
n_c = 2
dof_ = 5
data_name = f"t-{dof_}"
random_state = RandomState(1)  # fixed seed for this family
key_data_dict[data_name] = generate_key_data("t", dof=dof_)
# The text after the last "-" of each key is passed to load_hardy_params together
# with the number of states (presumably the scale name, e.g. "daily" -- confirm in the helper).
DGP_dict[data_name] = {
    key_data: get_HMM_instance_for_sampling(
        *load_hardy_params(key_data.rsplit("-", 1)[-1], n_c),
        emission="t",
        dof_=dof_,
        random_state=random_state,
    )
    for key_data in key_data_dict[data_name]
}
# Pair the data keys, in order, with the per-scale lists of sequence lengths.
n_s_dict[data_name] = {
    key_data: lengths
    for key_data, lengths in zip(key_data_dict[data_name], len_list)
}

In [9]:
# Simulate and save the batches for the t-component family. Per the recorded output,
# each (key, length) pair yields a buffered X of shape (n_t, n_s + 2 * n_buffer, 1),
# a trimmed X of shape (n_t, n_s, 1) and state labels Z of shape (n_t, n_s).
simulate_data(
    DGP_dict[data_name],
    n_s_dict[data_name],
    n_t,
    n_buffer,
    path=path,
)

  0%|                                                                                    | 0/3 [00:00<?, ?it/s]

created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/data/t-5-daily.
shape of the saved Xs: (1024, 290, 1).
shape of the saved Xs: (1024, 250, 1).
shape of the saved Zs: (1024, 250).
shape of the saved Xs: (1024, 540, 1).
shape of the saved Xs: (1024, 500, 1).
shape of the saved Zs: (1024, 500).
shape of the saved Xs: (1024, 1040, 1).
shape of the saved Xs: (1024, 1000, 1).
shape of the saved Zs: (1024, 1000).


 33%|█████████████████████████▎                                                  | 1/3 [00:10<00:20, 10.00s/it]

shape of the saved Xs: (1024, 2040, 1).
shape of the saved Xs: (1024, 2000, 1).
shape of the saved Zs: (1024, 2000).
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/data/t-5-weekly.
shape of the saved Xs: (1024, 90, 1).
shape of the saved Xs: (1024, 50, 1).
shape of the saved Zs: (1024, 50).
shape of the saved Xs: (1024, 140, 1).
shape of the saved Xs: (1024, 100, 1).
shape of the saved Zs: (1024, 100).
shape of the saved Xs: (1024, 290, 1).
shape of the saved Xs: (1024, 250, 1).
shape of the saved Zs: (1024, 250).
shape of the saved Xs: (1024, 540, 1).
shape of the saved Xs: (1024, 500, 1).
shape of the saved Zs: (1024, 500).


 67%|██████████████████████████████████████████████████▋                         | 2/3 [00:15<00:07,  7.42s/it]

shape of the saved Xs: (1024, 1040, 1).
shape of the saved Xs: (1024, 1000, 1).
shape of the saved Zs: (1024, 1000).
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/data/t-5-monthly.
shape of the saved Xs: (1024, 100, 1).
shape of the saved Xs: (1024, 60, 1).
shape of the saved Zs: (1024, 60).
shape of the saved Xs: (1024, 160, 1).
shape of the saved Xs: (1024, 120, 1).
shape of the saved Zs: (1024, 120).
shape of the saved Xs: (1024, 290, 1).
shape of the saved Xs: (1024, 250, 1).
shape of the saved Zs: (1024, 250).


100%|████████████████████████████████████████████████████████████████████████████| 3/3 [00:18<00:00,  6.15s/it]

shape of the saved Xs: (1024, 540, 1).
shape of the saved Xs: (1024, 500, 1).
shape of the saved Zs: (1024, 500).





# 3-state models

In [10]:
# Three-regime models with Gaussian emissions: one sampling model per data key,
# all drawing from the same seeded random stream.
n_c = 3
data_name = f"{n_c}-state"
random_state = RandomState(10)  # fixed seed for this family
key_data_dict[data_name] = generate_key_data(n_c)
# The text after the last "-" of each key is passed to load_hardy_params together
# with the number of states (presumably the scale name, e.g. "daily" -- confirm in the helper).
DGP_dict[data_name] = {
    key_data: get_HMM_instance_for_sampling(
        *load_hardy_params(key_data.rsplit("-", 1)[-1], n_c),
        emission="Gaussian",
        random_state=random_state,
    )
    for key_data in key_data_dict[data_name]
}
# Pair the data keys, in order, with the per-scale lists of sequence lengths.
n_s_dict[data_name] = {
    key_data: lengths
    for key_data, lengths in zip(key_data_dict[data_name], len_list)
}

In [11]:
# Simulate and save the batches for the 3-state family. Per the recorded output, each
# (key, length) pair yields a buffered X of shape (n_t, n_s + 2 * n_buffer, 1), a
# trimmed X of shape (n_t, n_s, 1) and state labels Z of shape (n_t, n_s).
simulate_data(
    DGP_dict[data_name],
    n_s_dict[data_name],
    n_t,
    n_buffer,
    path=path,
)

  0%|                                                                                    | 0/3 [00:00<?, ?it/s]

created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/data/3-state-daily.
shape of the saved Xs: (1024, 290, 1).
shape of the saved Xs: (1024, 250, 1).
shape of the saved Zs: (1024, 250).
shape of the saved Xs: (1024, 540, 1).
shape of the saved Xs: (1024, 500, 1).
shape of the saved Zs: (1024, 500).
shape of the saved Xs: (1024, 1040, 1).
shape of the saved Xs: (1024, 1000, 1).
shape of the saved Zs: (1024, 1000).


 33%|█████████████████████████▎                                                  | 1/3 [00:09<00:18,  9.17s/it]

shape of the saved Xs: (1024, 2040, 1).
shape of the saved Xs: (1024, 2000, 1).
shape of the saved Zs: (1024, 2000).
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/data/3-state-weekly.
shape of the saved Xs: (1024, 90, 1).
shape of the saved Xs: (1024, 50, 1).
shape of the saved Zs: (1024, 50).
shape of the saved Xs: (1024, 140, 1).
shape of the saved Xs: (1024, 100, 1).
shape of the saved Zs: (1024, 100).
shape of the saved Xs: (1024, 290, 1).
shape of the saved Xs: (1024, 250, 1).
shape of the saved Zs: (1024, 250).
shape of the saved Xs: (1024, 540, 1).
shape of the saved Xs: (1024, 500, 1).
shape of the saved Zs: (1024, 500).


 67%|██████████████████████████████████████████████████▋                         | 2/3 [00:13<00:06,  6.61s/it]

shape of the saved Xs: (1024, 1040, 1).
shape of the saved Xs: (1024, 1000, 1).
shape of the saved Zs: (1024, 1000).
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/data/3-state-monthly.
shape of the saved Xs: (1024, 100, 1).
shape of the saved Xs: (1024, 60, 1).
shape of the saved Zs: (1024, 60).
shape of the saved Xs: (1024, 160, 1).
shape of the saved Xs: (1024, 120, 1).
shape of the saved Zs: (1024, 120).
shape of the saved Xs: (1024, 290, 1).
shape of the saved Xs: (1024, 250, 1).
shape of the saved Zs: (1024, 250).


100%|████████████████████████████████████████████████████████████████████████████| 3/3 [00:16<00:00,  5.51s/it]

shape of the saved Xs: (1024, 540, 1).
shape of the saved Xs: (1024, 500, 1).
shape of the saved Zs: (1024, 500).





# Feature engineering

In [13]:
# Collect the data keys of all three families (2-state, 3-state, t) in one go.
# NOTE(review): dof=5 is a literal here and has to agree with `dof_` in the
# t-components cell, otherwise the keys will not match the generated data.
key_data_all = generate_key_data([2, 3, "t"], dof=5)
# Compute and save the "zhengB" features for every key. Per the recorded output the
# saved arrays have shape (n_t, n_s, 15), i.e. without the buffer periods.
feature_engineer(
    "zhengB",
    key_data_all,
    n_buffer,
    path,
)

  0%|                                                                                    | 0/9 [00:00<?, ?it/s]

shape of the saved Xs: (1024, 250, 15).
shape of the saved Xs: (1024, 2000, 15).
shape of the saved Xs: (1024, 500, 15).


 11%|████████▍                                                                   | 1/9 [00:11<01:28, 11.06s/it]

shape of the saved Xs: (1024, 1000, 15).
shape of the saved Xs: (1024, 250, 15).
shape of the saved Xs: (1024, 100, 15).
shape of the saved Xs: (1024, 500, 15).
shape of the saved Xs: (1024, 50, 15).


 22%|████████████████▉                                                           | 2/9 [00:21<01:16, 10.88s/it]

shape of the saved Xs: (1024, 1000, 15).
shape of the saved Xs: (1024, 60, 15).
shape of the saved Xs: (1024, 250, 15).
shape of the saved Xs: (1024, 120, 15).


 33%|█████████████████████████▎                                                  | 3/9 [00:29<00:55,  9.30s/it]

shape of the saved Xs: (1024, 500, 15).
shape of the saved Xs: (1024, 250, 15).
shape of the saved Xs: (1024, 2000, 15).
shape of the saved Xs: (1024, 500, 15).


 44%|█████████████████████████████████▊                                          | 4/9 [00:40<00:49,  9.98s/it]

shape of the saved Xs: (1024, 1000, 15).
shape of the saved Xs: (1024, 250, 15).
shape of the saved Xs: (1024, 100, 15).
shape of the saved Xs: (1024, 500, 15).
shape of the saved Xs: (1024, 50, 15).


 56%|██████████████████████████████████████████▏                                 | 5/9 [00:50<00:40, 10.08s/it]

shape of the saved Xs: (1024, 1000, 15).
shape of the saved Xs: (1024, 60, 15).
shape of the saved Xs: (1024, 250, 15).
shape of the saved Xs: (1024, 120, 15).


 67%|██████████████████████████████████████████████████▋                         | 6/9 [00:57<00:27,  9.15s/it]

shape of the saved Xs: (1024, 500, 15).
shape of the saved Xs: (1024, 250, 15).
shape of the saved Xs: (1024, 2000, 15).
shape of the saved Xs: (1024, 500, 15).


 78%|███████████████████████████████████████████████████████████                 | 7/9 [01:09<00:20, 10.10s/it]

shape of the saved Xs: (1024, 1000, 15).
shape of the saved Xs: (1024, 250, 15).
shape of the saved Xs: (1024, 100, 15).
shape of the saved Xs: (1024, 500, 15).
shape of the saved Xs: (1024, 50, 15).


 89%|███████████████████████████████████████████████████████████████████▌        | 8/9 [01:21<00:10, 10.50s/it]

shape of the saved Xs: (1024, 1000, 15).
shape of the saved Xs: (1024, 60, 15).
shape of the saved Xs: (1024, 250, 15).
shape of the saved Xs: (1024, 120, 15).


100%|████████████████████████████████████████████████████████████████████████████| 9/9 [01:28<00:00,  9.87s/it]

shape of the saved Xs: (1024, 500, 15).





# makedir

In [19]:
# Create one sub-folder per data key under the "estimation" location
# (see the "created folder" lines in the recorded output).
makedir(
    path,
    "estimation",
    key_data_all,
)

created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/estimation/2-state-daily.
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/estimation/2-state-weekly.
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/estimation/2-state-monthly.
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/estimation/3-state-daily.
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/estimation/3-state-weekly.
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/estimation/3-state-monthly.
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/estimation/t-5-daily.
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/estimation/t-5-weekly.
created folder: /Users/yizhan/data/G3_2/regime-identification/simulation/estimation/t-5-monthly.
