In [1]:
%load_ext autoreload
%autoreload 2
import sys, os
from os.path import expanduser
# ACTION REQUIRED: change `path` to the folder of your local checkout before running.
path = expanduser("~/Documents/G3_2/regime-identification")
# Register the checkout on sys.path (presumably so the `regime.*` imports below resolve).
# Guarded so that re-running this cell does not append the same entry again.
if path not in sys.path:
    sys.path.append(path)

# Root folder of the simulation study; every input/output path below derives from it.
path_file = expanduser("~/data/G3_2/regime-identification/simulation")
path_data = f"{path_file}/data"              # read by np.load in the training cells
path_estimation = f"{path_file}/estimation"  # labels_*/proba_* arrays are saved here
path_score = f"{path_file}/score"

In [5]:
import numpy as np
from tqdm import tqdm

from sklearn.model_selection import ParameterGrid

from numpy.random import RandomState
random_state = RandomState(0)

In [112]:
from regime.cluster_utils import *
from regime.stats import *
from regime.simulation_helper import *
from regime.jump import *

# 2-model-training

In this notebook we train several models, with different hyperparameters, and save the estimation results.

In [4]:
# Sampling scales and sequence lengths of the simulated datasets.
scale_lst = ["daily", "weekly", "monthly"]
n_s_lst = [250, 500, 1000]
# One key per (scale, length) pair, scale-major: "daily_250", "daily_500", ..., "monthly_1000".
key_data_list = ["_".join((freq, str(length))) for freq in scale_lst for length in n_s_lst]

n_buffer = 20   # presumably the number of leading observations dropped as warm-up -- TODO confirm
n_t = 1024      # number of trials per dataset
n_c = 2         # number of components passed to every jump_model
key_feat = "zheng"

In [6]:
# Three jump-model variants, all with n_c components:
#   - discrete states,
#   - continuous states (grid_size=.02) with mode_loss=True,
#   - continuous states (grid_size=.02) with mode_loss=False.
# NOTE(review): all three receive the SAME RandomState instance; if jump_model draws from it
# during fit, results depend on the order in which the models are fitted -- confirm.
model_discrete = jump_model(n_c, state_type="discrete", random_state=random_state)
model_cont_mode = jump_model(n_c, state_type="cont", grid_size=.02, mode_loss=True, random_state=random_state)
model_cont_no_mode = jump_model(n_c, state_type="cont", grid_size=.02, mode_loss=False, random_state=random_state)

In [7]:
model_dict = {"discrete": model_discrete, "cont_mode": model_cont_mode, "cont_no_mode": model_cont_no_mode}

In [8]:
# Jump-penalty grid: nine log-spaced values from 1e0 to 1e8 (one per decade).
lambd_list = np.logspace(0, 8, num=9)
print(lambd_list)
# Grid in the {param_name: values} form consumed by sklearn's ParameterGrid.
param_grid = dict(jump_penalty=lambd_list)

[1.e+00 1.e+01 1.e+02 1.e+03 1.e+04 1.e+05 1.e+06 1.e+07 1.e+08]


In [9]:
n_batch=32

In [10]:
def train_models_datas_params(key_data_list, key_feat_list, key_model_list, param_grid):
    """
    Train every (data, feature, model) combination over a hyperparameter grid and save the results.

    For each combination this loads the first `n_batch` trials of
    `{path_data}/X_{key_data}_{key_feat}.npy`, fits `model_dict[key_model]` on them for every
    point of `param_grid`, and saves the stacked labels and probabilities under `path_estimation`.

    Relies on the notebook globals `path_data`, `path_estimation`, `n_batch` and `model_dict`.

    Parameters:
    -----------------------------
    key_data_list: list of dataset keys (e.g. "daily_1000").

    key_feat_list: list of feature-set keys (e.g. "zheng").

    key_model_list: list of keys into `model_dict`.

    param_grid: dict mapping parameter names to the values to try.

    Returns:
    ----------------------------
    None. Results are written to disk.
    """
    # Imported here because the notebook never imports `product` explicitly; without this the
    # function only works if one of the star imports happens to leak the name.
    from itertools import product

    combos = list(product(key_data_list, key_feat_list, key_model_list))
    N_combos = len(combos)
    for i, (key_data, key_feat, key_model) in enumerate(combos):
        # only the first n_batch trials are used
        Xs = np.load(f"{path_data}/X_{key_data}_{key_feat}.npy")[:n_batch]
        model = model_dict[key_model]
        proba_arr_, labels_arr_ = train_one_model_one_data_batch_params(model, Xs, param_grid)
        # save results
        np_save_print(f"{path_estimation}/labels_{key_data}_{key_feat}_{key_model}.npy", labels_arr_, "labels")
        np_save_print(f"{path_estimation}/proba_{key_data}_{key_feat}_{key_model}.npy", proba_arr_, "proba")
        print(f"{i+1}/{N_combos} combos done.")

In [11]:
key_data_list=['daily_1000']
key_feat_list=['zheng']
key_model_list = ["cont_mode"]

In [15]:
train_models_datas_params(key_data_list, key_feat_list, key_model_list, param_grid)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:36<00:00,  4.02s/it]

shape of the saved labels: (32, 1000, 9).
shape of the saved proba: (32, 1000, 2, 9).
1/1 combos done.





In [None]:
# (stray input removed: the bare name `la` was never defined and raised NameError on Run All)

In [13]:
def train_one_model_one_data_batch(model, Xs):
    """
    Fit `model` separately on every trial of a batch and collect the estimates.

    Parameters:
    -----------------------------
    model:
        estimator exposing `fit(X)` and, after fitting, `labels_` and `proba_`
        (`proba_` may be None); `n_components` is read when `proba_` is None.

    Xs: array of shape (n_t, n_s, n_f), or any sequence of n_t arrays of shape (n_s, n_f)

    Returns:
    ----------------------------
    proba_arr: (n_t, n_s, n_c)
        estimated probabilities; one-hot encoded labels if the model gives no probabilities.

    labels_arr: (n_t, n_s)
        estimated labels; the argmax of the probabilities (int32) when the model provides them.
    """
    labels_list, proba_list = [], []
    for X in Xs:
        model.fit(X)
        labels_list.append(model.labels_)
        proba_list.append(model.proba_)

    if proba_list[0] is not None:  # has estimated probability
        proba_arr = np.array(proba_list)
        # labels are re-derived from the probabilities rather than taken from model.labels_
        labels_arr = proba_arr.argmax(axis=-1).astype(np.int32)
        return proba_arr, labels_arr

    # only estimated labels: build one-hot "probabilities" from them
    labels_arr = np.array(labels_list)
    n_c = model.n_components
    # size the output from the labels themselves, so Xs need not be an ndarray
    proba_arr = np.zeros(labels_arr.shape + (n_c,))
    np.put_along_axis(proba_arr, indices=labels_arr[..., np.newaxis], values=1., axis=-1)
    return proba_arr, labels_arr

In [17]:
model_cont_mode.set_params(jump_penalty=1000.)

In [44]:
key_data="daily_1000"
key_feat="zheng"
Xs = np.load(f"{path_data}/X_{key_data}_{key_feat}.npy")[:n_batch]
Xs_raw = np.load(f"{path_data}/X_raw_{key_data}.npy")[:n_batch]
Zs = np.load(f"{path_data}/Z_{key_data}.npy")[:n_batch]

In [99]:
proba_arr, labels_arr = train_one_model_one_data_batch(model_cont_mode, Xs)

In [100]:
from itertools import permutations

In [102]:
permutations(range(n_c))

<itertools.permutations at 0x127b904f0>

In [76]:
proba_arr.shape

(32, 1000, 2)

In [25]:
from sklearn.metrics import accuracy_score

In [166]:
acc = scorer_batch(accuracy_score, Zs, labels_arr, )

In [67]:
accuracy_score(Zs[1], labels_arr[1])

0.033

In [72]:
(proba_arr[1].argmax(-1)==labels_arr[1]).all()

True

In [87]:
model_cont_mode.centers_

array([[-0.00068745,  0.01869673,  0.0187656 , -0.00064584,  0.01434598,
        -0.00061376,  0.01196528, -0.00067792,  0.01188814, -0.00054851,
         0.01551057, -0.00046508,  0.01468934, -0.00063194,  0.0146868 ],
       [ 0.00092417,  0.00982028,  0.00977366,  0.00095472,  0.00721107,
         0.00096275,  0.00608297,  0.00094669,  0.00612873,  0.00089509,
         0.00782269,  0.00084006,  0.00740016,  0.00095013,  0.00734796]])

array([[0, 1, 2, 3],
       [0, 1, 3, 2],
       [0, 2, 1, 3],
       [0, 2, 3, 1],
       [0, 3, 1, 2],
       [0, 3, 2, 1],
       [1, 0, 2, 3],
       [1, 0, 3, 2],
       [1, 2, 0, 3],
       [1, 2, 3, 0],
       [1, 3, 0, 2],
       [1, 3, 2, 0],
       [2, 0, 1, 3],
       [2, 0, 3, 1],
       [2, 1, 0, 3],
       [2, 1, 3, 0],
       [2, 3, 0, 1],
       [2, 3, 1, 0],
       [3, 0, 1, 2],
       [3, 0, 2, 1],
       [3, 1, 0, 2],
       [3, 1, 2, 0],
       [3, 2, 0, 1],
       [3, 2, 1, 0]])

In [201]:
# def generate_all_perms_as_arr(n_c):
#     return np.array(list(permutations(range(n_c))))

In [209]:
# def _permute_labels(labels_arr, all_perms):
#     """
#     return the labels under every permutation. new axis to the last
#     """
#     labels_all_perms = np.zeros(labels_arr.shape + (len(all_perms),), dtype=np.int32)
#     labels_all_perms[..., 0] = labels_arr
#     for i_perm, perm in enumerate(all_perms[1:]):
#         labels_permuted = labels_all_perms[..., i_perm+1]
#         for i_cluster in range(n_c): # permute
#             labels_permuted[labels_arr==i_cluster] = perm[i_cluster] # labels_permuted[labels_==i]
#     return labels_all_perms      

In [172]:
# def align_labels_proba_by_accuracy(Zs_true, proba_arr, labels_arr):
#     """
#     In a clustering problem, any permutation of the labels is a valid clustering result. 
#     we find the best permutation for each trial. Here best refers to the highest accuracy.
    
#     Parameters:
#     ---------------------
#     labels_arr: arr of shape (n_t, n_s)
#     """
#     n_t, n_s, n_c = proba_arr.shape
#     # all the perms
#     all_perms = generate_all_perms_as_arr(n_c)
#     # all the possible perms of labels
#     labels_all_perms = _permute_labels(labels_arr, all_perms)
#     # score accuracy for each perm
#     acc_all_perms = scorer_batch(accuracy_score, Zs_true, labels_all_perms, has_params=True) # of shape (n_t, n_p)
#     # best perm for each trial 
#     best_perm_idx = acc_all_perms.argmax(-1) # shape (n_t,)
#     # take the corresponding perm for labels
#     labels_arr_new = np.take_along_axis(labels_all_perms, best_perm_idx[:, np.newaxis, np.newaxis], axis=-1).squeeze(axis=-1)
#     # do the same for proba_
#     best_perm = all_perms[best_perm_idx]
#     proba_arr_new = np.take_along_axis(proba_arr, best_perm[:, np.newaxis, :], axis=-1)
#     # proba_arr_new = proba_arr[np.arange(n_t)[:, np.newaxis, np.newaxis], np.arange(n_s)[np.newaxis, :, np.newaxis], best_perm[:, np.newaxis, :]]
#     return proba_arr_new, labels_arr_new

In [255]:
%%timeit
proba_arr[np.arange(n_t)[:, np.newaxis, np.newaxis], np.arange(n_s)[np.newaxis, :, np.newaxis], best_perm[:, np.newaxis, :]]

480 µs ± 2.59 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [256]:
%%timeit
np.take_along_axis(proba_arr, best_perm[:, np.newaxis, :], axis=-1)

485 µs ± 3.77 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [238]:
n_t, n_s, n_c = proba_arr.shape
# all the perms
all_perms = generate_all_perms_as_arr(n_c)
# all the possible perms of labels
labels_all_perms = _permute_labels(labels_arr, all_perms)
# score accuracy for each perm
acc_all_perms = scorer_batch(accuracy_score, Zs, labels_all_perms, has_params=True) # of shape (n_t, n_p)
# best perm for each trial 
best_perm_idx = acc_all_perms.argmax(-1) # shape (n_t,)

In [239]:
best_perm_idx

array([1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 0])

In [240]:
labels_arr_new = np.take_along_axis(labels_all_perms, best_perm_idx[:, np.newaxis, np.newaxis], axis=-1).squeeze(axis=-1)

In [242]:
best_perm = all_perms[best_perm_idx]
proba_arr_new = np.take_along_axis(proba_arr, best_perm[:, np.newaxis, :], axis=-1)#.squeeze(axis=-1)

In [243]:
proba_arr_new.shape

(32, 1000, 2)

In [241]:
scorer_batch(accuracy_score, Zs, labels_arr_new)

array([0.906, 0.966, 0.987, 0.963, 0.86 , 0.985, 0.936, 0.988, 0.865,
       0.983, 0.995, 0.943, 0.946, 0.873, 0.874, 0.985, 0.93 , 0.982,
       0.974, 0.99 , 0.958, 0.99 , 0.959, 0.984, 0.985, 1.   , 0.841,
       0.992, 0.984, 0.994, 0.99 , 0.971])

In [244]:
from sklearn.metrics import roc_auc_score

In [249]:
idxx = get_idx_have_all_clusters(Zs, 2)

In [252]:
xx=scorer_batch(roc_auc_score, Zs, proba_arr[..., 1], idx_subset=idxx)

In [254]:
np.maximum(xx, 1-xx)

array([0.90905584, 0.99221691, 0.99817947, 0.99105733,        nan,
       0.9991669 , 0.94443305, 0.99938795,        nan, 0.9989771 ,
       0.99998104, 0.93621684, 0.95324645,        nan, 0.96439471,
       0.92307692, 0.93642703, 0.98845787, 0.99655997, 0.99572588,
       0.95372316, 0.99186841, 0.98253295, 0.99262626, 0.9978296 ,
              nan,        nan, 0.99990343, 0.97032959, 0.99986407,
       0.99937297, 0.99872088])

In [251]:
scorer_batch(roc_auc_score, Zs, proba_arr_new[..., 1], idx_subset=idxx)

array([0.90905584, 0.99221691, 0.99817947, 0.99105733,        nan,
       0.9991669 , 0.94443305, 0.99938795,        nan, 0.9989771 ,
       0.99998104, 0.93621684, 0.95324645,        nan, 0.96439471,
       0.92307692, 0.93642703, 0.98845787, 0.99655997, 0.99572588,
       0.95372316, 0.99186841, 0.98253295, 0.99262626, 0.9978296 ,
              nan,        nan, 0.99990343, 0.97032959, 0.99986407,
       0.99937297, 0.99872088])

In [None]:
xx[np.arange(2)[:, np.newaxis, np.newaxis], np.arange(4)[np.newaxis, :,  np.newaxis], ind[:, np.newaxis, :]] #np.repeat(ind[:, np.newaxis, :], 4, axis=1)

In [214]:
best_perm_idx = best_perm.argmax(-1)

In [218]:
labels_all_perms = _permute_labels(labels_arr, generate_all_perms_as_arr(2))

In [220]:
labels_all_perms.shape

(32, 1000, 2)

In [221]:
best_perm_idx.shape

(32,)

In [228]:
labels_aligned=np.take_along_axis(labels_all_perms, best_perm_idx[:, np.newaxis, np.newaxis], axis=-1).squeeze(axis=-1)#.shape

In [234]:
((labels_arr[best_perm_idx==1]+labels_aligned[best_perm_idx==1])==1).all()

True

In [227]:
labels_arr

array([[1, 1, 1, ..., 1, 1, 1],
       [0, 0, 0, ..., 1, 1, 1],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 0, 0, 0]], dtype=int32)

In [172]:
# def align_labels_proba_by_accuracy(Zs_true, labels_, proba_):
#     """
#     In a clustering problem, any permutation of the labels is a valid clustering result. 
#     we find the best permutation for each trial. Here best refers to the highest accuracy.
    
#     Parameters:
#     ---------------------
#     labels: arr of shape (n_t, n_s)
#     """
#     n_c = proba_.shape[-1]
#     all_perms = generate_all_perms_as_arr(n_c)
#     labels_all_perms = [labels_]

#     for perm in all_perms:
#         labels_permuted = labels_.copy()
#         for i in range(n_c): # permute
#             labels_permuted[labels_==i] = perm[i]
#         labels_all_perms.append(labels_permuted.copy())
#     labels_all_perms = np.stack(labels_all_perms, axis=-1)
#     acc_all_perms = scorer_batch(accuracy_score, Zs_true, labels_all_perms, has_params=True) # of shape (n_t, n_p)
#     best_perm = acc_all_perms.argmax(-1)
    
#     return best_perm

In [192]:
xx = random_state.randn(2, 4, 3)

In [193]:
ind = np.array([[0, 2, 1], [2, 0, 1]])

In [196]:
xx[np.arange(2)[:, np.newaxis, np.newaxis], np.arange(4)[np.newaxis, :,  np.newaxis], ind[:, np.newaxis, :]] #np.repeat(ind[:, np.newaxis, :], 4, axis=1)

array([[[ 0.33452309, -0.90386939, -0.27593472],
        [ 0.07379354, -0.00494645,  1.71452041],
        [-0.4580803 , -1.56095892,  0.89393175],
        [ 1.19976071,  0.21477518, -1.35711167]],

       [[ 0.42631307,  0.60415032, -0.48270287],
        [-0.27934218,  0.35805394,  2.0956007 ],
        [ 1.00258809, -1.62195909, -0.27029894],
        [-0.17738506,  0.7011757 ,  0.17225113]]])

In [195]:
xx

array([[[ 0.33452309, -0.27593472, -0.90386939],
        [ 0.07379354,  1.71452041, -0.00494645],
        [-0.4580803 ,  0.89393175, -1.56095892],
        [ 1.19976071, -1.35711167,  0.21477518]],

       [[ 0.60415032, -0.48270287,  0.42631307],
        [ 0.35805394,  2.0956007 , -0.27934218],
        [-1.62195909, -0.27029894,  1.00258809],
        [ 0.7011757 ,  0.17225113, -0.17738506]]])

In [183]:
xx

array([[ 0.31997435, -0.0393219 ,  1.51422095],
       [-1.8734129 , -0.85630588,  0.7394201 ]])

In [184]:
ind

array([[0, 2, 1],
       [2, 0, 1]])

In [187]:
xx[np.arange(2)[:, np.newaxis], ind]

array([[ 0.31997435,  1.51422095, -0.0393219 ],
       [ 0.7394201 , -1.8734129 , -0.85630588]])

In [181]:
np.repeat(ind[:, np.newaxis, :], 4, axis=1).shape

(2, 4, 3)

In [179]:
xx[np.repeat(ind[:, np.newaxis, :], 4, axis=1)]

IndexError: index 2 is out of bounds for axis 0 with size 2

In [173]:
align_labels_by_accuracy(Zs, labels_arr, n_c)

array([1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 0])

In [162]:
best_perm = align_labels_by_accuracy(Zs, labels_arr, n_c)
# labels_all_perms = align_labels_by_accuracy(Zs, labels_arr, 2)

In [163]:
best_perm

array([[0.094, 0.906],
       [0.966, 0.034],
       [0.013, 0.987],
       [0.963, 0.037],
       [0.86 , 0.14 ],
       [0.985, 0.015],
       [0.936, 0.064],
       [0.012, 0.988],
       [0.865, 0.135],
       [0.017, 0.983],
       [0.995, 0.005],
       [0.943, 0.057],
       [0.946, 0.054],
       [0.127, 0.873],
       [0.874, 0.126],
       [0.015, 0.985],
       [0.07 , 0.93 ],
       [0.018, 0.982],
       [0.026, 0.974],
       [0.99 , 0.01 ],
       [0.958, 0.042],
       [0.99 , 0.01 ],
       [0.959, 0.041],
       [0.984, 0.016],
       [0.985, 0.015],
       [0.   , 1.   ],
       [0.841, 0.159],
       [0.008, 0.992],
       [0.016, 0.984],
       [0.006, 0.994],
       [0.01 , 0.99 ],
       [0.971, 0.029]])

In [160]:
acc

array([0.907, 0.033, 0.987, 0.039, 0.142, 0.016, 0.065, 0.988, 0.865,
       0.984, 0.995, 0.059, 0.946, 0.127, 0.145, 0.985, 0.931, 0.983,
       0.026, 0.011, 0.958, 0.99 , 0.042, 0.984, 0.016, 0.   , 0.985,
       0.993, 0.016, 0.006, 0.99 , 0.971])

In [123]:
labels_all_perms.shape

(32,)

In [105]:
all_perms = permutations(range(3))

In [107]:
next(all_perms)

(0, 1, 2)

In [108]:
list(all_perms)

[(0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]

In [104]:
list(permutations(range(3)))

[(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]

In [28]:
model_cont_mode.fit(Xs[1])

In [52]:
X, Z = Xs_raw[1][20:, 0], Zs[1]

In [55]:
X[Z==1].mean()

-0.0005728669792952882

In [40]:
model_cont_mode.centers_

array([[ 0.00073126,  0.00938523,  0.00936263,  0.00069329,  0.00709878,
         0.00066697,  0.00596211,  0.00071962,  0.00597843,  0.00062275,
         0.00774752,  0.00056181,  0.00736689,  0.00068369,  0.00725888],
       [-0.00082534,  0.02053406,  0.02057727, -0.00069956,  0.01542119,
        -0.0005997 ,  0.01302674, -0.00079943,  0.01291568, -0.00057971,
         0.01678036, -0.00049512,  0.01577277, -0.0006643 ,  0.01576505]])

In [64]:
proba_=model_cont_mode.proba_
labels_ = proba_.argmax(axis=-1).astype(np.int32)

In [61]:
X_feat = Xs[1]

In [62]:
X_feat

array([[ 0.00642934,  0.01310741,  0.00301724, ...,  0.01073042,
         0.00550611,  0.01010821],
       [ 0.00355489,  0.00287445,  0.01310741, ...,  0.01019238,
         0.00732339,  0.00828638],
       [-0.01137158,  0.01492646,  0.00287445, ...,  0.00961714,
         0.00622623,  0.01000284],
       ...,
       [-0.00825768,  0.00723429,  0.00712813, ...,  0.00836968,
        -0.00425244,  0.00463607],
       [ 0.00100025,  0.00925794,  0.00723429, ...,  0.00784358,
        -0.00454174,  0.00421716],
       [ 0.00176006,  0.00075981,  0.00925794, ...,  0.00713427,
        -0.00396878,  0.0047307 ]])

In [59]:
proba_

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]])

In [29]:
accuracy_score(Zs[1], model_cont_mode.labels_)

0.511

In [27]:
acc

array([0.907, 0.033, 0.013, 0.963, 0.142, 0.016, 0.936, 0.012, 0.865,
       0.017, 0.004, 0.943, 0.054, 0.874, 0.874, 0.984, 0.931, 0.983,
       0.974, 0.011, 0.041, 0.011, 0.959, 0.984, 0.018, 0.   , 0.985,
       0.993, 0.984, 0.006, 0.99 , 0.971])

In [21]:
# def scorer_batch(scorer, Zs_true, Zs_pred, *args, has_params = False, idx_subset = None, **kwargs):
#     """
#     compute scores for a batch of seqs, with possibly some hyperparameters.
    
#     Parameters:
#     --------------------------
#     - scorer:
    
#     - Zs_true: array of size (n_t, n_s)
    
#     - Zs_pred: array of size (n_t, n_s), (n_t, n_s, *hyperparam_shape) (e.g. (n_t, n_s, 1)), or (n_t, n_s, n_c).
#         dimensions of the hyperparams are assumed to be in the last few axes.
#     - hyperparams_dim: 
#         number of hyperparams to tune. 0 means that there are no hyperparams.
#     - idx_subset: 
#         indices of the trials to score. None means there is no need to subset.
    
#     Returns:
#     --------------------------
#     - scores_arr: array of size (n_t, n_s), (n_t, n_s, *hyperparam_shape).
#     """
#     if idx_subset is None: # no need to sub-index
#         if not has_params:  # no hyperparam
#             return np.array([scorer(Z_true, Z_pred, *args, **kwargs) for Z_true, Z_pred in zip(Zs_true, Zs_pred)])
#         else: # has hyperparams
#             return np.stack([scorer_batch(scorer, Zs_true, Zs_pred[..., i_param], *args, **kwargs) for i_param in range(Zs_pred.shape[-1])], axis=-1)
#     # need to subset
#     n_t = len(Zs_true)
#     scores_arr_subset = scorer_batch(scorer, Zs_true[idx_subset], Zs_pred[idx_subset], *args, has_params = has_params, **kwargs)
#     scores_arr = np.full((n_t,) + scores_arr_subset.shape[1:], np.nan)
#     scores_arr[idx_subset] = scores_arr_subset
#     return scores_arr

In [14]:
def train_one_model_one_data_batch_params(model, Xs, param_grid):
    """
    Fit a model on a batch of trials once per hyperparameter setting.

    Every point of `ParameterGrid(param_grid)` is applied with `model.set_params`,
    the whole batch is fitted with `train_one_model_one_data_batch`, and the
    per-setting results are stacked along a new last axis.

    Parameters:
    -----------------------------
    model:
        estimator accepted by `train_one_model_one_data_batch`; must support `set_params`.

    Xs: array of shape (n_t, n_s, n_f)

    param_grid: dict (or list of dicts) in the format accepted by sklearn's ParameterGrid.

    Returns:
    ----------------------------
    proba_: (n_t, n_s, n_c, n_params)

    labels_: (n_t, n_s, n_params)
    """
    proba_per_setting, labels_per_setting = [], []
    for setting in tqdm(ParameterGrid(param_grid)):
        model.set_params(**setting)
        batch_proba, batch_labels = train_one_model_one_data_batch(model, Xs)
        proba_per_setting.append(batch_proba)
        labels_per_setting.append(batch_labels)
    # the hyperparameter axis goes last
    stacked_proba = np.stack(proba_per_setting, axis=-1)
    stacked_labels = np.stack(labels_per_setting, axis=-1)
    return stacked_proba, stacked_labels

In [93]:
lambd_list = 10 ** np.linspace(-4, 4, 5)
param_grid = {'jump_penalty': lambd_list}

In [103]:
proba_arr_, labels_arr_ = train_one_model_one_data_batch_params(model_cont_mode, Xs[:2], param_grid)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  2.64it/s]


In [100]:
proba_arr, labels_arr = train_one_model_one_data_batch(model_cont_mode, Xs[:32])

In [101]:
proba_arr.shape

(32, 1000, 2)

In [102]:
labels_arr.shape

(32, 1000)

In [104]:
proba_arr_.shape

(2, 1000, 2, 5)

In [105]:
labels_arr_.shape

(2, 1000, 5)

In [78]:
proba_arr, labels_arr = train_one_model_one_data_batch(model_discrete, Xs[:32])

In [81]:
proba_arr, labels_arr = train_one_model_one_data_batch(model_cont_mode, Xs[:32])

In [80]:
np.isclose(labels_arr, proba_arr[:, :, 1]).all()

True

In [62]:
labels_arr.shape

(32, 1000)

In [58]:
Xs.shape

(1024, 1000, 15)

In [15]:
model_discrete

In [16]:
model_discrete.labels_

AttributeError: 'jump_model' object has no attribute 'labels_'

In [20]:
hasattr(model_discrete, "discrete")

True

In [22]:
key_data = "daily_1000"
key_feat = "zheng"
Xs = np.load(f"{path_data}/X_{key_data}_{key_feat}.npy")

In [23]:
X = Xs[0]

In [35]:
model_discrete.fit(X)

In [38]:
model_discrete.proba_ is None

True

In [5]:
def fit_models(model, lambd_list, Xs_, fit_proba=False):
    """
    Fit `model` on every trial for every jump penalty and collect the estimates.

    Parameters:
    -----------------------------
    model:
        estimator exposing `set_params(jump_penalty=...)`, `fit(X)` and, after fitting,
        `labels_` (read when `fit_proba` is False) or `proba_` (read when `fit_proba` is True).

    lambd_list: sequence of jump penalties to try.

    Xs_: array of shape (n_t, n_s, n_f)

    fit_proba: bool
        if True, keep column 1 of `model.proba_` (so a two-state model is assumed) and derive
        the labels by thresholding it at .5; if False, keep `model.labels_`.

    Returns:
    ----------------------------
    labels_arr_: int array of shape (n_lambd, n_t, n_s)

    proba_arr_: float array of shape (n_lambd, n_t, n_s), or None when `fit_proba` is False.
    """
    n_t, n_s, _ = Xs_.shape
    n_lambd = len(lambd_list)
    # a single buffer: float probabilities or integer labels, depending on the mode
    if fit_proba:
        fitted_arr = np.empty((n_lambd, n_t, n_s))
    else:
        fitted_arr = np.empty((n_lambd, n_t, n_s), dtype=int)

    # one progress tick per (penalty, trial) fit
    with tqdm(total=n_lambd * n_t) as pbar:
        for i_lambd, lambd in enumerate(lambd_list):
            model.set_params(jump_penalty=lambd)
            for i_trial in range(n_t):
                model.fit(Xs_[i_trial])
                fitted_arr[i_lambd, i_trial] = model.proba_[:, 1] if fit_proba else model.labels_
                pbar.update(1)

    if fit_proba:
        # strictly greater than .5 -> state 1
        return (fitted_arr > .5).astype(int), fitted_arr
    return fitted_arr, None

In [7]:
lambd_list = 10 ** np.linspace(-4, 4, 17)

In [8]:
n_t_try=n_t

In [9]:
model_dict = {}

# Jump w/ mode loss

In [10]:
model_name = "cont_mode"
jump_cont_mode_loss = jump_model(n_components=2, state_type="cont", grid_size=.02, mode_loss=True, random_state=random_state)
model_dict[model_name] = jump_cont_mode_loss

In [11]:
# Fit the model registered under `model_name` on the last three dataset keys, for every
# penalty in `lambd_list`, and save the resulting labels and probabilities.
# NOTE(review): `key_list` is not defined in any cell visible in this notebook (the config
# cell defines `key_data_list`) -- confirm where it comes from before re-running.
# NOTE(review): the file pattern `X_feat_zheng_{key}.npy` differs from the
# `X_{key_data}_{key_feat}.npy` pattern read by train_models_datas_params -- confirm which is current.
for key in key_list[-3:]:
    Xs_feat = np.load(f"{path_data}/X_feat_zheng_{key}.npy")
    # only the first n_t_try trials are fitted; fit_models returns (labels, proba) in that order
    labels_arr, proba_arr = fit_models(model_dict[model_name], lambd_list, Xs_feat[:n_t_try], fit_proba=True)
    np.save(f"{path_estimation}/labels_{model_name}_zheng_feat_{key}.npy", labels_arr)
    np.save(f"{path_estimation}/proba_{model_name}_zheng_feat_{key}.npy", proba_arr)

100%|█████████████████████████████████████| 17000/17000 [12:47<00:00, 22.15it/s]
100%|█████████████████████████████████████| 17000/17000 [29:47<00:00,  9.51it/s]
100%|███████████████████████████████████| 17000/17000 [1:08:28<00:00,  4.14it/s]
