In [2]:
# import tensorflow as tf

# print(tf.__version__)

In [3]:
# import os
# os.environ.get("tf_upgrade_v2")

In [4]:
# !tf_upgrade_v2 \
#   --infile /home/hcxia/TradeMaster_dev/TradeMaster/data_synthesis/TimeGAN/timegan.py \
#   --outfile /home/hcxia/TradeMaster_dev/TradeMaster/data_synthesis/TimeGAN/timegan_v2_new.py \
#   --reportfile report.txt

In [5]:
# import tensorflow as tf

# print(tf.__version__)

In [6]:
# import os
# os.environ.get("tf_upgrade_v2")

# TimeGAN Tutorial

## Time-series Generative Adversarial Networks

- Paper: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar, "Time-series Generative Adversarial Networks," Neural Information Processing Systems (NeurIPS), 2019.

- Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks

- Last updated Date: April 24th 2020

- Code author: Jinsung Yoon (jsyoon0823@gmail.com)

This notebook describes the user-guide of a time-series synthetic data generation application using timeGAN framework. We use Stock, Energy, and Sine dataset as examples.

### Prerequisite
Clone https://github.com/jsyoon0823/timeGAN.git to the current directory.

## Necessary packages and functions call

- timegan: Synthetic time-series data generation module
- data_loading: 2 real datasets and 1 synthetic datasets loading and preprocessing
- metrics: 
    - discriminative_metrics: classify real data from synthetic data
    - predictive_metrics: train on synthetic, test on real
    - visualization: PCA and tSNE analyses

In [7]:
## Necessary packages
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import warnings
warnings.filterwarnings("ignore")

# 1. TimeGAN model
from timegan import timegan
# 2. Data loading
from data_loading import real_data_loading, sine_data_generation
# 3. Metrics
from metrics.discriminative_metrics import discriminative_score_metrics
from metrics.predictive_metrics import predictive_score_metrics
from metrics.visualization_metrics import visualization
import pandas as pd
from tqdm import tqdm
import pickle

In [8]:
def get_intervals(data):
    index=data['index']
    last_value=index[0]-1
    last_index=0
    intervals=[]
    for i in range(data.shape[0]):
        if last_value!=index[i]-1:
            intervals.append([last_index,i])
            last_value=index[i]
            last_index=i
        last_value=index[i]
    intervals.append([last_index, i])
    return intervals

In [9]:
def interpolation(data):
    max_len=24
    l=len(data)
    to_fill=max_len-l
    if to_fill!=0:
        interval=max_len//to_fill
        for j in range(to_fill):
            idx=(interval+1)*j+interval
            data.insert(min(idx,len(data)-1),float('nan'))
    data=pd.Series(data).interpolate(method='polynomial', order=2)
    return data

In [10]:
def MinMaxScaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    norm_data = numerator / (denominator + 1e-7)
    return np.min(data, 0),np.max(data, 0),norm_data

In [11]:
def minmax_normlization(data):
    normalized_data=(data-data.min())/(data.max()-data.min())
    return normalized_data

In [12]:
def get_data_of_same_length(data,seq_len):
    data_processed=[]
    intervals=get_intervals(data)
    temp_data=[]
    data.drop(columns=['index'])
    for interval in intervals:
#         print(interval)
        data_seg=data.iloc[interval[0]:interval[1],:]
        for i in range(0, len(data_seg) - seq_len):
            _x = data_seg.iloc[i:i + seq_len,:]
            temp_data.append(_x)
    return temp_data

# Run through

In [13]:
def dataset_prepareation(path):
    data=pd.read_csv(path).reset_index()
    tics=data['tic'].unique()
    # features=[ 'open', 'high', 'low', 'close', 'adjcp','zopen', 'zhigh', 'zlow', 'zadjcp', 'zclose', 'zd_5', 'zd_10',
    #    'zd_15', 'zd_20', 'zd_25', 'zd_30', 'pct_return', 'adjcp_filtered',
    #    'pct_return_filtered','volume']
    features=['open','high','low','close','adjcp','volume']
    ret=[]
    for col in data.columns:
        if col in features:
            ret.append(col)
    features=ret
    min_sclar_by_tic={}
    max_sclar_by_tic={}
    for tic in tics:
        data_by_tic=data.loc[data['tic']==tic,features].astype(float)
        min_scalr,max_sclar,norm_data_by_tic=MinMaxScaler(data_by_tic)
        # print(min_scalr.shape)
        data.loc[data['tic']==tic,features]=norm_data_by_tic
        min_sclar_by_tic[tic]=min_scalr
        max_sclar_by_tic[tic]=max_sclar
    stock_group_num=len(data['stock_type'].unique())
    regime_num=len(data['label'].unique())
    for tic in tics:
        with open('./data/scalr/'+str(tic)+'_minsclar'+'.pickle', 'wb') as handle:
            pickle.dump(min_sclar_by_tic[tic], handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open('./data/scalr/'+str(tic)+'_maxsclar'+'.pickle', 'wb') as handle:
            pickle.dump(max_sclar_by_tic[tic], handle, protocol=pickle.HIGHEST_PROTOCOL)
        for j in range(regime_num):
            data_seg=data.loc[(data['tic']==tic) & (data['label']==j),['index','open','high','low','close','adjcp','volume']]
    #         data_dict[(i,j)]=data_seg
            data_seg.to_csv('./data/data_seg_'+tic+'_'+str(j)+'.csv')
        

## Data Loading

Load original dataset and preprocess the loaded data.

- data_name: stock, energy, or sine
- seq_len: sequence length of the time-series data

In [14]:
from pathlib import Path
import numpy as np

In [40]:
mae_acl18_ori_path='/data/home/zwt/mae_st_rebuild/data/acl18_inter/features/'
mae_acl18_generated_path='/home/hcxia/mae_st_reframe_pretrained_eval/mae_st_reframe/pretrained_simmim/output_dir/exp05_patchsize1x1/simmim_pretrain/vit_maskratio05/generated_data/epoch_1600/'

In [41]:
pathlist = Path(mae_acl18_ori_path).rglob("*")
mae_acl18_inter_ori_dict={}
mae_acl18_inter_ori_dict['all']=[]
mae_acl18_inter_ori_small_dict={}
mae_acl18_inter_ori_small_dict['all']=[]
features = [
    'open',
    'high',
    'low',
    'close',
    'kmid2',
    'kup2',
    'klow',
    'klow2',
    'ksft2',
    'roc_5',
    'roc_10',
    'roc_20',
    'roc_30',
    'roc_60',
    'ma_5',
    'ma_10',
    'ma_20',
    'ma_30',
    'ma_60',
    'std_5',
    'std_10',
    'std_20',
    'std_30',
    'std_60',
    'beta_5',
    'beta_10',
    'beta_20',
    'beta_30',
    'beta_60',
    'max_5',
    'max_10',
    'max_20',
    'max_30',
    'max_60',
    'min_5',
    'min_10',
    'min_20',
    'min_30',
    'min_60',
    'qtlu_5',
    'qtlu_10',
    'qtlu_20',
    'qtlu_30',
    'qtlu_60',
    'qtld_5',
    'qtld_10',
    'qtld_20',
    'qtld_30',
    'qtld_60',
    'rank_5',
    'rank_10',
    'rank_20',
    'rank_30',
    'rank_60',
    'imax_5',
    'imax_10',
    'imax_20',
    'imax_30',
    'imax_60',
    'imin_5',
    'imin_10',
    'imin_20',
    'imin_30',
    'imin_60',
    'imxd_5',
    'imxd_10',
    'imxd_20',
    'imxd_30',
    'imxd_60',
    'cntp_5',
    'cntp_10',
    'cntp_20',
    'cntp_30',
    'cntp_60',
    'cntn_5',
    'cntn_10',
    'cntn_20',
    'cntn_30',
    'cntn_60',
    'cntd_5',
    'cntd_10',
    'cntd_20',
    'cntd_30',
    'cntd_60',
    'sump_5',
    'sump_10',
    'sump_20',
    'sump_30',
    'sump_60',
    'sumn_5',
    'sumn_10',
    'sumn_20',
    'sumn_30',
    'sumn_60',
    'sumd_5',
    'sumd_10',
    'sumd_20',
    'sumd_30',
    'sumd_60',
]
seq_len=24
for path in pathlist:
    data=pd.read_csv(path).loc[:,features]
    for i in range(0, len(data) - seq_len):
            _x = data.iloc[i:i + seq_len,:].to_numpy()
            _x_small = data.iloc[i:i + seq_len,:4].to_numpy()
            mae_acl18_inter_ori_dict['all'].append(_x)
            mae_acl18_inter_ori_small_dict['all'].append(_x_small)
    # print(mae_acl18_inter_ori_small_dict['all'][-1].shape)

display(len(mae_acl18_inter_ori_dict['all']))

76245

In [46]:
pathlist = Path(mae_acl18_generated_path).rglob("*")
mae_acl18_inter_generated_dict={}
mae_acl18_inter_generated_dict['all']=[]
mae_acl18_inter_generated_small_dict={}
mae_acl18_inter_generated_small_dict['all']=[]
for path in pathlist:
    data=np.load(path)
    mae_acl18_inter_generated_dict['all'].append(data)
    mae_acl18_inter_generated_small_dict['all'].append(data[:,:4])
    # print(mae_acl18_inter_generated_dict['all'][-1].shape)
mae_acl18_inter_generated_dict['all']=np.stack(mae_acl18_inter_generated_dict['all'],axis=0)
mae_acl18_inter_generated_small_dict['all']=np.stack(mae_acl18_inter_generated_small_dict['all'],axis=0)
display(mae_acl18_inter_generated_dict['all'].shape)
display(mae_acl18_inter_generated_small_dict['all'].shape)
    # print(data.shape)
    # print(type(data))

(18054, 24, 99)

(18054, 24, 4)

## Evaluate the generated data



In [47]:
def dis_score(ori_data, generated_data):
    metric_iteration = 5
    discriminative_score = list()
    for _ in range(metric_iteration):
        temp_disc = discriminative_score_metrics(ori_data, generated_data)
        discriminative_score.append(temp_disc)
    print('Discriminative score: ' + str(np.round(np.mean(discriminative_score), 4)))
    return np.round(np.mean(discriminative_score), 4)

def pred_score(ori_data, generated_data):
    predictive_score = list()
    metric_iteration = 5
    for tt in range(metric_iteration):
        temp_pred = predictive_score_metrics(ori_data, generated_data)
        predictive_score.append(temp_pred)   
    print('Predictive score: ' + str(np.round(np.mean(predictive_score), 4)))
    return np.round(np.mean(predictive_score), 4)
    
def evaluation(data_set_dict,genrated_data_set_dict):
    length_dict={}
    discriminative_score_dict={}
    predictive_score_dict={}
    for dataset_name, data_set in data_set_dict.items():
        length_dict[dataset_name]=len(data_set_dict[dataset_name])
        # print(len(data_set_dict[dataset_name]),len(genrated_data_set_dict[dataset_name]))
        discriminative_score_dict[dataset_name] = dis_score(data_set_dict[dataset_name],genrated_data_set_dict[dataset_name])
        predictive_score_dict[dataset_name] = pred_score(data_set_dict[dataset_name],genrated_data_set_dict[dataset_name])
        visualization(data_set_dict[dataset_name],genrated_data_set_dict[dataset_name], 'pca')
        visualization(data_set_dict[dataset_name],genrated_data_set_dict[dataset_name], 'tsne')
    return length_dict,discriminative_score_dict,predictive_score_dict

In [48]:
mae_acl18_inter_ori_dict['all'][0].shape

(24, 99)

In [49]:
mae_acl18_inter_generated_dict['all'].shape

(18054, 24, 99)

In [50]:
mae_acl18_res=evaluation(mae_acl18_inter_ori_small_dict,mae_acl18_inter_generated_small_dict)





Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instr

2023-01-23 16:17:16.870935: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2023-01-23 16:17:16.871099: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-23 16:17:16.871305: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: 
name: NVIDIA RTX A6000 major: 8 minor: 6 memoryClockRate(GHz): 1.8
pciBusID: 0000:21:00.0
2023-01-23 16:17:16.871372: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-23 16:17:16.871521: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 1 with properties: 
name: NVIDIA RTX A6000 major: 8 minor: 6 memoryClockRate(GHz): 1.8
pciBusID: 0000:22:00.0
202




2023-01-23 16:17:17.094294: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-23 16:17:17.181483: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-23 16:17:17.182264: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-23 16:17:17.182743: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-23 16:17:17.183092: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x9090c50 initialized for plat

Discriminative score: 0.5



2023-01-23 16:18:18.934224: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-23 16:18:18.934246: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165]      
2023-01-23 16:18:51.480311: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-23 16:18:51.480338: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165]      
2023-01-23 16:19:26.764385: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-23 16:19:26.764502: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165]      
2023-01-23 16:19:59.343473: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-23 16:19:59.343499: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165]      
2023-01-23 16:20:32.429122: I tensorflow

Predictive score: 84.4344


IndexError: index 47010 is out of bounds for axis 0 with size 18054

In [None]:
mae_acl18_res=evaluation(mae_acl18_inter_ori_dict,mae_acl18_inter_generated_dict)

2023-01-23 16:30:28.602227: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-23 16:30:28.602255: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165]      
2023-01-23 16:31:04.012652: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-23 16:31:04.012682: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165]      
2023-01-23 16:31:39.136727: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-23 16:31:39.136755: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165]      
2023-01-23 16:32:14.456377: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-23 16:32:14.456408: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165]      
2023-01-23 16:32:50.432951: I tensorflow

Discriminative score: 0.5


2023-01-23 16:33:25.809757: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-23 16:33:25.809855: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165]      


In [None]:
print(mae_acl18_res,mae_acl18_res)

## result analysis

In [None]:
display(GOOG_data_res)

In [None]:
display(AAPL_res)

In [None]:
display(group_3_res)

In [None]:
display(all_data_res)

In [None]:
display(group_4_res)

In [None]:
data_dict_tic_res={}
for tic in data_dict_tic_res.keys():
    display(data_dict_tic_res[tic])

In [None]:
visualization(GOOG_processed_data,GOOG_genrated_data, 'pca')
visualization(GOOG_processed_data,GOOG_genrated_data, 'tsne')

In [None]:
for k in data_dict.keys():
    print('vis of '+k)
    visualization(data_dict[k], generated_data_dict[k], 'pca')
    visualization(data_dict[k], generated_data_dict[k], 'tsne')

# Leverage Effect

In [None]:
def get_std_list(data):
    intervals=get_intervals(data)
    std_list=[]
    data.drop(columns=['index'])
    for interval in intervals:
        data_seg=data.iloc[interval[0]:interval[1],:].pct_return.to_numpy()
        std=data_seg.std()
        std_list.append(std)
    return std_list

single stock

In [None]:
for tic in tics:
    for j in range(regime_num):
        data=pd.read_csv('data_seg_'+tic+'_'+str(j)+'.csv')
        print('stock',tic,'regime',j)
        std_list=get_std_list(data)
        display(pd.DataFrame(std_list).describe()) 

by stock group

In [None]:
for i in range(stock_group_num):
    for j in range(regime_num):
        data=pd.read_csv('data_seg_'+str(i)+'_'+str(j)+'.csv')
        print('stock',i,'regime',j)
        std_list=get_std_list(data)
        display(pd.DataFrame(std_list).describe()) 

In [None]:
def get_std_list(data):
    intervals=get_intervals(data)
    std_list=[]
    data.drop(columns=['index'])
    for interval in intervals:
        data_seg=data.iloc[interval[0]:interval[1],:].to_numpy()
        std=data_seg.adj.std()
        std_list.append(std)
    return std_list

# key takeaway
1. regime 0 has high variance mean and high variance variance
2. regime 1 has low variance mean and low variance variance
3. regime 2 has low variance mean and low/high variance variance

# Pre-train Static learning classification discriminator

In [None]:
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sktime.datasets import load_arrow_head
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sktime.classification.kernel_based import RocketClassifier

In [None]:
def get_data_of_same_length_df(data,seq_len):
    data_processed=[]
    intervals=get_intervals(data)
    temp_data=[]
    data.drop(columns=['index'])
    for interval in intervals:
        data_seg=data.iloc[interval[0]:interval[1],:]
        for i in range(0, len(data_seg) - seq_len):
            _x = data[i:i + seq_len]
            temp_data.append(_x)
    return temp_data

In [None]:
data=pd.read_csv('data_seg_'+"0"+'_'+"0"+'.csv')
display(data.columns)

In [None]:
for tic in tics:
    print(tic)
    X=[]
    y=np.empty(0)
    for j in range(regime_num):
        data=pd.read_csv('data_seg_'+tic+'_'+str(j)+'.csv').loc[:,['index', 'open', 'high', 'low', 'close', 'adjcp',
       'pct_return', 'adjcp_filtered', 'pct_return_filtered']]
        process_data=get_data_of_same_length_df(data,24)
        label=np.full(len(process_data), j)
        X.extend(process_data)
        y=np.concatenate((y, label), axis=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    classifier = RocketClassifier(num_kernels=2000,n_jobs=-1)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    print(accuracy_score(y_test, y_pred))

ALL single stock classification have unbelieve 100% acc?

In [None]:
for i in range(stock_group_num):
    print('stock_group',i)
    X=[]
    y=np.empty(0)
    for j in range(regime_num):
        data=pd.read_csv('data_seg_'+str(i)+'_'+str(j)+'.csv').loc[:,['index', 'open', 'high', 'low', 'close', 'adjcp',
       'pct_return', 'adjcp_filtered', 'pct_return_filtered']]
        process_data=get_data_of_same_length_df(data,24)
        label=np.full(len(process_data), j)
        X.extend(process_data)
        y=np.concatenate((y, label), axis=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    classifier = RocketClassifier(num_kernels=2000,n_jobs=-1,use_multivariate='yes')
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    print(accuracy_score(y_test, y_pred))

Still 1.0?

In [None]:

# X_train, X_test, y_train, y_test = train_test_split(X, y)
# classifier = TimeSeriesForestClassifier()
# classifier.fit(X_train, y_train)
# y_pred = classifier.predict(X_test)
# accuracy_score(y_test, y_pred)

# Pre-train Deep learning classification discriminator

### training

In [None]:
from tsai.all import *
my_setup()

In [None]:
for i in range(stock_group_num):
    print('stock_group',i)
    X=[]
    y=np.empty(0)
    for j in range(regime_num):
        data=pd.read_csv('data_seg_'+str(i)+'_'+str(j)+'.csv').loc[:,['index', 'open', 'high', 'low', 'close', 'adjcp',
       'pct_return', 'adjcp_filtered', 'pct_return_filtered']]
        process_data=get_data_of_same_length(data,24)
        label=np.full(len(process_data), j)
        X.extend([p.transpose() for p in process_data])
        y=np.concatenate((y, label), axis=0)
    X=np.array(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    X, y, splits = combine_split_data([X_train, X_test], [y_train, y_test])
    tfms  = [None, [Categorize()]]
    dsets = TSDatasets(X, y, tfms=tfms, splits=splits, inplace=True)
    dls   = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[64, 128], batch_tfms=[TSStandardize()], num_workers=0)
    model = InceptionTime(dls.vars, dls.c)
    learn = Learner(dls, model, metrics=accuracy)
    learn.fit_one_cycle(25, lr_max=1e-3)
    learn.plot_metrics()
    learn.save_all(path='export', dls_fname='dls_'+str(i)+'_'+str(j), model_fname='model_'+str(i)+'_'+str(j), learner_fname='learner_'+str(i)+'_'+str(j))
#     display(type(X_train),X_train.shape)

# Key takeaway

InceptionTime can do the job

In [None]:
X, y, splits = get_classification_data('LSST', split_data=False)

In [None]:
tfms  = [None, TSClassification()] # TSClassification == Categorize
batch_tfms = TSStandardize()
dls = get_ts_dls(X, new_y, splits=splits, tfms=tfms, batch_tfms=batch_tfms, bs=[64, 128])
dls.dataset

In [None]:
def get_std_list(data):
    intervals=get_intervals(data)
    std_list=[]
    data.drop(columns=['index'])
    for interval in intervals:
        data_seg=data.iloc[interval[0]:interval[1],:].to_numpy()
        std=data_seg.adj.std()
        std_list.append(std)
    return std_list

In [None]:
batch_tfms = TSStandardize(by_sample=True)
mv_clf = TSClassifier(X, y, splits=splits, path='models', arch=InceptionTimePlus, batch_tfms=batch_tfms, metrics=accuracy, cbs=ShowGraph())
mv_clf.fit_one_cycle(10, 1e-2)
mv_clf.export("mv_clf.pkl")

## inference

## inference