In [1]:
import glob
import re
import json
import pandas as pd

In [2]:
data_dir = '../data/'

In [3]:
files = glob.glob(data_dir + 'etl_s2_v2*.parquet')
files

['../data/etl_s2_v2_1681390080_1681543680.parquet',
 '../data/etl_s2_v2_1680419520_1680590400.parquet',
 '../data/etl_s2_v2_1680112320_1680265920.parquet',
 '../data/etl_s2_v2_1680897600_1681051200.parquet',
 '../data/etl_s2_v2_1681697280_1681850880.parquet',
 '../data/etl_s2_v2_1679805120_1679958720.parquet',
 '../data/etl_s2_v2_1681850880_1682004480.parquet',
 '../data/etl_s2_v2_1680744000_1680897600.parquet',
 '../data/etl_s2_v2_1679958720_1680112320.parquet',
 '../data/etl_s2_v2_1680590400_1680744000.parquet',
 '../data/etl_s2_v2_1679178240_1679331840.parquet',
 '../data/etl_s2_v2_1680265920_1680419520.parquet',
 '../data/etl_s2_v2_1679485440_1679651520.parquet',
 '../data/etl_s2_v2_1681082880_1681236480.parquet',
 '../data/etl_s2_v2_1681543680_1681697280.parquet',
 '../data/etl_s2_v2_1679651520_1679805120.parquet',
 '../data/etl_s2_v2_1681236480_1681390080.parquet',
 '../data/etl_s2_v2_1679331840_1679485440.parquet']

In [4]:
ticker = 'ETH-USD'

In [5]:
data = pd.read_parquet(files[0])
columns = data.columns

In [6]:
ticker_columns = []
for col in columns:
    if col.startswith(ticker):
        ticker_columns.append(col)
ticker_columns = [col for col in ticker_columns if col != f'{ticker}:trade_avg_price' and col != f'{ticker}:trade_return']

In [7]:
len(ticker_columns)

706

In [8]:
input_columns = []
for col in ticker_columns:
    m = re.match('^{}:(.*)$'.format(ticker), col)
    input_columns.append(m.group(1))

In [9]:
config_filename = data_dir + 'one_volatility_prediction_v1.json'
config_filename

'../data/one_volatility_prediction_v1.json'

In [10]:
config = dict()
config['ticker_column_names'] = input_columns
config['global_column_names'] = ['book_mean_return_27', 'sequence_interval_s']
config['targets'] = [
    {
        'source_name' : 'book_volatility_9',
        'target_name' : 'book_volatility_9_target',
        'shift' : 9,
    },
    {
        'source_name' : 'book_volatility_27',
        'target_name' : 'book_volatility_27_target',
        'shift' : 27,
    },
    {
        'source_name' : 'book_volatility_81',
        'target_name' : 'book_volatility_81_target',
        'shift' : 81,
    },
    {
        'source_name' : 'book_volatility_162',
        'target_name' : 'book_volatility_162_target',
        'shift' : 162,
    },
    {
        'source_name' : 'book_volatility_324',
        'target_name' : 'book_volatility_324_target',
        'shift' : 324,
    },
    {
        'source_name' : 'book_volatility_648',
        'target_name' : 'book_volatility_648_target',
        'shift' : 648,
    },
    {
        'source_name' : 'book_volatility_960',
        'target_name' : 'book_volatility_960_target',
        'shift' : 960,
    },
]
config['train_pct'] = 0.8
config['nn_hidden_size'] = 2048
config['learning_rate'] = 0.000001
config['patience'] = 14
config['patience_decay'] = 0.8
config['threshold'] = 0.000001
config['total_epochs'] = 30
config['loader_args'] = {
    'batch_size' : 256,
    'shuffle' : True,
    'num_workers' : 0,
}
config['score_args'] = {
    'batch_size' : 256,
    'shuffle' : False,
    'num_workers' : 0,
}

In [11]:
with open(config_filename, 'w') as fd:
    json.dump(config, fd)