### Please use `env:tensorflow`

In [1]:
# Environment bootstrap: optional Colab installs, project helper import, tuner import.
# The commented-out lines are only relevant when running on Google Colab.
# !pip install --upgrade tensorflow --quiet
# !pip install keras_tuner --quiet
# !pip install tensorflow-io --quiet
# # Google colab modules
# from google.colab import drive
import sys, importlib

# # Mount drive
# drive.mount('/content/gdrive', force_remount=True)
# Base directory; the tuner cells below write their results to
# `{ROOT_PATH}models/kt/v<kt_iter>/`.
ROOT_PATH = './'
# sys.path.append(ROOT_PATH)

# Project-local helper module. The cells below reach TensorFlow (`core.tf`),
# NumPy (`core.np`), the dataset loaders and the `model_*_kt` hypermodels through it.
import coremlv2 as core
core._init_ml()
# core._init_models()
# core.os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Reload coreml
# NOTE(review): `importlib.reload` re-executes the module body after `_init_ml()`
# has already run — confirm the reload does not discard that initialisation.
importlib.reload(core)
import keras_tuner as kt

In [2]:
# Ask TensorFlow to grow GPU memory on demand rather than reserving it up front.
gpus = core.tf.config.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        # The memory-growth flag has to be identical on every GPU, so set it on all of them.
        for gpu in gpus:
            core.tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = core.tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs, ", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when the GPUs were already initialised before the flag could be set;
        # report it and carry on with the default allocation behaviour.
        print(err)

1 Physical GPUs,  1 Logical GPUs


In [3]:
# Use generator
from tensorflow.keras.utils import Sequence
import numpy as np
class DataGenerator(Sequence):
    """Serve paired inputs and labels to Keras one batch at a time.

    Args:
        x_set: Sliceable, sized container of model inputs.
        y_set: Sliceable, sized container of labels, aligned with ``x_set``.
        batch_size: Number of samples per batch; must be positive.
        **kwargs: Forwarded to ``Sequence.__init__`` (e.g. ``workers`` on Keras
            versions whose ``Sequence`` accepts such options).

    Raises:
        ValueError: If ``batch_size`` is not positive or the two containers
            differ in length.
    """

    def __init__(self, x_set, y_set, batch_size, **kwargs):
        # Newer Keras releases expect subclasses to initialise the base class;
        # with no extra kwargs this is also a harmless no-op on older releases.
        super().__init__(**kwargs)
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size!r}")
        if len(x_set) != len(y_set):
            raise ValueError(
                f"x_set and y_set must have the same length, got {len(x_set)} and {len(y_set)}"
            )
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches per epoch (the last batch may be short)."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return the ``(inputs, labels)`` slice for batch number ``idx``."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        return self.x[start:stop], self.y[start:stop]

### model_314_kt - BBCA, kt_iter=v16

In [2]:
# --- Run identity -----------------------------------------------------------
ticker_group = ['BBCA']   # joined with '_' to form the tuner project name
kt_iter = '16'            # becomes the `v16` folder under models/kt/

# --- Dataset selection (all forwarded to core.load_dataset) -----------------
db_ver, dataset_ver = '3', '4'
shift, interval, recurrent = 0, 1, 120
split = 0.8
generator = False
batch_size = 32

# --- Tuner budget -----------------------------------------------------------
epochs = 15               # passed to tuner.search
max_epochs = 25           # passed to kt.Hyperband

In [None]:
# Load the train/test split for the configured ticker group.
# NOTE(review): the `_gen` names suggest generators, but `generator` is False in the
# config cell and both objects are fed to `np.nan_to_num` below — confirm they are arrays.
(
    train_gen, train_labels, train_changes,
    test_gen, test_labels, test_changes,
    data_version,
) = core.load_dataset(
    ticker_group=ticker_group,
    shift=shift,
    interval=interval,
    recurrent=recurrent,
    db_ver=db_ver,
    dataset_ver=dataset_ver,
    split=split,
    ROOT_PATH=ROOT_PATH,
    generator=generator,
    batch_size=batch_size,
)

# Hyperband search over model_314_kt, minimising validation loss.
# `overwrite=True` discards any earlier results stored in the same directory.
tuner = kt.Hyperband(
    hypermodel=core.model_314_kt,
    objective='val_loss',
    max_epochs=max_epochs,
    hyperband_iterations=1,
    overwrite=True,
    directory=f'{ROOT_PATH}models/kt/v{kt_iter}/',
    project_name='_'.join(ticker_group),
)

# NaN and +/-inf in the inputs are replaced with 0.0 before they reach the model.
tuner.search(
    core.np.nan_to_num(train_gen, posinf=0.0, neginf=0.0),
    train_labels,
    validation_data=(
        core.np.nan_to_num(test_gen, posinf=0.0, neginf=0.0),
        test_labels,
    ),
    epochs=epochs,
    verbose=1,
)

Trial 6 Complete [04h 27m 34s]
val_loss: 0.6936681866645813

Best val_loss So Far: 0.692253828048706
Total elapsed time: 18h 14m 52s

Search: Running Trial #7

Hyperparameter    |Value             |Best Value So Far 
superblock_fina...|4                 |2                 
lr                |3.0198e-05        |1.3575e-06        
comp2             |15                |48                
comp3             |17                |24                
comp5             |22                |10                
comp7             |7                 |7                 
comp11            |5                 |5                 
comp13            |8                 |4                 
comp17            |4                 |5                 
comp19            |5                 |4                 
comp23            |5                 |4                 
comp29            |4                 |4                 
comp31            |4                 |4                 
comp37            |4                 |4   

In [None]:
# Started: 15:30, 1/27/2022
# Still waiting for training to start after 17 minutes
# 8 s/step
# 2256 s/epoch

### model_321_kt - world stock data (small)
est: 1GB memory

### model_322_kt - world stock data (small)
est: 1GB memory

### model_324_kt - world stock data (small)
est: 1GB memory

### 321/29 v8 full_new_wsd

In [2]:
# Run identity: results go to models/kt/v23/ under the project name 'wsd'.
ticker_group = ['wsd']
kt_iter = '23'

# Training budget and batch size shared by the load and search cells that follow.
epochs, max_epochs = 20, 20
batch_size = 64

In [3]:
# Load the training and validation subsets of the 'small' world-stock dataset.
dataset_size = 'small'
shuffle_buffer_size = 1024
generator = False

# Arguments common to both subset loads. The Windows path is a raw string so that
# `\#` and `\i` are not parsed as (invalid) escape sequences; the value is unchanged.
wsd_load_kwargs = dict(
    dataset_size=dataset_size,
    ROOT_PATH=r'H:\#PROJECT\idx',
    db_ver='8',
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    seed=0,
    generator=generator,
)

train_inputs, train_labels = core.load_dataset_wsd_traintest(subset='training', **wsd_load_kwargs)

validation_inputs, validation_labels = core.load_dataset_wsd_traintest(subset='validation', **wsd_load_kwargs)

Total constituents: 22
Total constituents: 8


In [4]:
# Hyperband search over model_321_kt, minimising validation loss.
# `overwrite=True` discards any earlier results stored in the same directory.
tuner = kt.Hyperband(
    hypermodel=core.model_321_kt,
    objective='val_loss',
    max_epochs=max_epochs,
    hyperband_iterations=1,
    overwrite=True,
    directory=f'{ROOT_PATH}models/kt/v{kt_iter}/',
    project_name='_'.join(ticker_group),
)

# NOTE(review): trial tables elsewhere in this notebook show per-trial `tuner/epochs`
# values that differ from `epochs`, so this argument may be overridden — confirm.
tuner.search(
    train_inputs,
    train_labels,
    validation_data=(validation_inputs, validation_labels),
    epochs=epochs,
    verbose=1,
)

Trial 30 Complete [00h 11m 35s]
val_loss: 0.694282054901123

Best val_loss So Far: 0.6696702837944031
Total elapsed time: 02h 13m 19s


In [5]:
# Hyperband search for model_322_kt on the 'small' world-stock dataset (tuner iteration 25).
kt_iter = '25'
ticker_group = ['wsd']
epochs = 20
max_epochs = 20
batch_size = 64

dataset_size = 'small'
shuffle_buffer_size = 1024
generator = False

# Arguments common to both subset loads. The Windows path is a raw string so that
# `\#` and `\i` are not parsed as (invalid) escape sequences; the value is unchanged.
wsd_load_kwargs = dict(
    dataset_size=dataset_size,
    ROOT_PATH=r'H:\#PROJECT\idx',
    db_ver='8',
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    seed=0,
    generator=generator,
)

train_inputs, train_labels = core.load_dataset_wsd_traintest(subset='training', **wsd_load_kwargs)

validation_inputs, validation_labels = core.load_dataset_wsd_traintest(subset='validation', **wsd_load_kwargs)

# `overwrite=True` discards any earlier results stored in the same directory.
tuner = kt.Hyperband(
    hypermodel=core.model_322_kt,
    objective='val_loss',
    max_epochs=max_epochs,
    hyperband_iterations=1,
    overwrite=True,
    directory=f'{ROOT_PATH}models/kt/v{kt_iter}/',
    project_name='_'.join(ticker_group),
)

tuner.search(
    train_inputs,
    train_labels,
    validation_data=(validation_inputs, validation_labels),
    epochs=epochs,
    verbose=1,
)

Trial 30 Complete [00h 15m 22s]
val_loss: 0.6727032661437988

Best val_loss So Far: 0.6726565361022949
Total elapsed time: 02h 46m 13s


In [6]:
# Hyperband search for model_324_kt on the 'small' world-stock dataset (tuner iteration 27).
kt_iter = '27'
ticker_group = ['wsd']
epochs = 20
max_epochs = 20
batch_size = 64

dataset_size = 'small'
shuffle_buffer_size = 1024
generator = False

# Arguments common to both subset loads. The Windows path is a raw string so that
# `\#` and `\i` are not parsed as (invalid) escape sequences; the value is unchanged.
wsd_load_kwargs = dict(
    dataset_size=dataset_size,
    ROOT_PATH=r'H:\#PROJECT\idx',
    db_ver='8',
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    seed=0,
    generator=generator,
)

train_inputs, train_labels = core.load_dataset_wsd_traintest(subset='training', **wsd_load_kwargs)

validation_inputs, validation_labels = core.load_dataset_wsd_traintest(subset='validation', **wsd_load_kwargs)

# `overwrite=True` discards any earlier results stored in the same directory.
tuner = kt.Hyperband(
    hypermodel=core.model_324_kt,
    objective='val_loss',
    max_epochs=max_epochs,
    hyperband_iterations=1,
    overwrite=True,
    directory=f'{ROOT_PATH}models/kt/v{kt_iter}/',
    project_name='_'.join(ticker_group),
)

tuner.search(
    train_inputs,
    train_labels,
    validation_data=(validation_inputs, validation_labels),
    epochs=epochs,
    verbose=1,
)

Trial 30 Complete [00h 27m 34s]
val_loss: 0.6966272592544556

Best val_loss So Far: 0.6725248098373413
Total elapsed time: 05h 47m 27s


In [None]:
# Hyperband search for model_321_kt on the 'full_new_wsd' dataset (tuner iteration 29).
model_no = '321'
kt_iter = '29'
ticker_group = ['wsd']
epochs = 20
max_epochs = 20
batch_size = 64

dataset_size = 'full_new_wsd'
shuffle_buffer_size = 2048
generator = False

# Arguments common to both subset loads. The Windows path is a raw string so that
# `\#` and `\i` are not parsed as (invalid) escape sequences; the value is unchanged.
wsd_load_kwargs = dict(
    dataset_size=dataset_size,
    ROOT_PATH=r'J:\#PROJECT\idx',
    db_ver='8',
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    seed=0,
    generator=generator,
    model_no=model_no,
)

train_inputs, train_labels = core.load_dataset_wsd_traintest(subset='training', **wsd_load_kwargs)

validation_inputs, validation_labels = core.load_dataset_wsd_traintest(subset='validation', **wsd_load_kwargs)

# `overwrite=True` discards any earlier results stored in the same directory.
tuner = kt.Hyperband(
    hypermodel=core.model_321_kt,
    objective='val_loss',
    max_epochs=max_epochs,
    hyperband_iterations=1,
    overwrite=True,
    directory=f'{ROOT_PATH}models/kt/v{kt_iter}/',
    project_name='_'.join(ticker_group),
)

# NOTE(review): the captured output of this cell shows trials running "Epoch 4/7"
# with `tuner/epochs = 7`, so the `epochs` argument appears to be overridden per trial.
tuner.search(
    train_inputs,
    train_labels,
    validation_data=(validation_inputs, validation_labels),
    epochs=epochs,
    verbose=1,
)

Trial 15 Complete [01h 07m 07s]
val_loss: 0.6910593509674072

Best val_loss So Far: 0.6910593509674072
Total elapsed time: 11h 20m 15s

Search: Running Trial #16

Hyperparameter    |Value             |Best Value So Far 
lr                |9.2708e-05        |2.5662e-05        
c_filters         |64                |96                
r_units           |32                |192               
tuner/epochs      |7                 |7                 
tuner/initial_e...|3                 |3                 
tuner/bracket     |2                 |2                 
tuner/round       |1                 |1                 
tuner/trial_id    |7da237ab282d7e3...|79dbd2da038e512...

Epoch 4/7
Epoch 5/7
 995/5345 [====>.........................] - ETA: 10:29 - loss: 0.6825 - accuracy: 0.5456

In [None]:
# RERUN WITH REVISED traintest slice code
# Hyperband search for model_326_kt on the 'full_new_wsd' dataset (tuner iteration 32).
model_no = '326'
kt_iter = '32'
ticker_group = ['wsd']
epochs = 20
max_epochs = 20
batch_size = 64

dataset_size = 'full_new_wsd'
shuffle_buffer_size = 2048
generator = False

# Arguments common to both subset loads. The Windows path is a raw string so that
# `\#` and `\i` are not parsed as (invalid) escape sequences; the value is unchanged.
wsd_load_kwargs = dict(
    dataset_size=dataset_size,
    ROOT_PATH=r'J:\#PROJECT\idx',
    db_ver='8',
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    seed=0,
    generator=generator,
    model_no=model_no,
)

train_inputs, train_labels = core.load_dataset_wsd_traintest(subset='training', **wsd_load_kwargs)

validation_inputs, validation_labels = core.load_dataset_wsd_traintest(subset='validation', **wsd_load_kwargs)

# `overwrite=True` discards any earlier results stored in the same directory.
tuner = kt.Hyperband(
    hypermodel=core.model_326_kt,
    objective='val_loss',
    max_epochs=max_epochs,
    hyperband_iterations=1,
    overwrite=True,
    directory=f'{ROOT_PATH}models/kt/v{kt_iter}/',
    project_name='_'.join(ticker_group),
)

tuner.search(
    train_inputs,
    train_labels,
    validation_data=(validation_inputs, validation_labels),
    epochs=epochs,
    verbose=1,
)

Trial 27 Complete [00h 35m 14s]
val_loss: 0.6986758708953857

Best val_loss So Far: 0.6073791980743408
Total elapsed time: 04h 49m 37s

Search: Running Trial #28

Hyperparameter    |Value             |Best Value So Far 
lr                |1.0649e-05        |0.00057138        
r_units           |96                |112               
d_units_1         |64                |224               
d_units_2         |48                |16                
tuner/epochs      |20                |20                
tuner/initial_e...|0                 |7                 
tuner/bracket     |0                 |2                 
tuner/round       |0                 |2                 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
 352/5416 [>.............................] - ETA: 1:04 - loss: 0.6396 - accuracy: 0.6057

In [4]:
# RERUN WITH REVISED traintest slice code
# Configuration and data load for model 326 on 'full_new_wsd_mix' (tuner iteration 33).
model_no = '326'
kt_iter = '33'
ticker_group = ['wsd']
epochs = 30
max_epochs = 40
batch_size = 64

dataset_size = 'full_new_wsd_mix'
shuffle_buffer_size = 2048
generator = False

# Arguments common to both subset loads. The Windows path is a raw string so that
# `\#` and `\i` are not parsed as (invalid) escape sequences; the value is unchanged.
wsd_load_kwargs = dict(
    dataset_size=dataset_size,
    ROOT_PATH=r'J:\#PROJECT\idx',
    db_ver='8',
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    seed=0,
    generator=generator,
    model_no=model_no,
)

train_inputs, train_labels = core.load_dataset_wsd_traintest(subset='training', **wsd_load_kwargs)

validation_inputs, validation_labels = core.load_dataset_wsd_traintest(subset='validation', **wsd_load_kwargs)

Total constituents: 555
Total constituents: 423


In [None]:
# Wrap the loaded inputs/labels in DataGenerator so Keras pulls them batch by batch.
train_gen = DataGenerator(train_inputs, train_labels, batch_size)
validation_gen = DataGenerator(validation_inputs, validation_labels, batch_size)

# Hyperband search over model_326_kt, minimising validation loss.
# `overwrite=True` discards any earlier results stored in the same directory.
tuner = kt.Hyperband(
    hypermodel=core.model_326_kt,
    objective='val_loss',
    max_epochs=max_epochs,
    hyperband_iterations=1,
    overwrite=True,
    directory=f'{ROOT_PATH}models/kt/v{kt_iter}/',
    project_name='_'.join(ticker_group),
)

# Labels come from the generators, so only the generators are passed here.
tuner.search(
    train_gen,
    validation_data=validation_gen,
    epochs=epochs,
    verbose=1,
)

Trial 55 Complete [00h 16m 59s]
val_loss: 0.6374726295471191

Best val_loss So Far: 0.6362730264663696
Total elapsed time: 12h 10m 21s

Search: Running Trial #56

Hyperparameter    |Value             |Best Value So Far 
lr                |1.0981e-06        |0.0009985         
r_units           |32                |112               
d_units_1         |80                |80                
d_units_2         |32                |64                
tuner/epochs      |5                 |40                
tuner/initial_e...|0                 |14                
tuner/bracket     |2                 |3                 
tuner/round       |0                 |3                 

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
  289/14334 [..............................] - ETA: 2:58 - loss: 0.6755 - accuracy: 0.5794

In [4]:
# Display the dimensions of the loaded validation inputs as a quick sanity check.
validation_inputs.shape

(173306, 120, 60)

In [5]:
# RERUN WITH REVISED traintest slice code
# Configuration and data load for model 327 on 'full_new_wsd_mix' (tuner iteration 34).
model_no = '327'
kt_iter = '34'
ticker_group = ['wsd']
epochs = 20
max_epochs = 20
batch_size = 64

dataset_size = 'full_new_wsd_mix'
shuffle_buffer_size = 1024
generator = False

# Arguments common to both subset loads. The Windows path is a raw string so that
# `\#` and `\i` are not parsed as (invalid) escape sequences; the value is unchanged.
wsd_load_kwargs = dict(
    dataset_size=dataset_size,
    ROOT_PATH=r'J:\#PROJECT\idx',
    db_ver='8',
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    seed=0,
    generator=generator,
    model_no=model_no,
)

train_inputs, train_labels = core.load_dataset_wsd_traintest(subset='training', **wsd_load_kwargs)

validation_inputs, validation_labels = core.load_dataset_wsd_traintest(subset='validation', **wsd_load_kwargs)

Total constituents: 555
Total constituents: 423


In [None]:
# Wrap the loaded inputs/labels in DataGenerator so Keras pulls them batch by batch.
train_gen = DataGenerator(train_inputs, train_labels, batch_size)
validation_gen = DataGenerator(validation_inputs, validation_labels, batch_size)

# NOTE(review): the preceding config cell sets model_no = '327' (and loads data for it),
# yet the hypermodel tuned here is `core.model_326_kt` — confirm which model is intended.
# `overwrite=True` discards any earlier results stored in the same directory.
tuner = kt.Hyperband(
    hypermodel=core.model_326_kt,
    objective='val_loss',
    max_epochs=max_epochs,
    hyperband_iterations=1,
    overwrite=True,
    directory=f'{ROOT_PATH}models/kt/v{kt_iter}/',
    project_name='_'.join(ticker_group),
)

# Labels come from the generators, so only the generators are passed here.
tuner.search(
    train_gen,
    validation_data=validation_gen,
    epochs=epochs,
    verbose=1,
)

Trial 29 Complete [01h 03m 36s]
val_loss: 0.6493789553642273

Best val_loss So Far: 0.6360958814620972
Total elapsed time: 11h 04m 13s

Search: Running Trial #30

Hyperparameter    |Value             |Best Value So Far 
lr                |0.0095164         |0.0010391         
r_units           |64                |96                
d_units_1         |32                |80                
d_units_2         |128               |64                
tuner/epochs      |20                |3                 
tuner/initial_e...|0                 |0                 
tuner/bracket     |0                 |2                 
tuner/round       |0                 |0                 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
 2413/14334 [====>.........................] - ETA: 2:23 - loss: 0.6687 - accuracy: 0.5885

### Run with revised generator
- Use model 326 as the early benchmark model, varying the world portion to find the optimal point.

In [None]:
# RERUN WITH REVISED traintest slice code
# Hyperband search for model_326_kt fed by the revised `core.load_dataset_wsd` loader.
model_no = '326'
constituent_limits = 0
id_constituent = 1
# The tuner output folder name encodes the model and constituent settings of this run.
kt_iter = f'_model-{model_no}_wc-{constituent_limits}_ic-{id_constituent}'
ticker_group = ['wsd']
epochs = 40
max_epochs = 40
batch_size = 4096
shuffle_buffer_size = 16
generator = True

# Arguments common to the training and validation loads. The Windows path is a raw
# string so that `\#` and `\i` are not parsed as (invalid) escape sequences; the value
# is unchanged.
wsd_load_kwargs = dict(
    ROOT_PATH=r'J:\#PROJECT\idx',
    db_ver='8',
    id_constituent=id_constituent,
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    seed=0,
    generator=generator,
    model_no=model_no,
)

# Train: `slice_from_beginning`=True
train_gen = core.load_dataset_wsd(
    slice_from_beginning=True,
    constituent_limits=constituent_limits,
    **wsd_load_kwargs,
)
# Validation: `slice_from_beginning`=False. constituent_limits in validation is always 0 (focus on idx performance progression only)
validation_gen = core.load_dataset_wsd(
    slice_from_beginning=False,
    constituent_limits=0,
    **wsd_load_kwargs,
)

# `overwrite=True` discards any earlier results stored in the same directory.
tuner = kt.Hyperband(
    hypermodel=core.model_326_kt,
    objective='val_loss',
    max_epochs=max_epochs,
    hyperband_iterations=1,
    overwrite=True,
    directory=f'{ROOT_PATH}models/kt/v{kt_iter}/',
    project_name='_'.join(ticker_group),
)

tuner.search(
    train_gen,
    validation_data=validation_gen,
    epochs=epochs,
    verbose=1,
)

Trial 3 Complete [00h 17m 32s]
val_loss: 0.6559761762619019

Best val_loss So Far: 0.6455338597297668
Total elapsed time: 00h 52m 14s

Search: Running Trial #4

Hyperparameter    |Value             |Best Value So Far 
lr                |1.3451e-07        |0.0065233         
r_units           |80                |96                
d_units_1         |64                |96                
d_units_2         |48                |64                
tuner/epochs      |2                 |2                 
tuner/initial_e...|0                 |0                 
tuner/bracket     |3                 |3                 
tuner/round       |0                 |0                 

Epoch 1/2
Epoch 2/2

In [None]:
#