# Recommendation Algorithm v2
Compared with v1: v2 uses the full feature set (4000+ features), whereas v1 used a simplified set of 64 features.

## Part 1: Fetch new data
Please use `conda env:talib`

In [None]:
# Import the project module and run its calculation-side initialiser
# (presumably sets up the feature-calculation dependencies - confirm in coremlv2).
import coremlv2 as core
core._init_calc()
# Re-execute coremlv2 so that on-disk edits are picked up without a kernel restart.
# NOTE(review): the reload runs after _init_calc(); if _init_calc() stores
# module-level state, confirm that state survives the reload.
import importlib
importlib.reload(core)

In [None]:
# Recalculate the full feature set for database version '3'
# (presumably writes to the indicators database - confirm in coremlv2).
# NOTE(review): Part 2 loads the saved models with db_ver = '4'; confirm which
# database version is meant to be refreshed here.
core.calculate_full_features_db(db_ver='3')

## Part 2: Model engineering and recommendation
Please use `conda env:tensorflow`

In [1]:
# Import the project module and run its ML-side initialiser, then reload it so
# that on-disk edits to coremlv2 are picked up without a kernel restart.
import coremlv2 as core
core._init_ml()
import importlib
importlib.reload(core)

# Hide every CUDA device from this process so the models run on the CPU.
# NOTE(review): this is set after _init_ml(); if TensorFlow has already
# initialised its devices by then, the setting may have no effect - confirm.
core.os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

### Load model
Two models are used. They were selected based on their val_loss and val_acc performance and on their overfitting rate (the difference between loss and val_loss).

model_no 307 (kt_iter v9)
loss - acc - val_loss - val_acc
0.5514 - 0.6911 - 0.5904 - 0.6501

model_no 312 (kt_iter v14)
loss - acc - val_loss - val_acc
0.5998 - 0.6531 - 0.6005 - 0.6390

In [3]:
# --- Configuration shared by every saved model loaded in this cell ---
ROOT_PATH = './'
model_base_ids = ['307', '312']
kt_iters = ['9', '14']  # paired positionally with model_base_ids
model_source = ['MERGED-shuffled']
model_ticker_target = ['MERGED-shuffled']
shift = 0
interval = 1
recurrent = 120
db_ver = '4'
dataset_ver = '1'
split = 0.8

# --- Load each (model id, tuner iteration) pair ---
# read_best_model returns the model together with a data version; both are
# collected in parallel lists so they can be looked up by the same index.
models, data_versions = [], []
for model_base_id, kt_iter in zip(model_base_ids, kt_iters):
    model, data_version = core.read_best_model(
        model_base_id,
        model_source,
        model_ticker_target,
        shift,
        interval,
        recurrent,
        db_ver,
        dataset_ver,
        kt_iter,
        split,
        ROOT_PATH,
        retrain=False,
        backtest=False,
        save_model=False,
        generator=True,
    )
    models.append(model)
    data_versions.append(data_version)

Load weights from epoch 13
Load weights from epoch 1


### Load standard scaler
### Make one shot input
### Make prediction

In [4]:
# Tickers to score with every loaded model.
ticker_groups = ['ADRO', 'AKRA', 'ASII', 'BBCA', 'BBRI', 'BMRI', 'BUMI', 'CMNP', 'CSAP', 'DGIK', 'INDY', 'ISAT', 'KLBF', 'MASA', 'MTDL', 'PKPK', 'POLY', 'PTBA', 'SMSM', 'SRSN', 'TURI', 'UNSP', 'UNTR']
# Shape (n_tickers, 1): each row is a one-element array. The scaler lookup is
# given the whole row, the one-shot input builder only the bare ticker string.
ticker_groups_expanded = core.np.expand_dims(core.np.array(ticker_groups), axis=1)

db_path = 'idx_indicators.db'
db_conn = core.sqlite3.connect(db_path)

standard_scaler = []      # [train_mean, train_std] per (model, ticker), in loop order
prediction_results = []   # one list of 'UP'/'DOWN' strings per model, ordered like ticker_groups
try:
    for model, data_version in zip(models, data_versions):
        prediction_result = []
        for ticker_group in ticker_groups_expanded:
            # Fetch one shot input
            tomorrow_input = core.make_one_shot_input(ticker_group[0], db_conn, recurrent, shift_from_last=0)

            # Fetch standard scaler
            train_mean, train_std = core.standard_scaler_one_shot(ticker_group, data_version, ROOT_PATH=ROOT_PATH, split=split)
            standard_scaler.append([train_mean, train_std])

            # Normalise with the training statistics, then add a leading batch axis.
            tomorrow_input_normalized = (tomorrow_input - train_mean) / train_std
            tomorrow_input_expanded = core.np.expand_dims(tomorrow_input_normalized, 0)

            # Predict
            tomorrow_prediction = model.predict(tomorrow_input_expanded)

            # Convert prediction to text: class index 1 is reported as 'UP', anything else as 'DOWN'.
            tomorrow_prediction_argmax = core.np.argmax(tomorrow_prediction)
            prediction = 'UP' if tomorrow_prediction_argmax == 1 else 'DOWN'
            prediction_result.append(prediction)
        prediction_results.append(prediction_result)
finally:
    # Release the database handle even if a fetch or prediction raises;
    # previously the connection was left open for the lifetime of the kernel.
    db_conn.close()

# Check double down and double up: keep only tickers on which every loaded model
# agrees. With the two models configured above this is the same pairwise check
# as before, but it no longer hard-codes indices 0 and 1, so it also works
# when the number of loaded models changes.
double_up = []
double_down = []
for ticker, votes in zip(ticker_groups, zip(*prediction_results)):
    if len(set(votes)) != 1:
        continue  # the models disagree on this ticker
    if votes[0] == 'UP':
        double_up.append(ticker)
    elif votes[0] == 'DOWN':
        double_down.append(ticker)
print(f'Double up: {double_up}')
print(f'Double down: {double_down}')

Double up: ['AKRA', 'ASII', 'BBCA', 'BBRI', 'CSAP', 'DGIK', 'ISAT', 'PTBA', 'UNTR']
Double down: ['BUMI', 'CMNP', 'INDY', 'KLBF', 'PKPK', 'POLY', 'SMSM', 'SRSN', 'TURI', 'UNSP']


In [5]:
# Convert an integer epoch value (nanoseconds) into a pandas Timestamp for inspection.
core.pd.Timestamp(1642723200000000000)

Timestamp('2022-01-21 00:00:00')

In [9]:
# Load the train/test split for a single ticker (KLBF).
# NOTE(review): interval, db_ver and dataset_ver here differ from the values
# used when loading the saved models (interval=1, db_ver='4', dataset_ver='1'),
# and this call rebinds the notebook-level `data_version` - confirm both are intended.
(
    train_inputs,
    train_labels,
    train_changes,
    test_inputs,
    test_labels,
    test_changes,
    data_version,
) = core.load_dataset(
    ticker_group=['KLBF'],
    shift=0,
    interval=3,
    recurrent=120,
    db_ver='3',
    dataset_ver='4',
    split=0.8,
    ROOT_PATH='./',
)

In [None]:
# Refer to model 256.
# The input takes its (axis-1, axis-2) sizes from the loaded training array.
input_shape = core.tf.keras.Input(
    shape=(train_inputs.shape[1], train_inputs.shape[2])
)

# Entry stage: kernel-size-1 causal convolution -> batch norm -> ReLU.
X = core.tf.keras.layers.Conv1D(
    filters=32,
    kernel_size=1,
    strides=1,
    padding='causal',
)(input_shape)
X = core.tf.keras.layers.BatchNormalization()(X)
X = core.tf.keras.layers.Activation('relu')(X)

# Dense stage: Dense -> batch norm -> ReLU.
X = core.tf.keras.layers.Dense(units=8)(X)
X = core.tf.keras.layers.BatchNormalization()(X)
X = core.tf.keras.layers.Activation('relu')(X)

# Head: the LSTM returns only its final output, followed by Dense -> ReLU.
X = core.tf.keras.layers.LSTM(units=32, return_sequences=False)(X)
X = core.tf.keras.layers.Dense(units=16)(X)
X = core.tf.keras.layers.Activation('relu')(X)

# One softmax unit per label column.
outputs = core.tf.keras.layers.Dense(
    units=train_labels.shape[1],
    activation='softmax',
)(X)
model = core.tf.keras.models.Model(inputs=input_shape, outputs=outputs)

optimizer = core.tf.keras.optimizers.Adam(learning_rate=5e-6)
loss = core.tf.keras.losses.CategoricalCrossentropy(from_logits=False)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

model.summary()
epochs = 20
# NaN and +/-inf entries in the inputs are replaced with 0.0 before fitting.
history = model.fit(
    core.np.nan_to_num(train_inputs, posinf=0.0, neginf=0.0),
    train_labels,
    validation_data=(
        core.np.nan_to_num(test_inputs, posinf=0.0, neginf=0.0),
        test_labels,
    ),
    epochs=epochs,
    verbose=1,
)

In [10]:
# Sequential variant: three stacked 'same'-padded convolutions (the last two
# with stride 3), a GRU that returns only its final output, then a small
# dense head ending in one softmax unit per label column.
model = core.tf.keras.models.Sequential(
    [
        core.tf.keras.Input(
            shape=(train_inputs.shape[1], train_inputs.shape[2])
        ),
        core.tf.keras.layers.Conv1D(
            filters=32, kernel_size=1, strides=1, padding='same'
        ),
        core.tf.keras.layers.Conv1D(
            filters=96, kernel_size=9, strides=3, padding='same'
        ),
        core.tf.keras.layers.Conv1D(
            filters=32, kernel_size=15, strides=3, padding='same'
        ),
        core.tf.keras.layers.GRU(units=48, return_sequences=False),
        core.tf.keras.layers.Dense(units=12, activation='relu'),
        core.tf.keras.layers.Dense(
            units=train_labels.shape[1], activation='softmax'
        ),
    ]
)
optimizer = core.tf.keras.optimizers.Adam(learning_rate=0.00001)
loss = core.tf.keras.losses.CategoricalCrossentropy(from_logits=False)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

model.summary()
epochs = 10
# NaN and +/-inf entries in the inputs are replaced with 0.0 before fitting.
history = model.fit(
    core.np.nan_to_num(train_inputs, posinf=0.0, neginf=0.0),
    train_labels,
    validation_data=(
        core.np.nan_to_num(test_inputs, posinf=0.0, neginf=0.0),
        test_labels,
    ),
    epochs=epochs,
    verbose=1,
)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_13 (Conv1D)          (None, 120, 32)           1952      
                                                                 
 conv1d_14 (Conv1D)          (None, 40, 96)            27744     
                                                                 
 conv1d_15 (Conv1D)          (None, 14, 32)            46112     
                                                                 
 gru_4 (GRU)                 (None, 48)                11808     
                                                                 
 dense_11 (Dense)            (None, 12)                588       
                                                                 
 dense_12 (Dense)            (None, 2)                 26        
                                                                 
Total params: 88,230
Trainable params: 88,230
Non-trai

In [3]:
# Inspect the dimensions of the training input array.
# NOTE(review): presumably (samples, timesteps, features) - confirm against load_dataset.
train_inputs.shape

(3482, 15, 3896)

In [4]:
# Inspect the dimensions of the training label array; the models above size
# their softmax output layer from axis 1 of this array.
train_labels.shape

(3482, 2)

In [None]:
def block_v2_base(prev_node, base, block, layers, kernel_size, strides):
    '''Build four chained causal Conv1D -> BatchNormalization -> ReLU stages.

    Version detail:
    v2:
    - replace concatenate with add
    - add relu after batchnorm
    - remove lstm layer
    - add activation after block_input

    Args:
        prev_node: Keras tensor fed into the first convolution.
        base: base filter count.
        block: block index; multiplies the filter count of every stage.
        layers: per-stage filter multipliers; indices 0-3 are read.
        kernel_size: per-stage Conv1D kernel sizes; indices 0-3 are read.
        strides: per-stage Conv1D strides; indices 0-3 are read.

    Returns:
        The output tensor of the fourth ReLU activation.

    Raises:
        IndexError: if layers, kernel_size or strides has fewer than four entries.
    '''
    node = prev_node
    for stage in range(4):
        node = core.tf.keras.layers.Conv1D(
            filters=base*block*layers[stage],
            kernel_size=kernel_size[stage],
            strides=strides[stage],
            padding='causal',
        )(node)
        node = core.tf.keras.layers.BatchNormalization()(node)
        # Pass the *activated* tensor to the next stage. The previous version
        # created the first three ReLU layers but wired each following Conv1D
        # to the batch-norm output, so those activations never became part of
        # the graph. Activation layers hold no weights, so the set of
        # trainable variables is unchanged by this fix.
        node = core.tf.keras.layers.Activation('relu')(node)
    return node

def block_v2(prev_node, skip_node, base, block, layers=[1,2,3,4], kernel_size=[1,2,3,4], strides=[1,1,1,1]):
    '''Merge a projected skip connection into prev_node, then run block_v2_base.

    Version detail:
    - v2: replace concatenate with add

    Args:
        prev_node: Keras tensor from the previous block.
        skip_node: Keras tensor used as the skip connection.
        base: base filter count.
        block: index of the block being built.
        layers, kernel_size, strides: forwarded unchanged to block_v2_base;
            layers[3] also sets the channel count of the skip projection.

    Returns:
        Whatever block_v2_base returns for the merged, activated tensor.
    '''
    keras_layers = core.tf.keras.layers

    # Kernel-size-1 convolution giving the skip tensor base*(block-1)*layers[3]
    # channels (presumably the channel count of prev_node, so Add() can merge them).
    skip_projected = keras_layers.Conv1D(
        filters=base*(block-1)*layers[3],
        kernel_size=1,
        strides=1,
    )(skip_node)
    skip_normalised = keras_layers.BatchNormalization()(skip_projected)

    merged = keras_layers.Add()([prev_node, skip_normalised])
    activated = keras_layers.Activation('relu')(merged)
    return block_v2_base(activated, base, block, layers, kernel_size, strides)


In [11]:
def model_181_kt(hp):
    '''Keras-tuner hypermodel: conv entry, residual Dense stack, residual LSTM stack.

    Graph, in order:
    - causal Conv1D -> BatchNormalization -> ReLU on a fixed (15, 3896) input;
    - four Dense(d_stack_units) + BatchNormalization layers, where the third
      and fourth add the batch-norm output from two layers earlier;
    - two LSTM(d_stack_units, return_sequences=True) + BatchNormalization
      layers, each followed by an Add with an earlier tensor;
    - LSTM(final_lstm_units) -> Dense(final_dense_units) -> ReLU ->
      Dense(2, softmax).

    Args:
        hp: keras-tuner hyperparameter container; six values are requested
            from it via hp.Int / hp.Float.

    Returns:
        A Model compiled with Adam, categorical cross-entropy and the
        'accuracy' metric.
    '''
    # Search space.
    n_entry_filters = hp.Int('entry_conv_filters', min_value=8, max_value=128, step=8)
    entry_kernel = hp.Int('entry_conv_kernelsize', min_value=1, max_value=7, step=1)
    n_stack = hp.Int('d_stack_units', min_value=8, max_value=48, step=8)
    n_final_lstm = hp.Int('final_lstm_units', min_value=8, max_value=128, step=16)
    n_final_dense = hp.Int('final_dense_units', min_value=4, max_value=64, step=8)
    lr = hp.Float('learning_rate', min_value=1e-7, max_value=9e-6, sampling='log')

    keras = core.tf.keras
    kl = keras.layers

    def relu(node):
        return kl.Activation('relu')(node)

    def batch_norm(node):
        return kl.BatchNormalization()(node)

    def stack_dense(node):
        return kl.Dense(units=n_stack)(node)

    def stack_lstm(node):
        return kl.LSTM(units=n_stack, return_sequences=True)(node)

    # The input shape is hard-coded rather than read from the training array.
    net_input = keras.Input(shape=(15, 3896))
    entry = kl.Conv1D(
        filters=n_entry_filters,
        kernel_size=entry_kernel,
        strides=1,
        padding='causal',
    )(net_input)
    entry = relu(batch_norm(entry))

    # Dense stack, layer 1. The ReLU here reads the Dense output directly; the
    # batch-norm output is used only as a skip tensor further down.
    # NOTE(review): every other layer in this stack activates the batch-norm
    # output - confirm that this difference is intentional.
    dense_1 = stack_dense(entry)
    skip_1 = batch_norm(dense_1)
    act_1 = relu(dense_1)

    # Dense stack, layer 2.
    skip_2 = batch_norm(stack_dense(act_1))
    act_2 = relu(skip_2)

    # Dense stack, layer 3: residual add with the layer-1 batch-norm output.
    bn_3 = batch_norm(stack_dense(act_2))
    act_3 = relu(kl.Add()([bn_3, skip_1]))

    # Dense stack, layer 4: residual add with the layer-2 batch-norm output.
    bn_4 = batch_norm(stack_dense(act_3))
    sum_4 = kl.Add()([bn_4, skip_2])
    act_4 = relu(sum_4)

    # LSTM stack: each batch-norm output is added to an earlier tensor.
    bn_5 = batch_norm(stack_lstm(act_4))
    sum_5 = kl.Add()([bn_5, sum_4])

    bn_6 = batch_norm(stack_lstm(sum_5))
    sum_6 = kl.Add()([bn_6, bn_5])

    # Head.
    head = kl.LSTM(units=n_final_lstm, return_sequences=False)(sum_6)
    head = relu(kl.Dense(units=n_final_dense)(head))
    outputs = kl.Dense(units=2, activation='softmax')(head)

    model = keras.models.Model(inputs=net_input, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.CategoricalCrossentropy(from_logits=False),
        metrics=['accuracy'],
    )
    return model

def model_266_kt(hp):
    '''Keras-tuner hypermodel: conv entry, residual Dense stack, single LSTM head.

    Graph, in order:
    - causal Conv1D -> BatchNormalization -> ReLU on a fixed (15, 3896) input;
    - four Dense(d_stack_units) + BatchNormalization layers, each activated
      from its batch-norm output; the third and fourth add the batch-norm
      output from two layers earlier before the ReLU;
    - LSTM(final_lstm_units) -> Dense(final_dense_units) -> ReLU ->
      Dense(2, softmax).

    NOTE(review): an earlier description of this function said that one stack
    activation takes its input directly from Dense instead of from batch norm.
    No activation in this function is wired that way; model_181_kt has that
    wiring on its first stack layer. Confirm which behaviour is intended.

    Args:
        hp: keras-tuner hyperparameter container; six values are requested
            from it via hp.Int / hp.Float.

    Returns:
        A Model compiled with Adam, categorical cross-entropy and the
        'accuracy' metric.
    '''
    # Search space.
    n_entry_filters = hp.Int('entry_conv_filters', min_value=8, max_value=128, step=8)
    entry_kernel = hp.Int('entry_conv_kernelsize', min_value=1, max_value=7, step=1)
    n_stack = hp.Int('d_stack_units', min_value=8, max_value=48, step=8)
    n_final_lstm = hp.Int('final_lstm_units', min_value=8, max_value=128, step=16)
    n_final_dense = hp.Int('final_dense_units', min_value=4, max_value=64, step=8)
    lr = hp.Float('learning_rate', min_value=1e-7, max_value=9e-6, sampling='log')

    keras = core.tf.keras
    kl = keras.layers

    def relu(node):
        return kl.Activation('relu')(node)

    def dense_bn(node):
        projected = kl.Dense(units=n_stack)(node)
        return kl.BatchNormalization()(projected)

    # The input shape is hard-coded rather than read from the training array.
    net_input = keras.Input(shape=(15, 3896))
    entry = kl.Conv1D(
        filters=n_entry_filters,
        kernel_size=entry_kernel,
        strides=1,
        padding='causal',
    )(net_input)
    entry = kl.BatchNormalization()(entry)
    entry = relu(entry)

    # Dense stack, layers 1 and 2: plain Dense -> BN -> ReLU; the BN outputs
    # are kept as skip tensors.
    skip_1 = dense_bn(entry)
    act_1 = relu(skip_1)

    skip_2 = dense_bn(act_1)
    act_2 = relu(skip_2)

    # Dense stack, layers 3 and 4: add the BN output from two layers earlier.
    bn_3 = dense_bn(act_2)
    act_3 = relu(kl.Add()([bn_3, skip_1]))

    bn_4 = dense_bn(act_3)
    sum_4 = kl.Add()([bn_4, skip_2])
    act_4 = relu(sum_4)

    # Head.
    head = kl.LSTM(units=n_final_lstm, return_sequences=False)(act_4)
    head = relu(kl.Dense(units=n_final_dense)(head))
    outputs = kl.Dense(units=2, activation='softmax')(head)

    model = keras.models.Model(inputs=net_input, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.CategoricalCrossentropy(from_logits=False),
        metrics=['accuracy'],
    )
    return model

In [15]:
# Hyperband search over the model_266_kt hypermodel, minimising val_loss.
# NOTE(review): `kt` is not imported in any visible cell (presumably
# keras_tuner) - confirm it is defined before this cell runs on a fresh kernel.
tuner = kt.Hyperband(
    hypermodel=model_266_kt,
    objective='val_loss',
    max_epochs=50,
    hyperband_iterations=2,
    overwrite=True,  # NOTE(review): presumably discards earlier results under this directory/project - confirm
    directory='kt/v1/',
    project_name='model_266_KLBF',
)

In [16]:
# Print the hyperparameters registered by the hypermodel and their ranges.
tuner.search_space_summary()

Search space summary
Default search space size: 6
entry_conv_filters (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 128, 'step': 8, 'sampling': None}
entry_conv_kernelsize (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 7, 'step': 1, 'sampling': None}
d_stack_units (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 48, 'step': 8, 'sampling': None}
final_lstm_units (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 128, 'step': 16, 'sampling': None}
final_dense_units (Int)
{'default': None, 'conditions': [], 'min_value': 4, 'max_value': 64, 'step': 8, 'sampling': None}
learning_rate (Float)
{'default': 1e-07, 'conditions': [], 'min_value': 1e-07, 'max_value': 9e-06, 'step': None, 'sampling': 'log'}


In [17]:
# Run the hyperparameter search on the loaded train/test split.
# The TensorBoard log directory is now relative, under the tuner's own
# directory ('kt/v1/'). The previous value '/kt/v1/tmp/tb_logs' was an
# absolute path at the filesystem root, outside the project tree.
# NOTE(review): the results summary reports tuner/epochs values chosen by
# Hyperband (e.g. 50, 6, 17), so epochs=20 presumably has no effect - confirm.
tuner.search(
    train_inputs,
    train_labels,
    validation_data=(test_inputs, test_labels),
    epochs=20,
    verbose=2,
    callbacks=[core.tf.keras.callbacks.TensorBoard('kt/v1/tmp/tb_logs')],
)

Trial 180 Complete [00h 03m 49s]
val_loss: 0.7076895833015442

Best val_loss So Far: 0.6908930540084839
Total elapsed time: 03h 47m 57s


In [9]:
# Keep the three best models found by the tuner.
# NOTE(review): this rebinds `models`, which the "Load model" cell fills with
# the two saved recommendation models; re-running the prediction cell after
# this one would score with the tuner models instead - confirm this is intended.
models = tuner.get_best_models(num_models=3)

In [18]:
# Print the best trials with their hyperparameters and objective scores.
tuner.results_summary()

Results summary
Results in kt/v1/model_266_KLBF
Showing 10 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
entry_conv_filters: 56
entry_conv_kernelsize: 2
d_stack_units: 24
final_lstm_units: 72
final_dense_units: 4
learning_rate: 3.7709734351661005e-06
tuner/epochs: 50
tuner/initial_epoch: 17
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: a36139cc3a41f72c4c5efae8764e8def
Score: 0.6908930540084839
Trial summary
Hyperparameters:
entry_conv_filters: 64
entry_conv_kernelsize: 2
d_stack_units: 24
final_lstm_units: 24
final_dense_units: 44
learning_rate: 1.0200022277723317e-06
tuner/epochs: 6
tuner/initial_epoch: 2
tuner/bracket: 3
tuner/round: 1
tuner/trial_id: db7787679391206f2c6faa4eebdce44d
Score: 0.6916313171386719
Trial summary
Hyperparameters:
entry_conv_filters: 112
entry_conv_kernelsize: 6
d_stack_units: 48
final_lstm_units: 104
final_dense_units: 28
learning_rate: 5.9181602483036725e-06
tuner/epochs: 17
tuner/initial_epoch: 0
tuner/bracket: 1

In [None]:
# Backtest performance
# The label arrays are passed as train_labels / test_labels, the names that
# load_dataset() binds in this notebook. The previous names
# (train_one_hot_labels / test_one_hot_labels) are not defined by any visible
# cell, so this cell raised NameError on a fresh kernel.
# NOTE(review): `model` is whichever model was assigned last - confirm it is
# the one meant to be backtested.
train_ideal, train_real, test_ideal, test_real = core.backtest_v1(
    model,
    train_inputs,
    test_inputs,
    train_labels,
    test_labels,
    train_changes,
    test_changes,
)
train_r, train_nr, test_r, test_nr = core.performance_ratio(train_ideal, train_real, test_ideal, test_real)
print(f'Model backtest performance: tr {train_r:.2f}, tnr {train_nr:.2f}, vr {test_r:.2f}, vnr {test_nr:.2f}')
print('above results is in performance fraction between ideal (observed) and real condition produced from the model\n')
print('above results is in performance fraction between ideal (observed) and real condition produced from the model\n')

In [110]:
# Display the training frame.
# NOTE(review): `train_df` is not defined in any visible cell - confirm where
# it comes from before relying on this output.
train_df

Unnamed: 0,open,high,low,close,Volume,change,Volume_rank,rsi14,close_EMA3,close_EMA10,...,cdlunique3river_EMA3_EMA10_offset,cdlunique3river_EMA3_EMA30_offset,cdlunique3river_EMA3_EMA200_offset,cdlxsidegap3methods_EMA3,cdlxsidegap3methods_EMA10,cdlxsidegap3methods_EMA30,cdlxsidegap3methods_EMA200,cdlxsidegap3methods_EMA3_EMA10_offset,cdlxsidegap3methods_EMA3_EMA30_offset,cdlxsidegap3methods_EMA3_EMA200_offset
0,-1.153512,-1.154730,-1.151345,-1.152404,1.479004,-0.047021,0.646929,-4.248831,-1.152059,-1.150373,...,1.271557,1.092374,0.268114,-0.029298,-0.053544,-0.092869,-0.221858,2.770435,1.637598,0.210889
1,-1.152823,-1.154730,-1.152043,-1.152404,0.271038,-0.047021,0.593092,-4.248831,-1.152059,-1.150373,...,1.271557,1.092374,0.268114,-0.029298,-0.053544,-0.092869,-0.221858,2.770435,1.637598,0.210889
2,-1.152133,-1.154730,-1.151345,-1.152404,-0.063900,-0.047021,0.485416,-4.248831,-1.152059,-1.150373,...,1.271557,1.092374,0.268114,-0.029298,-0.053544,-0.092869,-0.221858,2.770435,1.637598,0.210889
3,-1.152823,-1.155413,-1.151345,-1.153094,-0.029311,-0.445532,0.323903,-4.248831,-1.152404,-1.150498,...,1.271557,1.092374,0.268114,-0.029298,-0.053544,-0.092869,-0.221858,2.770435,1.637598,0.210889
4,-1.153512,-1.156096,-1.151345,-1.153784,-0.356906,-0.450567,0.054714,-4.248831,-1.152922,-1.150726,...,1.271557,1.092374,0.268114,-0.029298,-0.053544,-0.092869,-0.221858,2.770435,1.637598,0.210889
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3492,0.929069,0.970775,0.957645,0.954842,-0.587178,0.821316,-0.706373,-1.113230,0.937212,0.991948,...,-0.767516,-0.690682,-0.247881,-0.029298,-0.053544,-0.088886,0.592551,-0.316598,-0.238547,-0.114839
3493,0.969995,1.059925,0.982513,1.036753,-0.310670,1.160574,0.025665,-0.595125,0.987423,1.000729,...,-0.767516,-0.690682,-0.247881,-0.029298,-0.053544,-0.089143,0.584447,-0.316598,-0.238547,-0.114839
3494,1.068219,1.084239,1.098560,1.094091,-0.726396,0.767441,-1.542987,-0.271714,1.041204,1.018346,...,-0.767516,-0.690682,-0.247881,-0.029298,-0.053544,-0.089383,0.576424,-0.316598,-0.238547,-0.114839
3495,1.092775,1.068030,1.048826,1.036753,-0.401050,-0.841194,-0.348633,-0.564927,1.039418,1.022327,...,-0.767516,-0.690682,-0.247881,-0.029298,-0.053544,-0.089608,0.568481,-0.316598,-0.238547,-0.114839
