In [1]:
import numpy as np
import pandas as pd

from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
tf.random.set_seed(87)

import keras.backend as K
from keras.models import Model
from keras.layers import Input, Layer, Dot, RepeatVector, Activation, Add, Lambda, Concatenate, BatchNormalization
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import EarlyStopping, ModelCheckpoint

import keras_tuner as kt
from keras_tuner.engine.hyperparameters import HyperParameters
from livelossplot import PlotLossesKeras

import warnings
warnings.filterwarnings('ignore')

Attention Layer

In [2]:
import os

# KERAS_ATTENTION_DEBUG: If set to 1. Will switch to debug mode.
# In debug mode, the class Attention is no longer a Keras layer.
# What it means in practice is that we can have access to the internal values
# of each tensor. If we don't use debug, Keras treats the object
# as a layer, and we can only get the final output.
debug_flag = int(os.environ.get('KERAS_ATTENTION_DEBUG', 0))


# References:
# - https://arxiv.org/pdf/1508.04025.pdf (Luong).
# - https://arxiv.org/pdf/1409.0473.pdf (Bahdanau).
# - https://machinelearningmastery.com/the-bahdanau-attention-mechanism/ (Some more explanation).

class Attention(object if debug_flag else Layer):
    SCORE_LUONG = 'luong'
    SCORE_BAHDANAU = 'bahdanau'

    def __init__(self, units: int = 128, score: str = 'luong', **kwargs):
        super(Attention, self).__init__(**kwargs)
        if score not in {self.SCORE_LUONG, self.SCORE_BAHDANAU}:
            raise ValueError(f'Possible values for score are: [{self.SCORE_LUONG}] and [{self.SCORE_BAHDANAU}].')
        self.units = units
        self.score = score

    # noinspection PyAttributeOutsideInit
    def build(self, input_shape):
        input_dim = int(input_shape[-1])
        with K.name_scope(self.name if not debug_flag else 'attention'):
            # W in W*h_S.
            if self.score == self.SCORE_LUONG:
                self.luong_w = Dense(input_dim, use_bias=False, name='luong_w')
                # dot : last hidden state H_t and every hidden state H_s.
                self.luong_dot = Dot(axes=[1, 2], name='attention_score')
            else:
                # Dense implements the operation: output = activation(dot(input, kernel) + bias)
                self.bahdanau_v = Dense(1, use_bias=False, name='bahdanau_v')
                self.bahdanau_w1 = Dense(input_dim, use_bias=False, name='bahdanau_w1')
                self.bahdanau_w2 = Dense(input_dim, use_bias=False, name='bahdanau_w2')
                self.bahdanau_repeat = RepeatVector(input_shape[1])
                self.bahdanau_tanh = Activation('tanh', name='bahdanau_tanh')
                self.bahdanau_add = Add()

            self.h_t = Lambda(lambda x: x[:, -1, :], output_shape=(input_dim,), name='last_hidden_state')

            # exp / sum(exp) -> softmax.
            self.softmax_normalizer = Activation('softmax', name='attention_weight')

            # dot : score * every hidden state H_s.
            # dot product. SUM(v1*v2). H_s = every source hidden state.
            self.dot_context = Dot(axes=[1, 1], name='context_vector')

            # [Ct; ht]
            self.concat_c_h = Concatenate(name='attention_output')

            # x -> tanh(w_c(x))
            self.w_c = Dense(self.units, use_bias=False, activation='tanh', name='attention_vector')
        if not debug_flag:
            # debug: the call to build() is done in call().
            super(Attention, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        return input_shape[0], self.units

    def __call__(self, inputs, training=None, **kwargs):
        if debug_flag:
            return self.call(inputs, training, **kwargs)
        else:
            return super(Attention, self).__call__(inputs, training, **kwargs)

    # noinspection PyUnusedLocal
    def call(self, inputs, training=None, **kwargs):
        """
        Many-to-one attention mechanism for Keras. Supports:
            - Luong's multiplicative style.
            - Bahdanau's additive style.
        @param inputs: 3D tensor with shape (batch_size, time_steps, input_dim).
        @param training: not used in this layer.
        @return: 2D tensor with shape (batch_size, units)
        @author: philipperemy, felixhao28.
        """
        h_s = inputs
        if debug_flag:
            self.build(h_s.shape)
        h_t = self.h_t(h_s)
        if self.score == self.SCORE_LUONG:
            # Luong's multiplicative style.
            score = self.luong_dot([h_t, self.luong_w(h_s)])
        else:
            # Bahdanau's additive style.
            self.bahdanau_w1(h_s)
            a1 = self.bahdanau_w1(h_t)
            a2 = self.bahdanau_w2(h_s)
            a1 = self.bahdanau_repeat(a1)
            score = self.bahdanau_tanh(self.bahdanau_add([a1, a2]))
            score = self.bahdanau_v(score)
            score = K.squeeze(score, axis=-1)

        alpha_s = self.softmax_normalizer(score)
        context_vector = self.dot_context([h_s, alpha_s])
        a_t = self.w_c(self.concat_c_h([context_vector, h_t]))
        return a_t

    def get_config(self):
        config = super(Attention, self).get_config()
        config.update({'units': self.units, 'score': self.score})
        return config

# Data preprocessing

1.1. Import data

In [3]:
#############
TICKER = 2330
TP = 10
#############

### import data ###
data = pd.read_csv('/Users/yitsung/Desktop/MasterThesis/data/TaiwanStockData_Top100_EMA')
ticker_data = data[data['ticker']==TICKER].reset_index(drop=True)
ticker_data = ticker_data.drop(columns=['ticker'])

# (SMA-P/P, 2class) #
ticker_data[f'y_{TP}'] = ticker_data['close'].rolling(window=TP).mean()
ticker_data[f'y_{TP}'] = ticker_data[f'y_{TP}'].shift(-TP)
ticker_data = ticker_data.dropna().reindex()
ticker_data[f'y_{TP}'] = ((ticker_data[f'y_{TP}'] - ticker_data['close']) >= 0).astype(int)

ticker_data

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,y_5
0,2021-01-04,530.0,540.0,528.0,536.0,39490.0,454.0,12463.0,-33.0,865.0,2342.0,6.0443,521.295251,518.980386,513.251221,5.729165,3.933239,84.477581,1
1,2021-01-05,536.0,542.0,535.0,542.0,34839.0,-355.0,2884.0,179.0,-451.0,-1374.0,5.3592,525.437881,522.532126,515.535238,6.996887,4.619674,88.417310,1
2,2021-01-06,555.0,555.0,541.0,549.0,55614.0,-256.0,5355.0,105.0,-4163.0,1.0,6.9696,530.151835,526.614084,518.179719,8.434365,5.454306,91.005801,1
3,2021-01-07,554.0,570.0,553.0,565.0,53393.0,2200.0,1671.0,-75.0,2060.0,-402.0,8.7664,537.123278,532.531850,521.861371,10.670478,6.574521,93.325963,1
4,2021-01-08,580.0,580.0,571.0,580.0,62957.0,-502.0,3278.0,187.0,1176.0,-5041.0,9.0658,545.700404,539.847445,526.412277,13.435169,8.026473,94.939847,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
699,2023-11-20,576.0,579.0,575.0,577.0,26606.0,176.0,3579.0,-125.0,270.0,-2193.0,5.4217,570.883694,567.188910,556.797391,10.391519,7.167611,94.971748,0
700,2023-11-21,582.0,585.0,581.0,585.0,39881.0,-334.0,18793.0,97.0,-772.0,10844.0,6.7572,573.706955,569.929078,558.886473,11.042605,7.942610,100.000000,0
701,2023-11-22,576.0,579.0,574.0,577.0,23922.0,533.0,-2966.0,-478.0,-230.0,-7073.0,4.7807,574.365564,571.016912,560.228216,10.788696,8.511827,97.938530,0
702,2023-11-23,574.0,578.0,574.0,578.0,15144.0,173.0,3740.0,-253.0,-218.0,93.0,3.0366,575.092451,572.091233,561.544644,10.546589,8.918779,95.718908,0


1.2.Splite data into train(Library) and test(Prediction)

In [4]:
Library = ticker_data[ticker_data['Date'] <= '2023-06-30'] # windows=20, the last prediction from Library is 6/30
Prediction = ticker_data[(ticker_data['Date'] >= '2023-06-01')&(ticker_data['Date'] <= '2023-10-31')] # windows=20, start from using 6/1 to predict 7/3

1.3.Data Normalize

In [5]:
def make_data_minmax(Library, Prediction):

    # MinMax #
    scaler = MinMaxScaler()
    feature_to_standardize = Library.columns.to_list()[1 : ] # exclude 'Date'
    Library[feature_to_standardize] = scaler.fit_transform(Library[feature_to_standardize])
    Prediction[feature_to_standardize] = scaler.fit_transform(Prediction[feature_to_standardize])

    return Library, Prediction

### splite train set and validation set ###
train_Library = Library[: int((len(Library) * 0.8))]
valid_Library = Library[int((len(Library) * 0.8)): ]
train_Library, valid_Library = make_data_minmax(Library=train_Library, Prediction=valid_Library)

### splite whole data ###
Library, Prediction = make_data_minmax(Library=Library, Prediction=Prediction)

1.4.Make window data: X, y

In [6]:
def data_preprocess(data, window_size):

    X = np.array(data.iloc[:, 1: -1])
    y = data.iloc[:, -1].values.reshape(-1, 1)

    data_X, data_y = [], []
    for i in range(len(data) - window_size + 1):
        data_X.append(X[i : (i + window_size), :])
        data_y.append(y[i + window_size - 1])

    data_X, data_y = np.array(data_X), np.array(data_y)
        
    return data_X, data_y

### train set and validation set ###
train_X, train_y = data_preprocess(data=train_Library, window_size=20)
valid_X, valid_y = data_preprocess(data=valid_Library, window_size=20)

### whole data ###
# full_X, full_y = data_preprocess(data=Library, window_size=20) # just test 
test_X, test_y = data_preprocess(data=Prediction, window_size=20)

1.5.Over-smapling

In [7]:
### train set and validation set ###
ros = RandomOverSampler(random_state=87)
train_X_resampled, train_y_resampled = ros.fit_resample(train_X.reshape(train_X.shape[0], -1), train_y)
train_X_resampled = train_X_resampled.reshape(-1, train_X.shape[1], train_X.shape[2])
train_y_resampled = train_y_resampled.reshape(-1,1) # just test

print("Shape of resampled train_X:", train_X_resampled.shape)
print("Shape of resampled train_y:", train_y_resampled.shape)
print("Number of positive samples after resampling:", train_y_resampled.sum())

# ### whole data ###
# ros = RandomOverSampler(random_state=87)
# full_X_resampled, full_y_resampled = ros.fit_resample(full_X.reshape(full_X.shape[0], -1), full_y)
# full_X_resampled = full_X_resampled.reshape(-1, full_X.shape[1], full_X.shape[2])
# full_y_resampled = full_y_resampled.reshape(-1,1) # just test

# print("Shape of resampled full_X:", full_X_resampled.shape)
# print("Shape of resampled full_y:", full_y_resampled.shape)
# print("Number of positive samples after resampling:", full_y_resampled.sum())

Shape of resampled train_X: (508, 20, 17)
Shape of resampled train_y: (508, 1)
Number of positive samples after resampling: 254.0


# Create model

In [8]:
#############
TUNNING = False
params = {'X_shape': train_X.shape,
          'hidden_units': [32, 80, 16, 112, 128, 48, 96], 
          'dropout_rates': [0.5, 0.0, 0.8, 0.2, 0.0],
          'ls': 1e-05, 'lr': 0.01}
#############

2.1.Create model and find hyperparameter

In [9]:
def tunning_model(hp, X_shape):

    tf.random.set_seed(87)

    #############################################
    hidden_units = [hp.Int(name=f"units_{i}", min_value=16, max_value=128, step=16) for i in range(1, 8)]
    dropout_rates = [hp.Choice(f"dropout_{i}", [0.0, 0.2, 0.5, 0.8]) for i in range(1, 6)]
    ls = hp.Choice('ls',[1e-2, 1e-3, 1e-5])
    lr = hp.Choice('lr',[1e-2, 1e-3, 1e-5])
    #############################################
    
    inp = Input(shape = (X_shape[1], X_shape[2]))
    x0 = BatchNormalization()(inp)

    lstm = LSTM(hidden_units[0], return_sequences=True)(x0)
    lstm = LSTM(hidden_units[1], return_sequences=True)(lstm)
    lstm = LSTM(hidden_units[2], return_sequences=True)(lstm)
    lstm = Dropout(dropout_rates[0])(lstm)

    att = Attention(hidden_units[3])(lstm)
    x = Dropout(dropout_rates[1])(att)
    
    for i in range(4, len(hidden_units)):
        x = Dense(hidden_units[i])(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(dropout_rates[i - 2])(x)

    out = Dense(1, activation = 'sigmoid')(x)

    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=lr),
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=ls), 
                  metrics=tf.keras.metrics.AUC(name='AUC'))

    return model

if TUNNING:
    model_fn = lambda hp: tunning_model(hp, X_shape=train_X.shape)
    tuner = kt.BayesianOptimization(model_fn,
                                    objective=kt.Objective("val_AUC", direction="max"),
                                    max_trials=10,
                                    executions_per_trial=1,
                                    directory="model_kt",
                                    overwrite=True,
                                    seed=87)
    path = f'model.hdf5'
    ckp = ModelCheckpoint(path, monitor='val_AUC', verbose=0,                      # If you want to use, uncomment
                          save_best_only=True, save_weights_only=True, mode='max')
    es = EarlyStopping(monitor='val_AUC', min_delta=1e-4, patience=50, mode='max', # If you want to use, uncomment # or choose patience=n by experience
                       baseline=None, restore_best_weights=True, verbose=1)
    
    tuner.search(train_X_resampled, train_y_resampled, 
                 validation_data=(valid_X, valid_y), # validation_data=(valid_X, [valid_X, valid_y, valid_y]) # validation_split=0.2, shuffle=True
                 epochs=100,                                                       # 100 or coose epochs=n by experience
                 batch_size=16, 
                 callbacks=[ckp, es],                                              # If you want to use, uncomment
                 verbose=1)
    model = tuner.get_best_models()[0]

    tf.keras.backend.clear_session() # clear memory
    
    best_hyperparameters = tuner.get_best_hyperparameters()[0]
    print("Best Hyperparameters:")
    print(best_hyperparameters.values)

Trial 10 Complete [00h 01m 15s]
val_AUC: 0.6820651888847351

Best val_AUC So Far: 0.6820651888847351
Total elapsed time: 00h 12m 53s
Best Hyperparameters:
{'units_1': 32, 'units_2': 80, 'units_3': 16, 'units_4': 112, 'units_5': 128, 'units_6': 48, 'units_7': 96, 'dropout_1': 0.5, 'dropout_2': 0.0, 'dropout_3': 0.8, 'dropout_4': 0.2, 'dropout_5': 0.0, 'ls': 1e-05, 'lr': 0.001}


2.2.Train model(with parameter)

In [10]:
def create_model(X_shape, hidden_units, dropout_rates, lr, ls):

    tf.random.set_seed(87)

    inp = Input(shape = (X_shape[1], X_shape[2]))
    x0 = BatchNormalization()(inp)

    lstm = LSTM(hidden_units[0], return_sequences=True)(x0)
    lstm = LSTM(hidden_units[1], return_sequences=True)(lstm)
    lstm = LSTM(hidden_units[2], return_sequences=True)(lstm)
    lstm = Dropout(dropout_rates[0])(lstm)

    att = Attention(hidden_units[3])(lstm)
    x = Dropout(dropout_rates[1])(att)
    
    for i in range(4, len(hidden_units)):
        x = Dense(hidden_units[i])(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(dropout_rates[i - 2])(x)

    out = Dense(1, activation = 'sigmoid')(x)

    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=lr),
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=ls), 
                  metrics=tf.keras.metrics.AUC(name='AUC'))

    return model

if TUNNING == False:

    path = f'model.hdf5'
    model = create_model(**params)
    ckp = ModelCheckpoint(path, monitor='val_AUC', verbose = 0,                    # If you want to use, uncomment
                          save_best_only=True, save_weights_only=True, mode='max')
    es = EarlyStopping(monitor='val_AUC', min_delta=1e-4, patience=50, mode='max', # If you want to use, uncomment # or choose patience=n by experience
                       baseline=None, restore_best_weights=True, verbose=0)
    
    history = model.fit(train_X_resampled, train_y_resampled, # train_X_resampled, train_y_resampled # full_X_resampled, full_y_resampled
                        validation_data=(valid_X, valid_y), # validation_data=(valid_X, valid_y) # validation_split=0.2, shuffle=True                 
                        # sample_weight = sw[tr], 
                        epochs=100,                                                # 100 or coose epochs=n by experience
                        batch_size=16, 
                        callbacks=[ckp, es],                                       # If you want to use, uncomment
                        verbose=1)
    
    tf.keras.backend.clear_session() # clear memory

    hist = pd.DataFrame(history.history)
    score = hist['val_AUC'].max()
    print(f'AUC:', score)

2.3.Test model on one stock

In [10]:
pred_dir = model.predict(test_X)
pred_dir = (pred_dir > 0.5).astype(int)

result_df = pd.DataFrame(pred_dir, columns=['Pred'])
result_df['True'] = test_y

match_count = (result_df['Pred'] == result_df['True']).sum()
correct = match_count / len(result_df)

print(f'ACC: {correct}\n')
result_df.head(60)

2024-03-24 22:43:46.640420: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


ACC: 0.6309523809523809



Unnamed: 0,Pred,True
0,0,0.0
1,0,0.0
2,0,0.0
3,0,0.0
4,0,1.0
5,0,1.0
6,0,1.0
7,0,1.0
8,0,1.0
9,0,0.0


# Whole experiment 

In [None]:
#############
TP = 10
TUNNING = True
params = {'X_shape': train_X.shape,
          'hidden_units': [32, 80, 16, 112, 128, 48, 96], 
          'dropout_rates': [0.5, 0.0, 0.8, 0.2, 0.0],
          'ls': 1e-05, 'lr': 0.01}
# constituent = [2330, 2454, 2317, 2308, 2382, 2303, 2891, 3711, 2881, 2412,
#                2886, 2882, 2884, 1216, 2885, 3231, 3034, 2357, 2002, 2892,
#                1303, 2379, 5880, 2301, 3037, 2345, 1301, 3008, 3661, 2890,
#                5871, 2880, 2327, 2883, 2887, 2207, 4938, 1101, 6669, 1326,
#                2395, 3045, 5876, 2603, 1590, 2912, 4904, 2801, 6505, 2408]
constituent = [2330, 2454, 2317, 2308, 2382, 2303, 2891, 3711, 2881, 2412] # just test
#############

experiment_0050_result = pd.DataFrame()

for TICKER in constituent:

    print(f'\n now: processing {TICKER} \n')

    try:
        ##### import data #####
        data = pd.read_csv('/Users/yitsung/Desktop/MasterThesis/data/TaiwanStockData_Top100_EMA')
        ticker_data = data[data['ticker']==TICKER].reset_index(drop=True)
        ticker_data = ticker_data.drop(columns=['ticker'])

        ticker_data[f'y_{TP}'] = ticker_data['close'].rolling(window=TP).mean()
        ticker_data[f'y_{TP}'] = ticker_data[f'y_{TP}'].shift(-TP)
        ticker_data = ticker_data.dropna().reindex()
        ticker_data[f'y_{TP}'] = ((ticker_data[f'y_{TP}'] - ticker_data['close']) >= 0).astype(int)

        ##### Splite data into train(Library) and test(Prediction) #####
        Library = ticker_data[ticker_data['Date'] <= '2023-06-30']
        Prediction = ticker_data[(ticker_data['Date'] >= '2023-06-01')&(ticker_data['Date'] <= '2023-10-31')]

        ##### Data Normalize #####
        train_Library = Library[: int((len(Library) * 0.8))]
        valid_Library = Library[int((len(Library) * 0.8)): ]

        train_Library, valid_Library = make_data_minmax(Library=train_Library, Prediction=valid_Library)
        Library, Prediction = make_data_minmax(Library=Library, Prediction=Prediction)

        ##### Make window data: X, y #####
        train_X, train_y = data_preprocess(data=train_Library, window_size=20)
        valid_X, valid_y = data_preprocess(data=valid_Library, window_size=20)
        test_X, test_y = data_preprocess(data=Prediction, window_size=20)

        ##### Over-smapling #####
        ros = RandomOverSampler(random_state=87)
        train_X_resampled, train_y_resampled = ros.fit_resample(train_X.reshape(train_X.shape[0], -1), train_y)
        train_X_resampled = train_X_resampled.reshape(-1, train_X.shape[1], train_X.shape[2])
        train_y_resampled = train_y_resampled.reshape(-1,1) # just test

        ###### Create model and find hyperparameter #####
        if TUNNING:
            model_fn = lambda hp: tunning_model(hp, X_shape=train_X.shape)
            tuner = kt.BayesianOptimization(model_fn,
                                            objective=kt.Objective("val_AUC", direction="max"),
                                            max_trials=10,
                                            executions_per_trial=1,
                                            directory="model_kt",
                                            overwrite=True,
                                            seed=87)
            path = f'model.hdf5'
            ckp = ModelCheckpoint(path, monitor='val_AUC', verbose=0,                      # If you want to use, uncomment
                                save_best_only=True, save_weights_only=True, mode='max')
            es = EarlyStopping(monitor='val_AUC', min_delta=1e-4, patience=50, mode='max', # If you want to use, uncomment # or choose patience=n by experience
                            baseline=None, restore_best_weights=True, verbose=1)
            
            tuner.search(train_X_resampled, train_y_resampled, 
                        validation_data=(valid_X, valid_y), # validation_data=(valid_X, [valid_X, valid_y, valid_y]) # validation_split=0.2, shuffle=True
                        epochs=100,                                                       # 100 or coose epochs=n by experience
                        batch_size=16, 
                        callbacks=[ckp, es],                                              # If you want to use, uncomment
                        verbose=1)
            model = tuner.get_best_models()[0]

            tf.keras.backend.clear_session() # clear memory
            
            best_hyperparameters = tuner.get_best_hyperparameters()[0]
            print("Best Hyperparameters:")
            print(best_hyperparameters.values)

        ##### Train model(with parameter) #####
        else:
            path = f'model.hdf5'
            model = create_model(**params)
            ckp = ModelCheckpoint(path, monitor='val_AUC', verbose = 0,                    # If you want to use, uncomment
                                save_best_only=True, save_weights_only=True, mode='max')
            es = EarlyStopping(monitor='val_AUC', min_delta=1e-4, patience=50, mode='max', # If you want to use, uncomment # or choose patience=n by experience
                            baseline=None, restore_best_weights=True, verbose=0)
            
            history = model.fit(train_X_resampled, train_y_resampled, # train_X_resampled, train_y_resampled # full_X_resampled, full_y_resampled
                                validation_data=(valid_X, valid_y), # validation_data=(valid_X, valid_y) # validation_split=0.2, shuffle=True                 
                                # sample_weight = sw[tr], 
                                epochs=100,                                                 # 100 or coose epochs=n by experience
                                batch_size=16, 
                                # callbacks=[ckp, es],                                       # If you want to use, uncomment
                                verbose=1)
            
            hist = pd.DataFrame(history.history)
            score = hist['val_AUC'].max()
            print(f'AUC:', score)

        ##### Test model on one stock #####
        pred_dir = model.predict(test_X)
        pred_dir = (pred_dir > 0.5).astype(int)

        result_df = pd.DataFrame(pred_dir, columns=['Pred'])
        result_df['True'] = test_y

        match_count = (result_df['Pred'] == result_df['True']).sum()
        correct = match_count / len(result_df)
        print(f'ACC: {correct}\n')

        tf.keras.backend.clear_session() # clear memory

        ##### Add to result dataframe #####
        experiment_0050_result = pd.concat([experiment_0050_result, result_df], axis=0, ignore_index=True)

    except:
        print(f'{TICKER} import data failed.')
        continue


#### Final ACC ####
whole_match_count = (experiment_0050_result['Pred'] == experiment_0050_result['True']).sum()
whole_correct = whole_match_count / len(experiment_0050_result)
print(f'\n Whole 0050 ACC: {whole_correct} \n')

In [None]:
experiment_0050_result.to_csv(f'LSTM-AM_Tp={TP}_result.csv', index=False)
print(f'\n Whole 0050 ACC: {whole_correct} \n')

experiment_0050_result.head(60)


 Whole 0050 ACC: 0.4851190476190476 



Unnamed: 0,Pred,True
0,1,1.0
1,1,1.0
2,1,0.0
3,1,0.0
4,1,1.0
5,1,1.0
6,1,1.0
7,1,1.0
8,1,1.0
9,1,1.0
