In [1]:
import numpy as np
import pandas as pd

from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import tensorflow as tf
tf.random.set_seed(87)

import keras.backend as K
from keras.models import Model
from keras.layers import Input, Layer, Dot, RepeatVector, Activation, Add, Lambda, Concatenate
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import EarlyStopping, ModelCheckpoint

import keras_tuner as kt
from keras_tuner.engine.hyperparameters import HyperParameters
from livelossplot import PlotLossesKeras

import warnings
warnings.filterwarnings('ignore')

Attention Layer

In [2]:
import os

# KERAS_ATTENTION_DEBUG: If set to 1. Will switch to debug mode.
# In debug mode, the class Attention is no longer a Keras layer.
# What it means in practice is that we can have access to the internal values
# of each tensor. If we don't use debug, Keras treats the object
# as a layer, and we can only get the final output.
debug_flag = int(os.environ.get('KERAS_ATTENTION_DEBUG', 0))


# References:
# - https://arxiv.org/pdf/1508.04025.pdf (Luong).
# - https://arxiv.org/pdf/1409.0473.pdf (Bahdanau).
# - https://machinelearningmastery.com/the-bahdanau-attention-mechanism/ (Some more explanation).

class Attention(object if debug_flag else Layer):
    SCORE_LUONG = 'luong'
    SCORE_BAHDANAU = 'bahdanau'

    def __init__(self, units: int = 128, score: str = 'luong', **kwargs):
        super(Attention, self).__init__(**kwargs)
        if score not in {self.SCORE_LUONG, self.SCORE_BAHDANAU}:
            raise ValueError(f'Possible values for score are: [{self.SCORE_LUONG}] and [{self.SCORE_BAHDANAU}].')
        self.units = units
        self.score = score

    # noinspection PyAttributeOutsideInit
    def build(self, input_shape):
        input_dim = int(input_shape[-1])
        with K.name_scope(self.name if not debug_flag else 'attention'):
            # W in W*h_S.
            if self.score == self.SCORE_LUONG:
                self.luong_w = Dense(input_dim, use_bias=False, name='luong_w')
                # dot : last hidden state H_t and every hidden state H_s.
                self.luong_dot = Dot(axes=[1, 2], name='attention_score')
            else:
                # Dense implements the operation: output = activation(dot(input, kernel) + bias)
                self.bahdanau_v = Dense(1, use_bias=False, name='bahdanau_v')
                self.bahdanau_w1 = Dense(input_dim, use_bias=False, name='bahdanau_w1')
                self.bahdanau_w2 = Dense(input_dim, use_bias=False, name='bahdanau_w2')
                self.bahdanau_repeat = RepeatVector(input_shape[1])
                self.bahdanau_tanh = Activation('tanh', name='bahdanau_tanh')
                self.bahdanau_add = Add()

            self.h_t = Lambda(lambda x: x[:, -1, :], output_shape=(input_dim,), name='last_hidden_state')

            # exp / sum(exp) -> softmax.
            self.softmax_normalizer = Activation('softmax', name='attention_weight')

            # dot : score * every hidden state H_s.
            # dot product. SUM(v1*v2). H_s = every source hidden state.
            self.dot_context = Dot(axes=[1, 1], name='context_vector')

            # [Ct; ht]
            self.concat_c_h = Concatenate(name='attention_output')

            # x -> tanh(w_c(x))
            self.w_c = Dense(self.units, use_bias=False, activation='tanh', name='attention_vector')
        if not debug_flag:
            # debug: the call to build() is done in call().
            super(Attention, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        return input_shape[0], self.units

    def __call__(self, inputs, training=None, **kwargs):
        if debug_flag:
            return self.call(inputs, training, **kwargs)
        else:
            return super(Attention, self).__call__(inputs, training, **kwargs)

    # noinspection PyUnusedLocal
    def call(self, inputs, training=None, **kwargs):
        """
        Many-to-one attention mechanism for Keras. Supports:
            - Luong's multiplicative style.
            - Bahdanau's additive style.
        @param inputs: 3D tensor with shape (batch_size, time_steps, input_dim).
        @param training: not used in this layer.
        @return: 2D tensor with shape (batch_size, units)
        @author: philipperemy, felixhao28.
        """
        h_s = inputs
        if debug_flag:
            self.build(h_s.shape)
        h_t = self.h_t(h_s)
        if self.score == self.SCORE_LUONG:
            # Luong's multiplicative style.
            score = self.luong_dot([h_t, self.luong_w(h_s)])
        else:
            # Bahdanau's additive style.
            self.bahdanau_w1(h_s)
            a1 = self.bahdanau_w1(h_t)
            a2 = self.bahdanau_w2(h_s)
            a1 = self.bahdanau_repeat(a1)
            score = self.bahdanau_tanh(self.bahdanau_add([a1, a2]))
            score = self.bahdanau_v(score)
            score = K.squeeze(score, axis=-1)

        alpha_s = self.softmax_normalizer(score)
        context_vector = self.dot_context([h_s, alpha_s])
        a_t = self.w_c(self.concat_c_h([context_vector, h_t]))
        return a_t

    def get_config(self):
        config = super(Attention, self).get_config()
        config.update({'units': self.units, 'score': self.score})
        return config

# Data preprocessing

1.1. Import data

In [3]:
#############
TICKER = 2330
TP = 10
#############

### import data ###
data = pd.read_csv('/Users/yitsung/Desktop/MasterThesis/data/TaiwanStockData_Top100_EMA')
ticker_data = data[data['ticker']==TICKER].reset_index(drop=True)
ticker_data = ticker_data.drop(columns=['ticker'])

### generate y ###

# ver.1(P-P, 2class) #
# ticker_data[f'y_after_{TP}'] = ticker_data['close'].shift(-TP)
# ticker_data[f'y_after_{TP}'] = ticker_data[f'y_after_{TP}'] - ticker_data['close']
# ticker_data = ticker_data.dropna().reindex()
# ticker_data[f'y_after_{TP}'] = (ticker_data[f'y_after_{TP}'] >= 0).astype(int)

# ver.2(SMA-P/P, 2class) #
ticker_data[f'y_{TP}'] = ticker_data['close'].rolling(window=TP).mean()
ticker_data[f'y_{TP}'] = ticker_data[f'y_{TP}'].shift(-TP)
ticker_data = ticker_data.dropna().reindex()
ticker_data[f'y_{TP}'] = ((ticker_data[f'y_{TP}'] - ticker_data['close']) >= 0).astype(int)

# ver.3(SMA-SMA/SMA, 3class) #
########## not yet ###########

# ### origi data ###
# origi_data = ticker_data.copy()

# ### diff data ###
# ticker_data['open'] = ticker_data['open'].diff()
# ticker_data['high'] = ticker_data['high'].diff()
# ticker_data['low'] = ticker_data['low'].diff()
# ticker_data['close'] = ticker_data['close'].diff()

# ticker_data['EMA9'] = ticker_data['EMA9'].diff()
# ticker_data['EMA12'] = ticker_data['EMA12'].diff()
# ticker_data['EMA26'] = ticker_data['EMA26'].diff()

# ticker_data.replace([float('inf'), -float('inf')], 0, inplace=True) # 不知道為何有些調整過後會變inf, 要拿掉(應該是連兩天的價格都相同)
# ticker_data = ticker_data.dropna().reset_index(drop=True)

# ### move 'y' to the last column ###
# y_column = ticker_data.pop(f'y_after_{TP}')
# ticker_data[f'y_after_{TP}'] = y_column

ticker_data

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,y_10
0,2021-01-04,530.0,540.0,528.0,536.0,39490.0,454.0,12463.0,-33.0,865.0,2342.0,6.0443,521.295251,518.980386,513.251221,5.729165,3.933239,84.477581,1
1,2021-01-05,536.0,542.0,535.0,542.0,34839.0,-355.0,2884.0,179.0,-451.0,-1374.0,5.3592,525.437881,522.532126,515.535238,6.996887,4.619674,88.417310,1
2,2021-01-06,555.0,555.0,541.0,549.0,55614.0,-256.0,5355.0,105.0,-4163.0,1.0,6.9696,530.151835,526.614084,518.179719,8.434365,5.454306,91.005801,1
3,2021-01-07,554.0,570.0,553.0,565.0,53393.0,2200.0,1671.0,-75.0,2060.0,-402.0,8.7664,537.123278,532.531850,521.861371,10.670478,6.574521,93.325963,1
4,2021-01-08,580.0,580.0,571.0,580.0,62957.0,-502.0,3278.0,187.0,1176.0,-5041.0,9.0658,545.700404,539.847445,526.412277,13.435169,8.026473,94.939847,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
694,2023-11-13,579.0,580.0,571.0,571.0,46556.0,-525.0,16064.0,813.0,109.0,15223.0,9.1131,554.525921,552.006718,546.507936,5.498782,2.554078,64.654070,1
695,2023-11-14,576.0,576.0,571.0,572.0,25367.0,128.0,10421.0,-25.0,1044.0,6574.0,5.2297,558.020737,555.082608,548.396237,6.686370,3.380536,68.577468,1
696,2023-11-15,583.0,583.0,575.0,581.0,53122.0,-749.0,26002.0,-684.0,-71.0,16167.0,7.9897,562.616589,559.069899,550.811331,8.258568,4.356142,78.365103,0
697,2023-11-16,581.0,583.0,578.0,583.0,31059.0,-377.0,12982.0,105.0,518.0,9102.0,6.1995,566.693271,562.751453,553.195677,9.555776,5.396069,84.738876,0


1.2.Splite data into train(Library) and test(Prediction)

In [4]:
Library = ticker_data[ticker_data['Date'] <= '2023-06-30'] # windows=20, 最後預測到6/30
Prediction = ticker_data[(ticker_data['Date'] >= '2023-06-01')&(ticker_data['Date'] <= '2023-10-31')] # windows=20, 從6/1預測7/3開始
Prediction.tail()

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,y_10
681,2023-10-25,544.0,551.0,544.0,544.0,17137.0,-99.0,-2573.0,651.0,-142.0,1185.0,3.9095,544.649068,543.673118,541.716547,1.956572,0.734048,91.909547,0
682,2023-10-26,530.0,535.0,530.0,531.0,31683.0,487.0,-10712.0,-35173.0,-1744.0,-10261.0,6.9033,541.919255,541.723408,540.922728,0.800679,0.747374,85.178131,1
683,2023-10-27,534.0,536.0,532.0,533.0,17051.0,17.0,-5262.0,1478.0,-73.0,-1739.0,4.1968,540.135404,540.381345,540.33586,0.045485,0.606996,78.927028,1
684,2023-10-30,531.0,534.0,528.0,532.0,23299.0,265.0,-11811.0,487.0,378.0,-5803.0,5.6532,538.508323,539.091907,539.718389,-0.626481,0.360301,72.836426,1
685,2023-10-31,535.0,535.0,527.0,529.0,28073.0,113.0,-9363.0,495.0,-358.0,-5392.0,5.4314,536.606658,537.539306,538.924434,-1.385128,0.011215,63.359478,1


1.3.Data Normalize

In [5]:
def make_data_minmax(Library, Prediction):

    # MinMax #
    scaler_X = MinMaxScaler()
    feature_to_standardize = Library.columns.to_list()[1 : -1]
    Library[feature_to_standardize] = scaler_X.fit_transform(Library[feature_to_standardize])
    Prediction[feature_to_standardize] = scaler_X.fit_transform(Prediction[feature_to_standardize])

    scaler_y = MinMaxScaler()
    Lib_ans = Library.iloc[:, -1].values.reshape(-1, 1)
    Library.iloc[:, -1] = scaler_y.fit_transform(Lib_ans)
    Pred_ans = Prediction.iloc[:, -1].values.reshape(-1, 1)
    Prediction.iloc[:, -1] = scaler_y.fit_transform(Pred_ans)

    return Library, Prediction, scaler_y

### 切train和validation ###
train_Library = Library[: int((len(Library) * 0.8))]
valid_Library = Library[int((len(Library) * 0.8)): ]
train_Library, valid_Library, _ = make_data_minmax(Library=train_Library, Prediction=valid_Library)

### 切完整data ###
Library, Prediction, _ = make_data_minmax(Library=Library, Prediction=Prediction)

1.4.Make window data: X, y

In [6]:
def data_preprocess(data, window_size):

    X = np.array(data.iloc[:, 1: -1])
    y = data.iloc[:, -1].values.reshape(-1, 1)

    data_X, data_y = [], []
    for i in range(len(data) - window_size + 1):
        data_X.append(X[i : (i + window_size), :])
        data_y.append(y[i + window_size - 1])

    data_X, data_y = np.array(data_X), np.array(data_y)
        
    return data_X, data_y

train_X, train_y = data_preprocess(data=train_Library, window_size=20)
valid_X, valid_y = data_preprocess(data=valid_Library, window_size=20)
full_X, full_y = data_preprocess(data=Library, window_size=20)
test_X, test_y = data_preprocess(data=Prediction, window_size=20)

1.5.Over-smapling

In [7]:
ros = RandomOverSampler(random_state=87) # 初始化過採樣器
train_X_resampled, train_y_resampled = ros.fit_resample(train_X.reshape(train_X.shape[0], -1), train_y)# 對訓練集進行過採樣
train_X_resampled = train_X_resampled.reshape(-1, train_X.shape[1], train_X.shape[2]) # 將過採樣後的數據重新整形成原來的格式

# ############
train_y_resampled = train_y_resampled.reshape(-1,1)
# ############

# 檢查過採樣後的資料大小 #
print("Shape of resampled train_X:", train_X_resampled.shape)
print("Shape of resampled train_y:", train_y_resampled.shape)
# 檢查過採樣後的正類樣本數量 #
print("Number of positive samples after resampling:", train_y_resampled.sum())

Shape of resampled train_X: (514, 20, 17)
Shape of resampled train_y: (514, 1)
Number of positive samples after resampling: 257


In [8]:
ros = RandomOverSampler(random_state=87) # 初始化過採樣器
full_X_resampled, full_y_resampled = ros.fit_resample(full_X.reshape(full_X.shape[0], -1), full_y)# 對訓練集進行過採樣
full_X_resampled = full_X_resampled.reshape(-1, full_X.shape[1], full_X.shape[2]) # 將過採樣後的數據重新整形成原來的格式

# ############
full_y_resampled = full_y_resampled.reshape(-1,1)
# ############

# 檢查過採樣後的資料大小 #
print("Shape of resampled full_X:", full_X_resampled.shape)
print("Shape of resampled full_y:", full_y_resampled.shape)
# 檢查過採樣後的正類樣本數量 #
print("Number of positive samples after resampling:", full_y_resampled.sum())

Shape of resampled full_X: (618, 20, 17)
Shape of resampled full_y: (618, 1)
Number of positive samples after resampling: 309


# Create model

In [9]:
#############
TUNNING = False

params = {'X_shape': train_X.shape,
          'hidden_units': [48, 112, 32, 64, 32], 
          'dropout_rates': [0.2, 0.8],
          'ls': 0.001, 'lr': 1e-03}
#############

2.1.Create model and find hyperparameter

In [10]:
def tunning_model(hp, X_shape):

    # tf.random.set_seed(87)

    #############################################
    hidden_units = [hp.Int(name=f"units_{i}", min_value=16, max_value=256, step=16) for i in range(1, 6)]
    dropout_rates = [hp.Choice(f"dropout_{i}", [0.0, 0.2, 0.5, 0.8]) for i in range(1, 3)]
    ls = hp.Choice('ls',[1e-2, 1e-3, 1e-5])
    lr = hp.Choice('lr',[1e-2, 1e-3, 1e-5])
    #############################################
    
    inp = Input(shape = (X_shape[1], X_shape[2]))

    x = LSTM(hidden_units[0], return_sequences=True)(inp)
    x = LSTM(hidden_units[1], return_sequences=True)(x)
    x = Attention(hidden_units[2])(x)
    
    x = Dense(hidden_units[3])(x)
    x = Dropout(dropout_rates[0])(x)
    x = Dense(hidden_units[4])(x)
    x = Dropout(dropout_rates[1])(x)

    out = Dense(1, activation = 'sigmoid')(x)

    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=lr),
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=ls), 
                  metrics=tf.keras.metrics.AUC(name='AUC'))

    return model

if TUNNING:
    model_fn = lambda hp: tunning_model(hp, X_shape=train_X.shape)
    tuner = kt.BayesianOptimization(model_fn,
                                    objective=kt.Objective("val_AUC", direction="max"),
                                    max_trials=10,
                                    executions_per_trial=2,
                                    directory="model_kt",
                                    overwrite=True,
                                    seed=87)
    path = f'model.hdf5'
    ckp = ModelCheckpoint(path, monitor='val_AUC', verbose = 0, 
                          save_best_only=True, save_weights_only=True, mode='max')
    es = EarlyStopping(monitor='val_AUC', min_delta=1e-4, patience=10, mode='max', 
                       baseline=None, restore_best_weights=True, verbose=1)
    
    tuner.search(train_X_resampled, train_y_resampled, validation_data=(valid_X, valid_y),
                 epochs=100, batch_size=16, callbacks=[ckp, es], verbose=1)
    model = tuner.get_best_models()[0]

    best_hyperparameters = tuner.get_best_hyperparameters()[0]
    print("Best Hyperparameters:")
    print(best_hyperparameters.values)

2.2.Train model(with parameter)

In [11]:
def create_model(X_shape, hidden_units, dropout_rates, lr, ls):

    tf.random.set_seed(87)

    inp = Input(shape = (X_shape[1], X_shape[2]))

    x = LSTM(hidden_units[0], return_sequences=True)(inp)
    x = LSTM(hidden_units[1], return_sequences=True)(x)
    x = Attention(hidden_units[2])(x)
    
    x = Dense(hidden_units[3])(x)
    x = Dropout(dropout_rates[0])(x)
    x = Dense(hidden_units[4])(x)
    x = Dropout(dropout_rates[1])(x)

    out = Dense(1, activation = 'sigmoid')(x)

    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=lr), # or RMSprop
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=ls), 
                  metrics=tf.keras.metrics.AUC(name='AUC'))

    return model

if TUNNING == False:

    path = f'model.hdf5'
    model = create_model(**params)
    # ckp = ModelCheckpoint(path, monitor='val_AUC', verbose = 0, 
    #                       save_best_only=True, save_weights_only=True, mode='max')
    # es = EarlyStopping(monitor='val_AUC', min_delta=1e-4, patience=10, mode='max', # 若不EarlyStopping就先拿掉
    #                    baseline=None, restore_best_weights=True, verbose=0)
    
    history = model.fit(full_X_resampled, full_y_resampled, # train_X_resampled, train_y_resampled,
                        validation_split=0.2, shuffle=True, # validation_data=(valid_X, valid_y),                   
                        # sample_weight = sw[tr], 
                        epochs=100, batch_size=16, 
                        # callbacks=[ckp, es], # 若不EarlyStopping就先拿掉
                        verbose=1)
    
    hist = pd.DataFrame(history.history)
    score = hist['val_AUC'].max()
    print(f'AUC:', score)

Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB

Epoch 1/100


2024-03-21 11:09:39.984707: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2024-03-21 11:09:41.036024: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }




2024-03-21 11:09:43.373250: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

2.3.Test model on one stock

In [13]:
pred_dir = model.predict(test_X)
pred_dir = (pred_dir > 0.5).astype(int)

result_df = pd.DataFrame(pred_dir, columns=['Pred'])
result_df['True'] = test_y

match_count = (result_df['Pred'] == result_df['True']).sum()
correct = match_count / len(result_df)

print(f'ACC: {correct}\n')
result_df.head(60)

ACC: 0.44047619047619047



Unnamed: 0,Pred,True
0,0,1
1,0,0
2,0,0
3,0,0
4,1,1
5,1,1
6,1,1
7,1,1
8,1,0
9,0,0
