In [66]:
import warnings
from datetime import timedelta, datetime

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode as inm
import statsmodels.api as sm
import talib as ta
import yfinance as yf
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss
from numpy_ext import rolling_apply as rolling_apply_ext

inm(True)
import plotly.io as pio
pio.templates.default = "presentation"

# Data Preprocessing

In [16]:
df = pd.read_csv('assets/data/AMZN+AAPL/AAPL.csv')

df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date')
df.columns = ['Open', 'High', 'Low', 'Adj Close', 'Volume', 'Average', 'Bar Count']
df.columns = pd.MultiIndex.from_tuples([('Price', col) for col in df.columns])
df = df.sort_index()

In [19]:
df_ = df.copy()

In [20]:
# Technical Indicators
ticker = 'Price'
df_.loc[:, (ticker, 'ret')] = np.log(df[ticker]['Adj Close'].pct_change() + 1)
# upperband, middleband, lowerband = ta.BBANDS(df[ticker]['Adj Close'], timeperiod=40)
# df_.loc[:, (ticker, 'bbands')] = df[ticker]['Adj Close'] - middleband / (upperband - lowerband)

df_.loc[:, (ticker, 'ema1w')] = ta.EMA(df[ticker]['Adj Close'], timeperiod=5)
df_.loc[:, (ticker, 'ema2w')] = ta.EMA(df[ticker]['Adj Close'], timeperiod=10)
df_.loc[:, (ticker, 'ema1m')] = ta.EMA(df[ticker]['Adj Close'], timeperiod=21)

df_.loc[:, (ticker, 'sma1w')] = ta.SMA(df[ticker]['Adj Close'], timeperiod=5)
df_.loc[:, (ticker, 'sma2w')] = ta.SMA(df[ticker]['Adj Close'], timeperiod=10)
df_.loc[:, (ticker, 'sma1m')] = ta.SMA(df[ticker]['Adj Close'], timeperiod=21)

df_.loc[:, (ticker, 'cci')] = ta.CCI(df[ticker]['High'], df[ticker]['Low'], df[ticker]['Adj Close'])

df_.loc[:, (ticker, 'macd')] = ta.MACD(df[ticker]['Adj Close'])[2]

df_.loc[:, (ticker, 'stochrsi')] = ta.STOCHRSI(df[ticker]['Adj Close'])[1] # smoothed

df_.loc[:, (ticker, 'willr')] = ta.WILLR(df[ticker]['High'], df[ticker]['Low'], df[ticker]['Adj Close'])

df_.loc[:, (ticker, 'ad')] = ta.AD(df[ticker]['High'], df[ticker]['Low'], df[ticker]['Adj Close'], df[ticker]['Volume'])
df_.loc[:, (ticker, 'adosc')] = ta.ADOSC(df[ticker]['High'], df[ticker]['Low'], df[ticker]['Adj Close'], df[ticker]['Volume'])

df_.loc[:, (ticker, 'ht_inphase')] = ta.HT_PHASOR(df[ticker]['Adj Close'])[0] # trend
df_.loc[:, (ticker, 'ht_quadrature')] = ta.HT_PHASOR(df[ticker]['Adj Close'])[1] # cycle or oscillatory

df_.loc[:, (ticker, 'atr')] = ta.ATR(df[ticker]['High'], df[ticker]['Low'], df[ticker]['Adj Close'])

In [22]:
# Price Moments
df_.loc[:, (ticker, 'var1m')] = df_[ticker]['Adj Close'].rolling(window=20).var()
df_.loc[:, (ticker, 'var3m')] = df_[ticker]['Adj Close'].rolling(window=60).var()
df_.loc[:, (ticker, 'skew1m')] = df_[ticker]['Adj Close'].rolling(window=20).skew()
df_.loc[:, (ticker, 'skew3m')] = df_[ticker]['Adj Close'].rolling(window=60).skew()
df_.loc[:, (ticker, 'kurt1m')] = df_[ticker]['Adj Close'].rolling(window=20).kurt()
df_.loc[:, (ticker, 'kurt3m')] = df_[ticker]['Adj Close'].rolling(window=60).kurt()

['var1m'] used in m1
['var3m', 'skew1m', 'skew3m', 'kurt1m', 'kurt3m'] used in m2

## drop nan

In [23]:
df_ = df_[~np.any(df_.isnull(), axis=1)]
df_ = df_[~df_.index.duplicated()]

In [24]:
# Return
ticker='Price'
ret = pd.DataFrame()
ret[ticker] = np.log(df_[ticker]['Adj Close'].pct_change() + 1)
df_ = df_.shift(1)  # shift features to align with return
df_ = df_.dropna()
ret = ret.dropna()

In [27]:
# discretize return
ret_ = pd.DataFrame(np.where(ret > 0 , 1, -1), index=ret.index, columns=ret.columns)

In [34]:
# Correlation between features and return
ticker = 'Price'
tickers = [ticker]
corr = pd.Series()
for i in df_[ticker].columns:
    corr.loc[i] = np.corrcoef(df_[ticker][i], ret_[ticker])[0, 1]
abs(corr).sort_values(ascending=False)

willr            0.024981
stochrsi         0.019487
Volume           0.019206
cci              0.019043
Bar Count        0.016980
ret              0.015201
atr              0.013187
macd             0.009234
skew1m           0.008857
adosc            0.008424
sma1m            0.005974
ema1m            0.005969
sma2w            0.005968
ema2w            0.005962
sma1w            0.005957
ema1w            0.005953
High             0.005953
Open             0.005944
Average          0.005944
Adj Close        0.005936
Low              0.005933
var3m            0.005043
ht_quadrature    0.004088
skew3m           0.003053
var1m            0.002313
ht_inphase       0.001967
ad               0.001478
kurt1m           0.001421
kurt3m           0.000100
dtype: float64

In [35]:
badfeats = abs(corr)[abs(corr) <= 0.01]

# Primary Model

In [36]:
feat1 = ['Adj Close', 'High', 'Low', 'Open', 'Volume', 'ret', 
#          'bbands',
       'ema1w', 'ema2w', 'ema1m', 'sma1w', 'sma2w', 'sma1m', 'cci', 'macd',
       'stochrsi', 'willr', 'ad', 'adosc', 'ht_inphase', 'ht_quadrature',
       'atr', 'var1m']

# delete features with low correlation
# feat1 = list(set(feat1).difference(set(badfeats)))

In [37]:
len(feat1)

22

In [38]:
m1_subset_columns = []
for i in tickers:
    for j in feat1:
        m1_subset_columns.append((i, j))

## data

In [44]:
# Prepare features and label for primary model
all_x = df_[m1_subset_columns]
all_x = all_x[((all_x.index.year >= 2014) & (all_x.index.year < 2020))]

all_y = ret.copy()
all_y[all_y>0] = 1
all_y[~ (all_y>0)] = 0
all_y = all_y[((all_y.index.year >= 2014) & (all_y.index.year < 2020))]

### split

#### k-fold split

In [45]:
# Expanding window cross validation split
fold_length = int(np.ceil(len(all_x) / 10))

splits = [
    [4*fold_length, 5*fold_length],
    [5*fold_length, 6*fold_length],
    [6*fold_length, 7*fold_length],
    [7*fold_length, 8*fold_length],
    [8*fold_length, 10*fold_length],
]

In [46]:
es = EarlyStopping(monitor='loss', mode='min', min_delta=0.001, verbose=1, patience = 3)

In [50]:
tn = keras.metrics.TrueNegatives()
fn = keras.metrics.FalseNegatives()
tp = keras.metrics.TruePositives()
fp = keras.metrics.FalsePositives()
precision = keras.metrics.Precision()
recall = keras.metrics.Recall()

In [59]:
para_unit = 20
para_do = 0.2
para_bs = 16

for data_set_i in range(0, 5):
    train_end = splits[data_set_i][0]
    valid_end = splits[data_set_i][1]
    train_x, test_x = np.array(all_x)[:train_end], np.array(all_x)[train_end:valid_end]
    train_y, test_y = np.array(all_y)[:train_end], np.array(all_y)[train_end:valid_end]

    scaler = MinMaxScaler()
    train_x_scaled = scaler.fit_transform(train_x)
    test_x_scaled = scaler.fit_transform(test_x)

    time_step = 10
    n_samples = len(train_x_scaled) - time_step + 1
    n_features = train_x_scaled.shape[1]
    train_x_3d = np.zeros((n_samples, time_step, n_features))
    for i in range(n_samples):
        train_x_3d[i] = train_x_scaled[i:i+time_step]
    train_y = train_y[time_step-1:]

    n_samples = len(test_x_scaled) - time_step + 1
    n_features = test_x_scaled.shape[1]
    test_x_3d = np.zeros((n_samples, time_step, n_features))
    for i in range(n_samples):
        test_x_3d[i] = test_x_scaled[i:i+time_step]
    test_y = test_y[time_step-1:]


    model = Sequential()
    model.add(LSTM(units=para_unit, return_sequences=True, input_shape=(time_step, n_features)))
    model.add(Dropout(para_do))
    model.add(LSTM(units=para_unit, return_sequences=False, input_shape=(time_step, n_features)))
    model.add(Dropout(para_do))
    model.add(Dense(1, activation='sigmoid'))

    with tf.device('/cpu:0'):
        model.compile(loss='binary_crossentropy', 
                      optimizer='adam', 
                      metrics=['accuracy', precision, recall, tn, fn, tp, fp],
                     )
        model.fit(train_x_3d, train_y, epochs=10, batch_size=para_bs, verbose=1, callbacks=[es])
        loss, t1, t2, t3, t4, t5, t6, t7 = model.evaluate(test_x_3d, test_y, verbose=1)
        pred_y = model.predict(test_x_3d)
        np.save(f'res/AAPL_label_{data_set_i+1}.npy', pred_y)
        model.save(f'res/model/AAPL_m1_{data_set_i+1}.keras')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: early stopping
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: early stopping
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: early stopping
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 7: early stopping


## report

In [70]:
para_unit = 20
para_do = 0.2
para_bs = 16

metrics = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall', 'F1-score', 'Cross-entropy'])

for data_set_i in range(0, 5):
    train_end = splits[data_set_i][0]
    valid_end = splits[data_set_i][1]
    train_x, test_x = np.array(all_x)[:train_end], np.array(all_x)[train_end:valid_end]
    train_y, test_y = np.array(all_y)[:train_end], np.array(all_y)[train_end:valid_end]

    scaler = MinMaxScaler()
    train_x_scaled = scaler.fit_transform(train_x)
    test_x_scaled = scaler.fit_transform(test_x)

    time_step = 10
    n_samples = len(train_x_scaled) - time_step + 1
    n_features = train_x_scaled.shape[1]
    train_x_3d = np.zeros((n_samples, time_step, n_features))
    for i in range(n_samples):
        train_x_3d[i] = train_x_scaled[i:i+time_step]
    train_y = train_y[time_step-1:]

    n_samples = len(test_x_scaled) - time_step + 1
    n_features = test_x_scaled.shape[1]
    test_x_3d = np.zeros((n_samples, time_step, n_features))
    for i in range(n_samples):
        test_x_3d[i] = test_x_scaled[i:i+time_step]
    test_y = test_y[time_step-1:]

    model = keras.models.load_model(f'res/model/AAPL_m1_{data_set_i+1}.keras')
    pred_y = np.load(f'res/AAPL_label_{data_set_i+1}.npy')
    pred_class = np.where(pred_y.flatten()>0.5, 1, 0)
    accuracy = accuracy_score(test_y, pred_class)
    precision = precision_score(test_y, pred_class, average='weighted')
    recall = recall_score(test_y, pred_class, average='weighted')
    f1 = f1_score(test_y, pred_class, average='weighted')
    cross_entropy = log_loss(test_y, pred_y)
    
    metrics.loc[data_set_i+1] = [accuracy, precision, recall, f1, cross_entropy]

    # print(f"Data set {data_set_i+1}")
    # print("Accuracy:", accuracy)
    # print("Precision:", precision)
    # print("Recall:", recall)
    # print("F1-score:", f1)
    # print("Cross-entropy:", cross_entropy)
    # print("========================================")

    # with tf.device('/cpu:0'):
    #     loss, t1, t2, t3, t4, t5, t6, t7 = model.evaluate(test_x_3d, test_y, verbose=1)
        

In [71]:
metrics

Unnamed: 0,Accuracy,Precision,Recall,F1-score,Cross-entropy
1,0.550905,0.526036,0.550905,0.491304,0.686882
2,0.54265,0.517927,0.54265,0.442075,0.688742
3,0.53743,0.526928,0.53743,0.491129,0.68911
4,0.522216,0.51311,0.522216,0.440584,0.691713
5,0.518518,0.51264,0.518518,0.48948,0.692261


## CV for Hyperparameter Tuning

In [52]:
es = EarlyStopping(monitor='loss', mode='min', min_delta=0.0001, verbose=1, patience = 4)

In [54]:
tn = keras.metrics.TrueNegatives()
fn = keras.metrics.FalseNegatives()
tp = keras.metrics.TruePositives()
fp = keras.metrics.FalsePositives()
precision = keras.metrics.Precision()
recall = keras.metrics.Recall()

In [55]:
# Search space for hyperparameters
para_dict = {'units': [5, 20],
             'dropout_rate': [0, 0.2],
             'batch_size': [32, 16]
            }

In [57]:
cv_log = pd.DataFrame(columns=['dataset', 'units', 'dropout', 'batch_size', 'metrics'])

In [None]:
def cv_run(para_unit, para_do, para_bs, train_x, train_y, test_x, test_y):
    model = Sequential()
    model.add(LSTM(units=para_unit, return_sequences=True, input_shape=(time_step, n_features)))
    model.add(Dropout(para_do))
    model.add(LSTM(units=para_unit, return_sequences=False, input_shape=(time_step, n_features)))
    model.add(Dropout(para_do))
    model.add(Dense(1, activation='sigmoid'))

    with tf.device('/cpu:0'):
        model.compile(loss='binary_crossentropy', 
                      optimizer='adam', 
                      metrics=['accuracy', precision, recall, tn, fn, tp, fp],
                     )
        model.fit(train_x, train_y, epochs=10, batch_size=para_bs, verbose=1, callbacks=[es])
        loss, t1, t2, t3, t4, t5, t6, t7 = model.evaluate(test_x, test_y, verbose=1)
        return loss, t1, t2, t3, t4, t5, t6, t7

In [805]:
name = 0
for para_unit in para_dict['units']:
    for para_bs in para_dict['batch_size']:
        for para_do in para_dict['dropout_rate']:
            for datasets_i in range(5):
                name += 1
                print('!!!', para_unit, para_do, para_bs, datasets_i, '!!!')
                [train_x, train_y, test_x, test_y] = dd[datasets_i]
                loss, t1, t2, t3, t4, t5, t6, t7 = cv_run(para_unit, para_do, para_bs, train_x, train_y, test_x, test_y)
                cv_log = cv_log.append({'dataset': datasets_i,
                                        'units': para_unit,
                                        'dropout': para_do,
                                        'batch_size': para_bs,
                                        'metrics': [loss, t1, t2, t3, t4, t5, t6, t7]
                                       }, 
                                       ignore_index=True)
                cv_log.to_csv(str(name)+'.csv')

!!! 5 0 32 0 !!!
Epoch 1/10


2023-07-14 05:44:51.150241: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6939 - accuracy: 0.4945 - precision_1: 0.5277 - recall: 0.4306 - true_negatives_7: 145.0000 - false_negatives_3: 164.0000 - true_positives_3: 124.0000 - false_positives_3: 111.0000

2023-07-14 05:51:19.197132: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0 32 1 !!!
Epoch 1/10


2023-07-14 05:51:25.411870: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6848 - accuracy: 0.5368 - precision_1: 0.5124 - recall: 0.4806 - true_negatives_7: 168.0000 - false_negatives_3: 134.0000 - true_positives_3: 124.0000 - false_positives_3: 118.0000

2023-07-14 05:59:15.523096: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0 32 2 !!!
Epoch 1/10


2023-07-14 05:59:21.477086: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6866 - accuracy: 0.5478 - precision_1: 0.5429 - recall: 0.6884 - true_negatives_7: 108.0000 - false_negatives_3: 86.0000 - true_positives_3: 190.0000 - false_positives_3: 160.0000

2023-07-14 06:09:00.348081: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0 32 3 !!!
Epoch 1/10


2023-07-14 06:09:06.575672: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  18/1833 [..............................] - ETA: 5s - loss: 0.6875 - accuracy: 0.5469 - precision_1: 0.5190 - recall: 0.7870 - true_negatives_7: 97.0000 - false_negatives_3: 59.0000 - true_positives_3: 218.0000 - false_positives_3: 202.0000

2023-07-14 06:20:22.428357: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0 32 4 !!!
Epoch 1/10


2023-07-14 06:20:28.643909: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/3665 [..............................] - ETA: 11s - loss: 0.6899 - accuracy: 0.5515 - precision_1: 0.5452 - recall: 0.6606 - true_negatives_7: 119.0000 - false_negatives_3: 93.0000 - true_positives_3: 181.0000 - false_positives_3: 151.0000

2023-07-14 06:33:09.850740: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 32 0 !!!
Epoch 1/10


2023-07-14 06:33:21.136452: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6953 - accuracy: 0.5074 - precision_1: 0.5595 - recall: 0.3264 - true_negatives_7: 182.0000 - false_negatives_3: 194.0000 - true_positives_3: 94.0000 - false_positives_3: 74.0000

2023-07-14 06:39:50.237386: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 32 1 !!!
Epoch 1/10


2023-07-14 06:39:56.472262: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6857 - accuracy: 0.5460 - precision_1: 0.5242 - recall: 0.4612 - true_negatives_7: 178.0000 - false_negatives_3: 139.0000 - true_positives_3: 119.0000 - false_positives_3: 108.0000

2023-07-14 06:47:59.711665: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 32 2 !!!
Epoch 1/10


2023-07-14 06:48:05.903048: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6863 - accuracy: 0.5460 - precision_1: 0.5492 - recall: 0.5870 - true_negatives_7: 135.0000 - false_negatives_3: 114.0000 - true_positives_3: 162.0000 - false_positives_3: 133.0000

2023-07-14 06:57:34.370001: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 32 3 !!!
Epoch 1/10


2023-07-14 06:57:40.625521: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6864 - accuracy: 0.5533 - precision_1: 0.5420 - recall: 0.5358 - true_negatives_7: 159.0000 - false_negatives_3: 123.0000 - true_positives_3: 142.0000 - false_positives_3: 120.0000

2023-07-14 07:08:58.981233: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 32 4 !!!
Epoch 1/10


2023-07-14 07:09:05.351010: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/3665 [..............................] - ETA: 11s - loss: 0.6910 - accuracy: 0.5368 - precision_1: 0.5362 - recall: 0.5949 - true_negatives_7: 129.0000 - false_negatives_3: 111.0000 - true_positives_3: 163.0000 - false_positives_3: 141.0000

2023-07-14 07:22:17.019764: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0 16 0 !!!
Epoch 1/10


2023-07-14 07:22:28.400331: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6924 - accuracy: 0.5147 - precision_1: 0.5480 - recall: 0.4757 - true_negatives_7: 143.0000 - false_negatives_3: 151.0000 - true_positives_3: 137.0000 - false_positives_3: 113.0000

2023-07-14 07:35:02.960649: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0 16 1 !!!
Epoch 1/10


2023-07-14 07:35:09.209239: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6850 - accuracy: 0.5331 - precision_1: 0.5082 - recall: 0.4806 - true_negatives_7: 166.0000 - false_negatives_3: 134.0000 - true_positives_3: 124.0000 - false_positives_3: 120.0000

2023-07-14 07:50:47.291316: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0 16 2 !!!
Epoch 1/10


2023-07-14 07:50:53.491871: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6885 - accuracy: 0.5404 - precision_1: 0.5637 - recall: 0.4167 - true_negatives_7: 179.0000 - false_negatives_3: 161.0000 - true_positives_3: 115.0000 - false_positives_3: 89.0000

2023-07-14 08:09:47.429843: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0 16 3 !!!
Epoch 1/10


2023-07-14 08:09:53.747672: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6845 - accuracy: 0.5699 - precision_1: 0.5585 - recall: 0.5585 - true_negatives_7: 162.0000 - false_negatives_3: 117.0000 - true_positives_3: 148.0000 - false_positives_3: 117.0000

2023-07-14 08:31:45.820405: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0 16 4 !!!
Epoch 1/10


2023-07-14 08:31:51.998499: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/3665 [..............................] - ETA: 11s - loss: 0.6898 - accuracy: 0.5570 - precision_1: 0.5442 - recall: 0.7409 - true_negatives_7: 100.0000 - false_negatives_3: 71.0000 - true_positives_3: 203.0000 - false_positives_3: 170.0000

2023-07-14 08:54:07.503032: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 16 0 !!!
Epoch 1/10


2023-07-14 08:54:18.741029: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6923 - accuracy: 0.4963 - precision_1: 0.5271 - recall: 0.4722 - true_negatives_7: 134.0000 - false_negatives_3: 152.0000 - true_positives_3: 136.0000 - false_positives_3: 122.0000

2023-07-14 09:07:04.886770: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 16 1 !!!
Epoch 1/10


2023-07-14 09:07:11.014681: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 10: early stopping
  17/1833 [..............................] - ETA: 5s - loss: 0.6851 - accuracy: 0.5386 - precision_1: 0.5164 - recall: 0.4264 - true_negatives_7: 183.0000 - false_negatives_3: 148.0000 - true_positives_3: 110.0000 - false_positives_3: 103.0000

2023-07-14 09:23:02.351260: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 16 2 !!!
Epoch 1/10


2023-07-14 09:23:08.412870: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6860 - accuracy: 0.5386 - precision_1: 0.5394 - recall: 0.6196 - true_negatives_7: 122.0000 - false_negatives_3: 105.0000 - true_positives_3: 171.0000 - false_positives_3: 146.0000

2023-07-14 09:42:13.867741: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 16 3 !!!
Epoch 1/10


2023-07-14 09:42:20.038651: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6856 - accuracy: 0.5607 - precision_1: 0.5387 - recall: 0.6830 - true_negatives_7: 124.0000 - false_negatives_3: 84.0000 - true_positives_3: 181.0000 - false_positives_3: 155.0000

2023-07-14 10:04:15.133188: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 5 0.2 16 4 !!!
Epoch 1/10


2023-07-14 10:04:21.286103: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/3665 [..............................] - ETA: 12s - loss: 0.6913 - accuracy: 0.5294 - precision_1: 0.5197 - recall: 0.8650 - true_negatives_7: 51.0000 - false_negatives_3: 37.0000 - true_positives_3: 237.0000 - false_positives_3: 219.0000  

2023-07-14 10:26:58.591797: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 32 0 !!!
Epoch 1/10


2023-07-14 10:27:09.906765: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  16/1833 [..............................] - ETA: 6s - loss: 0.6915 - accuracy: 0.5215 - precision_1: 0.5533 - recall: 0.4982 - true_negatives_7: 132.0000 - false_negatives_3: 136.0000 - true_positives_3: 135.0000 - false_positives_3: 109.0000

2023-07-14 10:33:55.499409: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 32 1 !!!
Epoch 1/10


2023-07-14 10:34:01.888902: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  16/1833 [..............................] - ETA: 6s - loss: 0.6823 - accuracy: 0.5430 - precision_1: 0.5109 - recall: 0.4895 - true_negatives_7: 161.0000 - false_negatives_3: 122.0000 - true_positives_3: 117.0000 - false_positives_3: 112.0000

2023-07-14 10:42:22.919182: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 32 2 !!!
Epoch 1/10


2023-07-14 10:42:29.286157: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6871 - accuracy: 0.5478 - precision_1: 0.5540 - recall: 0.5580 - true_negatives_7: 144.0000 - false_negatives_3: 122.0000 - true_positives_3: 154.0000 - false_positives_3: 124.0000

2023-07-14 10:52:33.717976: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 32 3 !!!
Epoch 1/10


2023-07-14 10:52:39.984585: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  16/1833 [..............................] - ETA: 6s - loss: 0.6874 - accuracy: 0.5547 - precision_1: 0.5279 - recall: 0.7621 - true_negatives_7: 95.0000 - false_negatives_3: 59.0000 - true_positives_3: 189.0000 - false_positives_3: 169.0000

2023-07-14 11:04:24.285601: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 32 4 !!!
Epoch 1/10


2023-07-14 11:04:30.767179: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/3665 [..............................] - ETA: 11s - loss: 0.6904 - accuracy: 0.5276 - precision_1: 0.5209 - recall: 0.7737 - true_negatives_7: 75.0000 - false_negatives_3: 62.0000 - true_positives_3: 212.0000 - false_positives_3: 195.0000  

2023-07-14 11:17:53.720746: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 32 0 !!!
Epoch 1/10


2023-07-14 11:18:05.510787: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
   1/1833 [..............................] - ETA: 12:20 - loss: 0.6950 - accuracy: 0.5312 - precision_1: 0.6364 - recall: 0.3889 - true_negatives_7: 10.0000 - false_negatives_3: 11.0000 - true_positives_3: 7.0000 - false_positives_3: 4.0000

2023-07-14 11:24:50.136456: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 32 1 !!!
Epoch 1/10


2023-07-14 11:24:56.867583: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6846 - accuracy: 0.5368 - precision_1: 0.5138 - recall: 0.4341 - true_negatives_7: 180.0000 - false_negatives_3: 146.0000 - true_positives_3: 112.0000 - false_positives_3: 106.0000

2023-07-14 11:33:18.801272: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 32 2 !!!
Epoch 1/10


2023-07-14 11:33:25.302358: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  16/1833 [..............................] - ETA: 6s - loss: 0.6884 - accuracy: 0.5488 - precision_1: 0.5631 - recall: 0.4826 - true_negatives_7: 156.0000 - false_negatives_3: 134.0000 - true_positives_3: 125.0000 - false_positives_3: 97.0000

2023-07-14 11:43:36.388310: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 32 3 !!!
Epoch 1/10


2023-07-14 11:43:45.130041: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6857 - accuracy: 0.5680 - precision_1: 0.5446 - recall: 0.6906 - true_negatives_7: 126.0000 - false_negatives_3: 82.0000 - true_positives_3: 183.0000 - false_positives_3: 153.0000

2023-07-14 11:55:26.929354: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 32 4 !!!
Epoch 1/10


2023-07-14 11:55:33.300298: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  16/3665 [..............................] - ETA: 12s - loss: 0.6901 - accuracy: 0.5391 - precision_1: 0.5527 - recall: 0.5019 - true_negatives_7: 145.0000 - false_negatives_3: 130.0000 - true_positives_3: 131.0000 - false_positives_3: 106.0000

2023-07-14 12:08:56.817974: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 16 0 !!!
Epoch 1/10


2023-07-14 12:09:08.408397: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6930 - accuracy: 0.4926 - precision_1: 0.5280 - recall: 0.3924 - true_negatives_7: 155.0000 - false_negatives_3: 175.0000 - true_positives_3: 113.0000 - false_positives_3: 101.0000

2023-07-14 12:22:07.685797: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 16 1 !!!
Epoch 1/10


2023-07-14 12:22:13.957193: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  18/1833 [..............................] - ETA: 5s - loss: 0.6854 - accuracy: 0.5399 - precision_1: 0.5113 - recall: 0.5809 - true_negatives_7: 153.0000 - false_negatives_3: 114.0000 - true_positives_3: 158.0000 - false_positives_3: 151.0000

2023-07-14 12:38:22.388856: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 16 2 !!!
Epoch 1/10


2023-07-14 12:38:28.663157: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  16/1833 [..............................] - ETA: 6s - loss: 0.6867 - accuracy: 0.5547 - precision_1: 0.5649 - recall: 0.5212 - true_negatives_7: 149.0000 - false_negatives_3: 124.0000 - true_positives_3: 135.0000 - false_positives_3: 104.0000

2023-07-14 12:58:13.980218: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 16 3 !!!
Epoch 1/10


2023-07-14 12:58:20.475163: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6836 - accuracy: 0.5809 - precision_1: 0.5668 - recall: 0.5925 - true_negatives_7: 159.0000 - false_negatives_3: 108.0000 - true_positives_3: 157.0000 - false_positives_3: 120.0000

2023-07-14 13:21:20.092938: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0 16 4 !!!
Epoch 1/10


2023-07-14 13:21:26.441630: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/3665 [..............................] - ETA: 11s - loss: 0.6899 - accuracy: 0.5386 - precision_1: 0.5431 - recall: 0.5292 - true_negatives_7: 148.0000 - false_negatives_3: 129.0000 - true_positives_3: 145.0000 - false_positives_3: 122.0000

2023-07-14 13:44:17.583892: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 16 0 !!!
Epoch 1/10


2023-07-14 13:44:29.223621: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6910 - accuracy: 0.5129 - precision_1: 0.5404 - recall: 0.5347 - true_negatives_7: 125.0000 - false_negatives_3: 134.0000 - true_positives_3: 154.0000 - false_positives_3: 131.0000

2023-07-14 13:57:34.676673: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 16 1 !!!
Epoch 1/10


2023-07-14 13:57:40.989479: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  16/1833 [..............................] - ETA: 6s - loss: 0.6833 - accuracy: 0.5527 - precision_1: 0.5202 - recall: 0.5397 - true_negatives_7: 154.0000 - false_negatives_3: 110.0000 - true_positives_3: 129.0000 - false_positives_3: 119.0000

2023-07-14 14:13:55.741815: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 16 2 !!!
Epoch 1/10


2023-07-14 14:14:02.270796: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  16/1833 [..............................] - ETA: 6s - loss: 0.6868 - accuracy: 0.5449 - precision_1: 0.5607 - recall: 0.4633 - true_negatives_7: 159.0000 - false_negatives_3: 139.0000 - true_positives_3: 120.0000 - false_positives_3: 94.0000

2023-07-14 14:33:17.488180: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 16 3 !!!
Epoch 1/10


2023-07-14 14:33:23.919481: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/1833 [..............................] - ETA: 5s - loss: 0.6872 - accuracy: 0.5607 - precision_1: 0.5346 - recall: 0.7585 - true_negatives_7: 104.0000 - false_negatives_3: 64.0000 - true_positives_3: 201.0000 - false_positives_3: 175.0000

2023-07-14 14:55:55.124864: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


!!! 20 0.2 16 4 !!!
Epoch 1/10


2023-07-14 14:56:01.500910: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  17/3665 [..............................] - ETA: 11s - loss: 0.6901 - accuracy: 0.5276 - precision_1: 0.5246 - recall: 0.6606 - true_negatives_7: 106.0000 - false_negatives_3: 93.0000 - true_positives_3: 181.0000 - false_positives_3: 164.0000

2023-07-14 15:18:58.105238: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




In [834]:
cv_log[cv_log['dataset']!=4].groupby(['batch_size']).mean()

Unnamed: 0_level_0,loss,accuracy,precision,recall,tn,fn,tp,fp
batch_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
16,0.690444,0.529595,0.530819,0.526462,15623.5,13872.125,15430.875,13711.5
32,0.690583,0.528044,0.530573,0.527219,15505.5,13845.0625,15457.9375,13829.5


In [836]:
cv_log[cv_log['dataset']!=4].groupby(['units', 'dropout', 'batch_size']).mean().reset_index().sort_values('precision')

Unnamed: 0,units,dropout,batch_size,loss,accuracy,precision,recall,tn,fn,tp,fp
1,5,0.0,32,0.690767,0.527265,0.526395,0.608635,13070.25,11455.5,17847.5,16264.75
5,20,0.0,32,0.690421,0.52877,0.527506,0.582354,13933.75,12230.75,17072.25,15401.25
2,5,0.2,16,0.690771,0.528245,0.529489,0.542643,15072.5,13400.25,15902.75,14262.5
6,20,0.2,16,0.690309,0.530701,0.530517,0.554827,14854.5,13038.25,16264.75,14480.5
0,5,0.0,16,0.69046,0.529132,0.531417,0.493612,16563.75,14839.5,14463.5,12771.25
4,20,0.0,16,0.690237,0.5303,0.531852,0.514765,16003.25,14210.5,15092.5,13331.75
3,5,0.2,32,0.690598,0.527141,0.533047,0.457234,17507.0,15899.5,13403.5,11828.0
7,20,0.2,32,0.690547,0.529,0.535344,0.460653,17511.0,15794.5,13508.5,11824.0


In [829]:
cv_log[(cv_log['units']==20) & (cv_log['dropout']==0.2) & (cv_log['batch_size']==16)]

Unnamed: 0,dataset,units,dropout,batch_size,loss,accuracy,precision,recall,tn,fn,tp,fp
35,0,20,0.2,16,0.689126,0.538081,0.531174,0.56749,15084.0,12551.0,16468.0,14535.0
36,1,20,0.2,16,0.690543,0.5301,0.536453,0.459201,17589.0,15893.0,13495.0,11661.0
37,2,20,0.2,16,0.689651,0.534039,0.536805,0.484625,17131.0,15084.0,14184.0,12239.0
38,3,20,0.2,16,0.691917,0.520584,0.517637,0.707993,9614.0,8625.0,20912.0,19487.0
39,4,20,0.2,16,0.692316,0.518913,0.517766,0.602888,25373.0,23372.0,35483.0,33048.0


In [826]:
cv_log[cv_log['dataset']==4].groupby(['units', 'dropout', 'batch_size']).mean().reset_index().sort_values('accuracy')

Unnamed: 0,units,dropout,batch_size,loss,accuracy,precision,recall,tn,fn,tp,fp
2,5,0.2,16,0.692594,0.51308,0.509396,0.806474,12707.0,11390.0,47465.0,45714.0
5,20,0.0,32,0.692429,0.516747,0.513926,0.683799,20357.0,18610.0,40245.0,38064.0
7,20,0.2,32,0.692964,0.518009,0.522963,0.450616,34229.0,32334.0,26521.0,24192.0
0,5,0.0,16,0.692389,0.518179,0.515917,0.64681,22702.0,20787.0,38068.0,35719.0
4,20,0.0,16,0.693277,0.518316,0.525165,0.419285,36109.0,34178.0,24677.0,22312.0
1,5,0.0,32,0.692327,0.518682,0.517609,0.601342,25437.0,23463.0,35392.0,32984.0
6,20,0.2,16,0.692316,0.518913,0.517766,0.602888,25373.0,23372.0,35483.0,33048.0
3,5,0.2,32,0.692252,0.518947,0.519035,0.564982,27608.0,25603.0,33252.0,30813.0


## threshold

In [72]:
time_step = 10

In [73]:
# raw_scores = model.predict(np.concatenate([train_x, test_x], axis=0)).flatten()
# true_labels = np.concatenate([train_y, test_y], axis=0)

pred_y = np.concatenate([np.load('res/AAPL_label_1.npy'),
                         np.load('res/AAPL_label_2.npy'),
                         np.load('res/AAPL_label_3.npy'),
                         np.load('res/AAPL_label_4.npy'),
                         np.load('res/AAPL_label_5.npy'),
                         ])
raw_prob = pred_y.flatten()
true_side = np.concatenate([all_y[splits[0][0]+time_step-1: splits[0][1]],
                            all_y[splits[1][0]+time_step-1: splits[1][1]],
                            all_y[splits[2][0]+time_step-1: splits[2][1]],
                            all_y[splits[3][0]+time_step-1: splits[3][1]],
                            all_y[splits[4][0]+time_step-1: splits[4][1]]])
raw_prob.shape, true_side.shape

((351783,), (351783, 1))

In [105]:
recall_precision = pd.DataFrame(columns=['Recall', 'Precision', 'N_profitable_trade'])
for tau in np.concatenate([np.linspace(start=0.0005, stop=0.05, num=50),
               np.linspace(start=0.05, stop=0.2, num=50)]):
    # thres_mid = raw_prob.mean()
    thres_mid = 0.47303626433978163
    above_thres = raw_prob > thres_mid + tau
    in_thres = (raw_prob >= thres_mid - tau) & (raw_prob <= thres_mid + tau)
    under_thres = raw_prob < thres_mid - tau

    trade_sig = np.zeros(shape=raw_prob.shape)

    trade_sig[above_thres] = 1
    trade_sig[in_thres] = 0
    trade_sig[under_thres] = -1

    # the i-th row and j-th column of cm is the number of observations 
    # with true label being i-th class and predicted label being j-th class.
    cm = confusion_matrix(y_true=np.where(true_side == 0, -1, true_side),
                          y_pred=trade_sig,
                          labels = [-1, 0, 1]
                          )
    n_profitable_trade = cm[0, 0] + cm[2, 2]  # (down, short), (up, long)
    n_miss_opportunity = cm[0, 1] + cm[2, 1]  # (down, hold), (up, hold)
    n_losing_trade = cm[0, 2] + cm[2, 0]  # (down, long), (up, short)
    recall_ = n_profitable_trade / (n_miss_opportunity + n_profitable_trade) 
    precision_ = n_profitable_trade / (n_profitable_trade + n_losing_trade)
    recall_precision.loc[tau] = [recall_, precision_, n_profitable_trade]
    # print(cm)
    # print(f'{tau}, recall:', recall_)

In [97]:
# a list of tau to further investigate
tau_list = [0.001, 0.002, 0.004, 0.006, 0.009, 0.01, 0.015, 0.019, 0.025, 0.034]
len(tau_list)

10

In [147]:
fig = px.line(recall_precision.iloc[:, :2])
fig.update_xaxes(title_text='Threshold (τ)')
fig.update_yaxes(title_text='Recall - Precision')
fig.update_layout(legend_title_text='Metrics')
# change legend group name
fig.show()

# Secondary Model

## data

### rolling metrics

In [149]:
# Use the probability of going up in the historical data as the midpoint of threshold
# historical data: training set before split 1
tt = ret[ret.index < all_y[splits[0][0]+time_step-1: splits[0][1]].index[0]][ticker]
# tt = ret[ret.index < m2.index[0]][ticker]
# 使用历史数据中的涨的概率作为threshold中点

tau = 0.004
# thres_mid = raw_prob.mean()
thres_mid = sum(tt > 0) / len(tt)
above_thres = raw_prob > thres_mid + tau
in_thres = (raw_prob >= thres_mid - tau) & (raw_prob <= thres_mid + tau)
under_thres = raw_prob < thres_mid - tau

trade_sig = np.zeros(shape=raw_prob.shape)

trade_sig[above_thres] = 1
trade_sig[in_thres] = 0
trade_sig[under_thres] = -1

cm = confusion_matrix(y_true=np.where(true_side == 0, -1, true_side),
                      y_pred=trade_sig,
                      labels = [-1, 0, 1]
                      )
n_profitable_trade = cm[0, 0] + cm[2, 2]
n_miss_opportunity = cm[0, 1] + cm[2, 1]
print(cm)
print('recall:', n_profitable_trade / (n_miss_opportunity + n_profitable_trade))

[[80804 19113 87694]
 [    0     0     0]
 [64282 16944 82946]]
recall: 0.8195408569269345


In [150]:
label_df = pd.concat([pd.Series(true_side.flatten(), name='true_class'),
                      pd.Series(trade_sig, name='trade_signal'),
                      pd.Series(pred_y.flatten(), name='raw_prob')
                     ], axis=1)

In [151]:
label_df.loc[label_df['raw_prob']>0.5, 'pred_class'] = 1
label_df.loc[label_df['raw_prob']<=0.5, 'pred_class'] = 0

In [153]:
# rolling precision
label_df['trade_res'] = np.nan
label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==1), 'trade_res'] = 'profit'
label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==-1), 'trade_res'] = 'profit'
label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==-1), 'trade_res'] = 'loss'
label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==1), 'trade_res'] = 'loss'
label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'
label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'

rolling_count = pd.get_dummies(label_df['trade_res']).rolling(window=30).sum()
label_df['rol_trading_precision'] = rolling_count['profit'] / (rolling_count['profit'] + rolling_count['loss'])


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



In [154]:
# rolling cross entropy
def rolling_cross_entropy(true, pred):
    return log_loss(y_true=true, y_pred=pred, labels=[1, 0])

label_df['rol_cross_entropy'] = rolling_apply_ext(
    rolling_cross_entropy, 30, label_df['true_class'], label_df['raw_prob'])

In [155]:
# rolling accuracy
def rolling_accuracy(true, pred):
    return accuracy_score(y_true=true, y_pred=pred)

label_df['rol_accuracy'] = rolling_apply_ext(
    rolling_accuracy, 30, label_df['true_class'], label_df['pred_class'])

### index

In [177]:
sp5 = pd.read_csv('assets/data/1_min_SPY_2008-2021.csv')
sp5 = sp5.drop_duplicates()
sp5 = sp5.iloc[:, 1:]
sp5['date'] = pd.to_datetime(sp5['date'])
sp5.set_index('date', inplace=True)
sp5 = sp5[pd.Series((sp5.index.year >= 2014) & (sp5.index.year < 2020), index=sp5.index)].sort_index()
sp5['shift_time'] = sp5.index.to_series().apply(lambda x: x+timedelta(hours=2))
sp5.set_index('shift_time', drop=True, inplace=True)
sp5 = sp5.drop_duplicates()
sp5 = sp5.reindex(df_[(df_.index.year>=2014) & (df_.index.year<2020)].index)
sp5['ret'] = np.log(sp5['close'].pct_change(fill_method=None) + 1)
sp5 = sp5.ffill()
sp5.rename({'average':'sp500_avg', 'ret': 'sp500_ret'}, axis=1, inplace=True)

### price moments

In [178]:
moms = df_[ticker][['var3m', 'skew1m', 'skew3m', 'kurt1m', 'kurt3m']][(df_.index.year>=2014) & (df_.index.year<2020)]

### combine

In [179]:
df_14_19 = df_[(df_.index.year>=2014) & (df_.index.year<2020)]

org_input = pd.concat([df_14_19[ticker].iloc[splits[0][0]+time_step-1: splits[0][1]],
                       df_14_19[ticker].iloc[splits[1][0]+time_step-1: splits[1][1]],
                       df_14_19[ticker].iloc[splits[2][0]+time_step-1: splits[2][1]],
                       df_14_19[ticker].iloc[splits[3][0]+time_step-1: splits[3][1]],
                       df_14_19[ticker].iloc[splits[4][0]+time_step-1: splits[4][1]]])

label_df.index = org_input.index

In [181]:
m2 = pd.concat([org_input[feat1], moms, label_df, sp5[['sp500_avg', 'sp500_ret']]], axis=1)
# m2 = pd.concat([org_input, moms.iloc[splits[0][0]:], label_df], axis=1)
m2[['rol_trading_precision', 'rol_cross_entropy', 'rol_accuracy']] = m2[['rol_trading_precision', 'rol_cross_entropy', 'rol_accuracy']].shift()
m2 = m2.dropna(how='any')

## tau = 0.004

In [193]:
feat2 = ['Adj Close', 'High', 'Low', 'Open', 'Volume', 'ret', 'ema1w', 'ema2w',
       'ema1m', 'sma1w', 'sma2w', 'sma1m', 'cci', 'macd', 'stochrsi', 'willr',
       'ad', 'adosc', 'ht_inphase', 'ht_quadrature', 'atr', 'var1m', 'var3m',
       'skew1m', 'skew3m', 'kurt1m', 'kurt3m', 'trade_signal', 'raw_prob', 'trade_signal',
         'rol_trading_precision', 'rol_cross_entropy', 'rol_accuracy', 'sp500_avg', 'sp500_ret']

In [194]:
xx = np.array(m2[feat2])
yy = np.where(m2[['trade_res']]=='profit', 1, 0).flatten()

In [195]:
xx.shape, yy.shape

((351749, 35), (351749,))

In [196]:
fold_length_ = int(np.ceil(len(xx) / 10))

In [197]:
splits_ = [
    [4*fold_length_, 5*fold_length_],
    [5*fold_length_, 6*fold_length_],
    [6*fold_length_, 7*fold_length_],
    [7*fold_length_, 8*fold_length_],
    [8*fold_length_, 10*fold_length_],
]

In [202]:
xx.shape, yy.shape

((351749, 35), (351749,))

In [208]:
tn = keras.metrics.TrueNegatives()
fn = keras.metrics.FalseNegatives()
tp = keras.metrics.TruePositives()
fp = keras.metrics.FalsePositives()
precision = keras.metrics.Precision()
recall = keras.metrics.Recall()

es = EarlyStopping(monitor='loss', mode='min', min_delta=0.001, verbose=1, patience = 4)
for ss in range(1, 5):
    train_end = splits_[ss][0]
    valid_end = splits_[ss][1]
    train_x, test_x = xx[:train_end], xx[train_end: valid_end]
    train_y, test_y = yy[:train_end], yy[train_end:valid_end]

    scaler = MinMaxScaler()
    train_x_scaled = scaler.fit_transform(train_x)
    test_x_scaled = scaler.fit_transform(test_x)

    time_step = 10

    n_samples = len(train_x_scaled) - time_step + 1
    n_features = train_x_scaled.shape[1]
    train_x_3d = np.zeros((n_samples, time_step, n_features))
    for i in range(n_samples):
        train_x_3d[i] = train_x_scaled[i:i+time_step]
    train_y = train_y[time_step-1:]

    n_samples = len(test_x_scaled) - time_step + 1
    n_features = test_x_scaled.shape[1]
    test_x_3d = np.zeros((n_samples, time_step, n_features))
    for i in range(n_samples):
        test_x_3d[i] = test_x_scaled[i:i+time_step]
    test_y = test_y[time_step-1:]

    model = Sequential()
    model.add(LSTM(units=20, return_sequences=True, input_shape=(time_step, n_features)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=20, return_sequences=False, input_shape=(time_step, n_features)))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))

    with tf.device('/cpu:0'):
        model.compile(loss='binary_crossentropy', 
                      optimizer='adam', 
                      metrics=['accuracy', precision, recall, tn, fn, tp, fp],
                     )
        model.fit(train_x_3d, train_y, epochs=10, batch_size=16, verbose=1, callbacks=[es])
        loss, t1, t2, t3, t4, t5, t6, t7 = model.evaluate(test_x_3d, test_y, verbose=1)
        pred_y = model.predict(test_x_3d)
        # np.save('res/AAPL_meta_label_'+str(ss+1)+'.npy', pred_y)
        # model.save('res/model/AAPL_m2_'+str(ss+1)+'.keras')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 8: early stopping
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 9: early stopping
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 7: early stopping


In [228]:
ml = np.concatenate([np.load(f'res/AAPL_meta_label_1_{tau}tau.npy'),
                     np.load(f'res/AAPL_meta_label_2_{tau}tau.npy'),
                     np.load(f'res/AAPL_meta_label_3_{tau}tau.npy'),
                     np.load(f'res/AAPL_meta_label_4_{tau}tau.npy'),
                     np.load(f'res/AAPL_meta_label_5_{tau}tau.npy'),], axis=0)

In [212]:
ml.shape

(211004, 1)

In [229]:
# original signal generated by primary model
# for each test set the first time_step-1 data should not be included
sig1 = np.concatenate([m2['trade_signal'][4*fold_length_+time_step-1: 5*fold_length_],
                       m2['trade_signal'][5*fold_length_+time_step-1: 6*fold_length_],
                       m2['trade_signal'][6*fold_length_+time_step-1: 7*fold_length_],
                       m2['trade_signal'][7*fold_length_+time_step-1: 8*fold_length_],
                       m2['trade_signal'][8*fold_length_+time_step-1:]])

In [230]:
true_side = np.concatenate([m2['true_class'][4*fold_length_+time_step-1: 5*fold_length_],
                            m2['true_class'][5*fold_length_+time_step-1: 6*fold_length_],
                            m2['true_class'][6*fold_length_+time_step-1: 7*fold_length_],
                            m2['true_class'][7*fold_length_+time_step-1: 8*fold_length_],
                            m2['true_class'][8*fold_length_+time_step-1:]])

In [231]:
# filtered signal. only when meta-label is 1, it takes action.
# otherwize the trade_signal is 0
sig2 = np.where(ml.flatten()<0.5, 0, sig1)

In [232]:
np.unique(sig1, return_counts=True)

(array([-1.,  0.,  1.]), array([ 66427,  23173, 121404]))

In [233]:
np.unique(sig2, return_counts=True)

(array([-1.,  0.,  1.]), array([ 39899, 154700,  16405]))

In [234]:
profit = pd.Series(np.where(((sig1==1) & (true_side==1)) | ((sig1==-1) & (true_side==0)), 'profit', np.nan)).replace('nan', np.nan)
loss = pd.Series(np.where(((sig1==1) & (true_side==0)) | ((sig1==-1) & (true_side==1)), 'loss', np.nan)).replace('nan', np.nan)
missed = pd.Series(np.where(sig1==0, 'missed', np.nan)).replace('nan', np.nan)
sig1_res = profit.fillna(loss).fillna(missed)

In [235]:
profit = pd.Series(np.where(((sig2==1) & (true_side==1)) | ((sig2==-1) & (true_side==0)), 'profit', np.nan)).replace('nan', np.nan)
loss = pd.Series(np.where(((sig2==1) & (true_side==0)) | ((sig2==-1) & (true_side==1)), 'loss', np.nan)).replace('nan', np.nan)
missed = pd.Series(np.where(sig2==0, 'missed', np.nan)).replace('nan', np.nan)
sig2_res = profit.fillna(loss).fillna(missed)

In [236]:
sig1_res.value_counts()

profit    95740
loss      92091
missed    23173
Name: count, dtype: int64

In [237]:
# recall
sig1_res.value_counts()['profit'] / (sig1_res.value_counts()['profit'] + sig1_res.value_counts()['missed'])

0.8051264369749312

In [238]:
sig2_res.value_counts()['profit'] / (sig2_res.value_counts()['profit'] + sig2_res.value_counts()['missed'])

0.1631278569689757

In [239]:
# precision
sig1_res.value_counts()['profit'] / (sig1_res.value_counts()['profit'] + sig1_res.value_counts()['loss'])

0.5097135190676725

In [240]:
sig2_res.value_counts()['profit'] / (sig2_res.value_counts()['profit'] + sig2_res.value_counts()['loss'])

0.5355747371412333

## different tau

In [241]:
tau_list = [0.001, 0.002, 0.004, 0.006, 0.009, 0.01, 0.015, 0.019, 0.025, 0.034]

In [112]:
for tau in tau_list:
    print(f'----------tau = {tau}! {datetime.now().strftime("%H:%M:%S")}----------')
    pred_y = np.concatenate([np.load('res/AAPL_label_1.npy'),
                             np.load('res/AAPL_label_2.npy'),
                             np.load('res/AAPL_label_3.npy'),
                             np.load('res/AAPL_label_4.npy'),
                             np.load('res/AAPL_label_5.npy'),
                             ])
    raw_prob = pred_y.flatten()
    true_side = np.concatenate([y[splits[0][0]+time_step-1: splits[0][1]],
                                y[splits[1][0]+time_step-1: splits[1][1]],
                                y[splits[2][0]+time_step-1: splits[2][1]],
                                y[splits[3][0]+time_step-1: splits[3][1]],
                                y[splits[4][0]+time_step-1: splits[4][1]]])
    raw_prob.shape, true_side.shape

    # thres_mid = raw_prob.mean()
    thres_mid = 0.47303626433978163
    above_thres = raw_prob > thres_mid + tau
    in_thres = (raw_prob >= thres_mid - tau) & (raw_prob <= thres_mid + tau)
    under_thres = raw_prob < thres_mid - tau

    trade_sig = np.zeros(shape=raw_prob.shape)
    trade_sig[above_thres] = 1
    trade_sig[in_thres] = 0
    trade_sig[under_thres] = -1
    
    print(f'----------read data {datetime.now().strftime("%H:%M:%S")}----------')
    label_df = pd.concat([pd.Series(true_side, name='true_class'),
                          pd.Series(trade_sig, name='trade_signal'),
                          pd.Series(pred_y.flatten(), name='raw_prob')
                         ], axis=1)

    label_df.loc[label_df['raw_prob']>0.5, 'pred_class'] = 1
    label_df.loc[label_df['raw_prob']<=0.5, 'pred_class'] = 0

    # rolling precision
    label_df['trade_res'] = np.nan
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==1), 'trade_res'] = 'profit'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==-1), 'trade_res'] = 'profit'
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==-1), 'trade_res'] = 'loss'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==1), 'trade_res'] = 'loss'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'

    rolling_count = pd.get_dummies(label_df['trade_res']).rolling(window=30).sum()
    label_df['rol_trading_precision'] = rolling_count['profit'] / (rolling_count['profit'] + rolling_count['loss'])

    # rolling cross entropy
    label_df['rol_cross_entropy'] = rolling_apply_ext(
        rolling_cross_entropy, 30, label_df['true_class'], label_df['raw_prob'])

    # rolling accuracy
    label_df['rol_accuracy'] = rolling_apply_ext(
        rolling_accuracy, 30, label_df['true_class'], label_df['pred_class'])
    label_df.index = org_input.index

    print(f'----------data prepared {datetime.now().strftime("%H:%M:%S")}----------')
    m2 = pd.concat([org_input, moms.iloc[splits[0][0]:], label_df], axis=1)
    m2[['rol_trading_precision', 'rol_cross_entropy', 'rol_accuracy']] = m2[['rol_trading_precision', 'rol_cross_entropy', 'rol_accuracy']].shift()
    m2 = m2.dropna(how='any')
    
    xx = np.array(m2[feat2])
    yy = np.where(m2[['trade_res']]=='profit', 1, 0).flatten()
    fold_length_ = int(np.ceil(len(xx) / 10))
    splits_ = [
        [4*fold_length_, 5*fold_length_],
        [5*fold_length_, 6*fold_length_],
        [6*fold_length_, 7*fold_length_],
        [7*fold_length_, 8*fold_length_],
        [8*fold_length_, 10*fold_length_],
    ]
    
    print(f'----------training {datetime.now().strftime("%H:%M:%S")}----------')
    # split first
    es = EarlyStopping(monitor='loss', mode='min', min_delta=0.001, verbose=1, patience = 4)
    for ss in range(5):
        print(f'----------set {str(ss+1)} start {datetime.now().strftime("%H:%M:%S")}----------')
        train_end = splits_[ss][0]
        valid_end = splits_[ss][1]
        train_x, test_x = xx[:train_end], xx[train_end: valid_end]
        train_y, test_y = yy[:train_end], yy[train_end:valid_end]

        scaler = MinMaxScaler()
        train_x_scaled = scaler.fit_transform(train_x)
        test_x_scaled = scaler.fit_transform(test_x)

        time_step = 10

        n_samples = len(train_x_scaled) - time_step + 1
        n_features = train_x_scaled.shape[1]
        train_x_3d = np.zeros((n_samples, time_step, n_features))
        for i in range(n_samples):
            train_x_3d[i] = train_x_scaled[i:i+time_step]
        train_y = train_y[time_step-1:]

        n_samples = len(test_x_scaled) - time_step + 1
        n_features = test_x_scaled.shape[1]
        test_x_3d = np.zeros((n_samples, time_step, n_features))
        for i in range(n_samples):
            test_x_3d[i] = test_x_scaled[i:i+time_step]
        test_y = test_y[time_step-1:]

        model = Sequential()
        model.add(LSTM(units=20, return_sequences=True, input_shape=(time_step, n_features)))
        model.add(Dropout(0.2))
        model.add(LSTM(units=20, return_sequences=False, input_shape=(time_step, n_features)))
        model.add(Dropout(0.2))
        model.add(Dense(1, activation='sigmoid'))

        with tf.device('/cpu:0'):
            model.compile(loss='binary_crossentropy', 
                          optimizer='adam', 
                          metrics=['accuracy', precision, recall, tn, fn, tp, fp],
                         )
            model.fit(train_x_3d, train_y, epochs=10, batch_size=16, verbose=0, callbacks=[es])
            loss, t1, t2, t3, t4, t5, t6, t7 = model.evaluate(test_x_3d, test_y, verbose=1)
            pred_y = model.predict(test_x_3d)
            # np.save('res/AAPL_meta_label_'+str(ss+1)+f'_{tau}tau'+'.npy', pred_y)
            # model.save('res/model/AAPL_m2_'+str(ss+1)+f'_{tau}tau'+'.keras')
        print(f'----------{tau}_{str(ss+1)} saved {datetime.now().strftime("%H:%M:%S")}----------')

----------tau = 0.001! 02:54:30----------


NameError: name 'y' is not defined

## report

In [242]:
m2_ML_metrics = pd.DataFrame(columns=['tau', 'data', 'Accuracy', 'Precision', 'Recall', 'F1', 'Cross-entropy Loss'])
for tau in tau_list:
    print(f'----------tau = {tau}! {datetime.now().strftime("%H:%M:%S")}----------')
    pred_y = np.concatenate([np.load('res/AAPL_label_1.npy'),
                             np.load('res/AAPL_label_2.npy'),
                             np.load('res/AAPL_label_3.npy'),
                             np.load('res/AAPL_label_4.npy'),
                             np.load('res/AAPL_label_5.npy'),
                             ])
    raw_prob = pred_y.flatten()
    true_side = np.concatenate([all_y[splits[0][0]+time_step-1: splits[0][1]],
                                all_y[splits[1][0]+time_step-1: splits[1][1]],
                                all_y[splits[2][0]+time_step-1: splits[2][1]],
                                all_y[splits[3][0]+time_step-1: splits[3][1]],
                                all_y[splits[4][0]+time_step-1: splits[4][1]]]).flatten()
    raw_prob.shape, true_side.shape

    # thres_mid = raw_prob.mean()
    thres_mid = 0.47303626433978163
    above_thres = raw_prob > thres_mid + tau
    in_thres = (raw_prob >= thres_mid - tau) & (raw_prob <= thres_mid + tau)
    under_thres = raw_prob < thres_mid - tau

    trade_sig = np.zeros(shape=raw_prob.shape)
    trade_sig[above_thres] = 1
    trade_sig[in_thres] = 0
    trade_sig[under_thres] = -1
    
    print(f'----------read data {datetime.now().strftime("%H:%M:%S")}----------')
    label_df = pd.concat([pd.Series(true_side, name='true_class'),
                          pd.Series(trade_sig, name='trade_signal'),
                          pd.Series(pred_y.flatten(), name='raw_prob')
                         ], axis=1)

    label_df.loc[label_df['raw_prob']>0.5, 'pred_class'] = 1
    label_df.loc[label_df['raw_prob']<=0.5, 'pred_class'] = 0

    # rolling precision
    label_df['trade_res'] = np.nan
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==1), 'trade_res'] = 'profit'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==-1), 'trade_res'] = 'profit'
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==-1), 'trade_res'] = 'loss'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==1), 'trade_res'] = 'loss'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'

    rolling_count = pd.get_dummies(label_df['trade_res']).rolling(window=30).sum()
    label_df['rol_trading_precision'] = rolling_count['profit'] / (rolling_count['profit'] + rolling_count['loss'])

#     # rolling cross entropy
#     label_df['rol_cross_entropy'] = rolling_apply_ext(
#         rolling_cross_entropy, 30, label_df['true_class'], label_df['raw_prob'])

#     # rolling accuracy
#     label_df['rol_accuracy'] = rolling_apply_ext(
#         rolling_accuracy, 30, label_df['true_class'], label_df['pred_class'])
    label_df.index = org_input.index

    print(f'----------data prepared {datetime.now().strftime("%H:%M:%S")}----------')
    m2 = pd.concat([org_input, moms.iloc[splits[0][0]:], label_df], axis=1)
    m2[['rol_trading_precision']] = m2[['rol_trading_precision']].shift()
    m2 = m2.dropna(how='any')
    
    xx = np.array(m2[feat2])
    yy = np.where(m2[['trade_res']]=='profit', 1, 0).flatten()
    fold_length_ = int(np.ceil(len(xx) / 10))
    splits_ = [
        [4*fold_length_, 5*fold_length_],
        [5*fold_length_, 6*fold_length_],
        [6*fold_length_, 7*fold_length_],
        [7*fold_length_, 8*fold_length_],
        [8*fold_length_, 10*fold_length_],
    ]
    
    print(f'----------training {datetime.now().strftime("%H:%M:%S")}----------')
    # split first
    es = EarlyStopping(monitor='loss', mode='min', min_delta=0.001, verbose=1, patience = 4)
    for ss in range(5):
        print(f'----------set {str(ss+1)} start {datetime.now().strftime("%H:%M:%S")}----------')
        train_end = splits_[ss][0]
        valid_end = splits_[ss][1]
        train_x, test_x = xx[:train_end], xx[train_end: valid_end]
        train_y, test_y = yy[:train_end], yy[train_end:valid_end]

        scaler = MinMaxScaler()
        train_x_scaled = scaler.fit_transform(train_x)
        test_x_scaled = scaler.fit_transform(test_x)

        time_step = 10

        n_samples = len(train_x_scaled) - time_step + 1
        n_features = train_x_scaled.shape[1]
        train_x_3d = np.zeros((n_samples, time_step, n_features))
        for i in range(n_samples):
            train_x_3d[i] = train_x_scaled[i:i+time_step]
        train_y = train_y[time_step-1:]

        n_samples = len(test_x_scaled) - time_step + 1
        n_features = test_x_scaled.shape[1]
        test_x_3d = np.zeros((n_samples, time_step, n_features))
        for i in range(n_samples):
            test_x_3d[i] = test_x_scaled[i:i+time_step]
        test_y = test_y[time_step-1:]
        pred_y = np.load('res/AAPL_meta_label_'+str(ss+1)+f'_{tau}tau'+'.npy')
        pred_class = np.where(pred_y>0.5, 1, 0)
        
        accuracy = accuracy_score(test_y, pred_class)
        precision = precision_score(test_y, pred_class, average='weighted')
        recall = recall_score(test_y, pred_class, average='weighted')
        f1 = f1_score(test_y, pred_class, average='weighted')
        cross_entropy = log_loss(test_y, pred_y)
        m2_ML_metrics = m2_ML_metrics.append(pd.Series([tau, ss, accuracy, precision, recall, f1, cross_entropy], index=m2_ML_metrics.columns),
                                             ignore_index = True
                                            )
        


#         model = keras.models.load_model('res/model/AAPL_m2_'+str(ss+1)+f'_{tau}tau'+'.keras')
#         loss, t1, t2, t3, t4, t5, t6, t7 = model.evaluate(test_x_3d, test_y, verbose=1)
# #         pred_y = model.predict(test_x_3d)

----------tau = 0.001! 11:39:59----------
----------read data 11:39:59----------
----------data prepared 11:39:59----------



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



KeyError: "['rol_cross_entropy', 'rol_accuracy', 'sp500_avg', 'sp500_ret'] not in index"

In [353]:
m2_ML_metrics.groupby('tau').mean().iloc[:, 1:]

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Cross-entropy Loss
tau,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.001,0.533054,0.537768,0.533054,0.494364,0.675413
0.002,0.543663,0.550798,0.543663,0.498447,0.658637
0.004,0.565076,0.564663,0.565076,0.53177,0.625308
0.006,0.587853,0.589584,0.587853,0.553635,0.593042
0.009,0.621805,0.627036,0.621805,0.579891,0.542756
0.01,0.634323,0.626084,0.634323,0.598569,0.525176
0.015,0.69333,0.669452,0.69333,0.637492,0.44135
0.019,0.738693,0.749231,0.738693,0.711903,0.372231
0.025,0.810508,0.786452,0.810508,0.764685,0.271874
0.034,0.878015,0.878149,0.878015,0.865045,0.185292


## metrics

In [251]:
met = pd.DataFrame(columns=['recall_1', 'recall_2', 'precision_1', 'precision_2'])

In [252]:
# for _ in range(6, len(tau_list)):
#     tau = tau_list[_]
for tau in tau_list:
    print(tau)
    pred_y = np.concatenate([np.load('res/AAPL_label_1.npy'),
                             np.load('res/AAPL_label_2.npy'),
                             np.load('res/AAPL_label_3.npy'),
                             np.load('res/AAPL_label_4.npy'),
                             np.load('res/AAPL_label_5.npy'),
                             ])
    print(pred_y.shape)
    raw_prob = pred_y.flatten()
    true_side = np.concatenate([all_y[splits[0][0]+time_step-1: splits[0][1]],
                                all_y[splits[1][0]+time_step-1: splits[1][1]],
                                all_y[splits[2][0]+time_step-1: splits[2][1]],
                                all_y[splits[3][0]+time_step-1: splits[3][1]],
                                all_y[splits[4][0]+time_step-1: splits[4][1]]]).flatten()
    raw_prob.shape, true_side.shape

    # thres_mid = raw_prob.mean()
    thres_mid = 0.47303626433978163
    above_thres = raw_prob > thres_mid + tau
    in_thres = (raw_prob >= thres_mid - tau) & (raw_prob <= thres_mid + tau)
    under_thres = raw_prob < thres_mid - tau

    trade_sig = np.zeros(shape=raw_prob.shape)
    trade_sig[above_thres] = 1
    trade_sig[in_thres] = 0
    trade_sig[under_thres] = -1
    print(trade_sig.shape)

    print(f'----------read data {datetime.now().strftime("%H:%M:%S")}----------')
    label_df = pd.concat([pd.Series(true_side, name='true_class'),
                          pd.Series(trade_sig, name='trade_signal'),
                          pd.Series(pred_y.flatten(), name='raw_prob')
                         ], axis=1)

    label_df.loc[label_df['raw_prob']>0.5, 'pred_class'] = 1
    label_df.loc[label_df['raw_prob']<=0.5, 'pred_class'] = 0

    # rolling precision
    label_df['trade_res'] = np.nan
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==1), 'trade_res'] = 'profit'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==-1), 'trade_res'] = 'profit'
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==-1), 'trade_res'] = 'loss'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==1), 'trade_res'] = 'loss'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'

    rolling_count = pd.get_dummies(label_df['trade_res']).rolling(window=30).sum()
    label_df['rol_trading_precision'] = rolling_count['profit'] / (rolling_count['profit'] + rolling_count['loss'])
    label_df.index = org_input.index
    print(label_df.shape)
    
    if tau < 0.015:
        index_path = 'res/meta-label_index.pkl'
    else:
        index_path = f'res/meta-label_index_{tau}.pkl'
    sig1 = label_df.reindex(pd.read_pickle(index_path).index)['trade_signal']
    true_side = label_df.reindex(pd.read_pickle(index_path).index)['true_class']

    profit = pd.Series(np.where(((sig1==1) & (true_side==1)) | ((sig1==-1) & (true_side==0)), 'profit', np.nan)).replace('nan', np.nan)
    loss = pd.Series(np.where(((sig1==1) & (true_side==0)) | ((sig1==-1) & (true_side==1)), 'loss', np.nan)).replace('nan', np.nan)
    missed = pd.Series(np.where(sig1==0, 'missed', np.nan)).replace('nan', np.nan)
    sig1_res = profit.fillna(loss).fillna(missed)

    # recall & precision
    recall_1 = sig1_res.value_counts()['profit'] / (sig1_res.value_counts()['profit'] + sig1_res.value_counts()['missed'])
    precision_1 = sig1_res.value_counts()['profit'] / (sig1_res.value_counts()['profit'] + sig1_res.value_counts()['loss'])

    ml = np.concatenate([np.load('res/AAPL_meta_label_1'+f'_{tau}tau'+'.npy'),
                         np.load('res/AAPL_meta_label_2'+f'_{tau}tau'+'.npy'),
                         np.load('res/AAPL_meta_label_3'+f'_{tau}tau'+'.npy'),
                         np.load('res/AAPL_meta_label_4'+f'_{tau}tau'+'.npy'),
                         np.load('res/AAPL_meta_label_5'+f'_{tau}tau'+'.npy'),], axis=0)

    # filtered signal. only when meta-label is 1, it takes action.
    # otherwize the trade_signal is 0
    sig2 = np.where(ml.flatten()<0.5, 0, sig1)

    profit = pd.Series(np.where(((sig2==1) & (true_side==1)) | ((sig2==-1) & (true_side==0)), 'profit', np.nan)).replace('nan', np.nan)
    loss = pd.Series(np.where(((sig2==1) & (true_side==0)) | ((sig2==-1) & (true_side==1)), 'loss', np.nan)).replace('nan', np.nan)
    missed = pd.Series(np.where(sig2==0, 'missed', np.nan)).replace('nan', np.nan)
    sig2_res = profit.fillna(loss).fillna(missed)
    

    recall_2 = sig2_res.value_counts()['profit'] / (sig2_res.value_counts()['profit'] + sig2_res.value_counts()['missed'])
    precision_2 = sig2_res.value_counts()['profit'] / (sig2_res.value_counts()['profit'] + sig2_res.value_counts()['loss'])
    
    met.loc[tau] = [recall_1, recall_2, precision_1, precision_2]

0.001
(351783, 1)
(351783,)
----------read data 11:43:36----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



0.002
(351783, 1)
(351783,)
----------read data 11:43:37----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



0.004
(351783, 1)
(351783,)
----------read data 11:43:37----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



0.006
(351783, 1)
(351783,)
----------read data 11:43:38----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



0.009
(351783, 1)
(351783,)
----------read data 11:43:39----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



0.01
(351783, 1)
(351783,)
----------read data 11:43:39----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



0.015
(351783, 1)
(351783,)
----------read data 11:43:40----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



0.019
(351783, 1)
(351783,)
----------read data 11:43:41----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



0.025
(351783, 1)
(351783,)
----------read data 11:43:41----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



0.034
(351783, 1)
(351783,)
----------read data 11:43:42----------
(351783, 6)



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



In [253]:
met['f1_1'] = 2* (met['recall_1'] * met['precision_1']) / (met['recall_1'] + met['precision_1'])
met['f1_2'] = 2* (met['recall_2'] * met['precision_2']) / (met['recall_2'] + met['precision_2'])

In [254]:
met

Unnamed: 0,recall_1,recall_2,precision_1,precision_2,f1_1,f1_2
0.001,0.954401,0.143191,0.502283,0.549276,0.658179,0.227163
0.002,0.908823,0.171882,0.502355,0.538544,0.64705,0.260593
0.004,0.825818,0.166836,0.502235,0.534628,0.624606,0.254311
0.006,0.750608,0.217052,0.501908,0.524358,0.601567,0.307018
0.009,0.645208,0.174767,0.502365,0.525422,0.564896,0.26229
0.01,0.613596,0.136935,0.502611,0.533406,0.552586,0.217924
0.015,0.467056,0.069464,0.502565,0.551588,0.48416,0.12339
0.019,0.366481,0.120865,0.502482,0.520668,0.423839,0.196188
0.025,0.238141,0.042055,0.504983,0.546006,0.323653,0.078095
0.034,0.118324,0.041158,0.514064,0.540342,0.19237,0.07649


# Strategy

In [272]:
def cal_period_perf_indicator(nav):
    if type(nav) == pd.DataFrame:
        res = pd.DataFrame(index=nav.columns, columns=['Annualised Return', 'Annualised Std.', 'Sharpe Ratio', 
                                                       'Maximum Drawdown', 'Calmar Ratio'])
        for col in nav:
            res.loc[col] = cal_period_perf_indicator(nav[col])
        return res
                                                          
    returns = nav - 1
    daily_returns = (returns+1).resample('D').last().pct_change(fill_method=None)

    total_days = len(daily_returns)
    annualized_return = (nav[-1] / nav[0]) ** (252 / total_days) - 1

    annualized_std = daily_returns.std() * np.sqrt(252)

    drawdown = (returns+1) / (returns+1).cummax() - 1
    max_drawdown = drawdown.min()

    sharpe_ratio = (annualized_return) / annualized_std
    calmar_ratio = annualized_return / abs(max_drawdown)
    return [annualized_return, annualized_std, sharpe_ratio, max_drawdown, calmar_ratio]

In [256]:
tau_list = [0.001, 0.002, 0.004, 0.006, 0.009, 0.01, 0.015, 0.019, 0.025, 0.034]

In [266]:
ecdf = sm.distributions.ECDF(ml.flatten()[:-117276])
ML_metrics = pd.DataFrame(columns=['Model', 'tau', 'Precision', 'Recall'])
nav = {}
nav_ = {}
nav__ = {}
# nav = pd.DataFrame()
# for tau in [0.001, 0.002, 0.004, 0.006, 0.009, 0.01, 0.015, 0.019, 0.025]:
for tau in tau_list:
# tau = 0.004
    if tau < 0.015:
        index_path = 'res/meta-label_index.pkl'
    else:
        index_path = f'res/meta-label_index_{tau}.pkl'
    indices = pd.read_pickle(index_path).index

    
    df_14_19 = df_[(df_.index.year>=2014) & (df_.index.year<2020)]
    org_input = pd.concat([
                           df_14_19[ticker].iloc[splits[0][0]+time_step-1: splits[0][1]],
                           df_14_19[ticker].iloc[splits[1][0]+time_step-1: splits[1][1]],
                           df_14_19[ticker].iloc[splits[2][0]+time_step-1: splits[2][1]],
                           df_14_19[ticker].iloc[splits[3][0]+time_step-1: splits[3][1]],
                           df_14_19[ticker].iloc[splits[4][0]+time_step-1: splits[4][1]]])
    pred_y = np.concatenate([
                             np.load('res/AAPL_label_1.npy'),
                             np.load('res/AAPL_label_2.npy'),
                             np.load('res/AAPL_label_3.npy'),
                             np.load('res/AAPL_label_4.npy'),
                             np.load('res/AAPL_label_5.npy'),
                             ])
    raw_prob = pred_y.flatten()
    true_side = np.concatenate([
                                all_y[splits[0][0]+time_step-1: splits[0][1]],
                                all_y[splits[1][0]+time_step-1: splits[1][1]],
                                all_y[splits[2][0]+time_step-1: splits[2][1]],
                                all_y[splits[3][0]+time_step-1: splits[3][1]],
                                all_y[splits[4][0]+time_step-1: splits[4][1]]]).flatten()

    # thres_mid = raw_prob.mean()
    thres_mid = 0.47303626433978163
    above_thres = raw_prob > thres_mid + tau
    in_thres = (raw_prob >= thres_mid - tau) & (raw_prob <= thres_mid + tau)
    under_thres = raw_prob < thres_mid - tau

    trade_sig = np.zeros(shape=raw_prob.shape)
    trade_sig[above_thres] = 1
    trade_sig[in_thres] = 0
    trade_sig[under_thres] = -1

    label_df = pd.concat([pd.Series(true_side, name='true_class'),
                          pd.Series(trade_sig, name='trade_signal'),
                          pd.Series(pred_y.flatten(), name='raw_prob')
                         ], axis=1)

    label_df.loc[label_df['raw_prob']>0.5, 'pred_class'] = 1
    label_df.loc[label_df['raw_prob']<=0.5, 'pred_class'] = 0

    # rolling precision
    label_df['trade_res'] = np.nan
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==1), 'trade_res'] = 'profit'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==-1), 'trade_res'] = 'profit'
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==-1), 'trade_res'] = 'loss'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==1), 'trade_res'] = 'loss'
    label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'
    label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'

    rolling_count = pd.get_dummies(label_df['trade_res']).rolling(window=30).sum()
    label_df['rol_trading_precision'] = rolling_count['profit'] / (rolling_count['profit'] + rolling_count['loss'])
    label_df.index = org_input.index
    t1 = label_df.reindex(indices)['trade_signal']
    t2 = ret.reindex(indices)['Price']

    if tau < 0.015:
        index_path = 'res/meta-label_index.pkl'
    else:
        index_path = f'res/meta-label_index_{tau}.pkl'
    sig1 = label_df.reindex(pd.read_pickle(index_path).index)['trade_signal']
    true_side = label_df.reindex(pd.read_pickle(index_path).index)['true_class']

    ml = np.concatenate([
                         np.load('res/AAPL_meta_label_1'+f'_{tau}tau'+'.npy'),
                         np.load('res/AAPL_meta_label_2'+f'_{tau}tau'+'.npy'),
                         np.load('res/AAPL_meta_label_3'+f'_{tau}tau'+'.npy'),
                         np.load('res/AAPL_meta_label_4'+f'_{tau}tau'+'.npy'),
                         np.load('res/AAPL_meta_label_5'+f'_{tau}tau'+'.npy'),], axis=0)

    # filtered signal. only when meta-label is 1, it takes action.
    # otherwize the trade_signal is 0
    sig2 = pd.Series(np.where(ml.flatten()<0.5, 0, sig1), index=sig1.index)
    sig3 = ecdf(ml.flatten()) * sig1
    
    n_profit = sum(((true_side==1) & (sig1==1)) | ((true_side==0) & (sig1==-1)))
    n_loss = sum(((true_side==1) & (sig1==-1)) | ((true_side==0) & (sig1==1)))
    n_miss = sum(sig1==0)
    # ML_metrics = ML_metrics.append(pd.Series([1, tau, n_profit / (n_profit + n_loss), n_profit / (n_profit + n_miss)], index=ML_metrics.columns),
    #                                ignore_index=True
    #                               )
    ML_metrics.loc[len(ML_metrics)+1, :] = pd.Series([1, tau, n_profit / (n_profit + n_loss), n_profit / (n_profit + n_miss)], index=ML_metrics.columns)
    n_profit = sum(((true_side==1) & (sig2==1)) | ((true_side==0) & (sig2==-1)))
    n_loss = sum(((true_side==1) & (sig2==-1)) | ((true_side==0) & (sig2==1)))
    n_miss = sum(sig2==0)
    # ML_metrics = ML_metrics.append(pd.Series([2, tau, n_profit / (n_profit + n_loss), n_profit / (n_profit + n_miss)], index=ML_metrics.columns),
    #                                ignore_index=True
    #                               )
    ML_metrics.loc[len(ML_metrics)+1, :] = pd.Series([2, tau, n_profit / (n_profit + n_loss), n_profit / (n_profit + n_miss)], index=ML_metrics.columns)
    
    nav[tau] = ((ret.reindex(sig1.index)['Price'] * sig1) + 1).cumprod()
    nav_[tau] = ((ret.reindex(sig1.index)['Price'] * sig2) + 1).cumprod()
    nav__[tau] = ((ret.reindex(sig1.index)['Price'] * sig3) + 1).cumprod()


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtyp

In [267]:
ML_metrics

Unnamed: 0,Model,tau,Precision,Recall
1,1.0,0.001,0.502283,0.954401
2,2.0,0.001,0.549276,0.143191
3,1.0,0.002,0.502355,0.908823
4,2.0,0.002,0.538544,0.171882
5,1.0,0.004,0.502235,0.825818
6,2.0,0.004,0.534628,0.166836
7,1.0,0.006,0.501908,0.750608
8,2.0,0.006,0.524358,0.217052
9,1.0,0.009,0.502365,0.645208
10,2.0,0.009,0.525422,0.174767


## ML metrics

## Trading Confusion Matrix

In [268]:
t1 = ML_metrics[ML_metrics['Model']==1] 
t2 = ML_metrics[ML_metrics['Model']==2] 
trace1 = go.Bar(x=t1.tau,
                y=t1.Precision,
                name='Primary Model',
                marker_color='lightblue',
                opacity=0.7,
                text=t1['Precision'].apply(lambda x: f'{x:.4f}'),  # Format the text to two decimals
                textposition='auto'  # Automatically position the text on top of the bars
                )
trace2 = go.Bar(x=t2.tau,
                y=t2.Precision,
                name='Primary+Secondary Model',
                marker_color='darkblue',
                opacity=0.7,
                text=t2['Precision'].apply(lambda x: f'{x:.4f}'),  # Format the text to two decimals
                textposition='auto'  # Automatically position the text on top of the bars
                )
layout = go.Layout(
#     title='Precision Improvement',
    xaxis=dict(title='Threshold', type='category', categoryorder='array', categoryarray=t1['tau'].astype(str)),
    yaxis=dict(title='Trading Precision'),
    barmode='group',  # This places the bars next to each other
    bargap=0.3,
    legend=dict(x=0, y=1.2)
)

# Create the figure and plot
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()

In [269]:
t1 = ML_metrics[ML_metrics['Model']==1] 
t2 = ML_metrics[ML_metrics['Model']==2] 
trace1 = go.Bar(x=t1.tau,
                y=t1.Recall,
                name='Primary Model',
                marker_color='lightblue',
                opacity=0.7,
                text=t1['Recall'].apply(lambda x: f'{x:.4f}'),  # Format the text to two decimals
                textposition='auto'  # Automatically position the text on top of the bars
                )
trace2 = go.Bar(x=t2.tau,
                y=t2.Recall,
                name='Primary+Secondary Model',
                marker_color='darkblue',
                opacity=0.7,
                text=t2['Recall'].apply(lambda x: f'{x:.4f}'),  # Format the text to two decimals
                textposition='auto'  # Automatically position the text on top of the bars
                )
layout = go.Layout(
#     title='Recall Improvement',
    xaxis=dict(title='Threshold', type='category', categoryorder='array', categoryarray=t1['tau'].astype(str)),
    yaxis=dict(title='Trading Recall'),
    barmode='group',  # This places the bars next to each other
    bargap=0.3,
    legend=dict(x=0, y=1.2)
)

# Create the figure and plot
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()

## Risk-return metrics

In [270]:
m1_rep = pd.DataFrame(nav).iloc[:, :-1].dropna().iloc[-117276:]
m2_rep = pd.DataFrame(nav_).iloc[:, :-1].dropna().iloc[-117276:]
m3_rep = pd.DataFrame(nav__).iloc[:, :-1].dropna().iloc[-117276:]

In [273]:
cal_period_perf_indicator(m1_rep)


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



Unnamed: 0,Annualised Return,Annualised Std.,Sharpe Ratio,Maximum Drawdown,Calmar Ratio
0.001,0.305649,0.295279,1.035118,-0.393365,0.777012
0.002,0.315511,0.296337,1.064702,-0.392174,0.804519
0.004,0.296561,0.297075,0.998268,-0.396153,0.748601
0.006,0.296481,0.295297,1.004011,-0.398585,0.743834
0.009,0.266194,0.293634,0.906551,-0.405759,0.656041
0.01,0.282383,0.292024,0.966984,-0.405577,0.69625
0.015,0.253382,0.286219,0.885272,-0.396786,0.638585
0.019,0.151604,0.27892,0.543539,-0.391643,0.387098
0.025,0.217419,0.266856,0.814746,-0.355074,0.612322


In [274]:
cal_period_perf_indicator(m2_rep)


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



Unnamed: 0,Annualised Return,Annualised Std.,Sharpe Ratio,Maximum Drawdown,Calmar Ratio
0.001,0.091758,0.053456,1.716505,-0.056982,1.610304
0.002,0.080736,0.039182,2.06052,-0.046194,1.74777
0.004,-0.055053,0.120423,-0.457165,-0.222195,-0.247769
0.006,0.357175,0.124871,2.860361,-0.059625,5.990332
0.009,0.225089,0.106651,2.11052,-0.046185,4.873665
0.01,0.168816,0.080061,2.108609,-0.086716,1.946768
0.015,0.057417,0.020623,2.784146,-0.013108,4.380261
0.019,0.113821,0.075978,1.498084,-0.068356,1.665118
0.025,0.021289,0.013657,1.558813,-0.011114,1.91555


In [275]:
cal_period_perf_indicator(m3_rep)


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



Unnamed: 0,Annualised Return,Annualised Std.,Sharpe Ratio,Maximum Drawdown,Calmar Ratio
0.001,0.169893,0.162322,1.046641,-0.26001,0.653411
0.002,0.229362,0.174016,1.31805,-0.238394,0.962113
0.004,0.192069,0.159952,1.200792,-0.238504,0.805306
0.006,0.221012,0.155008,1.425811,-0.178509,1.238096
0.009,0.195817,0.155025,1.263129,-0.199539,0.981346
0.01,0.139479,0.160828,0.867254,-0.279776,0.498537
0.015,0.127129,0.158648,0.801325,-0.256456,0.495714
0.019,0.111361,0.169272,0.657885,-0.258717,0.430437
0.025,0.114455,0.15155,0.755229,-0.192112,0.59577


In [276]:
dd = cal_period_perf_indicator(m3_rep[m3_rep.index.year < 2019]) - cal_period_perf_indicator(m1_rep[m1_rep.index.year < 2019])


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



In [277]:
dd

Unnamed: 0,Annualised Return,Annualised Std.,Sharpe Ratio,Maximum Drawdown,Calmar Ratio
0.001,0.183671,-0.161591,-0.343804,0.127215,-0.18473
0.002,0.21354,-0.174461,-0.314209,0.144198,-0.199974
0.004,0.170845,-0.145998,-0.265171,0.133149,-0.282599
0.006,0.291618,-0.206423,-0.251799,0.196992,-0.328135
0.009,0.257451,-0.191312,-0.309923,0.183486,-0.341293
0.01,0.172424,-0.1486,-0.319603,0.119011,-0.147567
0.015,0.163286,-0.138492,-0.304515,0.124592,-0.211829
0.019,0.19211,-0.150689,-0.295559,0.129827,-0.127569
0.025,0.228115,-0.152448,-0.062945,0.156762,-0.255398


In [278]:
tt = m1_rep / m1_rep.iloc[0]
tt = tt.resample('D').last().dropna()
tt.columns.name = 'Threshold Value'
fig = px.line(tt)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Net Asset Value')
fig.update_layout(title='Strategy Generated by Raw Trading Signal')

In [279]:
tt = m2_rep / m2_rep.iloc[0]
tt = tt.resample('D').last().dropna()
tt.columns.name = 'Threshold Value'
fig = px.line(tt)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Net Asset Value')
fig.update_layout(title='Strategy Generated by All-or-Nothing Method')

In [280]:
tt = m3_rep / m3_rep.iloc[0]
tt = tt.resample('D').last().dropna()
tt.columns.name = 'Threshold Value'
fig = px.line(tt)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Net Asset Value')
fig.update_layout(title='Strategy Generated by ECDF Method')

In [393]:
# error
strategy_metrics = pd.DataFrame(columns=['tau', 'model', 'ret', 'std', 'sr', 'mdd', 'wr', 'cr'])
for tau in tau_list:
    returns = nav[tau] - 1
    daily_returns = (returns+1).resample('D').last().pct_change()
    weekly_returns = (returns+1).resample('W').last().pct_change()

    # Calculate annualized return
    total_days = len(daily_returns)
    annualized_return = (returns[-1] + 1) ** (365 / total_days) - 1

    # Calculate annualized standard deviation
    annualized_std = daily_returns.std() * np.sqrt(365)
#     annualized_std = weekly_returns.std() * np.sqrt(52)

    def calculate_drawdown(returns):
        drawdown = (returns+1) / (returns+1).cummax() - 1
#         cumulative_returns = (returns + 1).cumprod()
#         peak = cumulative_returns.expanding(min_periods=1).max()
#         drawdown = (cumulative_returns / peak) - 1
        return drawdown.min()

    max_drawdown = calculate_drawdown(returns)

    # Calculate win rate
    total_trades = len(returns)
    winning_trades = len(returns[returns > 0])
    win_rate = winning_trades / total_trades

    # Calculate Sharpe Ratio and Calmar Ratio

    sharpe_ratio = (annualized_return) / annualized_std
    calmar_ratio = annualized_return / abs(max_drawdown)
    dict_ = {i: j for i, j in zip(['tau', 'model', 'ret', 'std', 'sr', 'mdd', 'wr', 'cr'],
                                  [tau, 1, annualized_return, annualized_std, sharpe_ratio, max_drawdown, win_rate, calmar_ratio])}
    strategy_metrics = strategy_metrics.append(dict_ ,ignore_index=True)
#     strategy_metrics.loc[[tau, 1], ['ret', 'std', 'sr', 'mdd', 'wr', 'cr']] = [annualized_return,
#                                                                                    annualized_std, sharpe_ratio,
#                                                                                    max_drawdown, win_rate,
#                                                                                    calmar_ratio
#                                                                                   ]
    
    returns = nav_[tau] - 1
    daily_returns = (returns+1).resample('D').last().pct_change()
    weekly_returns = (returns+1).resample('W').last().pct_change()

    # Calculate annualized return
    total_days = len(daily_returns)
    annualized_return = (returns[-1] + 1) ** (365 / total_days) - 1

    # Calculate annualized standard deviation
    annualized_std = daily_returns.std() * np.sqrt(365)
#     annualized_std = weekly_returns.std() * np.sqrt(52)

    max_drawdown = calculate_drawdown(returns)

    # Calculate win rate
    total_trades = len(returns)
    winning_trades = len(returns[returns > 0])
    win_rate = winning_trades / total_trades

    # Calculate Sharpe Ratio and Calmar Ratio

    sharpe_ratio = (annualized_return) / annualized_std
    calmar_ratio = annualized_return / abs(max_drawdown)
    dict_ = {i: j for i, j in zip(['tau', 'model', 'ret', 'std', 'sr', 'mdd', 'wr', 'cr'],
                                  [tau, 2, annualized_return, annualized_std, sharpe_ratio, max_drawdown, win_rate, calmar_ratio])}
    strategy_metrics = strategy_metrics.append(dict_ ,ignore_index=True)

In [400]:
tt = strategy_metrics.copy()
tt.columns=['threshold', 'model', 'Annualized Return', 'Annulized Standard Deviation', 'Sharpe Ratio', 'Maximum Drawdown', 'wr', 'Calmar Ratio']

In [402]:
tt.drop(columns=['wr'])

Unnamed: 0,threshold,model,Annualized Return,Annulized Standard Deviation,Sharpe Ratio,Maximum Drawdown,Calmar Ratio
0,0.001,1.0,0.793556,0.273823,2.898064,-0.406945,1.950031
1,0.001,2.0,0.201155,0.061064,3.29416,-0.056982,3.530183
2,0.002,1.0,0.821449,0.274191,2.995904,-0.405615,2.025191
3,0.002,2.0,0.291629,0.095691,3.047615,-0.07572,3.851409
4,0.004,1.0,0.7739,0.273369,2.830975,-0.410866,1.883581
5,0.004,2.0,0.135765,0.115538,1.175068,-0.238035,0.570357
6,0.006,1.0,0.761396,0.270258,2.817295,-0.412986,1.843636
7,0.006,2.0,0.503483,0.119626,4.208825,-0.075938,6.630167
8,0.009,1.0,0.717539,0.266987,2.687537,-0.422,1.700327
9,0.009,2.0,0.328564,0.111365,2.950329,-0.116896,2.810727


In [394]:
strategy_metrics

Unnamed: 0,tau,model,ret,std,sr,mdd,wr,cr
0,0.001,1.0,0.793556,0.273823,2.898064,-0.406945,0.995782,1.950031
1,0.001,2.0,0.201155,0.061064,3.29416,-0.056982,0.98365,3.530183
2,0.002,1.0,0.821449,0.274191,2.995904,-0.405615,0.995725,2.025191
3,0.002,2.0,0.291629,0.095691,3.047615,-0.07572,0.98727,3.851409
4,0.004,1.0,0.7739,0.273369,2.830975,-0.410866,0.995602,1.883581
5,0.004,2.0,0.135765,0.115538,1.175068,-0.238035,0.988337,0.570357
6,0.006,1.0,0.761396,0.270258,2.817295,-0.412986,0.995626,1.843636
7,0.006,2.0,0.503483,0.119626,4.208825,-0.075938,0.985868,6.630167
8,0.009,1.0,0.717539,0.266987,2.687537,-0.422,0.995758,1.700327
9,0.009,2.0,0.328564,0.111365,2.950329,-0.116896,0.994175,2.810727


In [297]:
qq = strategy_metrics[(strategy_metrics.tau.isin([0.001, 0.006, 0.015, 0.025])) & (strategy_metrics.model == 1)]
qq = qq.drop(columns='model')

In [299]:
for col in qq.columns[1:]:
    qq[col] = qq[col].round(2)

In [351]:
qq

Unnamed: 0,tau,ret,std,sr,mdd,wr,cr
0,0.001,0.79,7.06,0.11,-7.21,1.0,0.11
6,0.006,0.76,4.22,0.18,-1.59,1.0,0.48
12,0.015,0.6,6.41,0.09,-4.65,1.0,0.13
16,0.025,0.54,2.36,0.23,-0.84,1.0,0.64


In [358]:
tt = pd.DataFrame(nav).resample('D').last().dropna(how='all')
tt = tt[[0.001, 0.006, 0.01, 0.025]]
# tt = tt.iloc[int(len(tt)*0.8):]
# tt = tt / tt.iloc[0]
fig = px.line(tt)
fig.update_yaxes(title='Net Asset Value', showgrid=False)
fig.update_xaxes(title='Date', showgrid=False)
fig.update_xaxes(title='Threshold', showgrid=False)
fig


In [352]:
# secondary model

In [398]:
tau = 0.01
tt = pd.concat([nav[tau], nav_[tau]], axis=1)
tt.columns = ['Only Primary Model', 'With Secondary Model']
fig = px.line(tt.resample('D').last().dropna())
fig.update_yaxes(title='Net Asset Value', showgrid=False)
fig.update_xaxes(title='Date', showgrid=False)
fig

In [354]:
qq = strategy_metrics[(strategy_metrics.tau.isin([0.001])) & (strategy_metrics.model.isin([1, 2]))]
for col in qq.columns[1:]:
    qq[col] = qq[col].round(2)
qq



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,tau,model,ret,std,sr,mdd,wr,cr
0,0.001,1.0,0.79,7.06,0.11,-7.21,1.0,0.11
1,0.001,2.0,0.71,11.16,0.06,-1.07,1.0,0.66


# Performance Metrics Report

In [282]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, roc_curve, auc, confusion_matrix

In [283]:
pred_y = np.concatenate([
                         np.load('res/AAPL_label_1.npy'),
                         np.load('res/AAPL_label_2.npy'),
                         np.load('res/AAPL_label_3.npy'),
                         np.load('res/AAPL_label_4.npy'),
                         np.load('res/AAPL_label_5.npy'),
                         ])
true_side = np.concatenate([
                            all_y[splits[0][0]+time_step-1: splits[0][1]],
                            all_y[splits[1][0]+time_step-1: splits[1][1]],
                            all_y[splits[2][0]+time_step-1: splits[2][1]],
                            all_y[splits[3][0]+time_step-1: splits[3][1]],
                            all_y[splits[4][0]+time_step-1: splits[4][1]]])

In [284]:
tt = np.where(pred_y >= 0.5, 1, 0)

In [285]:
# Calculate metrics
accuracy = accuracy_score(true_side, tt)
precision = precision_score(true_side, tt)
recall = recall_score(true_side, tt)
roc_auc = roc_auc_score(true_side, pred_y)

# Calculate ROC curve and AUC
fpr, tpr, _ = roc_curve(true_side, pred_y)
roc_auc_curve = auc(fpr, tpr)

# Create confusion matrix
conf_matrix = confusion_matrix(true_side, tt)

# Print the metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("ROC-AUC:", roc_auc)
print("ROC curve AUC:", roc_auc_curve)
print("Confusion Matrix:")
print(conf_matrix)
print(log_loss(true_side, pred_y))

Accuracy: 0.5296077411358707
Precision: 0.4924537037037037
Recall: 0.2591672148722072
ROC-AUC: 0.5276888732342723
ROC curve AUC: 0.5276888732342723
Confusion Matrix:
[[143759  43852]
 [121624  42548]]
0.6902663167394666


In [286]:
log_loss(true_side, pred_y)

0.6902663167394666

In [290]:
# tau = 0.001
tau = 0.01
if tau < 0.015:
    index_path = 'res/meta-label_index.pkl'
else:
    index_path = f'res/meta-label_index_{tau}.pkl'
indices = pd.read_pickle(index_path).index


df_14_19 = df_[(df_.index.year>=2014) & (df_.index.year<2020)]
org_input = pd.concat([
                       df_14_19[ticker].iloc[splits[0][0]+time_step-1: splits[0][1]],
                       df_14_19[ticker].iloc[splits[1][0]+time_step-1: splits[1][1]],
                       df_14_19[ticker].iloc[splits[2][0]+time_step-1: splits[2][1]],
                       df_14_19[ticker].iloc[splits[3][0]+time_step-1: splits[3][1]],
                       df_14_19[ticker].iloc[splits[4][0]+time_step-1: splits[4][1]]])
pred_y = np.concatenate([
                         np.load('res/AAPL_label_1.npy'),
                         np.load('res/AAPL_label_2.npy'),
                         np.load('res/AAPL_label_3.npy'),
                         np.load('res/AAPL_label_4.npy'),
                         np.load('res/AAPL_label_5.npy'),
                         ])
raw_prob = pred_y.flatten()
true_side = np.concatenate([
                            all_y[splits[0][0]+time_step-1: splits[0][1]],
                            all_y[splits[1][0]+time_step-1: splits[1][1]],
                            all_y[splits[2][0]+time_step-1: splits[2][1]],
                            all_y[splits[3][0]+time_step-1: splits[3][1]],
                            all_y[splits[4][0]+time_step-1: splits[4][1]]]).flatten()

# thres_mid = raw_prob.mean()
thres_mid = 0.47303626433978163
above_thres = raw_prob > thres_mid + tau
in_thres = (raw_prob >= thres_mid - tau) & (raw_prob <= thres_mid + tau)
under_thres = raw_prob < thres_mid - tau

trade_sig = np.zeros(shape=raw_prob.shape)
trade_sig[above_thres] = 1
trade_sig[in_thres] = 0
trade_sig[under_thres] = -1

label_df = pd.concat([pd.Series(true_side, name='true_class'),
                      pd.Series(trade_sig, name='trade_signal'),
                      pd.Series(pred_y.flatten(), name='raw_prob')
                     ], axis=1)

label_df.loc[label_df['raw_prob']>0.5, 'pred_class'] = 1
label_df.loc[label_df['raw_prob']<=0.5, 'pred_class'] = 0

# rolling precision
label_df['trade_res'] = np.nan
label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==1), 'trade_res'] = 'profit'
label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==-1), 'trade_res'] = 'profit'
label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==-1), 'trade_res'] = 'loss'
label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==1), 'trade_res'] = 'loss'
label_df.loc[(label_df['true_class']==0) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'
label_df.loc[(label_df['true_class']==1) & (label_df['trade_signal']==0), 'trade_res'] = 'missed'

rolling_count = pd.get_dummies(label_df['trade_res']).rolling(window=30).sum()
label_df['rol_trading_precision'] = rolling_count['profit'] / (rolling_count['profit'] + rolling_count['loss'])
label_df.index = org_input.index
t1 = label_df.reindex(indices)['trade_signal']
t2 = ret.reindex(indices)['Price']

if tau < 0.015:
    index_path = 'res/meta-label_index.pkl'
else:
    index_path = f'res/meta-label_index_{tau}.pkl'
sig1 = label_df.reindex(pd.read_pickle(index_path).index)['trade_signal']
true_side = label_df.reindex(pd.read_pickle(index_path).index)['true_class']

ml = np.concatenate([
                     np.load('res/AAPL_meta_label_1'+f'_{tau}tau'+'.npy'),
                     np.load('res/AAPL_meta_label_2'+f'_{tau}tau'+'.npy'),
                     np.load('res/AAPL_meta_label_3'+f'_{tau}tau'+'.npy'),
                     np.load('res/AAPL_meta_label_4'+f'_{tau}tau'+'.npy'),
                     np.load('res/AAPL_meta_label_5'+f'_{tau}tau'+'.npy'),], axis=0)

# filtered signal. only when meta-label is 1, it takes action.
# otherwize the trade_signal is 0
sig2 = pd.Series(np.where(ml.flatten()<0.48, 0, sig1), index=sig1.index)


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'profit' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



In [291]:
# should be corrected
should_be_corrected = np.where(((sig1 == -1) & (true_side == 1)) | ((sig1 == 1) & (true_side == -1)), 1, 0)

In [292]:
ml

array([[3.8093565e-06],
       [5.6742167e-01],
       [5.7693517e-01],
       ...,
       [4.7842094e-01],
       [4.8071399e-01],
       [4.8152861e-01]], dtype=float32)

In [293]:
# Calculate metrics
correct_ = np.where(ml<0.48, 1, 0)
accuracy = accuracy_score(should_be_corrected, correct_)
precision = precision_score(should_be_corrected, correct_)
recall = recall_score(should_be_corrected, correct_)
roc_auc = roc_auc_score(should_be_corrected, 1-ml)

# Calculate ROC curve and AUC
fpr, tpr, _ = roc_curve(should_be_corrected, 1-ml)
roc_auc_curve = auc(fpr, tpr)

# Create confusion matrix
conf_matrix = confusion_matrix(should_be_corrected, correct_)

# Print the metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("ROC-AUC:", roc_auc)
print("ROC curve AUC:", roc_auc_curve)
print("Confusion Matrix:")
print(conf_matrix)
log_loss(should_be_corrected, 1-ml)

Accuracy: 0.5446389641902524
Precision: 0.0
Recall: 0.0
ROC-AUC: 0.04069756667047042
ROC curve AUC: 0.04069756667047042
Confusion Matrix:
[[114921  83428]
 [ 12655      0]]


3.8194763147641737