In [1]:
import numpy as np
import pandas as pd
import scipy
import os
from Metric_Computation import *

## Load the data and Split

In [2]:
# specify the path to read
data_load_path = '../data/intermediate_stock_split/'
# read the data
data_test_X = np.load(data_load_path+'data_X_test.npy')
data_test_X_prev = np.load(data_load_path+'data_X_prev_test.npy')
SP500_test_X = np.load(data_load_path+'SP500_test.npy')
SP500_test_X_prev = np.load(data_load_path+'SP500_prev_test.npy')
value_target_test = np.load(data_load_path+'target_value_test.npy')
value_target_test_prev = np.load(data_load_path+'target_value_prev_test.npy')
gradient_target_test = np.load(data_load_path+'target_gradient_test.npy')
gradient_target_test_prev = np.load(data_load_path+'target_gradient_prev_test.npy')
trend_target_test = np.load(data_load_path+'price_trend_flag_test.npy')
stock_split_test = np.load(data_load_path+'stock_ind_split_test.npy')

In [3]:
# because we needs to clean the data -- build a array to remember the indexes of each stock and 
# placeholder of the array
stock_indication_array = np.zeros([data_test_X.shape[0]])
for cStock in range(stock_split_test.shape[0]):
    start_ind = stock_split_test[cStock]
    if cStock<(stock_split_test.shape[0]-1):
        end_ind = stock_split_test[cStock+1]
    else:
        end_ind = data_test_X.shape[0]
    stock_indication_array[start_ind:end_ind] = cStock

In [6]:
# drop instance that contains inf values
valid_series_index = np.where((np.sum(np.sum(data_test_X,axis=-1),axis=-1)!=np.inf)&((np.sum(np.sum(data_test_X_prev,axis=-1),axis=-1)!=np.inf)))[0]
data_test_X = data_test_X[valid_series_index,:,:]
data_test_X_prev = data_test_X_prev[valid_series_index,:,:]
SP500_test_X = SP500_test_X[valid_series_index,:]
SP500_test_X_prev = SP500_test_X_prev[valid_series_index,:]
value_target_test = value_target_test[valid_series_index,:]
value_target_test_prev = value_target_test_prev[valid_series_index,:]
gradient_target_test = gradient_target_test[valid_series_index,:]
gradient_target_test_prev = gradient_target_test_prev[valid_series_index,:]
trend_target_test = trend_target_test[valid_series_index, :]
stock_indication_array = stock_indication_array[valid_series_index]

In [7]:
current_and_future_price_test = np.concatenate([value_target_test_prev, value_target_test],axis=1)

### preceed the trend(class) target to one-hot

In [8]:
def one_hot_encoding_label(label_input, num_class=None):
    '''
    :param label_input: The sparse form of input label (2,0,1,3,0,1,2etc.)
    :param num_class: The number of classes, if keep None, then automatically infer from the given label input
    '''
    # retrieve the number of input data
    nData = label_input.shape[0]
    # reshape the data
    label_input_flat = np.reshape(label_input, [-1])
    if (label_input_flat.shape[0]!=nData):            # which means the input label is not 'mathematically 1-d'
        raise ValueError('The input label must be 1-d mathematically')
    # infer the number of class if input is None
    if num_class is None:
        num_class = (int)(np.amax(label_input)+1)
    # create the return encoded matrx
    one_hot_label_mat = np.zeros([nData, num_class])
    # get a row index to assist the batch-assigning
    row_ind_batch = np.arange(nData)
    # assign '1's to the corresponding positions
    one_hot_label_mat[row_ind_batch, label_input_flat.astype('int')] = 1
    
    return one_hot_label_mat

In [9]:
trend_target_test_input = one_hot_encoding_label(trend_target_test)

## Evaluation Functions

In [10]:
# confusion metrics
def classification_info_computation(pred_label, true_label, num_class):
    '''
    :param pred_label: the sparse (not one-hot) prediction of labels
    :param true_label: the sparse (not one-hot) ground-truth of labels
    :param num_class: number of classes
    '''
    # flatten the two label arrays if they are not already so
    pred_label = np.reshape(pred_label,[-1])
    true_label = np.reshape(true_label,[-1])
    # initialize the confusion maxtrix array
    class_matrix = np.zeros([num_class, num_class])    # each row is the true labels
    # initialize the precision and recall arrays
    precision_array = np.zeros([num_class])
    recall_array = np.zeros([num_class])
    # fill the confusion-prediction matrix
    for cClass_True in range(num_class):
        # retrieve the current 
        current_cClass_ind = np.where(true_label==cClass_True)[0]
        # retrueve the corresponding predictions
        current_cClass_pred = pred_label[current_cClass_ind]
        # fill the evaluation matrx
        for cClass_Pred in range(num_class):
            cClass_pred_num = np.where(current_cClass_pred==cClass_Pred)[0].shape[0]
            class_matrix[cClass_True, cClass_Pred] = cClass_pred_num
    # fill the precision and recall arrays
    for cClass_True in range(num_class):
        precision_array[cClass_True] = class_matrix[cClass_True,cClass_True]/np.sum(class_matrix[:,cClass_True])
        recall_array[cClass_True] = class_matrix[cClass_True,cClass_True]/np.sum(class_matrix[cClass_True,:])
        
    return class_matrix, precision_array, recall_array

In [11]:
# 'regret' function
def invest_regret_comput(pred_label, true_label):
    '''
    The function to return the 'regret' defined by the real investment scenarios
    :param pred_label: the sparse (not one-hot) prediction of labels
    :param true_label: the sparse (not one-hot) ground-truth of labels
    With the meaning 2=uptrend 1=downtrend 0=non-trend
    Strategy: 
        predict 0: don't buy or sell
        predict 1: sell
        predict 2: buy
    ******************** Truth Table *********************
    | True Label | Predicted Label | Regret |
    |      0     |        0        |   0    |
    |      0     |        1        |   1    |
    |      0     |        2        |   1    |
    |      1     |        0        |   1    |
    |      1     |        1        |   0    |
    |      1     |        2        |   2    |
    |      2     |        0        |   1    |
    |      2     |        1        |   2    |
    |      2     |        2        |   0    |
    '''
    # flatten the two label arrays if they are not already so
    pred_label = np.reshape(pred_label,[-1])
    true_label = np.reshape(true_label,[-1])
    # check if the two arrays are of the same legth
    if pred_label.shape[0]!=true_label.shape[0]:
        raise ValueError('The predicted and the true labels must be in the same length!')
    # placeholder of regret array
    regret_array = np.zeros([pred_label.shape[0]])
    # check the conditions for regret '1'
    one_regret_ind = np.where(((true_label==0)&(pred_label==2))|((true_label==0)&(pred_label==1))|((true_label==2)&(pred_label==0))|((true_label==1)&(pred_label==0)))[0]
    # check the conditions for regret '2'
    two_regret_ind = np.where(((true_label==1)&(pred_label==2))|((true_label==2)&(pred_label==1)))[0]
    # assign regret values to the entries
    regret_array[one_regret_ind] = 1.0
    regret_array[two_regret_ind] = 2.0
    # compute the overall regret
    overall_regret = np.mean(regret_array)
    
    return overall_regret

## Define the CCI rule-based function

In [25]:
def CCI_stock_prediction(input_x, decision_thred = 100):
    '''
    Input must be continuous in terms of time
    '''
    # palceholder of the predicted labels
    label_prediction = np.zeros([input_x.shape[0]])
    # Use the 20-day CCI as the indicator
    CCI_mat = input_x[:,4,-1]      # [nData]
    # find the the index exceed the threshold and assign values
    label_prediction[np.where(CCI_mat>=decision_thred)[0]] = 2     # uptrend
    label_prediction[np.where(CCI_mat<=-decision_thred)[0]] = 1    # downtrend
    
    return label_prediction

## Define the MACD-based Rule

In [30]:
def MACD_stock_prediction(input_x):
    '''
    The input must be in continuous time!!!
    '''
    # placeholder of the MACD output label
    label_prediction = np.zeros([input_x.shape[0]])
    # MACD line
    MACD_line = np.reshape(input_x[:, -12, 1] - input_x[:, 0, 1], [-1])
    # signal line -- 9-day EMA of MACD
    # build the [nData * 9] array for computing
    MACD_date_array = np.zeros([MACD_line.shape[0],9])
    # loop to fill the values
    for cDate in range(MACD_line.shape[0]):
        if cDate<8:
            MACD_date_array[cDate, :] = np.concatenate([np.zeros([8-cDate]), MACD_line[:(cDate+1)]],axis=0)
        else:
            MACD_date_array[cDate, :] = MACD_line[(cDate-8):(cDate+1)]
    # compute with EMA
    base_signal_line = np.reshape(EMA_batch_computation(MACD_date_array,n_period_compute=[9]),[-1])
    # compute the difference between the two values
    diff_MACD_base = MACD_line - base_signal_line
    # let positive to be 1 and negative to be -1
    sign_diff_MACD = np.zeros(diff_MACD_base.shape)
    sign_diff_MACD[np.where(diff_MACD_base>0)[0]] = 1
    # set the 1-order difference
    # for the cross_sign_diff_MACD values:
    #   1 -- MACD upcross baseline   -1 -- MACD downcross baseline    0 -- no trends
    cross_sign_diff_MACD = np.concatenate([np.zeros([1]),np.diff(sign_diff_MACD)],axis=0) 
    # assign up/down trend predictions
    label_prediction[np.where(cross_sign_diff_MACD==1)[0]] = 2
    label_prediction[np.where(cross_sign_diff_MACD==-1)[0]] = 1
    
    return label_prediction

In [31]:
# placeholder for the ground-truth and predicted outputs
# the placeholder of the true labels are also required because the labels shoud be in order
true_label_odered = np.zeros([gradient_target_test.shape[0]])
pred_label_orderd = np.zeros([gradient_target_test.shape[0]]) 
# initialize the start index
pred_start_ind = 0
# loop over different stocks
for cStock in range(stock_split_test.shape[0]):
    # retrieve the index of the current stock
    ind_current_stock = np.where(stock_indication_array==cStock)[0]
    # compute the number of data
    nData_this_stock = ind_current_stock.shape[0]
    # retrieve the true labels of the current stock
    current_true_labels = np.flip(trend_target_test[ind_current_stock],axis=0)
    # retrieve the current stock data with a accending time order
    current_time_input = np.flip(data_test_X[pred_start_ind:pred_start_ind+nData_this_stock,:,:],axis=0)
    # compute the MACD prediction
    current_pred_labels = MACD_stock_prediction(current_time_input)
    # store the resulting labels
    true_label_odered[pred_start_ind:pred_start_ind+nData_this_stock] = np.reshape(current_true_labels,[-1])
    pred_label_orderd[pred_start_ind:pred_start_ind+nData_this_stock] = np.reshape(current_pred_labels,[-1])
# compute the holistic evaluation metrics
# test
class_matrix_test, precision_array_test, recall_array_test = classification_info_computation(pred_label = pred_label_orderd,
                                                                                             true_label = true_label_odered, 
                                                                                             num_class=3)
down_mis_up_rate_test = class_matrix_test[2,1]/np.sum(class_matrix_test[:,1])
up_mis_down_rate_test = class_matrix_test[1,2]/np.sum(class_matrix_test[:,2])
precision_downtrend_adjusted_test = precision_array_test[1] - down_mis_up_rate_test
precision_uptrend_adjuest_test = precision_array_test[2] - up_mis_down_rate_test
# regret
regret_epoch_test = invest_regret_comput(pred_label = pred_label_orderd,
                                         true_label = true_label_odered)
# print out the information
print('The test regret of the current epoch is: ', regret_epoch_test)
print('Recall Info:')
print('On the testing data, the recall of Non-trend:', recall_array_test[0], ' Downtrend:', recall_array_test[1], ' Uptrend:', recall_array_test[2])
print('Precision Info:')
print('On the testing data, the precision of Non-trend:', precision_array_test[0], ' Downtrend:', precision_array_test[1], ' Uptrend:', precision_array_test[2])
print('Adjusted Precision:')
print('On the testing data, Adjusted Downtrend precision: ', precision_downtrend_adjusted_test, 'Adjusted Uptrend precision: ', precision_uptrend_adjuest_test)
print('**************************I\'m the Divider*****************************')

The test regret of the current epoch is:  0.019844780105148962
Recall Info:
On the testing data, the recall of Non-trend: 0.9976557324137463  Downtrend: 0.05  Uptrend: 0.03143712574850299
Precision Info:
On the testing data, the precision of Non-trend: 0.9838342211947599  Downtrend: 0.17117117117117117  Uptrend: 0.19090909090909092
Adjusted Precision:
On the testing data, Adjusted Downtrend precision:  -0.09009009009009009 Adjusted Uptrend precision:  0.06363636363636366
**************************I'm the Divider*****************************


In [21]:
save_path = '../results_MACD/'
# make the path if it does not exist yet
if not os.path.exists(save_path):
    os.makedirs(save_path)
# save training
np.save(save_path+'train_precision_epoch_array.npy',train_epoch_wise_info[0])
np.save(save_path+'train_recall_epoch_array.npy',train_epoch_wise_info[1]) 
np.save(save_path+'train_regret_epoch_array.npy',train_epoch_wise_info[2]) 
np.save(save_path+'save_class_matrix_train.npy',train_epoch_wise_info[3]) 
# save validation
np.save(save_path+'valid_precision_epoch_array.npy',valid_epoch_wise_info[0])
np.save(save_path+'valid_recall_epoch_array.npy',valid_epoch_wise_info[1]) 
np.save(save_path+'valid_regret_epoch_array.npy',valid_epoch_wise_info[2]) 
np.save(save_path+'save_class_matrix_valid.npy',valid_epoch_wise_info[3]) 
# save the testing
np.save(save_path+'test_precision_epoch_array.npy',test_epoch_wise_info[0])
np.save(save_path+'test_recall_epoch_array.npy',test_epoch_wise_info[1]) 
np.save(save_path+'test_regret_epoch_array.npy',test_epoch_wise_info[2]) 
np.save(save_path+'save_class_matrix_test.npy',test_epoch_wise_info[3]) 