In [1]:
import config as cfg
import utils.helper_functions as hf
import utils.inputs as inputs
import utils.outputs as outputs
import utils.tf_model as tf_model
import utils.evaluate as eval

from IPython.display import display, clear_output

import pandas as pd
import numpy as np

import hyperopt
from hyperopt import hp, fmin, tpe

import os

# Disable file validation in the debugger.
# NOTE(review): these variables are set after `utils.tf_model` has been imported;
# if that module imports TensorFlow at import time, TF_CPP_MIN_LOG_LEVEL may be
# read too late to take effect - confirm, and move above the imports if so.
os.environ['PYDEVD_DISABLE_FILE_VALIDATION'] = '1'
# TensorFlow C++ log level - 0: all logs (default setting), 1: filter out INFO logs, up to 3.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
# Turn on pandas copy-on-write mode to avoid making unnecessary copies of DataFrames or Series.
pd.options.mode.copy_on_write = True


In [2]:
# Report how much work the search will do.
# `num_combinations` is only defined on the grid-search path.
if not cfg.use_hyperopt:
    # Grid search: size of the grid described by cfg.param_grid.
    num_combinations = hf.get_num_combinations(cfg.param_grid)
else:
    # Hyperopt: show the configured number of iterations.
    print(cfg.hyperopt_n_iterations)

number of combinations: 6


In [3]:
# Load the pickled price table, keep the rows selected by cfg.start_date,
# then fill missing values.
# NOTE: unpickling can execute arbitrary code - only point cfg.db_path at trusted files.
df = hf.fillnavalues(
    hf.get_rows_after_date(
        pd.read_pickle(cfg.db_path),
        cfg.start_date,
    )
)

def get_single_level_df(df, ohlcv):
    """Return the `ohlcv` column selection of `df` with its top column name removed.

    Args:
        df: source DataFrame (presumably with multi-level columns whose top
            level is the OHLCV field - TODO confirm against the data file).
        ohlcv: column label to select, e.g. 'Open' or 'Close'.

    Returns:
        The result of `hf.remove_top_column_name` applied to `df[[ohlcv]]`.
    """
    return hf.remove_top_column_name(df[[ohlcv]])

def get_ohlcv_dfs(df):
    """Split `df` into one single-level frame per OHLCV field.

    Args:
        df: source DataFrame accepted by `get_single_level_df`.

    Returns:
        dict with keys 'df_open', 'df_high', 'df_low', 'df_close' and
        'df_volume' (in that order), each holding the frame returned by
        `get_single_level_df` for the matching field.
    """
    key_to_field = (
        ('df_open', 'Open'),
        ('df_high', 'High'),
        ('df_low', 'Low'),
        ('df_close', 'Close'),
        ('df_volume', 'Volume'),
    )
    return {key: get_single_level_df(df, field) for key, field in key_to_field}

# Count the tickers from the 'Open' frame and report the total.
open_prices = get_single_level_df(df, 'Open')
num_tickers = hf.get_num_tickers(open_prices)
print(f'number of tickers: {num_tickers}')


number of tickers: 593


In [4]:
def get_df_data(hyperparams):
    """Build the model dataset (inputs plus outputs) for one hyperparameter set.

    Reads the module-level `df` and `num_tickers`. The input features are
    loaded from `cfg.transformed_data_path` when that file exists and
    `cfg.use_saved_transformed_data` is truthy; otherwise they are computed
    with `inputs.get_inputs` and pickled to that path.

    NOTE(review): the cache file is not keyed by `hyperparams['buying_time']`
    although `inputs.get_inputs` receives it, so a cached file may have been
    built for a different buying time - confirm this is intended.

    Args:
        hyperparams: dict containing at least 'buying_time' and
            'selling_time'; the whole dict is also forwarded as keyword
            arguments to `outputs.add_outputs`.

    Returns:
        The frame returned by `outputs.add_outputs`, with rows containing
        NaN dropped.
    """
    buying_time = hyperparams['buying_time']
    buy_prices = get_single_level_df(df, buying_time)
    sell_prices = get_single_level_df(df, hyperparams['selling_time'])
    ohlcv_frames = get_ohlcv_dfs(df)

    cache_path = cfg.transformed_data_path
    cache_usable = os.path.exists(cache_path) and cfg.use_saved_transformed_data

    if not cache_usable:
        # No usable cache: compute the inputs and store them for later runs.
        print(f'need to create {cache_path}')
        features = inputs.get_inputs(buy_prices, ohlcv_frames, buying_time)

        features.to_pickle(cache_path)
        print(f'saved new {cache_path}')
    else:
        features = pd.read_pickle(cache_path)
        print(f'using existing {cache_path}')

    labelled = outputs.add_outputs(
        features, buy_prices, sell_prices, ohlcv_frames, num_tickers,
        cfg.output_class_name, cfg.fee, **hyperparams
    )

    return labelled.dropna()

In [5]:
from itertools import product

# Search state, reset every time this cell runs:
# `i` is the grid-search step counter, `results` collects one dict per evaluated configuration.
i, results = 0, []

def objective(hyperparams):
    """Hyperopt objective: train and evaluate one sampled configuration.

    Builds the dataset, loads/trains the model, evaluates it, records the
    metrics in the module-level `results` list and returns the negated
    performance score (hyperopt minimises, a higher score is better).

    Args:
        hyperparams: dict sampled from `cfg.search_space`. 'thresholds' and
            'rank_pct_thresholds' are single values here and are wrapped in
            one-element lists, matching the list form shown in the
            grid-search results.

    Returns:
        `-performance_score` for this configuration.
    """
    # Wrap on a copy so the dict handed in by the caller is not mutated.
    hyperparams = {
        **hyperparams,
        'thresholds': [hyperparams['thresholds']],
        'rank_pct_thresholds': [hyperparams['rank_pct_thresholds']],
    }

    df_data = get_df_data(hyperparams)
    # Fix: the original called `model.load_model(...)`, which reads the local
    # `model` before it is assigned (UnboundLocalError). Use the same loader
    # as the grid-search path.
    test_train_data, model = tf_model.load_tf_model(df_data, hyperparams)
    # Same argument list as the grid-search call to evaluate_model.
    # NOTE(review): the total number of runs is taken from
    # cfg.hyperopt_n_iterations because `num_combinations` is not defined in
    # hyperopt mode - confirm this is what evaluate_model expects.
    performance_metrics = eval.evaluate_model(
        df_data, model, test_train_data, num_tickers, cfg.hyperopt_n_iterations, hyperparams
    )

    result = {**performance_metrics, **hyperparams, 'epochs': cfg.epochs}
    print(result)
    results.append(result)

    # NOTE(review): 'performance_score' can be NaN (seen in the recorded
    # grid-search output); confirm how the installed hyperopt treats a NaN loss.
    performance = result['performance_score']

    return -performance

# Run the hyperparameter search: exhaustive grid search by default,
# hyperopt's TPE algorithm when cfg.use_hyperopt is set.
if not cfg.use_hyperopt:
    # Walk the cartesian product of cfg.param_grid; `i` is the 1-based step number.
    for i, params in enumerate(product(*cfg.param_grid.values()), start=1):
        # clear_output(wait=True) can be called here to clear earlier printed outputs
        hf.print_combination(i, num_combinations)

        hyperparams = {name: value for name, value in zip(cfg.param_grid.keys(), params)}

        df_data = get_df_data(hyperparams)
        test_train_data, model = tf_model.load_tf_model(df_data, hyperparams)
        performance_metrics = eval.evaluate_model(
            df_data, model, test_train_data, num_tickers, num_combinations, hyperparams
        )

        # Metrics first, then the hyperparameters, then the epoch count.
        result = dict(performance_metrics)
        result.update(hyperparams)
        result['epochs'] = cfg.epochs
        print(result)
        results.append(result)
else:
    best = fmin(
        objective,
        cfg.search_space,
        algo=tpe.suggest,
        max_evals=cfg.hyperopt_n_iterations,
    )
    print(f'best parameters: {best}')


step: 1/6
need to create ./outputs/classifier_transformed_data.pkl
saved new ./outputs/classifier_transformed_data.pkl
number of elements in y_train: 1605684
number of elements in y_test: 60000
need to create ./outputs/classifier_model.keras


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m50178/50178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 960us/step - accuracy: 0.5669 - loss: 1.2562 - val_accuracy: 0.7218 - val_loss: 0.6047
X_test shape: (60000, 52)
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 457us/step
market cumulative % per class: [0.07551667 1.        ]
{'performance_score': 108.98415044215584, 'trimmed_average_profit': 1.0093303867662882, 'average_profit': 1.034546050264884, 'median_profit': 0.9930199600798403, 'prediction_is_buy_count': 109, 'loss_limit_reached_pct': 0.5137614678899083, 'market_rate': 0.07551666666666666, 'true_positives': 31, 'true_negatives': 55391, 'false_positives': 78, 'false_negatives': 4500, 'winning_rate': 0.28440366972477066, 'winning_rate_vs_market': 0.20888700305810398, 'buying_time': 'Open', 'selling_time': 'Open', 'target_future_days': 1, 'loss_limit': 0.997, 'sell_at_target': False, 'size_layer_1': 128, 'size_layer_2': 128, 'size_layer_3': 128, 'dropout_rate': 0.1, 'balance_data': True, 'ba

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m50161/50161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 950us/step - accuracy: 0.5746 - loss: 1.2631 - val_accuracy: 0.7328 - val_loss: 0.6511
X_test shape: (60000, 52)
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 392us/step
market cumulative % per class: [0.0944 1.    ]
{'performance_score': 95.24980810210533, 'trimmed_average_profit': 1.018899342819856, 'average_profit': 1.0527081769989561, 'median_profit': 0.9930199600798403, 'prediction_is_buy_count': 82, 'loss_limit_reached_pct': 0.524390243902439, 'market_rate': 0.0944, 'true_positives': 29, 'true_negatives': 54283, 'false_positives': 53, 'false_negatives': 5635, 'winning_rate': 0.35365853658536583, 'winning_rate_vs_market': 0.25925853658536585, 'buying_time': 'Open', 'selling_time': 'Open', 'target_future_days': 2, 'loss_limit': 0.997, 'sell_at_target': False, 'size_layer_1': 128, 'size_layer_2': 128, 'size_layer_3': 128, 'dropout_rate': 0.1, 'balance_data': True, 'batch_size': 32, 'n_first_

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m50144/50144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 954us/step - accuracy: 0.5638 - loss: 1.2657 - val_accuracy: 0.5961 - val_loss: 0.6907
X_test shape: (60000, 52)
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 380us/step
market cumulative % per class: [0.099 1.   ]
{'performance_score': 53.974618422449126, 'trimmed_average_profit': 1.0096072185377598, 'average_profit': 1.0664769046017077, 'median_profit': 0.9930199600798403, 'prediction_is_buy_count': 50, 'loss_limit_reached_pct': 0.46, 'market_rate': 0.099, 'true_positives': 17, 'true_negatives': 54027, 'false_positives': 33, 'false_negatives': 5923, 'winning_rate': 0.34, 'winning_rate_vs_market': 0.24100000000000002, 'buying_time': 'Open', 'selling_time': 'Open', 'target_future_days': 3, 'loss_limit': 0.997, 'sell_at_target': False, 'size_layer_1': 128, 'size_layer_2': 128, 'size_layer_3': 128, 'dropout_rate': 0.1, 'balance_data': True, 'batch_size': 32, 'n_first_classes': [0, 0], 'cumulated_

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m50178/50178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 1ms/step - accuracy: 0.6980 - loss: 1.0959 - val_accuracy: 0.8033 - val_loss: 0.4712
X_test shape: (60000, 55)
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 379us/step
market cumulative % per class: [0.03543333 1.        ]
{'performance_score': 105.07392994319501, 'trimmed_average_profit': 1.0047310003712508, 'average_profit': 1.0098401746617403, 'median_profit': 0.9960079840319361, 'prediction_is_buy_count': 835, 'loss_limit_reached_pct': 0.23592814371257484, 'market_rate': 0.03543333333333333, 'true_positives': 196, 'true_negatives': 57235, 'false_positives': 639, 'false_negatives': 1930, 'winning_rate': 0.2347305389221557, 'winning_rate_vs_market': 0.19929720558882236, 'buying_time': 'Close', 'selling_time': 'Open', 'target_future_days': 1, 'loss_limit': 0.997, 'sell_at_target': False, 'size_layer_1': 128, 'size_layer_2': 128, 'size_layer_3': 128, 'dropout_rate': 0.1, 'balance_data': True, '

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m50161/50161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 951us/step - accuracy: 0.5664 - loss: 1.1476 - val_accuracy: 0.5984 - val_loss: 0.6472
X_test shape: (60000, 55)
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 379us/step
market cumulative % per class: [0.13258333 1.        ]
{'performance_score': 31.129896526070084, 'trimmed_average_profit': 0.9965600250327177, 'average_profit': 1.049405611740256, 'median_profit': 0.9930199600798403, 'prediction_is_buy_count': 32, 'loss_limit_reached_pct': 0.6875, 'market_rate': 0.13258333333333333, 'true_positives': 9, 'true_negatives': 52022, 'false_positives': 23, 'false_negatives': 7946, 'winning_rate': 0.28125, 'winning_rate_vs_market': 0.14866666666666667, 'buying_time': 'Close', 'selling_time': 'Open', 'target_future_days': 2, 'loss_limit': 0.997, 'sell_at_target': False, 'size_layer_1': 128, 'size_layer_2': 128, 'size_layer_3': 128, 'dropout_rate': 0.1, 'balance_data': True, 'batch_size': 32, 'n_first_c

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m50144/50144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 982us/step - accuracy: 0.5381 - loss: 1.1585 - val_accuracy: 0.6182 - val_loss: 0.6604
X_test shape: (60000, 55)
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 395us/step
market cumulative % per class: [0.1618 1.    ]
{'performance_score': nan, 'trimmed_average_profit': nan, 'average_profit': 0.999166943617143, 'median_profit': 0.9930199600798403, 'prediction_is_buy_count': 3, 'loss_limit_reached_pct': 0.6666666666666666, 'market_rate': 0.1618, 'true_positives': 0, 'true_negatives': 50289, 'false_positives': 3, 'false_negatives': 9708, 'winning_rate': 0.0, 'winning_rate_vs_market': -0.1618, 'buying_time': 'Close', 'selling_time': 'Open', 'target_future_days': 3, 'loss_limit': 0.997, 'sell_at_target': False, 'size_layer_1': 128, 'size_layer_2': 128, 'size_layer_3': 128, 'dropout_rate': 0.1, 'balance_data': True, 'batch_size': 32, 'n_first_classes': [0, 0], 'cumulated_probs_target': 0.9, 'threshol

In [6]:
# Lift the pandas display limits so the whole results table is rendered.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

# One row per evaluated configuration, best performance score first.
df_results = pd.DataFrame(results).sort_values(by='performance_score', ascending=False)
df_results.head(1000)

Unnamed: 0,performance_score,trimmed_average_profit,average_profit,median_profit,prediction_is_buy_count,loss_limit_reached_pct,market_rate,true_positives,true_negatives,false_positives,false_negatives,winning_rate,winning_rate_vs_market,buying_time,selling_time,target_future_days,loss_limit,sell_at_target,size_layer_1,size_layer_2,size_layer_3,dropout_rate,balance_data,batch_size,n_first_classes,cumulated_probs_target,thresholds,rank_pct_thresholds,epochs
0,108.98415,1.00933,1.034546,0.99302,109,0.513761,0.075517,31,55391,78,4500,0.284404,0.208887,Open,Open,1,0.997,False,128,128,128,0.1,True,32,"[0, 0]",0.9,[1.0184],[0.45],1
3,105.07393,1.004731,1.00984,0.996008,835,0.235928,0.035433,196,57235,639,1930,0.234731,0.199297,Close,Open,1,0.997,False,128,128,128,0.1,True,32,"[0, 0]",0.9,[1.0184],[0.45],1
1,95.249808,1.018899,1.052708,0.99302,82,0.52439,0.0944,29,54283,53,5635,0.353659,0.259259,Open,Open,2,0.997,False,128,128,128,0.1,True,32,"[0, 0]",0.9,[1.0184],[0.45],1
2,53.974618,1.009607,1.066477,0.99302,50,0.46,0.099,17,54027,33,5923,0.34,0.241,Open,Open,3,0.997,False,128,128,128,0.1,True,32,"[0, 0]",0.9,[1.0184],[0.45],1
4,31.129897,0.99656,1.049406,0.99302,32,0.6875,0.132583,9,52022,23,7946,0.28125,0.148667,Close,Open,2,0.997,False,128,128,128,0.1,True,32,"[0, 0]",0.9,[1.0184],[0.45],1
5,,,0.999167,0.99302,3,0.666667,0.1618,0,50289,3,9708,0.0,-0.1618,Close,Open,3,0.997,False,128,128,128,0.1,True,32,"[0, 0]",0.9,[1.0184],[0.45],1


In [7]:
# Save the results table to an Excel file whose name starts with hf.get_date().
results_path = f'./outputs/{hf.get_date()}_classifier_results.xlsx'
df_results.to_excel(results_path)