In [None]:
# import tracemalloc

# tracemalloc.start()
# snapshot1 = tracemalloc.take_snapshot()

import config as cfg
from utils import helper_functions as hf
from utils import inputs
from utils import outputs
from utils import tf_classifier_model
from utils import evaluate as eval

from IPython.display import display, clear_output

import pandas as pd
import numpy as np

import hyperopt
from hyperopt import hp, fmin, tpe, STATUS_FAIL, STATUS_OK, Trials

import os

# Process-wide settings for the debugger, TensorFlow logging and pandas.
# NOTE(review): TensorFlow reads TF_CPP_MIN_LOG_LEVEL when it is first imported. If
# utils.tf_classifier_model (imported above) already imports TensorFlow, the assignment
# below comes too late to take effect — confirm, and move it above the imports if so.
os.environ['PYDEVD_DISABLE_FILE_VALIDATION'] = '1' # disable file validation in the debugger
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # 0: all logs (default), 1: hide INFO, 2: also hide WARNING, 3: also hide ERROR
pd.options.mode.copy_on_write = True # enable pandas copy-on-write to avoid unnecessary copies of DataFrames or Series

In [2]:
# Number of runs to expect: the hyperopt trial budget when hyperopt is enabled,
# otherwise whatever hf.get_num_combinations reports for the parameter grid.
if cfg.use_hyperopt:
    num_combinations = cfg.hyperopt_n_iterations
else:
    num_combinations = hf.get_num_combinations(cfg.param_grid)

print(num_combinations)

50


In [3]:
def is_valid_combination(hyperparams):
    """Tell whether a hyperparameter combination should be run.

    A combination is accepted when 'target_future_days' is non-zero, or — when it
    is zero — only if 'buying_time' is 'Open' and 'selling_time' is 'Close'.
    The buy/sell keys are only read when 'target_future_days' equals zero.
    """
    nonzero_horizon = hyperparams['target_future_days'] != 0
    if nonzero_horizon:
        return nonzero_horizon

    buys_at_open = hyperparams['buying_time'] == 'Open'
    return buys_at_open and hyperparams['selling_time'] == 'Close'

In [4]:
# Load the pickled table at cfg.db_path, pass it through hf.get_rows_after_date with
# cfg.start_date, then through hf.fillnavalues.
# NOTE(review): unpickling executes arbitrary code — only point cfg.db_path at trusted files.
df = hf.fillnavalues(
    hf.get_rows_after_date(pd.read_pickle(cfg.db_path), cfg.start_date)
)

def get_single_level_df(df, ohlcv):
    """Return the `ohlcv` column group of `df` after hf.remove_top_column_name.

    The list-style selection `df[[ohlcv]]` keeps the selection a DataFrame; the
    helper presumably drops the top level of the column index — confirm in utils.
    """
    return hf.remove_top_column_name(df[[ohlcv]])

def get_ohlcv_dfs(df):
    """Build a dict of single-level frames, one per OHLCV field of `df`.

    Returns a dict with keys 'df_open', 'df_high', 'df_low', 'df_close' and
    'df_volume' (in that order), each produced by get_single_level_df.
    """
    key_to_field = (
        ('df_open', 'Open'),
        ('df_high', 'High'),
        ('df_low', 'Low'),
        ('df_close', 'Close'),
        ('df_volume', 'Volume'),
    )

    return {key: get_single_level_df(df, field) for key, field in key_to_field}

# Ticker count as reported by hf.get_num_tickers for the 'Open' frame; later cells pass it
# to outputs.add_outputs and eval.evaluate_model.
num_tickers = hf.get_num_tickers(get_single_level_df(df, 'Open'))
print(f'number of tickers: {num_tickers}')


number of tickers: 593


In [5]:
def get_df_data(hyperparams):
    """Assemble the dataset (inputs plus outputs) for one hyperparameter combination.

    Inputs are read from cfg.transformed_data_path when that file exists and
    cfg.use_saved_transformed_data is truthy; otherwise they are computed with
    inputs.get_inputs and pickled to that path. Outputs are then added through
    outputs.add_outputs and rows containing NaN are dropped.

    NOTE(review): the cache path does not depend on hyperparams['buying_time'],
    although inputs.get_inputs receives it — a cached file may not match the
    current buying_time. Confirm whether that matters.
    """
    buy_time = hyperparams['buying_time']
    buy_prices = get_single_level_df(df, buy_time)
    sell_prices = get_single_level_df(df, hyperparams['selling_time'])
    ohlcv_frames = get_ohlcv_dfs(df)

    cache_path = cfg.transformed_data_path
    cache_usable = os.path.exists(cache_path) and cfg.use_saved_transformed_data

    if not cache_usable:
        # Nothing usable on disk: compute the inputs and store them for later runs.
        features = inputs.get_inputs(buy_prices, ohlcv_frames, buy_time)
        features.to_pickle(cache_path)
    else:
        features = pd.read_pickle(cache_path)

    with_outputs = outputs.add_outputs(
        features, buy_prices, sell_prices, ohlcv_frames, num_tickers,
        cfg.output_class_name, cfg.fee, **hyperparams,
    )

    return with_outputs.dropna()

In [6]:
# Short random tag created when this cell runs; save_results embeds it in the Excel filename.
random_string = hf.get_random_string(length=3)

def save_results(results, print_results):
    """Write the collected results to an Excel file, optionally sorting and showing them.

    Args:
        results: list of per-run result dicts (one row each).
        print_results: when True, sort the rows by 'performance_score' (descending),
            display up to 1000 of them in the notebook, and write the sorted table;
            when False, write the rows in their original order without displaying.

    The file is written to ./outputs/ (created if missing) and named from
    hf.get_date() and the module-level random_string.
    """
    df_results = pd.DataFrame(results)

    if print_results:
        # An empty result list yields a frame without columns; sorting by a
        # missing column would raise KeyError, so only sort when it exists.
        if 'performance_score' in df_results.columns:
            df_results = df_results.sort_values(by='performance_score', ascending=False)

        # A bare `df.head()` inside a function renders nothing, so display() it
        # explicitly. option_context keeps the unlimited display settings from
        # leaking into the rest of the session.
        with pd.option_context('display.max_columns', None, 'display.max_rows', None):
            display(df_results.head(1000))

    # to_excel does not create missing directories.
    os.makedirs('./outputs', exist_ok=True)
    df_results.to_excel(f'./outputs/{hf.get_date()}_classifier_results_{random_string}.xlsx', index=False)


In [None]:
from itertools import product

# Run counter used by the search code below to decide when to save intermediate results.
i = 0
# Result rows (one dict per completed run) accumulated across the whole search.
results = []

def get_model_result(hyperparams):
    """Run one hyperparameter combination and return its result row.

    Builds the dataset with get_df_data, obtains the model and its train/test
    data from tf_classifier_model.load_tf_model, and scores it with
    eval.evaluate_model (`eval` here is the project's utils.evaluate module,
    not the builtin). The hyperparameters and the resulting row are printed.

    Returns:
        dict merging the performance metrics, the hyperparameters and
        'epochs' (cfg.epochs); on key collisions the later source wins.
    """
    print(hyperparams)

    dataset = get_df_data(hyperparams)
    split_data, classifier = tf_classifier_model.load_tf_model(dataset, hyperparams)
    metrics = eval.evaluate_model(dataset, classifier, split_data, num_tickers, num_combinations, hyperparams)

    row = dict(metrics)
    row.update(hyperparams)
    row['epochs'] = cfg.epochs
    print(row)

    return row

if cfg.use_hyperopt:
    def objective(hyperparams):
        """Hyperopt objective: run one trial and report its loss.

        Returns a hyperopt result dict: {'loss': -performance_score, 'status': STATUS_OK}
        for a completed trial, or {'status': STATUS_FAIL} when the combination is
        invalid or get_model_result raises ValueError (hyperopt ignores failed trials).
        Intermediate results are saved every cfg.save_every_n_iterations trials and
        on the last trial.
        """
        global i
        # Count every trial; without this the modulo check below is true on each
        # call (0 % n == 0) and the Excel file is rewritten after every trial.
        i += 1

        outcome = {'status': STATUS_FAIL}
        try:
            # Invalid combinations are reported as failed trials instead of
            # falling through to an unbound `result`.
            if is_valid_combination(hyperparams):
                result = get_model_result(hyperparams)
                results.append(result)
                # hyperopt minimizes the loss, so negate the score to maximize it.
                outcome = {'loss': -result['performance_score'], 'status': STATUS_OK}
        except ValueError as e:
            print(f'Skipping trial, error: {e}')

        if i % cfg.save_every_n_iterations == 0 or i == cfg.hyperopt_n_iterations:
            save_results(results, print_results=False)

        return outcome

    # Pass the Trials object to fmin so the trial history is actually recorded in it.
    trials = Trials()
    best = fmin(objective, cfg.search_space, algo=tpe.suggest,
                max_evals=cfg.hyperopt_n_iterations, trials=trials)
    # fmin reports hp.choice dimensions as indices; space_eval maps them back to values.
    print(f'best parameters: {hyperopt.space_eval(cfg.search_space, best)}')
else:
    # Exhaustive grid search over the cartesian product of cfg.param_grid values.
    for params in product(*cfg.param_grid.values()):
        i += 1

        hf.print_combination(i, num_combinations)

        hyperparams = dict(zip(cfg.param_grid.keys(), params))

        if is_valid_combination(hyperparams):
            result = get_model_result(hyperparams)
            results.append(result)

        if i % cfg.save_every_n_iterations == 0 or i == num_combinations:
            save_results(results, print_results=False)
            clear_output(wait=True) # clear printed outputs

# Final save, with the results also sorted and shown.
save_results(results, print_results=True)

# snapshot2 = tracemalloc.take_snapshot()
# top_stats = snapshot2.compare_to(snapshot1, 'lineno')

# for stat in top_stats[:6]:
#     print(stat)

# tracemalloc.stop()

{'balance_data': False, 'batch_size': 128, 'buying_time': 'Open', 'cumulated_probs_target': 0.5521877687892864, 'dropout_rate': 0.1313476583028004, 'loss_limit': 0.9795777095198637, 'n_first_classes': (0, 0), 'rank_pct_thresholds': (0.08, 0.2, 0.33), 'sell_at_target': False, 'selling_time': 'Close', 'size_layer_1': 128, 'size_layer_2': 128, 'size_layer_3': 128, 'target_future_days': 0, 'thresholds': (1.01,)}
Epoch 1/2                                             

[1m    1/12849[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:37:47[0m 737ms/step - accuracy: 0.5156 - loss: 0.9701
[1m   42/12849[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15s[0m 1ms/step - accuracy: 0.5360 - loss: 0.8411      
[1m   85/12849[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15s[0m 1ms/step - accuracy: 0.5726 - loss: 0.7804
[1m  125/12849[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15s[0m 1ms/step - accuracy: 0.5987 - loss: 0.7452
[1m  162/12849[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15s[0m 1ms/step - accuracy: 0.6172 - loss: 0.

KeyboardInterrupt: 