In [1]:
#   https://stable-baselines3.readthedocs.io/en/master/modules/sac.html
#   https://stable-baselines3.readthedocs.io/en/master/modules/td3.html
#   https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html
#   https://github.com/araffin/rl-baselines-zoo

import os
import sys
from os.path import join as path_join
from typing import Callable

import numpy as np
from d3rlpy.wrappers.sb3 import to_mdp_dataset
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

sys.path.append("..")  # Adds higher directory to python modules path.

from common.SaveToDiskOnBestTrainingRewardCallback import SaveToDiskOnBestTrainingRewardCallback
from common.config_utils import get_paths_params, get_data_params, get_pf_fx_env_params, get_train_params, \
    get_agent_params, get_model_params
from common.data_utils import prepare_date_train
from common.env_utils import create_env
from common.forex_utils import get_forex_7, get_forex_12, get_forex_14, get_forex_18, get_forex_28
from common.model_utils import get_model_name, get_online_class_and_policy, get_action_noise_class


# ONLINE_ALGORITHM = 'TD3'
# NUMBER_OF_TRIALS = 100
# TOTAL_TIMESTEPS = 100000

ModuleNotFoundError: No module named 'common'

In [None]:
def run_trial(online_algorithm, train_look_back_period, total_timesteps, config_file):
    """Train one online RL agent on the forex environment, resuming from a checkpoint if present.

    Data, environment, agent, model-naming and path settings are read from
    ``config_file`` through the ``get_*_params`` helpers. The environment is
    wrapped in ``Monitor`` -> ``DummyVecEnv`` -> ``VecNormalize``. If both the
    saved model and the normalisation statistics exist in the model directory,
    they are loaded and training continues from them; otherwise a new model is
    built for ``online_algorithm``.

    Args:
        online_algorithm: Algorithm name. 'PPO', 'A2C', 'SAC' and 'TD3' are the
            names for which a new model can be constructed here.
        train_look_back_period: Forwarded to ``prepare_date_train`` and
            ``get_model_name``.
        total_timesteps: Forwarded to ``learn`` and ``get_model_name``.
        config_file: Configuration file passed to every ``get_*_params`` helper.

    Raises:
        ValueError: If the configured number of instruments has no matching
            ``get_forex_*`` loader, or if a new model must be created for an
            algorithm other than the four listed above.
    """
    v_market, v_resolution, v_num_of_instruments, v_spread, v_subdir, \
    v_train_test_split = get_data_params(config_file)

    # Instrument count -> loader for the matching instrument universe.
    forex_loaders = {
        # NOTE(review): 4 instruments reuses the 7-pair loader exactly as the
        # original code did; confirm whether a dedicated 4-pair loader is intended.
        4: get_forex_7,
        7: get_forex_7,
        12: get_forex_12,
        14: get_forex_14,
        18: get_forex_18,
        28: get_forex_28,
    }
    if v_num_of_instruments not in forex_loaders:
        # Fail here with a clear message instead of an UnboundLocalError further down.
        raise ValueError(f'Unsupported number of instruments: {v_num_of_instruments!r}; '
                         f'expected one of {sorted(forex_loaders)}')
    v_instruments, v_pip_size, v_pip_spread = forex_loaders[v_num_of_instruments](v_spread)

    v_data = prepare_date_train(v_subdir, v_market, v_resolution, v_instruments, train_look_back_period,
                                v_train_test_split)

    print(f'Data shape:{np.shape(v_data)}')

    v_env_lookback_period, v_random_episode_start, v_cash, v_max_slippage_percent, v_lot_size, v_leverage, \
    v_compute_position, v_compute_indicators, v_compute_reward, v_env_verbose = get_pf_fx_env_params(config_file)

    v_env = create_env(v_data,
                       v_instruments,
                       v_env_lookback_period,
                       v_random_episode_start,
                       v_cash,
                       v_max_slippage_percent,
                       v_lot_size,
                       v_leverage,
                       v_pip_size,
                       v_pip_spread,
                       v_compute_position,
                       v_compute_indicators,
                       v_compute_reward,
                       v_env_verbose)

    # Everything after the environment exists runs under try/finally so the
    # environment is closed even when training fails or is interrupted.
    try:
        print(
            f'Instruments:{v_instruments}, lookack:{v_env_lookback_period}, random_episode_start:{v_random_episode_start}, cash:{v_cash}, max_slippage_percent:{v_max_slippage_percent}, lot_size:{v_lot_size}, leverage:{v_leverage}, pip_size:{v_pip_size}, pip_spread:{v_pip_spread}, compute_position:{v_compute_position}, compute_indicators:{v_compute_indicators}, compute_reward:{v_compute_reward}, verbose:{v_env_verbose}')

        v_model_verbose, v_callback_verbose, v_save_replay_buffer, v_use_tensorboard, v_use_callback, v_check_freq, \
        v_callback_lookback, v_save_freq, v_learning_rate, v_net_arch, v_action_noise, v_noise_sigma, v_use_sde = get_agent_params(
            config_file)

        v_delimeter, v_model_prefix = get_model_params(config_file)

        v_model_name = get_model_name(v_delimeter, v_model_prefix, v_leverage, v_action_noise, v_use_callback,
                                      v_random_episode_start, v_instruments, total_timesteps, train_look_back_period,
                                      v_env_lookback_period, v_spread, v_market, v_resolution, online_algorithm.lower(),
                                      v_compute_position, v_compute_indicators, v_compute_reward)

        print(f'Model name:{v_model_name}')

        v_main_dir, v_models_dir, v_logs_dir = get_paths_params(config_file)

        v_online_model_dir = path_join(*[v_models_dir, v_resolution, v_subdir, v_model_name, 'online'])
        v_online_model_file_name = path_join(v_online_model_dir, 'model.zip')
        v_online_model_file_name_stats = path_join(v_online_model_dir, 'stats.pkl')
        v_online_model_replay_buffer = path_join(v_online_model_dir, 'replay_buffer.pkl')
        v_online_model_dataset_file_name = path_join(v_online_model_dir, 'dataset.h5')

        # Use the very same path for creating, checking, saving and deleting.
        # Mixing original-case and lower-cased variants breaks on
        # case-sensitive file systems (files are written where they are never
        # looked up, and a second makedirs call raises FileExistsError).
        os.makedirs(v_online_model_dir, exist_ok=True)

        v_monitor = path_join(v_logs_dir, v_model_name)

        v_dummy_vec_env = DummyVecEnv([lambda: Monitor(v_env, v_monitor)])
        v_dummy_vec_env.seed(1)

        online_class, online_policy = get_online_class_and_policy(online_algorithm)

        n_actions = v_dummy_vec_env.action_space.shape[-1]

        v_action_noise_class = get_action_noise_class(online_algorithm, v_action_noise, n_actions, v_noise_sigma)

        has_checkpoint = (os.path.isfile(v_online_model_file_name)
                          and os.path.isfile(v_online_model_file_name_stats))

        if has_checkpoint:
            # Resume: restore normalisation statistics and the model, and keep
            # them. Previously a fresh model was built unconditionally right
            # after this, silently throwing the loaded one away.
            v_vec_normalize = VecNormalize.load(v_online_model_file_name_stats, v_dummy_vec_env)
            v_vec_normalize.reset()
            v_online_model = online_class.load(v_online_model_file_name, v_vec_normalize)
            print('Model Loaded ...')
        else:
            v_vec_normalize = VecNormalize(v_dummy_vec_env)
            v_vec_normalize.seed(1)

            params = {
                'learning_rate': v_learning_rate,
                'policy_kwargs': dict(net_arch=v_net_arch)
            }
            tensorboard_log = v_logs_dir if v_use_tensorboard else None

            if online_algorithm in ('PPO', 'A2C', 'SAC'):
                v_online_model = online_class(env=v_vec_normalize, policy=online_policy, verbose=v_model_verbose,
                                              **params, use_sde=v_use_sde, tensorboard_log=tensorboard_log)
            elif online_algorithm == 'TD3':
                v_online_model = online_class(env=v_vec_normalize, policy=online_policy,
                                              action_noise=v_action_noise_class, optimize_memory_usage=True,
                                              verbose=v_model_verbose, **params, tensorboard_log=tensorboard_log)
            else:
                raise ValueError(f'Unsupported online algorithm: {online_algorithm!r}')

        # Restore a previously saved replay buffer, if one is on disk.
        if os.path.isfile(v_online_model_replay_buffer):
            v_online_model.load_replay_buffer(v_online_model_replay_buffer)

        print("Start training model...")

        callback = None
        if v_use_callback:
            callback = SaveToDiskOnBestTrainingRewardCallback(check_freq=v_check_freq, save_freq=v_save_freq,
                                                              lookback=v_callback_lookback,
                                                              online_algorithm=online_algorithm,
                                                              model_file_name=v_online_model_file_name,
                                                              model_replay_buffer=v_online_model_replay_buffer,
                                                              model_stats=v_online_model_file_name_stats,
                                                              save_replay_buffer=v_save_replay_buffer,
                                                              verbose=v_callback_verbose)

        # reset_num_timesteps=False keeps the timestep counter running across resumed runs.
        v_online_model.learn(total_timesteps=total_timesteps, log_interval=1000, reset_num_timesteps=False,
                             tb_log_name=v_model_name, callback=callback)

        if not v_use_callback:
            # Without the callback nothing has been written yet, so save the
            # model and normalisation statistics here.
            v_online_model.save(v_online_model_file_name)
            v_vec_normalize.save(v_online_model_file_name_stats)

        if v_save_replay_buffer:
            # Best effort: export the replay buffer as a dataset and drop the
            # pickled buffer; any failure is reported but does not abort the trial.
            try:
                dataset = to_mdp_dataset(v_online_model.replay_buffer)
                dataset.dump(v_online_model_dataset_file_name)
                os.remove(v_online_model_replay_buffer)
            except Exception as e:
                print(e)

        print("End training online model...")
    finally:
        v_env.close()


In [None]:
# Configuration file handed to get_train_params and to every run_trial call.
v_config_file = '../config-oanda.ini'

# get_train_params yields four values; unpack them in one parenthesised target.
(v_online_algorithm,
 v_number_of_trials,
 v_train_look_back_period,
 v_total_timesteps) = get_train_params(v_config_file)

# Run the configured number of trials back to back with identical arguments.
for trial_index in range(v_number_of_trials):
    run_trial(v_online_algorithm, v_train_look_back_period, v_total_timesteps, v_config_file)

row_count=4871, start_row=3870, start_date=2017-12-17T22:00:00.000000000, end_row=4870, end_date=2021-10-27T21:00:00.000000000
Data shape:(7, 1000, 4)
Instruments:['EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDCAD', 'USDCHF', 'NZDUSD'], lookack:30, random_episode_start:True, cash:1000.0, max_slippage_percent:0.01, lot_size:Micro, leverage:20, pip_size:[0.0001, 0.01, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001], pip_spread:[2, 2, 2, 2, 2, 2, 2], compute_position:long_and_short, compute_indicators:all, compute_reward:['log_returns'], verbose:False
Model name:fx_sb3_leverage_train_with_callback_with_random_episode_start_noise_ou-7-100000-1000-30-2-oanda-daily-on_algo.td3-comp_pos.long_and_short-comp_ind.all-comp_rew.[log_returns]-16e3231d
Using cuda device
Start training model...
Logging to E:\\alpha-machine\\logs\\forex\fx_sb3_leverage_train_with_callback_with_random_episode_start_noise_ou-7-100000-1000-30-2-oanda-daily-on_algo.td3-comp_pos.long_and_short-comp_ind.all-comp_rew.[log_returns]-16e

KeyboardInterrupt: 