### How to simulate
1. Retrain the model every n days.
2. Retrain on all historical data available up to the retrain date (prices are normalized by the first-day open price of each game).
3. Roll the account balance forward from one trading window to the next.

In [1]:
import sys

sys.path.append('../')

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load both ETH history CSV files and stack them into one frame
stock_history_df = pd.concat(
    [pd.read_csv(csv_path) for csv_path in ['./ETH_hist_test.csv', './ETH_hist.csv']]
)

In [4]:
# Order rows by the 'time' column, then replace every missing value with 1e-10
stock_history_df = stock_history_df.sort_values(by='time').fillna(1e-10)

In [5]:
# Preview the first two rows to sanity-check the columns and the time ordering
stock_history_df.head(2)

Unnamed: 0,time,open,high,low,close,adjcp,volume,tic,cci_30,rsi_30,rsi_14,rsi_6,dx_30,dx_14
0,2019-12-31,132.612274,133.732681,128.798157,129.610855,129.610855,8936866000.0,ETH-USD,1e-10,1e-10,1e-10,1e-10,1e-10,1e-10
1,2020-01-01,129.630661,132.835358,129.198288,130.802002,130.802002,7935230000.0,ETH-USD,66.66667,100.0,100.0,100.0,1e-10,1e-10


In [6]:
# rows [0, start_trade_index) are used to train the first model
start_trade_index = 250
# number of rows between two consecutive model retrains
model_retrain_interval = 30

tech_indicators = ['cci_30', 'rsi_30', 'rsi_14', 'rsi_6', 'dx_30', 'dx_14']

# filename template for saved models; %i is filled with the retrain row index
cwd = './CryptoModel/model_%i.pkl'
reward_on_value = True
lookback_n = 7

# one episode spans exactly one retrain interval
config_max_step = model_retrain_interval

# reward scale depends on whether the reward is based on portfolio value
reward_scaling = 2 ** -10 if reward_on_value else 2 ** -5

In [7]:
from test_env.single_crypto_env import CryptoTradingEnv

from stable_baselines3 import PPO, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv, VecCheckNan, VecNormalize
from stable_baselines3.common.logger import configure

In [8]:
# SB3 logger: write training logs to stdout and as CSV into tmp_path
tmp_path = "./tmp/sb3_log/"
new_logger = configure(folder=tmp_path, format_strings=["stdout", "csv"])

Logging to ./tmp/sb3_log/


In [9]:
def modelTraining(time_idx, px_df, total_timesteps=5000, seed=312):
    """Train a DDPG agent on all rows before ``time_idx`` and save it to disk.

    Parameters
    ----------
    time_idx : int
        Exclusive end row of the training window: rows ``[0, time_idx)`` of
        ``px_df`` are used. It is also substituted into the ``cwd`` filename
        template to name the saved model.
    px_df : pandas.DataFrame
        Price history with the 'open', 'adjcp', 'low' and 'high' columns plus
        every column listed in ``tech_indicators``.
    total_timesteps : int, optional
        Number of environment steps to train for (default 5000, the value the
        notebook previously hard-coded as ``5e3``).
    seed : int, optional
        Random seed handed to DDPG (default 312).

    Returns
    -------
    str
        Path of the saved model file.
    """
    # Only data strictly before the retrain point is visible to training.
    history = px_df.iloc[:time_idx]

    config = {
        'price_array': history[['open', 'adjcp', 'low', 'high']].values,
        'tech_array': history[tech_indicators].values,
        # training episodes use randomised initial states (per the original
        # note; the exact meaning of both flags is defined by CryptoTradingEnv)
        'if_sequence': False,
        'if_randomV': True,
        'if_value': reward_on_value,
        'lookback_n': lookback_n,
    }

    crypto_env = CryptoTradingEnv(config,
                                  initial_capital=1e-5,
                                  initial_stocks=np.array([40.0]),
                                  max_step=config_max_step,
                                  reward_scaling=reward_scaling)

    # Wrap the single env for SB3 and fail fast if NaNs appear during training.
    env_train = DummyVecEnv([lambda: crypto_env])
    env_train = VecCheckNan(env_train, raise_exception=True)

    model = DDPG("MlpPolicy", env_train, learning_rate=0.00025,
                 batch_size=128, gamma=0.99, seed=seed)
    model.set_logger(new_logger)

    # SB3 documents total_timesteps as an int, so cast explicitly.
    model.learn(total_timesteps=int(total_timesteps), tb_log_name='ddpg', log_interval=100)
    print('Training finished!')

    # Format the destination once and reuse it for saving, logging and returning.
    model_path = cwd % time_idx
    model.save(model_path)
    print('Trained model saved in ' + str(model_path))
    return model_path

In [10]:
def modelRun(start_idx, px_df, input_amount, input_stocks, last_model):
    """Back-test a saved DDPG model over one trading window.

    Parameters
    ----------
    start_idx : int
        Row of ``px_df`` at which the trading window starts.
    px_df : pandas.DataFrame
        Price history with the 'open', 'adjcp', 'low' and 'high' columns plus
        every column listed in ``tech_indicators``.
    input_amount : float
        Cash balance at the start of the window.
    input_stocks : numpy.ndarray
        Holdings at the start of the window.
    last_model : str
        Path of the saved model to load.

    Returns
    -------
    tuple
        ``(amount, stocks)`` read from the environment after the last step.
        If fewer than two rows remain from ``start_idx``, the inputs are
        returned unchanged.
    """
    # Rows remaining from start_idx, capped at one retrain interval, minus one.
    max_step = min(config_max_step, px_df.shape[0] - start_idx) - 1
    if max_step < 1:
        # Nothing to step through (e.g. only the last row is left): keep the
        # portfolio as it is instead of building an env with no steps.
        print('No remaining data to trade from ', start_idx)
        return input_amount, input_stocks

    # The env receives history from row 0 up to the end of this window.
    window = px_df.iloc[:(start_idx + config_max_step)]

    test_config = {
        'price_array': window[['open', 'adjcp', 'low', 'high']].values,
        'tech_array': window[tech_indicators].values,
        # sequential (non-random) start for back testing -- presumably the
        # episode begins at start_idx; confirm in CryptoTradingEnv
        'if_sequence': True,
        # disable random initial capital
        'if_randomV': False,
        'if_value': reward_on_value,
        'lookback_n': lookback_n,
    }

    print ('Run model from ', start_idx, ' to ', start_idx + max_step)

    test_env = CryptoTradingEnv(test_config,
                                initial_capital=input_amount,
                                max_step=max_step,
                                initial_stocks=input_stocks,
                                reward_scaling=reward_scaling,
                                start_idx=start_idx)
    state = test_env.reset()

    # Only the policy network is needed for inference; eval() returns the
    # module itself with training mode switched off.
    test_model = DDPG.load(last_model).policy.eval()

    done = False
    while not done:
        # deterministic=True makes the back-test explicit about not sampling
        action = test_model.predict(state, deterministic=True)[0]
        state, reward, done, _ = test_env.step(action)

    return test_env.amount, test_env.stocks

### Train the first model and run the rolling simulation

In [20]:
# Starting portfolio of the simulation: holdings first, then the cash balance
initial_sim_stocks = np.array([40.0])
initial_sim_amount = 0.01

In [11]:
# Walk-forward simulation: at every retrain point t, train on the rows before
# t, then trade the window starting at t, carrying cash and holdings forward.
test_amount = initial_sim_amount
# Work on a copy: initial_sim_stocks is reused later as the baseline of the
# return calculations, so it must not be changed if the env updates its
# holdings array in place.
test_stocks = initial_sim_stocks.copy()

for t in range(start_trade_index, stock_history_df.shape[0], model_retrain_interval):
    print ('Training model at time ', t)
    model_file = modelTraining(t, stock_history_df)

    print ('Applying model')
    test_amount, test_stocks = modelRun(t, stock_history_df, test_amount, test_stocks, model_file)

print (f'Final amount: {test_amount}, stocks: {test_stocks[0]}')

Training model at time  250
---------------------------------
| time/              |          |
|    episodes        | 100      |
|    fps             | 179      |
|    time_elapsed    | 16       |
|    total timesteps | 3000     |
| train/             |          |
|    actor_loss      | -1.06    |
|    critic_loss     | 0.278    |
|    learning_rate   | 0.00025  |
|    n_updates       | 2880     |
---------------------------------
Training finished!
Trained model saved in ./CryptoModel/model_250.pkl
Applying model
Run model from  250  to  279
initial stock: [40.] inital amount:  0.01
initial asset:  14106.949453125
[Day 252] SELL: 32.0
[Day 254] SELL: 6.3997
[Day 255] BUY: 28.709300000000002
[Day 256] SELL: 23.767100000000003
[Day 257] SELL: 4.7186
[Day 258] SELL: 1.2930000000000001
[Day 259] SELL: 0.4083
[Day 260] SELL: 0.09380000000000001
[Day 261] SELL: 0.0217
[Day 262] SELL: 0.005200000000000001
[Day 263] SELL: 0.0013000000000000002
[Day 266] SELL: 0.0004
[Day 267] SELL: 0.0001
Ep

---------------------------------
| time/              |          |
|    episodes        | 100      |
|    fps             | 169      |
|    time_elapsed    | 17       |
|    total timesteps | 3000     |
| train/             |          |
|    actor_loss      | -6.48    |
|    critic_loss     | 18.6     |
|    learning_rate   | 0.00025  |
|    n_updates       | 2880     |
---------------------------------
Training finished!
Trained model saved in ./CryptoModel/model_520.pkl
Applying model
Run model from  520  to  549
initial stock: [36.4018] inital amount:  0.07361580245169955
initial asset:  94290.30799080245
[Day 525] SELL: 29.1184
[Day 526] BUY: 23.736900000000002
[Day 528] SELL: 13.9504
[Day 530] SELL: 12.266200000000001
[Day 531] SELL: 3.7847000000000004
[Day 533] SELL: 0.8053
[Day 534] SELL: 0.16870000000000002
[Day 536] SELL: 0.0356
[Day 537] SELL: 0.0074
[Day 538] SELL: 0.0015
[Day 540] SELL: 0.00030000000000000003
[Day 543] BUY: 13.8377
[Day 544] BUY: 14.8855
[Day 546] BUY: 10.

In [23]:
# Buy-and-hold benchmark: value of the untouched starting portfolio at the
# last close, divided by its value at the first close in the data set.
# NOTE(review): the denominator uses row 0, although the simulation only
# starts trading at start_trade_index -- confirm this baseline is intended.
holding_return = (
    (stock_history_df['close'].iloc[-1] * initial_sim_stocks[0]) + initial_sim_amount
) / (
    (stock_history_df['close'].iloc[0] * initial_sim_stocks[0]) + initial_sim_amount
)

In [27]:
# Simulated strategy: value of the final cash + holdings at the last close,
# divided by the value of the starting portfolio at the first close.
# NOTE(review): the denominator uses row 0, although the simulation only
# starts trading at start_trade_index -- confirm this baseline is intended.
trading_return = (
    (stock_history_df['close'].iloc[-1] * test_stocks[0]) + test_amount
) / (
    (stock_history_df['close'].iloc[0] * initial_sim_stocks[0]) + initial_sim_amount
)

In [28]:
# Report the buy-and-hold benchmark next to the simulated trading result
print (f'Holding return: {holding_return}, Trading return: {trading_return}')

Holding return: 30.308974230436476, Trading return: 33.188348721996526
