### How to simulate
1. Retrain the model every n days.
2. Each retrain uses all historical data available up to the retrain point (prices are normalized by the first day's open price for each game).
3. Roll the account balance (cash and holdings) forward from one trading window to the next.

In [1]:
import sys

sys.path.append('../')

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the complete price/indicator history used by the simulation.
# The list may name several CSV files; their rows are stacked in list order.
stock_history_df = pd.concat(
    [pd.read_csv(csv_path) for csv_path in ['./DOGE_px.csv']]
)

In [4]:
# Order the rows by the 'time' column, then replace every missing value with
# a tiny positive constant (1e-10) so no NaN is left in the frame.
stock_history_df = stock_history_df.sort_values(by='time').fillna(1e-10)

In [5]:
# Preview the first two rows to sanity-check the loaded columns.
stock_history_df.head(2)

Unnamed: 0,time,open,high,low,close,adjcp,volume,tic,cci_30,rsi_30,rsi_14,rsi_6,dx_30,dx_14
0,2020-12-31,0.004636,0.004743,0.004553,0.004682,0.004682,85498337.0,DOGE-USD,1e-10,1e-10,1e-10,1e-10,1e-10,1e-10
1,2021-01-01,0.004681,0.005685,0.004615,0.005685,0.005685,228961515.0,DOGE-USD,66.66667,100.0,100.0,100.0,100.0,100.0


In [6]:
# Row index at which simulated trading begins; the rows before it form the
# training history of the first model.
start_trade_index = 180
# Number of rows between two consecutive model retrains.
model_retrain_interval = 15

# Technical-indicator columns handed to the environment as `tech_array`.
tech_indicators = ['cci_30', 'rsi_30', 'rsi_14', 'rsi_6', 'dx_30', 'dx_14']

# Filename template for saved models; %i receives the training cut-off index.
cwd = './CryptoModel/doge_model_%i.pkl'
# Forwarded to the environment config as 'if_value'.
reward_on_value = True
# Forwarded to the environment config as 'lookback_n'.
lookback_n = 3

# One episode / trading window spans exactly one retrain interval.
config_max_step = model_retrain_interval

# The reward scale depends on which reward definition is selected above.
reward_scaling = 2 ** -10 if reward_on_value else 2 ** -5

In [7]:
from test_env.single_crypto_env import CryptoTradingEnv

from stable_baselines3 import PPO, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv, VecCheckNan, VecNormalize
from stable_baselines3.common.logger import configure

In [8]:
# Directory that receives the Stable-Baselines3 training logs.
tmp_path = "./tmp/sb3_log/"
# Logger that reports to stdout and to a CSV file under `tmp_path`.
new_logger = configure(folder=tmp_path, format_strings=["stdout", "csv"])

Logging to ./tmp/sb3_log/


In [9]:
def modelTraining(time_idx, px_df, total_train_timesteps = 1e4):
    """Train a DDPG agent on the rows before ``time_idx`` and save it to disk.

    Parameters
    ----------
    time_idx : int
        Exclusive row cut-off: only ``px_df.iloc[:time_idx]`` is used for
        training. It is also substituted into the ``cwd`` filename template.
    px_df : pandas.DataFrame
        Price history holding the 'open', 'adjcp', 'low' and 'high' columns
        plus every column listed in ``tech_indicators``.
    total_train_timesteps : int or float, default 1e4
        Number of environment steps to train for. Converted to ``int``
        before being passed to ``model.learn``.

    Returns
    -------
    str
        Path of the saved model file.
    """
    # Only rows strictly before the cut-off are visible to the trainer.
    train_history = px_df.iloc[:time_idx]

    config = {
        'price_array': train_history[['open', 'adjcp', 'low', 'high']].values,
        'tech_array': train_history[tech_indicators].values,
        # Training-mode switches of CryptoTradingEnv; presumably they randomize
        # the episode start and the initial holdings -- confirm against
        # test_env.single_crypto_env.
        'if_sequence': False,
        'if_randomV': True,
        'if_value': reward_on_value,
        'lookback_n': lookback_n,
    }

    crypto_env = CryptoTradingEnv(config,
                                  initial_capital=1e-5,
                                  initial_stocks=np.array([200.0]),
                                  max_step=config_max_step,
                                  reward_scaling=reward_scaling)

    # Wrap the single env for SB3 and fail fast if NaNs show up.
    env_train = DummyVecEnv([lambda: crypto_env])
    env_train = VecCheckNan(env_train, raise_exception=True)

    model = DDPG("MlpPolicy", env_train, learning_rate=0.00025,
                 batch_size=128, gamma=0.99, seed=312)
    model.set_logger(new_logger)

    # `learn` documents total_timesteps as an int; the default (1e4) and
    # typical call sites (1e5) supply floats, so convert explicitly.
    model.learn(total_timesteps=int(total_train_timesteps),
                tb_log_name='ddpg', log_interval=1000)
    print('Training finished!')

    # Build the output path once and reuse it for save, message and return.
    model_path = cwd % (time_idx)
    model.save(model_path)
    print('Trained model saved in ' + str(model_path))
    return model_path

In [10]:
def modelRun(start_idx, px_df, input_amount, input_stocks, last_model):
    """Back-test a saved model over one trading window.

    Parameters
    ----------
    start_idx : int
        Row index of ``px_df`` at which the trading window starts.
    px_df : pandas.DataFrame
        Price history holding the 'open', 'adjcp', 'low' and 'high' columns
        plus every column listed in ``tech_indicators``.
    input_amount : float
        Value passed to the environment as ``initial_capital``.
    input_stocks : numpy.ndarray
        Value passed to the environment as ``initial_stocks``.
    last_model : str
        Path of the saved DDPG model to load.

    Returns
    -------
    tuple
        ``(test_env.amount, test_env.stocks)`` after the window has been
        played out, or ``(input_amount, input_stocks)`` unchanged when the
        window contains no step to trade.
    """
    # The window covers at most `config_max_step` rows, clipped at the end of
    # the data; the number of env steps is one less than the number of rows.
    max_step = min(config_max_step, px_df.shape[0] - start_idx) - 1
    if max_step < 1:
        # At most one row is left, so there is no step to take. Hand the
        # holdings back untouched instead of running a zero-length episode.
        # NOTE(review): assumes zero steps means "no trade" -- confirm with
        # CryptoTradingEnv.
        print ('No trading steps left from ', start_idx)
        return input_amount, input_stocks

    window_end = start_idx + config_max_step
    window_history = px_df.iloc[:window_end]

    test_config = {
        'price_array': window_history[['open', 'adjcp', 'low', 'high']].values,
        'tech_array': window_history[tech_indicators].values,
        # Back-test switches: sequential mode and no random initial capital.
        # NOTE(review): presumably 'if_sequence' makes the env start at
        # `start_idx` rather than at a random row -- confirm with the env.
        'if_sequence': True,
        'if_randomV': False,
        'if_value': reward_on_value,
        'lookback_n': lookback_n,
    }

    print ('Run model from ', start_idx, ' to ', start_idx + max_step)

    test_env = CryptoTradingEnv(test_config,
                                initial_capital=input_amount,
                                max_step=max_step,
                                initial_stocks=input_stocks,
                                reward_scaling=reward_scaling,
                                start_idx=start_idx)
    state = test_env.reset()

    # Only the policy network is needed for inference; put it in eval mode.
    policy = DDPG.load(last_model).policy
    policy.eval()

    done = False
    while not done:
        # Request deterministic actions explicitly so the back-test does not
        # depend on predict()'s default (deterministic=False).
        action = policy.predict(state, deterministic=True)[0]
        state, reward, done, _ = test_env.step(action)

    return test_env.amount, test_env.stocks

### Simulation Train and Trade

In [11]:
# Starting portfolio of the walk-forward simulation: the account balance and
# a one-element array with the number of units held.
initial_sim_amount = 0.01
initial_sim_stocks = np.asarray([200.0])

In [12]:
# Walk-forward simulation: at every retrain point train a model on the rows
# seen so far, trade the next window with it, and carry the resulting balance
# and holdings into the following window.
test_amount = initial_sim_amount
# Work on a copy: `initial_sim_stocks` is read again later as the baseline for
# the return figures, so it must stay intact even if the environment updates
# the array it receives in place.
test_stocks = initial_sim_stocks.copy()

for t in range(start_trade_index, stock_history_df.shape[0], model_retrain_interval):
    print ('Training model at time ', t)
    model_file = modelTraining(t, stock_history_df)

    print ('Applying model')
    test_amount, test_stocks = modelRun(t, stock_history_df, test_amount, test_stocks, model_file)

print (f'Final amount: {test_amount}, stocks: {test_stocks[0]}')

Training model at time  180
Training finished!
Trained model saved in ./CryptoModel/doge_model_180.pkl
Applying model
Run model from  180  to  194
initial stock: [200.] inital amount:  0.01
initial asset:  52.56380249023437
[Day 181] BUY: 0.014100000000000001
[Day 182] BUY: 0.0021000000000000003
[Day 184] SELL: 68.9796
[Day 185] SELL: 46.7969
[Day 187] SELL: 15.702100000000002
[Day 188] SELL: 13.0984
[Day 190] SELL: 49.8952
[Day 191] SELL: 4.9896
[Day 192] SELL: 0.4989
[Day 193] SELL: 0.0499
Episode Return:  0.8960164707447654
Training model at time  195
Training finished!
Trained model saved in ./CryptoModel/doge_model_195.pkl
Applying model
Run model from  195  to  209
initial stock: [0.00560539] inital amount:  47.09691170119228
initial asset:  47.09801777934669
[Day 196] BUY: 133.209
[Day 198] SELL: 48.9326
[Day 201] SELL: 31.023300000000003
[Day 202] SELL: 18.54
[Day 204] BUY: 165.3298
[Day 205] BUY: 19.604400000000002
[Day 206] BUY: 9.4929
[Day 207] BUY: 2.8956
[Day 208] BUY: 0.3

In [17]:
# Buy-and-hold benchmark: value of the untouched starting portfolio at the
# last close divided by its value at the first close.
# NOTE(review): the denominator prices the portfolio at row 0, whereas the
# simulation loop only starts trading at `start_trade_index` -- confirm that
# row 0 is the intended baseline.
holding_return = (
    initial_sim_amount + initial_sim_stocks[0] * stock_history_df['close'].iloc[-1]
) / (
    initial_sim_amount + initial_sim_stocks[0] * stock_history_df['close'].iloc[0]
)

In [18]:
# Strategy result: value of the final simulated portfolio at the last close
# divided by the value of the starting portfolio at the first close.
# NOTE(review): the denominator prices the starting portfolio at row 0,
# whereas the simulation loop only starts trading at `start_trade_index` --
# confirm that row 0 is the intended baseline.
trading_return = (
    test_amount + test_stocks[0] * stock_history_df['close'].iloc[-1]
) / (
    initial_sim_amount + initial_sim_stocks[0] * stock_history_df['close'].iloc[0]
)

In [19]:
# Report the buy-and-hold benchmark next to the strategy result.
print(f'Holding return: {holding_return}, Trading return: {trading_return}')

Holding return: 50.71428873010237, Trading return: 67.07022093157205


### Final model for daily run

In [20]:
# Train the model meant for daily use with ten times the default step budget.
# NOTE(review): the cut-off is the row count minus one and modelTraining
# slices `iloc[:time_idx]`, so the last row is not part of the training
# data -- confirm this is intended.
final_model_file = modelTraining(
    time_idx=len(stock_history_df) - 1,
    px_df=stock_history_df,
    total_train_timesteps=1e5,
)

---------------------------------
| time/              |          |
|    episodes        | 1000     |
|    fps             | 167      |
|    time_elapsed    | 89       |
|    total timesteps | 15000    |
| train/             |          |
|    actor_loss      | -0.0164  |
|    critic_loss     | 3.38e-05 |
|    learning_rate   | 0.00025  |
|    n_updates       | 14895    |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 2000     |
|    fps             | 166      |
|    time_elapsed    | 180      |
|    total timesteps | 30000    |
| train/             |          |
|    actor_loss      | -0.0213  |
|    critic_loss     | 2.56e-05 |
|    learning_rate   | 0.00025  |
|    n_updates       | 29895    |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 3000     |
|    fps             | 166      |
|    time_elapsed    | 270      |
|    total tim

In [21]:
# Show the path the final model was written to.
final_model_file

'./CryptoModel/doge_model_260.pkl'