In [1]:
import sys

# Add the parent directory to the module search path.
# NOTE(review): presumably so the project-local packages imported later in
# this notebook resolve — confirm against the repository layout.
sys.path.append('../')

In [2]:
import pandas as pd

In [3]:
# Date bounds handed to download_data() when fetching price history.
start_date = '2021-01-01'
end_date = '2021-08-30'

In [4]:
# Ticker symbols passed to download_data(); a single ticker in this notebook.
tic_list = ['ETH-USD']

In [5]:
# Indicator names: passed to add_technical_indicator() and later used as the
# column selection for the environment's tech_array.
tech_indicators = [
    'macd',
    'rsi_30',
    'cci_30',
    'dx_30',
]

In [6]:
# File path the trained model is saved to and later reloaded from.
# NOTE(review): the name reads like "current working directory" but the value
# is used as a model file path. Stable-Baselines3 presumably writes a zip
# archive regardless of the .pkl suffix — confirm before relying on the format.
cwd = './CryptoModel/model.pkl'

In [None]:
from neo_finrl.data_processors.processor_yahoofinance import YahooFinanceProcessor

In [None]:
# Processor instance used by the download, clean and indicator cells below.
data_downloader = YahooFinanceProcessor()

### Extract historical prices

In [None]:
# Fetch history for tic_list between start_date and end_date.
# '1D' is the interval argument (presumably daily bars — confirm).
stock_history_df = data_downloader.download_data(start_date, end_date, tic_list, '1D')

In [None]:
# Set the processor's time_interval attribute, then clean the downloaded frame.
# NOTE(review): presumably clean_data() reads time_interval — confirm.
data_downloader.time_interval = '1D'
stock_history_df = data_downloader.clean_data(stock_history_df)

In [None]:
# Expected to add one column per name in tech_indicators; those column names
# are selected from the frame later when building tech_array.
stock_history_df = data_downloader.add_technical_indicator(stock_history_df, tech_indicators)

In [None]:
# Cache the processed history on disk (without the index column); the
# environment setup reads this CSV back instead of re-downloading.
stock_history_df.to_csv('./ETH_hist.csv', index = False)

### Env module

In [7]:
from test_env.single_crypto_env import CryptoTradingEnv

### Setup env

In [8]:
# Load the cached history CSV written by the download section.
stock_history_df = pd.read_csv('./ETH_hist.csv')

In [9]:
# Plain NumPy views of the frame for the environment: price columns in the
# order open / adjcp / low / high, and one column per configured indicator.
price_array = stock_history_df.loc[:, ['open', 'adjcp', 'low', 'high']].values
tech_array = stock_history_df.loc[:, tech_indicators].values

In [10]:
# Row index separating the first 80% of rows (training) from the remainder
# (testing); int() truncates toward zero.
train_test_split_index = int(stock_history_df.shape[0] * 0.8)

In [11]:
# Report how many rows fall on each side of the split index.
print (f'{train_test_split_index} records for training')
print (f'{stock_history_df.shape[0] - train_test_split_index} records for testing')

193 records for training
49 records for testing


In [12]:
# Training-environment inputs: the first train_test_split_index rows of each
# array, with the if_train flag set to True.
config = {
    'price_array': price_array[:train_test_split_index],
    'tech_array': tech_array[:train_test_split_index],
    'if_train': True,
}

# Passed to CryptoTradingEnv as initial_capital and max_step.
initial_capital, max_step = 1e5, 100

In [13]:
# Training environment built from the training-slice config, starting capital
# and step limit defined in the previous cell.
crypto_env = CryptoTradingEnv(config, initial_capital=initial_capital, max_step = max_step)

### Train RL

In [14]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

In [15]:
# Wrap the single environment instance in a vectorised-env container; the
# factory returns the already-constructed crypto_env rather than a new one.
env_train = DummyVecEnv([lambda: crypto_env])

# PPO agent with an MLP policy; seed fixed for repeatability.
model = PPO(
    "MlpPolicy",
    env_train,
    learning_rate=0.00025,
    n_steps=2048,
    batch_size=128,
    ent_coef=0.0,
    gamma=0.99,
    seed=312,
)

In [16]:
# total_timesteps is documented as an int, so pass an integer literal instead
# of the float 1e4.
# NOTE(review): tb_log_name only names a TensorBoard run; the PPO constructor
# in this notebook sets no tensorboard_log, so presumably nothing is logged —
# confirm whether TensorBoard output is wanted.
model.learn(total_timesteps=10_000, tb_log_name='ppo')
print('Training finished!')

0.99480045
8.336502
8.863002
14.643405
18.594204
14.570903
6.557901
7.5271006
4.560401
8.849401
0.6634021
7.848303
6.807102
14.220904
18.406506
10.468706
9.863906
12.933108
13.310311
14.38121
9.750109
0.359308
8.062649e-06
1.0000079
7.716942e-06
6.7715087
3.3913083
2.7674077
4.1552067
0.39900732
0.50600755
0.63720816
2.4231083
1.0000081
15.089006
5.215708
16.875906
21.635914
23.893206
13.1570015
10.931098
11.0107975
4.873797
4.791096
8.558196
7.1911964
11.019795
8.572796
19.9889
14.296601
15.963199
15.877805
20.117802
22.739508
27.524614
23.115818
19.27912
23.049416
26.976616
26.976616
44.346508
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.577007
46.5770

In [17]:
# Persist the trained model to the path held in cwd and report where it went.
model.save(cwd)
print('Trained model saved in ' + str(cwd))

Trained model saved in ./CryptoModel/model.pkl


### Test RL

In [18]:
#test on the testing env
def testRun(model, env_instance):
    state = env_instance.reset()
    episode_returns = list()  # the cumulative_return / initial_account
    done = False

    while not done:
        action = model.predict(state)[0]
        #print (action)
        state, reward, done, _ = env_instance.step(action)
        
        #print (env_instance.stocks)
        #total_asset = env_instance.amount + (env_instance.price_ary[env_instance.day + env_instance.run_index, 1] \
        #                                     * env_instance.stocks).sum()
        #episode_return = total_asset / env_instance.initial_total_asset
        
        episode_return = env_instance.stocks
        episode_returns.append(episode_return)
        
    print('episode_return', episode_return)
    print('Test Finished!')  
    return episode_returns

In [19]:
# The evaluation window starts 100 rows before the train/test boundary, so it
# also contains rows that the training config covers.
# NOTE(review): presumably intentional (e.g. warm-up rows for the env), but it
# means evaluation is not strictly out-of-sample — confirm.
start_idx = train_test_split_index - 100

test_config = {
    'price_array': price_array[start_idx:],
    'tech_array': tech_array[start_idx:],
    'if_train': False,
}

initial_capital = 1e5
# Cap the episode length at 100 steps or the window length, whichever is smaller.
max_step = min(test_config['price_array'].shape[0], 100)

In [20]:
# Evaluation environment built from the test window.
test_env = CryptoTradingEnv(
    test_config,
    initial_capital=initial_capital,
    max_step=max_step,
)

# Reload the policy from the path it was saved to, rather than reusing the
# in-memory model object.
test_model = PPO.load(cwd)

In [21]:
# Run one evaluation episode. testRun returns the env's `stocks` value recorded
# after each step, so despite its name this variable holds a holdings history
# rather than a cumulative-return series.
cumulative_return = testRun(test_model, test_env)

0.0
episode_return [0.]
Test Finished!
