In [None]:
import sys

# Add '../' (interpreted relative to the current working directory) to the
# module search path. The membership check keeps the cell idempotent, so
# re-running it does not pile up duplicate entries in sys.path.
# NOTE(review): presumably needed so the project packages imported further
# down can be found - confirm.
if '../' not in sys.path:
    sys.path.append('../')

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Date range handed to the data downloader below, as 'YYYY-MM-DD' strings.
start_date, end_date = '2021-01-01', '2021-08-30'

In [None]:
# Tickers passed to the downloader; this notebook uses a single one.
tic_list = [
    'ETH-USD',
]

In [None]:
# Indicator names passed to add_technical_indicator and later used to select
# the feature columns for the environment.
# This cell used to assign a longer list first ('macd', 'rsi_30', 'cci_30',
# 'dx_30' plus the three below) and then immediately overwrite it; only the
# effective assignment is kept.
tech_indicators = [
    'open_2_sma',
    'rsi_6',
    'close_2_tema',
]

In [None]:
# File path the trained model is saved to and later loaded back from.
cwd = "./CryptoModel/model.pkl"

In [None]:
from neo_finrl.data_processors.processor_yahoofinance import YahooFinanceProcessor

In [None]:
# Processor object used below to download, clean and enrich the price history.
data_downloader = YahooFinanceProcessor()

### Extract historical prices

In [None]:
# Fetch the raw history for the configured tickers and date range.
# NOTE(review): the last argument is presumably the bar interval (the next
# cell sets time_interval to the same '1D' value) - confirm.
stock_history_df = data_downloader.download_data(
    start_date,
    end_date,
    tic_list,
    '1D',
)

In [None]:
# Set the interval on the processor before cleaning.
# NOTE(review): presumably clean_data reads self.time_interval, which is why
# it is assigned first - confirm against the processor implementation.
data_downloader.time_interval = '1D'
# Replace the raw frame with its cleaned version (the name is reused, so
# re-running this cell cleans already-cleaned data).
stock_history_df = data_downloader.clean_data(stock_history_df)

In [None]:
# Extend the cleaned frame using the indicator names configured above; the
# result replaces stock_history_df.
stock_history_df = data_downloader.add_technical_indicator(
    stock_history_df,
    tech_indicators,
)

In [None]:
# Persist the processed history (without the index column) so the
# environment setup below can reload it from disk.
stock_history_df.to_csv(
    './ETH_hist.csv',
    index=False,
)

### Env module

In [None]:
from test_env.single_crypto_env import CryptoTradingEnv

### Setup env

In [None]:
# Reload the processed history written to disk earlier in the notebook.
stock_history_df = pd.read_csv("./ETH_hist.csv")

In [None]:
# Price matrix with columns in the fixed order: open, adjcp, low, high.
price_array = stock_history_df.loc[:, ['open', 'adjcp', 'low', 'high']].values
# Feature matrix with one column per configured technical indicator.
tech_array = stock_history_df.loc[:, tech_indicators].values

In [None]:
# Row index separating training from test data: the first 80% of rows
# (rounded down) are used for training.
train_test_split_index = int(len(stock_history_df) * 0.8)

In [None]:
# Report how many rows fall on each side of the split, one line per side.
print(
    f'{train_test_split_index} records for training',
    f'{stock_history_df.shape[0] - train_test_split_index} records for testing',
    sep='\n',
)

In [None]:
# Training configuration: the environment receives the rows before the split.
config = {
    'price_array': price_array[:train_test_split_index],
    'tech_array': tech_array[:train_test_split_index],
    'if_train': True,
}

# Values passed to the training environment constructor below.
initial_capital = 10_000.0          # same value as 1e4
initial_stocks = np.zeros(1)        # one entry, set to 0.0
max_step = 30

In [None]:
# Training environment built from the training slice and the values above.
crypto_env = CryptoTradingEnv(
    config,
    initial_capital=initial_capital,
    initial_stocks=initial_stocks,
    max_step=max_step,
)

### Train RL

In [None]:
from stable_baselines3 import PPO, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv

In [None]:
# Wrap the single training environment in the vectorised interface the
# stable-baselines3 algorithms are given here.
env_train = DummyVecEnv([lambda: crypto_env])

# Selects which algorithm is instantiated below: 'ppo' or 'ddpg'.
model_name = 'ppo'

if model_name == 'ppo':
    model = PPO(
        "MlpPolicy",
        env_train,
        learning_rate=0.00025,
        n_steps=1024,
        batch_size=128,
        ent_coef=0.0,
        gamma=0.99,
        seed=312,
    )
elif model_name == 'ddpg':
    model = DDPG(
        "MlpPolicy",
        env_train,
        learning_rate=0.00025,
        batch_size=128,
        gamma=0.99,
        seed=312,
    )
else:
    # Fail here rather than with a NameError on `model` in a later cell.
    raise ValueError(
        f"Unsupported model_name {model_name!r}; expected 'ppo' or 'ddpg'"
    )

In [None]:
# total_timesteps is documented as an integer, so pass an int rather than the
# float literal 1e4. The TensorBoard run name follows the selected algorithm
# instead of being hard-coded to 'ppo'.
model.learn(total_timesteps=int(1e4), tb_log_name=model_name)
print('Training finished!')

In [None]:
# Write the trained model to the configured path and report where it went.
model.save(cwd)
print(f'Trained model saved in {cwd}')

### Test RL

In [None]:
#test on the testing env
def testRun(model, env_instance):
    state = env_instance.reset()
    episode_returns = list()  # the cumulative_return / initial_account
    done = False

    while not done:
        action = model.predict(state)[0]
        #print (action)
        state, reward, done, _ = env_instance.step(action)
        
        #print (env_instance.stocks)
        #total_asset = env_instance.amount + (env_instance.price_ary[env_instance.day + env_instance.run_index, 1] \
        #                                     * env_instance.stocks).sum()
        #episode_return = total_asset / env_instance.initial_total_asset
        
        episode_returns.append(reward)
        
    print('Test Finished!')  
    return episode_returns

In [None]:
# The held-out rows start where the training slice ended.
start_idx = train_test_split_index

test_config = {
    'price_array': price_array[start_idx:],
    'tech_array': tech_array[start_idx:],
    'if_train': False,
}

# Cap the episode length at 30 steps, or at the number of test rows if fewer.
max_step = min(len(test_config['price_array']), 30)

In [None]:
# Evaluation environment over the held-out slice. initial_stocks is not
# passed, so whatever default the environment defines is used.
test_env = CryptoTradingEnv(
    test_config,
    initial_capital=1e4,
    max_step=max_step,
)

# Alternative kept for reference: start with holdings instead of cash.
#test_env = CryptoTradingEnv(test_config, initial_capital=0, max_step=max_step,
#                            initial_stocks=np.array([20.0]))

# Reload the saved model from disk and bind test_model to the result of
# calling eval() on its policy.
# NOTE(review): presumably eval() returns the policy itself - confirm.
# (To skip the round trip through disk, the in-memory `model` could be used.)
test_model = PPO.load(cwd).policy.eval()

In [None]:
# Rewards collected while running the loaded policy on the test environment.
cumulative_return = testRun(test_model, test_env)

# Buy-and-hold baseline on column 1 of price_array ('adjcp'): price at the end
# of the evaluation window divided by price at its start.
# When the test slice has 30 rows or fewer, max_step equals the slice length
# and start_idx + max_step points one past the last row, so clamp the index.
holding_end_idx = min(start_idx + max_step, price_array.shape[0] - 1)
holding_return = price_array[holding_end_idx, 1] / price_array[start_idx, 1]

print("Holding-strategy return: ", holding_return)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Plot the values returned by testRun, one point per environment step.
plt.plot(cumulative_return, label='agent return')
plt.grid()
plt.title('cumulative return')
plt.xlabel('time')
# Without a legend the label above is never rendered.
plt.legend()
plt.show()

In [None]:
# Columns 2 and 3 of the price array are 'low' and 'high' (the array is built
# in the order open, adjcp, low, high). Plot them separately so each line has
# its own legend entry instead of sharing one 'px' label.
plt.plot(test_config['price_array'][:, 2], label='low')
plt.plot(test_config['price_array'][:, 3], label='high')
plt.grid()
plt.title('price')
plt.xlabel('time')
plt.legend()
plt.show()