# Multiple Cryptocurrencies Trading Demo

## Installation

#%cd ./
#!git clone https://github.com/AI4Finance-Foundation/FinRL-Meta
%cd ./FinRL-Meta/
!pip install git+https://github.com/AI4Finance-LLC/ElegantRL.git
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git
!pip install yfinance stockstats
!pip install alpaca_trade_api
!pip install ray[default]
!pip install lz4
!pip install ray[tune]
!pip install tensorboardX
!pip install gputil
!pip install trading_calendars
!pip install wrds
!pip install rqdatac
!pip install sqlalchemy==1.2.19
!pip install tushare
#install talib
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz 
!tar xvzf ta-lib-0.4.0-src.tar.gz
import os
os.chdir('ta-lib') 
!./configure --prefix=/usr
!make
!make install
os.chdir('../')
!pip install TA-Lib
!pip install baostock
!pip install quandl

In [1]:
%cd ./FinRL-Meta/

/home/pop/Documents/Code/RTML/Project/finRL/FinRL-Meta


In [2]:
import os 
print(os.getcwd())

/home/pop/Documents/Code/RTML/Project/finRL/FinRL-Meta


In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Import Related Packages

In [4]:
import sys
sys.path.append('/home/pop/conda/lib/python3.8/site-packages')
print(sys.path)

['/home/pop/Documents/Code/RTML/Project/finRL', '/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/pop/.local/lib/python3.8/site-packages', '/usr/local/lib/python3.8/dist-packages', '/usr/lib/python3/dist-packages', '/home/pop/.local/lib/python3.8/site-packages/IPython/extensions', '/home/pop/.ipython', '/home/pop/conda/lib/python3.8/site-packages']


In [5]:
from agents.elegantrl_models import DRLAgent as DRLAgent_erl
from agents.rllib_models import DRLAgent as DRLAgent_rllib
from agents.stablebaselines3_models import DRLAgent as DRLAgent_sb3
from finrl_meta.data_processor import DataProcessor
import gym
import numpy as np
import pandas as pd
import math

def train(start_date, end_date, ticker_list, data_source, time_interval,
          technical_indicator_list, drl_lib, env, model_name, if_vix=True,
          **kwargs):
    # process data using unified data processor
    dp = DataProcessor(data_source, start_date, end_date, time_interval, **kwargs)
    price_array, tech_array, turbulence_array = dp.run(ticker_list, 
                                                       technical_indicator_list,
                                                       if_vix)
    data_config = {'price_array': price_array,
                   'tech_array': tech_array,
                   'turbulence_array': turbulence_array}
    # build environment using processed data
    env_instance = env(config=data_config)

    # read parameters and load agents
    cwd = kwargs.get('cwd', './' + str(model_name))  # cwd: current_working_dir

    if drl_lib == 'elegantrl':
        break_step = kwargs.get('break_step', 1e6)  # total_training_steps
        erl_params = kwargs.get('erl_params')  # see notebooks for examples.

        agent = DRLAgent_erl(env=env,
                             price_array=price_array,
                             tech_array=tech_array,
                             turbulence_array=turbulence_array)

        model = agent.get_model(model_name, model_kwargs=erl_params)
        trained_model = agent.train_model(model=model,
                                          cwd=cwd,
                                          total_timesteps=break_step)  # erl model is automated saved in cwd

    elif drl_lib == 'rllib':
        total_episodes = kwargs.get('total_episodes', 100)  # rllib uses total training episodes instead of steps.
        rllib_params = kwargs.get('rllib_params')

        agent_rllib = DRLAgent_rllib(env=env,
                                     price_array=price_array,
                                     tech_array=tech_array,
                                     turbulence_array=turbulence_array)

        model, model_config = agent_rllib.get_model(model_name)

        model_config['lr'] = rllib_params['lr']  # learning_rate
        model_config['train_batch_size'] = rllib_params['train_batch_size']
        model_config['gamma'] = rllib_params['gamma']

        trained_model = agent_rllib.train_model(model=model,
                                                model_name=model_name,
                                                model_config=model_config,
                                                total_episodes=total_episodes)
        trained_model.save(cwd)


    elif drl_lib == 'stable_baselines3':
        total_timesteps = kwargs.get('total_timesteps', 1e6)
        agent_params = kwargs.get('agent_params')

        agent = DRLAgent_sb3(env=env_instance)

        model = agent.get_model(model_name, model_kwargs=agent_params)
        trained_model = agent.train_model(model=model,
                                          tb_log_name=model_name,
                                          total_timesteps=total_timesteps)
        print('Training finished!')
        trained_model.save(cwd)
        print('Trained model saved in ' + str(cwd))
    else:
        raise ValueError('DRL library input is NOT supported. Please check.')
        

def test(start_date, end_date, ticker_list, data_source, time_interval,
         technical_indicator_list, drl_lib, env, model_name, if_vix=True,
         **kwargs):
    # process data using unified data processor
    dp = DataProcessor(data_source, start_date, end_date, time_interval, **kwargs)
    price_array, tech_array, turbulence_array = dp.run(ticker_list, 
                                                       technical_indicator_list,
                                                       if_vix)
    data_config = {'price_array': price_array,
                   'tech_array': tech_array,
                   'turbulence_array': turbulence_array}
    # build environment using processed data
    env_instance = env(config=data_config)

    env_config = {
        "price_array": price_array,
        "tech_array": tech_array,
        "turbulence_array": turbulence_array,
        "if_train": False,
    }
    env_instance = env(config=env_config)

    # load elegantrl needs state dim, action dim and net dim
    net_dimension = kwargs.get("net_dimension", 2 ** 7)
    cwd = kwargs.get("cwd", "./" + str(model_name))
    print("price_array: ", len(price_array))

    if drl_lib == "elegantrl":
        episode_total_assets = DRLAgent_erl.DRL_prediction(
            model_name=model_name,
            cwd=cwd,
            net_dimension=net_dimension,
            environment=env_instance,
        )

        return episode_total_assets

    elif drl_lib == "rllib":
        # load agent
        episode_total_assets = DRLAgent_rllib.DRL_prediction(
            model_name=model_name,
            env=env,
            price_array=price_array,
            tech_array=tech_array,
            turbulence_array=turbulence_array,
            agent_path=cwd,
        )

        return episode_total_assets

    elif drl_lib == "stable_baselines3":
        episode_total_assets = DRLAgent_sb3.DRL_prediction_load_from_file(
            model_name=model_name, environment=env_instance, cwd=cwd
        )

        return episode_total_assets
    else:
        raise ValueError("DRL library input is NOT supported. Please check.")


In [6]:
class CryptoEnv(gym.Env):  # custom env
    def __init__(self, config, lookback=1, initial_capital=1e6, 
                 buy_cost_pct=1e-3, sell_cost_pct=1e-3, gamma=0.99):
        
        self.lookback = lookback
        self.initial_total_asset = initial_capital
        self.initial_cash = initial_capital
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.max_stock = 1
        self.gamma = gamma
        self.price_array = config['price_array']
        self.tech_array = config['tech_array']
        self._generate_action_normalizer()
        self.crypto_num = self.price_array.shape[1]
        self.max_step = self.price_array.shape[0] - lookback 
        
        # reset
        self.time = lookback-1
        self.cash = self.initial_cash
        self.current_price = self.price_array[self.time]
        self.current_tech = self.tech_array[self.time]
        self.stocks = np.zeros(self.crypto_num, dtype=np.float32)

        self.total_asset = self.cash + (self.stocks * self.price_array[self.time]).sum()
        self.episode_return = 0.0  
        self.gamma_return = 0.0
        

        '''env information'''
        self.env_name = 'MulticryptoEnv'
        self.state_dim = 1 + (self.price_array.shape[1] + self.tech_array.shape[1])*lookback
        self.action_dim = self.price_array.shape[1]
        self.if_discrete = False
        self.target_return = 10
        
        self.observation_space = gym.spaces.Box(low=-3000, high=3000, shape=(self.state_dim,), dtype=np.float32)
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(self.action_dim,), dtype=np.float32)

    def reset(self) -> np.ndarray:
        self.time = self.lookback-1
        self.current_price = self.price_array[self.time]
        self.current_tech = self.tech_array[self.time]
        self.cash = self.initial_cash  # reset()
        self.stocks = np.zeros(self.crypto_num, dtype=np.float32)
        self.total_asset = self.cash + (self.stocks * self.price_array[self.time]).sum()
        
        state = self.get_state()
        return state

    def step(self, actions): # -> (np.ndarray, float, bool, None):
        self.time += 1
        
        price = self.price_array[self.time]
        for i in range(self.action_dim):
            norm_vector_i = self.action_norm_vector[i]
            actions[i] = actions[i] * norm_vector_i
            
        for index in np.where(actions < 0)[0]:  # sell_index:
            if price[index] > 0:  # Sell only if current asset is > 0
                sell_num_shares = min(self.stocks[index], -actions[index])
                self.stocks[index] -= sell_num_shares
                self.cash += price[index] * sell_num_shares * (1 - self.sell_cost_pct)
                
        for index in np.where(actions > 0)[0]:  # buy_index:
            if price[index] > 0:  # Buy only if the price is > 0 (no missing data in this particular date)
                buy_num_shares = min(self.cash // price[index], actions[index])
                self.stocks[index] += buy_num_shares
                self.cash -= price[index] * buy_num_shares * (1 + self.buy_cost_pct)

        """update time"""
        done = self.time == self.max_step
        state = self.get_state()
        next_total_asset = self.cash + (self.stocks * self.price_array[self.time]).sum()
        reward = (next_total_asset - self.total_asset) * 2 ** -16  
        self.total_asset = next_total_asset
        self.gamma_return = self.gamma_return * self.gamma + reward 
        self.cumu_return = self.total_asset / self.initial_cash
        if done:
            reward = self.gamma_return
            self.episode_return = self.total_asset / self.initial_cash
        return state, reward, done, None

    def get_state(self):
        state =  np.hstack((self.cash * 2 ** -18, self.stocks * 2 ** -3))
        for i in range(self.lookback):
            tech_i = self.tech_array[self.time-i]
            normalized_tech_i = tech_i * 2 ** -15
            state = np.hstack((state, normalized_tech_i)).astype(np.float32)
        return state
    
    def close(self):
        pass

    def _generate_action_normalizer(self):
        action_norm_vector = []
        price_0 = self.price_array[0]
        for price in price_0:
            x = math.floor(math.log(price, 10)) #the order of magnitude 
            action_norm_vector.append(1/((10)**x)) 
            
        action_norm_vector = np.asarray(action_norm_vector) * 10000
        self.action_norm_vector = np.asarray(action_norm_vector)

### PPO

In [8]:
# script to evaluate the performance of one agent on one crypto
# write a python script instead of the Jupyter Notebook will be easier


#importation of modules
import numpy as np
import math
import gym
from finrl_meta.env_crypto_trading.env_multiple_crypto import CryptoEnv
from finrl.plot import backtest_stats
from finrl import config
from agents.stablebaselines3_models import DRLAgent as DRLAgent_sb3
from agents.rllib_models import DRLAgent as DRLAgent_rllib
from agents.elegantrl_models import DRLAgent as DRLAgent_erl

import pandas as pd
from plot2 import get_baseline, get_daily_return, backtest_plot

from finrl_meta.data_processor import DataProcessor


TICKER_LIST = ['BTCUSDT','ETHUSDT','ADAUSDT','BNBUSDT','XRPUSDT',
                'SOLUSDT','DOTUSDT', 'DOGEUSDT','AVAXUSDT','UNIUSDT']
TICKER = [TICKER_LIST[0]]
print(TICKER)

TRAIN_START_DATE = '2015-01-01'
TRAIN_END_DATE = '2022-02-01'

TEST_START_DATE = '2022-02-02'
TEST_END_DATE = '2022-03-02'
time_interval = '1d'

env = CryptoEnv


INDICATORS = ['macd', 'rsi', 'cci', 'dx'] #self-defined technical indicator list is NOT supported yet

ERL_PARAMS = {"learning_rate": 2**-15,"batch_size": 2**11,
                "gamma": 0.99, "seed":312,"net_dimension": 2**9, 
                "target_step": 5000, "eval_gap": 30, "eval_times": 1}


# training of the agent

print('----------Training-----------------')

# train(start_date=TRAIN_START_DATE, 
#       end_date=TRAIN_END_DATE,
#       ticker_list=TICKER,    #only one ticker for the moment (deal with get_baseline for multiple ticker)
#       data_source='binance',
#       time_interval=time_interval, 
#       technical_indicator_list=INDICATORS,
#       drl_lib='elegantrl', 
#       env=env, 
#       model_name='ppo', 
#       current_working_dir='./test_ppo_elegantrl',
#       erl_params=ERL_PARAMS,
#       break_step=5e4,
#       if_vix=False
#       )

print('----------End Training-----------------')

#testing of the agent

print('----------- Testing -----------------')

account_value_erl = test(start_date = TEST_START_DATE, 
                        end_date = TEST_END_DATE,
                        ticker_list = TICKER, 
                        data_source = 'binance',
                        time_interval= time_interval, 
                        technical_indicator_list= INDICATORS,
                        drl_lib='elegantrl', 
                        env=env, 
                        model_name='ppo', 
                        current_working_dir='./test_ppo_elegantrl', 
                        net_dimension = 2**9, 
                        if_vix=False
                        )

print('--------- End Testing ------------------')


baseline_df = get_baseline(TICKER, TEST_START_DATE, TEST_END_DATE, time_interval)
account_value_erl_pd = pd.DataFrame({'date':baseline_df.date,'account_value':account_value_erl})
assert account_value_erl_pd.shape[0] == baseline_df.shape[0]

backtest_plot(account_value_erl_pd, 
             baseline_ticker = TICKER, 
             baseline_start = TEST_START_DATE,
             baseline_end = TEST_END_DATE)


['BTCUSDT']
----------Training-----------------
----------End Training-----------------
----------- Testing -----------------
binance successfully connected
2022-02-02 2022-03-02
10
tech_indicator_list:  ['macd', 'rsi', 'cci', 'dx']
indicator:  macd
indicator:  rsi
indicator:  cci
indicator:  dx
Succesfully add technical indicators
price_array:  26
Test Finished!
episode_return 0.9981001601202032
--------- End Testing ------------------
binance successfully connected
Using cached file ./cache/BTCUSDT_binance_2022-02-02_2022-03-02_1d.pickle
tech_indicator_list:  ['macd', 'rsi', 'cci', 'dx']
indicator:  macd


  s_tensor = _torch.as_tensor((state,), device=device)


indicator:  rsi
indicator:  cci
indicator:  dx
Succesfully add technical indicators


ValueError: array length 25 does not match index length 26

## DDPG

In [None]:
# script to evaluate the performance of one agent on one crypto
# write a python script instead of the Jupyter Notebook will be easier


#importation of modules
import numpy as np
import math
import gym
from finrl_meta.env_crypto_trading.env_multiple_crypto import CryptoEnv
from finrl.plot import backtest_stats
from finrl import config
from agents.stablebaselines3_models import DRLAgent as DRLAgent_sb3
from agents.rllib_models import DRLAgent as DRLAgent_rllib
from agents.elegantrl_models import DRLAgent as DRLAgent_erl

import pandas as pd
from plot2 import get_baseline, get_daily_return, backtest_plot

from finrl_meta.data_processor import DataProcessor


TICKER_LIST = ['BTCUSDT','ETHUSDT','ADAUSDT','BNBUSDT','XRPUSDT',
                'SOLUSDT','DOTUSDT', 'DOGEUSDT','AVAXUSDT','UNIUSDT']
TICKER = [TICKER_LIST[0]]
print(TICKER)

TRAIN_START_DATE = '2015-01-01'
TRAIN_END_DATE = '2022-02-01'

TEST_START_DATE = '2022-02-02'
TEST_END_DATE = '2022-03-02'
time_interval = '1d'

env = CryptoEnv


INDICATORS = ['macd', 'rsi', 'cci', 'dx'] #self-defined technical indicator list is NOT supported yet

ERL_PARAMS = {"learning_rate": 2**-15,"batch_size": 2**11,
                "gamma": 0.99, "seed":312,"net_dimension": 2**9, 
                "target_step": 5000, "eval_gap": 30, "eval_times": 1}


# training of the agent

print('----------Training-----------------')

train(start_date=TRAIN_START_DATE, 
      end_date=TRAIN_END_DATE,
      ticker_list=TICKER,    #only one ticker for the moment (deal with get_baseline for multiple ticker)
      data_source='binance',
      time_interval=time_interval, 
      technical_indicator_list=INDICATORS,
      drl_lib='elegantrl', 
      env=env, 
      model_name='ddpg', 
      current_working_dir='./test_ddpg_elegantrl',
      erl_params=ERL_PARAMS,
      break_step=5e4,
      if_vix=False
      )

print('----------End Training-----------------')

#testing of the agent

print('----------- Testing -----------------')

account_value_erl = test(start_date = TEST_START_DATE, 
                        end_date = TEST_END_DATE,
                        ticker_list = TICKER, 
                        data_source = 'binance',
                        time_interval= time_interval, 
                        technical_indicator_list= INDICATORS,
                        drl_lib='elegantrl', 
                        env=env, 
                        model_name='ddpg', 
                        current_working_dir='./test_ddpg_elegantrl', 
                        net_dimension = 2**9, 
                        if_vix=False
                        )

print('--------- End Testing ------------------')


baseline_df = get_baseline(TICKER, TEST_START_DATE, TEST_END_DATE, time_interval)
account_value_erl_pd = pd.DataFrame({'date':baseline_df.date,'account_value':account_value_erl})
assert account_value_erl_pd.shape[0] == baseline_df.shape[0]

backtest_plot(account_value_erl_pd, 
             baseline_ticker = TICKER, 
             baseline_start = TEST_START_DATE,
             baseline_end = TEST_END_DATE)


## SAC

In [None]:
TICKER_LIST = ['BTCUSDT','ETHUSDT','ADAUSDT','BNBUSDT','XRPUSDT',
                'SOLUSDT','DOTUSDT', 'DOGEUSDT','AVAXUSDT','UNIUSDT']
TICKER = [TICKER_LIST[0]]
print(TICKER)

TRAIN_START_DATE = '2015-01-01'
TRAIN_END_DATE = '2022-02-01'

TEST_START_DATE = '2022-02-02'
TEST_END_DATE = '2022-03-02'
time_interval = '1d'

env = CryptoEnv


INDICATORS = ['macd', 'rsi', 'cci', 'dx'] #self-defined technical indicator list is NOT supported yet

ERL_PARAMS = {"learning_rate": 2**-15,"batch_size": 2**11,
                "gamma": 0.99, "seed":312,"net_dimension": 2**9, 
                "target_step": 5000, "eval_gap": 30, "eval_times": 1}


# training of the agent

print('----------Training-----------------')

train(start_date=TRAIN_START_DATE, 
      end_date=TRAIN_END_DATE,
      ticker_list=TICKER,    #only one ticker for the moment (deal with get_baseline for multiple ticker)
      data_source='binance',
      time_interval=time_interval, 
      technical_indicator_list=INDICATORS,
      drl_lib='elegantrl', 
      env=env, 
      model_name='sac', 
      current_working_dir='./test_sac_elegantrl',
      erl_params=ERL_PARAMS,
      break_step=5e4,
      if_vix=False
      )

print('----------End Training-----------------')

#testing of the agent

print('----------- Testing -----------------')

account_value_erl = test(start_date = TEST_START_DATE, 
                        end_date = TEST_END_DATE,
                        ticker_list = TICKER, 
                        data_source = 'binance',
                        time_interval= time_interval, 
                        technical_indicator_list= INDICATORS,
                        drl_lib='elegantrl', 
                        env=env, 
                        model_name='sac', 
                        current_working_dir='./test_sac_elegantrl', 
                        net_dimension = 2**9, 
                        if_vix=False
                        )

print('--------- End Testing ------------------')


baseline_df = get_baseline(TICKER, TEST_START_DATE, TEST_END_DATE, time_interval)
account_value_erl_pd = pd.DataFrame({'date':baseline_df.date,'account_value':account_value_erl})
assert account_value_erl_pd.shape[0] == baseline_df.shape[0]

backtest_plot(account_value_erl_pd, 
             baseline_ticker = TICKER, 
             baseline_start = TEST_START_DATE,
             baseline_end = TEST_END_DATE)


## TD3

In [None]:
TICKER_LIST = ['BTCUSDT','ETHUSDT','ADAUSDT','BNBUSDT','XRPUSDT',
                'SOLUSDT','DOTUSDT', 'DOGEUSDT','AVAXUSDT','UNIUSDT']
TICKER = [TICKER_LIST[0]]
print(TICKER)

TRAIN_START_DATE = '2015-01-01'
TRAIN_END_DATE = '2022-02-01'

TEST_START_DATE = '2022-02-02'
TEST_END_DATE = '2022-03-02'
time_interval = '1d'

env = CryptoEnv


INDICATORS = ['macd', 'rsi', 'cci', 'dx'] #self-defined technical indicator list is NOT supported yet

ERL_PARAMS = {"learning_rate": 2**-15,"batch_size": 2**11,
                "gamma": 0.99, "seed":312,"net_dimension": 2**9, 
                "target_step": 5000, "eval_gap": 30, "eval_times": 1}


# training of the agent

print('----------Training-----------------')

train(start_date=TRAIN_START_DATE, 
      end_date=TRAIN_END_DATE,
      ticker_list=TICKER,    #only one ticker for the moment (deal with get_baseline for multiple ticker)
      data_source='binance',
      time_interval=time_interval, 
      technical_indicator_list=INDICATORS,
      drl_lib='elegantrl', 
      env=env, 
      model_name='td3', 
      current_working_dir='./test_td3_elegantrl',
      erl_params=ERL_PARAMS,
      break_step=5e4,
      if_vix=False
      )

print('----------End Training-----------------')

#testing of the agent

print('----------- Testing -----------------')

account_value_erl = test(start_date = TEST_START_DATE, 
                        end_date = TEST_END_DATE,
                        ticker_list = TICKER, 
                        data_source = 'binance',
                        time_interval= time_interval, 
                        technical_indicator_list= INDICATORS,
                        drl_lib='elegantrl', 
                        env=env, 
                        model_name='td3', 
                        current_working_dir='./test_td3_elegantrl', 
                        net_dimension = 2**9, 
                        if_vix=False
                        )

print('--------- End Testing ------------------')


baseline_df = get_baseline(TICKER, TEST_START_DATE, TEST_END_DATE, time_interval)
account_value_erl_pd = pd.DataFrame({'date':baseline_df.date,'account_value':account_value_erl})
assert account_value_erl_pd.shape[0] == baseline_df.shape[0]

backtest_plot(account_value_erl_pd, 
             baseline_ticker = TICKER, 
             baseline_start = TEST_START_DATE,
             baseline_end = TEST_END_DATE)
