In [1]:
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git

Collecting git+https://github.com/AI4Finance-Foundation/FinRL.git
  Cloning https://github.com/AI4Finance-Foundation/FinRL.git to /tmp/pip-req-build-7lza2nf0
  Running command git clone --filter=blob:none --quiet https://github.com/AI4Finance-Foundation/FinRL.git /tmp/pip-req-build-7lza2nf0
  Resolved https://github.com/AI4Finance-Foundation/FinRL.git to commit d5384aab33035f8874ecea7017a2068cbe0fd43e
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting elegantrl@ git+https://github.com/AI4Finance-Foundation/ElegantRL.git#egg=elegantrl (from finrl==0.3.6)
  Cloning https://github.com/AI4Finance-Foundation/ElegantRL.git to /tmp/pip-install-zz392eyl/elegantrl_6f434f34cea24430a6c7b9433b490239
  Running command git clone --filter=blob:none --quiet https://github.com/AI4Finance-Foundation/ElegantRL.git /tmp/pip-install-zz392eyl/elegantrl_6f434f34cea24430a6c7b9433b

In [2]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import datetime

from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.config import INDICATORS

  from jax import xla_computation as _xla_computation


# 1. Data Wrangle

In [3]:
# Date windows ('YYYY-MM-DD' strings) used by the download and split cells.
TRAIN_START_DATE, TRAIN_END_DATE = "2020-01-01", "2020-12-31"
TRADE_START_DATE, TRADE_END_DATE = "2021-09-01", "2021-12-31"

# Tickers handed to the Yahoo downloader (USD-quoted crypto pairs).
# NOTE(review): seven tickers are listed, but the environment cell later
# reports a crypto dimension of 6 - presumably one ticker (SOL-USD?) is
# dropped during preprocessing; confirm.
symbols = [
    "BTC-USD", "ETH-USD", "BNB-USD", "ADA-USD",
    "XRP-USD", "SOL-USD", "BCH-USD",
]

In [4]:
# Future improvement: source the data from Alpaca/Binance instead of Yahoo.

from finrl.meta.preprocessor.yahoodownloader import YahooDownloader

# A single download spanning the start of the training window through the end
# of the trading window; the individual windows are cut out of it later.
downloader = YahooDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TRADE_END_DATE,
    ticker_list=symbols,
)
df_raw = downloader.fetch_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (5010, 8)





In [5]:
# Preview the first rows of the raw download (one row per date and ticker).
df_raw.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2020-01-01,0.032832,0.033813,0.032704,0.033458,22948374,ADA-USD,2
1,2020-01-01,204.671295,208.077515,203.205154,204.397537,1456113692,BCH-USD,2
2,2020-01-01,13.730962,13.873946,13.654942,13.689083,172980718,BNB-USD,2
3,2020-01-01,7194.89209,7254.330566,7174.944336,7200.174316,18565664997,BTC-USD,2
4,2020-01-01,129.630661,132.835358,129.198288,130.802002,7935230330,ETH-USD,2


## Preprocess Data

In [6]:
# Feature-engineering switches: technical indicators taken from
# finrl.config.INDICATORS, plus VIX and turbulence columns; no custom features.
feature_options = dict(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)
fe = FeatureEngineer(**feature_options)

# NOTE(review): the VIX fetch reports only 503 rows - presumably just
# equity-market trading days survive the VIX merge, so weekend crypto bars
# may be dropped; confirm this is intended for a 24/7 market.
processed = fe.preprocess_data(df_raw)


Successfully added technical indicators


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (503, 8)
Successfully added vix
Successfully added turbulence index


In [7]:
# Show five random rows of the engineered features. A fixed random_state keeps
# the displayed rows identical across "Restart & Run All" executions.
processed.sample(5, random_state=0)

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
2757,2021-10-27,60352.0,61435.183594,58208.1875,58482.386719,43657076893,BTC-USD,2,2783.536243,66381.913187,53157.053219,56.471971,44.616265,22.692343,55853.433203,51174.297396,16.98,2.822981
2764,2021-10-28,3924.81543,4293.150879,3905.706055,4287.318848,25958154575,ETH-USD,3,198.675798,4394.140538,3366.345839,60.255138,106.244252,15.74108,3706.938631,3535.383736,16.530001,1.954375
17,2020-01-06,0.195536,0.223832,0.195068,0.22151,2301679290,XRP-USD,0,0.00139,0.221585,0.173626,89.052239,199.038304,76.962312,0.197605,0.197605,13.85,0.0
496,2020-04-30,216.909134,227.529694,206.43692,207.602051,28089191904,ETH-USD,3,12.811604,217.151563,144.227386,58.185199,169.937971,53.032128,171.946989,165.185044,34.150002,0.0
755,2020-07-01,0.17575,0.178491,0.174703,0.176976,1115989502,XRP-USD,2,-0.005662,0.198669,0.173547,41.028985,-131.012216,55.667391,0.191438,0.197868,28.620001,0.0


In [8]:
import itertools

# Tickers present after feature engineering.
list_ticker = processed["tic"].unique().tolist()

# Every calendar day between the first and last processed date, as strings.
first_day, last_day = processed["date"].min(), processed["date"].max()
list_date = list(pd.date_range(first_day, last_day).astype(str))

# Full (date, ticker) grid so that every ticker has a row on every kept date.
combination = list(itertools.product(list_date, list_ticker))
grid = pd.DataFrame(combination, columns=["date", "tic"])

processed_full = (
    grid.merge(processed, on=["date", "tic"], how="left")
    # keep only the dates that actually occur in the processed data
    .loc[lambda frame: frame["date"].isin(processed["date"])]
    .sort_values(["date", "tic"])
    # (date, ticker) pairs with no source row end up as all-zero features
    .fillna(0)
)


In [9]:
# Show five random rows of the gap-filled frame. A fixed random_state keeps
# the displayed rows identical across "Restart & Run All" executions.
processed_full.sample(5, random_state=0)

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
4023,2021-11-02,BTC-USD,60963.253906,64242.792969,60673.054688,63226.402344,37746670000.0,1.0,2141.937228,65247.07698,57857.299973,60.441386,73.428534,28.155472,59182.450391,52491.581901,16.030001,1.437997
793,2020-05-13,BCH-USD,233.0755,240.228577,230.909943,239.160126,2955785000.0,2.0,0.355606,266.986798,227.809374,47.97573,-32.705242,0.46272,241.48435,231.746639,35.279999,0.0
200,2020-02-04,BNB-USD,18.466125,18.595591,17.904842,18.177366,213533700.0,1.0,0.504675,18.847705,16.749039,64.217703,75.248819,41.499588,16.994606,16.520493,16.049999,0.0
2628,2021-03-15,ADA-USD,1.058601,1.068913,1.007628,1.033539,4545452000.0,0.0,0.043475,1.333026,0.976502,57.27961,-30.252368,0.489526,1.089523,0.786898,20.030001,1.674417
235,2020-02-10,BCH-USD,449.911713,457.945923,439.068237,452.887512,4615610000.0,0.0,31.805179,477.103103,296.485904,74.401738,146.471533,58.961904,367.297154,331.488725,15.04,0.0


## Data Split and Save

In [10]:
# Cut the training and trading windows out of the processed frame.
# NOTE(review): with the configured dates, rows falling between
# TRAIN_END_DATE and TRADE_START_DATE presumably land in neither split -
# confirm that gap is intentional.
train, trade = (
    data_split(processed_full, window_start, window_end)
    for window_start, window_end in (
        (TRAIN_START_DATE, TRAIN_END_DATE),
        (TRADE_START_DATE, TRADE_END_DATE),
    )
)

# Row counts of each split (train first, then trade).
for split in (train, trade):
    print(len(split))

1512
498


In [11]:
# Persist both splits as CSV files in the working directory.
csv_targets = (
    ('./train.csv', train),
    ('./trade.csv', trade),
)
for csv_path, split_frame in csv_targets:
    split_frame.to_csv(csv_path)

# Report the (rows, columns) shape of each saved split.
print(f"train.shape: {train.shape}")
print(f"trade.shape: {trade.shape}")

train.shape: (1512, 18)
trade.shape: (498, 18)


# 2. Train, Test, & Make Env
## Design Env

In [12]:
# Number of distinct tickers (cryptocurrencies) in the processed data.
crypto_dimension = len(processed_full["tic"].unique())

# Each asset contributes 2 entries plus one per technical indicator; one extra
# global entry is added on top.
# NOTE(review): presumably the layout is cash balance + price and holding per
# asset + indicators - confirm against the environment's state definition.
per_asset_features = 2 + len(INDICATORS)
state_space = 1 + crypto_dimension * per_asset_features
print(f"Crypto dimension: {crypto_dimension}, State space: {state_space}")

Crypto dimension: 6, State space: 61


We use FinRL's stock-trading environment here because the crypto environment has many packaging issues, and the two environments behave similarly and produce similar outcomes in RL terms.

In [14]:
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv

# Transaction cost (as a fraction of trade value) for buying and for selling
# each cryptocurrency. The two lists are built separately so they are distinct
# objects: a chained assignment would bind both names to the same list, and a
# later in-place change to one side would silently change the other.
buy_cost_list = [0.001] * crypto_dimension
sell_cost_list = [0.001] * crypto_dimension

# Start the episode holding no units of any asset.
num_crypto_shares = [0] * crypto_dimension

env_kwargs = {
    # hmax caps the size of a single trade per asset (the environment scales
    # the agent's actions by this value); it is NOT an episode step limit.
    "hmax": 100,
    "initial_amount": 1000000,  # initial investment amount
    "num_stock_shares": num_crypto_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": crypto_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": crypto_dimension,  # one action per cryptocurrency
    "reward_scaling": 1e-4
}

# Training environment built on the training split.
e_train_gym = StockTradingEnv(df = train, **env_kwargs)

In [16]:
# Wrap the training environment for Stable-Baselines3; the printed type shows
# that the wrapper is a DummyVecEnv. The second value returned by
# get_sb_env() is not needed here and is discarded.
env_train, _ = e_train_gym.get_sb_env() #vectorizing the initialized env
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


## Initialize Agent & Train