# Install needed deps

#### Don't forget to run ```apt-get update --fix-missing && sudo apt-get install build-essential``` and ```apt-get install zlib1g-dev``` in case you are running on an Ubuntu image

In [231]:
%pip install pandas-ta==0.3.14b --pre
%pip install gym==0.21.0
%pip install ipywidgets
%pip install stable-baselines3[extra]
%pip install ta
%pip install quantstats
%pip install sklearn
%pip install feature_engine
%pip install --upgrade mplfinance

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Requirement already up-to-date: mplfinance in ./.venv/lib/python3.8/site-packages (0.12.8b9)
Note: you may need to restart the kernel to use updated packages.


# Prepare and fetch the data

In [1]:
from tensortrade.data.cdd import CryptoDataDownload

import numpy as np
import pandas as pd
pd.options.mode.use_inf_as_na = True

def prepare_data(df):
    df['volume'] = np.int64(df['volume'])
    df['date'] = pd.to_datetime(df['date'])
    df.sort_values(by='date', ascending=True, inplace=True)
    df.reset_index(drop=True, inplace=True)
    df['date'] = df['date'].dt.strftime('%Y-%m-%d %I:%M %p')
    return df

def fetch_data():
    cdd = CryptoDataDownload()
    bitfinex_data = cdd.fetch("Bitfinex", "USD", "BTC", "1h")
    bitfinex_data = bitfinex_data[['date', 'open', 'high', 'low', 'close', 'volume']]
    bitfinex_data = prepare_data(bitfinex_data)
    return bitfinex_data

def load_csv(filename):
    df = pd.read_csv('data/' + filename, skiprows=1)
    df.drop(columns=['symbol', 'volume_btc'], inplace=True)

    # Fix timestamp from "2019-10-17 09-AM" to "2019-10-17 09-00-00 AM"
    df['date'] = df['date'].str[:14] + '00-00 ' + df['date'].str[-2:]

    return prepare_data(df)

In [2]:
data = fetch_data()
data

Unnamed: 0,date,open,high,low,close,volume
0,2018-05-15 06:00 AM,8723.800000,8793.000000,8714.90000,8739.000000,8988053
1,2018-05-15 07:00 AM,8739.000000,8754.800000,8719.30000,8743.000000,2288904
2,2018-05-15 08:00 AM,8743.000000,8743.100000,8653.20000,8723.700000,8891773
3,2018-05-15 09:00 AM,8723.700000,8737.800000,8701.20000,8708.100000,2054868
4,2018-05-15 10:00 AM,8708.100000,8855.700000,8695.80000,8784.400000,17309722
...,...,...,...,...,...,...
34354,2022-04-17 09:00 PM,40279.000000,40310.948566,40152.00000,40227.829085,2411100
34355,2022-04-17 10:00 PM,40215.000000,40363.000000,39957.00000,40016.000000,8236277
34356,2022-04-17 11:00 PM,40021.000000,40021.000000,39563.00000,39707.552712,10678099
34357,2022-04-18 12:00 AM,39699.719303,39796.000000,39602.00000,39745.389366,5425275


## Create features for the feed module

In [3]:
import os
import numpy as np
import ta as ta1
import pandas_ta as ta

import quantstats as qs
qs.extend_pandas()

def fix_dataset_inconsistencies_without_backfilling(dataframe, fill_value=None):
    dataframe = dataframe.replace([-np.inf, np.inf], np.nan)

    return dataframe.fillna(axis='index', method='pad').dropna(axis='columns')

def fix_dataset_inconsistencies(dataframe, fill_value=None):
    dataframe = dataframe.replace([-np.inf, np.inf], np.nan)

    #This is done to avoid filling middle holes with backfilling.
    if fill_value is None:
        dataframe.iloc[0,:] = \
            dataframe.apply(lambda column: column.iloc[column.first_valid_index()], axis='index')
    else:
        dataframe.iloc[0,:] = \
            dataframe.iloc[0,:].fillna(fill_value)

    return dataframe.fillna(axis='index', method='pad').dropna(axis='columns')

def rsi(price: 'pd.Series[pd.Float64Dtype]', period: float) -> 'pd.Series[pd.Float64Dtype]':
    r = price.diff()
    upside = np.minimum(r, 0).abs()
    downside = np.maximum(r, 0).abs()
    rs = upside.ewm(alpha=1 / period).mean() / downside.ewm(alpha=1 / period).mean()
    return 100*(1 - (1 + rs) ** -1)

def macd(price: 'pd.Series[pd.Float64Dtype]', fast: float, slow: float, signal: float) -> 'pd.Series[pd.Float64Dtype]':
    fm = price.ewm(span=fast, adjust=False).mean()
    sm = price.ewm(span=slow, adjust=False).mean()
    md = fm - sm
    signal = md - md.ewm(span=signal, adjust=False).mean()
    return signal

def generate_all_default_quantstats_features(data):
    excluded_indicators = [
        'compare',
        'greeks',
        'information_ratio',
        'omega',
        'r2',
        'r_squared',
        'rolling_greeks',
        'warn',
    ]
    
    indicators_list = [f for f in dir(qs.stats) if f[0] != '_' and f not in excluded_indicators]
    
    df = data.copy()
    df = df.set_index('date')
    df.index = pd.DatetimeIndex(df.index)

    for indicator_name in indicators_list:
        try:
            #print(indicator_name)
            indicator = qs.stats.__dict__[indicator_name](df['close'])
            if isinstance(indicator, pd.Series):
                indicator = indicator.to_frame(name=indicator_name)
                df = pd.concat([df, indicator], axis='columns')
        except (pd.errors.InvalidIndexError, ValueError):
            pass

    df = df.reset_index()
    return df

def generate_features(data):

    # Generate all default indicators from ta library
    ta1.add_all_ta_features(data, 
                            'open', 
                            'high', 
                            'low', 
                            'close', 
                            'volume', 
                            fillna=True)

    # Naming convention across most technical indicator libraries
    data = data.rename(columns={'open': 'Open', 
                                'high': 'High', 
                                'low': 'Low', 
                                'close': 'Close', 
                                'volume': 'Volume'})
    data = data.set_index('date')

    # Custom indicators
    features = pd.DataFrame.from_dict({
        'prev_open': data['Open'].shift(1),
        'prev_high': data['High'].shift(1),
        'prev_low': data['Low'].shift(1),
        'prev_close': data['Close'].shift(1),
        'prev_volume': data['Volume'].shift(1),
        'vol_5': data['Close'].rolling(window=5).std().abs(),
        'vol_10': data['Close'].rolling(window=10).std().abs(),
        'vol_20': data['Close'].rolling(window=20).std().abs(),
        'vol_30': data['Close'].rolling(window=30).std().abs(),
        'vol_50': data['Close'].rolling(window=50).std().abs(),
        'vol_60': data['Close'].rolling(window=60).std().abs(),
        'vol_100': data['Close'].rolling(window=100).std().abs(),
        'vol_200': data['Close'].rolling(window=200).std().abs(),
        'ma_5': data['Close'].rolling(window=5).mean(),
        'ma_10': data['Close'].rolling(window=10).mean(),
        'ma_20': data['Close'].rolling(window=20).mean(),
        'ma_30': data['Close'].rolling(window=30).mean(),
        'ma_50': data['Close'].rolling(window=50).mean(),
        'ma_60': data['Close'].rolling(window=60).mean(),
        'ma_100': data['Close'].rolling(window=100).mean(),
        'ma_200': data['Close'].rolling(window=200).mean(),
        'ema_5': ta1.trend.ema_indicator(data['Close'], window=5, fillna=True),
        'ema_9': ta1.trend.ema_indicator(data['Close'], window=9, fillna=True),
        'ema_21': ta1.trend.ema_indicator(data['Close'], window=21, fillna=True),
        'ema_60': ta1.trend.ema_indicator(data['Close'], window=60, fillna=True),
        'ema_64': ta1.trend.ema_indicator(data['Close'], window=64, fillna=True),
        'ema_120': ta1.trend.ema_indicator(data['Close'], window=120, fillna=True),
        'lr_open': np.log(data['Open']).diff().fillna(0),
        'lr_high': np.log(data['High']).diff().fillna(0),
        'lr_low': np.log(data['Low']).diff().fillna(0),
        'lr_close': np.log(data['Close']).diff().fillna(0),
        'r_volume': data['Close'].diff().fillna(0),
        'rsi_5': rsi(data['Close'], period=5),
        'rsi_10': rsi(data['Close'], period=10),
        'rsi_100': rsi(data['Close'], period=100),
        'rsi_7': rsi(data['Close'], period=7),
        'rsi_28': rsi(data['Close'], period=28),
        'rsi_6': rsi(data['Close'], period=6),
        'rsi_14': rsi(data['Close'], period=14),
        'rsi_26': rsi(data['Close'], period=24),
        'macd_normal': macd(data['Close'], fast=12, slow=26, signal=9),
        'macd_short': macd(data['Close'], fast=10, slow=50, signal=5),
        'macd_long': macd(data['Close'], fast=200, slow=100, signal=50),
        'macd_wolfpack': macd(data['Close'], fast=3, slow=8, signal=9),
    })

    # Concatenate both manually and automatically generated features
    data = pd.concat([data, features], axis='columns').fillna(method='pad')

    # Remove potential column duplicates
    data = data.loc[:,~data.columns.duplicated()]

    # Revert naming convention
    data = data.rename(columns={'Open': 'open', 
                                'High': 'high', 
                                'Low': 'low', 
                                'Close': 'close', 
                                'Volume': 'volume'})

    data = data.reset_index()

    # Generate all default quantstats features
    df_quantstats = generate_all_default_quantstats_features(data)

    # Concatenate both manually and automatically generated features
    data = pd.concat([data, df_quantstats], axis='columns').fillna(method='pad')

    # Remove potential column duplicates
    data = data.loc[:,~data.columns.duplicated()]

    # A lot of indicators generate NaNs at the beginning of DataFrames, so remove them
    data = data.iloc[200:]
    data = data.reset_index(drop=True)

    data = fix_dataset_inconsistencies_without_backfilling(data, fill_value=None)
    return data

In [4]:
data = generate_features(data)
# remove not needed features
to_drop = ['others_dlr', 'compsum']
data = data.drop(columns=to_drop)
data.shape

  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)
  self._psar_up = pd.Series(index=self._psar.index)
  self._psar_down = pd.Series(index=self._psar.index)
  return bound(*args, **kwds)
  return _pd.concat(dfs, 1, sort=True)


(34159, 140)

## Remove features with low variance before splitting the dataset

In [5]:
from sklearn.feature_selection import VarianceThreshold
sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
date = data[['date']].copy()
data = data.drop(columns=['date'])
sel.fit(data)
data[data.columns[sel.get_support(indices=True)]]
data = pd.concat([date, data], axis='columns')
data

Unnamed: 0,date,open,high,low,close,volume,volume_adi,volume_obv,volume_cmf,volume_fi,...,rsi_26,macd_normal,macd_short,macd_long,macd_wolfpack,pct_rank,rolling_sharpe,rolling_sortino,rolling_volatility,to_drawdown_series
0,2018-05-23 02:00 PM,7897.300000,7898.800000,7849.80000,7877.400000,9341499,-1.219515e+08,-153103304,-0.175983,-1.548039e+08,...,65.542059,11.190548,10.871904,31.873058,19.596642,10.000000,-0.811487,-1.144302,0.072620,-0.103251
1,2018-05-23 03:00 PM,7877.400000,7889.700000,7661.00000,7700.000000,23679375,-1.375548e+08,-176782679,-0.228723,-7.327921e+08,...,72.698849,1.333779,-5.426751,34.355233,-24.639480,1.666667,-1.248391,-1.633909,0.079103,-0.123446
2,2018-05-23 04:00 PM,7700.000000,7700.100000,7548.10000,7605.400000,42144843,-1.479246e+08,-218927522,-0.216859,-1.197665e+09,...,75.527202,-10.060459,-21.497215,37.504922,-51.837145,1.666667,-1.612964,-2.069373,0.080681,-0.134215
3,2018-05-23 05:00 PM,7605.400000,7623.600000,7441.80000,7511.100000,38711817,-1.571235e+08,-257639339,-0.221424,-1.548073e+09,...,77.907846,-21.778972,-36.146245,41.269618,-66.773623,1.666667,-1.797159,-2.272346,0.082309,-0.144950
4,2018-05-23 06:00 PM,7511.100000,7551.600000,7403.00000,7489.100000,23046091,-1.534634e+08,-280685430,-0.149460,-1.399351e+09,...,78.418914,-28.422775,-41.976877,44.917996,-57.191729,1.666667,-1.879146,-2.372706,0.082361,-0.147455
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34154,2022-04-17 09:00 PM,40279.000000,40310.948566,40152.00000,40227.829085,2411100,1.034904e+10,-4091153067,0.069567,-2.279012e+06,...,53.472527,-15.399788,-12.766689,-94.423742,-0.511128,15.000000,-1.143253,-1.301348,0.102899,-0.413597
34155,2022-04-17 10:00 PM,40215.000000,40363.000000,39957.00000,40016.000000,8236277,1.034319e+10,-4099389344,-0.012154,-2.511939e+08,...,57.736870,-28.816185,-32.828145,-92.752200,-48.891523,1.666667,-1.258480,-1.430544,0.103106,-0.416685
34156,2022-04-17 11:00 PM,40021.000000,40021.000000,39563.00000,39707.552712,10678099,1.033926e+10,-4110067443,-0.088576,-6.858277e+08,...,62.902948,-55.180302,-69.739910,-88.328100,-118.618577,1.666667,-1.563211,-1.760876,0.102909,-0.421181
34157,2022-04-18 12:00 AM,39699.719303,39796.000000,39602.00000,39745.389366,5425275,1.034185e+10,-4104642168,-0.040761,-5.585274e+08,...,61.933905,-66.052950,-79.588620,-84.620542,-100.678703,3.333333,-1.547567,-1.743567,0.102928,-0.420630


# Setup which data to use for training and which data to use for evaluation of RL Model

In [6]:
from sklearn.model_selection import train_test_split

def split_data(data):
    X = data.copy()
    y = X['close'].pct_change()

    X_train_test, X_valid, y_train_test, y_valid = \
        train_test_split(data, data['close'].pct_change(), train_size=0.67, test_size=0.33, shuffle=False)

    X_train, X_test, y_train, y_test = \
        train_test_split(X_train_test, y_train_test, train_size=0.50, test_size=0.50, shuffle=False)

    return X_train, X_test, X_valid, y_train, y_test, y_valid

In [7]:
X_train, X_test, X_valid, y_train, y_test, y_valid = \
    split_data(data)

## Implement basic feature engineering

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

from feature_engine.selection import SelectBySingleFeaturePerformance

from scipy.stats import iqr


def estimate_outliers(data):
    return iqr(data) * 1.5

def estimate_percent_gains(data, column='close'):
    returns = get_returns(data, column=column)
    gains = estimate_outliers(returns)
    return gains

def get_returns(data, column='close'):
    return fix_dataset_inconsistencies(data[[column]].pct_change(), fill_value=0)

def precalculate_ground_truths(data, column='close', threshold=None):
    returns = get_returns(data, column=column)
    gains = estimate_outliers(returns) if threshold is None else threshold
    binary_gains = (returns[column] > gains).astype(int)
    return binary_gains

def is_null(data):
    return data.isnull().sum().sum() > 0



rf = RandomForestClassifier(n_estimators=100, 
                            random_state=1990, 
                            n_jobs=-1)

sel = SelectBySingleFeaturePerformance(variables=None, 
                                       estimator=rf, 
                                       scoring="roc_auc", 
                                       cv=5, 
                                       threshold=0.65)

sel.fit(X_train, precalculate_ground_truths(X_train, column='close'))

SelectBySingleFeaturePerformance(cv=5,
                                 estimator=RandomForestClassifier(n_jobs=-1,
                                                                  random_state=1990),
                                 threshold=0.65)

In [9]:
feature_performance = pd.Series(sel.feature_performance_).sort_values(ascending=False)

In [12]:
# import matplotlib.pyplot as plt

# feature_performance.plot.bar(figsize=(40, 10))
# plt.title('Performance of ML models trained with individual features')
# plt.ylabel('roc-auc')

In [10]:
features_to_drop = sel.features_to_drop_
to_drop = list(set(features_to_drop) - set(['open', 'high', 'low', 'close', 'volume']))
len(to_drop)
# features_to_drop

124

In [11]:
X_train = X_train.drop(columns=to_drop)
X_test = X_test.drop(columns=to_drop)
X_valid = X_valid.drop(columns=to_drop)

X_train.shape, X_test.shape, X_valid.shape

((11443, 16), (11443, 16), (11273, 16))

In [12]:
X_train.columns.tolist()

['date',
 'open',
 'high',
 'low',
 'close',
 'volume',
 'volume_em',
 'volume_vpt',
 'volatility_kchi',
 'trend_aroon_up',
 'momentum_stoch_rsi',
 'others_dr',
 'lr_high',
 'lr_close',
 'r_volume',
 'macd_wolfpack']

## Normalize the dataset subsets to make the model converge faster

In [13]:
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

scaler_type = MinMaxScaler

def get_feature_scalers(X, scaler_type=scaler_type):
    scalers = []
    for name in list(X.columns[X.columns != 'date']):
        scalers.append(scaler_type().fit(X[name].values.reshape(-1, 1)))
    return scalers

def get_scaler_transforms(X, scalers):
    X_scaled = []
    for name, scaler in zip(list(X.columns[X.columns != 'date']), scalers):
        X_scaled.append(scaler.transform(X[name].values.reshape(-1, 1)))
    X_scaled = pd.concat([pd.DataFrame(column, columns=[name]) for name, column in \
                          zip(list(X.columns[X.columns != 'date']), X_scaled)], axis='columns')
    return X_scaled

def normalize_data(X_train, X_test, X_valid):
    X_train_test = pd.concat([X_train, X_test], axis='index')
    X_train_test_valid = pd.concat([X_train_test, X_valid], axis='index')

    X_train_test_dates = X_train_test[['date']]
    X_train_test_valid_dates = X_train_test_valid[['date']]

    X_train_test = X_train_test.drop(columns=['date'])
    X_train_test_valid = X_train_test_valid.drop(columns=['date'])

    train_test_scalers = \
        get_feature_scalers(X_train_test, 
                            scaler_type=scaler_type)
    train_test_valid_scalers = \
        get_feature_scalers(X_train_test_valid, 
                            scaler_type=scaler_type)

    X_train_test_scaled = \
        get_scaler_transforms(X_train_test, 
                              train_test_scalers)
    X_train_test_valid_scaled = \
        get_scaler_transforms(X_train_test_valid, 
                              train_test_scalers)
    X_train_test_valid_scaled_leaking = \
        get_scaler_transforms(X_train_test_valid, 
                              train_test_valid_scalers)

    X_train_test_scaled = \
        pd.concat([X_train_test_dates, 
                   X_train_test_scaled], 
                  axis='columns')
    X_train_test_valid_scaled = \
        pd.concat([X_train_test_valid_dates, 
                   X_train_test_valid_scaled], 
                  axis='columns')
    X_train_test_valid_scaled_leaking = \
        pd.concat([X_train_test_valid_dates, 
                   X_train_test_valid_scaled_leaking], 
                  axis='columns')

    X_train_scaled = X_train_test_scaled.iloc[:X_train.shape[0]]
    X_test_scaled = X_train_test_scaled.iloc[X_train.shape[0]:]
    X_valid_scaled = X_train_test_valid_scaled.iloc[X_train_test.shape[0]:]
    X_valid_scaled_leaking = X_train_test_valid_scaled_leaking.iloc[X_train_test.shape[0]:]

    return (train_test_scalers, 
            train_test_valid_scalers, 
            X_train_scaled, 
            X_test_scaled, 
            X_valid_scaled, 
            X_valid_scaled_leaking)

In [14]:
train_test_scalers, train_test_valid_scalers, X_train_scaled, X_test_scaled, X_valid_scaled, X_valid_scaled_leaking = \
    normalize_data(X_train, X_test, X_valid)

In [15]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import style
import mplfinance as mpf

style.use('dark_background')

VOLUME_CHART_HEIGHT = 0.33

UP_COLOR = '#27A59A'
DOWN_COLOR = '#EF534F'
UP_TEXT_COLOR = '#73D3CC'
DOWN_TEXT_COLOR = '#DC2C27'


def date2num(date):
    return mdates.datestr2num(date)
    


class LiveTradingGraph:
    """A trading visualization using matplotlib made to render OpenAI gym environments"""

    def __init__(self, df, title=None):
        self.df = df.copy(deep=True) 
        self.df.index = pd.to_datetime(df.index)
        self.net_worths = np.zeros(len(df['date']))

        # Create a figure on screen and set the title
        fig = plt.figure()
        fig.suptitle(title)

        # Create top subplot for net worth axis
        self.net_worth_ax = plt.subplot2grid(
            (6, 1), (0, 0), rowspan=2, colspan=1)

        # Create bottom subplot for shared price/volume axis
        self.price_ax = plt.subplot2grid(
            (6, 1), (2, 0), rowspan=8, colspan=1, sharex=self.net_worth_ax)

        # Create a new axis for volume which shares its x-axis with price
        self.volume_ax = self.price_ax.twinx()

        # Add padding to make graph easier to view
        plt.subplots_adjust(left=0.11, bottom=0.24,
                            right=0.90, top=0.90, wspace=0.2, hspace=0)

        # Show the graph without blocking the rest of the program
        plt.show(block=False)

    def _render_net_worth(self, current_step, net_worth, step_range, dates):
        # Clear the frame rendered last step
        self.net_worth_ax.clear()

        # Plot net worths
        self.net_worth_ax.plot_date(
            dates, self.net_worths[step_range], '-', label='Net Worth')

        # Show legend, which uses the label we defined for the plot above
        self.net_worth_ax.legend()
        legend = self.net_worth_ax.legend(loc=2, ncol=2, prop={'size': 8})
        legend.get_frame().set_alpha(0.4)

        last_date = date2num(self.df['date'].values[current_step])
        last_net_worth = self.net_worths[current_step]

        # Annotate the current net worth on the net worth graph
        self.net_worth_ax.annotate('{0:.2f}'.format(net_worth), (last_date, last_net_worth),
                                   xytext=(last_date, last_net_worth),
                                   bbox=dict(boxstyle='round',
                                             fc='w', ec='k', lw=1),
                                   color="black",
                                   fontsize="small")

        # Add space above and below min/max net worth
        self.net_worth_ax.set_ylim(
            min(self.net_worths[np.nonzero(self.net_worths)]) / 1.25, max(self.net_worths) * 1.25)

    def _render_price(self, current_step, net_worth, dates, step_range):
        self.price_ax.clear()

        # Format data for OHCL candlestick graph
        candlesticks = zip(dates,
                           self.df['open'].values[step_range], self.df['close'].values[step_range],
                           self.df['high'].values[step_range], self.df['low'].values[step_range])

        # Plot price using candlestick graph from mpl_finance
        # mpf.plot(self.price_ax, candlesticks, width=1,
        #             colorup=UP_COLOR, colordown=DOWN_COLOR)

        mpf.plot(self.df, type='candle', style='yahoo', volume=True)

        last_date = date2num(self.df['date'].values[current_step])
        last_close = self.df['close'].values[current_step]
        last_high = self.df['high'].values[current_step]

        # Print the current price to the price axis
        self.price_ax.annotate('{0:.2f}'.format(last_close), (last_date, last_close),
                               xytext=(last_date, last_high),
                               bbox=dict(boxstyle='round',
                                         fc='w', ec='k', lw=1),
                               color="black",
                               fontsize="small")

        # Shift price axis up to give volume chart space
        ylim = self.price_ax.get_ylim()
        self.price_ax.set_ylim(ylim[0] - (ylim[1] - ylim[0])
                               * VOLUME_CHART_HEIGHT, ylim[1])

    def _render_volume(self, current_step, net_worth, dates, step_range):
        self.volume_ax.clear()

        volume = np.array(self.df['volume'].values[step_range])

        pos = self.df['open'].values[step_range] - \
            self.df['close'].values[step_range] < 0
        neg = self.df['open'].values[step_range] - \
            self.df['close'].values[step_range] > 0

        # Color volume bars based on price direction on that date
        self.volume_ax.bar(dates[pos], volume[pos], color=UP_COLOR,
                           alpha=0.4, width=1, align='center')
        self.volume_ax.bar(dates[neg], volume[neg], color=DOWN_COLOR,
                           alpha=0.4, width=1, align='center')

        # Cap volume axis height below price chart and hide ticks
        self.volume_ax.set_ylim(0, max(volume) / VOLUME_CHART_HEIGHT)
        self.volume_ax.yaxis.set_ticks([])

    def _render_trades(self, current_step, trades, step_range):
        # render only if there are trades
        if len(trades) > 0:
            for trade in trades:
                if trade['step'] in step_range:
                    date = date2num(self.df['date'].values[trade['step']])
                    high = self.df['high'].values[trade['step']]
                    low = self.df['low'].values[trade['step']]

                    if trade['type'] == 'Buy':
                        high_low = low
                        color = UP_TEXT_COLOR
                    else:
                        high_low = high
                        color = DOWN_TEXT_COLOR
                    
                    total = '{0:.2f}'.format(trade['total'])

                    # Print the current price to the price axis
                    self.price_ax.annotate(f'${total}', (date, high_low),
                                        xytext=(date, high_low),
                                        color=color,
                                        fontsize=8,
                                        arrowprops=(dict(color=color)))

    def render(self, current_step, net_worth, trades, window_size=40):
        self.net_worths[current_step] = net_worth

        window_start = max(current_step - window_size, 0)
        step_range = range(window_start, current_step + 1)

        # Format dates as timestamps, necessary for candlestick graph
        dates = np.array([date2num(x)
                          for x in self.df['date'].values[step_range]])

        self._render_net_worth(current_step, net_worth, step_range, dates)
        self._render_price(current_step, net_worth, dates, step_range)
        self._render_volume(current_step, net_worth, dates, step_range)
        self._render_trades(current_step, trades, step_range)

        # Format the date ticks to be more easily read
        self.price_ax.set_xticklabels(self.df['date'].values[step_range], rotation=45,
                                      horizontalalignment='right')

        # Hide duplicate net worth date labels
        plt.setp(self.net_worth_ax.get_xticklabels(), visible=False)

        # Necessary to view frames before they are unrendered
        plt.pause(0.001)

    def close(self):
        plt.close()


# Defining the environment

In [70]:
import random
import gym
from gym import spaces
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# infinite number in python
MAX_NET_WORTH = 2147483647
MAX_NUM_QUOTE_OR_BASE_ASSET = 2147483647

INITIAL_QUOTE_ASSET = 10000
INITIAL_BASE_ASSET = 0
OBSERVATION_WINDOW_SIZE = 24 # Probably we should put it as param ?

class SimpleTradingEnv(gym.Env):
    
    metadata = {'render.modes': ['live', 'human', 'none']}
    visualization = None

    def __init__(self, df_scaled, df_normal, trading_fee):
        
        self.df_scaled = df_scaled.reset_index(drop=True)
        self.df_normal = df_normal.reset_index(drop=True)
        self.window_size = OBSERVATION_WINDOW_SIZE
        self.prices, self.features = self._process_data(df_scaled)
        # The shape of the observation is (window_size * features + environment_features) the environment_features are: quote_asset, base_asset, net_worth. The entire observation is flattened in a 1D np array. 
        self.obs_shape = ((OBSERVATION_WINDOW_SIZE * self.features.shape[1] + 3),)


        # Action space
        #self.action_space = spaces.Box(low=np.array([0, 0]), high=np.array([3.0, 1.0]), dtype=np.float32)
        self.action_space = spaces.MultiDiscrete([3, 100])
        # Observation space
        self.observation_space = spaces.Box(low=-1, high = 1, shape=self.obs_shape, dtype=np.float32)

        # Initialize the episode environment

        self._start_candle = OBSERVATION_WINDOW_SIZE # We assume that the first observation is not the first row of the dataframe, in order to avoid the case where there are no calculated indicators.
        self._end_candle = len(self.features) - 1
        self._trading_fee = trading_fee

        self._quote_asset = None
        self._base_asset = None
        self._done = None
        self._current_candle = None
        self._net_worth = None
        self._previous_net_worth = None

        # Render and analysis data
        self._total_reward_accumulated = None
        self.trade_history = None
        self._first_rendering = None
        

    def reset(self):
        self._done = False
        self._current_candle = self._start_candle
        self._quote_asset = INITIAL_QUOTE_ASSET
        self._base_asset = INITIAL_BASE_ASSET 
        self._net_worth = INITIAL_QUOTE_ASSET # at the begining our net worth is the initial quote asset
        self._previous_net_worth = INITIAL_QUOTE_ASSET # at the begining our previous net worth is the initial quote asset
        self._total_reward_accumulated = 0.
        self._first_rendering = True
        self.trade_history = []
        return self._get_observation()

    def _take_action(self, action):
        self._done = False
        current_price = random.uniform(
            self.df_normal.loc[self._current_candle, "low"], self.df_normal.loc[self._current_candle, "high"])

        # print('action: ', action)
        # print('action type: ', action[0])
        # print('action amount: ', action[1])
        action_type = action[0]
        amount = action[1] / 100
        
        if action_type < 1:
            # Buy % assets
            # Determine the maximum amount of quote asset that can be bought
            available_amount_to_buy_with = self._quote_asset / current_price
            # Buy only the amount that agent chose
            assets_bought = available_amount_to_buy_with * amount
            # Update the quote asset balance
            self._quote_asset -= assets_bought * current_price
            # Update the base asset
            self._base_asset += assets_bought
            # substract trading fee from base asset based on the amount bought
            self._base_asset -= self._trading_fee * assets_bought

            # Add to trade history the amount bought if greater than 0
            if assets_bought > 0:
                self.trade_history.append({'step': self._current_candle, 'type': 'Buy', 'amount': assets_bought, 'price': current_price, 'total' : assets_bought * current_price, 'percent_amount': action[1]})
        

        elif action_type < 2:
            # Sell % assets
            # Determine the amount of base asset that can be sold
            amount_to_sell = self._base_asset * amount
            received_quote_asset = amount_to_sell * current_price
            # Update the quote asset
            self._quote_asset += received_quote_asset
            # Update the base asset
            self._base_asset -= amount_to_sell
            
            # substract trading fee from quote asset based on the amount sold
            self._quote_asset -= self._trading_fee * received_quote_asset

            # Add to trade history the amount sold if greater than 0
            if amount_to_sell > 0:
                self.trade_history.append({'step': self._current_candle, 'type': 'Sell', 'amount': amount_to_sell, 'price': current_price, 'total' : received_quote_asset, 'percent_amount': action[1]})

        else:
            # Hold
            self.trade_history.append({'step': self._current_candle, 'type': 'Hold', 'amount': '0', 'price': current_price, 'total' : 0, 'percent_amount': action[1]})


        # Update the current net worth
        self._net_worth = self._base_asset * current_price + self._quote_asset



    def step(self, action):
        """
        Returns the next observation, reward, done and info.
        """
        
        self._take_action(action)

        # Calculate reward comparing the current net worth with the previous net worth
        reward = self._net_worth - self._previous_net_worth

        self._total_reward_accumulated += reward

        # Update the previous net worth to be the current net worth after the reward has been applied
        self._previous_net_worth = self._net_worth

        obs = self._get_observation()
        # Update the info and add it to history data
        info = dict (
            total_reward_accumulated = self._total_reward_accumulated,
            net_worth = self._net_worth,
            last_action_type = self.trade_history[-1]['type'],
            last_action_amount = self.trade_history[-1]['amount'],
            current_step = self._current_candle
        )

        self._current_candle += 1

        self._done = self._net_worth <= 0 or self._current_candle >= len(
            self.df_scaled.loc[:, 'open'].values)
        
        return obs, reward, self._done, info


    def _get_observation(self):
        """
        Returns the current observation.
        """
        data_frame = self.features[(self._current_candle - self.window_size):self._current_candle]
        data_frame = data_frame.flatten()

        # Scale all the values to be between -1 and 1
        obs = np.append(data_frame, np.array([self._net_worth / MAX_NET_WORTH , self._quote_asset / MAX_NUM_QUOTE_OR_BASE_ASSET, self._base_asset / MAX_NUM_QUOTE_OR_BASE_ASSET], dtype=np.float32))

        return obs


    def render(self, mode='human', **kwargs):
        """
        Renders a plot with trades made by the agent.
        """
        
        if mode == 'human':
            print(f'Accumulated Reward: {self._total_reward_accumulated} ---- Current Net Worth: {self._net_worth}')
            print(f'Current Quote asset: {self._quote_asset} ---- Current Base asset: {self._base_asset}')
            print(f'Number of trades: {len(self.trade_history)}')
        
            if(len(self.trade_history) > 0):
                print(f'Last Action: {self.trade_history[-1]["type"]} {self.trade_history[-1]["amount"]} assets ({self.trade_history[-1]["percent_amount"]} %) at price {self.trade_history[-1]["price"]}, total: {self.trade_history[-1]["total"]}')
            print(f'--------------------------------------------------------------------------------------')
        elif mode == 'live':
            if self.visualization == None:
                self.visualization = LiveTradingGraph(self.df_normal, kwargs.get('title', None))

            if self._current_candle > OBSERVATION_WINDOW_SIZE:
                self.visualization.render(self._current_candle, self._net_worth, self.trade_history, window_size=OBSERVATION_WINDOW_SIZE)

    def close(self):
        if self.visualization != None:
            self.visualization.close()
            self.visualization = None
         

    def _process_data(self, df_scaled):
        """
        Processes the dataframe into features.
        """

        prices = self.df_scaled.loc[:, 'close'].to_numpy(dtype=np.float32)

        data_frame = df_scaled.iloc[:, 1:] # drop first column which is date TODO: Should be fixed outside of this class

        # Convert df to numpy array
        return prices, data_frame.to_numpy(dtype=np.float32)

    def _generate_action_data_tuple(self, action, price):
        """
        Returns the action type and amount as tuple
        """

        action_type_name = self._get_human_readable_action(action)

        amount = action[1]
        return (action_type_name, amount, price)

    def _get_human_readable_action(self, action):
        # if action is lower than 1 than it's buy action, if action is lower than 2 than it's sell action otherwise it's hold action
        action_type_name = None
        if action[0] < 1:
            action_type_name = 'Buy'
        elif action[0] < 2:
            action_type_name = 'Sell'
        else:
            action_type_name = 'Hold'
        
        return action_type_name    

### Initialize, validate the environment and run a random test of x steps

In [127]:
import os
import time
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env

trading_fee = 0.0075
env = SimpleTradingEnv(X_train_scaled, X_train, trading_fee)
# check_env(env) ### Already tested and working :)
env = make_vec_env(lambda: env,vec_env_cls=SubprocVecEnv, n_envs=1)

obs = env.reset()
# Trying some random action sample
# for i in range(5):
#     # Take a random action
#     actions = np.array([env.action_space.sample() for _ in range(env.num_envs)])
#     print(actions)
#     env.step_async(actions)
#     obs, reward, done, info = env.step_wait()
#     print(info)
#     if done:
#         break
# env.close()

### Create an evaluation environment used to save only the best performing model

In [88]:
from stable_baselines3.common.callbacks import EvalCallback

# Separate evaluation env
eval_env = SimpleTradingEnv(X_valid_scaled, X_valid, trading_fee)
# check_env(eval_env) ### Already tested and working :)
eval_env = make_vec_env(lambda: eval_env,vec_env_cls=SubprocVecEnv, n_envs=20)
# Use deterministic actions for evaluation
eval_callback = EvalCallback(eval_env, best_model_save_path='model/PPO_best/',
                             log_path='model/logs/', eval_freq=max(500000 // 20, 1),
                             deterministic=False, render=False)

### Create a checkpoint callback to save the model periodically

In [89]:
from stable_baselines3.common.callbacks import CheckpointCallback

checkpoint_callback = CheckpointCallback(save_freq=max(500000 // 20, 1), save_path='model/PPO/',
                                         name_prefix='rl_model')

# Initialize the model and start learning

In [161]:
from stable_baselines3.common.callbacks import CallbackList

cwd = os.getcwd()
logdir = "logs"
if not os.path.exists(logdir):
    os.makedirs(logdir)


callback_list = CallbackList([checkpoint_callback, eval_callback])

number_of_epochs = 10000
total_timesteps = len(X_train_scaled) * number_of_epochs

model = PPO(MlpPolicy, env, verbose=1, tensorboard_log=logdir, device='cuda')

model.learn(total_timesteps=total_timesteps, tb_log_name="PPO", callback=callback_list)

Using cpu device
Logging to logs/PPO_0
------------------------------
| time/              |       |
|    fps             | 1724  |
|    iterations      | 1     |
|    time_elapsed    | 9     |
|    total_timesteps | 16384 |
------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 834         |
|    iterations           | 2           |
|    time_elapsed         | 39          |
|    total_timesteps      | 32768       |
| train/                  |             |
|    approx_kl            | 0.008025253 |
|    clip_fraction        | 0.0753      |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.7        |
|    explained_variance   | -0.000251   |
|    learning_rate        | 0.0003      |
|    loss                 | 2.96e+03    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0117     |
|    value_loss           | 7.18e+03    |
------------------------------

Process ForkServerProcess-84:
Process ForkServerProcess-83:
Process ForkServerProcess-81:
Process ForkServerProcess-79:
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/alexandrustefan/Projects/rl-algos/.venv/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 27, in _worker
    cmd, data = remote.recv()
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
ConnectionResetError: [Errno 104] Connection reset by peer
Process ForkServerProcess-82:
Process ForkServerProcess-80

KeyboardInterrupt: 

# Load saved model

In [128]:
# Make sure to load the model that performed the best, you can check it up in the tensorboard
# usualy it's the one with the highest rollout/ep_rew_mean, you can identify it by looking at the step number
model_path = "model/PPO_best/best_model.zip"
loaded_model = PPO.load(model_path, env=env)

# Run an evaluation test 

In [129]:
mean_reward, std_reward = evaluate_policy(loaded_model, env, n_eval_episodes=5, deterministic=False)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

mean_reward:-9985.84 +/- 4.46


# Render results

In [116]:
steps = 100000
render_interval = steps // 10
obs = env.reset()
for i in range(steps):
    action, _state = loaded_model.predict(obs, deterministic=False)
    if action[0][0] < 2 :
        print(f"Action: {action[0]}")
    # env.step_async(action)
    # obs, reward, done, info =  env.step_wait()
    # print('obs:', obs)
    # print('action:', action)
    # print('reward:', reward)
    # print('done:', done)
    # print('info:', info)
    # # if (i % render_interval) == 0:
    # #     env.env_method('render')
    # # if done:
    # #     obs = env.env_method('reset')
# env.close()


Action: [ 0 73]
Action: [ 1 21]
Action: [ 0 24]
Action: [0 7]
Action: [ 1 32]
Action: [ 0 62]
Action: [ 1 55]
Action: [0 7]
Action: [ 0 38]
Action: [ 1 83]
Action: [ 1 20]
Action: [ 0 38]
Action: [ 0 44]
Action: [ 0 12]
Action: [ 0 37]
Action: [ 0 37]
Action: [1 0]
Action: [ 0 13]
Action: [0 0]
Action: [ 0 15]
Action: [ 0 76]
Action: [ 0 76]
Action: [1 6]
Action: [ 0 13]
Action: [ 0 91]
Action: [ 0 78]
Action: [ 1 94]
Action: [ 1 32]
Action: [ 1 65]
Action: [ 0 92]
Action: [ 1 87]
Action: [ 0 53]
Action: [ 1 19]
Action: [ 0 17]
Action: [ 0 13]
Action: [ 0 51]
Action: [ 1 24]
Action: [ 1 11]
Action: [ 1 25]
Action: [ 0 43]
Action: [ 0 37]
Action: [ 1 94]
Action: [ 0 37]
Action: [ 1 87]
Action: [ 0 13]
Action: [ 1 85]
Action: [0 7]
Action: [ 0 44]
Action: [1 3]
Action: [ 0 94]
Action: [ 1 13]
Action: [ 0 70]
Action: [ 1 92]
Action: [ 0 14]
Action: [ 1 19]
Action: [ 1 66]
Action: [ 1 57]
Action: [ 0 46]
Action: [ 0 29]
Action: [ 0 15]
Action: [ 1 30]
Action: [ 1 90]
Action: [ 1 19]
Action

KeyboardInterrupt: 

In [167]:

print(env.env.trade_history)

AttributeError: 'SubprocVecEnv' object has no attribute 'env'