In [2]:
import gym
import pandas as pd
import numpy as np
import tf_slim as slim
from gym import spaces
from sklearn import preprocessing
import yfinance as yf
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines3 import PPO



In [3]:
class BitcoinTradingEnv(gym.Env):
    """A Bitcoin trading environment for OpenAI gym"""
    metadata = {'render.modes': ['live', 'file', 'none']}
    scaler = preprocessing.MinMaxScaler()
    viewer = None
    def __init__(self, df, lookback_window_size=50,commission=0.00075,initial_balance=10000,serial=False):
        super(BitcoinTradingEnv, self).__init__()
        self.df = df.dropna().reset_index()
        self.lookback_window_size = lookback_window_size
        self.initial_balance = initial_balance
        self.commission = commission
        self.serial = serial
        # Actions of the format Buy 1/10, Sell 3/10, Hold, etc.
        self.action_space = spaces.MultiDiscrete([3, 10])
        # Observes the OHCLV values, net worth, and trade history
        self.observation_space = spaces.Box(low=0, high=1, shape=(10,lookback_window_size + 1), dtype=np.float16)



In [4]:
def reset(self):
    self.balance = self.initial_balance
    self.net_worth = self.initial_balance
    self.btc_held = 0
    self._reset_session()
    self.account_history = np.repeat([
        [self.net_worth],
        [0],
        [0],
        [0],
        [0]
    ], self.lookback_window_size + 1, axis=1)
    self.trades = []
    return self._next_observation()

In [5]:
MAX_TRADING_SESSION = 100000  # ~2 months

In [6]:
def _reset_session(self):
    self.current_step = 0
    if self.serial:
        self.steps_left = len(self.df) - self.lookback_window_size - 1
        self.frame_start = self.lookback_window_size
    else:
        self.steps_left = np.random.randint(1, MAX_TRADING_SESSION)
        self.frame_start = np.random.randint(
            self.lookback_window_size, len(self.df) - self.steps_left)
    self.active_df = self.df[self.frame_start -
                             self.lookback_window_size:self.frame_start + self.steps_left]

In [7]:
def _next_observation(self):
    end = self.current_step + self.lookback_window_size + 1
    obs = np.array([
        self.active_df['Open'].values[self.current_step:end],
        self.active_df['High'].values[self.current_step:end],
        self.active_df['Low'].values[self.current_step:end],
        self.active_df['Close'].values[self.current_step:end],
        self.active_df['Volume_(BTC)'].values[self.current_step:end],
    ])
    scaled_history = self.scaler.fit_transform(self.account_history)
    obs = np.append(obs, scaled_history[:, -(self.lookback_window_size
                                             + 1):], axis=0)
    return obs

In [8]:
def step(self, action):
    current_price = self._get_current_price() + 0.01
    self._take_action(action, current_price)
    self.steps_left -= 1
    self.current_step += 1
    if self.steps_left == 0:
        self.balance += self.btc_held * current_price
        self.btc_held = 0
        self._reset_session()
    obs = self._next_observation()
    reward = self.net_worth
    done = self.net_worth <= 0
    return obs, reward, done, {}

In [9]:
def _take_action(self, action, current_price):
    action_type = action[0]
    amount = action[1] / 10
    btc_bought = 0
    btc_sold = 0
    cost = 0
    sales = 0
    if action_type < 1:
        btc_bought = self.balance / current_price * amount
        cost = btc_bought * current_price * (1 + self.commission)
        self.btc_held += btc_bought
        self.balance -= cost
    elif action_type < 2:
        btc_sold = self.btc_held * amount
        sales = btc_sold * current_price  * (1 - self.commission)
        self.btc_held -= btc_sold
        self.balance += sales
    if btc_sold > 0 or btc_bought > 0:
        self.trades.append({
        'step': self.frame_start+self.current_step,
        'amount': btc_sold if btc_sold > 0 else btc_bought,
        'total': sales if btc_sold > 0 else cost,
        'type': "sell" if btc_sold > 0 else "buy"
    })
    self.net_worth = self.balance + self.btc_held * current_price
    self.account_history = np.append(self.account_history, [
        [self.net_worth],
        [btc_bought],
        [cost],
        [btc_sold],
        [sales]
    ], axis=1)





In [10]:
from datetime import datetime

In [11]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import style

# finance module is no longer part of matplotlib
# see: https://github.com/matplotlib/mpl_finance
from mpl_finance import candlestick_ochl as candlestick

style.use('dark_background')

VOLUME_CHART_HEIGHT = 0.33

UP_COLOR = '#27A59A'
DOWN_COLOR = '#EF534F'
UP_TEXT_COLOR = '#73D3CC'
DOWN_TEXT_COLOR = '#DC2C27'


def date2num(date):
    converter = mdates.strpdate2num('%Y-%m-%d')
    return converter(date)


class BitcoinTradingGraph:
    """A stock trading visualization using matplotlib made to render OpenAI gym environments"""

    def __init__(self, df, title=None):
        self.df = df
        self.net_worths = np.zeros(len(df['Date']))

        # Create a figure on screen and set the title
        fig = plt.figure()
        fig.suptitle(title)

        # Create top subplot for net worth axis
        self.net_worth_ax = plt.subplot2grid(
            (6, 1), (0, 0), rowspan=2, colspan=1)

        # Create bottom subplot for shared price/volume axis
        self.price_ax = plt.subplot2grid(
            (6, 1), (2, 0), rowspan=8, colspan=1, sharex=self.net_worth_ax)

        # Create a new axis for volume which shares its x-axis with price
        self.volume_ax = self.price_ax.twinx()

        # Add padding to make graph easier to view
        plt.subplots_adjust(left=0.11, bottom=0.24,
                            right=0.90, top=0.90, wspace=0.2, hspace=0)

        # Show the graph without blocking the rest of the program
        plt.show(block=False)

    def _render_net_worth(self, current_step, net_worth, step_range, dates):
        # Clear the frame rendered last step
        self.net_worth_ax.clear()

        # Plot net worths
        self.net_worth_ax.plot_date(
            dates, self.net_worths[step_range], '-', label='Net Worth')

        # Show legend, which uses the label we defined for the plot above
        self.net_worth_ax.legend()
        legend = self.net_worth_ax.legend(loc=2, ncol=2, prop={'size': 8})
        legend.get_frame().set_alpha(0.4)

        last_date = (self.df['Timestamp'].values[current_step])
        last_net_worth = self.net_worths[current_step]

        # Annotate the current net worth on the net worth graph
        self.net_worth_ax.annotate('{0:.2f}'.format(net_worth), (last_date, last_net_worth),
                                   xytext=(last_date, last_net_worth),
                                   bbox=dict(boxstyle='round',
                                             fc='w', ec='k', lw=1),
                                   color="black",
                                   fontsize="small")

        # Add space above and below min/max net worth
        self.net_worth_ax.set_ylim(
            min(self.net_worths[np.nonzero(self.net_worths)]) / 1.25, max(self.net_worths) * 1.25)

    def _render_price(self, current_step, net_worth, dates, step_range):
        self.price_ax.clear()

        # Format data for OHCL candlestick graph
        candlesticks = zip(dates,
                           self.df['Open'].values[step_range], self.df['Close'].values[step_range],
                           self.df['High'].values[step_range], self.df['Low'].values[step_range])

        # Plot price using candlestick graph from mpl_finance
        candlestick(self.price_ax, candlesticks, width=1,
                    colorup=UP_COLOR, colordown=DOWN_COLOR)

        last_date = self.df['Timestamp'].values[current_step]
        last_close = self.df['Close'].values[current_step]
        last_high = self.df['High'].values[current_step]

        # Print the current price to the price axis
        self.price_ax.annotate('{0:.2f}'.format(last_close), (last_date, last_close),
                               xytext=(last_date, last_high),
                               bbox=dict(boxstyle='round',
                                         fc='w', ec='k', lw=1),
                               color="black",
                               fontsize="small")

        # Shift price axis up to give volume chart space
        ylim = self.price_ax.get_ylim()
        self.price_ax.set_ylim(ylim[0] - (ylim[1] - ylim[0])
                               * VOLUME_CHART_HEIGHT, ylim[1])

    def _render_volume(self, current_step, net_worth, dates, step_range):
        self.volume_ax.clear()

        volume = np.array(self.df['Volume'].values[step_range])

        pos = self.df['Open'].values[step_range] -self.df['Close'].values[step_range] < 0
        neg = self.df['Open'].values[step_range] - self.df['Close'].values[step_range] > 0

        # Color volume bars based on price direction on that date
        self.volume_ax.bar(dates[pos], volume[pos], color=UP_COLOR,
                           alpha=0.4, width=1, align='center')
        self.volume_ax.bar(dates[neg], volume[neg], color=DOWN_COLOR,
                           alpha=0.4, width=1, align='center')

        # Cap volume axis height below price chart and hide ticks
        self.volume_ax.set_ylim(0, max(volume) / VOLUME_CHART_HEIGHT)
        self.volume_ax.yaxis.set_ticks([])

    def _render_trades(self, current_step, trades, step_range):
        for trade in trades:
            if trade['step'] in step_range:
                date = self.df['Timestamp'].values[trade['step']]
                high = self.df['High'].values[trade['step']]
                low = self.df['Low'].values[trade['step']]

                if trade['type'] == 'buy':
                    high_low = low
                    color = UP_TEXT_COLOR
                else:
                    high_low = high
                    color = DOWN_TEXT_COLOR

                total = '{0:.2f}'.format(trade['total'])

                # Print the current price to the price axis
                self.price_ax.annotate(f'${total}', (date, high_low),
                                       xytext=(date, high_low),
                                       color=color,
                                       fontsize=8,
                                       arrowprops=(dict(color=color)))

    def render(self, current_step, net_worth, trades, window_size=40):
        self.net_worths[current_step] = net_worth

        window_start = max(current_step - window_size, 0)
        step_range = range(window_start, current_step + 1)

        # Format dates as timestamps, necessary for candlestick graph
        dates = np.array([(x)for x in self.df['Timestamp'].values[step_range]])
        date_labels = np.array([datetime.utcfromtimestamp(x).strftime('%Y-%m-%d %H:%M') for x in self.df['Timestamp'].values[step_range]])

        self._render_net_worth(current_step, net_worth, step_range, dates)
        self._render_price(current_step, net_worth, dates, step_range)
        self._render_volume(current_step, net_worth, dates, step_range)
        self._render_trades(current_step, trades, step_range)

        # Format the date ticks to be more easily read
        self.price_ax.set_xticklabels(self.df['Timestamp'].values[step_range], rotation=45,
                                      horizontalalignment='right')

        # Hide duplicate net worth date labels
        plt.setp(self.net_worth_ax.get_xticklabels(), visible=False)

        # Necessary to view frames before they are unrendered
        plt.pause(0.001)

    def close(self):
        plt.close()




    Please use `mplfinance` instead (no hyphen, no underscore).

    To install: `pip install --upgrade mplfinance` 

   For more information, see: https://pypi.org/project/mplfinance/




In [12]:
def render(self, mode='human', **kwargs):
    if mode == 'human':
        if self.viewer == None:
            self.viewer = BitcoinTradingGraph(self.df,kwargs.get('title', None))
        self.viewer.render(self.frame_start + self.current_step,
                   self.net_worth,
                   self.trades,
                   window_size=self.lookback_window_size)

In [13]:
df = yf.download('BTC-USD', start='2021-01-01',
                 end='2022-12-12', interval='1h', back_adjust=True, auto_adjust=True, prepost=True)

[*********************100%***********************]  1 of 1 completed


In [14]:
slice_point = int(len(df) - 1000)
train_df = df[:slice_point]
test_df = df[slice_point:]

In [15]:
train_env = DummyVecEnv([lambda: BitcoinTradingEnv(train_df,
                                                   commission=0, serial=False)])
test_env = DummyVecEnv([lambda: BitcoinTradingEnv(test_df,
                                                  commission=0, serial=True)])

In [16]:
model = PPO('MlpPolicy',train_env,verbose=1,tensorboard_log="./tensorboard/")
model.learn(total_timesteps=5000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


NotImplementedError: 

<stable_baselines.common.vec_env.dummy_vec_env.DummyVecEnv at 0x28b60d700>

In [18]:
import tensorflow as tf
print(tf.__version__)

2.8.0
