In [1]:
!pip install tensortrade

Collecting tensortrade
[?25l  Downloading https://files.pythonhosted.org/packages/76/6c/9bcd00c219026ac6f12fc2fab3fb1ca879107f418f812c6f063f64963fcb/tensortrade-1.0.1b0-py3-none-any.whl (132kB)
[K     |████████████████████████████████| 133kB 6.8MB/s 
[?25hCollecting pyyaml>=5.1.2
[?25l  Downloading https://files.pythonhosted.org/packages/7a/a5/393c087efdc78091afa2af9f1378762f9821c9c1d7a22c5753fb5ac5f97a/PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636kB)
[K     |████████████████████████████████| 645kB 8.2MB/s 
Collecting ipython>=7.12.0
[?25l  Downloading https://files.pythonhosted.org/packages/81/d1/8d0ba7589ea4cbf3e80ef8e20616da2cfc3c33187a64b044372aad517512/ipython-7.23.1-py3-none-any.whl (785kB)
[K     |████████████████████████████████| 788kB 9.5MB/s 
Collecting plotly>=4.5.0
[?25l  Downloading https://files.pythonhosted.org/packages/1f/f6/bd3c17c8003b6641df1228e80e1acac97ed8402635e46c2571f8e1ef63af/plotly-4.14.3-py2.py3-none-any.whl (13.2MB)
[K     |████████████████████

In [2]:
import pandas as pd
import tensortrade.env.default as default

from tensortrade.data.cdd import CryptoDataDownload
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC, ETH
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.agents import DQNAgent


%matplotlib inline

In [3]:
cdd = CryptoDataDownload()

data = cdd.fetch("Bitstamp", "USD", "BTC", "1h")

In [4]:
def rsi(price: Stream[float], period: float) -> Stream[float]:
    r = price.diff()
    upside = r.clamp_min(0).abs()
    downside = r.clamp_max(0).abs()
    rs = upside.ewm(alpha=1 / period).mean() / downside.ewm(alpha=1 / period).mean()
    return 100*(1 - (1 + rs) ** -1)


def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
    fm = price.ewm(span=fast, adjust=False).mean()
    sm = price.ewm(span=slow, adjust=False).mean()
    md = fm - sm
    signal = md - md.ewm(span=signal, adjust=False).mean()
    return signal


features = []
for c in data.columns[1:]:
    s = Stream.source(list(data[c]), dtype="float").rename(data[c].name)
    features += [s]

cp = Stream.select(features, lambda s: s.name == "close")

features = [
    cp.log().diff().rename("lr"),
    rsi(cp, period=20).rename("rsi"),
    macd(cp, fast=10, slow=50, signal=5).rename("macd")
]

feed = DataFeed(features)
feed.compile()

In [5]:
bitstamp = Exchange("bitstamp", service=execute_order)(
    Stream.source(list(data["close"]), dtype="float").rename("USD-BTC")
)

portfolio = Portfolio(USD, [
    Wallet(bitstamp, 10000 * USD),
    Wallet(bitstamp, 10 * BTC)
])


renderer_feed = DataFeed([
    Stream.source(list(data["date"])).rename("date"),
    Stream.source(list(data["open"]), dtype="float").rename("open"),
    Stream.source(list(data["high"]), dtype="float").rename("high"),
    Stream.source(list(data["low"]), dtype="float").rename("low"),
    Stream.source(list(data["close"]), dtype="float").rename("close"), 
    Stream.source(list(data["volume"]), dtype="float").rename("volume") 
])


env = default.create(
    portfolio=portfolio,
    action_scheme="managed-risk",
    reward_scheme="risk-adjusted",
    feed=feed,
    renderer_feed=renderer_feed,
    renderer=default.renderers.PlotlyTradingChart(),
    window_size=20
)

In [6]:
env.observer.feed.next()

{'external': {'lr': nan, 'macd': 0.0, 'rsi': nan},
 'internal': {'bitstamp:/BTC:/free': 10.0,
  'bitstamp:/BTC:/locked': 0.0,
  'bitstamp:/BTC:/total': 10.0,
  'bitstamp:/BTC:/worth': 87409.9,
  'bitstamp:/USD-BTC': 8740.99,
  'bitstamp:/USD:/free': 10000.0,
  'bitstamp:/USD:/locked': 0.0,
  'bitstamp:/USD:/total': 10000.0,
  'net_worth': 97409.9},
 'renderer': {'close': 8740.99,
  'date': Timestamp('2018-05-15 06:00:00'),
  'high': 8796.68,
  'low': 8707.28,
  'open': 8733.86,
  'volume': 559.93}}

In [9]:
!pip install pfrl

Collecting pfrl
[?25l  Downloading https://files.pythonhosted.org/packages/5c/02/61ebc04fabab26e38bc06f8f37d9036c448387f48a9e46b6e2e1e3cf456d/pfrl-0.2.1.tar.gz (110kB)
[K     |███                             | 10kB 13.7MB/s eta 0:00:01[K     |██████                          | 20kB 12.1MB/s eta 0:00:01[K     |████████▉                       | 30kB 10.0MB/s eta 0:00:01[K     |███████████▉                    | 40kB 8.9MB/s eta 0:00:01[K     |██████████████▉                 | 51kB 5.0MB/s eta 0:00:01[K     |█████████████████▊              | 61kB 5.2MB/s eta 0:00:01[K     |████████████████████▊           | 71kB 5.7MB/s eta 0:00:01[K     |███████████████████████▊        | 81kB 6.2MB/s eta 0:00:01[K     |██████████████████████████▋     | 92kB 6.6MB/s eta 0:00:01[K     |█████████████████████████████▋  | 102kB 6.5MB/s eta 0:00:01[K     |████████████████████████████████| 112kB 6.5MB/s 
Building wheels for collected packages: pfrl
  Building wheel for pfrl (setup.py) ... [

In [43]:
import pfrl


import numpy as np
import torch
from abc import ABCMeta, abstractmethod

from tensortrade.core import Identifiable

class QFunction(torch.nn.Module):

    def __init__(self, obs_size, n_actions):
        super().__init__()
        self.l1 = torch.nn.Linear(obs_size, 120)
        self.l2 = torch.nn.Linear(120, 120)
        self.l3 = torch.nn.Linear(120, n_actions)

    def forward(self, x):
        h = x
        h = torch.nn.functional.relu(self.l1(h))
        h = torch.nn.functional.relu(self.l2(h))
        h = self.l3(h)
        return pfrl.action_value.DiscreteActionValue(h)

class Agent(Identifiable, metaclass=ABCMeta):

    def __init__(self, env: 'TradingEnv'):
        self.env = env
        self.n_actions = env.action_space.n
        self.observation_shape = env.observation_space.shape


        
        self.dqn_agent = self.create_agent()
        self.env.agent_id = self.id

    def create_agent(self):
        flat_shape = self.observation_shape[0] * self.observation_shape[1]
       
        q_func = QFunction(flat_shape, self.n_actions)
        # Use Adam to optimize q_func. eps=1e-2 is for stability.
        optimizer = torch.optim.Adam(q_func.parameters(), eps=1e-2)

        # Set the discount factor that discounts future rewards.
        gamma = 0.9

        # Use epsilon-greedy for exploration
        explorer = pfrl.explorers.ConstantEpsilonGreedy(
            epsilon=0.3, random_action_func=self.env.action_space.sample)

        # DQN uses Experience Replay.
        # Specify a replay buffer and its capacity.
        replay_buffer = pfrl.replay_buffers.ReplayBuffer(capacity=10 ** 6)

        # Since observations from CartPole-v0 is numpy.float64 while
        # As PyTorch only accepts numpy.float32 by default, specify
        # a converter as a feature extractor function phi.
        phi = lambda x: x.astype(np.float32, copy=False)

        # Set the device id to use GPU. To use CPU only, set it to -1.
        gpu = -1

        # Now create an agent that will interact with the environment.
        agent = pfrl.agents.DoubleDQN(
            q_func,
            optimizer,
            replay_buffer,
            gamma,
            explorer,
            replay_start_size=500,
            update_interval=1,
            target_update_interval=100,
            phi=phi,
            gpu=gpu,
        )

        return agent

    
    def restore(self, path: str, **kwargs):
        """Restore the agent from the file specified in `path`."""
        pass

    
    def save(self, path: str, **kwargs):
        """Save the agent to the directory specified in `path`."""
        pass

    
    def get_action(self, state: np.ndarray, **kwargs) -> int:
        """Get an action for a specific state in the environment."""
        action = self.dqn_agent.act(state)
        return action

    
    def train(self,
              n_steps: int = None,
              n_episodes: int = 10000,
              save_every: int = None,
              save_path: str = None,
              callback: callable = None,
              **kwargs) -> float:
        """Train the agent in the environment and return the mean reward."""

        #n_episodes = 1000
        max_episode_len = 2**30
        for i in range(1, n_episodes + 1):
            obs = self.env.reset()
            t = 0  # time step
            while True:
                # Uncomment to watch the behavior in a GUI window
                # env.render()
                obs = obs.flatten()
                action = self.get_action(obs)
                obs, reward, done, _ = self.env.step(action)
                obs = obs.flatten()
                # print(action, reward, done)
                t += 1
                reset = t == max_episode_len
                self.dqn_agent.observe(obs, reward, done, reset)
                if done or reset:
                    break
            
            self.env.render()
        

        print('Finished.')



In [44]:
agent = Agent(env)

agent.train(n_steps=200, n_episodes=2, save_path="agents/")

FigureWidget({
    'data': [{'close': array([8740.99, 8739.  , 8728.49, ..., 4211.4 , 4209.18, 4130.62]),
    …

Finished.
