<a href="https://colab.research.google.com/github/hesller/python-ai/blob/main/rltrader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Core library: TensorTrade together with its optional extras.
!pip install tensortrade[tf,tensorforce,baselines,ccxt,fbm]

In [None]:
# Additional charting, exchange-API and technical-analysis packages.
!pip install mpl_finance ccxt ta

In [None]:
# -y pre-answers pip's confirmation prompt; without it the cell waits for
# keyboard input that a non-interactive notebook run cannot provide.
!pip uninstall -y tensorflow

In [None]:
!pip install -r requirements.txt

In [None]:
# Installs the pinned 1.15 GPU build after the default TensorFlow was removed above.
!pip install tensorflow-gpu==1.15

In [None]:
!python cli.py update-static-data

In [None]:
!pip install optuna==0.10


In [None]:
# -o overwrites already-extracted files without asking, so re-running this
# cell does not stall on unzip's "replace?" prompt.
!unzip -o Archive.zip

In [None]:
!python cli.py optimize

In [None]:
!python ./optimize.py

**CODE ENVIRONMENT**

In [None]:
import ta

import pandas as pd
import tensortrade.env.default as default

from tensortrade.data.cdd import CryptoDataDownload
from tensortrade.feed.core import Stream, DataFeed, NameSpace
from tensortrade.oms.instruments import USD, BTC, ETH, LTC
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order

In [None]:
# Download hourly Bitfinex candles for BTC and ETH. Each frame's columns get a
# symbol prefix so both assets can live side by side in one table, which is
# then indexed by the BTC date column.
cdd = CryptoDataDownload()

# NOTE(review): concat(axis=1) lines the two frames up on their existing index,
# not on the timestamp columns — confirm both downloads cover the same hours.
bitfinex_data = pd.concat(
    [
        cdd.fetch("Bitfinex", "USD", symbol, "1h").add_prefix(prefix)
        for symbol, prefix in (("BTC", "BTC:"), ("ETH", "ETH:"))
    ],
    axis=1,
).set_index('BTC:date')

**DEFINE THE EXCHANGE**

In [None]:
# Simulated "bitfinex" exchange; each close-price column becomes the price
# stream of one trading pair (USD-BTC and USD-ETH).
bitfinex = Exchange("bitfinex", service=execute_order)(*[
    Stream.source(list(bitfinex_data[column]), dtype="float").rename(pair)
    for column, pair in (("BTC:close", "USD-BTC"), ("ETH:close", "USD-ETH"))
])

**DEFINE EXTERNAL DATAFEED**

In [None]:
# Split the combined Bitfinex table into one frame per asset.
# .copy() makes each frame independent of bitfinex_data, so the columns that
# `ta` adds below are written to a real frame rather than to a slice
# (which is what triggers pandas' SettingWithCopyWarning).
bitfinex_btc = bitfinex_data.loc[:, [name.startswith("BTC") for name in bitfinex_data.columns]].copy()
bitfinex_eth = bitfinex_data.loc[:, [name.startswith("ETH") for name in bitfinex_data.columns]].copy()

# Add the `ta` library's full indicator set to the BTC frame only; the return
# value is not used, so this relies on the frame being extended in place.
ta.add_all_ta_features(
    bitfinex_btc,
    colprefix="BTC:",
    **{k: "BTC:" + k for k in ['open', 'high', 'low', 'close', 'volume']}
)


# One float stream per column of either frame, created inside the "bitfinex"
# namespace (later cells select them by names such as "bitfinex:/BTC:close").
with NameSpace("bitfinex"):
    bitfinex_streams = [
        Stream.source(list(bitfinex_btc[c]), dtype="float").rename(c) for c in bitfinex_btc.columns
    ]
    bitfinex_streams += [
        Stream.source(list(bitfinex_eth[c]), dtype="float").rename(c) for c in bitfinex_eth.columns
    ]

def rsi(price: Stream[float], period: float) -> Stream[float]:
    """Relative-strength-index style oscillator built from stream operations.

    Parameters
    ----------
    price : Stream[float]
        Price stream whose step-to-step differences are analysed.
    period : float
        Smoothing period; the exponential averages use ``alpha = 1 / period``.

    Returns
    -------
    Stream[float]
        ``100 * (1 - 1 / (1 + rs))`` where ``rs`` is the ratio of the smoothed
        upward moves to the smoothed downward moves.
    """
    change = price.diff()
    # Split each change into its non-negative and non-positive part, as magnitudes.
    gains = change.clamp_min(0).abs()
    losses = change.clamp_max(0).abs()
    smoothed_gain = gains.ewm(alpha=1 / period).mean()
    smoothed_loss = losses.ewm(alpha=1 / period).mean()
    strength = smoothed_gain / smoothed_loss
    return 100*(1 - (1 + strength) ** -1)


def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
    """Difference between the MACD line and its own exponential average.

    Parameters
    ----------
    price : Stream[float]
        Price stream to analyse.
    fast, slow : float
        Spans of the two exponential averages whose difference is the MACD line.
    signal : float
        Span of the exponential average applied to the MACD line.

    Returns
    -------
    Stream[float]
        MACD line minus its ``signal``-span exponential average.
    """
    fast_average = price.ewm(span=fast, adjust=False).mean()
    slow_average = price.ewm(span=slow, adjust=False).mean()
    macd_line = fast_average - slow_average
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    return macd_line - signal_line

# Derived features (log return, RSI, MACD) for both assets.
# Bug fix: the ETH list used to be assigned to `features` as well, replacing
# the BTC list, so the BTC features never reached the feed. The ETH features
# are now appended instead.
cp = Stream.select(bitfinex_streams, lambda s: s.name == "bitfinex:/BTC:close")

features = [
    cp.log().diff().rename("BTC:lr"),
    rsi(cp, period=20).rename("BTC:rsi"),
    macd(cp, fast=10, slow=50, signal=5).rename("BTC:macd")
]


cp = Stream.select(bitfinex_streams, lambda s: s.name == "bitfinex:/ETH:close")

features += [
    cp.log().diff().rename("ETH:lr"),
    rsi(cp, period=20).rename("ETH:rsi"),
    macd(cp, fast=10, slow=50, signal=5).rename("ETH:macd")
]

# Keep the derived streams alongside the raw ones for inspection.
bitfinex_streams += features

# Only the derived features are fed to the agent.
feed = DataFeed(features)


In [None]:
# Display the ETH-only frame that was split out of bitfinex_data above.
bitfinex_eth

In [None]:
# Print every stream's name next to its generic_name, one stream per line.
for stream in bitfinex_streams:
    print(stream.name, stream.generic_name)

**MAKE PORTFOLIO**

In [None]:
# Starting balances, one wallet per instrument, all on the simulated exchange.
# USD is passed as the portfolio's first argument (presumably its base
# instrument — confirm against the Portfolio signature).
portfolio = Portfolio(
    USD,
    [Wallet(bitfinex, balance) for balance in (10000 * USD, 10 * BTC, 5 * ETH)],
)

**RENDERER FEED**

In [None]:
# Streams handed to the chart renderer: the BTC candle columns, renamed to the
# plain field names "date", "open", "high", "low", "close" and "volume".
renderer_feed = DataFeed([
    Stream.source(list(bitfinex_data[column]), dtype="float").rename(field)
    for column, field in (
        ("BTC:unix", "date"),
        ("BTC:open", "open"),
        ("BTC:high", "high"),
        ("BTC:low", "low"),
        ("BTC:close", "close"),
        ("BTC:volume", "volume"),
    )
])

**CREATE ENVIRONMENT**

In [None]:
import tensortrade.env.default as default

# Assemble the trading environment from the pieces built above. The action and
# reward schemes are selected by their registered names; each observation
# window is configured as 20 steps.
env = default.create(
    feed=feed,
    portfolio=portfolio,
    action_scheme="managed-risk",
    reward_scheme="risk-adjusted",
    window_size=20,
    renderer=default.renderers.PlotlyTradingChart(),
    renderer_feed=renderer_feed,
)

In [None]:
# Pull one record from the observer's feed to inspect the feature values.
# NOTE(review): this presumably advances the feed by one step — confirm.
env.observer.feed.next()

**TRAIN AND EVALUATE**

In [None]:
# Ray is pinned to 0.8.7; symfit is installed without a version pin.
!pip install ray==0.8.7
!pip install symfit

In [None]:
import ray
import numpy as np

from ray import tune
from ray.tune.registry import register_env

import tensortrade.env.default as default

from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.instruments import Instrument
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio

In [None]:
# Single-asset variant: reload only the hourly BTC/USD candles.
cdd = CryptoDataDownload()

bitfinex_data = pd.concat(
    [cdd.fetch("Bitfinex", "USD", "BTC", "1h").add_prefix("BTC:")],
    axis=1,
)

# In this cell the date column stays an ordinary column; the indexing step is
# commented out:
# bitfinex_data.set_index('BTC:date', inplace=True)

# Simulated exchange quoting USD-BTC from the close prices.
bitfinex = Exchange("bitfinex", service=execute_order)(
    Stream.source(list(bitfinex_data['BTC:close']), dtype="float").rename("USD-BTC")
)

# Keep the columns whose name starts with "BTC".
bitfinex_btc = bitfinex_data.loc[:, bitfinex_data.columns.str.startswith("BTC")]

# The full `ta` indicator set is disabled in this variant:
# ta.add_all_ta_features(
#     bitfinex_btc,
#     colprefix="BTC:",
#     **{k: "BTC:" + k for k in ['open', 'high', 'low', 'close', 'volume']}
# )

# One float stream per BTC column, created inside the "bitfinex" namespace.
with NameSpace("bitfinex"):
    bitfinex_streams = [
        Stream.source(list(bitfinex_btc[column]), dtype="float").rename(column)
        for column in bitfinex_btc.columns
    ]

def rsi(price: Stream[float], period: float) -> Stream[float]:
    """Relative-strength-index style oscillator built from stream operations.

    Parameters
    ----------
    price : Stream[float]
        Price stream whose step-to-step differences are analysed.
    period : float
        Smoothing period; the exponential averages use ``alpha = 1 / period``.

    Returns
    -------
    Stream[float]
        ``100 * (1 - 1 / (1 + rs))`` where ``rs`` is the ratio of the smoothed
        upward moves to the smoothed downward moves.
    """
    change = price.diff()
    # Split each change into its non-negative and non-positive part, as magnitudes.
    gains = change.clamp_min(0).abs()
    losses = change.clamp_max(0).abs()
    smoothed_gain = gains.ewm(alpha=1 / period).mean()
    smoothed_loss = losses.ewm(alpha=1 / period).mean()
    strength = smoothed_gain / smoothed_loss
    return 100*(1 - (1 + strength) ** -1)


def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
    """Difference between the MACD line and its own exponential average.

    Parameters
    ----------
    price : Stream[float]
        Price stream to analyse.
    fast, slow : float
        Spans of the two exponential averages whose difference is the MACD line.
    signal : float
        Span of the exponential average applied to the MACD line.

    Returns
    -------
    Stream[float]
        MACD line minus its ``signal``-span exponential average.
    """
    fast_average = price.ewm(span=fast, adjust=False).mean()
    slow_average = price.ewm(span=slow, adjust=False).mean()
    macd_line = fast_average - slow_average
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    return macd_line - signal_line

# Derived BTC features computed from the namespaced close-price stream.
cp = Stream.select(bitfinex_streams, lambda s: s.name == "bitfinex:/BTC:close")

features = [
    cp.log().diff().rename("BTC:lr"),
    rsi(cp, period=20).rename("BTC:rsi"),
    macd(cp, fast=10, slow=50, signal=5).rename("BTC:macd"),
]

bitfinex_streams.extend(features)

# In this variant the agent observes every raw column stream plus the features.
# NOTE(review): bitfinex_streams is built from all columns of the frame, which
# presumably still include the date column in this cell — confirm the observer
# can consume that stream.
feed = DataFeed(bitfinex_streams)

# Starting balances on the simulated exchange.
portfolio = Portfolio(
    USD,
    [Wallet(bitfinex, balance) for balance in (10000 * USD, 10 * BTC)],
)

# Candle fields for the chart renderer.
renderer_feed = DataFeed([
    Stream.source(list(bitfinex_data[column]), dtype="float").rename(field)
    for column, field in (
        ("BTC:unix", "date"),
        ("BTC:open", "open"),
        ("BTC:high", "high"),
        ("BTC:low", "low"),
        ("BTC:close", "close"),
        ("BTC:volume", "volume"),
    )
])

env = default.create(
    feed=feed,
    portfolio=portfolio,
    action_scheme="managed-risk",
    reward_scheme="risk-adjusted",
    window_size=25,
    max_allowed_loss=0.6,
    renderer=default.renderers.MatplotlibTradingChart(),
    renderer_feed=renderer_feed,
)

In [None]:
def create_env(config):
    """Build a BTC/USD trading environment from hourly Bitfinex candles.

    The candles are downloaded on every call, so each environment instance
    carries its own data, exchange, portfolio and feeds.

    Parameters
    ----------
    config : dict
        Must contain ``"window_size"``, which is forwarded to ``default.create``.

    Returns
    -------
    The environment object produced by ``default.create``.
    """
    downloader = CryptoDataDownload()

    # Hourly BTC/USD candles, columns prefixed with "BTC:", indexed by date.
    candles = pd.concat(
        [downloader.fetch("Bitfinex", "USD", "BTC", "1h").add_prefix("BTC:")],
        axis=1,
    ).set_index('BTC:date')

    # Simulated exchange quoting USD-BTC from the close prices.
    exchange = Exchange("bitfinex", service=execute_order)(
        Stream.source(list(candles['BTC:close']), dtype="float").rename("USD-BTC")
    )

    # Columns whose name starts with "BTC" (the `ta` indicator set is not used here).
    btc_frame = candles.loc[:, candles.columns.str.startswith("BTC")]

    with NameSpace("bitfinex"):
        raw_streams = [
            Stream.source(list(btc_frame[column]), dtype="float").rename(column)
            for column in btc_frame.columns
        ]

    def rsi(price: Stream[float], period: float) -> Stream[float]:
        # Ratio of smoothed upward to smoothed downward moves, mapped to 0..100.
        change = price.diff()
        gains = change.clamp_min(0).abs()
        losses = change.clamp_max(0).abs()
        smoothed_gain = gains.ewm(alpha=1 / period).mean()
        smoothed_loss = losses.ewm(alpha=1 / period).mean()
        strength = smoothed_gain / smoothed_loss
        return 100*(1 - (1 + strength) ** -1)

    def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
        # MACD line minus its own `signal`-span exponential average.
        fast_average = price.ewm(span=fast, adjust=False).mean()
        slow_average = price.ewm(span=slow, adjust=False).mean()
        macd_line = fast_average - slow_average
        signal_line = macd_line.ewm(span=signal, adjust=False).mean()
        return macd_line - signal_line

    close = Stream.select(raw_streams, lambda s: s.name == "bitfinex:/BTC:close")

    features = [
        close.log().diff().rename("BTC:lr"),
        rsi(close, period=20).rename("BTC:rsi"),
        macd(close, fast=10, slow=50, signal=5).rename("BTC:macd"),
    ]

    raw_streams.extend(features)

    # Only the derived features are observed by the agent.
    feed = DataFeed(features)

    portfolio = Portfolio(
        USD,
        [Wallet(exchange, balance) for balance in (10000 * USD, 10 * BTC)],
    )

    # Candle fields for the chart renderer.
    renderer_feed = DataFeed([
        Stream.source(list(candles[column]), dtype="float").rename(field)
        for column, field in (
            ("BTC:unix", "date"),
            ("BTC:open", "open"),
            ("BTC:high", "high"),
            ("BTC:low", "low"),
            ("BTC:close", "close"),
            ("BTC:volume", "volume"),
        )
    ])

    return default.create(
        feed=feed,
        portfolio=portfolio,
        action_scheme="managed-risk",
        reward_scheme="risk-adjusted",
        window_size=config["window_size"],
        max_allowed_loss=0.6,
        renderer=default.renderers.PlotlyTradingChart(),
        renderer_feed=renderer_feed,
    )

In [None]:
# Build one environment directly, with the same window size the training config uses.
env = create_env({'window_size': 25})

In [None]:
# Pull one record from the observer's feed to inspect the feature values.
# NOTE(review): this presumably advances the feed by one step — confirm.
env.observer.feed.next()

In [None]:
# Register the factory with Ray under the name used by the "env" key of the tune config.
register_env("TradingEnv", create_env)

In [None]:
# Train PPO on the registered "TradingEnv"; the stop criterion is a mean
# episode reward of 500.
stop_criteria = {"episode_reward_mean": 500}

ppo_config = {
    "env": "TradingEnv",
    "env_config": {"window_size": 25},
    "log_level": "DEBUG",
    "framework": "torch",
    "ignore_worker_failures": True,
    "num_workers": 1,
    "num_gpus": 1,
    "clip_rewards": True,
    "lr": 8e-6,
    # [timestep, learning rate] pairs.
    # NOTE(review): RLlib appears to prefer lr_schedule over "lr" when both
    # are given — confirm which one is intended to apply.
    "lr_schedule": [
        [0, 1e-1],
        [100, 1e-2],
        [1000, 1e-3],
        [10000, 1e-4],
        [100000, 1e-5],
        [1000000, 1e-6],
        [10000000, 1e-7],
    ],
    "gamma": 0,
    "observation_filter": "MeanStdFilter",
    "lambda": 0.72,
    "vf_loss_coeff": 0.5,
    "entropy_coeff": 0.01,
}

analysis = tune.run(
    "PPO",
    stop=stop_criteria,
    config=ppo_config,
    checkpoint_at_end=True,
)

In [None]:
import ray
import numpy as np

from ray import tune
from ray.tune.registry import register_env

import tensortrade.env.default as default

from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.instruments import Instrument
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio


# Instruments used by this cell: each is built from a symbol, an integer
# (presumably the decimal precision — confirm) and a display name.
# Note that this rebinds the name USD for the rest of the notebook session.
USD = Instrument("USD", 2, "U.S. Dollar")
TTC = Instrument("TTC", 8, "TensorTrade Coin")


def create_env(config):
    """Build a buy/sell/hold BTC-USD environment on hourly Bitfinex candles.

    The candles are downloaded on every call. Rewards come from
    ``default.rewards.PBR`` driven by the BTC close price, and actions from
    ``default.actions.BSH`` switching between the cash and asset wallets.

    Parameters
    ----------
    config : dict
        Must contain ``"window_size"``, which is forwarded to ``default.create``.

    Returns
    -------
    The environment object produced by ``default.create``.
    """
    # Function-scope import: BTC is not among the names this cell imports or defines.
    from tensortrade.oms.instruments import BTC

    cdd = CryptoDataDownload()

    bitfinex_data = pd.concat([
        cdd.fetch("Bitfinex", "USD", "BTC", "1h").add_prefix("BTC:"),
        cdd.fetch("Bitfinex", "USD", "ETH", "1h").add_prefix("ETH:")
    ], axis=1)

    bitfinex_data.set_index('BTC:date', inplace=True)

    # Bug fix: `p` was used by the feed and the reward scheme below but was
    # never defined, so the first call raised NameError. It is now the BTC
    # close-price stream, shared by the exchange, the feed and the reward.
    p = Stream.source(list(bitfinex_data['BTC:close']), dtype="float").rename("USD-BTC")

    bitfinex = Exchange("bitfinex", service=execute_order)(
        p,
        Stream.source(list(bitfinex_data['ETH:close']), dtype="float").rename("USD-ETH")
    )

    cash = Wallet(bitfinex, 100000 * USD)
    # The asset wallet holds BTC instead of TTC: the exchange above only
    # defines USD-BTC and USD-ETH price streams, so there is no price to
    # trade TTC against.
    asset = Wallet(bitfinex, 0 * BTC)

    portfolio = Portfolio(USD, [
        cash,
        asset
    ])

    # Observed features: the price, three rolling means and the log return.
    feed = DataFeed([
        p,
        p.rolling(window=10).mean().rename("fast"),
        p.rolling(window=50).mean().rename("medium"),
        p.rolling(window=100).mean().rename("slow"),
        p.log().diff().fillna(0).rename("lr")
    ])

    reward_scheme = default.rewards.PBR(price=p)

    # Attaching the reward scheme lets the action scheme notify it of each action.
    action_scheme = default.actions.BSH(
        cash=cash,
        asset=asset
    ).attach(reward_scheme)

    env = default.create(
        feed=feed,
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        window_size=config["window_size"],
        max_allowed_loss=0.6
    )
    return env


# Make the factory available to Ray under the name used in the config below.
register_env("TradingEnv", create_env)


# Train PPO on CPU only; the stop criterion is a mean episode reward of 500.
stop_criteria = {"episode_reward_mean": 500}

ppo_config = {
    "env": "TradingEnv",
    "env_config": {"window_size": 25},
    "log_level": "DEBUG",
    "framework": "torch",
    "ignore_worker_failures": True,
    "num_workers": 1,
    "num_gpus": 0,
    "clip_rewards": True,
    "lr": 8e-6,
    # [timestep, learning rate] pairs.
    # NOTE(review): RLlib appears to prefer lr_schedule over "lr" when both
    # are given — confirm which one is intended to apply.
    "lr_schedule": [
        [0, 1e-1],
        [100, 1e-2],
        [1000, 1e-3],
        [10000, 1e-4],
        [100000, 1e-5],
        [1000000, 1e-6],
        [10000000, 1e-7],
    ],
    "gamma": 0,
    "observation_filter": "MeanStdFilter",
    "lambda": 0.72,
    "vf_loss_coeff": 0.5,
    "entropy_coeff": 0.01,
}

analysis = tune.run(
    "PPO",
    stop=stop_criteria,
    config=ppo_config,
    checkpoint_at_end=True,
)

In [None]:
# Ray is pinned to 0.8.7; symfit is installed without a version pin.
!pip install ray==0.8.7
!pip install symfit

In [None]:
# Tensorforce is the package the agent-spec cell imports Agent from.
!pip install tensorforce

In [None]:
# Stable-Baselines3 with its optional extras; a later cell imports PPO from it.
!pip install stable-baselines3[extra]

In [None]:
from tensorforce.agents import Agent

# Three dense layers with tanh activation, narrowing from 128 to 32 units.
network_spec = [
    dict(type='dense', size=128, activation="tanh"),
    dict(type='dense', size=64, activation="tanh"),
    dict(type='dense', size=32, activation="tanh")
]

# PPO agent settings: Adam step optimizer with learning rate 1e-4, discount
# 0.99 and likelihood-ratio clipping 0.2.
agent_spec = {
    "type": "ppo_agent",
    "step_optimizer": {
        "type": "adam",
        "learning_rate": 1e-4
    },
    "discount": 0.99,
    "likelihood_ratio_clipping": 0.2,
}

# NOTE(review): the output saved with this cell records a ModuleNotFoundError,
# so this cell did not run successfully in the recorded session. Also confirm
# that the installed Tensorforce version still offers Agent.from_spec and that
# `env` exposes `.states` / `.actions`.
agent = Agent.from_spec(spec=agent_spec, kwargs=dict(network=network_spec,
                                    states=env.states,
                                    actions=env.actions))

ModuleNotFoundError: ignored

In [None]:
import pandas as pd
import tensortrade.env.default as default

from stable_baselines3.ppo import PPO

from ray import tune
from ray.tune.registry import register_env

from tensortrade.data.cdd import CryptoDataDownload
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC, ETH
from tensortrade.oms.wallets import Wallet, Portfolio

%matplotlib inline

In [None]:
from tensortrade.oms.instruments import Instrument

# Instruments for the synthetic-price example: each is built from a symbol, an
# integer (presumably the decimal precision — confirm) and a display name.
# This rebinds the USD name imported from tensortrade.oms.instruments.
USD = Instrument("USD", 2, "U.S. Dollar")
TTC = Instrument("TTC", 8, "TensorTrade Coin")

In [None]:
from gym.spaces import Discrete

from tensortrade.env.default.actions import TensorTradeActionScheme

from tensortrade.env.generic import ActionScheme, TradingEnv
from tensortrade.core import Clock
from tensortrade.oms.instruments import ExchangePair
from tensortrade.oms.wallets import Portfolio
from tensortrade.oms.orders import (
    Order,
    proportion_order,
    TradeSide,
    TradeType
)


class BSH(TensorTradeActionScheme):
    """Two-action scheme that moves the whole balance between two wallets.

    The scheme remembers the last action it received. When a new action
    differs from the remembered one, a single order for the full proportion
    (1.0) is created: from ``cash`` to ``asset`` if the remembered action was
    0, otherwise from ``asset`` to ``cash``. Every attached listener is told
    about each action, whether or not it produced an order.
    """

    registered_name = "bsh"

    def __init__(self, cash: 'Wallet', asset: 'Wallet'):
        """Store both wallets and start with action 0 and no listeners."""
        super().__init__()
        self.cash = cash
        self.asset = asset

        self.listeners = []
        self.action = 0

    @property
    def action_space(self):
        """Discrete space with two actions."""
        return Discrete(2)

    def attach(self, listener):
        """Add ``listener`` (an object with ``on_action``) and return ``self`` for chaining."""
        self.listeners.append(listener)
        return self

    def get_orders(self, action: int, portfolio: 'Portfolio'):
        """Return a one-element list holding the order for ``action``, or ``None`` if unchanged."""
        pending = None

        if action != self.action:
            # Direction depends on the previously remembered action.
            if self.action == 0:
                source, target = self.cash, self.asset
            else:
                source, target = self.asset, self.cash
            pending = proportion_order(portfolio, source, target, 1.0)
            self.action = action

        for observer in self.listeners:
            observer.on_action(action)

        return [pending]

    def reset(self):
        """Reset the base scheme and return to action 0."""
        super().reset()
        self.action = 0

In [None]:
from tensortrade.env.default.rewards import TensorTradeRewardScheme
from tensortrade.feed.core import Stream, DataFeed


class PBR(TensorTradeRewardScheme):
    """Position-based reward: price change multiplied by the current position.

    ``position`` is -1 after action 0 and +1 after any other action. The
    reward for a step is the difference of the price stream's value times the
    position, with missing values replaced by 0.
    """

    registered_name = "pbr"

    def __init__(self, price: 'Stream'):
        """Build and compile the internal one-stream reward feed from ``price``."""
        super().__init__()
        self.position = -1

        # Both sensors read live values: the price stream's current value and
        # this object's current position.
        price_change = Stream.sensor(price, lambda p: p.value, dtype="float").diff()
        held_position = Stream.sensor(self, lambda rs: rs.position, dtype="float")

        reward_stream = (price_change * held_position).fillna(0).rename("reward")

        self.feed = DataFeed([reward_stream])
        self.feed.compile()

    def on_action(self, action: int):
        """Update the position from the latest action (listener callback)."""
        self.position = 1 if action != 0 else -1

    def get_reward(self, portfolio: 'Portfolio'):
        """Advance the reward feed one step and return its "reward" value."""
        return self.feed.next()["reward"]

    def reset(self):
        """Return to position -1 and reset the reward feed."""
        self.position = -1
        self.feed.reset()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensortrade.env.generic import Renderer


class PositionChangeChart(Renderer):
    """Matplotlib renderer showing position changes and portfolio performance.

    Parameters
    ----------
    color : str, default "orange"
        Colour of the price line in the trading chart.
    """

    def __init__(self, color: str = "orange"):
        # Bug fix: the argument used to be ignored and "orange" was always used.
        self.color = color

    def render(self, env, **kwargs):
        """Draw a two-panel figure from the environment's renderer history.

        The left panel plots the recorded ``price`` series and marks every step
        at which the recorded ``action`` changes: green for a 0 -> 1 change,
        red for any other change. The right panel plots the portfolio's
        performance records.
        """
        history = pd.DataFrame(env.observer.renderer_history)

        actions = list(history.action)
        p = list(history.price)

        buy = {}
        sell = {}

        # Compare each action with its successor; a change is marked at the
        # earlier step's index and price.
        for i, (a1, a2) in enumerate(zip(actions, actions[1:])):
            if a1 == a2:
                continue
            if a1 == 0 and a2 == 1:
                buy[i] = p[i]
            else:
                sell[i] = p[i]

        buy = pd.Series(buy)
        sell = pd.Series(sell)

        fig, axs = plt.subplots(1, 2, figsize=(15, 5))

        fig.suptitle("Performance")

        axs[0].plot(np.arange(len(p)), p, label="price", color=self.color)
        axs[0].scatter(buy.index, buy.values, marker="^", color="green")
        axs[0].scatter(sell.index, sell.values, marker="^", color="red")
        axs[0].set_title("Trading Chart")

        # from_dict is a classmethod; no throwaway DataFrame instance is needed.
        performance_df = pd.DataFrame.from_dict(env.action_scheme.portfolio.performance, orient='index')
        performance_df.plot(ax=axs[1])
        axs[1].set_title("Net Worth")

        plt.show()

In [None]:
def create_env(config):
    """Build a buy/sell/hold environment on a synthetic sine-wave price.

    The price is ``50 * sin(3x) + 100`` sampled over ``[0, 2*pi)`` in steps of
    ``2*pi / 1001``. It is traded as USD-TTC on a simulated exchange, starting
    with 100000 USD and no TTC.

    Parameters
    ----------
    config : dict
        Must contain ``"window_size"``, which is forwarded to ``default.create``.

    Returns
    -------
    The environment object produced by ``default.create``.
    """
    x = np.arange(0, 2*np.pi, 2*np.pi / 1001)
    y = 50*np.sin(3*x) + 100

    # A second assignment to `x` that followed here was never read and has
    # been removed.
    p = Stream.source(y, dtype="float").rename("USD-TTC")

    bitfinex = Exchange("bitfinex", service=execute_order)(
        p
    )

    cash = Wallet(bitfinex, 100000 * USD)
    asset = Wallet(bitfinex, 0 * TTC)

    portfolio = Portfolio(USD, [
        cash,
        asset
    ])

    # Observed features: the price, three rolling means and the log return.
    feed = DataFeed([
        p,
        p.rolling(window=10).mean().rename("fast"),
        p.rolling(window=50).mean().rename("medium"),
        p.rolling(window=100).mean().rename("slow"),
        p.log().diff().fillna(0).rename("lr")
    ])

    reward_scheme = PBR(price=p)

    # Attaching the reward scheme lets the action scheme notify it of each action.
    action_scheme = BSH(
        cash=cash,
        asset=asset
    ).attach(reward_scheme)

    # The renderer needs the price and the action scheme's current action.
    renderer_feed = DataFeed([
        Stream.source(y, dtype="float").rename("price"),
        Stream.sensor(action_scheme, lambda s: s.action, dtype="float").rename("action")
    ])

    environment = default.create(
        feed=feed,
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        renderer_feed=renderer_feed,
        renderer=PositionChangeChart(),
        window_size=config["window_size"],
        max_allowed_loss=0.6
    )
    return environment


In [None]:
# Register the sine-wave factory with Ray under the name used by the "env" key of the tune config.
register_env("TradingEnv", create_env)

In [None]:
# Train PPO on CPU only; the stop criterion is a mean episode reward of 500.
# No checkpoint is written at the end of this run.
stop_criteria = {"episode_reward_mean": 500}

ppo_config = {
    "env": "TradingEnv",
    "env_config": {"window_size": 25},
    "log_level": "DEBUG",
    "framework": "torch",
    "ignore_worker_failures": True,
    "num_workers": 1,
    "num_gpus": 0,
    "clip_rewards": True,
    "lr": 8e-6,
    # [timestep, learning rate] pairs.
    # NOTE(review): RLlib appears to prefer lr_schedule over "lr" when both
    # are given — confirm which one is intended to apply.
    "lr_schedule": [
        [0, 1e-1],
        [100, 1e-2],
        [1000, 1e-3],
        [10000, 1e-4],
        [100000, 1e-5],
        [1000000, 1e-6],
        [10000000, 1e-7],
    ],
    "gamma": 0,
    "observation_filter": "MeanStdFilter",
    "lambda": 0.72,
    "vf_loss_coeff": 0.5,
    "entropy_coeff": 0.01,
}

analysis = tune.run(
    "PPO",
    stop=stop_criteria,
    config=ppo_config,
    checkpoint_at_end=False,
)