In [1]:
import sys
sys.path.insert(0, '/code')

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd

import tensortrade.env.default as default

from tensortrade.oms.exchanges import Exchange
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.instruments import USD, EUR
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.agents import DQNAgent
from ray import tune
from ray.tune.registry import register_env

## Load Data
The EUR/USD candlestick data is loaded from a CSV file in the local `data/` directory.

In [4]:
# Read the EUR/USD candlestick CSV and expose its "Local time" column as "Date".
data = (
    pd.read_csv('data/EURUSD_Candlestick_1_M_BID_01.12.2017-30.11.2020.csv')
    .rename(columns={"Local time": "Date"})
)

In [5]:
# Preview the first rows to confirm the renamed "Date" column and the price/volume columns.
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,01.12.2017 00:00:00.000 GMT-0500,1.19109,1.19113,1.19106,1.19111,105.63
1,01.12.2017 00:01:00.000 GMT-0500,1.1911,1.1911,1.19108,1.19108,43.82
2,01.12.2017 00:02:00.000 GMT-0500,1.19107,1.19107,1.19099,1.191,163.81
3,01.12.2017 00:03:00.000 GMT-0500,1.19099,1.19104,1.19099,1.19104,144.37
4,01.12.2017 00:04:00.000 GMT-0500,1.19104,1.19105,1.19101,1.19104,68.71


## Features

In [6]:
def rsi(price: Stream[float], period: float) -> Stream[float]:
    """Build a relative-strength-index stream from a price stream.

    Parameters
    ----------
    price : Stream[float]
        Price series the indicator is derived from.
    period : float
        Smoothing length; both exponential averages use ``alpha = 1 / period``.

    Returns
    -------
    Stream[float]
        ``100 * (1 - 1 / (1 + rs))``, where ``rs`` is the smoothed gain
        divided by the smoothed loss.
    """
    alpha = 1 / period
    change = price.diff()

    # Split each change into a gain magnitude and a loss magnitude.
    gains = change.clamp_min(0).abs()
    losses = change.clamp_max(0).abs()

    avg_gain = gains.ewm(alpha=alpha).mean()
    avg_loss = losses.ewm(alpha=alpha).mean()
    rs = avg_gain / avg_loss
    return 100 * (1 - (1 + rs) ** -1)


def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
    """Build a MACD-style oscillator stream from a price stream.

    Parameters
    ----------
    price : Stream[float]
        Price series the indicator is derived from.
    fast : float
        Span of the faster exponential moving average.
    slow : float
        Span of the slower exponential moving average.
    signal : float
        Span of the exponential moving average applied to the fast-minus-slow line.

    Returns
    -------
    Stream[float]
        The fast-minus-slow line minus its own smoothed (signal) line.
    """
    fast_ema = price.ewm(span=fast, adjust=False).mean()
    slow_ema = price.ewm(span=slow, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    return macd_line - signal_line


# One float stream per column after the first; the first column is skipped by
# position, and each stream carries its column's name.
features = [
    Stream.source(list(data[column]), dtype="float").rename(column)
    for column in data.columns[1:]
]

# The "Close" stream is the base for the derived indicators below.
cp = Stream.select(features, lambda stream: stream.name == "Close")

# Derived features: log-difference of the close ("lr"), RSI and MACD.
features.extend([
    cp.log().diff().rename("lr"),
    rsi(cp, period=20).rename("rsi"),
    macd(cp, fast=10, slow=50, signal=5).rename("macd"),
])

feed = DataFeed(features)
feed.compile()

## Setup Environment

In [7]:
# Price source for the exchange: the close column, named "USD/EUR" so the
# exchange lists it under that pair (EUR valued in USD).
close_quotes = Stream.source(list(data["Close"]), dtype="float").rename("USD/EUR")
forex = Exchange("forex", service=execute_order)(close_quotes)

# Starting balances: 10000 USD and 10 EUR, with USD as the portfolio's base instrument.
usd_wallet = Wallet(forex, 10000 * USD)
eur_wallet = Wallet(forex, 10 * EUR)
portfolio = Portfolio(USD, [usd_wallet, eur_wallet])

# Renderer feed: the raw date column plus lower-cased float streams for each
# price/volume column.
renderer_streams = [Stream.source(list(data["Date"])).rename("date")]
renderer_streams += [
    Stream.source(list(data[column]), dtype="float").rename(column.lower())
    for column in ("Open", "High", "Low", "Close", "Volume")
]
renderer_feed = DataFeed(renderer_streams)

order_scheme = default.actions.SimpleOrders(trade_sizes=1)
env = default.create(
    portfolio=portfolio,
    action_scheme=order_scheme,
    reward_scheme="simple",
    feed=feed,
    renderer_feed=renderer_feed,
    # renderer=default.renderers.PlotlyTradingChart(display=False),
    window_size=10,
    warmup_random_periods=True,
)

In [None]:
def _trading_env_creator(config):
    """Environment creator registered under "TradingEnv".

    Returns the notebook-level ``env`` object; ``config`` is accepted but not used.
    NOTE(review): every caller therefore gets the same pre-built environment —
    confirm this is acceptable for the RLlib workers that invoke the creator.
    """
    return env


register_env("TradingEnv", _trading_env_creator)

In [None]:
# Stopping criterion handed to tune.run.
stop_criteria = {"timesteps_total": 8}

# Learning-rate schedule as [timestep, learning rate] pairs.
# NOTE(review): both "lr" and "lr_schedule" are set below — presumably the
# schedule takes precedence over the fixed "lr"; confirm against the RLlib docs.
lr_schedule = [
    [0, 1e-1],
    [10 ** 2, 1e-2],
    [10 ** 3, 1e-3],
    [10 ** 4, 1e-4],
    [10 ** 5, 1e-5],
    [10 ** 6, 1e-6],
    [10 ** 7, 1e-7],
]

ppo_config = {
    # Environment: the creator registered under "TradingEnv".
    "env": "TradingEnv",
    "env_config": {"window_size": 10},
    # Runtime / resources.
    "log_level": "WARN",
    "framework": "tfe",
    "ignore_worker_failures": True,
    "num_workers": 1,
    "num_gpus": 0,
    # Optimisation settings.
    "clip_rewards": True,
    "lr": 8e-6,
    "lr_schedule": lr_schedule,
    "gamma": 0,
    "observation_filter": "MeanStdFilter",
    "lambda": 0.72,
    "vf_loss_coeff": 0.5,
    "entropy_coeff": 0.01,
}

analysis = tune.run(
    "PPO",
    stop=stop_criteria,
    config=ppo_config,
    checkpoint_at_end=True,
)

In [26]:
# Build a DQN agent around the notebook-level environment.
agent = DQNAgent(env)

# Options forwarded to agent.train(); the call stays last so the cell shows its result.
training_options = {
    "n_steps": 2000,
    "n_episodes": 1,
    "save_path": "agents/",
    "update_target_every": 100,
}
agent.train(**training_options)

====      AGENT ID: 6b54167d-b5ba-4572-9fab-d0299ff6d2cc      ====
-6674108.036772495


0

In [24]:
# Inspect the portfolio's per-step record of prices, wallet balances and net worth.
portfolio.performance

OrderedDict([(0,
              {'forex:/USD/EUR': 1.1916200000000001,
               'forex:/USD:/free': 10000.0,
               'forex:/USD:/locked': 0.0,
               'forex:/USD:/total': 10000.0,
               'forex:/EUR:/free': 10.0,
               'forex:/EUR:/locked': 0.0,
               'forex:/EUR:/total': 10.0,
               'forex:/EUR:/worth': 11.916200000000002,
               'net_worth': 10011.9162,
               'base_symbol': 'USD'}),
             (1,
              {'forex:/USD/EUR': 1.19154,
               'forex:/USD:/free': 0.0,
               'forex:/USD:/locked': 0.0,
               'forex:/USD:/total': 0.0,
               'forex:/EUR:/free': 8388.15,
               'forex:/EUR:/locked': 0.0,
               'forex:/EUR:/total': 8388.15,
               'forex:/EUR:/worth': 9994.816251,
               'net_worth': 9994.816251,
               'base_symbol': 'USD'}),
             (2,
              {'forex:/USD/EUR': 1.19161,
               'forex:/USD:/free': 0.0

In [27]:
import numpy as np

# Net worth taken from every entry of the portfolio's performance record.
net_worths = [snapshot['net_worth'] for snapshot in portfolio.performance.values()]

# Step-over-step simple returns: (next - current) / current.
simple_returns = [
    (later - earlier) / earlier
    for earlier, later in zip(net_worths, net_worths[1:])
]

# First ten returns expressed as growth factors (1 + r).
returns = np.array([r + 1 for r in simple_returns[:10]])
returns

array([0.99758931, 0.99992441, 1.00010919, 0.99973966, 0.9999244 ,
       1.00008401, 1.000084  , 0.9999916 , 1.00015119, 0.99986563])

In [19]:
# `returns` already holds growth factors (1 + r) from the cell above, so adding 1
# again would inflate every factor. Compound the factors directly and subtract 1
# to get the cumulative return after each of the first ten steps.
np.asarray(returns[:10]).cumprod() - 1

array([-0.00241069, -0.00248609, -0.00237718, -0.0026369 , -0.0027123 ,
       -0.00262852, -0.00254474, -0.00255312, -0.00240231, -0.00253636])

In [20]:
# Pairs each net worth with its successor. The cell displays only the lazy zip
# object itself; wrap it in list() to see the actual pairs.
zip(net_worths, net_worths[1:])

<zip at 0x7f941c69f408>

In [28]:
# Save the agent using the "agents/" path.
agent.save("agents/")

In [29]:
# Show the identifier of the current agent object.
agent.id

'231b8001-7556-4821-8b04-148cfb8b94d9'

In [11]:
# Reset the environment and display the array it returns.
env.reset()

array([[ 1.18575001e+00,  1.18587005e+00,  1.18561006e+00,
         1.18587005e+00,  3.09899994e+02,  1.01196652e-04,
         4.37400017e+01,  2.68785334e-05],
       [ 1.18587995e+00,  1.18587995e+00,  1.18568003e+00,
         1.18572998e+00,  2.23570007e+02, -1.18063756e-04,
         4.16770935e+01,  3.33465941e-05],
       [ 1.18575001e+00,  1.18576002e+00,  1.18529999e+00,
         1.18553996e+00,  3.54910004e+02, -1.60251686e-04,
         3.90463181e+01,  1.90899991e-05],
       [ 1.18554997e+00,  1.18562996e+00,  1.18527997e+00,
         1.18559003e+00,  3.07459991e+02,  4.21739860e-05,
         4.00938110e+01,  1.78276623e-05],
       [ 1.18560004e+00,  1.18572998e+00,  1.18536997e+00,
         1.18552995e+00,  3.29579987e+02, -5.06089964e-05,
         3.92419701e+01,  1.29712698e-05],
       [ 1.18552995e+00,  1.18578005e+00,  1.18552005e+00,
         1.18573999e+00,  3.08989990e+02,  1.77120281e-04,
         4.36525917e+01,  3.22383312e-05],
       [ 1.18571997e+00,  1.185920

In [20]:
import numpy as np

# Random-action rollout: reset the environment, then take at most 10 random
# actions, printing each action and the reward returned for it.
env.reset()
steps = 0  # number of environment steps actually taken
for _ in range(10):
    # NOTE(review): assumes the action scheme exposes exactly 3 discrete
    # actions (0, 1, 2) — confirm against the configured action scheme.
    action = np.random.randint(0, 3)
    obs, rew, done, info = env.step(action)
    print(action)
    print(rew)
    steps += 1
    if done:
        # The episode has ended; do not keep stepping a finished environment.
        break

[10011.9266, 10011.9317]
2
11.931699999999182
[10011.9266, 10011.9317, 10010.3343285]
1
10.334328499999174
[10011.9266, 10011.9317, 10010.3343285, 10003.204400999999]
1
3.204400999999052
[10011.9266, 10011.9317, 10010.3343285, 10003.204400999999, 9951.96]
2
-48.04000000000087
[10011.9266, 10011.9317, 10010.3343285, 10003.204400999999, 9951.96, 9951.96]
2
-48.04000000000087
[10011.9266, 10011.9317, 10010.3343285, 10003.204400999999, 9951.96, 9951.96, 9951.96]
0
-48.04000000000087
[10011.9266, 10011.9317, 10010.3343285, 10003.204400999999, 9951.96, 9951.96, 9951.96, 9951.96]
2
-48.04000000000087
[10011.9266, 10011.9317, 10010.3343285, 10003.204400999999, 9951.96, 9951.96, 9951.96, 9951.96, 9951.96]
0
-48.04000000000087
[10011.9266, 10011.9317, 10010.3343285, 10003.204400999999, 9951.96, 9951.96, 9951.96, 9951.96, 9951.96, 9951.96]
2
-48.04000000000087
[10011.9266, 10011.9317, 10010.3343285, 10003.204400999999, 9951.96, 9951.96, 9951.96, 9951.96, 9951.96, 9951.96, 9944.61333]
1
-55.386669

In [17]:
# Pull one record straight from the observer's feed to inspect its raw contents
# (the "internal", "external" and "renderer" groups).
# NOTE(review): presumably this advances the same feed the environment reads from — confirm.
env.observer.feed.next()

{'internal': {'forex:/USD/EUR': 1.19129,
  'forex:/USD:/free': 9951.96,
  'forex:/USD:/locked': 0.0,
  'forex:/USD:/total': 9951.96,
  'forex:/EUR:/free': 0.0,
  'forex:/EUR:/locked': 0.0,
  'forex:/EUR:/total': 0.0,
  'forex:/EUR:/worth': 0.0,
  'net_worth': 9951.96},
 'external': {'Open': 1.19127,
  'High': 1.1913,
  'Low': 1.19126,
  'Close': 1.19129,
  'Volume': 67.95,
  'lr': 1.678866429419279e-05,
  'rsi': 54.58181000433198,
  'macd': -2.1827504042260573e-05},
 'renderer': {'date': '01.12.2017 00:23:00.000 GMT-0500',
  'open': 1.19127,
  'high': 1.1913,
  'low': 1.19126,
  'close': 1.19129,
  'volume': 67.95}}