In [None]:
import pandas as pd
import numpy as np

In [None]:
# File locations used throughout the notebook, relative to the working directory.
data_path = "data/all_data_merged.csv"  # full merged dataset, split into train/test below
train_path = "data/train_data.csv"  # training split
test_path = "data/test_data.csv"  # test split

In [None]:
from tensortrade.feed.core import Stream, DataFeed, NameSpace

In [None]:
# define exchange
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC, ETH, LTC
from tensortrade.oms.wallets import Wallet, Portfolio

In [None]:
import tensortrade.env.default as default

In [None]:
from tensortrade.agents import DQNAgent

#### Create train/test set

In [None]:
# Load the merged dataset, using the first CSV column as the index.
all_data = pd.read_csv(data_path, index_col=0)
all_data.head()

In [None]:
all_data.tail()

In [None]:
# Convert the index to datetimes so rows can be filtered by calendar year below.
all_data.index = pd.to_datetime(all_data.index)

In [None]:
# Every 2020 row is held out as the test set; all other years form the training set.
test_data = all_data[all_data.index.year==2020]
train_data = all_data[all_data.index.year!=2020]

In [None]:
# Sanity check: (rows, columns) of the two splits.
train_data.shape, test_data.shape

In [None]:
train_data.head()

In [None]:
test_data.head()

In [None]:
# Persist the splits to the locations declared in the path-configuration cell, so the
# files written here are exactly the ones later read back via `train_path` / `test_path`.
train_data.to_csv(train_path)
test_data.to_csv(test_path)

#### Baseline

In [None]:
def fetch_data(path):
    """Load a CSV and return its float columns plus calendar features.

    Parameters
    ----------
    path : str or file-like
        CSV whose first column holds parseable timestamps (used as the index).

    Returns
    -------
    pd.DataFrame
        The float-typed columns of the file, indexed by datetime, with integer
        ``month``, ``year``, ``week`` (ISO week number) and ``day`` columns appended.
    """
    df = pd.read_csv(path, index_col=0)

    # Columns with "tweet" as one of their underscore-separated tokens are only
    # reported here; they are NOT removed from the returned frame.
    # NOTE(review): the variable name suggests they were meant to be dropped -- confirm.
    drop_cols = [col for col in df.columns if "tweet" in str(col).split("_")]
    print(drop_cols)

    # Filter the frame that was just read. This previously referenced a global
    # `data`, which raises NameError on a fresh kernel.
    df = df.select_dtypes(include=['float'])
    df.index = pd.to_datetime(df.index)
    df["month"] = df.index.month
    df["year"] = df.index.year
    # `DatetimeIndex.week` was removed in pandas 2.0; isocalendar() is its replacement.
    # Cast to int64 so the column keeps the plain integer dtype `.week` used to yield.
    df["week"] = df.index.isocalendar().week.to_numpy(dtype="int64")
    df["day"] = df.index.day
    return df

In [None]:
# Build the baseline feature frame explicitly: the cells below read it as `data`,
# which was otherwise never assigned anywhere in the notebook.
# NOTE(review): the training split is assumed here -- confirm this is the intended input.
data = fetch_data(train_path)

# One float stream per feature column, created inside the "bitfinex" namespace.
with NameSpace("bitfinex"):
    nodes = [Stream.source(data[name].tolist(), dtype="float").rename(name) for name in data.columns]

In [None]:
# Bundle the per-column streams into the feed passed to the baseline environment.
data_feed = DataFeed(nodes)

In [None]:
# Pull one item from the feed as a quick look at what it emits.
# NOTE(review): this advances the feed before the environment is created -- confirm
# the environment resets the feed before training starts.
data_feed.next()

In [None]:
# Simulated exchange whose USD-BTC / USD-LTC / USD-ETH price streams are taken from
# the btc_close, ltc_close and eth_close columns of `data`.
bitfinex = Exchange("bitfinex", service=execute_order)(
    Stream.source(data["btc_close"].tolist(), dtype="float").rename("USD-BTC"),
    Stream.source(data["ltc_close"].tolist(), dtype="float").rename("USD-LTC"),
    Stream.source(data["eth_close"].tolist(), dtype="float").rename("USD-ETH")
)

In [None]:
# USD-denominated portfolio: 1000 USD to start, with empty BTC, LTC and ETH wallets,
# all held on the simulated exchange.
portfolio = Portfolio(USD, [
    Wallet(bitfinex, 1000 * USD),
    Wallet(bitfinex, 0 * BTC),
    Wallet(bitfinex, 0 * LTC),
    Wallet(bitfinex, 0 * ETH)
])

In [None]:
# Baseline environment using the built-in "managed-risk" action scheme and
# "risk-adjusted" reward scheme, logging to the screen, with window_size=24.
env = default.create(
    portfolio=portfolio,
    action_scheme="managed-risk",
    reward_scheme="risk-adjusted",
    feed=data_feed,
    renderer="screen-log",
    window_size=24
)

In [None]:
# Baseline agent: TensorTrade's DQNAgent bound to the environment above.
agent = DQNAgent(env)

In [None]:
# Short smoke-test run: 2 episodes of 200 steps each.
agent.train(n_episodes=2, n_steps=200, render_interval=10)

In [None]:
# Plot everything recorded in the portfolio's performance history.
portfolio.performance.plot()

In [None]:
# Plot only the net-worth curve.
portfolio.performance.net_worth.plot()

### Custom Agents

In [None]:
from sklearn.preprocessing import StandardScaler, RobustScaler, LabelEncoder, OneHotEncoder

In [None]:
from numpy import hstack

In [None]:
from tensortrade.env.default.renderers import PlotlyTradingChart, FileLogger

# Renderer instances defined once at notebook level; `create_env` below refers to
# both of them by name.
chart_renderer = PlotlyTradingChart(
    display=True,  # show the chart on screen (default)
    height=800,  # affects both displayed and saved file height. None for 100% height.
    save_format="html",  # save the chart to an HTML file
    auto_open_html=True,  # open the saved HTML chart in a new browser tab
)

# Writes the training log to training_logs/agent_train.log.
file_logger = FileLogger(
    filename="agent_train.log",  # omit or None for automatic file name
    path="training_logs"  # create a new directory if doesn't exist, None for no directory
)

In [None]:
def fetch_data(path, normalize=True, train=True, scalers=None):
    """Load a CSV and build the model feature frame and the raw price frame.

    Parameters
    ----------
    path : str or file-like
        CSV whose first column holds parseable timestamps (used as the index).
    normalize : bool, default True
        When True, numeric columns are transformed with ``scalers["num"]`` and the
        calendar columns with the per-column encoders in ``scalers["cat"]``.
    train : bool, default True
        When True a fresh ``scalers`` dict is created (any value passed in is
        replaced) and, if ``normalize`` is set, fitted on this data. When False
        the supplied ``scalers`` are applied without refitting.
    scalers : dict, optional
        Dict returned by an earlier ``train=True`` call. Required when
        ``train=False`` and ``normalize=True``.

    Returns
    -------
    features_df : pd.DataFrame
        Feature matrix with a default integer index. With ``normalize=True`` the
        columns are the numeric features followed by ``week, year, month, day``;
        otherwise they follow the order of the loaded frame.
    price_data : pd.DataFrame
        The first 15 float columns of the file, unscaled, with the original index.
    scalers : dict or None
        The scalers that were created or passed in.

    Raises
    ------
    ValueError
        If ``normalize`` is requested with ``train=False`` but no ``scalers``.
    """
    print("Reading Data...")
    df = pd.read_csv(path, index_col=0)
    if train:
        scalers = {
            "num": StandardScaler(),
            "cat": {
                # Pre-fitted on a fixed year list; presumably so that years absent
                # from the training split (the 2020 test data) can still be encoded.
                "year": LabelEncoder().fit([2017, 2018, 2019, 2020])
            },
            # Not used by this function; kept so the returned dict keeps its keys.
            "ohe": OneHotEncoder()
        }
    elif normalize and scalers is None:
        # Fail early with a clear message instead of a TypeError on `None[...]` below.
        raise ValueError(
            "scalers from a train=True call are required when train=False and normalize=True"
        )

    df = df.select_dtypes(include=['float'])
    # Sliced before the index conversion and the calendar columns are added.
    price_data = df.iloc[:, :15]

    df.index = pd.to_datetime(df.index)
    df["month"] = df.index.month
    df["year"] = df.index.year
    # `DatetimeIndex.week` was removed in pandas 2.0; isocalendar() is its replacement.
    # Cast to int64 so `df.values` stays numeric rather than turning into object dtype.
    df["week"] = df.index.isocalendar().week.to_numpy(dtype="int64")
    df["day"] = df.index.day

    cat_cols = ["week", "year", "month", "day"]
    num_feature_cols = [col for col in df.columns if col not in cat_cols]

    if normalize:
        print("Normalizing Data...")
        if train:
            scalers['num'].fit(df[num_feature_cols].values)

        num_features = scalers['num'].transform(df[num_feature_cols].values)
        cat_features = []
        print("Label encoding data...")
        for cat_col in cat_cols:
            # Encoders are only created and fitted during training; "year" already exists.
            if train and cat_col not in scalers['cat']:
                scalers['cat'][cat_col] = LabelEncoder().fit(df[cat_col].values)
            encoded = scalers['cat'][cat_col].transform(df[cat_col].values)
            cat_features.append(encoded.reshape(-1, 1))

        features = hstack([num_features, hstack(cat_features)])
        # Label the columns in the order they were stacked. Reusing `df.columns`
        # here would swap the "week" and "month" labels, because the frame stores
        # the calendar columns as month, year, week, day.
        feature_cols = num_feature_cols + cat_cols
    else:
        features = df.values
        feature_cols = list(df.columns)

    features_df = pd.DataFrame(features, columns=feature_cols)

    return features_df, price_data, scalers

In [None]:
from tensortrade.env.generic.components.renderer import AggregateRenderer
from typing import Union

from tensortrade.env.default import actions
from tensortrade.env.default import rewards
from tensortrade.env.default import observers
from tensortrade.env.default import stoppers
from tensortrade.env.default import informers
from tensortrade.env.default import renderers

In [None]:
def create_env(path, window_size=24, capital=1000, action='managed-risk', reward='risk-adjusted', train=True, scalers=None):
    """Assemble a trading environment together with its portfolio and scalers.

    Parameters
    ----------
    path : str
        CSV location handed to ``fetch_data``.
    window_size : int, default 24
        Forwarded to ``default.create`` as ``window_size``.
    capital : number, default 1000
        Opening USD balance of the portfolio.
    action, reward : str or scheme object
        Forwarded to ``default.create`` as ``action_scheme`` / ``reward_scheme``.
    train : bool, default True
        Forwarded to ``fetch_data``.
    scalers : dict, optional
        Forwarded to ``fetch_data``.

    Returns
    -------
    tuple
        ``(env, portfolio, scalers)``, where ``scalers`` is whatever
        ``fetch_data`` returned.
    """
    features_df, price_df, scalers = fetch_data(path, train=train, scalers=scalers)

    def _float_stream(series, label):
        # A float stream over the column's values, renamed to `label`.
        return Stream.source(series.tolist(), dtype="float").rename(label)

    # Feature streams are created (and renamed) inside the "bitfinex" namespace.
    with NameSpace("bitfinex"):
        feature_streams = [_float_stream(features_df[col], col) for col in features_df.columns]
    observation_feed = DataFeed(feature_streams)

    # The unscaled price columns feed the renderer.
    price_feed = DataFeed([_float_stream(price_df[col], col) for col in price_df.columns])

    # Simulated exchange quoting the three instruments from their closing prices.
    quoted_pairs = (
        ("btc_close", "USD-BTC"),
        ("ltc_close", "USD-LTC"),
        ("eth_close", "USD-ETH"),
    )
    exchange = Exchange("bitfinex", service=execute_order)(
        *[_float_stream(price_df[column], pair) for column, pair in quoted_pairs]
    )

    # All of the capital starts in USD; the crypto wallets start empty.
    opening_balances = [capital * USD, 0 * BTC, 0 * LTC, 0 * ETH]
    portfolio = Portfolio(USD, [Wallet(exchange, balance) for balance in opening_balances])

    # NOTE(review): the baseline cell earlier in this notebook passes `renderer=`,
    # whereas this call passes `renderers=` -- confirm which keyword
    # `default.create` actually reads.
    env = default.create(
        portfolio=portfolio,
        action_scheme=action,
        reward_scheme=reward,
        feed=observation_feed,
        renderer_feed=price_feed,
        renderers=[chart_renderer, file_logger],
        window_size=window_size,
    )
    return env, portfolio, scalers

In [None]:
from tensorforce.environments import Environment as ForceEnvironment

In [None]:
from tensorforce.execution import Runner

In [None]:
from tensorforce.agents import Agent

#### Tensorforce Agent

In [None]:
%%time
# Training environment; this call also returns the scalers created for the training data.
train_environment, train_portfolio, scalers = create_env(train_path)

In [None]:
%%time
# Test environment: train=False so the training scalers are reused instead of recreated.
test_environment, test_portfolio, _ = create_env(test_path, train=False, scalers=scalers)

In [None]:
# Wrap the training environment for Tensorforce, with max_episode_timesteps=500.
tensorforce_train_environment = ForceEnvironment.create(
    environment=train_environment, max_episode_timesteps=500
)

In [None]:
# Same wrapping for the test environment.
tensorforce_test_environment = ForceEnvironment.create(
    environment=test_environment, max_episode_timesteps=500
)

In [None]:
# Specification of the generic "tensorforce" agent: Adam optimizer with a 1e-3
# learning rate, policy-gradient objective and a reward-estimation horizon of 20.
agent_spec = {
    "agent": "tensorforce",
    "update": 64,
    "optimizer": {
        "optimizer": "adam",
        "learning_rate": 1e-3
    },
    "objective": "policy_gradient",
    "reward_estimation": {
        "horizon": 20
    }
}

# NOTE(review): `network_spec` is defined but never passed to `Agent.create` below,
# which requests network="auto" -- confirm whether this layer stack was meant to be used.
network_spec = [
    dict(type='dense', size=64, activation="tanh"),
    dict(type='dense', size=32, activation="tanh"),
]

# Create the agent from the spec, using the state and action definitions reported
# by the wrapped training environment.
agent = Agent.create(
    agent=agent_spec,
    states=tensorforce_train_environment.states(),
    actions=tensorforce_train_environment.actions(),
    policy=dict(
        network="auto"
    )
)

In [None]:
# Display the agent object.
agent

In [None]:
# Runner that drives the agent against the training environment.
train_runner = Runner(
    agent=agent,
    environment=dict(environment=tensorforce_train_environment),
#     max_episode_timesteps=100
)

In [None]:
# Train for 100 episodes.
train_runner.run(num_episodes=100)

In [None]:
# Net worth of the training portfolio after the run.
train_portfolio.net_worth

In [None]:
train_environment.render()

In [None]:
# Net-worth curve recorded for the training portfolio.
train_portfolio.performance.net_worth.plot()

In [None]:
# Runner for the same agent against the held-out test environment.
test_runner = Runner(
    agent=agent,
    environment=dict(environment=tensorforce_test_environment),
#     max_episode_timesteps=100
)

In [None]:
# Run 100 episodes with evaluation=True.
test_runner.run(num_episodes=100, evaluation=True)

In [None]:
# Net worth of the test portfolio after the evaluation run.
test_portfolio.net_worth

In [None]:
# Net-worth curve recorded for the test portfolio.
test_portfolio.performance.net_worth.plot()

#### PPO

In [None]:
from tensorforce.agents import PPOAgent

In [None]:
%%time
# Fresh training environment, portfolio and scalers for the PPO experiment.
train_environment, train_portfolio, scalers = create_env(train_path)

In [None]:
%%time
# Fresh test environment that reuses the scalers returned for the training data.
test_environment, test_portfolio, _ = create_env(test_path, train=False, scalers=scalers)

In [None]:
# Wrap for Tensorforce; this section uses max_episode_timesteps=1000.
tensorforce_train_environment = ForceEnvironment.create(
    environment=train_environment, max_episode_timesteps=1000
)

In [None]:
tensorforce_test_environment = ForceEnvironment.create(
    environment=test_environment, max_episode_timesteps=1000
)

In [None]:
# Show the state shapes of the test and training environments side by side.
tensorforce_test_environment.states()["shape"], tensorforce_train_environment.states()["shape"]

In [None]:
# NOTE(review): neither `network_spec` nor `agent_spec` is passed to `Agent.create`
# below, which uses agent="ppo" and network="auto" -- confirm whether these
# settings were meant to be applied.
network_spec = [
    dict(type='dense', size=20, activation='relu'),
    dict(type='dense', size=10, activation='relu'),
]

agent_spec = {
    "agent": "ppo",
#     "update": 64,
#     "optimizer": "adam",
    "objective": "policy_gradient",
    "reward_estimation": {
        "horizon": 20
    }
}

# PPO agent using the training environment's state and action definitions;
# max_episode_timesteps matches the value used when wrapping the environments above.
ppo_agent = Agent.create(
    agent="ppo",
    states=tensorforce_train_environment.states(),
    actions=tensorforce_train_environment.actions(),
    max_episode_timesteps=1000,
    batch_size=32,
    network="auto"
)

In [None]:
# Runner that drives the PPO agent against the training environment.
train_runner = Runner(
    agent=ppo_agent,
    environment=dict(environment=tensorforce_train_environment),
#     max_episode_timesteps=100
)

In [None]:
# Train for 200 episodes.
train_runner.run(num_episodes=200)

In [None]:
# Net worth of the training portfolio after the run.
train_portfolio.net_worth

In [None]:
# environment.render()

In [None]:
# Net-worth curve recorded for the training portfolio.
train_portfolio.performance.net_worth.plot()

In [None]:
# Runner for the PPO agent against the held-out test environment.
test_runner = Runner(
    agent=ppo_agent,
    environment=dict(environment=tensorforce_test_environment),
#     max_episode_timesteps=100
)

In [None]:
# Run 100 episodes with evaluation=True.
test_runner.run(num_episodes=100, evaluation=True)

In [None]:
# Net worth of the test portfolio after the evaluation run.
test_portfolio.net_worth

In [None]:
# Net-worth curve recorded for the test portfolio.
test_portfolio.performance.net_worth.plot()

In [None]:
# NOTE(review): from here to the end of the PPO section the cells repeat the
# inspection and evaluation cells directly above; running them evaluates the same
# agent on the same test environment for another 100 episodes. Confirm the
# repetition is intentional.
train_portfolio.net_worth

In [None]:
# environment.render()

In [None]:
train_portfolio.performance.net_worth.plot()

In [None]:
# Rebuilds the test runner with the same agent and environment as before.
test_runner = Runner(
    agent=ppo_agent,
    environment=dict(environment=tensorforce_test_environment),
#     max_episode_timesteps=100
)

In [None]:
test_runner.run(num_episodes=100, evaluation=True)

In [None]:
test_portfolio.net_worth

In [None]:
test_portfolio.performance.net_worth.plot()

#### PPO and Custom Action and Reward

##### Action

In [None]:
# Custom ManagedRiskOrders action scheme with explicit stop, take, trade-size and
# duration choices.
# NOTE(review): the `create_env` calls later in this notebook only pass `reward=`,
# so this `action` object is never handed to an environment -- confirm whether
# `action=action` was intended there.
action = actions.ManagedRiskOrders(
    stop=[0.02, 0.04, 0.06, 0.08],
    take=[0.01, 0.03, 0.05, 0.07],
    trade_sizes=[1, 1/3],
    durations=[5, 10, 20]
)

##### Reward

In [None]:
from tensortrade.env.default.rewards import TensorTradeRewardScheme

In [None]:
!pip install empyrical

In [None]:
# Imported in this cell because it runs before the reward-scheme cell that imports
# from empyrical; without it a fresh-kernel run stops here with a NameError.
from empyrical import calmar_ratio

# Small hand-written return series for trying out the metric.
returns = np.array([.01, .02, .03, -.4, -.06, -.02])
benchmark_returns = np.array([.02, .02, .03, -.35, -.05, -.01])

# calculate the Calmar ratio of the sample returns
calmar_ratio(returns)

In [None]:
from empyrical import calmar_ratio, max_drawdown

class newManagedRisk(TensorTradeRewardScheme):
    """Reward scheme that scores the portfolio's recent returns with a risk metric.

    Parameters
    ----------
    return_algorithm : {'sharpe', 'calmar', 'maxdd'}, default 'sharpe'
        Name of the metric used to turn recent returns into a reward.
    risk_free_rate : float, default 0.
        Subtracted in the Sharpe-ratio and max-drawdown rewards; not used by the
        Calmar reward.
    target_returns : float, default 0.
        Stored on the instance but not used by any metric in this class.
    window_size : int, default 1
        The reward is computed from the last ``window_size + 1`` net-worth entries.
    """

    def __init__(self,
                 return_algorithm: str = 'sharpe',
                 risk_free_rate: float = 0.,
                 target_returns: float = 0.,
                 window_size: int = 1) -> None:
        algorithm = self.default('return_algorithm', return_algorithm)

        # Map each supported name to the bound method that implements it.
        metrics = {
            'calmar': self._calmar_ratio,
            'maxdd': self._max_dd,
            'sharpe': self._sharpe_ratio,
        }
        assert algorithm in metrics, (
            "return_algorithm must be one of 'calmar', 'maxdd' or 'sharpe', got %r" % (algorithm,)
        )

        self._return_algorithm = metrics[algorithm]
        self._risk_free_rate = self.default('risk_free_rate', risk_free_rate)
        self._target_returns = self.default('target_returns', target_returns)
        self._window_size = self.default('window_size', window_size)

    def _calmar_ratio(self, returns: 'pd.Series') -> float:
        """Computes the Calmar ratio for a given series of returns.

        The compounded return is annualized and divided by the magnitude of the
        maximum drawdown.

        Parameters
        ----------
        returns : `pd.Series`
            The returns for the `portfolio`.

        Returns
        -------
        float
            The Calmar ratio for the given series of `returns`, or 0.0 when the
            maximum drawdown is not negative (including when it is NaN).

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Calmar_ratio
        """
        def annual_returns(returns):
            # Annualizes with 252 periods per year.
            # NOTE(review): this treats each return as one trading day -- confirm
            # that it matches the frequency of the data.
            num_years = len(returns) / 252

            cum_ret_final = (returns + 1).prod().squeeze()

            return cum_ret_final ** (1 / num_years) - 1

        max_dd = max_drawdown(returns)
        # Only a strictly negative drawdown gives a usable denominator.
        if max_dd < 0:
            return annual_returns(returns) / abs(max_dd)

        # Returned as a float so the result type matches the annotation.
        return 0.0

    def _max_dd(self, returns: 'pd.Series') -> float:
        """Computes a reward from the maximum drawdown of a series of returns.

        Parameters
        ----------
        returns : `pd.Series`
            The returns for the `portfolio`.

        Returns
        -------
        float
            ``max_drawdown(returns)`` minus the risk-free rate, plus a 1e-9 offset.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Drawdown_(economics)
        """
        return max_drawdown(returns.values) - self._risk_free_rate + 1e-9

    def _sharpe_ratio(self, returns: 'pd.Series') -> float:
        """Computes the Sharpe ratio for a given series of returns.

        Parameters
        ----------
        returns : `pd.Series`
            The returns for the `portfolio`.

        Returns
        -------
        float
            The Sharpe ratio for the given series of `returns`.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Sharpe_ratio
        """
        # The 1e-9 terms keep the ratio defined when the standard deviation is zero.
        return (np.mean(returns) - self._risk_free_rate + 1e-9) / (np.std(returns) + 1e-9)

    def get_reward(self, portfolio: 'Portfolio') -> float:
        """Computes the reward corresponding to the selected risk-adjusted return metric.

        Parameters
        ----------
        portfolio : `Portfolio`
            The current portfolio being used by the environment.

        Returns
        -------
        float
            The reward corresponding to the selected risk-adjusted return metric.
        """
        # Percentage changes over the last `window_size + 1` net-worth entries.
        returns = portfolio.performance['net_worth'][-(self._window_size + 1):].pct_change().dropna()
        risk_adjusted_return = self._return_algorithm(returns)

        return risk_adjusted_return

##### Training and evaluation

In [None]:
%%time
# Training environment that uses the custom reward scheme with the Calmar metric.
train_environment, train_portfolio, scalers = create_env(train_path, reward=newManagedRisk(return_algorithm='calmar'))

In [None]:
%%time
# Test environment with its own reward-scheme instance, reusing the training scalers.
test_environment, test_portfolio, _ = create_env(test_path, train=False, scalers=scalers, reward=newManagedRisk(return_algorithm='calmar'))

In [None]:
# Wrap for Tensorforce; this section uses max_episode_timesteps=500.
tensorforce_train_environment = ForceEnvironment.create(
    environment=train_environment, max_episode_timesteps=500
)

In [None]:
tensorforce_test_environment = ForceEnvironment.create(
    environment=test_environment, max_episode_timesteps=500
)

In [None]:
# Show the state shapes of the test and training environments side by side.
tensorforce_test_environment.states()["shape"], tensorforce_train_environment.states()["shape"]

In [None]:
# NOTE(review): neither `network_spec` nor `agent_spec` is passed to `Agent.create`
# below, which uses agent="ppo" and network="auto" -- confirm whether these
# settings were meant to be applied.
network_spec = [
    dict(type='dense', size=20, activation='relu'),
    dict(type='dense', size=10, activation='relu'),
]

agent_spec = {
    "agent": "ppo",
#     "update": 64,
#     "optimizer": "adam",
    "objective": "policy_gradient",
    "reward_estimation": {
        "horizon": 20
    }
}

# PPO agent for the custom-reward environments; max_episode_timesteps matches the
# value used when wrapping the environments above.
ppo_agent = Agent.create(
    agent="ppo",
    states=tensorforce_train_environment.states(),
    actions=tensorforce_train_environment.actions(),
    max_episode_timesteps=500,
    batch_size=32,
    network="auto"
)

In [None]:
# Runner that drives the PPO agent against the custom-reward training environment.
train_runner = Runner(
    agent=ppo_agent,
    environment=dict(environment=tensorforce_train_environment),
#     max_episode_timesteps=100
)

In [None]:
# Train for 100 episodes.
train_runner.run(num_episodes=100)

In [None]:
# Net worth of the training portfolio after the run.
train_portfolio.net_worth

In [None]:
# environment.render()

In [None]:
# Net-worth curve recorded for the training portfolio.
train_portfolio.performance.net_worth.plot()

In [None]:
# Runner for the PPO agent against the held-out test environment.
test_runner = Runner(
    agent=ppo_agent,
    environment=dict(environment=tensorforce_test_environment),
#     max_episode_timesteps=100
)

In [None]:
# Run 50 episodes with evaluation=True.
test_runner.run(num_episodes=50, evaluation=True)

In [None]:
# Net worth of the test portfolio after the evaluation run.
test_portfolio.net_worth

In [None]:
# Net-worth curve recorded for the test portfolio.
test_portfolio.performance.net_worth.plot()