In [3]:
import os
import numpy as np
import pandas as pd

# Load every file in the data directory into a {name: DataFrame} mapping,
# keyed by the file name up to its first dot.
data_ = {}

for filename in os.listdir("data/Top100Cryptos/"):
    path = os.path.join("data/Top100Cryptos/", filename)
    name = filename.split(".")[0]
    try:
        data_[name] = pd.read_csv(path)
    except Exception as exc:
        # Loading stays best-effort (an unreadable file must not abort the
        # cell), but the skip is reported instead of being swallowed silently,
        # and KeyboardInterrupt/SystemExit are no longer caught.
        print("Skipping {}: {!r}".format(path, exc))

# Rank the loaded frames by row count, largest first, and keep the ten
# longest histories.
num_data = [data_[name].shape[0] for name in data_]
name_list = np.array(list(data_.keys()))[np.argsort(num_data)[::-1]]
big_names = name_list[:10]

# Final working set: only the ten selected frames.
data = {name: data_[name] for name in big_names}

In [4]:
# Month token -> two-digit month number. Built once instead of on every call.
# "June" is kept alongside the three-letter abbreviations because the original
# table accepted it.
_MONTH_TO_NUMBER = {
    'Jan' : "01",
    'Feb' : "02",
    'Mar' : "03",
    'Apr' : "04",
    'May' : "05",
    'Jun' : "06",
    'June' : "06",
    'Jul' : "07",
    'Aug' : "08",
    'Sep' : "09",
    'Oct' : "10",
    'Nov' : "11",
    'Dec' : "12"
}


def convert_time(t):
    """Convert a date such as ``"Sep 22, 2017"`` into ``"2017-09-22"``.

    The input is split on whitespace after removing commas and is read as
    ``<month> <day> <year>``; any further tokens are ignored.

    The day is left-padded to two digits so that the returned strings sort
    chronologically when compared as plain strings (``"2017-09-05"`` rather
    than ``"2017-09-5"``).

    Args:
        t: Date string whose first token is one of the month names in
            ``_MONTH_TO_NUMBER``.

    Returns:
        The date formatted as ``"<year>-<MM>-<DD>"``.

    Raises:
        KeyError: If the month token is not in the lookup table.
        IndexError: If the string has fewer than three tokens.
    """
    parts = t.replace(",", "").split()
    month = _MONTH_TO_NUMBER[parts[0]]
    day = parts[1].zfill(2)
    year = parts[2]
    return "-".join([year, month, day])

In [8]:
from rltensor.environments.core import Env
from copy import deepcopy
import pandas as pd
import numpy as np

class TradeEnvironment(Env):
    """Bar-by-bar trading environment over several price histories.

    Each symbol's frame is indexed by ``"YYYY-MM-DD"`` strings produced by
    ``convert_time``. The environment walks the sorted union of all dates.
    For a date outside a symbol's own range the symbol's first or last row is
    used; for a missing date inside the range the most recent bar is carried
    forward.

    NOTE(review): how the base ``Env`` invokes ``_reset`` / ``_step`` is not
    visible from this cell - confirm against ``rltensor.environments.core``.
    """

    def __init__(self, data, start=None, end=None, add_cash=True, keys=["Close", "Open", "High"]):
        """Index the price frames by date and position the clock at ``start``.

        Args:
            data: Mapping of symbol -> DataFrame with a ``"Date"`` column
                readable by ``convert_time`` and the columns named in
                ``keys`` (``"Close"`` is always needed for returns). The
                mapping is deep-copied; the caller's frames are not modified.
                Each frame is assumed to be ordered newest row first, so its
                last row is the earliest date - TODO confirm for new data
                sources.
            start: Optional ``"YYYY-MM-DD"`` lower bound. It is clamped into
                the available range and snapped forward to the next date that
                exists in the time index.
            end: Optional ``"YYYY-MM-DD"`` upper bound, clamped to the last
                available date.
            add_cash: When true, add a ``"Cash"`` symbol whose
                Open/High/Low/Close/Volume are all 1 on every date.
            keys: Column names that make up each symbol's observation row.

        Raises:
            ValueError: If ``data`` contains no dated rows at all.
        """
        # Copy so neither the shared default list nor a caller-owned list is
        # aliased by self.keys.
        self.keys = list(keys)
        time_index = set()
        impute_data = {}
        data = deepcopy(data)
        for key, val in data.items():
            dates = [convert_time(d) for d in val["Date"].values]
            # (earliest, latest) date and the matching rows, relying on the
            # newest-first ordering described in the docstring.
            impute_data[key] = dict(time_range=(dates[-1], dates[0]),
                                    impute_val=(val.iloc[-1], val.iloc[0]))
            val.index = dates
            time_index.update(dates)
        self.time_index = sorted(time_index)
        if not self.time_index:
            raise ValueError("data must contain at least one dated row")
        first_time, last_time = self.time_index[0], self.time_index[-1]
        if add_cash:
            val = np.ones(len(self.time_index))
            cash_df = pd.DataFrame({"Open" : val,
                                    "High" : val,
                                    "Low" : val,
                                    "Close" : val,
                                    "Volume" : val},
                                   index=self.time_index)
            key = "Cash"
            data[key] = cash_df
            # cash_df is indexed oldest-first, so (earliest, latest) is
            # (first, last) here - the opposite of the per-symbol frames.
            impute_data[key] = dict(time_range=(first_time, last_time),
                                    impute_val=(cash_df.iloc[0], cash_df.iloc[-1]))
        self.impute_data = impute_data
        if start is None:
            self.start = first_time
        else:
            # Clamp into range, then snap to a date that really exists so
            # _update_time can locate it in the index.
            lower = max(start, first_time)
            self.start = next((t for t in self.time_index if t >= lower), last_time)
        if end is None:
            self.end = last_time
        else:
            self.end = min(end, last_time)
        self.data = data
        self.symbols = list(data.keys())
        self.num_stocks = len(self.symbols)
        self.current_time = self.start
        self.current_step = 0
        # Last bar handed out per symbol; used to fill dates missing inside a
        # symbol's own range. Must exist before the first _get_bar call.
        self.impute_bar = {}
        # Used to calculate returns in _step.
        self.prev_states = self._get_bar()

    def _reset(self):
        """Rewind to ``start`` and return the observation for that date."""
        self.current_time = self.start
        self.current_step = 0
        # Drop bars carried over from the previous episode so they are neither
        # used for imputation nor as the return baseline.
        self.impute_bar = {}
        current_bars = self._get_bar()
        self.prev_states = deepcopy(current_bars)
        return self._get_observation(current_bars)

    def _step(self, action):
        """Score ``action`` against the current bar and advance one date.

        Per-symbol return is ``Close / previous Close - 1`` where "previous"
        is the bar seen by the preceding step (or by reset). The reward is the
        sum of those returns weighted element-wise by ``action``.

        Args:
            action: Per-symbol weights, in the order of ``self.symbols``.

        Returns:
            ``(observation, terminal, reward, info)``. ``observation`` is
            built from the bar that was just scored, ``terminal`` is true once
            the clock has reached ``self.end`` (the clock is then not
            advanced), and ``info["returns"]`` holds the per-symbol returns.
        """
        current_bars = self._get_bar()
        returns = np.array([
            current_bars[symbol]["Close"] / self.prev_states[symbol]["Close"] - 1
            for symbol in self.symbols
        ])
        # Update status
        self.prev_states = deepcopy(current_bars)
        terminal = self.current_time >= self.end
        if not terminal:
            self._update_time()
        observation = self._get_observation(current_bars)
        reward = np.sum(returns * action)
        info = {"returns": returns}
        return observation, terminal, reward, info

    def _get_observation(self, bars):
        """Stack ``bars`` into an array with one row per symbol and one column per key."""
        return np.array([[bars[symbol][key] for key in self.keys]
                         for symbol in self.symbols])

    def _update_time(self):
        """Move ``current_time`` to the next entry of ``time_index``.

        Raises:
            IndexError: If ``current_time`` is already the last entry.
            ValueError: If ``current_time`` is not in ``time_index``.
        """
        index = self.time_index.index(self.current_time)
        self.current_time = self.time_index[index + 1]
        self.current_step += 1

    def _get_bar(self):
        """Return ``{symbol: bar}`` for ``current_time`` and remember it.

        Before a symbol's first date its earliest row is used; after its last
        date its latest row is used. Inside that range a missing date is
        filled with the bar most recently returned for the symbol or, when
        none has been returned yet, with the symbol's latest earlier row.
        """
        now = self.current_time
        bar = {}
        for symbol in self.symbols:
            min_t, max_t = self.impute_data[symbol]["time_range"]
            earliest_row, latest_row = self.impute_data[symbol]["impute_val"]
            frame = self.data[symbol]
            if (min_t <= now) and (max_t >= now):
                if now in frame.index:
                    bar[symbol] = frame.loc[now]
                elif symbol in self.impute_bar:
                    bar[symbol] = deepcopy(self.impute_bar[symbol])
                else:
                    bar[symbol] = self._last_bar_before(symbol, now)
            elif min_t > now:
                bar[symbol] = earliest_row
            else:
                bar[symbol] = latest_row
        # Keep value for imputation
        self.impute_bar = deepcopy(bar)
        return bar

    def _last_bar_before(self, symbol, time):
        """Return ``symbol``'s row for the latest date strictly before ``time``."""
        frame = self.data[symbol]
        return frame.loc[max(d for d in frame.index if d < time)]

    @property
    def action_dim(self):
        """Number of tradable symbols (including Cash when it was added)."""
        return self.num_stocks

In [9]:
import numpy as np


def calc_stats(log_return, accumulated_pv, peak_pv):
    """Advance portfolio statistics by one period.

    Args:
        log_return: Log return of the period.
        accumulated_pv: Portfolio value before the period.
        peak_pv: Highest portfolio value seen so far.

    Returns:
        Tuple ``(return_, accumulated_pv, peak_pv, draw_down)`` where
        ``return_`` is the simple return ``exp(log_return) - 1``,
        ``accumulated_pv`` is the value after the period, ``peak_pv`` is the
        updated running peak and ``draw_down`` is the fractional drop of the
        new value below that peak.
    """
    growth = np.exp(log_return)
    # Build a new value instead of using ``*=``: with an ndarray argument the
    # in-place form would overwrite the caller's object, and a peak assigned
    # from it would alias it and follow every later update.
    accumulated_pv = accumulated_pv * growth
    if peak_pv < accumulated_pv:
        peak_pv = accumulated_pv
    draw_down = (peak_pv - accumulated_pv) / peak_pv
    return_ = growth - 1.
    return return_, accumulated_pv, peak_pv, draw_down

def calc_sharp_ratio(returns, bench_mark=0, eps=1e-6):
    """Return the mean excess return divided by the return volatility.

    Args:
        returns: Sequence of per-period returns.
        bench_mark: Value subtracted from the mean return.
        eps: Small constant added to the denominator so a zero-variance
            series does not divide by zero.

    Returns:
        ``(mean(returns) - bench_mark) / (std(returns) + eps)`` using the
        population standard deviation.
    """
    excess_mean = np.mean(returns) - bench_mark
    volatility = np.std(returns)
    return excess_mean / (volatility + eps)

In [11]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import time
from six.moves import xrange
from tensorforce.execution import Runner


class TradeRunner(Runner):
    """Runner whose ``run`` loop records per-episode portfolio statistics.

    For every episode the loop tracks the accumulated portfolio value, the
    draw-down series and a Sharpe-style ratio, and appends them to lists kept
    on the instance (``pv_list``, ``mmd_list``, ``sp_list``,
    ``accumulated_pvs_list``, ``draw_downs_list``).

    NOTE: ``np``, ``calc_stats`` and ``calc_sharp_ratio`` are not imported or
    defined in this cell; they must already exist in the notebook namespace.
    """
    def __init__(self, agent, env, env_name="trading",
                 tensorboard_dir="./logs", scalar_summary_tags=None,
                 histogram_summary_tags=None, load_file_path=None,
                 *args, **kwargs):
        """Fill in default summary tags and forward everything to the parent.

        Args:
            agent: Agent passed through to the parent constructor.
            env: Environment passed through to the parent constructor; its
                ``action_dim`` is read when default histogram tags are built.
            env_name: Passed through to the parent constructor.
            tensorboard_dir: Passed through to the parent constructor.
            scalar_summary_tags: Scalar tag names; a default list is used
                when ``None``. Also stored on ``self.scalar_summary_tags``.
            histogram_summary_tags: Histogram tag names; when ``None`` the
                default is the two returns tags plus one episode/test action
                tag per action dimension.
            load_file_path: Passed through to the parent constructor.

        NOTE(review): the keyword names forwarded below (``env``,
        ``env_name``, ``tensorboard_dir``, ...) must be accepted by the
        parent ``Runner.__init__`` - verify against the installed version.
        """

        if scalar_summary_tags is None:
            scalar_summary_tags = [
                'average.loss',
                'average.returns',
                'drawdown',
                'cumulative_returns',
                'episode.final_value',
                'episode.max_returns',
                'episode.min_returns',
                'episode.avg_returns',
                'episode.maximum_drawdowns',
                'episode.sharp_ratio',
                'training.learning_rate',
                'training.num_step_per_sec',
                'training.time',
                'test.cumulative_returns',
                'test.drawdowns',
                'test.final_value',
                'test.maximum_drawdowns',
                'test.sharp_ratio',]
        self.scalar_summary_tags = scalar_summary_tags

        if histogram_summary_tags is None:
            histogram_summary_tags = ['episode.returns', 'test.returns']
            # One episode tag and one test tag per action dimension.
            for i in range(env.action_dim):
                histogram_summary_tags.append("episode.action_{}".format(i))
                histogram_summary_tags.append("test.action_{}".format(i))
        
        super(TradeRunner, self).__init__(agent=agent, env=env,
                                          env_name=env_name,
                                          tensorboard_dir=tensorboard_dir,
                                          scalar_summary_tags=scalar_summary_tags,
                                          histogram_summary_tags=histogram_summary_tags,
                                          load_file_path=load_file_path,
                                          *args, **kwargs)
        
        
    def run(
        self,
        timesteps=None,
        episodes=None,
        max_episode_timesteps=None,
        deterministic=False,
        episode_finished=None,
        init_pv=100.,
    ):
        """
        Runs the agent on the environment and records portfolio statistics.

        Args:
            timesteps: Number of timesteps
            episodes: Number of episodes
            max_episode_timesteps: Max number of timesteps per episode
            deterministic: Deterministic flag
            episode_finished: Function handler taking a `Runner` argument and returning a boolean indicating
                whether to continue execution. For instance, useful for reporting intermediate performance or
                integrating termination conditions.
            init_pv: Portfolio value at the start of every episode; also the initial peak value
                used for draw-down.
        """

        # Keep track of episode reward and episode length for statistics.
        self.start_time = time.time()

        self.agent.reset()

        # The limits are relative to this call: offset them by the counts the
        # agent has already accumulated.
        self.episode = self.agent.episode
        if episodes is not None:
            episodes += self.agent.episode

        self.timestep = self.agent.timestep
        if timesteps is not None:
            timesteps += self.agent.timestep
            
        # Per-episode results, one entry appended per finished episode:
        # Sharpe-style ratio, maximum draw-down, final portfolio value, and
        # the full portfolio-value and draw-down series.
        self.sp_list = []
        self.mmd_list = []
        self.pv_list = []
        self.accumulated_pvs_list = []
        self.draw_downs_list = []

        while True:
            episode_start_time = time.time()

            self.agent.reset()
            state = self.environment.reset()
            accumulated_pv = init_pv
            peak_pv = init_pv
            self.accumulated_pvs = [accumulated_pv]
            draw_downs = [0.]
            returns = [0.]
            self.episode_timestep = 0

            while True:
                action = self.agent.act(states=state, deterministic=deterministic)
                # The same action is executed repeat_actions times; each step
                # reward is treated as a simple return and accumulated in log
                # space.
                log_return = 0
                for repeat in xrange(self.repeat_actions):
                    state, terminal, step_reward = self.environment.execute(actions=action)
                    log_return += np.log(1. + step_reward)
                    if terminal:
                        break

                if max_episode_timesteps is not None and self.episode_timestep >= max_episode_timesteps:
                    terminal = True

                # The agent is rewarded with the summed log return.
                self.agent.observe(terminal=terminal, reward=log_return)

                self.episode_timestep += 1
                self.timestep += 1
                return_, accumulated_pv, peak_pv, draw_down = calc_stats(log_return, accumulated_pv, peak_pv)
                returns.append(return_)
                self.accumulated_pvs.append(accumulated_pv)
                draw_downs.append(draw_down)
                
                if terminal or self.agent.should_stop():  # TODO: should should_stop also set terminal?
                    break
                    
            time_passed = time.time() - episode_start_time

            self.pv_list.append(self.accumulated_pvs[-1])
            self.mmd_list.append(np.max(draw_downs))
            self.sp_list.append(calc_sharp_ratio(returns))
            self.accumulated_pvs_list.append(self.accumulated_pvs)
            self.draw_downs_list.append(draw_downs)
            
            # NOTE(review): episode_timesteps / episode_times are not created
            # in this class - presumably the parent Runner provides them as
            # lists; verify.
            self.episode_timesteps.append(self.episode_timestep)
            self.episode_times.append(time_passed)

            self.episode += 1

            # Stop when the callback returns a falsy value, when an
            # episode/timestep limit has been reached, or when the agent
            # asks to stop.
            if episode_finished and not episode_finished(self) or \
                    (episodes is not None and self.agent.episode >= episodes) or \
                    (timesteps is not None and self.agent.timestep >= timesteps) or \
                    self.agent.should_stop():
                # agent.episode / agent.timestep are globally updated
                break

        self.agent.close()
        self.environment.close()

SyntaxError: invalid syntax (<ipython-input-11-d3778a9180f2>, line 50)