### Definición experimentación 1ª Iteración

**En esta iteración se utilizará tal cual el entorno de OpenAI gymAnytrading, sin realizarle ninguna modificación**.

**Series a predecir**: todos los pares de divisas

**Longitud entrenamiento**: 2 años.

**Longitud test**: 6 meses.

**window_size**: Fijo o variable. Creo que fijo a 50 días es lo que renta. Fijo a 50 días.

**Medición del mejor modelo**: el que en el conjunto de test saque el mejor sharpe ratio.

**Entre qué se elige**: distintos modelos y distintas políticas para cada modelo. Combinamos estos y sacamos la mejor combinación para cada par de divisas.

In [2]:
import numpy as np
import pandas as pd

import gym
import gym_anytrading
import quantstats as qs

from stable_baselines import A2C
from stable_baselines import DQN
from stable_baselines.common.vec_env import DummyVecEnv

import matplotlib.pyplot as plt
from finta import TA

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


In [3]:
AUDUSD = pd.read_csv('../Datasets/AUDUSD_Candlestick_1_Hour_BID_01.01.2007-25.12.2021.csv')
EURUSD = pd.read_csv('../Datasets/EURUSD_Candlestick_1_Hour_BID_01.01.2007-25.12.2021.csv')
GBPUSD = pd.read_csv('../Datasets/GBPUSD_Candlestick_1_Hour_BID_01.01.2007-25.12.2021.csv')
NZDUSD = pd.read_csv('../Datasets/NZDUSD_Candlestick_1_Hour_BID_01.01.2007-25.12.2021.csv')
USDCAD = pd.read_csv('../Datasets/USDCAD_Candlestick_1_Hour_BID_01.01.2007-25.12.2021.csv')
USDCHF = pd.read_csv('../Datasets/USDCHF_Candlestick_1_Hour_BID_01.01.2007-25.12.2021.csv')
USDJPY = pd.read_csv('../Datasets/USDJPY_Candlestick_1_Hour_BID_01.01.2007-25.12.2021.csv')

In [4]:
# 1. Limpiamos el dataset ya que las fechas como vienen están mal
# 2. Asignamos el conjunto de entrenamiento y test de cada par de divisas. 
# El train va desde Junio 2019 a Junio 2021 y el test de Junio 2021 a Diciembre 2021

df_pair = {}
df_pair['AUDUSD'] = AUDUSD
df_pair['EURUSD'] = EURUSD
df_pair['GBPUSD'] = GBPUSD
df_pair['NZDUSD'] = NZDUSD
df_pair['USDCAD'] = USDCAD
df_pair['USDCHF'] = USDCHF
df_pair['USDJPY'] = USDJPY
df_train = {}
df_test = {}
names = list(df_pair.keys())
for i in range(len(df_pair)):
    df = df_pair[names[i]]
    df = df.drop(df[df.Volume == 0].index)
    df = df.rename(columns={'Gmt time': 'Date'})
    #AUDUSD['Date'] =  pd.to_times(AUDUSD['Date'])
    #AUDUSD.set_index('Date', inplace=True)

    df['Day'] = df['Date'].astype(str).str[0:2]
    df['Month'] = df['Date'].astype(str).str[3:5]
    df['Year'] = df['Date'].astype(str).str[6:10]
    df['Hour'] = df['Date'].astype(str).str[11:13]
    df['Date'] = pd.to_datetime(df[['Day', 'Month', 'Year', 'Hour']])
    df.set_index('Date', inplace=True)
    df = df[["Open","High","Low","Close","Volume"]]
    
    df['SMA'] = TA.SMA(df, 12)
    df['RSI'] = TA.RSI(df)
    df['OBV'] = TA.OBV(df)
    df.fillna(0, inplace=True)
    
    
    df_pair[names[i]] = df
    # Train
    df_train[names[i]] = df_pair[names[i]].query("20190630 < index < 20210701")
    # Test
    df_test[names[i]] = df_pair[names[i]].query("20210701 < index < 20211226")

In [5]:
window_size_1 = 50
start_index = window_size_1
end_index = len(df_train[names[0]])
frame_bound = (start_index, end_index)

#desired_total_episodes = 75
desired_total_episodes = 50
n_points = end_index - start_index
# get the number of timesteps
total_timesteps = desired_total_episodes*n_points

In [6]:
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from enum import Enum
import matplotlib.pyplot as plt


class Actions(Enum):
    Sell = 0
    Buy = 1


class Positions(Enum):
    Short = 0
    Long = 1

    def opposite(self):
        return Positions.Short if self == Positions.Long else Positions.Long


class TradingEnv(gym.Env):

    metadata = {'render.modes': ['human']}

    def __init__(self, df, window_size):
        assert df.ndim == 2

        self.seed()
        self.df = df
        self.window_size = window_size
        self.prices, self.signal_features = self._process_data()
        self.shape = (window_size, self.signal_features.shape[1])

        # spaces
        self.action_space = spaces.Discrete(len(Actions))
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)

        # episode
        self._start_tick = self.window_size
        self._end_tick = len(self.prices) - 1
        self._done = None
        self._current_tick = None
        self._last_trade_tick = None
        self._position = None
        self._position_history = None
        self._total_reward = None
        self._total_profit = None
        self._first_rendering = None
        self.history = None


    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]


    def reset(self):
        self._done = False
        self._current_tick = self._start_tick
        self._last_trade_tick = self._current_tick - 1
        self._position = Positions.Short
        self._position_history = (self.window_size * [None]) + [self._position]
        self._total_reward = 0.
        self._total_profit = 1.  # unit
        self._first_rendering = True
        self.history = {}
        return self._get_observation()


    def step(self, action):
        self._done = False
        self._current_tick += 1

        if self._current_tick == self._end_tick:
            self._done = True

        step_reward = self._calculate_reward(action)
        self._total_reward += step_reward

        self._update_profit(action)

        trade = False
        if ((action == Actions.Buy.value and self._position == Positions.Short) or
            (action == Actions.Sell.value and self._position == Positions.Long)):
            trade = True

        if trade:
            self._position = self._position.opposite()
            self._last_trade_tick = self._current_tick

        self._position_history.append(self._position)
        observation = self._get_observation()
        info = dict(
            total_reward = self._total_reward,
            total_profit = self._total_profit,
            position = self._position.value
        )
        self._update_history(info)

        return observation, step_reward, self._done, info


    def _get_observation(self):
        return self.signal_features[(self._current_tick-self.window_size):self._current_tick]


    def _update_history(self, info):
        if not self.history:
            self.history = {key: [] for key in info.keys()}

        for key, value in info.items():
            self.history[key].append(value)


    def render(self, mode='human'):

        def _plot_position(position, tick):
            color = None
            if position == Positions.Short:
                color = 'red'
            elif position == Positions.Long:
                color = 'green'
            if color:
                plt.scatter(tick, self.prices[tick], color=color)

        if self._first_rendering:
            self._first_rendering = False
            plt.cla()
            plt.plot(self.prices)
            start_position = self._position_history[self._start_tick]
            _plot_position(start_position, self._start_tick)

        _plot_position(self._position, self._current_tick)

        plt.suptitle(
            "Total Reward: %.6f" % self._total_reward + ' ~ ' +
            "Total Profit: %.6f" % self._total_profit
        )

        plt.pause(0.01)


    def render_all(self, mode='human'):
        window_ticks = np.arange(len(self._position_history))
        plt.plot(self.prices)

        short_ticks = []
        long_ticks = []
        for i, tick in enumerate(window_ticks):
            if self._position_history[i] == Positions.Short:
                short_ticks.append(tick)
            elif self._position_history[i] == Positions.Long:
                long_ticks.append(tick)

        plt.plot(short_ticks, self.prices[short_ticks], 'ro')
        plt.plot(long_ticks, self.prices[long_ticks], 'go')

        plt.suptitle(
            "Total Reward: %.6f" % self._total_reward + ' ~ ' +
            "Total Profit: %.6f" % self._total_profit
        )
        
        
    def close(self):
        plt.close()


    def save_rendering(self, filepath):
        plt.savefig(filepath)


    def pause_rendering(self):
        plt.show()


    def _process_data(self):
        raise NotImplementedError


    def _calculate_reward(self, action):
        raise NotImplementedError


    def _update_profit(self, action):
        raise NotImplementedError


    def max_possible_profit(self):  # trade fees are ignored
        raise NotImplementedError


In [7]:
import numpy as np



class ForexEnv(TradingEnv):

    def __init__(self, df, window_size, frame_bound, unit_side='left'):
        assert len(frame_bound) == 2
        assert unit_side.lower() in ['left', 'right']

        self.frame_bound = frame_bound
        self.unit_side = unit_side.lower()
        super().__init__(df, window_size)

        self.trade_fee = 0.0003  # unit


    def _process_data(self):
        #prices = self.df.loc[:, 'Close'].to_numpy()

        #prices[self.frame_bound[0] - self.window_size]  # validate index (TODO: Improve validation)
        #prices = prices[self.frame_bound[0]-self.window_size:self.frame_bound[1]]

        #diff = np.insert(np.diff(prices), 0, 0)
        #signal_features = np.column_stack((prices, diff))
        start = self.frame_bound[0] - self.window_size
        end = self.frame_bound[1]
        prices = self.df.loc[:, 'Low'].to_numpy()[start:end]
        signal_features = self.df.loc[:, ['Low', 'Volume','SMA', 'RSI', 'OBV']].to_numpy()[start:end]
        
        return prices, signal_features


    def _calculate_reward(self, action):
        step_reward = 0  # pip

        trade = False
        if ((action == Actions.Buy.value and self._position == Positions.Short) or
            (action == Actions.Sell.value and self._position == Positions.Long)):
            trade = True

        current_price = self.prices[self._current_tick]
        # El last trade tick solo se actualiza si trade es true
        last_trade_price = self.prices[self._last_trade_tick]
        price_diff = current_price - last_trade_price
        factor_trade = 10000
        factor_no_trade = 100
        if self._position == Positions.Short:
            if trade:
                step_reward += -price_diff * factor_trade
            else:
                step_reward += -price_diff * factor_no_trade
        elif self._position == Positions.Long:
            if trade:
                step_reward += price_diff * factor_trade
            else:
                step_reward += price_diff * factor_no_trade

        return step_reward


    def _update_profit(self, action):
        trade = False
        if ((action == Actions.Buy.value and self._position == Positions.Short) or
            (action == Actions.Sell.value and self._position == Positions.Long)):
            trade = True

        if trade or self._done:
            current_price = self.prices[self._current_tick]
            last_trade_price = self.prices[self._last_trade_tick]

            if self.unit_side == 'left':
                if self._position == Positions.Short:
                    quantity = self._total_profit * (last_trade_price - self.trade_fee)
                    self._total_profit = quantity / current_price

            elif self.unit_side == 'right':
                if self._position == Positions.Long:
                    quantity = self._total_profit / last_trade_price
                    self._total_profit = quantity * (current_price - self.trade_fee)


    def max_possible_profit(self):
        current_tick = self._start_tick
        last_trade_tick = current_tick - 1
        profit = 1.

        while current_tick <= self._end_tick:
            position = None
            if self.prices[current_tick] < self.prices[current_tick - 1]:
                while (current_tick <= self._end_tick and
                       self.prices[current_tick] < self.prices[current_tick - 1]):
                    current_tick += 1
                position = Positions.Short
            else:
                while (current_tick <= self._end_tick and
                       self.prices[current_tick] >= self.prices[current_tick - 1]):
                    current_tick += 1
                position = Positions.Long

            current_price = self.prices[current_tick - 1]
            last_trade_price = self.prices[last_trade_tick]

            if self.unit_side == 'left':
                if position == Positions.Short:
                    quantity = profit * (last_trade_price - self.trade_fee)
                    profit = quantity / current_price

            elif self.unit_side == 'right':
                if position == Positions.Long:
                    quantity = profit / last_trade_price
                    profit = quantity * (current_price - self.trade_fee)

            last_trade_tick = current_tick - 1

        return profit

In [8]:
policy_A2C = ['MlpPolicy','MlpLstmPolicy','MlpLnLstmPolicy']

In [9]:
def model_train_test(df_train,df_test, algo, policy):
    env2 = ForexEnv(df = df_train,window_size = window_size_1, frame_bound = (window_size_1, len(df_train)))    
    env_maker = lambda: env2
    env = DummyVecEnv([env_maker])
    model = algo(policy, env)
    #model = algo(policy, env, verbose=1)
    model.learn(total_timesteps=total_timesteps)
    
    # Test
    env = ForexEnv(df = df_test, window_size = window_size_1, frame_bound = (window_size_1, len(df_test))) 
   # env = gym.make('forex-v0', df=df_test, frame_bound=(window_size_1, len(df_test)), window_size=window_size_1)
    obs = env.reset()
    recompensa = []
    action_1 = []
    state_1 = []
    while True: 
        obs = obs[np.newaxis, ...]
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)
        recompensa.append(rewards)
        action_1.append(action[0])
        if done:
            print("info", info)
            break
    start_index = window_size_1
    end_index = len(df_test)
    qs.extend_pandas()
    net_worth = pd.Series(env.history['total_profit'], index=df_test.index[start_index+1:end_index])
    returns = net_worth.pct_change().iloc[1:]
    df = pd.DataFrame()
    df['recompensa'] = recompensa
    df['action'] = action_1
    
    return(returns)

In [10]:
# Defining functions

### Results A2C

In [11]:
# RESULTS A2C
results = {} 
policy_A2C = ['MlpPolicy','MlpLstmPolicy','MlpLnLstmPolicy']
# Create dictionary with each pair
for i in range(len(df_train)):
    results[str(names[i])] = {}

for i in range(len(df_train)):
    for j in range(len(policy_A2C)):
        results[str(names[i])]['A2C_'+str(policy_A2C[j])] = model_train_test(df_train[names[i]],df_test[names[i]], A2C, policy_A2C[j])





Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


info {'total_reward': 2041.365999999997, 'total_profit': 1.037301520504911, 'position': 0}
info {'total_reward': -5908.932999999977, 'total_profit': 1.0001598210004796, 'position': 1}
info {'total_reward': -394.10099999997635, 'total_profit': 0.7351967619877253, 'position': 1}
info {'total_reward': 7141.747999999961, 'total_profit': 1.04826951659707, 'position': 0}
info {'total_reward': -2573.8490000000065, 'total_profit': 0.9988490746238438, 'position': 1}
info {'total_reward': -7126.282999999997, 'total_profit': 0.9997890526777652, 'position': 1}
info {'total_reward': 5818.85199999998, 'total_profit': 1.0330039171126992, 'position': 

In [12]:
import pickle

In [13]:
# Para guardar los resultados
try:
    geeky_file = open('A2C_iter_3_good', 'wb')
    pickle.dump(results, geeky_file)
    geeky_file.close()
  
except:
    print("Something went wrong")

In [14]:
with open('A2C_iter_3_good', 'rb') as f:
    A2C_iter_3 = pickle.load(f)

In [71]:
A2C_iter_3

{'AUDUSD': {'A2C_MlpPolicy': Date
  2021-07-05 05:00:00    0.000000e+00
  2021-07-05 06:00:00    2.220446e-16
  2021-07-05 07:00:00    0.000000e+00
  2021-07-05 08:00:00    0.000000e+00
  2021-07-05 09:00:00    0.000000e+00
                             ...     
  2021-12-24 17:00:00    1.718380e-03
  2021-12-24 18:00:00    0.000000e+00
  2021-12-24 19:00:00    0.000000e+00
  2021-12-24 20:00:00    0.000000e+00
  2021-12-24 21:00:00    4.159215e-05
  Length: 2992, dtype: float64,
  'A2C_MlpLstmPolicy': Date
  2021-07-05 05:00:00    0.000000
  2021-07-05 06:00:00    0.000000
  2021-07-05 07:00:00    0.000000
  2021-07-05 08:00:00    0.000000
  2021-07-05 09:00:00    0.000000
                           ...   
  2021-12-24 17:00:00    0.000000
  2021-12-24 18:00:00    0.000000
  2021-12-24 19:00:00    0.000000
  2021-12-24 20:00:00    0.000000
  2021-12-24 21:00:00    0.041135
  Length: 2992, dtype: float64,
  'A2C_MlpLnLstmPolicy': Date
  2021-07-05 05:00:00    0.000000
  2021-07-05 06:00