The goal here is to implement a Finance environment as a prediction game. 
 - the environment uses static historical financial time series data to generate the states of the environment
 - the state is given by four floating point numbers representing four most recent data points in the time series
 - the value to be predicted is either 0 or 1
 - 0 means the financial time series value drops to a lower level ("market goes down")
 - 1 means the time series value rises to a higher level ("market goes up")

In [2]:
# importing the necessary libraries
import os
import random
import numpy as np
import pandas as pd

In [3]:
# setting the seed for reproducibility
random.seed(100)

# setting deterministic hashing in python
os.environ['PYTHONHASHSEED'] = '0'

In [4]:
# action space class
class ActionSpace:
    def sample(self):
        return random.randint(0, 1)
    
# creating a action space result
action_space = ActionSpace()

# printing out the results of the action space
# the .sample() returns a random action
print([action_space.sample() for _ in range(10)])


[0, 1, 1, 0, 1, 1, 1, 0, 0, 0]


In [5]:
# the finance class
class Finance:

    # the url link for the data set
    url = 'https://certificate.tpq.io/rl4finance.csv'

    # the constructor
    def __init__(self, symbol, feature, min_accuracy=0.485, n_features=4):

        # symbol for the time series to be used for the prediction game
        self.symbol = symbol

        # the type of feature to be used to define the state of the environment
        self.feature = feature

        # the number of feature values to be provided to the agent
        self.n_features = n_features

        # the action space object that is used for random action sampling
        self.action_space = ActionSpace()

        # minimum prediction accuracy required for agent to continue with the prediction game
        self.min_accuracy = min_accuracy

        # retrieval of financial time series data from the remote source
        self._get_data()

        # method call for data preparation
        self._prepare_data()

In [6]:
url = 'https://certificate.tpq.io/rl4finance.csv'
df = pd.read_csv(url)
df

Unnamed: 0,Date,AAPL.O,MSFT.O,INTC.O,AMZN.O,GS.N,.SPX,.VIX,SPY,EUR=,XAU=,GDX,GLD
0,2014-01-01,,,,,,,,,1.3752,1205.2900,,
1,2014-01-02,19.754623,37.16,25.790,19.8985,176.89,1831.98,14.23,182.920,1.3670,1224.5100,22.03,118.00
2,2014-01-03,19.320695,36.91,25.780,19.8220,178.15,1831.37,13.76,182.885,1.3587,1236.1600,21.83,119.29
3,2014-01-06,19.426052,36.13,25.460,19.6815,179.37,1826.77,13.55,182.360,1.3627,1237.5900,21.93,119.50
4,2014-01-07,19.287034,36.41,25.585,19.9015,178.29,1837.88,12.92,183.480,1.3615,1231.4900,21.97,118.82
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2603,2023-12-25,,,,,,,,,1.1007,,,
2604,2023-12-26,193.050000,374.66,50.500,153.4100,381.61,4774.75,12.99,475.650,1.1042,2066.6050,31.67,191.72
2605,2023-12-27,193.150000,374.07,50.760,153.3400,384.48,4781.58,12.43,476.510,1.1103,2077.1589,31.98,192.59
2606,2023-12-28,193.580000,375.28,50.390,153.3800,386.41,4783.35,12.47,476.690,1.1059,2064.8601,31.17,191.47


In [7]:
class Finance(Finance):

    # method for preparing data
    def _prepare_data(self):

        # selects the relevant time series data from the DataFrame project
        self.data = pd.DataFrame(self.raw[self.symbol]).dropna()

        # generates a log return time series from the prime time series
        self.data['r'] = np.log(self.data / self.data.shift(1))

        # generates the binary, directional data to be predicted from log returns
        self.data['d'] = np.where(self.data['r'] > 0, 1, 0)

        # gets rid of all rows in the dataframe object that contian NaN
        self.data.dropna(inplace=True)

        # applies gaussian normalization to the data 
        self.data_ = (self.data - self.data.mean()) / self.data.std()

    
    # method for retrieving data
    def _get_data(self):
        self.raw = pd.read_csv(self.url, index_col=0, parse_dates=True)

    
    # method for reset
    def reset(self):

        # sets the current bar to the value for number of features
        self.bar = self.n_features

        # reset total reward to 0
        self.treward = 0

        # generates the initial state object to be returned by the method
        state = self.data_[self.feature].iloc[self.bar - self.n_features:self.bar].values

        return state, {}
    

    # method for step
    def step(self, action):

        # checks whether prediction is correct
        if action == self.data['d'].iloc[self.bar]:
            correct = True
        else:
            correct = False

        # assigns reward depending on correctness
        reward = 1 if correct else 0

        # increases the total reward
        self.treward += reward

        # bar value increased to move environment forward on time series
        self.bar += 1

        # current accuracy is calculated
        self.accuracy = self.treward / (self.bar - self.n_features)

        # checks whether end of dataset is reached
        if self.bar >= len(self.data):
            done = True

        # checks whether prediction is correct
        elif reward == 1:
            done = False

        # checks whether current accuracy is above minimum required accuracy
        elif(self.accuracy < self.min_accuracy) and (self.bar > 15):
            done = True
        else:
            done = False

        # generates the next state object to be returned by the method
        next_state = self.data_[self.feature].iloc[self.bar - self.n_features:self.bar].values
        
        return next_state, reward, done, False, {}



In [8]:
fin = Finance(symbol='EUR=', feature='EUR=')

list(fin.raw.columns)

['AAPL.O',
 'MSFT.O',
 'INTC.O',
 'AMZN.O',
 'GS.N',
 '.SPX',
 '.VIX',
 'SPY',
 'EUR=',
 'XAU=',
 'GDX',
 'GLD']

In [9]:
fin.reset()

(array([2.74844931, 2.64643904, 2.69560062, 2.68085214]), {})

In [11]:
fin.action_space.sample()

1

In [12]:
fin.step(fin.action_space.sample())

(array([2.64643904, 2.69560062, 2.68085214, 2.63046153]), 1, False, False, {})

In [14]:
fin = Finance('EUR=', 'r')
fin.reset()

(array([-1.19130476, -1.21344494,  0.61099805, -0.16094865]), {})

In [15]:
class RandomAgent:

    # setting up the constructor
    def __init__(self):
        self.env = Finance('EUR=', 'r')
    
    # the play method
    def play(self, episodes=1):
        self.trewards = list()

        for e in range(episodes):
            self.env.reset()

            for step in range(1, 100):
                a = self.env.action_space.sample()
                state, reward, done, trunc, info = self.env.step(a)

                if done:
                    self.trewards.append(step)
                    break

In [16]:
ra = RandomAgent()

ra.play(15)

ra.trewards

[15, 12, 12, 12, 12, 15, 12, 27, 13, 12, 13, 13, 13, 12]

In [17]:
round(sum(ra.trewards) / len(ra.trewards), 2)

13.79

In [None]:
len(fin.data)