In [7]:
# Import necessary libraries
import pandas as pd
import numpy as np
import talib
# from datetime import datetime, timedelta
# import datetime
from datetime import datetime as dt
from datetime import timedelta
import datetime
import yfinance as yf

# Silence every warning category so library notices do not clutter cell output.
# NOTE(review): this is a blanket filter; it also hides deprecation and runtime
# warnings raised by later cells, so consider narrowing it while debugging.
import warnings
warnings.filterwarnings("ignore")

## Design Reward

In [None]:
# We use pnl as our reward

def get_pnl(entry, curr_price, position, tc=0.001):
    """Return the fractional pnl of a trade, net of transaction costs.

    The entry price is grossed up by ``tc`` and the current price is reduced
    by ``tc``; their difference is expressed relative to the cost-adjusted
    entry price and then multiplied by ``position``.

    Parameters
    ----------
    entry : float
        Price at which the trade was opened. Must be non-zero.
    curr_price : float
        Price at which the trade is valued.
    position : int
        Multiplier applied to the return (1 for long, -1 for short).
    tc : float, optional
        Proportional transaction cost and commission per side.
        Defaults to 0.001.

    Returns
    -------
    float
        Signed fractional return of the trade.
    """
    cost_basis = entry * (1 + tc)
    net_exit = curr_price * (1 - tc)

    # Divide by the full cost basis. The earlier form `... / entry * (1 + tc)`
    # divided by `entry` and then *multiplied* by (1 + tc) because `/` and `*`
    # share precedence and associate left to right.
    # NOTE(review): for position == -1 the same cost adjustment is negated, so
    # costs raise the reported short pnl instead of lowering it - confirm
    # whether a separate short-side formula is wanted.
    return (net_exit - cost_basis) / cost_basis * position

def reward_pure_pnl(entry, curr_price, position):
    """Reward equal to the pnl reported by ``get_pnl``, unchanged."""
    pnl = get_pnl(entry, curr_price, position)
    return pnl


def reward_positive_pnl(entry, curr_price, position):
    """Reward equal to the pnl when it is non-negative, and 0 otherwise."""
    pnl = get_pnl(entry, curr_price, position)
    # Losses are clipped to zero; gains (and break-even) pass through.
    return pnl if pnl >= 0 else 0
    
    
def reward_pos_log_pnl(entry, curr_price, position):
    """Reward of ceil(log(pnl * 100 + 1)) for non-negative pnl, else 0."""
    pnl = get_pnl(entry, curr_price, position)

    # Anything that is not a non-negative pnl earns no reward.
    if not pnl >= 0:
        return 0

    shifted = pnl * 100 + 1
    return np.ceil(np.log(shifted))


def reward_categorical_pnl(entry, curr_price, position):
    """Reward equal to the sign of the pnl, as computed by ``np.sign``."""
    return np.sign(get_pnl(entry, curr_price, position))


def reward_positive_categorical_pnl(entry, curr_price, position):
    """Reward of 1 when the pnl is non-negative and 0 when it is not."""
    pnl = get_pnl(entry, curr_price, position)
    # A break-even trade (pnl == 0) counts as a win.
    return 1 if pnl >= 0 else 0


def reward_exponential_pnl(entry, curr_price, position):
    """Reward equal to the exponential of the pnl, ``exp(pnl)``."""
    reward = np.exp(get_pnl(entry, curr_price, position))
    return reward

## Get Price Data

In [12]:
# Define the ticker symbol
ticker = 'META'

# Define the date range: 30 days before today to today
end_date = dt.today()
start_date = end_date - timedelta(days=30)

# Fetch the data using yfinance with 5-minute intervals
meta_data = yf.download(ticker, start=start_date, end=end_date, interval='5m')

# Drop the 'Adj Close' column when it is present. yfinance does not return it
# in every configuration (for example with auto_adjust=True), so a missing
# column is ignored rather than raising KeyError. Reassigning instead of
# dropping in place keeps the cell safe to re-run.
meta_data = meta_data.drop(columns=['Adj Close'], errors='ignore')

# Display the data
print(meta_data.head())
print(meta_data.tail())

[*********************100%%**********************]  1 of 1 completed

                                 Open        High         Low       Close  \
Datetime                                                                    
2024-06-10 09:30:00-04:00  493.529999  495.779999  493.440002  494.529999   
2024-06-10 09:35:00-04:00  494.429993  497.000000  494.119995  495.954987   
2024-06-10 09:40:00-04:00  495.971497  496.609985  495.299988  495.354309   
2024-06-10 09:45:00-04:00  495.410004  496.589996  494.600006  495.299988   
2024-06-10 09:50:00-04:00  495.119995  495.869995  494.870087  495.658112   

                            Volume  
Datetime                            
2024-06-10 09:30:00-04:00  1252944  
2024-06-10 09:35:00-04:00   234079  
2024-06-10 09:40:00-04:00   182812  
2024-06-10 09:45:00-04:00   120172  
2024-06-10 09:50:00-04:00    90853  
                                 Open        High         Low       Close  \
Datetime                                                                    
2024-07-08 15:35:00-04:00  527.461914  527.4627




## Construct Game Class

`get_state`: This function returns the state of the system, including candlesticks, indicators, day of the week, time of the day and position.
            
`act`: This function interacts with the trading algorithm. It takes the action suggested by the neural network as a parameter and returns a flag indicating whether the game is over, along with a reward once the game is over.

In [15]:
class Game(object):
    def __init__(self, bars5m, bars1d, bars1h, reward_function, lkbk=20,  init_idx=None):

        # Initialise 5 mins frequency data
        self.bars5m = bars5m
        # Initilaise lookback period for the calculation of technical indicators
        self.lkbk = lkbk
        # Intialise length of each trade
        self.trade_len = 0
        # Initialise 1 day frequency data
        self.bars1d = bars1d
        # Initialise 1 hour frequency data
        self.bars1h = bars1h
        # Initialise when game is over to update the state, position and calculate reward
        self.is_over = False
        # Intialise reward to store the value of reward
        self.reward = 0
        # Define pnl_sum to calculate the pnl when all episodes are complete.
        self.pnl_sum = 0
        # Supply a starting index which indicates a position in our price dataframe
        # and denotes the point at which the game starts
        self.init_idx = init_idx
        # Instantiate reward function
        self.reward_function = reward_function
        # When game is over, reset all state values
        self.reset()


    #------------------------------------------------------------
    # Action is 0 (hold) --> do nothing
    # Action is 2 (buy):  position 0 --> go long;  position 1 --> do nothing;  position -1 --> end the game (exit the short)
    # Action is 1 (sell): position 0 --> go short; position -1 --> do nothing; position 1 --> end the game (exit the long)
    def _update_position(self, action):
        '''This is where we update our position'''

        # If the action is zero or hold, do nothing
        if action == 0:
            pass

        elif action == 2:
            """---Enter a long or exit a short position---"""

            # Current position (long) same as the action (buy), do nothing
            if self.position == 1:
                pass

            # No current position, and action is buy, update the position to indicate buy
            elif self.position == 0:
                self.position = 1
                self.entry = self.curr_price
                self.start_idx = self.curr_idx

            # Current postion (short) is different than the action (buy), end the game
            elif self.position == -1:
                self.is_over = True

        elif action == 1:
            """---Enter a short or exit a long position---"""

            # Current position (short) same as the action (sell), do nothing
            if self.position == -1:
                pass

            # No current position, and action is sell, update the position to indicate sell
            elif self.position == 0:
                self.position = -1
                self.entry = self.curr_price
                self.start_idx = self.curr_idx

            # Current postion (long) is different than the action (sell), end the game
            elif self.position == 1:
                self.is_over = True

    #--------------------------------------------------------------------
    def _get_reward(self):
        """Here we calculate the reward when the game is finished.
        In this case, we use a exponential pnl reward.
        """
        if self.is_over:
            self.reward = self.reward_function(
                self.entry, self.curr_price, self.position)
            
    # ---------------------------------------------------------------------------------------------

    def _get_last_N_timebars(self):
        '''Store the most recent bars at 5-minute, 1-hour and 1-day resolution.

        Each bar frame is sliced from ``curr_time`` minus a window up to
        ``curr_time`` and then trimmed to at most the last ``lkbk`` rows. The
        results are stored in ``self.last5m``, ``self.last1h`` and
        ``self.last1d``; the window widths are stored in ``self.wdw5m``,
        ``self.wdw1h`` and ``self.wdw1d``.
        '''
        # NOTE(review): assumes the bar frames are indexed by datetime and that
        # self.curr_time is set elsewhere before this is called - confirm.

        '''Width of the 5m, 1hr, and 1d'''
        # Each width is later passed as the first positional argument of
        # timedelta, which is a number of days.
        self.wdw5m = 9
        self.wdw1h = np.ceil(self.lkbk*15/24.)
        self.wdw1d = np.ceil(self.lkbk*15)

        '''Creating the candlesticks based on windows'''
        # Slice by time window first, then keep only the trailing lkbk rows.
        self.last5m = self.bars5m[self.curr_time -
                                  timedelta(self.wdw5m):self.curr_time].iloc[-self.lkbk:]
        self.last1h = self.bars1h[self.curr_time -
                                  timedelta(self.wdw1h):self.curr_time].iloc[-self.lkbk:]
        self.last1d = self.bars1d[self.curr_time -
                                  timedelta(self.wdw1d):self.curr_time].iloc[-self.lkbk:]

