In [6]:
# Handles how market data is sourced and drip-fed into the event loop
%load_ext autoreload
%autoreload 2
from dotenv import load_dotenv
load_dotenv()
import datetime
import os, os.path
import pandas as pd
import sys
sys.path.append(
    os.environ.get('WORK_DIR')) #type: ignore
sys.path.append(
    os.environ.get('DBASE_DIR')) #type: ignore
from dbase.database.SQLHelpers import query_database # type: ignore
from dbase.DataAPI.ThetaData import retrieve_option_ohlc # type: ignore
from trade.backtester_.backtester_ import PTDataset
from trade.assets.Stock import Stock
from abc import ABCMeta, abstractmethod
from data import DataHandler
from copy import deepcopy

from EventDriven.event import MarketEvent


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:

class HistoricTradeDataHandler(DataHandler):
    """
    Replays a pandas dataframe of historical trades as a stream of signal bars.

    ``trades_df`` must have the columns ``Ticker``, ``EntryTime``, ``ExitTime``
    and ``Size``. It is expanded into ``signal_df``: a ``Date`` column spanning
    the earliest entry to the latest exit, plus one column per ticker holding a
    signal code for that date:

        1  -> entry of a trade with positive size (long)
        2  -> entry of a trade with negative size (short)
        -1 -> exit of a trade
        0  -> do nothing

    Each call to ``update_bars`` moves the oldest remaining row of ``signal_df``
    into ``latest_signal_df`` and puts a ``MarketEvent`` on the ``events`` queue.
    """

    def __init__(self, events, trades_df):
        """
        Args:
            events: queue-like object; only its ``put`` method is used here.
            trades_df: dataframe with ``Ticker``, ``EntryTime``, ``ExitTime``
                and ``Size`` columns. A copy is stored, so the datetime
                conversion below does not write into the caller's frame.
        """
        self.trades_df = trades_df.copy()
        self.continue_backtest = True
        self.events = events
        # Cache of option OHLC frames keyed by the id from get_option_id().
        self.options_data = {}
        self._open_trade_data()

    def _open_trade_data(self):
        """
        Build ``signal_df`` (all bars still to be replayed) and an empty
        ``latest_signal_df`` (bars already replayed) from ``trades_df``.

        Also sets ``symbol_list``, ``start_date`` and ``end_date``.
        """
        self.trades_df['EntryTime'] = pd.to_datetime(self.trades_df['EntryTime'])
        self.trades_df['ExitTime'] = pd.to_datetime(self.trades_df['ExitTime'])

        unique_tickers = self.trades_df['Ticker'].unique()
        self.symbol_list = unique_tickers

        self.start_date = self.trades_df['EntryTime'].min()
        self.end_date = self.trades_df['ExitTime'].max()

        if self.trades_df.empty:
            # min()/max() of an empty column are NaT and pd.date_range rejects
            # NaT bounds, so an empty trade list yields an empty signal frame.
            date_range = pd.DatetimeIndex([])
        else:
            # NOTE(review): this is a daily grid anchored at start_date and the
            # loop below matches timestamps by exact equality, so an entry/exit
            # whose time-of-day differs from start_date's will not register.
            # Confirm that the trade timestamps are date-only.
            date_range = pd.date_range(start=self.start_date, end=self.end_date)

        # Initialise the signal dataframe: every ticker starts at 0 (do nothing).
        self.signal_df = pd.DataFrame({'Date': date_range})
        for ticker in unique_tickers:
            self.signal_df[ticker] = 0

        # Populate the signal dataframe. Trades are applied in row order, so a
        # later trade overwrites an earlier one that lands on the same date.
        trade_fields = zip(
            self.trades_df['Ticker'],
            self.trades_df['EntryTime'],
            self.trades_df['ExitTime'],
            self.trades_df['Size'],
        )
        for ticker, entry_time, exit_time, size in trade_fields:
            # Positive size is a long position, negative size is a short one;
            # a zero or missing size writes no entry signal.
            if size > 0:
                self.signal_df.loc[self.signal_df['Date'] == entry_time, ticker] = 1
            elif size < 0:
                self.signal_df.loc[self.signal_df['Date'] == entry_time, ticker] = 2
            self.signal_df.loc[self.signal_df['Date'] == exit_time, ticker] = -1

        # Zero-row slice: same columns ('Date' + tickers) and dtypes as
        # signal_df. (list.append returns None, so building the column list
        # with ['Date'].append(...) produced a frame with no columns.)
        self.latest_signal_df = self.signal_df.iloc[:0].copy()

    def _get_new_bar(self):
        """
        Generator yielding the oldest remaining row of ``signal_df`` (as a
        Series) and removing that row from ``signal_df`` before each yield.
        """
        while not self.signal_df.empty:
            bar = self.signal_df.iloc[0]
            self.signal_df = self.signal_df.iloc[1:]
            yield bar

    def get_latest_bars(self, symbol ='', N=1) -> pd.DataFrame:
        """
        Return the last ``N`` rows of ``latest_signal_df``.

        ``symbol`` is accepted but not used: the returned rows contain the
        columns of every ticker.
        """
        return self.latest_signal_df.tail(N)

    def update_bars(self):
        """
        Move the next bar from ``signal_df`` into ``latest_signal_df`` and put a
        ``MarketEvent`` on the events queue.

        When no bars remain, ``continue_backtest`` is set to False.
        """
        try:
            # A fresh generator each call is fine: the replay position lives in
            # self.signal_df, not in the generator object.
            bar = next(self._get_new_bar())
        except StopIteration:
            print("No more signals available")
            self.continue_backtest = False
        else:
            if bar is not None:
                bar_df = pd.DataFrame([bar])
                if self.latest_signal_df.empty:
                    # Skip concat with an empty frame: it adds nothing and can
                    # degrade dtypes / emit a pandas FutureWarning.
                    self.latest_signal_df = bar_df
                else:
                    self.latest_signal_df = pd.concat([self.latest_signal_df, bar_df], axis=0)
        # NOTE(review): the event is also queued on the call that finds the data
        # exhausted (no new bar). Kept as-is because the consuming loop is not
        # visible here; confirm consumers check continue_backtest.
        self.events.put(MarketEvent())

    def update_options_data_on_order(self, contract):
        """
        Fetch and cache OHLC data for the option described by ``contract``.

        ``contract`` is indexed with the keys ``root``, ``expiration``,
        ``strike`` and ``right``. Nothing happens when it is None or when the
        option is already cached. Data is requested for the whole
        ``start_date``..``end_date`` span of the trades.
        """
        if contract is None:
            return

        option_id = self.get_option_id(contract)
        if option_id in self.options_data:
            return

        # Request OHLC data for the option.
        start_date = self.start_date.strftime('%Y%m%d')
        end_date = self.end_date.strftime('%Y%m%d')
        exp = f'{contract["expiration"]}'
        strike = contract['strike']
        options = retrieve_option_ohlc(symbol = contract['root'], exp = exp, strike= strike, right=contract["right"], start_date=start_date, end_date=end_date)
        if options is not None:
            self.options_data[option_id] = options # a dataframe with columns: ms_of_day,open,high,low,close,volume,count,date
        else:
            print(f"Option data not available for {option_id}") #TODO: good place to use logger

    def get_options_data(self, option_id: str) -> pd.DataFrame:
        """
        Return the cached dataframe for ``option_id``
        (columns: ms_of_day,open,high,low,close,volume,count,date).

        Raises:
            KeyError: if no data has been cached for ``option_id``.
        """
        return self.options_data[option_id]

    def get_option_id(self, contract: pd.DataFrame) -> str:
        """
        Return the option id string ``root-expiration-strike-right`` built from
        the ``root``, ``expiration``, ``strike`` and ``right`` entries of
        ``contract``.

        NOTE(review): annotated as a DataFrame, but each entry is interpolated
        into the string as-is, so a single contract (dict or Series row) is
        presumably what callers pass. TODO confirm.
        """
        return f"{contract['root']}-{contract['expiration']}-{contract['strike']}-{contract['right']}"

In [11]:
# Build a one-element list holding a PTDataset labelled 'AAPL' around the data
# returned by Stock.spot.
aapl = Stock('AAPL', run_chain = False)
# NOTE(review): presumably ts=True asks for the spot time series rather than a
# single value; confirm against Stock.spot.
data = aapl.spot(ts = True)
dataset = [PTDataset('AAPL', data)]
# Bare last expression so the notebook displays the list.
dataset

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data.split_ratio.replace(0, 1, inplace = True)


[PTDataset(AAPL)]

In [None]:
class MarketDataHandler(DataHandler):
    def __init__(self, events, datalist):
        self.events = events
        self.datalist = deepcopy(datalist)
        self.symbol_list = [x.name for x in datalist]
        self.continue_backtest = True
        self.latest_data = {}