### logging

In [None]:
# Default verbosity for the print-based logging in this notebook.
# A LOGGING_LEVEL that is already defined (e.g. by a notebook that %run's this
# one) is left untouched; otherwise fall back to 2.
# The code below prints headline messages when the level is > 0 and per-item
# detail when it is > 1.
if globals().get('LOGGING_LEVEL') is None:
    LOGGING_LEVEL = 2

### module import protection

In [None]:
# Import guard: set a flag the first time this notebook is executed so that
# other notebooks can test `LOADED_DATA_CONTAINER` before %run-ing it again.
if globals().get('LOADED_DATA_CONTAINER') is None:
    if LOGGING_LEVEL > 0:
        print('LOADED_DATA_CONTAINER')
    LOADED_DATA_CONTAINER = True

### modules

In [None]:
# Execute the ANNOTATIONS notebook unless its guard flag is already defined.
# NOTE(review): LOADED_ANNOTATIONS is presumably set inside ANNOTATIONS.ipynb
# (same guard pattern as LOADED_DATA_CONTAINER in this notebook) -- confirm.
# NOTE(review): OPTIONID_VALUES used in the class annotations below is not
# defined in this notebook; presumably it comes from ANNOTATIONS -- confirm.
if globals().get('LOADED_ANNOTATIONS') == None:
    %run ANNOTATIONS.ipynb

In [None]:
# Execute the PATTERN_OBSERVER notebook unless its guard flag is already defined.
# NOTE(review): LOADED_PATTERN_OBSERVER is presumably set inside
# PATTERN_OBSERVER.ipynb (same guard pattern as LOADED_DATA_CONTAINER) -- confirm.
if globals().get('LOADED_PATTERN_OBSERVER') == None:
    %run PATTERN_OBSERVER.ipynb

### imports

In [None]:
from collections import namedtuple
from pathlib import Path
from functools import reduce
import pandas as pd

### begin

In [None]:
class DATA_CONTAINER:
    """Container for the current selection of backtests/books and the frame built from it.

    Attributes
        name            label prefixed to every log line
        reference       dict of the current options (selected backtests, selected
                        books, plot/return/view type, date and time bounds)
        data_dico       cache of raw CSV frames keyed by (option_name, option_path)
        df              frame assembled from `data_dico` by `build_df`
        _observers      list reserved for the observer pattern (not used in this class)
        _logging_level  copy of the global LOGGING_LEVEL taken at construction
    """
    ####################################
    # static
    ####################################
    # standardized file names
    # A `None` field is a placeholder that `_get_filepath` replaces with the
    # option (backtest) name.
    BTPATHTUPLE      = namedtuple('BTPATHTUPLE',['backtest_name','subdir','filename'])
    _dev             = BTPATHTUPLE('./','./',None)
    _daily           = BTPATHTUPLE(None,'./daily','summary_YYYYMMDD')
    _extraday        = BTPATHTUPLE(None,'./extraday','BOOK_YYYYMMDD')
    _intraday        = BTPATHTUPLE(None,'./intraday','BOOK_YYYYMMDD')
    _factor          = BTPATHTUPLE(None,'./factor','BOOK_YYYYMMDD')

    # view types accepted by `_get_filepath`; each one names a classmethod below
    _VIEW_TYPES      = ('Dev','Daily','Extraday','Intraday','Factor')

    ####################################
    # constructor
    ####################################
    def __init__(self,
        name                                = 'DATA_CONTAINER',

        # data
        selected_backtests                  = None,
        selected_books                      = None,

        # reference
        plot_method                         = None,
        return_type                         = None,
        view_type                           = None,
        date_from                           = None,
        date_to                             = None,
        time_from                           = None,
        time_to                             = None,

        ):
        """Store the options and start with an empty cache and an empty frame.

        `selected_backtests` / `selected_books` left as None are replaced by the
        built-in defaults of `_populate_selected_backtests` /
        `_populate_selected_books`. All other options are stored as given.
        """
        self._logging_level                 = LOGGING_LEVEL                  # GLOBAL VARIABLE

        self.name                           = name

        # options
        self.reference = {
            'selected_backtests'            : self._populate_selected_backtests(selected_backtests), # OPTIONID_VALUES
            'selected_books'                : self._populate_selected_books(selected_books),         # OPTIONID_VALUES
            'plot_method'                   : plot_method,                                           # str
            'return_type'                   : return_type,                                           # str
            'view_type'                     : view_type,                                             # str
            'date_from'                     : date_from,                                             # DateTime.Date
            'date_to'                       : date_to,                                               # DateTime.Date
            'time_from'                     : time_from,                                             # DateTime.Time
            'time_to'                       : time_to,                                               # DateTime.Time
        }

        # building self.df
        self.data_dico                      = {}
        self.df                             = pd.DataFrame()

        # for observer pattern
        self._observers                     = []

        # show internals
        self._logging()

    ####################################
    # populate defaults
    ####################################
    # The OPTIONID_VALUES annotations are quoted so that defining the class does
    # not require that name to exist yet (it is not defined in this notebook).
    def _populate_selected_backtests(self,selected_backtests : 'OPTIONID_VALUES' = None) -> 'OPTIONID_VALUES' :
        """Return `selected_backtests` unchanged, or the default selection when it is None.

        The mapping is keyed by (option_name, option_path) with a bool "checked" value.
        """
        if selected_backtests is not None:
            return selected_backtests
        return {
            ('AZUL4.SA',Path(r'C:/Users/ahkar/OneDrive/Documents/Data/B3/AZUL4.SA.csv')) : True,
            ('EMBR3.SA',Path(r'C:/Users/ahkar/OneDrive/Documents/Data/B3/EMBR3.SA.csv')) : True,
            ('ECOR3.SA',Path(r'C:/Users/ahkar/OneDrive/Documents/Data/B3/ECOR3.SA.csv')) : False,
        }

    def _populate_selected_books(self,selected_books : 'OPTIONID_VALUES' = None) -> 'OPTIONID_VALUES' :
        """Return `selected_books` unchanged, or the default selection when it is None.

        The mapping is keyed by (book, book) with a bool "checked" value.
        """
        if selected_books is not None:
            # the original had a bare expression here (no `return`), which
            # silently discarded the caller's selection and returned None
            return selected_books
        return {
            ('Trading','Trading') : True,
            ('Quote','Quote')     : True,
            ('MM2','MM2')         : True,
            ('Hedge','Hedge')     : True,
            ('Hit','Hit')         : False,
        }

    ####################################
    # functions
    ####################################
    def build_df(self) -> None : # TODO, this needs to depend on `view_type`
        """Synchronise `data_dico` with the selected backtests and rebuild `self.df`.

        Steps
            1. load every checked backtest into `data_dico`, drop every unchecked one
            2. drop cached entries that are no longer listed in the selection
            3. concatenate the cached frames under a ('timestamp','sym') index
            4. apply the date bounds for view types 'Extraday'/'Factor' and the
               time bounds for view types 'Intraday'/'Factor'

        `self.df` is an empty frame when nothing is selected.
        """
        if self._logging_level > 0: print(self.name,':','build_df')

        # show internals
        self._logging()

        ######################################################
        # update self.data_dico
        ######################################################
        selected = self.reference['selected_backtests']

        # add / remove entries as per checked backtests
        for (option_name,option_path),checked in selected.items():
            if checked:
                self._data_dico_add_key(option_name,option_path) # make sure present # TODO when using real data this needs to change
            else:
                self._data_dico_remove_key(option_name,option_path) # make sure not present

        # remove options no longer present
        # (collected first: the cache must not change size while being iterated)
        stale = [key for key in self.data_dico if key not in selected]
        for (option_name,option_path) in stale:
            self._data_dico_remove_key(option_name,option_path)

        ######################################################
        # build self.df from self.data_dico
        ######################################################
        if not self.data_dico:
            # self.data_dico empty, bail
            self.df = pd.DataFrame()
            return

        # flatten: one frame per symbol, stacked under an outer 'sym' level
        df = pd.concat(
            [self._extraday_set_index(frame) for frame in self.data_dico.values()],
            keys  = [option_name for (option_name,option_path) in self.data_dico],
            names = ['sym']
        )

        # reorder to ('timestamp','sym')
        df = df.reorder_levels([1,0])

        view_type = self.reference['view_type']

        # apply date filters
        if view_type in ['Extraday','Factor']:

            # lower date filter
            if self.reference['date_from'] is not None:
                df = df[df.index.get_level_values('timestamp').date >= self.reference['date_from']]

            # upper date filter
            if self.reference['date_to'] is not None:
                df = df[df.index.get_level_values('timestamp').date <= self.reference['date_to']]

        # apply time filters
        if view_type in ['Intraday','Factor']:

            # lower time filter
            if self.reference['time_from'] is not None:
                df = df[df.index.get_level_values('timestamp').time >= self.reference['time_from']]

            # upper time filter
            if self.reference['time_to'] is not None:
                df = df[df.index.get_level_values('timestamp').time <= self.reference['time_to']]

        # assign
        self.df = df

    def _extraday_set_index(self,
        df : pd.DataFrame
        ) -> pd.DataFrame :
        """Set index in df for extraday view.

        Converts the 'timestamp' column to datetimes and returns a new frame
        indexed by it. The conversion is written back into `df` itself, so the
        frame passed in keeps the converted column.
        """
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df.set_index('timestamp')

    def _data_dico_add_key(self,
        option_name : str,
        option_path : Path,
        view_type   : str = 'Dev',
        ) -> None :
        """Make sure entry present in self.data_dico.

        Reads the CSV located by `_get_filepath` only when the key
        (option_name, option_path) is not cached yet.

        Example
            d=DATA_CONTAINER()
            d._data_dico_add_key('BRFS3.SA',Path(r'C:/Users/ahkar/OneDrive/Documents/Data/B3/BRFS3.SA.csv'))
            d._data_dico_add_key('VALE3.SA',Path(r'C:/Users/ahkar/OneDrive/Documents/Data/B3/VALE3.SA.csv'))
            d._data_dico_add_key('VALE3.SA',Path(r'C:/Users/ahkar/OneDrive/Documents/Data/B3/VALE3.SA.csv'))  # no second read
        """
        key = (option_name,option_path)

        if key in self.data_dico:
            if self._logging_level > 1: print(self.name,':','KEYS =',len(self.data_dico),':',key,'pass (no add)')
            return

        self.data_dico[key] = pd.read_csv(self._get_filepath(option_name,option_path,view_type))
        if self._logging_level > 1: print(self.name,':','KEYS =',len(self.data_dico),':',key,'ADD')

    def _data_dico_remove_key(self,
        option_name : str,
        option_path : Path,
        ) -> None :
        """Make sure data NOT present in self.data_dico (no error when already absent)."""
        key = (option_name,option_path)

        if key not in self.data_dico:
            if self._logging_level > 1: print(self.name,':','KEYS =',len(self.data_dico),':',key,'pass (no remove)')
            return

        del self.data_dico[key]
        if self._logging_level > 1: print(self.name,':','KEYS =',len(self.data_dico),':',key,'REMOVE')

    def _get_filepath(self,
        option_name : str,
        option_path : Path,
        view_type   : str = 'Dev'
        ) -> Path :
        """Build the CSV path for `option_name` under the layout of `view_type`.

        The path is `option_path.parent` joined with the fields of the view's
        BTPATHTUPLE, where every None field is replaced by `option_name`, and
        with '.csv' appended.

        Raises
            ValueError when `view_type` is not one of `_VIEW_TYPES`.

        Example
            d=DATA_CONTAINER()
            p=Path(r'C:/Users/ahkar/OneDrive/Documents/Data/B3/BRFS3.SA.csv')
            d._get_filepath('BRFS3.SA',p,view_type='Dev')       # .../B3/BRFS3.SA.csv
            d._get_filepath('BRFS3.SA',p,view_type='Daily')     # .../B3/BRFS3.SA/daily/summary_YYYYMMDD.csv
            d._get_filepath('BRFS3.SA',p,view_type='Intraday')  # .../B3/BRFS3.SA/intraday/BOOK_YYYYMMDD.csv
            d._get_filepath('BRFS3.SA',p,view_type='Extraday')  # .../B3/BRFS3.SA/extraday/BOOK_YYYYMMDD.csv
            d._get_filepath('BRFS3.SA',p,view_type='Factor')    # .../B3/BRFS3.SA/factor/BOOK_YYYYMMDD.csv
        """
        if self._logging_level > 0: print(self.name,':','_get_filepath',option_name,option_path,view_type)

        # look the layout up by name instead of eval()-ing a string built from
        # the argument; anything outside the known view types is rejected
        if view_type not in DATA_CONTAINER._VIEW_TYPES:
            raise ValueError('unknown view_type: %r (expected one of %s)' % (view_type,', '.join(DATA_CONTAINER._VIEW_TYPES)))
        path_tuple = getattr(DATA_CONTAINER,view_type)()                    # get BTPATHTUPLE via class method call

        # overwrite None in the tuple with the backtest name
        parts      = [option_name if part is None else part for part in path_tuple]

        # combine into single Path object
        filepath   = option_path.parent.joinpath(*parts)

        # append .csv suffix by string concatenation: names such as 'BRFS3.SA'
        # already contain a dot, so Path.with_suffix would replace '.SA'
        return Path(str(filepath) + '.csv')

    def _logging(self):
        """Print a one-line summary (level > 0) and every reference option (level > 1)."""
        if self._logging_level > 0: print(self.name,':','REFERENCE',':','LEN(DATA_DICO) =',len(self.data_dico),'LEN(DF) =',len(self.df))
        if self._logging_level > 1:
            for k,v in self.reference.items():
                print(self.name,':','REFERENCE',':',k,':',v)

    @classmethod
    def Dev(cls) -> BTPATHTUPLE :
        """Return the development layout (`_dev`): the CSV sits next to `option_path`."""
        return cls._dev

    @classmethod
    def Daily(cls) -> BTPATHTUPLE :
        '''
        purpose
             extraday summary --> macro extraday overview

        dump frequency
            once a day at end of day

        example files
            ./backtest_name/daily/summary_yyyymmdd.csv

        contents of each daily file
            book|pnl|stock volume|future volume|fees
            All|.|.|.|.
            Trading|.|.|.|.
            Quote|.|.|.|.
            Hedge|.|.|.|.

        used to compute summary for entire backtest
            book|return bps|sharpe|daily pnl|daily stock volume|daily future volume|fee bps
            All|.|.|.|.|.|.
            Trading|.|.|.|.|.|.
            Quote|.|.|.|.|.|.
            Hedge|.|.|.|.|.|.
        '''
        return cls._daily

    @classmethod
    def Extraday(cls) -> BTPATHTUPLE :
        '''
        purpose
            extraday behavioural analysis --> customised macro extraday overview

        dump frequency
            once a day at end of day
            reference symbol `Symbol` is used to initialize the dump

        example files
            ./backtest_name/extraday/Trading_yyyymmdd.csv
            ./backtest_name/extraday/Quote_yyyymmdd.csv
            ./backtest_name/extraday/Hedge_yyyymmdd.csv

        contents of each daily file
            TimeStamp|Symbol|PnlTotal|PnlJour|PnlVeille|OpenNom|OpenBidNom|OpenAskNom|...
            EOD|VALE3.SA|.|.|.|.|.|.|...
        '''
        return cls._extraday

    @classmethod
    def Intraday(cls) -> BTPATHTUPLE :
        '''
        purpose
            intraday behavioural analysis --> customised macro intraday overview

        dump frequency
            n-minutely snapshots throughout the day
            reference symbol `Symbol` is used to initialize the dump

        example files
            ./backtest_name/intraday/Trading_yyyymmdd.csv
            ./backtest_name/intraday/Quote_yyyymmdd.csv
            ./backtest_name/intraday/Hedge_yyyymmdd.csv

        contents of each daily file
            TimeStamp|Symbol|PnlTotal|PnlJour|PnlVeille|OpenNom|OpenBidNom|OpenAskNom|...
            10:00:00|VALE3.SA|.|.|.|.|.|.|...
            10:01:00|VALE3.SA|.|.|.|.|.|.|...
            ...
            16:59:00|VALE3.SA|.|.|.|.|.|.|...
            17:00:00|VALE3.SA|.|.|.|.|.|.|...
        '''
        return cls._intraday

    # @classmethod was missing here, so DATA_CONTAINER.Factor() (as called by
    # _get_filepath) failed with a missing-argument TypeError
    @classmethod
    def Factor(cls) -> BTPATHTUPLE :
        '''
        purpose
            extraday factor analysis --> customised macro factor analysis

        dump frequency
            once a day at end of day
            dump on all symbols

        example files
            ./backtest_name/factor/Trading_yyyymmdd.csv
            ./backtest_name/factor/Quote_yyyymmdd.csv
            ./backtest_name/factor/Hedge_yyyymmdd.csv

        contents of each daily file
            Symbol|PnlTotal|PnlJour|PnlVeille|ExecNom|MedLongNom|MaxLongLongNom|MedOpenBidNom|MedOpenAskNom|...
            sym_0|
            sym_1|
            ...
            sym_n|
        '''
        return cls._factor

In [None]:
# Run the example only when this notebook is executed directly.
# Per the original author: `__file__` is defined when the notebook is called
# with %run, but not when its cells are run manually, so the NameError branch
# is the "opened directly" case.
try:
    __file__
except NameError:
    # example: build a container with the default options and list its attributes
    a=DATA_CONTAINER()
    display(dir(a))

In [None]:
'''
help(DATA_CONTAINER.Daily)

help(DATA_CONTAINER.Extraday)

help(DATA_CONTAINER.Intraday)

help(DATA_CONTAINER.Factor)
'''
None