In [1]:
import os
import pandas as pd
import pickle
import sys
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import ThreadPoolExecutor


In [2]:

# --- Experiment directories -------------------------------------------------
# Root of "Experiment One"; SymbolAnalyzer looks for one sub-directory per
# symbol underneath it.
experimentOne = '/media/ak/T71/August11th2022Experiments/ExperimentOne'
# Input files.
mfdfaDataFrames = '/media/ak/T71/August11th2022Experiments/mfdfaDataFrames'
# Location to save files.
LinearMMDOutputFiles = '/media/ak/T71/August11th2022Experiments/ExperimentOne/TestMMDOutputFiles'

# --- Module search path -----------------------------------------------------
# NOTE(review): the first entry looks like a placeholder path - confirm it is
# still needed before relying on it.
sys.path.insert(0, '/directory/tothe/handshakefile/')
sys.path.append('/home/ak/Documents/PaperCode/stylised_facts')

In [4]:
def process_symbol(symbol):
    """
    Run the complete aggregation for a single symbol.

    Builds a SymbolAnalyzer for ``symbol`` rooted at the module-level
    ``experimentOne`` directory, prints whether the symbol directory exists,
    and then processes all four bar choices with four worker threads.

    :param symbol: str, the symbol to analyze (e.g. 'TY1').
    """
    bar_choices = ['tick', 'calendar', 'dollar', 'volume']
    analyzer = SymbolAnalyzer(symbol=symbol,
                              experimentOne=experimentOne,
                              bars=bar_choices)
    analyzer.check_directory()
    analyzer.process_bars(max_workers=4)

class SymbolAnalyzer:
    """
    Aggregate the per-file pickled results of one symbol into DataFrames.

    For every bar choice and every entry of ``self.variables`` the analyzer
    reads ``pickle[bar][variable]`` from each matching file in the symbol
    directory, places the values side by side (one column per file) and
    writes the resulting DataFrame to
    ``<resultsMainPath>/<symbol>/results/<bar>/<symbol>_<bar>_<variable>.pkl``.
    """

    # Results root used when the caller does not pass ``resultsMainPath``.
    DEFAULT_RESULTS_MAIN_PATH = '/media/ak/T71/August11th2022Experiments/mfdfaDataFrames'

    def __init__(self, symbol='FB1', experimentOne='', bars=None, resultsMainPath=None):
        """
        Initialize the SymbolAnalyzer and create its results directory.

        :param symbol: str, the symbol to analyze.
        :param experimentOne: str, the base directory containing one
            sub-directory per symbol.
        :param bars: list, bar choices to process
            (e.g. ['tick', 'calendar', 'dollar', 'volume']); defaults to ['tick'].
        :param resultsMainPath: str or None, root directory for the output
            files; ``None`` selects ``DEFAULT_RESULTS_MAIN_PATH``.
        """
        self.symbol = symbol
        self.symbolPath = os.path.join(experimentOne, str(self.symbol))
        if resultsMainPath is None:
            resultsMainPath = self.DEFAULT_RESULTS_MAIN_PATH
        self.resultsMainPath = resultsMainPath
        self.resultsPath = os.path.join(self.resultsMainPath, str(self.symbol), 'results')
        # Created eagerly so that save_results can always write below it.
        os.makedirs(self.resultsPath, exist_ok=True)
        self.bars = bars or ['tick']
        self.variables = ['n_F', 'list_H', 'list_H_intercept', 'tau', 'alpha', 'mfSpect']

    def check_directory(self):
        """
        Print the symbol path and a boolean indicating if the directory exists.
        """
        print(self.symbolPath, os.path.isdir(self.symbolPath))

    def get_files(self, bar):
        """
        Get the list of files for a specific bar choice.

        :param bar: str, the bar choice to search for in the filenames.
        :return: list, the names in the symbol directory that contain ``bar``,
            sorted so that repeated runs see the files in the same order.
        """
        bar_token = str(bar)
        return sorted(name for name in os.listdir(self.symbolPath) if bar_token in name)

    def save_dataframe_to_pickle(self, df, save_path, protocol=pickle.HIGHEST_PROTOCOL):
        """
        Save a DataFrame to a pickle file.

        :param df: DataFrame, the DataFrame to save.
        :param save_path: str, the path where the pickle file will be saved.
        :param protocol: int, the pickle protocol to use when saving the DataFrame.
        """
        with open(save_path, 'wb') as f:
            pickle.dump(df, f, protocol=protocol)
        print(f'saving: {save_path}')

    def _index_from_filename(self, file):
        """
        Derive the column label for ``file``.

        The label is the underscore-separated token that follows the symbol,
        cut at the first dot.  When the name starts with ``<symbol>_`` the
        prefix is stripped first, so symbols that themselves contain an
        underscore (e.g. 'G_1') no longer map every file to the same label.
        Names that do not start with the symbol use the second
        underscore-separated token.

        :param file: str, a filename from the symbol directory.
        :return: str, the label used as DataFrame column name.
        :raises IndexError: if the name has no usable underscore-separated token.
        """
        prefix = f"{self.symbol}_"
        if file.startswith(prefix):
            token = file[len(prefix):].split('_')[0]
        else:
            token = file.split('_')[1]
        return token.split('.')[0]

    def _load_variable(self, file, bar, variable):
        """
        Read one variable of one bar choice from a pickled file.

        :param file: str, filename inside the symbol directory.
        :param bar: str, first-level key of the pickled object.
        :param variable: str, second-level key of the pickled object.
        :return: tuple ``(index, values)`` - the column label for the file and
            the stored values.
        """
        file_loc = os.path.join(self.symbolPath, file)
        # pd.read_pickle unpickles arbitrary objects: only use on trusted files.
        variable_array = pd.read_pickle(file_loc)[str(bar)][str(variable)]
        return self._index_from_filename(file), variable_array

    def process_file(self, file, bar, variable, result_dict):
        """
        Load ``variable`` for ``bar`` from ``file`` and store it in ``result_dict``.

        :param file: str, filename inside the symbol directory.
        :param bar: str, the bar choice to read.
        :param variable: str, the variable to read.
        :param result_dict: dict, updated in place; the key is the label
            derived from the filename.
        """
        index, variable_array = self._load_variable(file, bar, variable)
        result_dict[index] = variable_array

    def save_results(self, result_dict, bar, variable):
        """
        Write ``result_dict`` as a DataFrame pickle below the bar's results directory.

        :param result_dict: dict, column label -> values.
        :param bar: str, the bar choice (also the sub-directory name).
        :param variable: str, the variable name used in the output filename.
        """
        bar_results_path = os.path.join(self.resultsPath, bar)
        os.makedirs(bar_results_path, exist_ok=True)
        save_path = os.path.join(bar_results_path, f"{self.symbol}_{bar}_{variable}.pkl")
        self.save_dataframe_to_pickle(pd.DataFrame(result_dict), save_path)

    def process_files_parallel(self, files, bar, max_workers=4):
        """
        Read every variable from ``files`` with a thread pool and save one
        DataFrame per variable.

        Results are collected in the order of ``files`` (not in thread
        completion order), so the saved column order is reproducible.  Any
        exception raised while reading a file propagates to the caller instead
        of being dropped, which previously produced silently incomplete output.

        :param files: iterable of str, filenames inside the symbol directory.
        :param bar: str, the bar choice being processed.
        :param max_workers: int, number of reader threads per variable.
        """
        files = list(files)
        for variable in self.variables:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                # Iterating the map() result re-raises the first worker error.
                loaded = list(executor.map(
                    lambda name: self._load_variable(name, bar, variable), files))
            self.save_results(dict(loaded), bar, variable)

    def process_bars(self, max_workers=4):
        """
        Process every configured bar choice in turn.

        :param max_workers: int, number of reader threads passed on to
            ``process_files_parallel``.
        """
        for bar in self.bars:
            files = self.get_files(bar)
            self.process_files_parallel(files, bar, max_workers)




In [None]:

if __name__ == '__main__':
    # Single-symbol run: aggregate every bar choice for 'TY1' only.
    symbol_analyzer3 = SymbolAnalyzer(
        symbol='TY1',
        experimentOne=experimentOne,
        bars=['tick', 'calendar', 'dollar', 'volume'],
    )
    # Prints the symbol directory and whether it exists before any work starts.
    symbol_analyzer3.check_directory()
    symbol_analyzer3.process_bars(max_workers=4)

In [5]:
from concurrent.futures import ThreadPoolExecutor

if __name__ == '__main__':
    # Batch run: aggregate the results of every symbol, a few symbols at a time.

    # List of symbols to process. 'XM1' used to appear twice, which made two
    # workers process the same symbol at the same time.
    symbols = ['TY1', 'FB1', 'XM1', 'US1',
               'G_1', 'RX1', 'TU1', 'FV1', 'YM1', 'DU1', 'JB1']
    # Order-preserving de-duplication: output filenames are derived from the
    # symbol, so a repeated entry would have concurrent workers writing the
    # same pickle files.
    symbols = list(dict.fromkeys(symbols))

    # Maximum number of concurrent symbol analyzers
    max_symbol_analyzers = 4

    # Process symbols concurrently
    with ThreadPoolExecutor(max_workers=max_symbol_analyzers) as executor:
        futures = [executor.submit(process_symbol, symbol) for symbol in symbols]

    # Leaving the with-block has already waited for every task; result() is
    # called so that an exception raised inside a worker is re-raised here
    # instead of being lost.
    for future in futures:
        future.result()

/media/ak/T71/August11th2022Experiments/ExperimentOne/TY1 True
/media/ak/T71/August11th2022Experiments/ExperimentOne/FB1 True
/media/ak/T71/August11th2022Experiments/ExperimentOne/XM1 True
/media/ak/T71/August11th2022Experiments/ExperimentOne/US1 True
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/US1/results/tick/US1_tick_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/US1/results/tick/US1_tick_list_H.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/XM1/results/tick/XM1_tick_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/US1/results/tick/US1_tick_list_H_intercept.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/US1/results/tick/US1_tick_tau.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/FB1/results/tick/FB1_tick_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/US1/results/tick/US1_tick_alpha.pkl
saving: /media/ak/T71/August11th2022Experime

saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/FB1/results/volume/FB1_volume_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/dollar/TY1_dollar_tau.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/FB1/results/volume/FB1_volume_list_H.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/XM1/results/volume/XM1_volume_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/G_1/results/tick/G_1_tick_tau.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/dollar/TY1_dollar_alpha.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/FB1/results/volume/FB1_volume_list_H_intercept.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/FB1/results/volume/FB1_volume_tau.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/dollar/TY1_dollar_mfSpect.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/

saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/G_1/results/volume/G_1_volume_list_H.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/dollar/RX1_dollar_list_H.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/G_1/results/volume/G_1_volume_list_H_intercept.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/G_1/results/volume/G_1_volume_tau.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/XM1/results/dollar/XM1_dollar_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/G_1/results/volume/G_1_volume_alpha.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TU1/results/volume/TU1_volume_list_H.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/dollar/RX1_dollar_list_H_intercept.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/G_1/results/volume/G_1_volume_mfSpect.pkl
/media/ak/T71/August11th2022Experiments/E

saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/DU1/results/calendar/DU1_calendar_list_H_intercept.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/DU1/results/calendar/DU1_calendar_tau.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/DU1/results/calendar/DU1_calendar_alpha.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/DU1/results/calendar/DU1_calendar_mfSpect.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/FV1/results/volume/FV1_volume_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/FV1/results/volume/FV1_volume_list_H.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/FV1/results/volume/FV1_volume_list_H_intercept.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/DU1/results/dollar/DU1_dollar_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/DU1/results/dollar/DU1_dollar_list_H.pkl
saving: /media/ak/T71/August

In [7]:
# Spot-check one saved output: load the RX1 / volume / tau pickle and display it.
file_title = '/media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/volume/RX1_volume_tau.pkl'
pd.read_pickle(file_title)

Unnamed: 0,30volume,4volume,5volume,31volume,0volume,1volume,2volume,3volume,6volume,7volume,...,34volume,32volume,35volume,27volume,37volume,38volume,40volume,42volume,36volume,44volume
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,-1.672846,-1.678881,-1.671882,-1.672221,-0.020133,-0.008486,-0.027017,0.102955,-0.010392,-1.674607,...,-0.014737,-0.042294,0.118018,-0.003357,-0.013145,-0.021534,-0.021363,-0.520803,-1.673838,-0.029962
66,-1.720627,-1.726674,-1.719673,-1.720016,-0.019973,-0.008290,-0.025937,0.107419,-0.009869,-1.722401,...,-0.014590,-0.041630,0.122286,-0.003280,-0.012892,-0.021153,-0.020941,-0.535789,-1.721600,-0.029715
67,-1.768407,-1.774468,-1.767464,-1.767811,-0.019823,-0.008100,-0.024896,0.111853,-0.009370,-1.770195,...,-0.014450,-0.041003,0.126516,-0.003206,-0.012652,-0.020794,-0.020541,-0.550808,-1.769362,-0.029488
68,-1.816189,-1.822261,-1.815255,-1.815605,-0.019682,-0.007916,-0.023893,0.116253,-0.008892,-1.817989,...,-0.014319,-0.040413,0.130708,-0.003136,-0.012425,-0.020454,-0.020161,-0.565858,-1.817125,-0.029281
