In [1]:
import os
import pandas as pd
import pickle
import sys
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import ThreadPoolExecutor


In [2]:

# --- Module search path ------------------------------------------------------
# Make project-local modules importable from this notebook.
sys.path.insert(0, '/directory/tothe/handshakefile/')
sys.path.append('/home/ak/Documents/PaperCode/stylised_facts')

# --- Data locations ----------------------------------------------------------
# Root of the per-symbol input directories read by the analysis below.
experimentOne = '/media/ak/T71/August11th2022Experiments/ExperimentOne'
# Location to save files (sub-directory of the experiment root).
LinearMMDOutputFiles = experimentOne + '/TestMMDOutputFiles'
# Input files.
mfdfaDataFrames = '/media/ak/T71/August11th2022Experiments/mfdfaDataFrames'

In [3]:
def process_symbol(symbol):
    """
    Run the complete bar-processing pipeline for a single symbol.

    Builds a SymbolAnalyzer rooted at the module-level ``experimentOne``
    directory, prints whether the symbol's input directory exists, and then
    processes every bar type with ``max_workers=4``.

    :param symbol: str, the symbol to analyze.
    """
    bar_choices = ['tick', 'calendar', 'dollar', 'volume']
    analyzer = SymbolAnalyzer(symbol=symbol, experimentOne=experimentOne, bars=bar_choices)
    analyzer.check_directory()
    analyzer.process_bars(max_workers=4)

class SymbolAnalyzer:
    """
    Gather one variable at a time from a symbol's per-file pickles and save it
    as a single DataFrame per (bar, variable) pair.

    For each bar in ``bars`` the analyzer lists the files in the symbol's input
    directory whose name contains the bar string, reads
    ``pickle[bar][variable]`` from each of them, and writes the combined frame
    to ``<resultsMainPath>/<symbol>/results/<bar>/<symbol>_<bar>_<variable>.pkl``.
    """

    # Results root used when the caller does not pass ``resultsMainPath``.
    DEFAULT_RESULTS_MAIN_PATH = '/media/ak/T71/August11th2022Experiments/mfdfaDataFrames'

    def __init__(self, symbol='FB1', experimentOne='', bars=None, resultsMainPath=None):
        """
        Initialize the SymbolAnalyzer with a symbol, base directory, and a list of bars.

        Creates ``<resultsMainPath>/<symbol>/results`` as a side effect.

        :param symbol: str, the symbol to analyze.
        :param experimentOne: str, the base directory containing the symbol data.
        :param bars: list, a list of bar choices to process (e.g., ['tick', 'calendar']);
            defaults to ['tick'] when omitted or empty.
        :param resultsMainPath: str or None, root directory for the saved results;
            None (or an empty string) selects DEFAULT_RESULTS_MAIN_PATH.
        """
        self.symbol = symbol
        self.symbolPath = os.path.join(experimentOne, str(self.symbol))
        self.resultsMainPath = resultsMainPath or self.DEFAULT_RESULTS_MAIN_PATH
        # str() keeps this consistent with symbolPath for non-string symbols.
        self.resultsPath = os.path.join(self.resultsMainPath, str(self.symbol), 'results')
        os.makedirs(self.resultsPath, exist_ok=True)
        self.bars = bars or ['tick']
        self.variables = ['n_F', 'list_H', 'list_H_intercept', 'tau', 'alpha', 'mfSpect']

    def check_directory(self):
        """
        Print the symbol path and a boolean indicating if the directory exists.
        """
        print(self.symbolPath, os.path.isdir(self.symbolPath))

    def get_files(self, bar):
        """
        Get the list of files for a specific bar choice.

        :param bar: str, the bar choice to search for in the filenames.
        :return: list, the filenames containing the bar choice, sorted by name so
            that repeated runs see the files in the same order.
        :raises FileNotFoundError: if the symbol directory does not exist.
        """
        # os.listdir returns entries in arbitrary order; sort for reproducibility.
        return sorted(f for f in os.listdir(self.symbolPath) if str(bar) in f)

    def save_dataframe_to_pickle(self, df, save_path, protocol=pickle.HIGHEST_PROTOCOL):
        """
        Save a DataFrame to a pickle file.

        :param df: DataFrame, the DataFrame to save.
        :param save_path: str, the path where the pickle file will be saved.
        :param protocol: int, the pickle protocol to use when saving the DataFrame.
        """
        with open(save_path, 'wb') as f:
            pickle.dump(df, f, protocol=protocol)
        print(f'saving: {save_path}')

    def _file_index(self, file):
        """
        Derive the result-column label from a filename.

        The label is the first '_'-separated token after the symbol, with any
        extension removed (e.g. 'TY1_70volume.pkl' -> '70volume').

        :param file: str, a filename from the symbol directory.
        :return: str, the column label for that file.
        :raises IndexError: if the name has no '_' and does not start with the symbol.
        """
        prefix = f"{self.symbol}_"
        if file.startswith(prefix):
            # Strip the symbol first: a symbol that itself contains '_' (such as
            # 'G_1') would otherwise make every file yield the same token, and
            # the columns would overwrite each other.
            token = file[len(prefix):].split('_')[0]
        else:
            token = file.split('_')[1]
        return token.split('.')[0]

    def _load_variable(self, file, bar, variable):
        """
        Read one variable for one bar from a single pickle file.

        :param file: str, filename inside the symbol directory.
        :param bar: str, first-level key into the unpickled object.
        :param variable: str, second-level key into the unpickled object.
        :return: tuple, (column label, extracted value).
        """
        file_loc = os.path.join(self.symbolPath, file)
        # NOTE: unpickling can execute arbitrary code; only read trusted files.
        variable_array = pd.read_pickle(file_loc)[str(bar)][str(variable)]
        return self._file_index(file), variable_array

    def process_file(self, file, bar, variable, result_dict):
        """
        Load ``variable`` for ``bar`` from ``file`` and store it in ``result_dict``.

        :param file: str, filename inside the symbol directory.
        :param bar: str, the bar choice to read.
        :param variable: str, the variable to read.
        :param result_dict: dict, updated in place with {column label: value}.
        """
        index, variable_array = self._load_variable(file, bar, variable)
        result_dict[index] = variable_array

    def save_results(self, result_dict, bar, variable):
        """
        Save the collected values of one variable as a DataFrame pickle.

        :param result_dict: dict, {column label: value} for every processed file.
        :param bar: str, the bar choice; used as sub-directory and in the filename.
        :param variable: str, the variable name; used in the filename.
        """
        bar_results_path = os.path.join(self.resultsPath, bar)
        os.makedirs(bar_results_path, exist_ok=True)
        save_path = os.path.join(bar_results_path, f"{self.symbol}_{bar}_{variable}.pkl")
        self.save_dataframe_to_pickle(pd.DataFrame(result_dict), save_path)

    def process_files_parallel(self, files, bar, max_workers=4):
        """
        For every variable, read it from all ``files`` in a thread pool and save
        the combined result.

        Columns are inserted in the order of ``files``, independent of which
        thread finishes first. A file that cannot be processed is reported and
        skipped so that one bad file does not discard the rest.

        :param files: list, filenames to read.
        :param bar: str, the bar choice being processed.
        :param max_workers: int, size of the thread pool.
        """
        for variable in self.variables:
            result_dict = {}
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = [(file, executor.submit(self._load_variable, file, bar, variable))
                           for file in files]
                for file, future in futures:
                    try:
                        # result() re-raises whatever the worker raised; without
                        # this call a failure would vanish silently.
                        index, variable_array = future.result()
                    except Exception as exc:
                        print(f'skipping: {file} ({bar}/{variable}): {exc!r}')
                        continue
                    result_dict[index] = variable_array
            self.save_results(result_dict, bar, variable)

    def process_bars(self, max_workers=4):
        """
        Process every configured bar choice in turn.

        :param max_workers: int, thread-pool size used for each bar.
        """
        for bar in self.bars:
            files = self.get_files(bar)
            self.process_files_parallel(files, bar, max_workers)




In [4]:

if __name__ == '__main__':
    # Single-symbol run for 'TY1' across all four bar types.
    symbol_analyzer3 = SymbolAnalyzer(
        symbol='TY1',
        experimentOne=experimentOne,
        bars=['tick', 'calendar', 'dollar', 'volume'],
    )
    # Print the input directory and whether it exists before reading from it.
    symbol_analyzer3.check_directory()
    symbol_analyzer3.process_bars(max_workers=4)

/media/ak/T71/August11th2022Experiments/ExperimentOne/TY1 True
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/tick/TY1_tick_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/tick/TY1_tick_list_H.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/tick/TY1_tick_list_H_intercept.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/tick/TY1_tick_tau.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/tick/TY1_tick_alpha.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/tick/TY1_tick_mfSpect.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/calendar/TY1_calendar_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/calendar/TY1_calendar_list_H.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/calendar/TY1_calendar_list_H_interce

In [5]:
# Preview one saved result frame: variable 'alpha' for volume bars of TY1.
ty1_volume_alpha_path = '/media/ak/T71/August11th2022Experiments/mfdfaDataFrames/TY1/results/volume/TY1_volume_alpha.pkl'
pd.read_pickle(ty1_volume_alpha_path)

Unnamed: 0,70volume,69volume,68volume,67volume,66volume,65volume,64volume,63volume,62volume,61volume,...,8volume,7volume,6volume,5volume,4volume,3volume,2volume,0volume,1volume,mfdfa
0,0.792137,1.317055,0.723835,0.569220,0.849840,0.635783,0.779294,0.680726,0.657969,0.855414,...,0.632283,1.348924,0.617483,0.663597,0.697984,0.593249,1.344848,0.785951,0.561809,
1,0.783122,1.312154,0.715810,0.563115,0.845206,0.626745,0.769997,0.672266,0.649302,0.850614,...,0.625092,1.345770,0.613257,0.657871,0.691729,0.585899,1.341732,0.778359,0.555695,
2,0.772810,1.306254,0.706891,0.556430,0.840109,0.616731,0.759384,0.662884,0.639701,0.845362,...,0.616899,1.341901,0.608740,0.651516,0.684508,0.577905,1.337850,0.769901,0.549151,
3,0.760962,1.299090,0.697089,0.549160,0.834460,0.605820,0.747244,0.652586,0.629200,0.839551,...,0.607673,1.337107,0.603933,0.644471,0.676236,0.569318,1.332969,0.760570,0.542201,
4,0.747349,1.290324,0.686451,0.541311,0.828146,0.594138,0.733387,0.641420,0.617878,0.833046,...,0.597427,1.331118,0.598851,0.636677,0.666856,0.560206,1.326787,0.750396,0.534879,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,-0.025337,0.014260,0.005750,-0.019868,-0.021342,0.043480,-0.022417,-0.008667,0.005314,-0.022593,...,0.023061,0.142672,-0.022030,-0.042288,-0.004105,0.007452,0.051203,0.006083,-0.013504,0.059973
65,-0.026263,0.012029,0.005699,-0.020270,-0.021276,0.042633,-0.023457,-0.008493,0.005224,-0.022718,...,0.021344,0.135000,-0.021966,-0.042764,-0.004037,0.007449,0.044862,0.006139,-0.014090,0.058642
66,-0.026901,0.010394,0.005675,-0.020505,-0.021146,0.042080,-0.024189,-0.008285,0.005166,-0.022763,...,0.020113,0.128463,-0.021842,-0.043030,-0.003946,0.007462,0.039593,0.006207,-0.014499,0.057332
67,-0.027327,0.009218,0.005667,-0.020624,-0.020981,0.041740,-0.024692,-0.008064,0.005127,-0.022759,...,0.019242,0.122948,-0.021688,-0.043155,-0.003845,0.007485,0.035231,0.006279,-0.014782,0.056040


In [None]:
from concurrent.futures import ThreadPoolExecutor

if __name__ == '__main__':
    # Batch run: one SymbolAnalyzer per symbol, several symbols at a time.

    # List of symbols to process. Every symbol writes to its own results
    # directory, so each symbol must be submitted only once: two concurrent runs
    # of the same symbol would write the same pickle files at the same time.
    # NOTE(review): this list previously named 'XM1' twice; if the second entry
    # was meant to be a different symbol, add it here.
    symbols = ['TY1', 'FB1', 'XM1', 'US1',
               'G_1', 'RX1', 'TU1', 'FV1', 'YM1', 'DU1', 'JB1']
    # Order-preserving de-duplication, in case a repeat is reintroduced above.
    unique_symbols = list(dict.fromkeys(symbols))

    # Maximum number of concurrent symbol analyzers
    max_symbol_analyzers = 4

    # Process symbols concurrently; leaving the ``with`` block waits for every
    # submitted task to finish.
    with ThreadPoolExecutor(max_workers=max_symbol_analyzers) as executor:
        futures = [executor.submit(process_symbol, symbol) for symbol in unique_symbols]

    # All tasks are done at this point. result() re-raises any exception a
    # worker hit, so a failed symbol does not go unnoticed.
    for future in futures:
        future.result()

In [None]:
# Preview one saved result frame: variable 'tau' for volume bars of RX1.
file_title = '/media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/volume/RX1_volume_tau.pkl'
pd.read_pickle(file_title)