In [14]:
import os
import pandas as pd
import pickle
import sys
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import ThreadPoolExecutor
import re

In [15]:

sys.path.insert(0, '/directory/tothe/handshakefile/')
sys.path.append('/home/ak/Documents/PaperCode/stylised_facts')

mfdfaDataFrames = '/media/ak/T71/August11th2022Experiments/mfdfaDataFrames'  # Input Files
LinearMMDOutputFiles = '/media/ak/T71/August11th2022Experiments/ExperimentOne/TestMMDOutputFiles'  # Location to Save Files
experimentOne = '/media/ak/T71/August11th2022Experiments/ExperimentOne'

In [18]:
def process_symbol(symbol):
    symbol_analyzer = SymbolAnalyzer(symbol=symbol, experimentOne=experimentOne,
                                     bars=['tick', 'calendar', 'dollar', 'volume'])
    symbol_analyzer.check_directory()
    symbol_analyzer.process_bars(max_workers=4)
    
    
def process_symbols(symbols, max_symbol_analyzers):
    """
    Process a list of symbols in parallel.

    :param symbols: list, a list of symbols to process.
    :param max_symbol_analyzers: int, the maximum number of concurrent symbol analyzers.
    """
    with ThreadPoolExecutor(max_workers=max_symbol_analyzers) as executor:
        futures = [executor.submit(process_symbol, symbol) for symbol in symbols]

    for future in futures:
        future.result()

class SymbolAnalyzer:
    def __init__(self, symbol='FB1', experimentOne='', bars=None):
        self.symbol = symbol
        self.symbolPath = os.path.join(experimentOne, str(self.symbol))
        self.resultsMainPath = '/media/ak/T71/August11th2022Experiments/mfdfaDataFrames'
        self.resultsPath = os.path.join(self.resultsMainPath, self.symbol, 'results')
        os.makedirs(self.resultsPath, exist_ok=True)
        self.bars = bars or ['tick']
        self.variables = ['n_F', 'list_H', 'list_H_intercept', 'tau', 'alpha', 'mfSpect']

    def check_directory(self):
        print(self.symbolPath, os.path.isdir(self.symbolPath))

    def get_files(self, bar):
        files = [f for f in os.listdir(self.symbolPath) if str(bar) in f]
        return files

    def process_file(self, file, bar, variable):
        file_loc = os.path.join(self.symbolPath, file)
        variable_array = pd.read_pickle(file_loc)[str(bar)][str(variable)]
        index = file.split('_')[1].split('.')[0]
        #Extract the integer from the column name
#         index = re.findall(r'\d+', file.split('_')[1].split('.')[0])[0]
        
        #result_dict[index] = variable_array
        return index, variable_array

    def process_files_parallel(self, files, bar, max_workers=4):
        for variable in self.variables:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                results = [executor.submit(self.process_file, file, bar, variable) for file in files]
            result_dict = {r.result()[0]: r.result()[1] for r in results}
            bar_results_path = os.path.join(self.resultsPath, bar)
            os.makedirs(bar_results_path, exist_ok=True)
            save_path = os.path.join(bar_results_path, f"{self.symbol}_{bar}_{variable}.pkl")
            self.save_dataframe_to_pickle(pd.DataFrame(result_dict), save_path)

    def save_dataframe_to_pickle(self, df, save_path, protocol=pickle.HIGHEST_PROTOCOL):
        with open(save_path, 'wb') as f:
            pickle.dump(df, f, protocol=protocol)
        print(f'saving: {save_path}')

    def process_bars(self, max_workers=4):
        for bar in self.bars:
            files = self.get_files(bar)
            self.process_files_parallel(files, bar, max_workers)




In [19]:
if __name__ == '__main__':

    experimentOne = experimentOne
    symbols = ['TY1', 'RX1', 'XM1']
    max_symbol_analyzers = 4

    process_symbols(symbols, max_symbol_analyzers)

/media/ak/T71/August11th2022Experiments/ExperimentOne/TY1 True
/media/ak/T71/August11th2022Experiments/ExperimentOne/RX1 True
/media/ak/T71/August11th2022Experiments/ExperimentOne/XM1 True
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/tick/RX1_tick_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/tick/RX1_tick_list_H.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/tick/RX1_tick_list_H_intercept.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/tick/RX1_tick_tau.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/XM1/results/tick/XM1_tick_n_F.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/tick/RX1_tick_alpha.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/tick/RX1_tick_mfSpect.pkl
saving: /media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/calendar/RX1_calendar_n_F.p

In [20]:
file_title =str('/media/ak/T71/August11th2022Experiments/mfdfaDataFrames/RX1/results/volume/RX1_volume_list_H.pkl')
pd.read_pickle(file_title)

Unnamed: 0,4volume,5volume,20volume,30volume,31volume,0volume,1volume,2volume,3volume,6volume,...,32volume,33volume,34volume,35volume,36volume,37volume,38volume,40volume,42volume,44volume
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,-0.193966,-0.191966,-0.193167,-0.192242,-0.192063,0.279962,0.283290,0.277995,0.315130,0.282745,...,0.273630,0.331860,0.281504,0.319434,-0.192525,0.281958,0.279562,0.279611,0.136913,0.277154
66,-0.201854,-0.199909,-0.201072,-0.200174,-0.200004,0.272230,0.275475,0.270573,0.307616,0.275036,...,0.266214,0.324030,0.273725,0.311746,-0.200444,0.274197,0.271902,0.271961,0.128947,0.269524
67,-0.209316,-0.207423,-0.208549,-0.207678,-0.207516,0.264913,0.268081,0.263542,0.300501,0.267738,...,0.259188,0.316612,0.266365,0.304464,-0.207936,0.266851,0.264650,0.264719,0.121403,0.262300
68,-0.216385,-0.214541,-0.215633,-0.214786,-0.214633,0.257978,0.261075,0.256870,0.293751,0.260818,...,0.252523,0.309573,0.259390,0.297555,-0.215033,0.259888,0.257775,0.257852,0.114248,0.255452
