In [88]:
import pandas as pd
import sys
sys.path.append('/home/ak/Documents/Research/PaperCode/stylised_facts/')
from fathon import fathonUtils as fu

import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import pickle
import os
from collections import defaultdict
from multiprocessing import Pool
from concurrent.futures import ProcessPoolExecutor
import time
import argparse
import itertools
from mdfda import mdfda_experiments_utils as mdf


In [21]:
mainDataPath = '/media/ak/Data/InterestRateFuturesData'
mainMFDFAPath = os.path.join(mainDataPath, 'MFDFA')
reconLOB = os.path.join(mainDataPath,'ReconstructedLOB')

eventClockPaths ='/media/ak/Data/InterestRateFuturesData/EventClocksFiles'
symbols = [f for f in os.listdir(eventClockPaths) if str('.json') not in f]

In [22]:
# def create_directories(symbols):
#     """
#       Create directories with symbols as the names.

#       Args:
#         symbols: A list of symbols.

#       Returns:
#         The list of created directories.
#       """
#     directories = []
#     for symbol in symbols:
#         dir_path = os.path.join(".", symbol)
#     if not os.path.exists(dir_path):
#       os.mkdir(dir_path)
#       directories.append(dir_path)


In [23]:
bars =['tick', 'volume', 'dollar']
directories =['/media/ak/Data/InterestRateFuturesData/MFDFA/DU1/tick',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/DU1/volume',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/DU1/dollar',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/FB1/tick',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/FB1/volume',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/FB1/dollar',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/RX1/tick',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/RX1/volume',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/RX1/dollar',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/TY1/tick',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/TY1/volume',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/TY1/dollar',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/US1/tick',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/US1/volume',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/US1/dollar',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/UST2y/tick',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/UST2y/volume',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/UST2y/dollar',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/XM1/tick',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/XM1/volume',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/XM1/dollar',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/YM1/tick',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/YM1/volume',
 '/media/ak/Data/InterestRateFuturesData/MFDFA/YM1/dollar']

In [24]:


scaler = MinMaxScaler()
standard_scaler = StandardScaler()


In [153]:

# functions

def read_pkl_idx(file_loc, file_idx):
    """
    Reads a pickle file based on the given file location and index.
    This function reads a pickle file based on the given file location and index.
    It can be used to read a specific pickle file containing financial data for further analysis.

    :param file_loc: str, file location containing the pickle files
    :param file_idx: int, index of the file to read
    :return: dict, content of the pickle file
    """
    files = os.listdir(file_loc)
    file_idx_loc = os.path.join(file_loc, files[file_idx])
    dict_idx = pd.read_pickle(file_idx_loc)
    return dict_idx

def mfdfa_output(lob_df_input, bar_type_):
    if lob_df_input is None:
        return None
    else:
        raw_data = lob_df_input.pct_changes.fillna(0) # this is the chosen data to use
        # follows Fathon Example from here onwards
        data_input = mdf.to_agg(raw_data)
    try:
        # Create the winSizes list based on the size of the data
        # Here I'm making the upper limit of the size of the data less one element
        max_window_size = len(raw_data) -1
        winSizes = fu.linRangeByStep(5, max_window_size)  

        qs = np.arange(-3, 4, 0.1)
        revSeg = True
        polOrd = 1
        testClass = mdf.mfdfaquantities(data_input, winSizes, qs, revSeg, polOrd)
        n, F = testClass.n_F_output()
        list_H, list_H_intercept = testClass.H_and_H_intcpt_output()
        alpha, mfSpect = testClass.compute_multi_fractal_spectrum()
      
        mfdfa_output_dict_ = defaultdict(dict)
        mfdfa_output_dict_[bar_type_]['micro_price_change'] = raw_data
        mfdfa_output_dict_[bar_type_]['arrival_rate_median'] = lob_df_input.arrival_rate_median
        mfdfa_output_dict_[bar_type_]['sumple_vol'] = lob_df_input.simple_vol
        mfdfa_output_dict_[bar_type_]['median_traded_volume'] = lob_df_input.TradedVolume_median
        mfdfa_output_dict_[bar_type_]['n_F'] = dict(zip(n, F))
        mfdfa_output_dict_[bar_type_]['list_H'] = list_H
        mfdfa_output_dict_[bar_type_]['list_H_intercept'] = list_H_intercept
        mfdfa_output_dict_[bar_type_]['tau'] = testClass.compute_mass_exponents()
        mfdfa_output_dict_[bar_type_]['alpha'] = alpha
        mfdfa_output_dict_[bar_type_]['mfSpect'] = mfSpect
    except ValueError:
        pass

    return mfdfa_output_dict_



def process_symbol_files(symbol_input_files_loc, symbol_file_idx_, bar_type_, symbol):
    """
       Processes a single symbol file with the given index and bar type.

       :param symbol_input_files_loc: str, file location containing the symbol files
       :param symbol_file_idx_: int, index of the file to process
       :param bar_type_: str, bar type to be used
       :param symbol: str, symbol for the current file
       :return: defaultdict, dictionary containing the MFDFA output
       """
    print('doing index', symbol_file_idx_)
  
    df = (read_pkl_idx(symbol_input_files_loc, symbol_file_idx_))
    
    mfdfa_dict_output = mfdfa_output(df, str(bar_type_))

    
    print('saved')
    
    return mfdfa_dict_output


def all_in_calculations(input_files_loc, symbol_file_idx, bar_type, symbol):
    """
    Performs calculations for a specific symbol file and saves the result as a pickle file.

    :param symbol_input_files_loc: str, path to the location of the symbol input files
    :param symbol_file_idx: int, index of the symbol file to process
    :param bar_type: str, type of the bar (e.g., 'tick')
    :param symbol: str, symbol to be processed (e.g., 'FB1')
    :return: dict, the output dictionary containing the results of the MFDFA calculations
    """
    print('doing index', symbol_file_idx)
    symbol_input_files_loc = os.path.join(input_files_loc, symbol, bar_type)
    
    df = read_pkl_idx(symbol_input_files_loc, symbol_file_idx)

    mfdfa_dict_output = mfdfa_output(df, bar_type)
        
    test_output_loc = os.path.join(mainMFDFAPath,symbol, 
                               bar_types_list[barIdx],
                               str(os.listdir(inputLocation)[symbol_file_idx_].split(".")[0])+"_mfdfa.pkl")
    
    pickle.dump(mfdfa_dict_output, open(test_output_loc, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
    print('saved')
    return mfdfa_dict_output


def main(inputLocation, symbols, bar_types):
    """
    Main function to process multiple symbols and bar types using parallel processing.

    :param symbols: list of str, a list of symbols to be processed
    :param bar_types: list of str, a list of bar types to be processed
    :param experimentsLocation: str, path to the location of the experiments
    :param expOneLocation: str, path to the location of the ExperimentOne folder
    """
    for symbol in symbols:
        for bar_type in bar_types:
            symbol_input_files_loc = os.path.join(inputLocation, symbol, bar_type)
            print(symbol_input_files_loc, os.path.isdir(symbol_input_files_loc))
            symbol_files = os.listdir(symbol_input_files_loc)
            symbol_files_count = len(symbol_files)
            if symbol_files_count == 0:
                print('skip')
            else:
#                 print(symbol_files)
                print(symbol_files_count)

            with Pool() as pool:
                pool.starmap(
                    all_in_calculations,
                    [(inputLocation, symbol_file_idx, bar_type, symbol) for symbol_file_idx in
                     range(symbol_files_count)]
                )




In [154]:
symbolsIdx =1
symbol_ = symbols[symbolsIdx]

barIdx = 0
outputLocation = os.path.join(mainMFDFAPath, symbol_, bar_types_list[barIdx])
inputLocation = os.path.join(eventClockPaths)
#, symbol_, bar_types_list[barIdx])
#     expOneLocation = os.path.join(experimentsLocation, 'ExperimentOne')
#     main(symbols_list, bar_types_list, experimentsLocation, expOneLocation)

In [155]:
#(symbols, bar_types, inputLocation, expOneLocation):
bar_types_list = ['dollar', 'volume', 'tick']  # Add more bar types to the list if needed
print(symbols)
main(inputLocation, symbols, bar_types_list)

['DU1', 'FB1', 'FV1', 'JB1', 'KE1', 'OE1', 'RX1', 'US1', 'US2y', 'XM1', 'YM1']
/media/ak/Data/InterestRateFuturesData/EventClocksFiles/DU1/dollar True
skip
/media/ak/Data/InterestRateFuturesData/EventClocksFiles/DU1/volume True
skip
/media/ak/Data/InterestRateFuturesData/EventClocksFiles/DU1/tick True
skip
/media/ak/Data/InterestRateFuturesData/EventClocksFiles/FB1/dollar True
136
doing indexdoing indexdoing indexdoing indexdoing indexdoing indexdoing indexdoing indexdoing indexdoing indexdoing indexdoing index          18 122142152433452730
36









 39
doing indexdoing index  06
doing indexdoing index  9

3


KeyboardInterrupt: 

In [38]:
listOfFiles = os.listdir(inputLocation)
index = 10
fileLoc = os.path.join(inputLocation, listOfFiles[index])

In [86]:
# df =pd.read_pickle(fileLoc)
inputLocation

'/media/ak/Data/InterestRateFuturesData/EventClocksFiles/FB1/dollar'

In [51]:
raw_data = df.pct_changes.fillna(0)

In [95]:
symbol = 'FB1'
# def process_symbol_files(symbol_input_files_loc, symbol_file_idx_, bar_type_, symbol):
# process_symbol_files(inputLocation, 10, 'dollar', 'FB1')

In [112]:
symbol_file_idx_ = 10
test_output_loc = os.path.join(mainMFDFAPath,symbol, 
                               bar_types_list[barIdx],
                               str(os.listdir(inputLocation)[symbol_file_idx_].split(".")[0])+"_mfdfa.pkl")
# outputLocation
test_output_loc

'/media/ak/Data/InterestRateFuturesData/MFDFA/FB1/dollar/20180430_mfdfa.pkl'

In [84]:
dictExample = mfdfa_output(df, 'tick')
dictExample['tick'].keys()

dict_keys(['micro_price_change', 'arrival_rate_median', 'sumple_vol', 'median_traded_volume', 'n_F', 'list_H', 'list_H_intercept', 'tau', 'alpha', 'mfSpect'])

In [114]:
inputLocation

'/media/ak/Data/InterestRateFuturesData/EventClocksFiles/FB1/dollar'

In [117]:


process_symbol_files(symbol_input_files_loc=inputLocation, 10, bar_type_='dollar', symbol='FB1')

SyntaxError: positional argument follows keyword argument (3303364208.py, line 1)

In [118]:
process_symbol_files(symbol_input_files_loc = inputLocation, 
                     symbol_file_idx_ =10, 
                     bar_type_ ='dollar',
                     symbol='FB1')

doing index 10
saved


defaultdict(dict,
            {'dollar': {'micro_price_change': 0     0.000000
              1     0.000063
              2    -0.000110
              3    -0.000200
              4    -0.000477
              5    -0.000228
              6     0.000512
              7     0.000058
              8    -0.000341
              9    -0.000021
              10    0.000000
              11    0.000000
              12   -0.000029
              13    0.000019
              14    0.000000
              15    0.000193
              16    0.000069
              17   -0.000189
              18   -0.000341
              19    0.000131
              20    0.000278
              21    0.000273
              22    0.000089
              23    0.000625
              24    0.000445
              25    0.000220
              26    0.000047
              27    0.000047
              28    0.000126
              29    0.000314
              30    0.000126
              Name: pct_changes, dtype: float64,
  

In [157]:
dollar_dict =pd.read_pickle('/media/ak/Data/InterestRateFuturesData/MFDFA/XM1/dollar/20180422_mfdfa.pkl')

In [159]:
dollar_dict['dollar'].keys()

dict_keys(['micro_price_change', 'arrival_rate_median', 'sumple_vol', 'median_traded_volume', 'n_F', 'list_H', 'list_H_intercept', 'tau', 'alpha', 'mfSpect'])

In [160]:
dollar_dict['dollar']['tau']

array([-2.78920443, -2.71663923, -2.6445879 , -2.57308706, -2.50217615,
       -2.43189757, -2.36229678, -2.29342231, -2.22532564, -2.15806094,
       -2.0916844 , -2.0262534 , -1.96182512, -1.89845481, -1.83619361,
       -1.77508615, -1.71516822, -1.65646475, -1.59898868, -1.54274088,
       -1.48771129, -1.43388124, -1.38122646, -1.32972051, -1.27933809,
       -1.23005801, -1.18186559, -1.13475453, -1.08872839, -1.04380153,
       -1.        , -0.95736207, -0.91593875, -0.87579413, -0.83700551,
       -0.79966329, -0.76387043, -0.72974135, -0.69740003, -0.66697726,
       -0.63860691, -0.61242119, -0.58854522, -0.56709103, -0.54815171,
       -0.53179613, -0.5180648 , -0.50696738, -0.49848195, -0.49255617,
       -0.48911005, -0.48803985, -0.48922288, -0.49252251, -0.49779312,
       -0.50488471, -0.51364679, -0.52393169, -0.53559709, -0.54850785,
       -0.56253725, -0.57756772, -0.59349111, -0.6102087 , -0.62763092,
       -0.64567689, -0.66427394, -0.68335698, -0.70286788, -0.72