In [1]:
import os
import pandas as pd
from pathlib import Path

class DataAnalyzer:
    """Collect one variable from the pickled files stored for a single symbol.

    Files live under ``<information_clock_dir>/<symbol>/``. They are selected
    by checking whether ``bar_choice`` occurs in the file name; each selected
    file is loaded and ``content[bar_choice][mfdfa_var]`` is extracted.
    """

    def __init__(self, information_clock_dir, symbol, bar_choice, mfdfa_var):
        """
        Initialize the DataAnalyzer class with the required parameters.

        Args:
        - information_clock_dir (str): The directory containing one
          sub-directory per symbol.
        - symbol (str): The symbol to analyze (name of the sub-directory).
        - bar_choice (str): Substring used to select files by name, and the
          first-level key looked up in each loaded file.
        - mfdfa_var (str): Second-level key looked up in each loaded file.
          It is NOT used to filter file names.
        """
        self.symbol = symbol
        self.information_clock_dir = information_clock_dir
        self.bar_choice = bar_choice
        self.mfdfa_var = mfdfa_var
        # Directory holding the files of this particular symbol.
        self.symbol_inform_clock_dir = os.path.join(information_clock_dir, symbol)

    def get_symbols(self):
        """
        Get a list of symbols from the information clock directory.

        Returns:
        - list: Sorted names of all entries in ``information_clock_dir``.
        """
        return sorted(os.listdir(self.information_clock_dir))

    def get_symbol_files(self):
        """
        Get a list of files for the current symbol.

        Returns:
        - list: Sorted names of all entries in the symbol's directory.
        """
        return sorted(os.listdir(self.symbol_inform_clock_dir))

    def get_files_by_keyword(self, files, keyword):
        """
        Get a list of files that contain a specific keyword.

        Args:
        - files (list): A list of file names.
        - keyword (str): Substring that must occur in a file name.

        Returns:
        - list: The file names containing ``keyword``, in their input order.
        """
        return [f for f in files if keyword in f]

    def analyze(self):
        """
        Analyze data for the given symbol.

        Every file of the symbol whose name contains ``bar_choice`` is loaded
        and the value stored under ``[bar_choice][mfdfa_var]`` is collected.

        Returns:
        - dict: Maps the position of each matching file (0, 1, ... in sorted
          file-name order) to the value extracted from that file.

        Raises:
        - ValueError: If no file name in the symbol directory contains
          ``bar_choice``.
        """
        files_filtered_by_bar_choice = self.get_files_by_keyword(self.get_symbol_files(), self.bar_choice)

        if not files_filtered_by_bar_choice:
            # Only bar_choice takes part in file selection, so the message
            # reports that filter and the directory that was searched.
            raise ValueError(
                f"No files found with bar_choice='{self.bar_choice}' "
                f"in '{self.symbol_inform_clock_dir}'."
            )

        mfdfa_var_dict = {}
        for idx, file in enumerate(files_filtered_by_bar_choice):
            # NOTE(review): unpickling can execute arbitrary code; only point
            # this class at directories whose files are trusted.
            file_content = pd.read_pickle(os.path.join(self.symbol_inform_clock_dir, file))
            mfdfa_var_dict[idx] = file_content[str(self.bar_choice)][str(self.mfdfa_var)]

        return mfdfa_var_dict


In [2]:
# Inputs for this run: data root, symbol, bar type and the variable to extract
information_clock_dir = '/media/ak/T7/August11th2022Experiments/InfoClockDataFrames'
symbol = 'G_1'
bar_choice = 'calendar'
mfdfa_var = 'tau'

# Build the analyzer, passing every argument by name
analyzer = DataAnalyzer(
    information_clock_dir=information_clock_dir,
    symbol=symbol,
    bar_choice=bar_choice,
    mfdfa_var=mfdfa_var,
)

# Gather the requested variable from every matching file of the symbol
analysis_result = analyzer.analyze()

# Show the collected values
print(analysis_result)

{0: array([-3.26005682e+00, -3.17560538e+00, -3.09153982e+00, -3.00791726e+00,
       -2.92480916e+00, -2.84230454e+00, -2.76051304e+00, -2.67956696e+00,
       -2.59962054e+00, -2.52084498e+00, -2.44341708e+00, -2.36750050e+00,
       -2.29321966e+00, -2.22062985e+00, -2.14969045e+00, -2.08024981e+00,
       -2.01204823e+00, -1.94473927e+00, -1.87792284e+00, -1.81118110e+00,
       -1.74410878e+00, -1.67633434e+00, -1.60753215e+00, -1.53742848e+00,
       -1.46580467e+00, -1.39250028e+00, -1.31741780e+00, -1.24052957e+00,
       -1.16188676e+00, -1.08162988e+00, -1.00000000e+00, -9.17349252e-01,
       -8.34149592e-01, -7.50997891e-01, -6.68615467e-01, -5.87839616e-01,
       -5.09604359e-01, -4.34907498e-01, -3.64761483e-01, -3.00127224e-01,
       -2.41833317e-01, -1.90488722e-01, -1.46403986e-01, -1.09541891e-01,
       -7.95177283e-02, -5.56575665e-02, -3.71021918e-02, -2.29265365e-02,
       -1.22421209e-02, -4.26345563e-03,  1.66301915e-03,  6.05775300e-03,
        9.32248837e-0

In [3]:
import os

In [19]:
symbol = 'G_1'
LinearMMDInputFiles = '/media/ak/T7/August11th2022Experiments/LinearMMDInputFiles/'
bar_choice = 'volume'

# Each substring has to be tested on its own: `(a and b) in f` evaluates
# `a and b` first (which yields `b` for non-empty strings), so it would only
# check bar_choice and silently ignore the symbol.
# The listing is sorted so that the file picked below is deterministic.
matching_input_files = sorted(
    f for f in os.listdir(LinearMMDInputFiles)
    if str(symbol) in f and str(bar_choice) in f
)
if not matching_input_files:
    raise FileNotFoundError(
        f"No file in '{LinearMMDInputFiles}' contains both "
        f"'{symbol}' and '{bar_choice}' in its name."
    )

# Full path of the first matching input file.
file = os.path.join(LinearMMDInputFiles, matching_input_files[0])
outputDir = '/media/ak/T7/August11th2022Experiments/LinearMMDOutputFiles'

In [20]:
# Load the pickled object stored at the path held in `file`.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
dicts =pd.read_pickle(file)

In [21]:
# Build a DataFrame from the 'tau' entry of the loaded object; being the
# last expression of the cell, it is rendered as the cell's output.
pd.DataFrame.from_dict(dicts['tau'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,28,29,30,31,32,33,34,35,36,37
0,-3.260057,-3.150521,-3.380583,-3.351196,-15.442674,-3.145332,-3.245805,,-3.395495,-3.128305,...,-3.382134,-3.755359,-3.948934,-3.083850,-3.336710,-3.707450,-3.368221,-3.150614,-3.388678,-3.094057
1,-3.175605,-3.075617,-3.296493,-3.274181,-14.903088,-3.070161,-3.162049,,-3.317375,-3.055052,...,-3.303028,-3.671277,-3.856215,-3.012105,-3.253240,-3.622710,-3.290419,-3.075985,-3.306147,-3.020520
2,-3.091540,-3.000966,-3.212689,-3.197329,-14.362964,-2.995292,-3.078653,,-3.239416,-2.982135,...,-3.224050,-3.586877,-3.763119,-2.940653,-3.170075,-3.537607,-3.212546,-3.001653,-3.223806,-2.947422
3,-3.007917,-2.926583,-3.129218,-3.120629,-13.822274,-2.920741,-2.995677,,-3.161602,-2.909585,...,-3.145200,-3.502118,-3.669622,-2.869529,-3.087260,-3.452113,-3.134594,-2.927632,-3.141686,-2.874800
4,-2.924809,-2.852488,-3.046146,-3.044069,-13.281000,-2.846527,-2.913194,,-3.083909,-2.837435,...,-3.066478,-3.416956,-3.575696,-2.798771,-3.004852,-3.366203,-3.056553,-2.853935,-3.059829,-2.802696
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,0.020840,0.117213,0.032059,-0.001511,0.010723,0.114987,0.016779,0.019350,0.002039,0.052961,...,-0.004443,0.142091,0.186773,0.049489,0.008165,0.250658,0.366274,0.083000,0.025361,0.042840
66,0.021219,0.122100,0.032776,-0.003365,0.011064,0.119468,0.016769,0.019713,0.000327,0.053611,...,-0.006398,0.149122,0.196236,0.050286,0.007131,0.258780,0.385598,0.086366,0.025866,0.043357
67,0.021599,0.127015,0.033444,-0.005325,0.011396,0.123963,0.016747,0.020042,-0.001490,0.054198,...,-0.008454,0.156073,0.205572,0.051049,0.006046,0.266630,0.404926,0.089758,0.026324,0.043823
68,0.021983,0.131959,0.034068,-0.007385,0.011727,0.128475,0.016713,0.020349,-0.003405,0.054728,...,-0.010605,0.162950,0.214790,0.051783,0.004914,0.274225,0.424261,0.093179,0.026736,0.044245
