In [1]:
import os
import re
import pickle
import pandas as pd
from collections import defaultdict
import tabulate
from tabulate import tabulate

In [2]:


class MFDFAProcessor:
    def __init__(self, symbol, bar_choice, variable, folder_path):
        self.symbol = symbol
        self.bar_choice = bar_choice
        self.variable = variable
        self.folder_path = folder_path
        
    def _read_old_old_pickle_file(self, shift, window):
        file_path = os.path.join(self.folder_path, f"{self.symbol}_{self.variable}_shift_{shift}_wind_{window}_linear_test_ONE.pkl")
        print(f"Trying to read file: {file_path}")
        with open(file_path, "rb") as f:
            return pickle.load(f)

    def _read_old_pickle_file(self, shift, window):
        file_path = os.path.join(self.folder_path, f"{self.symbol}_{self.bar_choice}_{self.variable}_shift_{shift}_wind_{window}_linear_test_ONE.pkl")
        print(f"Trying to read file: {file_path}")
        with open(file_path, "rb") as f:
            return pickle.load(f)
    def _read_new_pickle_file(self):
        file_path = os.path.join(self.folder_path, f"{self.symbol}_{self.bar_choice}_{self.variable}_results.pickle")
        print(f"Trying to read file: {file_path}")
        with open(file_path, "rb") as f:
            return pickle.load(f)

    def process_defaultdict(self, data):
        processed_data = defaultdict(list)
        
        for key, value in data.items():
            shift, window = key
            h0_rejected_percentage = value['H0_rejected'] / len(value['p_values']) * 100
            median_test_stat = value['test_stats'].median()
            median_p_value = value['p_values'].median()
            
            processed_data['Shift'].append(shift)
            processed_data['Window'].append(window)
            processed_data['H0_rejected_percentage'].append(h0_rejected_percentage)
            processed_data['Median_test_stat'].append(median_test_stat)
            processed_data['Median_p_value'].append(median_p_value)
        
        return pd.DataFrame(processed_data)

    def _process_files(self):
        data = self._read_pickle_file(shift=1, window=1)
        summary_df = self.process_defaultdict(data)
        return summary_df

    def create_summary_table(self):
        summary_df = self._process_files()
        short_df = summary_df.round(2)
        short_df['H0_rejected_percentage'] = short_df['H0_rejected_percentage'].apply(lambda x: f"{x:.2f}%")

        latex_table = tabulate(short_df, tablefmt='latex_booktabs', headers='keys', showindex=False, floatfmt=".2f")

        with open(os.path.join(self.unique_dir, f"summary_table_{self.symbol}_{self.bar_choice}_{self.variable}.tex"), "w") as f:
            f.write(latex_table)



In [3]:
def summary_statistics_short(dfs):
    summary_data = []

    for (shift, window), df in dfs.items():
        total_rows = len(df)
        h0_rejected_count = df["H0_rejected"].sum()
        h0_rejected_percentage = (h0_rejected_count / total_rows) * 100
        median_test_stat = df["Test_stat"].median()
        median_p_value = df["P-value"].median()

        summary_data.append({
            "Shift": shift,
            "Window": window,
            "H0_rejected_percentage": h0_rejected_percentage,
            "Median_test_stat": median_test_stat,
            "Median_p_value": median_p_value
        })

    summary_df = pd.DataFrame(summary_data)
    # Format the H0_rejected_percentage column as a percentage
    summary_df["H0_rejected_percentage"] = summary_df["H0_rejected_percentage"].apply(lambda x: f"{x:.2f}%")
    summary_df["Median_test_stat"] = summary_df["Median_test_stat"].round(2)
    summary_df["Median_p_value"] = summary_df["Median_p_value"].round(2)
    return summary_df

In [4]:

def save_summary_table_to_unique_directory(summary_df, unique_dir, filename="summary_table.csv"):
    # Create the unique directory if it doesn't exist
    Path(unique_dir).mkdir(parents=True, exist_ok=True)

    # Save the summary DataFrame as a CSV file in the unique directory
    summary_df.to_csv(os.path.join(unique_dir, filename), index=False)

In [5]:
folder_path = "/media/ak/T71/August11th2022Experiments/ExperimentOne/LinearMMDOutputFiles"
symbol = "TY1"
variable = "alpha"
bar_choice = "dollar"

# Initialize the MFDFAProcessor with the required parameters
processor = MFDFAProcessor(symbol=symbol, bar_choice=bar_choice, variable=variable, folder_path=folder_path)

# Read a specific pickle file with shift=1 and window=1 using the old format
data = processor._read_old_old_pickle_file(shift=1, window=1)

# Print the loaded data
print(data)



Trying to read file: /media/ak/T71/August11th2022Experiments/ExperimentOne/LinearMMDOutputFiles/TY1_alpha_shift_1_wind_1_linear_test_ONE.pkl
defaultdict(<class 'dict'>, {0: {'widths': [0.18975206558303798, 0.21923056070844732, 0.2532886195513252, 0.2926376896856787, 0.33809974398482023, 0.3906244510246872, 0.4513090129556234, 0.5214210852410432, 0.6024252570393066, 0.6960136453843351, 0.8041412422546934, 0.92906675290518, 1.0733997785433653, 1.2401553289621776, 1.432816803856973, 1.655408758460064, 1.9125809735128163, 2.20970558573463, 2.55298930777223, 2.9496030818207237, 3.4078318752843413, 3.937247747597058, 4.548909803440546, 5.255595209233754, 6.072066098657215], 'med': 0.37950413116607595, 'besti': 0, 'powers': array([0.99944702, 0.9977809 , 0.99330218, 0.98440242, 0.97079984,
       0.95398764, 0.93628564, 0.91964945, 0.90517286, 0.89319746,
       0.88361182, 0.87610149, 0.87029915, 0.86585799, 0.86248003,
       0.85992182, 0.85799027, 0.85653496, 0.85544015, 0.85461743,
     

In [7]:
folder_path = "/media/ak/T71/August11th2022Experiments/ExperimentOne/LinearMMDOutputFiles"
symbol = "TY1"
variable = "alpha"
bar_choice = "volume"

# Initialize the MFDFAProcessor with the required parameters
processor = MFDFAProcessor(symbol=symbol, bar_choice=bar_choice, variable=variable, folder_path=folder_path)

# Read the specific pickle file using the new format
data = processor._read_new_pickle_file()

# Print the loaded data
print(data)


Trying to read file: /media/ak/T71/August11th2022Experiments/ExperimentOne/LinearMMDOutputFiles/TY1_volume_alpha_results.pickle


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

